From 750c2d07f7f317903ae11ab57c4b45b7b613f682 Mon Sep 17 00:00:00 2001 From: John Kessenich Date: Fri, 26 May 2017 00:01:36 -0600 Subject: [PATCH] SPV: When passing structs of opaque types, flatten and pass the members instead. This avoids either A) needing uniformConstant struct, or B) initializing a struct with opaque members, as writing them is not allowed. --- Test/baseResults/hlsl.flattenOpaque.frag.out | 297 +++++++++++++++++++ Test/hlsl.flattenOpaque.frag | 40 +++ gtests/Hlsl.FromFile.cpp | 1 + hlsl/hlslParseHelper.cpp | 252 +++++++++++----- hlsl/hlslParseHelper.h | 6 +- hlsl/hlslParseables.cpp | 2 +- 6 files changed, 528 insertions(+), 70 deletions(-) create mode 100755 Test/baseResults/hlsl.flattenOpaque.frag.out create mode 100644 Test/hlsl.flattenOpaque.frag diff --git a/Test/baseResults/hlsl.flattenOpaque.frag.out b/Test/baseResults/hlsl.flattenOpaque.frag.out new file mode 100755 index 000000000..392ff722a --- /dev/null +++ b/Test/baseResults/hlsl.flattenOpaque.frag.out @@ -0,0 +1,297 @@ +hlsl.flattenOpaque.frag +Shader version: 500 +gl_FragCoord origin is upper left +0:? Sequence +0:15 Function Definition: osCall1(struct-os-p11; ( temp 4-component vector of float) +0:15 Function Parameters: +0:? 's2D' ( in sampler) +0:? Sequence +0:16 Branch: Return with expression +0:16 texture ( temp 4-component vector of float) +0:16 Construct combined texture-sampler ( temp sampler2D) +0:16 'tex' ( uniform texture2D) +0:? 's2D' ( in sampler) +0:? Constant: +0:? 0.200000 +0:? 0.300000 +0:20 Function Definition: osCall2(struct-os-p11;vf2; ( temp 4-component vector of float) +0:20 Function Parameters: +0:? 's2D' ( in sampler) +0:20 'f2' ( in 2-component vector of float) +0:? Sequence +0:21 Branch: Return with expression +0:21 texture ( temp 4-component vector of float) +0:21 Construct combined texture-sampler ( temp sampler2D) +0:21 'tex' ( uniform texture2D) +0:? 
's2D' ( in sampler) +0:21 'f2' ( in 2-component vector of float) +0:25 Function Definition: os2Call1(struct-os2-p1-t211; ( temp 4-component vector of float) +0:25 Function Parameters: +0:? 's2D' ( in sampler) +0:? 'tex' ( in texture2D) +0:? Sequence +0:26 Branch: Return with expression +0:26 texture ( temp 4-component vector of float) +0:26 Construct combined texture-sampler ( temp sampler2D) +0:? 'tex' ( in texture2D) +0:? 's2D' ( in sampler) +0:? Constant: +0:? 0.200000 +0:? 0.300000 +0:30 Function Definition: os2Call2(struct-os2-p1-t211;vf2; ( temp 4-component vector of float) +0:30 Function Parameters: +0:? 's2D' ( in sampler) +0:? 'tex' ( in texture2D) +0:30 'f2' ( in 2-component vector of float) +0:? Sequence +0:31 Branch: Return with expression +0:31 texture ( temp 4-component vector of float) +0:31 Construct combined texture-sampler ( temp sampler2D) +0:? 'tex' ( in texture2D) +0:? 's2D' ( in sampler) +0:31 'f2' ( in 2-component vector of float) +0:35 Function Definition: @main( ( temp 4-component vector of float) +0:35 Function Parameters: +0:? Sequence +0:39 Branch: Return with expression +0:38 add ( temp 4-component vector of float) +0:37 add ( temp 4-component vector of float) +0:36 add ( temp 4-component vector of float) +0:36 Function Call: osCall1(struct-os-p11; ( temp 4-component vector of float) +0:? 's2D' ( uniform sampler) +0:37 Function Call: osCall2(struct-os-p11;vf2; ( temp 4-component vector of float) +0:? 's2D' ( uniform sampler) +0:? Constant: +0:? 0.200000 +0:? 0.300000 +0:38 Function Call: os2Call1(struct-os2-p1-t211; ( temp 4-component vector of float) +0:? 's2D' ( uniform sampler) +0:? 'tex' ( uniform texture2D) +0:39 Function Call: os2Call2(struct-os2-p1-t211;vf2; ( temp 4-component vector of float) +0:? 's2D' ( uniform sampler) +0:? 'tex' ( uniform texture2D) +0:? Constant: +0:? 0.200000 +0:? 0.300000 +0:35 Function Definition: main( ( temp void) +0:35 Function Parameters: +0:? 
Sequence +0:35 move second child to first child ( temp 4-component vector of float) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:35 Function Call: @main( ( temp 4-component vector of float) +0:? Linker Objects +0:? 'tex' ( uniform texture2D) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) + + +Linked fragment stage: + + +Shader version: 500 +gl_FragCoord origin is upper left +0:? Sequence +0:15 Function Definition: osCall1(struct-os-p11; ( temp 4-component vector of float) +0:15 Function Parameters: +0:? 's2D' ( in sampler) +0:? Sequence +0:16 Branch: Return with expression +0:16 texture ( temp 4-component vector of float) +0:16 Construct combined texture-sampler ( temp sampler2D) +0:16 'tex' ( uniform texture2D) +0:? 's2D' ( in sampler) +0:? Constant: +0:? 0.200000 +0:? 0.300000 +0:20 Function Definition: osCall2(struct-os-p11;vf2; ( temp 4-component vector of float) +0:20 Function Parameters: +0:? 's2D' ( in sampler) +0:20 'f2' ( in 2-component vector of float) +0:? Sequence +0:21 Branch: Return with expression +0:21 texture ( temp 4-component vector of float) +0:21 Construct combined texture-sampler ( temp sampler2D) +0:21 'tex' ( uniform texture2D) +0:? 's2D' ( in sampler) +0:21 'f2' ( in 2-component vector of float) +0:25 Function Definition: os2Call1(struct-os2-p1-t211; ( temp 4-component vector of float) +0:25 Function Parameters: +0:? 's2D' ( in sampler) +0:? 'tex' ( in texture2D) +0:? Sequence +0:26 Branch: Return with expression +0:26 texture ( temp 4-component vector of float) +0:26 Construct combined texture-sampler ( temp sampler2D) +0:? 'tex' ( in texture2D) +0:? 's2D' ( in sampler) +0:? Constant: +0:? 0.200000 +0:? 0.300000 +0:30 Function Definition: os2Call2(struct-os2-p1-t211;vf2; ( temp 4-component vector of float) +0:30 Function Parameters: +0:? 's2D' ( in sampler) +0:? 'tex' ( in texture2D) +0:30 'f2' ( in 2-component vector of float) +0:? 
Sequence +0:31 Branch: Return with expression +0:31 texture ( temp 4-component vector of float) +0:31 Construct combined texture-sampler ( temp sampler2D) +0:? 'tex' ( in texture2D) +0:? 's2D' ( in sampler) +0:31 'f2' ( in 2-component vector of float) +0:35 Function Definition: @main( ( temp 4-component vector of float) +0:35 Function Parameters: +0:? Sequence +0:39 Branch: Return with expression +0:38 add ( temp 4-component vector of float) +0:37 add ( temp 4-component vector of float) +0:36 add ( temp 4-component vector of float) +0:36 Function Call: osCall1(struct-os-p11; ( temp 4-component vector of float) +0:? 's2D' ( uniform sampler) +0:37 Function Call: osCall2(struct-os-p11;vf2; ( temp 4-component vector of float) +0:? 's2D' ( uniform sampler) +0:? Constant: +0:? 0.200000 +0:? 0.300000 +0:38 Function Call: os2Call1(struct-os2-p1-t211; ( temp 4-component vector of float) +0:? 's2D' ( uniform sampler) +0:? 'tex' ( uniform texture2D) +0:39 Function Call: os2Call2(struct-os2-p1-t211;vf2; ( temp 4-component vector of float) +0:? 's2D' ( uniform sampler) +0:? 'tex' ( uniform texture2D) +0:? Constant: +0:? 0.200000 +0:? 0.300000 +0:35 Function Definition: main( ( temp void) +0:35 Function Parameters: +0:? Sequence +0:35 move second child to first child ( temp 4-component vector of float) +0:? '@entryPointOutput' (layout( location=0) out 4-component vector of float) +0:35 Function Call: @main( ( temp 4-component vector of float) +0:? Linker Objects +0:? 'tex' ( uniform texture2D) +0:? 
'@entryPointOutput' (layout( location=0) out 4-component vector of float) + +// Module Version 10000 +// Generated by (magic number): 80001 +// Id's are bound by 85 + + Capability Shader + 1: ExtInstImport "GLSL.std.450" + MemoryModel Logical GLSL450 + EntryPoint Fragment 4 "main" 83 + ExecutionMode 4 OriginUpperLeft + Source HLSL 500 + Name 4 "main" + Name 12 "osCall1(struct-os-p11;" + Name 11 "s2D" + Name 19 "osCall2(struct-os-p11;vf2;" + Name 17 "s2D" + Name 18 "f2" + Name 26 "os2Call1(struct-os2-p1-t211;" + Name 24 "s2D" + Name 25 "tex" + Name 32 "os2Call2(struct-os2-p1-t211;vf2;" + Name 29 "s2D" + Name 30 "tex" + Name 31 "f2" + Name 35 "@main(" + Name 37 "tex" + Name 68 "s2D" + Name 70 "param" + Name 73 "s2D" + Name 74 "tex" + Name 77 "param" + Name 83 "@entryPointOutput" + Decorate 37(tex) DescriptorSet 0 + Decorate 68(s2D) DescriptorSet 0 + Decorate 73(s2D) DescriptorSet 0 + Decorate 74(tex) DescriptorSet 0 + Decorate 83(@entryPointOutput) Location 0 + 2: TypeVoid + 3: TypeFunction 2 + 6: TypeSampler + 7: TypePointer UniformConstant 6 + 8: TypeFloat 32 + 9: TypeVector 8(float) 4 + 10: TypeFunction 9(fvec4) 7(ptr) + 14: TypeVector 8(float) 2 + 15: TypePointer Function 14(fvec2) + 16: TypeFunction 9(fvec4) 7(ptr) 15(ptr) + 21: TypeImage 8(float) 2D sampled format:Unknown + 22: TypePointer UniformConstant 21 + 23: TypeFunction 9(fvec4) 7(ptr) 22(ptr) + 28: TypeFunction 9(fvec4) 7(ptr) 22(ptr) 15(ptr) + 34: TypeFunction 9(fvec4) + 37(tex): 22(ptr) Variable UniformConstant + 40: TypeSampledImage 21 + 42: 8(float) Constant 1045220557 + 43: 8(float) Constant 1050253722 + 44: 14(fvec2) ConstantComposite 42 43 + 68(s2D): 7(ptr) Variable UniformConstant + 73(s2D): 7(ptr) Variable UniformConstant + 74(tex): 22(ptr) Variable UniformConstant + 82: TypePointer Output 9(fvec4) +83(@entryPointOutput): 82(ptr) Variable Output + 4(main): 2 Function None 3 + 5: Label + 84: 9(fvec4) FunctionCall 35(@main() + Store 83(@entryPointOutput) 84 + Return + FunctionEnd 
+12(osCall1(struct-os-p11;): 9(fvec4) Function None 10 + 11(s2D): 7(ptr) FunctionParameter + 13: Label + 38: 21 Load 37(tex) + 39: 6 Load 11(s2D) + 41: 40 SampledImage 38 39 + 45: 9(fvec4) ImageSampleImplicitLod 41 44 + ReturnValue 45 + FunctionEnd +19(osCall2(struct-os-p11;vf2;): 9(fvec4) Function None 16 + 17(s2D): 7(ptr) FunctionParameter + 18(f2): 15(ptr) FunctionParameter + 20: Label + 48: 21 Load 37(tex) + 49: 6 Load 17(s2D) + 50: 40 SampledImage 48 49 + 51: 14(fvec2) Load 18(f2) + 52: 9(fvec4) ImageSampleImplicitLod 50 51 + ReturnValue 52 + FunctionEnd +26(os2Call1(struct-os2-p1-t211;): 9(fvec4) Function None 23 + 24(s2D): 7(ptr) FunctionParameter + 25(tex): 22(ptr) FunctionParameter + 27: Label + 55: 21 Load 25(tex) + 56: 6 Load 24(s2D) + 57: 40 SampledImage 55 56 + 58: 9(fvec4) ImageSampleImplicitLod 57 44 + ReturnValue 58 + FunctionEnd +32(os2Call2(struct-os2-p1-t211;vf2;): 9(fvec4) Function None 28 + 29(s2D): 7(ptr) FunctionParameter + 30(tex): 22(ptr) FunctionParameter + 31(f2): 15(ptr) FunctionParameter + 33: Label + 61: 21 Load 30(tex) + 62: 6 Load 29(s2D) + 63: 40 SampledImage 61 62 + 64: 14(fvec2) Load 31(f2) + 65: 9(fvec4) ImageSampleImplicitLod 63 64 + ReturnValue 65 + FunctionEnd + 35(@main(): 9(fvec4) Function None 34 + 36: Label + 70(param): 15(ptr) Variable Function + 77(param): 15(ptr) Variable Function + 69: 9(fvec4) FunctionCall 12(osCall1(struct-os-p11;) 68(s2D) + Store 70(param) 44 + 71: 9(fvec4) FunctionCall 19(osCall2(struct-os-p11;vf2;) 68(s2D) 70(param) + 72: 9(fvec4) FAdd 69 71 + 75: 9(fvec4) FunctionCall 26(os2Call1(struct-os2-p1-t211;) 73(s2D) 74(tex) + 76: 9(fvec4) FAdd 72 75 + Store 77(param) 44 + 78: 9(fvec4) FunctionCall 32(os2Call2(struct-os2-p1-t211;vf2;) 73(s2D) 74(tex) 77(param) + 79: 9(fvec4) FAdd 76 78 + ReturnValue 79 + FunctionEnd diff --git a/Test/hlsl.flattenOpaque.frag b/Test/hlsl.flattenOpaque.frag new file mode 100644 index 000000000..279be8a0b --- /dev/null +++ b/Test/hlsl.flattenOpaque.frag @@ -0,0 +1,40 @@ 
+struct os { + sampler2D s2D; +}; + +struct os2 { + sampler2D s2D; + Texture2D tex; +}; + +Texture2D tex; +os s; +os2 s2; + +float4 osCall1(os s) +{ + return tex.Sample(s.s2D, float2(0.2, 0.3)); +} + +float4 osCall2(os s, float2 f2) +{ + return tex.Sample(s.s2D, f2); +} + +float4 os2Call1(os2 s) +{ + return s.tex.Sample(s.s2D, float2(0.2, 0.3)); +} + +float4 os2Call2(os2 s, float2 f2) +{ + return s.tex.Sample(s.s2D, f2); +} + +float4 main() : SV_TARGET0 +{ + return osCall1(s) + + osCall2(s, float2(0.2, 0.3)) + + os2Call1(s2) + + os2Call2(s2, float2(0.2, 0.3)); +} diff --git a/gtests/Hlsl.FromFile.cpp b/gtests/Hlsl.FromFile.cpp index 87c830251..91d7ae61a 100644 --- a/gtests/Hlsl.FromFile.cpp +++ b/gtests/Hlsl.FromFile.cpp @@ -114,6 +114,7 @@ INSTANTIATE_TEST_CASE_P( {"hlsl.float1.frag", "PixelShaderFunction"}, {"hlsl.float4.frag", "PixelShaderFunction"}, {"hlsl.flatten.return.frag", "main"}, + {"hlsl.flattenOpaque.frag", "main"}, {"hlsl.forLoop.frag", "PixelShaderFunction"}, {"hlsl.gather.array.dx10.frag", "main"}, {"hlsl.gather.basic.dx10.frag", "main"}, diff --git a/hlsl/hlslParseHelper.cpp b/hlsl/hlslParseHelper.cpp index a105b84dc..43949b51b 100755 --- a/hlsl/hlslParseHelper.cpp +++ b/hlsl/hlslParseHelper.cpp @@ -774,7 +774,7 @@ TIntermTyped* HlslParseContext::handleBracketDereference(const TSourceLoc& loc, else { // at least one of base and index is variable... 
- if (base->getAsSymbolNode() && (wasFlattened(base) || shouldFlattenUniform(base->getType()))) { + if (base->getAsSymbolNode() && (wasFlattened(base) || shouldFlatten(base->getType()))) { if (index->getQualifier().storage != EvqConst) error(loc, "Invalid variable index to flattened array", base->getAsSymbolNode()->getName().c_str(), ""); @@ -981,7 +981,7 @@ TIntermTyped* HlslParseContext::handleDotDereference(const TSourceLoc& loc, TInt } } if (fieldFound) { - if (base->getAsSymbolNode() && (wasFlattened(base) || shouldFlattenUniform(base->getType()))) { + if (base->getAsSymbolNode() && (wasFlattened(base) || shouldFlatten(base->getType()))) { result = flattenAccess(base, member); } else { // Update the base and member to access if this was a split structure. @@ -1115,14 +1115,13 @@ TType& HlslParseContext::split(TType& type, TString name, const TType* outerStru return type; } -// Is this a uniform array which should be flattened? -bool HlslParseContext::shouldFlattenUniform(const TType& type) const +// Is this a uniform array or structure which should be flattened? +bool HlslParseContext::shouldFlatten(const TType& type) const { const TStorageQualifier qualifier = type.getQualifier().storage; - return qualifier == EvqUniform && - ((type.isArray() && intermediate.getFlattenUniformArrays()) || type.isStruct()) && - type.containsOpaque(); + return (qualifier == EvqUniform && type.isArray() && intermediate.getFlattenUniformArrays()) || + type.isStruct() && type.containsOpaque(); } // Top level variable flattening: construct data @@ -1285,16 +1284,22 @@ bool HlslParseContext::wasSplit(const TIntermTyped* node) const // Turn an access into an aggregate that was flattened to instead be // an access to the individual variable the member was flattened to. // Assumes shouldFlatten() or equivalent was called first. +// Also assumes that initFlattening() and finalizeFlattening() bracket usage. 
TIntermTyped* HlslParseContext::flattenAccess(TIntermTyped* base, int member) { const TType dereferencedType(base->getType(), member); // dereferenced type - const TIntermSymbol& symbolNode = *base->getAsSymbolNode(); - const auto flattenData = flattenMap.find(symbolNode.getId()); + TIntermTyped* flattened = flattenAccess(symbolNode.getId(), member, dereferencedType); + + return flattened ? flattened : base; +} +TIntermTyped* HlslParseContext::flattenAccess(int uniqueId, int member, const TType& dereferencedType) +{ + const auto flattenData = flattenMap.find(uniqueId); if (flattenData == flattenMap.end()) - return base; + return nullptr; // Calculate new cumulative offset from the packed tree flattenOffset.back() = flattenData->second.offsets[flattenOffset.back() + member]; @@ -1307,7 +1312,7 @@ TIntermTyped* HlslParseContext::flattenAccess(TIntermTyped* base, int member) } else { // If this is not the final flattening, accumulate the position and return // an object of the partially dereferenced type. - return new TIntermSymbol(symbolNode.getId(), "flattenShadow", dereferencedType); + return new TIntermSymbol(uniqueId, "flattenShadow", dereferencedType); } } @@ -1663,15 +1668,32 @@ TIntermAggregate* HlslParseContext::handleFunctionDefinition(const TSourceLoc& l symbolTable.makeInternalVariable(*variable); pushImplicitThis(variable); } + // Insert the parameters with name in the symbol table. if (! symbolTable.insert(*variable)) error(loc, "redefinition", variable->getName().c_str(), ""); - // Add the parameter to the AST - paramNodes = intermediate.growAggregate(paramNodes, - intermediate.addSymbol(*variable, loc), - loc); - // Add hidden parameter for struct buffer counters, if needed. + // Add parameters to the AST list. + if (shouldFlatten(variable->getType())) { + // Expand the AST parameter nodes (but not the name mangling or symbol table view) + // for structures that need to be flattened. 
+ flatten(loc, *variable); + const TTypeList* structure = variable->getType().getStruct(); + for (int mem = 0; mem < (int)structure->size(); ++mem) { + initFlattening(); + paramNodes = intermediate.growAggregate(paramNodes, + flattenAccess(variable->getUniqueId(), mem, *(*structure)[mem].type), + loc); + finalizeFlattening(); + } + } else { + // Add the parameter to the AST + paramNodes = intermediate.growAggregate(paramNodes, + intermediate.addSymbol(*variable, loc), + loc); + } + + // Add hidden AST parameter for struct buffer counters, if needed. addStructBufferHiddenCounterParam(loc, param, paramNodes); } else paramNodes = intermediate.growAggregate(paramNodes, intermediate.addSymbol(*param.type, loc), loc); @@ -2265,7 +2287,7 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op const bool flattened = isLeft ? isFlattenLeft : isFlattenRight; const bool split = isLeft ? isSplitLeft : isSplitRight; const TIntermTyped* outer = isLeft ? outerLeft : outerRight; - const TVector& flatVariables = isLeft ? *leftVariables : *rightVariables; + const TVector& flatVariables = isLeft ? *leftVariables : *rightVariables; // Index operator if it's an aggregate, else EOpNull const TOperator op = node->getType().isArray() ? EOpIndexDirect : @@ -2320,7 +2342,7 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op const int elementsToCopy = std::min(elementsL, elementsR); // array case - for (int element=0; element < elementsToCopy; ++element) { + for (int element = 0; element < elementsToCopy; ++element) { arrayElement.push_back(element); // Add a new AST symbol node if we have a temp variable holding a complex RHS. @@ -2511,7 +2533,7 @@ bool HlslParseContext::hasStructBuffCounter(const TType& type) const case EbvRWStructuredBuffer: // ... return true; default: - return false; // the other structuredbfufer types do not have a counter. + return false; // the other structuredbuffer types do not have a counter. 
} } @@ -4419,14 +4441,18 @@ TIntermTyped* HlslParseContext::handleFunctionCall(const TSourceLoc& loc, TFunct pushFrontArguments(intermediate.addSymbol(*getImplicitThis(thisDepth)), arguments); } - // Convert 'in' arguments + // Convert 'in' arguments, so that types match. + // However, skip those that need expansion, that is covered next. if (arguments) addInputArgumentConversions(*fnCandidate, arguments); - // If any argument is a pass-by-reference struct buffer with an associated counter - // buffer, we have to add another hidden parameter for that counter. - if (aggregate && !builtIn) - addStructBuffArguments(loc, aggregate); + // Expand arguments. Some arguments must physically expand to a different set + // than what the shader declared and passes. + if (arguments && !builtIn) + expandArguments(loc, *fnCandidate, arguments); + + // Expansion may have changed the form of arguments + aggregate = arguments ? arguments->getAsAggregate() : nullptr; op = fnCandidate->getBuiltInOp(); if (builtIn && op != EOpNull) { @@ -4464,24 +4490,35 @@ TIntermTyped* HlslParseContext::handleFunctionCall(const TSourceLoc& loc, TFunct decomposeSampleMethods(loc, result, arguments); // HLSL->AST sample method decompositions decomposeGeometryMethods(loc, result, arguments); // HLSL->AST geometry method decompositions + // Create the qualifier list, carried in the AST for the call. + // Because some arguments expand to multiple arguments, the qualifier list will + // be longer than the formal parameter list. 
+ if (result == fnNode && result->getAsAggregate()) { + TQualifierList& qualifierList = result->getAsAggregate()->getQualifierList(); + for (int i = 0; i < fnCandidate->getParamCount(); ++i) { + TStorageQualifier qual = (*fnCandidate)[i].type->getQualifier().storage; + if (hasStructBuffCounter(*(*fnCandidate)[i].type)) { + // add buffer and counter buffer argument qualifier + qualifierList.push_back(qual); + qualifierList.push_back(qual); + } else if (shouldFlatten(*(*fnCandidate)[i].type)) { + // add structure member expansion + for (int memb = 0; memb < (int)(*fnCandidate)[i].type->getStruct()->size(); ++memb) + qualifierList.push_back(qual); + } else { + // Normal 1:1 case + qualifierList.push_back(qual); + } + } + } + // Convert 'out' arguments. If it was a constant folded built-in, it won't be an aggregate anymore. // Built-ins with a single argument aren't called with an aggregate, but they also don't have an output. // Also, build the qualifier list for user function calls, which are always called with an aggregate. // We don't do this is if there has been a decomposition, which will have added its own conversions // for output parameters. 
- if (result == fnNode && result->getAsAggregate()) { - TQualifierList& qualifierList = result->getAsAggregate()->getQualifierList(); - for (int i = 0; i < fnCandidate->getParamCount(); ++i) { - TStorageQualifier qual = (*fnCandidate)[i].type->getQualifier().storage; - qualifierList.push_back(qual); - - // add counter buffer argument qualifier - if (hasStructBuffCounter(*(*fnCandidate)[i].type)) - qualifierList.push_back(qual); - } - + if (result == fnNode && result->getAsAggregate()) result = addOutputArgumentConversions(*fnCandidate, *result->getAsOperator()); - } } } @@ -4512,20 +4549,22 @@ void HlslParseContext::pushFrontArguments(TIntermTyped* front, TIntermTyped*& ar void HlslParseContext::addInputArgumentConversions(const TFunction& function, TIntermTyped*& arguments) { TIntermAggregate* aggregate = arguments->getAsAggregate(); - const auto setArg = [&](int argNum, TIntermTyped* arg) { + + // Replace a single argument with a single argument. + const auto setArg = [&](int paramNum, TIntermTyped* arg) { if (function.getParamCount() == 1) arguments = arg; else { - if (aggregate) - aggregate->getSequence()[argNum] = arg; - else + if (aggregate == nullptr) arguments = arg; + else + aggregate->getSequence()[paramNum] = arg; } }; // Process each argument's conversion - for (int i = 0; i < function.getParamCount(); ++i) { - if (! function[i].type->getQualifier().isParamInput()) + for (int param = 0; param < function.getParamCount(); ++param) { + if (! function[param].type->getQualifier().isParamInput()) continue; // At this early point there is a slight ambiguity between whether an aggregate 'arguments' @@ -4533,42 +4572,121 @@ void HlslParseContext::addInputArgumentConversions(const TFunction& function, TI // means take 'arguments' itself as the one argument. TIntermTyped* arg = function.getParamCount() == 1 ? arguments->getAsTyped() - : (aggregate ? 
aggregate->getSequence()[i]->getAsTyped() : arguments->getAsTyped()); - if (*function[i].type != arg->getType()) { + : (aggregate ? + aggregate->getSequence()[param]->getAsTyped() : + arguments->getAsTyped()); + if (*function[param].type != arg->getType()) { // In-qualified arguments just need an extra node added above the argument to // convert to the correct type. - TIntermTyped* convArg = intermediate.addConversion(EOpFunctionCall, *function[i].type, arg); + TIntermTyped* convArg = intermediate.addConversion(EOpFunctionCall, *function[param].type, arg); if (convArg != nullptr) - convArg = intermediate.addUniShapeConversion(EOpFunctionCall, *function[i].type, convArg); + convArg = intermediate.addUniShapeConversion(EOpFunctionCall, *function[param].type, convArg); if (convArg != nullptr) - setArg(i, convArg); + setArg(param, convArg); else - error(arg->getLoc(), "cannot convert input argument, argument", "", "%d", i); + error(arg->getLoc(), "cannot convert input argument, argument", "", "%d", param); } else { if (wasFlattened(arg) || wasSplit(arg)) { - // Will make a two-level subtree. - // The deepest will copy member-by-member to build the structure to pass. - // The level above that will be a two-operand EOpComma sequence that follows the copy by the - // object itself. - TVariable* internalAggregate = makeInternalVariable("aggShadow", *function[i].type); - internalAggregate->getWritableType().getQualifier().makeTemporary(); - TIntermSymbol* internalSymbolNode = new TIntermSymbol(internalAggregate->getUniqueId(), - internalAggregate->getName(), - internalAggregate->getType()); - internalSymbolNode->setLoc(arg->getLoc()); - // This makes the deepest level, the member-wise copy - TIntermAggregate* assignAgg = handleAssign(arg->getLoc(), EOpAssign, internalSymbolNode, arg)->getAsAggregate(); + // If both formal and calling arg are to be flattened, leave that to argument + // expansion, not conversion. 
+ if (!shouldFlatten(*function[param].type)) { + // Will make a two-level subtree. + // The deepest will copy member-by-member to build the structure to pass. + // The level above that will be a two-operand EOpComma sequence that follows the copy by the + // object itself. + TVariable* internalAggregate = makeInternalVariable("aggShadow", *function[param].type); + internalAggregate->getWritableType().getQualifier().makeTemporary(); + TIntermSymbol* internalSymbolNode = new TIntermSymbol(internalAggregate->getUniqueId(), + internalAggregate->getName(), + internalAggregate->getType()); + internalSymbolNode->setLoc(arg->getLoc()); + // This makes the deepest level, the member-wise copy + TIntermAggregate* assignAgg = handleAssign(arg->getLoc(), EOpAssign, internalSymbolNode, arg)->getAsAggregate(); - // Now, pair that with the resulting aggregate. - assignAgg = intermediate.growAggregate(assignAgg, internalSymbolNode, arg->getLoc()); - assignAgg->setOperator(EOpComma); - assignAgg->setType(internalAggregate->getType()); - setArg(i, assignAgg); + // Now, pair that with the resulting aggregate. + assignAgg = intermediate.growAggregate(assignAgg, internalSymbolNode, arg->getLoc()); + assignAgg->setOperator(EOpComma); + assignAgg->setType(internalAggregate->getType()); + setArg(param, assignAgg); + } } } } } +// +// Add any needed implicit expansion of calling arguments from what the shader listed to what's +// internally needed for the AST (given the constraints downstream). +// +void HlslParseContext::expandArguments(const TSourceLoc& loc, const TFunction& function, TIntermTyped*& arguments) +{ + TIntermAggregate* aggregate = arguments->getAsAggregate(); + int functionParamNumberOffset = 0; + + // Replace a single argument with a single argument. 
+ const auto setArg = [&](int paramNum, TIntermTyped* arg) { + if (function.getParamCount() + functionParamNumberOffset == 1) + arguments = arg; + else { + if (aggregate == nullptr) + arguments = arg; + else + aggregate->getSequence()[paramNum] = arg; + } + }; + + // Replace a single argument with a list of arguments + const auto setArgList = [&](int paramNum, const TVector<TIntermTyped*>& args) { + if (args.size() == 1) + setArg(paramNum, args.front()); + else { + if (function.getParamCount() + functionParamNumberOffset == 1) { + arguments = intermediate.makeAggregate(args.front()); + std::for_each(args.begin() + 1, args.end(), + [&](TIntermTyped* arg) { + arguments = intermediate.growAggregate(arguments, arg); + }); + } else { + auto it = aggregate->getSequence().erase(aggregate->getSequence().begin() + paramNum); + aggregate->getSequence().insert(it, args.begin(), args.end()); + } + } + functionParamNumberOffset += (args.size() - 1); + }; + + // Process each argument's conversion + for (int param = 0; param < function.getParamCount(); ++param) { + // At this early point there is a slight ambiguity between whether an aggregate 'arguments' + // is the single argument itself or its children are the arguments. Only one argument + // means take 'arguments' itself as the one argument. + TIntermTyped* arg = function.getParamCount() == 1 + ? arguments->getAsTyped() + : (aggregate ? + aggregate->getSequence()[param + functionParamNumberOffset]->getAsTyped() : + arguments->getAsTyped()); + + if (wasFlattened(arg) && shouldFlatten(*function[param].type)) { + // Need to pass the structure members instead of the structure. 
+ TVector<TIntermTyped*> memberArgs; + for (int memb = 0; memb < (int)arg->getType().getStruct()->size(); ++memb) { + initFlattening(); + memberArgs.push_back(flattenAccess(arg, memb)); + finalizeFlattening(); + } + setArgList(param + functionParamNumberOffset, memberArgs); + } + } + + // TODO: if we need both hidden counter args (below) and struct expansion (above) + // the two algorithms need to be merged: Each assumes the list starts out 1:1 between + // parameters and arguments. + + // If any argument is a pass-by-reference struct buffer with an associated counter + // buffer, we have to add another hidden parameter for that counter. + if (aggregate) + addStructBuffArguments(loc, aggregate); +} + // // Add any needed implicit output conversions for function-call arguments. This // can require a new tree topology, complicated further by whether the function @@ -4682,7 +4800,7 @@ void HlslParseContext::addStructBuffArguments(const TSourceLoc& loc, TIntermAggr TIntermSequence argsWithCounterBuffers; - for (int param=0; param<int(aggregate->getSequence().size()); ++param) { + for (int param = 0; param < int(aggregate->getSequence().size()); ++param) { argsWithCounterBuffers.push_back(aggregate->getSequence()[param]); if (hasStructBuffCounter(aggregate->getSequence()[param]->getAsTyped()->getType())) { @@ -6769,7 +6887,7 @@ TIntermNode* HlslParseContext::declareVariable(const TSourceLoc& loc, const TStr inheritGlobalDefaults(type.getQualifier()); - const bool flattenVar = shouldFlattenUniform(type); + const bool flattenVar = shouldFlatten(type); // correct IO in the type switch (type.getQualifier().storage) { diff --git a/hlsl/hlslParseHelper.h b/hlsl/hlslParseHelper.h index f7b293abb..eeba37e5e 100755 --- a/hlsl/hlslParseHelper.h +++ b/hlsl/hlslParseHelper.h @@ -96,6 +96,7 @@ public: void decomposeGeometryMethods(const TSourceLoc&, TIntermTyped*& node, TIntermNode* arguments); void pushFrontArguments(TIntermTyped* front, TIntermTyped*& arguments); void addInputArgumentConversions(const 
TFunction&, TIntermTyped*&); + void expandArguments(const TSourceLoc&, const TFunction&, TIntermTyped*&); TIntermTyped* addOutputArgumentConversions(const TFunction&, TIntermOperator&); void builtInOpCheck(const TSourceLoc&, const TFunction&, TIntermOperator&); TFunction* makeConstructorCall(const TSourceLoc&, const TType&); @@ -236,13 +237,14 @@ protected: // Array and struct flattening TIntermTyped* flattenAccess(TIntermTyped* base, int member); - bool shouldFlattenUniform(const TType&) const; + TIntermTyped* flattenAccess(int uniqueId, int member, const TType&); + bool shouldFlatten(const TType&) const; bool wasFlattened(const TIntermTyped* node) const; bool wasFlattened(int id) const { return flattenMap.find(id) != flattenMap.end(); } int addFlattenedMember(const TSourceLoc& loc, const TVariable&, const TType&, TFlattenData&, const TString& name, bool track); bool isFinalFlattening(const TType& type) const { return !(type.isStruct() || type.isArray()); } - // Structure splitting (splits interstage builtin types into its own struct) + // Structure splitting (splits interstage built-in types into its own struct) TIntermTyped* splitAccessStruct(const TSourceLoc& loc, TIntermTyped*& base, int& member); void splitAccessArray(const TSourceLoc& loc, TIntermTyped* base, TIntermTyped* index); TType& split(TType& type, TString name, const TType* outerStructType = nullptr); diff --git a/hlsl/hlslParseables.cpp b/hlsl/hlslParseables.cpp index e094aef9d..db67c39df 100755 --- a/hlsl/hlslParseables.cpp +++ b/hlsl/hlslParseables.cpp @@ -502,7 +502,7 @@ void TBuiltInParseablesHlsl::initialize(int /*version*/, EProfile /*profile*/, c static const EShLanguageMask EShLangAll = EShLanguageMask(EShLangCount - 1); // These are the actual stage masks defined in the documentation, in case they are - // needed for furture validation. For now, they are commented out, and set below + // needed for future validation. 
For now, they are commented out, and set below // to EShLangAll, to allow any intrinsic to be used in any shader, which is legal // if it is not called. //