HLSL: opcode specific promotion rules for interlocked ops

PR #577 addresses most but not all of the intrinsic promotion problems.
This PR resolves all known cases in the remainder.

Interlocked ops need special promotion rules because at the time
of function selection, the first argument has not been converted
to a buffer object.  It's just an int or uint, but you don't want
to convert THAT argument, because that implies converting the
buffer object itself.  Rather, you can convert other arguments,
but want to stay in the same "family" of functions.  E.g, if
the first interlocked arg is a uint, use only the uint family,
never the int family, you can convert the other args as you please.

This PR allows making such opcode and arg specific choices by
passing the op and arg to the convertible lambda.  The code in
the new test "hlsl.promote.atomic.frag" would not compile without
this change, but it must compile.

Also, it provides better handling of downconversions (to "worse"
types), which are permitted in HLSL.  The existing method of
selecting upconversions is unchanged, but if that doesn't find
any valid ones, then it will allow downconversions.  In effect
this always uses an upconversion if there is one.
This commit is contained in:
steve-lunarg 2016-12-06 15:50:11 -07:00
parent b56f4ac72c
commit 05f75142d6
7 changed files with 169 additions and 8 deletions

View File

@ -0,0 +1,109 @@
hlsl.promote.atomic.frag
Shader version: 450
gl_FragCoord origin is upper left
0:? Sequence
0:5 Function Definition: main( (temp 4-component vector of float)
0:5 Function Parameters:
0:? Sequence
0:13 move second child to first child (temp int)
0:13 'Orig' (temp int)
0:13 Convert uint to int (temp int)
0:13 imageAtomicAdd (temp uint)
0:13 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
0:13 'Loc' (temp int)
0:13 Convert int to uint (temp uint)
0:13 'Inc' (temp int)
0:15 Sequence
0:15 move second child to first child (temp 4-component vector of float)
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
0:? Constant:
0:? 0.000000
0:? 0.000000
0:? 0.000000
0:? 0.000000
0:15 Branch: Return
0:? Linker Objects
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
0:? 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
Linked fragment stage:
Shader version: 450
gl_FragCoord origin is upper left
0:? Sequence
0:5 Function Definition: main( (temp 4-component vector of float)
0:5 Function Parameters:
0:? Sequence
0:13 move second child to first child (temp int)
0:13 'Orig' (temp int)
0:13 Convert uint to int (temp int)
0:13 imageAtomicAdd (temp uint)
0:13 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
0:13 'Loc' (temp int)
0:13 Convert int to uint (temp uint)
0:13 'Inc' (temp int)
0:15 Sequence
0:15 move second child to first child (temp 4-component vector of float)
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
0:? Constant:
0:? 0.000000
0:? 0.000000
0:? 0.000000
0:? 0.000000
0:15 Branch: Return
0:? Linker Objects
0:? '@entryPointOutput' (layout(location=0 ) out 4-component vector of float)
0:? 's_uintbuff' (layout(r32ui ) uniform uimageBuffer)
// Module Version 10000
// Generated by (magic number): 80001
// Id's are bound by 31
Capability Shader
Capability SampledBuffer
1: ExtInstImport "GLSL.std.450"
MemoryModel Logical GLSL450
EntryPoint Fragment 4 "main" 27
ExecutionMode 4 OriginUpperLeft
Name 4 "main"
Name 8 "Orig"
Name 12 "s_uintbuff"
Name 13 "Loc"
Name 15 "Inc"
Name 27 "@entryPointOutput"
Decorate 12(s_uintbuff) DescriptorSet 0
Decorate 27(@entryPointOutput) Location 0
2: TypeVoid
3: TypeFunction 2
6: TypeInt 32 1
7: TypePointer Function 6(int)
9: TypeInt 32 0
10: TypeImage 9(int) Buffer nonsampled format:R32ui
11: TypePointer UniformConstant 10
12(s_uintbuff): 11(ptr) Variable UniformConstant
18: 9(int) Constant 0
19: TypePointer Image 9(int)
21: 9(int) Constant 1
24: TypeFloat 32
25: TypeVector 24(float) 4
26: TypePointer Output 25(fvec4)
27(@entryPointOutput): 26(ptr) Variable Output
28: 24(float) Constant 0
29: 25(fvec4) ConstantComposite 28 28 28 28
4(main): 2 Function None 3
5: Label
8(Orig): 7(ptr) Variable Function
13(Loc): 7(ptr) Variable Function
15(Inc): 7(ptr) Variable Function
14: 6(int) Load 13(Loc)
16: 6(int) Load 15(Inc)
17: 9(int) Bitcast 16
20: 19(ptr) ImageTexelPointer 12(s_uintbuff) 14 18
22: 9(int) AtomicIAdd 20 21 18 17
23: 6(int) Bitcast 22
Store 8(Orig) 23
Store 27(@entryPointOutput) 29
Return
FunctionEnd

View File

@ -0,0 +1,17 @@
RWBuffer<uint> s_uintbuff; // UINT RWBuffer ...
float4 main() : SV_Target
{
int Loc; // ... with INT variables
int Inc;
int Orig;
// This must select the uint flavor of SPIR-V atomic op, and promote
// the other arguments as required. The output value from the
// imageAtomicAdd AST will be converted to an int for 'Orig'.
InterlockedAdd(s_uintbuff[Loc], Inc, Orig);
return float4(0,0,0,0);
}

View File

@ -304,7 +304,7 @@ TVariable* TParseContextBase::getEditableVariable(const char* name)
const TFunction* TParseContextBase::selectFunction(
const TVector<const TFunction*> candidateList,
const TFunction& call,
std::function<bool(const TType& from, const TType& to)> convertible,
std::function<bool(const TType& from, const TType& to, TOperator op, int arg)> convertible,
std::function<bool(const TType& from, const TType& to1, const TType& to2)> better,
/* output */ bool& tie)
{
@ -356,13 +356,13 @@ const TFunction* TParseContextBase::selectFunction(
bool viable = true;
for (int param = 0; param < candidate.getParamCount(); ++param) {
if (candidate[param].type->getQualifier().isParamInput()) {
if (! convertible(*call[param].type, *candidate[param].type)) {
if (! convertible(*call[param].type, *candidate[param].type, candidate.getBuiltInOp(), param)) {
viable = false;
break;
}
}
if (candidate[param].type->getQualifier().isParamOutput()) {
if (! convertible(*candidate[param].type, *call[param].type)) {
if (! convertible(*candidate[param].type, *call[param].type, candidate.getBuiltInOp(), param)) {
viable = false;
break;
}

View File

@ -4875,7 +4875,7 @@ const TFunction* TParseContext::findFunction400(const TSourceLoc& loc, const TFu
symbolTable.findFunctionNameList(call.getMangledName(), candidateList, builtIn);
// can 'from' convert to 'to'?
const auto convertible = [this](const TType& from, const TType& to) -> bool {
const auto convertible = [this](const TType& from, const TType& to, TOperator, int) -> bool {
if (from == to)
return true;
if (from.isArray() || to.isArray() || ! from.sameElementShape(to))

View File

@ -167,7 +167,7 @@ protected:
// see implementation for detail
const TFunction* selectFunction(const TVector<const TFunction*>, const TFunction&,
std::function<bool(const TType&, const TType&)>,
std::function<bool(const TType&, const TType&, TOperator, int arg)>,
std::function<bool(const TType&, const TType&, const TType&)>,
/* output */ bool& tie);

View File

@ -163,6 +163,7 @@ INSTANTIATE_TEST_CASE_P(
{"hlsl.partialInit.frag", "PixelShaderFunction"},
{"hlsl.pp.line.frag", "main"},
{"hlsl.precise.frag", "main"},
{"hlsl.promote.atomic.frag", "main"},
{"hlsl.promote.binary.frag", "main"},
{"hlsl.promote.vec1.frag", "main"},
{"hlsl.promotions.frag", "main"},

View File

@ -4394,8 +4394,10 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
return candidateList[0];
}
bool allowOnlyUpConversions = true;
// can 'from' convert to 'to'?
const auto convertible = [this](const TType& from, const TType& to) -> bool {
const auto convertible = [&](const TType& from, const TType& to, TOperator op, int arg) -> bool {
if (from == to)
return true;
@ -4404,9 +4406,33 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
from.isStruct() || to.isStruct())
return false;
switch (op) {
case EOpInterlockedAdd:
case EOpInterlockedAnd:
case EOpInterlockedCompareExchange:
case EOpInterlockedCompareStore:
case EOpInterlockedExchange:
case EOpInterlockedMax:
case EOpInterlockedMin:
case EOpInterlockedOr:
case EOpInterlockedXor:
// We do not promote the texture or image type for these ocodes. Normally that would not
// be an issue because it's a buffer, but we haven't decomposed the opcode yet, and at this
// stage it's merely e.g, a basic integer type.
//
// Instead, we want to promote other arguments, but stay within the same family. In other
// words, InterlockedAdd(RWBuffer<int>, ...) will always use the int flavor, never the uint flavor,
// but it is allowed to promote its other arguments.
if (arg == 0)
return false;
default:
break;
}
// basic types have to be convertible
if (! intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall))
return false;
if (allowOnlyUpConversions)
if (! intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall))
return false;
// shapes have to be convertible
if ((from.isScalarOrVec1() && to.isScalarOrVec1()) ||
@ -4472,6 +4498,14 @@ const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, const TFu
// send to the generic selector
const TFunction* bestMatch = selectFunction(candidateList, call, convertible, better, tie);
if (bestMatch == nullptr) {
// If there is nothing selected by allowing only up-conversions (to a larger linearize() value),
// we instead try down-conversions, which are valid in HLSL, but not preferred if there are any
// upconversions possible.
allowOnlyUpConversions = false;
bestMatch = selectFunction(candidateList, call, convertible, better, tie);
}
if (bestMatch == nullptr) {
error(loc, "no matching overloaded function found", call.getName().c_str(), "");
return nullptr;