HLSL: phase 4 of rwtexture support: add image atomics

This PR turns Interlocked* intrinsics whose first parameter is an RWTexture or
RWBuffer object into the proper OpImageAtomic* operations.
This commit is contained in:
steve-lunarg 2016-10-19 10:15:25 -06:00
parent 5d45eadedc
commit 22322361d6
4 changed files with 5574 additions and 26 deletions

File diff suppressed because it is too large Load Diff

244
Test/hlsl.rw.atomics.frag Normal file
View File

@ -0,0 +1,244 @@
// Resources declared for the image-atomics test.
// Naming: the suffix gives the element type (f1 = float, i1 = int, u1 = uint);
// a trailing 'a' marks the array variant of the texture.
// The sampler and the float-typed resources are declared but not referenced by main().
SamplerState g_sSamp;
// Non-array read/write textures: 1D, 2D and 3D.
RWTexture1D <float> g_tTex1df1;
RWTexture1D <int> g_tTex1di1;
RWTexture1D <uint> g_tTex1du1;
RWTexture2D <float> g_tTex2df1;
RWTexture2D <int> g_tTex2di1;
RWTexture2D <uint> g_tTex2du1;
RWTexture3D <float> g_tTex3df1;
RWTexture3D <int> g_tTex3di1;
RWTexture3D <uint> g_tTex3du1;
// Arrayed read/write textures: 1D array and 2D array.
RWTexture1DArray <float> g_tTex1df1a;
RWTexture1DArray <int> g_tTex1di1a;
RWTexture1DArray <uint> g_tTex1du1a;
RWTexture2DArray <float> g_tTex2df1a;
RWTexture2DArray <int> g_tTex2di1a;
RWTexture2DArray <uint> g_tTex2du1a;
// Read/write typed buffers.
RWBuffer <float> g_tBuffF;
RWBuffer <int> g_tBuffI;
RWBuffer <uint> g_tBuffU;
// Pixel shader output: a single float4 color carrying the SV_Target0 semantic.
struct PS_OUTPUT
{
float4 Color : SV_Target0;
};
// Uniform inputs consumed by main(). u* are unsigned, i* are signed; the digit is
// the component count.
// The 1-, 2- and 3-component values are used as resource indices in main(); the
// scalars u1 and i1 are additionally passed as atomic operands.
uniform uint u1;
uniform uint2 u2;
uniform uint3 u3;
// u1b / u1c: passed as the second and third arguments of InterlockedCompareExchange.
uniform uint u1b;
uniform uint u1c;
uniform int i1;
uniform int2 i2;
uniform int3 i3;
// i1b / i1c: passed as the second and third arguments of InterlockedCompareExchange;
// i1b is also the operand of the int calls that have no output argument.
uniform int i1b;
uniform int i1c;
// Pixel shader entry point for the image-atomics test.
//
// Calls every Interlocked* intrinsic with an indexed RW texture / RW buffer element
// as the destination, once per resource dimensionality and once each for int and
// uint element types. InterlockedAdd/And/Max/Min/Or/Xor are invoked both without
// and with the trailing output argument; InterlockedCompareExchange and
// InterlockedExchange are invoked only with the output argument.
//
// Index width follows the resource: scalar for 1D textures and buffers,
// 2 components for 2D and 1D-array textures, 3 components for 3D and 2D-array
// textures.
//
// Returns a PS_OUTPUT whose Color is assigned the scalar 1.0.
PS_OUTPUT main()
{
    uint out_u1;
    int out_i1;

    // 1D int
    InterlockedAdd(g_tTex1di1[i1], i1b);
    InterlockedAdd(g_tTex1di1[i1], i1, out_i1);
    InterlockedAnd(g_tTex1di1[i1], i1b);
    InterlockedAnd(g_tTex1di1[i1], i1, out_i1);
    InterlockedCompareExchange(g_tTex1di1[i1], i1b, i1c, out_i1);
    InterlockedExchange(g_tTex1di1[i1], i1, out_i1);
    InterlockedMax(g_tTex1di1[i1], i1b);
    InterlockedMax(g_tTex1di1[i1], i1, out_i1);
    InterlockedMin(g_tTex1di1[i1], i1b);
    InterlockedMin(g_tTex1di1[i1], i1, out_i1);
    InterlockedOr(g_tTex1di1[i1], i1b);
    InterlockedOr(g_tTex1di1[i1], i1, out_i1);
    InterlockedXor(g_tTex1di1[i1], i1b);
    InterlockedXor(g_tTex1di1[i1], i1, out_i1);

    // 1D uint
    InterlockedAdd(g_tTex1du1[u1], u1);
    InterlockedAdd(g_tTex1du1[u1], u1, out_u1);
    InterlockedAnd(g_tTex1du1[u1], u1);
    InterlockedAnd(g_tTex1du1[u1], u1, out_u1);
    InterlockedCompareExchange(g_tTex1du1[u1], u1b, u1c, out_u1);
    InterlockedExchange(g_tTex1du1[u1], u1, out_u1);
    InterlockedMax(g_tTex1du1[u1], u1);
    InterlockedMax(g_tTex1du1[u1], u1, out_u1);
    InterlockedMin(g_tTex1du1[u1], u1);
    InterlockedMin(g_tTex1du1[u1], u1, out_u1);
    InterlockedOr(g_tTex1du1[u1], u1);
    InterlockedOr(g_tTex1du1[u1], u1, out_u1);
    InterlockedXor(g_tTex1du1[u1], u1);
    InterlockedXor(g_tTex1du1[u1], u1, out_u1);

    // 2D int
    InterlockedAdd(g_tTex2di1[i2], i1b);
    InterlockedAdd(g_tTex2di1[i2], i1, out_i1);
    InterlockedAnd(g_tTex2di1[i2], i1b);
    InterlockedAnd(g_tTex2di1[i2], i1, out_i1);
    InterlockedCompareExchange(g_tTex2di1[i2], i1b, i1c, out_i1);
    InterlockedExchange(g_tTex2di1[i2], i1, out_i1);
    InterlockedMax(g_tTex2di1[i2], i1b);
    InterlockedMax(g_tTex2di1[i2], i1, out_i1);
    InterlockedMin(g_tTex2di1[i2], i1b);
    InterlockedMin(g_tTex2di1[i2], i1, out_i1);
    InterlockedOr(g_tTex2di1[i2], i1b);
    InterlockedOr(g_tTex2di1[i2], i1, out_i1);
    InterlockedXor(g_tTex2di1[i2], i1b);
    InterlockedXor(g_tTex2di1[i2], i1, out_i1);

    // 2D uint
    InterlockedAdd(g_tTex2du1[u2], u1);
    InterlockedAdd(g_tTex2du1[u2], u1, out_u1);
    InterlockedAnd(g_tTex2du1[u2], u1);
    InterlockedAnd(g_tTex2du1[u2], u1, out_u1);
    InterlockedCompareExchange(g_tTex2du1[u2], u1b, u1c, out_u1);
    InterlockedExchange(g_tTex2du1[u2], u1, out_u1);
    InterlockedMax(g_tTex2du1[u2], u1);
    InterlockedMax(g_tTex2du1[u2], u1, out_u1);
    InterlockedMin(g_tTex2du1[u2], u1);
    InterlockedMin(g_tTex2du1[u2], u1, out_u1);
    InterlockedOr(g_tTex2du1[u2], u1);
    InterlockedOr(g_tTex2du1[u2], u1, out_u1);
    InterlockedXor(g_tTex2du1[u2], u1);
    InterlockedXor(g_tTex2du1[u2], u1, out_u1);

    // 3D int
    InterlockedAdd(g_tTex3di1[i3], i1b);
    InterlockedAdd(g_tTex3di1[i3], i1, out_i1);
    InterlockedAnd(g_tTex3di1[i3], i1b);
    InterlockedAnd(g_tTex3di1[i3], i1, out_i1);
    InterlockedCompareExchange(g_tTex3di1[i3], i1b, i1c, out_i1);
    InterlockedExchange(g_tTex3di1[i3], i1, out_i1);
    InterlockedMax(g_tTex3di1[i3], i1b);
    InterlockedMax(g_tTex3di1[i3], i1, out_i1);
    InterlockedMin(g_tTex3di1[i3], i1b);
    InterlockedMin(g_tTex3di1[i3], i1, out_i1);
    InterlockedOr(g_tTex3di1[i3], i1b);
    InterlockedOr(g_tTex3di1[i3], i1, out_i1);
    InterlockedXor(g_tTex3di1[i3], i1b);
    InterlockedXor(g_tTex3di1[i3], i1, out_i1);

    // 3D uint
    InterlockedAdd(g_tTex3du1[u3], u1);
    InterlockedAdd(g_tTex3du1[u3], u1, out_u1);
    InterlockedAnd(g_tTex3du1[u3], u1);
    InterlockedAnd(g_tTex3du1[u3], u1, out_u1);
    InterlockedCompareExchange(g_tTex3du1[u3], u1b, u1c, out_u1);
    InterlockedExchange(g_tTex3du1[u3], u1, out_u1);
    InterlockedMax(g_tTex3du1[u3], u1);
    InterlockedMax(g_tTex3du1[u3], u1, out_u1);
    InterlockedMin(g_tTex3du1[u3], u1);
    InterlockedMin(g_tTex3du1[u3], u1, out_u1);
    InterlockedOr(g_tTex3du1[u3], u1);
    InterlockedOr(g_tTex3du1[u3], u1, out_u1);
    InterlockedXor(g_tTex3du1[u3], u1);
    InterlockedXor(g_tTex3du1[u3], u1, out_u1);

    // 1D array int
    InterlockedAdd(g_tTex1di1a[i2], i1b);
    InterlockedAdd(g_tTex1di1a[i2], i1, out_i1);
    InterlockedAnd(g_tTex1di1a[i2], i1b);
    InterlockedAnd(g_tTex1di1a[i2], i1, out_i1);
    InterlockedCompareExchange(g_tTex1di1a[i2], i1b, i1c, out_i1);
    InterlockedExchange(g_tTex1di1a[i2], i1, out_i1);
    InterlockedMax(g_tTex1di1a[i2], i1b);
    InterlockedMax(g_tTex1di1a[i2], i1, out_i1);
    InterlockedMin(g_tTex1di1a[i2], i1b);
    InterlockedMin(g_tTex1di1a[i2], i1, out_i1);
    InterlockedOr(g_tTex1di1a[i2], i1b);
    InterlockedOr(g_tTex1di1a[i2], i1, out_i1);
    InterlockedXor(g_tTex1di1a[i2], i1b);
    InterlockedXor(g_tTex1di1a[i2], i1, out_i1);

    // 1D array uint
    InterlockedAdd(g_tTex1du1a[u2], u1);
    InterlockedAdd(g_tTex1du1a[u2], u1, out_u1);
    InterlockedAnd(g_tTex1du1a[u2], u1);
    InterlockedAnd(g_tTex1du1a[u2], u1, out_u1);
    InterlockedCompareExchange(g_tTex1du1a[u2], u1b, u1c, out_u1);
    InterlockedExchange(g_tTex1du1a[u2], u1, out_u1);
    InterlockedMax(g_tTex1du1a[u2], u1);
    InterlockedMax(g_tTex1du1a[u2], u1, out_u1);
    InterlockedMin(g_tTex1du1a[u2], u1);
    InterlockedMin(g_tTex1du1a[u2], u1, out_u1);
    InterlockedOr(g_tTex1du1a[u2], u1);
    InterlockedOr(g_tTex1du1a[u2], u1, out_u1);
    InterlockedXor(g_tTex1du1a[u2], u1);
    InterlockedXor(g_tTex1du1a[u2], u1, out_u1);

    // 2D array int: the RWTexture2DArray resource, indexed with a 3-component coordinate
    InterlockedAdd(g_tTex2di1a[i3], i1b);
    InterlockedAdd(g_tTex2di1a[i3], i1, out_i1);
    InterlockedAnd(g_tTex2di1a[i3], i1b);
    InterlockedAnd(g_tTex2di1a[i3], i1, out_i1);
    InterlockedCompareExchange(g_tTex2di1a[i3], i1b, i1c, out_i1);
    InterlockedExchange(g_tTex2di1a[i3], i1, out_i1);
    InterlockedMax(g_tTex2di1a[i3], i1b);
    InterlockedMax(g_tTex2di1a[i3], i1, out_i1);
    InterlockedMin(g_tTex2di1a[i3], i1b);
    InterlockedMin(g_tTex2di1a[i3], i1, out_i1);
    InterlockedOr(g_tTex2di1a[i3], i1b);
    InterlockedOr(g_tTex2di1a[i3], i1, out_i1);
    InterlockedXor(g_tTex2di1a[i3], i1b);
    InterlockedXor(g_tTex2di1a[i3], i1, out_i1);

    // 2D array uint: the RWTexture2DArray resource, indexed with a 3-component coordinate
    InterlockedAdd(g_tTex2du1a[u3], u1);
    InterlockedAdd(g_tTex2du1a[u3], u1, out_u1);
    InterlockedAnd(g_tTex2du1a[u3], u1);
    InterlockedAnd(g_tTex2du1a[u3], u1, out_u1);
    InterlockedCompareExchange(g_tTex2du1a[u3], u1b, u1c, out_u1);
    InterlockedExchange(g_tTex2du1a[u3], u1, out_u1);
    InterlockedMax(g_tTex2du1a[u3], u1);
    InterlockedMax(g_tTex2du1a[u3], u1, out_u1);
    InterlockedMin(g_tTex2du1a[u3], u1);
    InterlockedMin(g_tTex2du1a[u3], u1, out_u1);
    InterlockedOr(g_tTex2du1a[u3], u1);
    InterlockedOr(g_tTex2du1a[u3], u1, out_u1);
    InterlockedXor(g_tTex2du1a[u3], u1);
    InterlockedXor(g_tTex2du1a[u3], u1, out_u1);

    // buffer int
    InterlockedAdd(g_tBuffI[i1], i1b);
    InterlockedAdd(g_tBuffI[i1], i1, out_i1);
    InterlockedAnd(g_tBuffI[i1], i1b);
    InterlockedAnd(g_tBuffI[i1], i1, out_i1);
    InterlockedCompareExchange(g_tBuffI[i1], i1b, i1c, out_i1);
    InterlockedExchange(g_tBuffI[i1], i1, out_i1);
    InterlockedMax(g_tBuffI[i1], i1b);
    InterlockedMax(g_tBuffI[i1], i1, out_i1);
    InterlockedMin(g_tBuffI[i1], i1b);
    InterlockedMin(g_tBuffI[i1], i1, out_i1);
    InterlockedOr(g_tBuffI[i1], i1b);
    InterlockedOr(g_tBuffI[i1], i1, out_i1);
    InterlockedXor(g_tBuffI[i1], i1b);
    InterlockedXor(g_tBuffI[i1], i1, out_i1);

    // buffer uint
    InterlockedAdd(g_tBuffU[u1], u1);
    InterlockedAdd(g_tBuffU[u1], u1, out_u1);
    InterlockedAnd(g_tBuffU[u1], u1);
    InterlockedAnd(g_tBuffU[u1], u1, out_u1);
    InterlockedCompareExchange(g_tBuffU[u1], u1b, u1c, out_u1);
    InterlockedExchange(g_tBuffU[u1], u1, out_u1);
    InterlockedMax(g_tBuffU[u1], u1);
    InterlockedMax(g_tBuffU[u1], u1, out_u1);
    InterlockedMin(g_tBuffU[u1], u1);
    InterlockedMin(g_tBuffU[u1], u1, out_u1);
    InterlockedOr(g_tBuffU[u1], u1);
    InterlockedOr(g_tBuffU[u1], u1, out_u1);
    InterlockedXor(g_tBuffU[u1], u1);
    InterlockedXor(g_tBuffU[u1], u1, out_u1);

    PS_OUTPUT psout;
    psout.Color = 1.0;
    return psout;
}

View File

@ -150,6 +150,7 @@ INSTANTIATE_TEST_CASE_P(
{"hlsl.pp.line.frag", "main"},
{"hlsl.precise.frag", "main"},
{"hlsl.promotions.frag", "main"},
{"hlsl.rw.atomics.frag", "main"},
{"hlsl.rw.bracket.frag", "main"},
{"hlsl.rw.scalar.bracket.frag", "main"},
{"hlsl.rw.vec2.bracket.frag", "main"},

View File

@ -1397,13 +1397,13 @@ TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op
TOperator HlslParseContext::mapAtomicOp(const TSourceLoc& loc, TOperator op, bool isImage)
{
switch (op) {
case EOpInterlockedAdd: return isImage ? EOpImageAtomicAdd : EOpAtomicAdd;
case EOpInterlockedAnd: return isImage ? EOpImageAtomicAnd : EOpAtomicAnd;
case EOpInterlockedAdd: return isImage ? EOpImageAtomicAdd : EOpAtomicAdd;
case EOpInterlockedAnd: return isImage ? EOpImageAtomicAnd : EOpAtomicAnd;
case EOpInterlockedCompareExchange: return isImage ? EOpImageAtomicCompSwap : EOpAtomicCompSwap;
case EOpInterlockedMax: return isImage ? EOpImageAtomicMax : EOpAtomicMax;
case EOpInterlockedMin: return isImage ? EOpImageAtomicMin : EOpAtomicMin;
case EOpInterlockedOr: return isImage ? EOpImageAtomicOr : EOpAtomicOr;
case EOpInterlockedXor: return isImage ? EOpImageAtomicXor : EOpAtomicXor;
case EOpInterlockedMax: return isImage ? EOpImageAtomicMax : EOpAtomicMax;
case EOpInterlockedMin: return isImage ? EOpImageAtomicMin : EOpAtomicMin;
case EOpInterlockedOr: return isImage ? EOpImageAtomicOr : EOpAtomicOr;
case EOpInterlockedXor: return isImage ? EOpImageAtomicXor : EOpAtomicXor;
case EOpInterlockedExchange: return isImage ? EOpImageAtomicExchange : EOpAtomicExchange;
case EOpInterlockedCompareStore: // TODO: ...
default:
@ -2052,6 +2052,27 @@ void HlslParseContext::decomposeSampleMethods(const TSourceLoc& loc, TIntermType
//
void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments)
{
// Helper to find image data for image atomics:
// OpImageLoad(image[idx])
// We take the image load apart and add its params to the atomic op aggregate node
const auto imageAtomicParams = [this, &loc, &node](TIntermAggregate* atomic, TIntermTyped* load) {
TIntermAggregate* loadOp = load->getAsAggregate();
if (loadOp == nullptr) {
error(loc, "unknown image type in atomic operation", "", "");
node = nullptr;
return;
}
atomic->getSequence().push_back(loadOp->getSequence()[0]);
atomic->getSequence().push_back(loadOp->getSequence()[1]);
};
// Return true if this is an imageLoad, which we will change to an image atomic.
const auto isImageParam = [](TIntermTyped* image) {
TIntermAggregate* imageAggregate = image->getAsAggregate();
return imageAggregate != nullptr && imageAggregate->getOp() == EOpImageLoad;
};
// HLSL intrinsics can be passed through to native AST opcodes, or decomposed here to existing AST
// opcodes for compatibility with existing software stacks.
static const bool decomposeHlslIntrinsics = true;
@ -2232,27 +2253,43 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
case EOpInterlockedXor: // ...
case EOpInterlockedExchange: // always has output arg
{
TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped(); // dest
TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped(); // value
TIntermTyped* arg2 = nullptr;
const bool isImage = arg0->getType().isImage();
if (argAggregate->getSequence().size() > 2)
arg2 = argAggregate->getSequence()[2]->getAsTyped();
const bool isImage = isImageParam(arg0);
const TOperator atomicOp = mapAtomicOp(loc, op, isImage);
if (argAggregate->getSequence().size() > 2) {
// optional output param is present. return value goes to arg2.
TIntermTyped* arg2 = argAggregate->getSequence()[2]->getAsTyped();
TIntermAggregate* atomic = new TIntermAggregate(atomicOp);
atomic->getSequence().push_back(arg0);
TIntermAggregate* atomic = new TIntermAggregate(atomicOp);
atomic->setType(arg0->getType());
atomic->getWritableType().getQualifier().makeTemporary();
atomic->setLoc(loc);
if (isImage) {
// orig_value = imageAtomicOp(image, loc, data)
imageAtomicParams(atomic, arg0);
atomic->getSequence().push_back(arg1);
atomic->setLoc(loc);
atomic->setType(arg0->getType());
atomic->getWritableType().getQualifier().makeTemporary();
node = intermediate.addAssign(EOpAssign, arg2, atomic, loc);
if (argAggregate->getSequence().size() > 2) {
node = intermediate.addAssign(EOpAssign, arg2, atomic, loc);
} else {
node = atomic; // no assignment needed, as there was no out var.
}
} else {
// Set the matching operator. Since output is absent, this is all we need to do.
node->getAsAggregate()->setOperator(atomicOp);
// Normal memory variable:
// arg0 = mem, arg1 = data, arg2(optional,out) = orig_value
if (argAggregate->getSequence().size() > 2) {
// optional output param is present. return value goes to arg2.
atomic->getSequence().push_back(arg0);
atomic->getSequence().push_back(arg1);
node = intermediate.addAssign(EOpAssign, arg2, atomic, loc);
} else {
// Set the matching operator. Since output is absent, this is all we need to do.
node->getAsAggregate()->setOperator(atomicOp);
}
}
break;
@ -2265,15 +2302,20 @@ void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*&
TIntermTyped* arg2 = argAggregate->getSequence()[2]->getAsTyped(); // value
TIntermTyped* arg3 = argAggregate->getSequence()[3]->getAsTyped(); // orig
const bool isImage = arg0->getType().isImage();
const bool isImage = isImageParam(arg0);
TIntermAggregate* atomic = new TIntermAggregate(mapAtomicOp(loc, op, isImage));
atomic->getSequence().push_back(arg0);
atomic->getSequence().push_back(arg1);
atomic->getSequence().push_back(arg2);
atomic->setLoc(loc);
atomic->setType(arg2->getType());
atomic->getWritableType().getQualifier().makeTemporary();
if (isImage) {
imageAtomicParams(atomic, arg0);
} else {
atomic->getSequence().push_back(arg0);
}
atomic->getSequence().push_back(arg1);
atomic->getSequence().push_back(arg2);
node = intermediate.addAssign(EOpAssign, arg3, atomic, loc);
break;