Merge branch 'fragment-shader-interlock' of git://github.com/cdavis5e/SPIRV-Cross into interlock

This commit is contained in:
Hans-Kristian Arntzen 2019-09-04 10:35:08 +02:00
commit f577836439
26 changed files with 934 additions and 9 deletions

View File

@ -0,0 +1,24 @@
// Resource declarations. _9 and img4 use the plain RW* resource types, while
// _42, _52, img, img3 and img2 are declared with RasterizerOrdered* types.
// NOTE(review): presumably _9 and img4 stay RW* because they are only touched
// outside the interlock region of the source shader -- confirm against the input.
RWByteAddressBuffer _9 : register(u6, space0);
globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0);
RasterizerOrderedByteAddressBuffer _52 : register(u4, space0);
RWTexture2D<unorm float4> img4 : register(u5, space0);
RasterizerOrderedTexture2D<unorm float4> img : register(u0, space0);
RasterizerOrderedTexture2D<unorm float4> img3 : register(u2, space0);
RasterizerOrderedTexture2D<uint> img2 : register(u1, space0);
// Shader body: performs the buffer and image accesses below, in this order.
void frag_main()
{
// Store 0 at offset 0 of _9.
_9.Store(0, uint(0));
// Write (1, 0, 0, 1) to texel (1, 1) of img4.
img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f);
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
img[int2(0, 0)] = img3[int2(0, 0)];
// Interlocked add of 1 to texel (0, 0) of img2; _39 receives the output and is not used afterwards.
uint _39;
InterlockedAdd(img2[int2(0, 0)], 1u, _39);
// Non-atomic read-modify-write at offset 0 of _42: load, add 42 as a signed int, store back.
_42.Store(0, uint(int(_42.Load(0)) + 42));
// Interlocked AND at offset 4 of _42 with the value loaded from offset 0 of _52;
// _55 receives the output and is not used afterwards.
uint _55;
_42.InterlockedAnd(4, _52.Load(0), _55);
}
// Entry point: forwards directly to frag_main().
void main()
{
frag_main();
}

View File

@ -0,0 +1,43 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Layouts of the three buffers referenced by main0.
struct Buffer3
{
int baz;
};
struct Buffer
{
int foo;
uint bar;
};
struct Buffer2
{
uint quux;
};
// Struct bundling every resource used by main0 (bound at buffer(0) below).
// m_9 and img4 carry only an id attribute; img, img3, m_34 and m_44 are
// additionally tagged raster_order_group(0).
struct spvDescriptorSetBuffer0
{
device Buffer3* m_9 [[id(0)]];
texture2d<float, access::write> img4 [[id(1)]];
texture2d<float, access::write> img [[id(2), raster_order_group(0)]];
texture2d<float> img3 [[id(3), raster_order_group(0)]];
volatile device Buffer* m_34 [[id(4), raster_order_group(0)]];
device Buffer2* m_44 [[id(5), raster_order_group(0)]];
};
// Fragment entry point; every resource is reached through spvDescriptorSet0.
fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
{
// Plain store of 0 to Buffer3::baz.
(*spvDescriptorSet0.m_9).baz = 0;
// Write (1, 0, 0, 1) to texel (1, 1) of img4.
spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0)));
// Non-atomic increment of Buffer::foo by 42.
(*spvDescriptorSet0.m_34).foo += 42;
// Relaxed atomic AND of Buffer::bar with Buffer2::quux; the result _49 is never read.
uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed);
}

View File

@ -0,0 +1,33 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Layouts of the three buffers passed to main0.
struct Buffer3
{
int baz;
};
struct Buffer
{
int foo;
uint bar;
};
struct Buffer2
{
uint quux;
};
// Fragment entry point taking each resource as its own argument.
// _9 (buffer 0) and img4 (texture 0) carry only a binding attribute; _34, _44,
// img and img3 are additionally tagged raster_order_group(0).
fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d<float, access::write> img4 [[texture(0)]], texture2d<float, access::write> img [[texture(1), raster_order_group(0)]], texture2d<float> img3 [[texture(2), raster_order_group(0)]])
{
// Plain store of 0 to Buffer3::baz.
_9.baz = 0;
// Write (1, 0, 0, 1) to texel (1, 1) of img4.
img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
img.write(img3.read(uint2(int2(0))), uint2(int2(0)));
// Non-atomic increment of Buffer::foo by 42.
_34.foo += 42;
// Relaxed atomic AND of Buffer::bar with Buffer2::quux; the result _49 is never read.
uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed);
}

View File

@ -0,0 +1,23 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Fragment shader declaring the pixel_interlock_ordered input layout. Every
// statement of main() sits between beginInvocationInterlockARB() and
// endInvocationInterlockARB().
layout(pixel_interlock_ordered) in;
// Coherent std430 storage block at binding 2, accessed through the instance name _30.
layout(binding = 2, std430) coherent buffer Buffer
{
int foo;
uint bar;
} _30;
// Write-only rgba8 image at binding 0 and r32ui image at binding 1.
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the result _27 is never read.
uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
// Non-atomic increment of foo by 42.
_30.foo += 42;
// Atomic AND of bar with 255; the result _41 is never read.
uint _41 = atomicAnd(_30.bar, 255u);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,23 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Fragment shader declaring the pixel_interlock_unordered input layout. Every
// statement of main() sits between beginInvocationInterlockARB() and
// endInvocationInterlockARB().
layout(pixel_interlock_unordered) in;
// Coherent std430 storage block at binding 2, accessed through the instance name _30.
layout(binding = 2, std430) coherent buffer Buffer
{
int foo;
uint bar;
} _30;
// Write-only rgba8 image at binding 0 and r32ui image at binding 1.
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the result _27 is never read.
uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
// Non-atomic increment of foo by 42.
_30.foo += 42;
// Atomic AND of bar with 255; the result _41 is never read.
uint _41 = atomicAnd(_30.bar, 255u);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,23 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Fragment shader declaring the sample_interlock_ordered input layout. Every
// statement of main() sits between beginInvocationInterlockARB() and
// endInvocationInterlockARB().
layout(sample_interlock_ordered) in;
// Coherent std430 storage block at binding 2, accessed through the instance name _30.
layout(binding = 2, std430) coherent buffer Buffer
{
int foo;
uint bar;
} _30;
// Write-only rgba8 image at binding 0 and r32ui image at binding 1.
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the result _27 is never read.
uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
// Non-atomic increment of foo by 42.
_30.foo += 42;
// Atomic AND of bar with gl_SampleMaskIn[0] converted to uint; the result _47 is never read.
uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0]));
endInvocationInterlockARB();
}

View File

@ -0,0 +1,23 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Fragment shader declaring the sample_interlock_unordered input layout. Every
// statement of main() sits between beginInvocationInterlockARB() and
// endInvocationInterlockARB().
layout(sample_interlock_unordered) in;
// Coherent std430 storage block at binding 2, accessed through the instance name _30.
layout(binding = 2, std430) coherent buffer Buffer
{
int foo;
uint bar;
} _30;
// Write-only rgba8 image at binding 0 and r32ui image at binding 1.
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the result _27 is never read.
uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
// Non-atomic increment of foo by 42.
_30.foo += 42;
// Atomic AND of bar with 255; the result _41 is never read.
uint _41 = atomicAnd(_30.bar, 255u);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,24 @@
// Resource declarations. _9 and img4 use the plain RW* resource types, while
// _42, _52, img, img3 and img2 are declared with RasterizerOrdered* types.
// NOTE(review): presumably _9 and img4 stay RW* because they are only touched
// outside the interlock region of the source shader -- confirm against the input.
RWByteAddressBuffer _9 : register(u6, space0);
globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0);
RasterizerOrderedByteAddressBuffer _52 : register(u4, space0);
RWTexture2D<unorm float4> img4 : register(u5, space0);
RasterizerOrderedTexture2D<unorm float4> img : register(u0, space0);
RasterizerOrderedTexture2D<unorm float4> img3 : register(u2, space0);
RasterizerOrderedTexture2D<uint> img2 : register(u1, space0);
// Shader body: performs the buffer and image accesses below, in this order.
void frag_main()
{
// Store 0 at offset 0 of _9.
_9.Store(0, uint(0));
// Write (1, 0, 0, 1) to texel (1, 1) of img4.
img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f);
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
img[int2(0, 0)] = img3[int2(0, 0)];
// Interlocked add of 1 to texel (0, 0) of img2; _39 receives the output and is not used afterwards.
uint _39;
InterlockedAdd(img2[int2(0, 0)], 1u, _39);
// Non-atomic read-modify-write at offset 0 of _42: load, add 42 as a signed int, store back.
_42.Store(0, uint(int(_42.Load(0)) + 42));
// Interlocked AND at offset 4 of _42 with the value loaded from offset 0 of _52;
// _55 receives the output and is not used afterwards.
uint _55;
_42.InterlockedAnd(4, _52.Load(0), _55);
}
// Entry point: forwards directly to frag_main().
void main()
{
frag_main();
}

View File

@ -0,0 +1,43 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Layouts of the three buffers referenced by main0.
struct Buffer3
{
int baz;
};
struct Buffer
{
int foo;
uint bar;
};
struct Buffer2
{
uint quux;
};
// Struct bundling every resource used by main0 (bound at buffer(0) below).
// m_9 and img4 carry only an id attribute; img, img3, m_34 and m_44 are
// additionally tagged raster_order_group(0).
struct spvDescriptorSetBuffer0
{
device Buffer3* m_9 [[id(0)]];
texture2d<float, access::write> img4 [[id(1)]];
texture2d<float, access::write> img [[id(2), raster_order_group(0)]];
texture2d<float> img3 [[id(3), raster_order_group(0)]];
volatile device Buffer* m_34 [[id(4), raster_order_group(0)]];
device Buffer2* m_44 [[id(5), raster_order_group(0)]];
};
// Fragment entry point; every resource is reached through spvDescriptorSet0.
fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
{
// Plain store of 0 to Buffer3::baz.
(*spvDescriptorSet0.m_9).baz = 0;
// Write (1, 0, 0, 1) to texel (1, 1) of img4.
spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0)));
// Non-atomic increment of Buffer::foo by 42.
(*spvDescriptorSet0.m_34).foo += 42;
// Relaxed atomic AND of Buffer::bar with Buffer2::quux; the result _49 is never read.
uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed);
}

View File

@ -0,0 +1,33 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#include <metal_stdlib>
#include <simd/simd.h>
#include <metal_atomic>
using namespace metal;
// Layouts of the three buffers passed to main0.
struct Buffer3
{
int baz;
};
struct Buffer
{
int foo;
uint bar;
};
struct Buffer2
{
uint quux;
};
// Fragment entry point taking each resource as its own argument.
// _9 (buffer 0) and img4 (texture 0) carry only a binding attribute; _34, _44,
// img and img3 are additionally tagged raster_order_group(0).
fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d<float, access::write> img4 [[texture(0)]], texture2d<float, access::write> img [[texture(1), raster_order_group(0)]], texture2d<float> img3 [[texture(2), raster_order_group(0)]])
{
// Plain store of 0 to Buffer3::baz.
_9.baz = 0;
// Write (1, 0, 0, 1) to texel (1, 1) of img4.
img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
img.write(img3.read(uint2(int2(0))), uint2(int2(0)));
// Non-atomic increment of Buffer::foo by 42.
_34.foo += 42;
// Relaxed atomic AND of Buffer::bar with Buffer2::quux; the result _49 is never read.
uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed);
}

View File

@ -0,0 +1,23 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Fragment shader declaring the pixel_interlock_ordered input layout. Every
// statement of main() sits between beginInvocationInterlockARB() and
// endInvocationInterlockARB().
layout(pixel_interlock_ordered) in;
// Coherent std430 storage block at binding 2, accessed through the instance name _30.
layout(binding = 2, std430) coherent buffer Buffer
{
int foo;
uint bar;
} _30;
// Write-only rgba8 image at binding 0 and r32ui image at binding 1.
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the result _27 is never read.
uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
// Non-atomic increment of foo by 42.
_30.foo += 42;
// Atomic AND of bar with 255; the result _41 is never read.
uint _41 = atomicAnd(_30.bar, 255u);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,23 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Fragment shader declaring the pixel_interlock_unordered input layout. Every
// statement of main() sits between beginInvocationInterlockARB() and
// endInvocationInterlockARB().
layout(pixel_interlock_unordered) in;
// Coherent std430 storage block at binding 2, accessed through the instance name _30.
layout(binding = 2, std430) coherent buffer Buffer
{
int foo;
uint bar;
} _30;
// Write-only rgba8 image at binding 0 and r32ui image at binding 1.
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the result _27 is never read.
uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
// Non-atomic increment of foo by 42.
_30.foo += 42;
// Atomic AND of bar with 255; the result _41 is never read.
uint _41 = atomicAnd(_30.bar, 255u);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,23 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Fragment shader declaring the sample_interlock_ordered input layout. Every
// statement of main() sits between beginInvocationInterlockARB() and
// endInvocationInterlockARB().
layout(sample_interlock_ordered) in;
// Coherent std430 storage block at binding 2, accessed through the instance name _30.
layout(binding = 2, std430) coherent buffer Buffer
{
int foo;
uint bar;
} _30;
// Write-only rgba8 image at binding 0 and r32ui image at binding 1.
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the result _27 is never read.
uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
// Non-atomic increment of foo by 42.
_30.foo += 42;
// Atomic AND of bar with gl_SampleMaskIn[0] converted to uint; the result _47 is never read.
uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0]));
endInvocationInterlockARB();
}

View File

@ -0,0 +1,23 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Fragment shader declaring the sample_interlock_unordered input layout. Every
// statement of main() sits between beginInvocationInterlockARB() and
// endInvocationInterlockARB().
layout(sample_interlock_unordered) in;
// Coherent std430 storage block at binding 2, accessed through the instance name _30.
layout(binding = 2, std430) coherent buffer Buffer
{
int foo;
uint bar;
} _30;
// Write-only rgba8 image at binding 0 and r32ui image at binding 1.
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the result _27 is never read.
uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
// Non-atomic increment of foo by 42.
_30.foo += 42;
// Atomic AND of bar with 255; the result _41 is never read.
uint _41 = atomicAnd(_30.bar, 255u);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,36 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Test input: mixes resource accesses outside the interlock region (baz, img4)
// with accesses inside it (img, img3, img2, foo, bar, quux).
layout(pixel_interlock_ordered) in;
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
layout(binding = 2, rgba8) uniform readonly image2D img3;
// Anonymous coherent storage block; foo and bar are modified inside the region.
layout(binding = 3) coherent buffer Buffer
{
int foo;
uint bar;
};
// Anonymous storage block; quux is only read, as the atomicAnd operand.
layout(binding = 4) buffer Buffer2
{
uint quux;
};
layout(binding = 5, rgba8) uniform writeonly image2D img4;
// Anonymous storage block; baz is only written before the region begins.
layout(binding = 6) buffer Buffer3
{
int baz;
};
void main()
{
// Deliberately outside the critical section to test usage tracking.
baz = 0;
imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
beginInvocationInterlockARB();
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
// Atomic add of 1 to texel (0, 0) of img2; the return value is discarded.
imageAtomicAdd(img2, ivec2(0, 0), 1u);
// Non-atomic increment of foo by 42.
foo += 42;
// Atomic AND of bar with quux; the return value is discarded.
atomicAnd(bar, quux);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,36 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Test input: mixes resource accesses outside the interlock region (baz, img4)
// with accesses inside it (img, img3, foo, bar, quux). The img2 declaration and
// its atomic add are commented out in this variant.
layout(pixel_interlock_ordered) in;
layout(binding = 0, rgba8) uniform writeonly image2D img;
//layout(binding = 1, r32ui) uniform uimage2D img2;
layout(binding = 2, rgba8) uniform readonly image2D img3;
// Anonymous coherent storage block; foo and bar are modified inside the region.
layout(binding = 3) coherent buffer Buffer
{
int foo;
uint bar;
};
// Anonymous storage block; quux is only read, as the atomicAnd operand.
layout(binding = 4) buffer Buffer2
{
uint quux;
};
layout(binding = 5, rgba8) uniform writeonly image2D img4;
// Anonymous storage block; baz is only written before the region begins.
layout(binding = 6) buffer Buffer3
{
int baz;
};
void main()
{
// Deliberately outside the critical section to test usage tracking.
baz = 0;
imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
beginInvocationInterlockARB();
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
//imageAtomicAdd(img2, ivec2(0, 0), 1u);
// Non-atomic increment of foo by 42.
foo += 42;
// Atomic AND of bar with quux; the return value is discarded.
atomicAnd(bar, quux);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,36 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Test input: mixes resource accesses outside the interlock region (baz, img4)
// with accesses inside it (img, img3, foo, bar, quux). The img2 declaration and
// its atomic add are commented out in this variant.
layout(pixel_interlock_ordered) in;
layout(binding = 0, rgba8) uniform writeonly image2D img;
//layout(binding = 1, r32ui) uniform uimage2D img2;
layout(binding = 2, rgba8) uniform readonly image2D img3;
// Anonymous coherent storage block; foo and bar are modified inside the region.
layout(binding = 3) coherent buffer Buffer
{
int foo;
uint bar;
};
// Anonymous storage block; quux is only read, as the atomicAnd operand.
layout(binding = 4) buffer Buffer2
{
uint quux;
};
layout(binding = 5, rgba8) uniform writeonly image2D img4;
// Anonymous storage block; baz is only written before the region begins.
layout(binding = 6) buffer Buffer3
{
int baz;
};
void main()
{
// Deliberately outside the critical section to test usage tracking.
baz = 0;
imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
beginInvocationInterlockARB();
// Copy texel (0, 0) of img3 into texel (0, 0) of img.
imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
//imageAtomicAdd(img2, ivec2(0, 0), 1u);
// Non-atomic increment of foo by 42.
foo += 42;
// Atomic AND of bar with quux; the return value is discarded.
atomicAnd(bar, quux);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,22 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Test input for the pixel_interlock_ordered layout; all of main() runs between
// beginInvocationInterlockARB() and endInvocationInterlockARB().
layout(pixel_interlock_ordered) in;
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
// Anonymous coherent storage block at binding 2.
layout(binding = 2) coherent buffer Buffer
{
int foo;
uint bar;
};
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the return value is discarded.
imageAtomicAdd(img2, ivec2(0, 0), 1u);
// Non-atomic increment of foo by 42.
foo += 42;
// Atomic AND of bar with the literal 0xff; the return value is discarded.
atomicAnd(bar, 0xff);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,22 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Test input for the pixel_interlock_unordered layout; all of main() runs between
// beginInvocationInterlockARB() and endInvocationInterlockARB().
layout(pixel_interlock_unordered) in;
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
// Anonymous coherent storage block at binding 2.
layout(binding = 2) coherent buffer Buffer
{
int foo;
uint bar;
};
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the return value is discarded.
imageAtomicAdd(img2, ivec2(0, 0), 1u);
// Non-atomic increment of foo by 42.
foo += 42;
// Atomic AND of bar with the literal 0xff; the return value is discarded.
atomicAnd(bar, 0xff);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,22 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Test input for the sample_interlock_ordered layout; all of main() runs between
// beginInvocationInterlockARB() and endInvocationInterlockARB().
layout(sample_interlock_ordered) in;
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
// Anonymous coherent storage block at binding 2.
layout(binding = 2) coherent buffer Buffer
{
int foo;
uint bar;
};
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the return value is discarded.
imageAtomicAdd(img2, ivec2(0, 0), 1u);
// Non-atomic increment of foo by 42.
foo += 42;
// Atomic AND of bar with gl_SampleMaskIn[0]; the return value is discarded.
atomicAnd(bar, gl_SampleMaskIn[0]);
endInvocationInterlockARB();
}

View File

@ -0,0 +1,22 @@
#version 450
#extension GL_ARB_fragment_shader_interlock : require
// Test input for the sample_interlock_unordered layout; all of main() runs between
// beginInvocationInterlockARB() and endInvocationInterlockARB().
layout(sample_interlock_unordered) in;
layout(binding = 0, rgba8) uniform writeonly image2D img;
layout(binding = 1, r32ui) uniform uimage2D img2;
// Anonymous coherent storage block at binding 2.
layout(binding = 2) coherent buffer Buffer
{
int foo;
uint bar;
};
void main()
{
beginInvocationInterlockARB();
// Store (1, 0, 0, 1) at texel (0, 0) of img.
imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
// Atomic add of 1 to texel (0, 0) of img2; the return value is discarded.
imageAtomicAdd(img2, ivec2(0, 0), 1u);
// Non-atomic increment of foo by 42.
foo += 42;
// Atomic AND of bar with the literal 0xff; the return value is discarded.
atomicAnd(bar, 0xff);
endInvocationInterlockARB();
}

View File

@ -4249,6 +4249,221 @@ void Compiler::analyze_non_block_pointer_types()
sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
}
// Processes one instruction while looking for buffer and image resources that are
// accessed between OpBeginInvocationInterlockEXT and OpEndInvocationInterlockEXT.
//
// - OpBeginInvocationInterlockEXT sets in_crit_sec.
// - OpEndInvocationInterlockEXT returns false.
//   NOTE(review): presumably a false return tells the traversal to stop -- confirm
//   against the OpcodeHandler contract.
// - Loads, access chains and image texel pointers register placeholder expressions so
//   that later pointer operands can be resolved with maybe_get_backing_variable().
// - While in_crit_sec is set, the backing variable of every tracked load, store, copy,
//   image read/write and atomic is inserted into compiler.interlocked_resources.
//   Variables in Uniform storage only count on the load/read/copy-source paths when
//   their type is decorated BufferBlock.
//
// Returns false when an instruction has fewer operands than the handler needs, and
// true for every other instruction (apart from OpEndInvocationInterlockEXT above).
bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
{
	if (opcode == OpBeginInvocationInterlockEXT)
	{
		in_crit_sec = true;
		return true;
	}

	if (opcode == OpEndInvocationInterlockEXT)
	{
		// End critical section--nothing more to do.
		return false;
	}

	// We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
	switch (opcode)
	{
	case OpLoad:
	{
		if (length < 3)
			return false;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniformConstant:
		{
			// Register the loaded value so later instructions that use it can be
			// traced back to this variable; the load itself is not recorded.
			uint32_t result_type = args[0];
			uint32_t id = args[1];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
			break;
		}

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassStorageBuffer:
			if (!in_crit_sec)
				break;

			compiler.interlocked_resources.insert(var->self);
			break;
		}
		break;
	}

	case OpInBoundsAccessChain:
	case OpAccessChain:
	case OpPtrAccessChain:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];

		auto &type = compiler.get<SPIRType>(result_type);
		if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
		    type.storage == StorageClassStorageBuffer)
		{
			// Register the chain result so pointers derived from it resolve to the base variable.
			uint32_t id = args[1];
			uint32_t ptr = args[2];
			compiler.set<SPIRExpression>(id, "", result_type, true);
			compiler.register_read(id, ptr, true);
		}
		break;
	}

	case OpImageTexelPointer:
	{
		if (length < 3)
			return false;

		uint32_t result_type = args[0];
		uint32_t id = args[1];
		uint32_t ptr = args[2];
		auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var)
			e.loaded_from = var->self;
		// Must not fall through: the store case below reads args[0] as a pointer,
		// but for this opcode args[0] is the result type.
		break;
	}

	case OpStore:
	case OpImageWrite:
	case OpAtomicStore:
	{
		if (length < 1)
			return false;

		if (!in_crit_sec)
			break;

		// For these opcodes the first operand is the destination pointer or image.
		uint32_t ptr = args[0];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
			compiler.interlocked_resources.insert(var->self);

		break;
	}

	case OpCopyMemory:
	{
		if (length < 2)
			return false;

		if (!in_crit_sec)
			break;

		uint32_t dst = args[0];
		uint32_t src = args[1];
		auto *dst_var = compiler.maybe_get_backing_variable(dst);
		auto *src_var = compiler.maybe_get_backing_variable(src);
		if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
			compiler.interlocked_resources.insert(dst_var->self);

		if (src_var)
		{
			// The copy source only counts when it is an SSBO: StorageBuffer storage, or
			// Uniform storage with the BufferBlock decoration.
			if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
				break;

			if (src_var->storage == StorageClassUniform &&
			    !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
				break;

			compiler.interlocked_resources.insert(src_var->self);
		}
		break;
	}

	case OpImageRead:
	case OpAtomicLoad:
	{
		if (length < 3)
			return false;

		if (!in_crit_sec)
			break;

		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);

		// We're only concerned with buffer and image memory here.
		if (!var)
			break;

		switch (var->storage)
		{
		default:
			break;

		case StorageClassUniform:
			// Must have BufferBlock; we only care about SSBOs.
			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
				break;
			// fallthrough
		case StorageClassUniformConstant:
		case StorageClassStorageBuffer:
			compiler.interlocked_resources.insert(var->self);
			break;
		}
		break;
	}

	case OpAtomicExchange:
	case OpAtomicCompareExchange:
	case OpAtomicIIncrement:
	case OpAtomicIDecrement:
	case OpAtomicIAdd:
	case OpAtomicISub:
	case OpAtomicSMin:
	case OpAtomicUMin:
	case OpAtomicSMax:
	case OpAtomicUMax:
	case OpAtomicAnd:
	case OpAtomicOr:
	case OpAtomicXor:
	{
		if (length < 3)
			return false;

		if (!in_crit_sec)
			break;

		// Read-modify-write atomics: the pointer follows the result type and result id.
		uint32_t ptr = args[2];
		auto *var = compiler.maybe_get_backing_variable(ptr);
		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
		            var->storage == StorageClassStorageBuffer))
			compiler.interlocked_resources.insert(var->self);

		break;
	}

	default:
		break;
	}

	return true;
}
void Compiler::analyze_interlocked_resource_usage()
{
InterlockedResourceAccessHandler handler(*this);
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
}
bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
{
if (!type.pointer)

View File

@ -945,6 +945,27 @@ protected:
bool single_function);
bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
// Opcode handler that records which buffer and image resources are accessed
// (loaded, stored, copied, or used in atomics) from inside the critical section, if present.
// The critical section is delimited by OpBeginInvocationInterlockEXT and
// OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written
// while inside the critical section must be placed in a raster order group.
struct InterlockedResourceAccessHandler : OpcodeHandler
{
// Binds the handler to the compiler whose interlocked_resources set it fills in.
InterlockedResourceAccessHandler(Compiler &compiler_)
: compiler(compiler_)
{
}
// Processes a single instruction; args/length describe its operand words.
bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
// Compiler being analyzed; handle() inserts into compiler.interlocked_resources.
Compiler &compiler;
// Becomes true once OpBeginInvocationInterlockEXT has been handled.
bool in_crit_sec = false;
};
void analyze_interlocked_resource_usage();
// The set of IDs of all resources accessed (read or written) while inside the critical section, if present.
std::unordered_set<uint32_t> interlocked_resources;
void make_constant_null(uint32_t id, uint32_t type);
std::unordered_map<uint32_t, std::string> declared_block_names;

View File

@ -605,6 +605,26 @@ void CompilerGLSL::emit_header()
if (execution.flags.get(ExecutionModePostDepthCoverage))
require_extension_internal("GL_ARB_post_depth_coverage");
// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
{
if (options.es)
{
if (options.version < 310)
SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
require_extension_internal("GL_NV_fragment_shader_interlock");
}
else
{
if (options.version < 420)
require_extension_internal("GL_ARB_shader_image_load_store");
require_extension_internal("GL_ARB_fragment_shader_interlock");
}
}
for (auto &ext : forced_extensions)
{
if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
@ -784,6 +804,15 @@ void CompilerGLSL::emit_header()
if (execution.flags.get(ExecutionModePostDepthCoverage))
inputs.push_back("post_depth_coverage");
if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
inputs.push_back("pixel_interlock_ordered");
else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
inputs.push_back("pixel_interlock_unordered");
else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
inputs.push_back("sample_interlock_ordered");
else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
inputs.push_back("sample_interlock_unordered");
if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
statement("layout(depth_greater) out float gl_FragDepth;");
else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
@ -10109,6 +10138,32 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
break;
case OpBeginInvocationInterlockEXT:
if (options.es)
{
require_extension_internal("GL_NV_fragment_shader_interlock");
statement("beginInvocationInterlockNV();");
}
else
{
require_extension_internal("GL_ARB_fragment_shader_interlock");
statement("beginInvocationInterlockARB();");
}
break;
case OpEndInvocationInterlockEXT:
if (options.es)
{
require_extension_internal("GL_NV_fragment_shader_interlock");
statement("endInvocationInterlockNV();");
}
else
{
require_extension_internal("GL_ARB_fragment_shader_interlock");
statement("endInvocationInterlockARB();");
}
break;
default:
statement("// unimplemented op ", instruction.op);
break;

View File

@ -203,7 +203,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
}
}
string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
{
auto &imagetype = get<SPIRType>(type.image.type);
const char *dim = nullptr;
@ -235,7 +235,12 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
if (type.image.sampled == 1)
return join("Buffer<", type_to_glsl(imagetype), components, ">");
else if (type.image.sampled == 2)
{
if (interlocked_resources.count(id))
return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
">");
return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">");
}
else
SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
case DimSubpassData:
@ -248,6 +253,8 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
const char *arrayed = type.image.arrayed ? "Array" : "";
const char *ms = type.image.ms ? "MS" : "";
const char *rw = typed_load ? "RW" : "";
if (typed_load && interlocked_resources.count(id))
rw = "RasterizerOrdered";
return join(rw, "Texture", dim, ms, arrayed, "<",
typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
join(type_to_glsl(imagetype), components),
@ -1848,9 +1855,13 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
Bitset flags = ir.get_buffer_block_flags(var);
bool is_readonly = flags.get(DecorationNonWritable);
bool is_coherent = flags.get(DecorationCoherent);
bool is_interlocked = interlocked_resources.count(var.self) > 0;
const char *type_name = "ByteAddressBuffer ";
if (!is_readonly)
type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer ";
add_resource_name(var.self);
statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ",
to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type),
to_resource_binding(var), ";");
}
else
{
@ -4673,6 +4684,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
case OpIsHelperInvocationEXT:
SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL.");
case OpBeginInvocationInterlockEXT:
case OpEndInvocationInterlockEXT:
if (hlsl_options.shader_model < 51)
SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
break; // Nothing to do in the body
default:
CompilerGLSL::emit_instruction(instruction);
break;
@ -4850,6 +4867,12 @@ string CompilerHLSL::compile()
validate_shader_model();
update_active_builtins();
analyze_image_and_sampler_usage();
if (get_execution_model() == ExecutionModelFragment &&
(get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
analyze_interlocked_resource_usage();
// Subpass input needs SV_Position.
if (need_subpass_input)

View File

@ -852,6 +852,12 @@ string CompilerMSL::compile()
update_active_builtins();
analyze_image_and_sampler_usage();
analyze_sampled_image_usage();
if (get_execution_model() == ExecutionModelFragment &&
(get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
analyze_interlocked_resource_usage();
preprocess_op_codes();
build_implicit_builtins();
@ -5541,6 +5547,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
emit_op(ops[0], ops[1], "simd_is_helper_thread()", false);
break;
case OpBeginInvocationInterlockEXT:
case OpEndInvocationInterlockEXT:
if (!msl_options.supports_msl_version(2, 0))
SPIRV_CROSS_THROW("Raster order groups require MSL 2.0.");
break; // Nothing to do in the body
default:
CompilerGLSL::emit_instruction(instruction);
break;
@ -7436,8 +7448,15 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
bool is_builtin = is_member_builtin(type, index, &builtin);
if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
return join(" [[id(",
get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")]]");
{
string quals = join(
" [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
if (interlocked_resources.count(
get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
quals += ", raster_order_group(0)";
quals += "]]";
return quals;
}
// Vertex function inputs
if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
@ -8239,7 +8258,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
ep_args += ", ";
ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
r.name + "_" + convert_to_string(i);
ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")";
if (interlocked_resources.count(var_id))
ep_args += ", raster_order_group(0)";
ep_args += "]]";
}
}
else
@ -8248,7 +8270,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
ep_args += ", ";
ep_args +=
get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
if (interlocked_resources.count(var_id))
ep_args += ", raster_order_group(0)";
ep_args += "]]";
}
break;
}
@ -8264,7 +8289,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
ep_args += image_type_glsl(type, var_id) + " " + r.name;
if (r.plane > 0)
ep_args += join(plane_name_suffix, r.plane);
ep_args += " [[texture(" + convert_to_string(r.index) + ")]]";
ep_args += " [[texture(" + convert_to_string(r.index) + ")";
if (interlocked_resources.count(var_id))
ep_args += ", raster_order_group(0)";
ep_args += "]]";
break;
default:
if (!ep_args.empty())
@ -8274,7 +8302,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
type_to_glsl(type, var_id) + "& " + r.name;
else
ep_args += type_to_glsl(type, var_id) + " " + r.name;
ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
if (interlocked_resources.count(var_id))
ep_args += ", raster_order_group(0)";
ep_args += "]]";
break;
}
}