Rewrite barrier handling in HLSL.

This commit is contained in:
Hans-Kristian Arntzen 2018-01-09 12:41:13 +01:00
parent 27ad8c0922
commit 9c3d4e7c60
7 changed files with 114 additions and 69 deletions

View File

@ -2,21 +2,21 @@ static const uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u);
void comp_main()
{
GroupMemoryBarrier();
AllMemoryBarrier();
DeviceMemoryBarrier();
DeviceMemoryBarrier();
AllMemoryBarrier();
GroupMemoryBarrierWithGroupSync();
AllMemoryBarrier();
GroupMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
GroupMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
DeviceMemoryBarrierWithGroupSync();
GroupMemoryBarrierWithGroupSync();
AllMemoryBarrier();
GroupMemoryBarrierWithGroupSync();
GroupMemoryBarrierWithGroupSync();
}
[numthreads(4, 1, 1)]

View File

@ -16,7 +16,7 @@ groupshared float sShared[4];
void comp_main()
{
sShared[gl_LocalInvocationIndex] = asfloat(_22.Load(gl_GlobalInvocationID.x * 4 + 0));
DeviceMemoryBarrierWithGroupSync();
GroupMemoryBarrierWithGroupSync();
_44.Store(gl_GlobalInvocationID.x * 4 + 0, asuint(sShared[(4u - gl_LocalInvocationIndex) - 1u]));
}

View File

@ -2,12 +2,12 @@ static const uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u);
void barrier_shared()
{
DeviceMemoryBarrier();
GroupMemoryBarrier();
}
void full_barrier()
{
DeviceMemoryBarrier();
AllMemoryBarrier();
}
void image_barrier()
@ -22,41 +22,41 @@ void buffer_barrier()
void group_barrier()
{
DeviceMemoryBarrier();
AllMemoryBarrier();
}
void barrier_shared_exec()
{
DeviceMemoryBarrierWithGroupSync();
GroupMemoryBarrierWithGroupSync();
}
void full_barrier_exec()
{
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
AllMemoryBarrier();
GroupMemoryBarrierWithGroupSync();
}
void image_barrier_exec()
{
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
GroupMemoryBarrierWithGroupSync();
}
void buffer_barrier_exec()
{
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
GroupMemoryBarrierWithGroupSync();
}
void group_barrier_exec()
{
DeviceMemoryBarrier();
DeviceMemoryBarrierWithGroupSync();
AllMemoryBarrier();
GroupMemoryBarrierWithGroupSync();
}
void exec_barrier()
{
DeviceMemoryBarrierWithGroupSync();
GroupMemoryBarrierWithGroupSync();
}
void comp_main()

View File

@ -18,7 +18,7 @@ void comp_main()
uint ident = gl_GlobalInvocationID.x;
float idata = asfloat(_22.Load(ident * 4 + 0));
sShared[gl_LocalInvocationIndex] = idata;
DeviceMemoryBarrierWithGroupSync();
GroupMemoryBarrierWithGroupSync();
_44.Store(ident * 4 + 0, asuint(sShared[(4u - gl_LocalInvocationIndex) - 1u]));
}

View File

@ -25,16 +25,6 @@ using namespace spv;
using namespace spirv_cross;
using namespace std;
// Keep only the memory-type semantics bits that influence which barrier we emit.
// Ordering bits (acquire/release and friends) are deliberately dropped — per the
// comment in the HLSL backend below, they are not relevant to GLSL/HLSL output.
static uint32_t mask_relevant_memory_semantics(uint32_t semantics)
{
return semantics & (MemorySemanticsAtomicCounterMemoryMask |
MemorySemanticsImageMemoryMask |
MemorySemanticsWorkgroupMemoryMask |
MemorySemanticsUniformMemoryMask |
MemorySemanticsCrossWorkgroupMemoryMask |
MemorySemanticsSubgroupMemoryMask);
}
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
switch (packing)
@ -8449,3 +8439,14 @@ const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction
else
return nullptr;
}
// Strips a SPIR-V MemorySemantics mask down to the memory-type bits
// (atomic counter, image, workgroup, uniform, cross-workgroup, subgroup)
// that drive barrier selection in the backends. Acquire/release and
// similar ordering flags are masked away, as they do not affect which
// barrier statement gets emitted.
uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
{
return semantics & (MemorySemanticsAtomicCounterMemoryMask |
MemorySemanticsImageMemoryMask |
MemorySemanticsWorkgroupMemoryMask |
MemorySemanticsUniformMemoryMask |
MemorySemanticsCrossWorkgroupMemoryMask |
MemorySemanticsSubgroupMemoryMask);
}

View File

@ -510,6 +510,7 @@ protected:
bool can_use_io_location(spv::StorageClass storage);
const Instruction *get_next_instruction_in_block(const Instruction &instr);
static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
private:
void init()

View File

@ -3421,48 +3421,91 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
break;
}
case OpControlBarrier:
case OpMemoryBarrier:
{
uint32_t mem = get<SPIRConstant>(ops[1]).scalar();
uint32_t memory;
uint32_t semantics;
// If the next instruction is OpControlBarrier and it does what we need, this opcode can be a noop.
const Instruction *next = get_next_instruction_in_block(instruction);
if (next && next->op == OpControlBarrier)
if (opcode == OpMemoryBarrier)
{
auto *next_ops = stream(*next);
uint32_t next_mem = get<SPIRConstant>(next_ops[2]).scalar();
next_mem |= MemorySemanticsWorkgroupMemoryMask; // Barrier in HLSL always implies GroupSync.
if ((next_mem & mem) == mem)
break;
memory = get<SPIRConstant>(ops[0]).scalar();
semantics = get<SPIRConstant>(ops[1]).scalar();
}
// We cannot forward any loads beyond the memory barrier.
if (mem)
flush_all_active_variables();
if (mem == MemorySemanticsWorkgroupMemoryMask)
statement("GroupMemoryBarrier();");
else if (mem)
statement("DeviceMemoryBarrier();");
break;
}
case OpControlBarrier:
{
uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
// We cannot forward any loads beyond the memory barrier.
if (mem)
flush_all_active_variables();
if (mem == MemorySemanticsWorkgroupMemoryMask)
statement("GroupMemoryBarrierWithGroupSync();");
else if (mem)
statement("DeviceMemoryBarrierWithGroupSync();");
else
{
// There is no "GroupSync" standalone function.
statement("GroupMemoryBarrierWithGroupSync();");
memory = get<SPIRConstant>(ops[1]).scalar();
semantics = get<SPIRConstant>(ops[2]).scalar();
}
// We only care about these flags, acquire/release and friends are not relevant to GLSL.
semantics = mask_relevant_memory_semantics(semantics);
if (opcode == OpMemoryBarrier)
{
// If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
// does what we need, so we avoid redundant barriers.
const Instruction *next = get_next_instruction_in_block(instruction);
if (next && next->op == OpControlBarrier)
{
auto *next_ops = stream(*next);
uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar();
uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar();
next_semantics = mask_relevant_memory_semantics(next_semantics);
// There is no "just execution barrier" in HLSL.
// If there are no memory semantics for next instruction, we will imply group shared memory is synced.
if (next_semantics == 0)
next_semantics = MemorySemanticsWorkgroupMemoryMask;
bool memory_scope_covered = false;
if (next_memory == memory)
memory_scope_covered = true;
else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
{
// If we only care about workgroup memory, either Device or Workgroup scope is fine,
// scope does not have to match.
if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
(memory == ScopeDevice || memory == ScopeWorkgroup))
{
memory_scope_covered = true;
}
}
else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
{
// The control barrier has device scope, but the memory barrier just has workgroup scope.
memory_scope_covered = true;
}
// If we have the same memory scope, and all memory types are covered, we're good.
if (memory_scope_covered && (semantics & next_semantics) == semantics)
break;
}
}
// We are synchronizing some memory or syncing execution,
// so we cannot forward any loads beyond the memory barrier.
if (semantics || opcode == OpControlBarrier)
flush_all_active_variables();
if (opcode == OpControlBarrier)
{
// We cannot emit just execution barrier, for no memory semantics pick the cheapest option.
if (semantics == MemorySemanticsWorkgroupMemoryMask || semantics == 0)
statement("GroupMemoryBarrierWithGroupSync();");
else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0)
statement("DeviceMemoryBarrierWithGroupSync();");
else
statement("AllMemoryBarrierWithGroupSync();");
}
else
{
if (semantics == MemorySemanticsWorkgroupMemoryMask)
statement("GroupMemoryBarrier();");
else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0)
statement("DeviceMemoryBarrier();");
else
statement("AllMemoryBarrier();");
}
break;
}