Overhaul barrier handling in GLSL.
commit 9c72aa00c9
parent 23f0abf112
@@ -16,8 +16,8 @@ shared float sShared[4];
void main()
{
    sShared[gl_LocalInvocationIndex] = _22.in_data[gl_GlobalInvocationID.x];
    memoryBarrier();
    memoryBarrier();
    memoryBarrierShared();
    memoryBarrierShared();
    barrier();
    _44.out_data[gl_GlobalInvocationID.x] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
}
@@ -18,8 +18,8 @@ void main()
    uint ident = gl_GlobalInvocationID.x;
    float idata = _22.in_data[ident];
    sShared[gl_LocalInvocationIndex] = idata;
    memoryBarrier();
    memoryBarrier();
    memoryBarrierShared();
    memoryBarrierShared();
    barrier();
    _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];
}
spirv_glsl.cpp (132 changed lines)
@@ -25,6 +25,16 @@ using namespace spv;
using namespace spirv_cross;
using namespace std;

static uint32_t mask_relevant_memory_semantics(uint32_t semantics)
{
    return semantics & (MemorySemanticsAtomicCounterMemoryMask |
                        MemorySemanticsImageMemoryMask |
                        MemorySemanticsWorkgroupMemoryMask |
                        MemorySemanticsUniformMemoryMask |
                        MemorySemanticsCrossWorkgroupMemoryMask |
                        MemorySemanticsSubgroupMemoryMask);
}

static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
    switch (packing)
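Not part of the commit, but as a quick illustration of what this helper does, here is a minimal sketch that restates it locally (it assumes the spirv.hpp header bundled with SPIRV-Cross is on the include path): ordering bits such as AcquireRelease are stripped, and only the memory-class bits survive.

// Illustrative only; restates mask_relevant_memory_semantics() so it compiles on its own.
#include <cassert>
#include <cstdint>
#include "spirv.hpp"

static uint32_t mask_relevant_memory_semantics(uint32_t semantics)
{
    return semantics & (spv::MemorySemanticsAtomicCounterMemoryMask | spv::MemorySemanticsImageMemoryMask |
                        spv::MemorySemanticsWorkgroupMemoryMask | spv::MemorySemanticsUniformMemoryMask |
                        spv::MemorySemanticsCrossWorkgroupMemoryMask | spv::MemorySemanticsSubgroupMemoryMask);
}

int main()
{
    // AcquireRelease is an ordering bit; UniformMemory is a memory-class bit.
    uint32_t semantics = spv::MemorySemanticsAcquireReleaseMask | spv::MemorySemanticsUniformMemoryMask;
    assert(mask_relevant_memory_semantics(semantics) == spv::MemorySemanticsUniformMemoryMask);
    return 0;
}

A consequence visible in the rewritten barrier handling below: a barrier whose semantics mask to zero emits no GLSL memory barrier at all.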
@@ -6629,37 +6639,97 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)

// Compute
case OpControlBarrier:
{
    // Ignore execution and memory scope.
    if (get_entry_point().model == ExecutionModelGLCompute)
    {
        uint32_t mem = get<SPIRConstant>(ops[2]).scalar();

        // We cannot forward any loads beyond the memory barrier.
        if (mem)
            flush_all_active_variables();

        if (mem == MemorySemanticsWorkgroupMemoryMask)
            statement("memoryBarrierShared();");
        else if (mem)
            statement("memoryBarrier();");
    }
    statement("barrier();");
    break;
}

case OpMemoryBarrier:
{
    uint32_t mem = get<SPIRConstant>(ops[1]).scalar();
    if (get_entry_point().model == ExecutionModelTessellationControl)
    {
        // Control shaders only have barriers, and it implies memory barriers.
        if (opcode == OpControlBarrier)
            statement("barrier();");
        break;
    }

    // We cannot forward any loads beyond the memory barrier.
    if (mem)
    uint32_t memory;
    uint32_t semantics;

    if (opcode == OpMemoryBarrier)
    {
        memory = get<SPIRConstant>(ops[0]).scalar();
        semantics = get<SPIRConstant>(ops[1]).scalar();
    }
    else
    {
        memory = get<SPIRConstant>(ops[1]).scalar();
        semantics = get<SPIRConstant>(ops[2]).scalar();
    }

    // We only care about these flags, acquire/release and friends are not relevant to GLSL.
    semantics = mask_relevant_memory_semantics(semantics);

    if (opcode == OpMemoryBarrier)
    {
        // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
        // does what we need, so we avoid redundant barriers.
        const Instruction *next = get_next_instruction_in_block(instruction);
        if (next && next->op == OpControlBarrier)
        {
            auto *next_ops = stream(*next);
            uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar();
            uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar();
            next_semantics = mask_relevant_memory_semantics(next_semantics);

            // If we have the same memory scope, and all memory types are covered, we're good.
            if (next_memory == memory && (semantics & next_semantics) == semantics)
                break;
        }
    }

    // We are synchronizing some memory or syncing execution,
    // so we cannot forward any loads beyond the memory barrier.
    if (semantics || opcode == OpControlBarrier)
        flush_all_active_variables();

    if (mem == MemorySemanticsWorkgroupMemoryMask)
        statement("memoryBarrierShared();");
    else if (mem)
        statement("memoryBarrier();");
    if (memory == ScopeWorkgroup) // Only need to consider memory within a group
    {
        if (semantics == MemorySemanticsWorkgroupMemoryMask)
            statement("memoryBarrierShared();");
        else if (semantics != 0)
            statement("groupMemoryBarrier();");
    }
    else
    {
        const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask |
                                      MemorySemanticsUniformMemoryMask |
                                      MemorySemanticsImageMemoryMask |
                                      MemorySemanticsAtomicCounterMemoryMask;

        if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
        {
            // These are not relevant for GLSL, but assume it means memoryBarrier().
            // memoryBarrier() does everything, so no need to test anything else.
            statement("memoryBarrier();");
        }
        else if ((semantics & all_barriers) == all_barriers)
        {
            // Short-hand instead of emitting 4 barriers.
            statement("memoryBarrier();");
        }
        else
        {
            // Pick out individual barriers.
            if (semantics & MemorySemanticsWorkgroupMemoryMask)
                statement("memoryBarrierShared();");
            if (semantics & MemorySemanticsUniformMemoryMask)
                statement("memoryBarrierBuffer();");
            if (semantics & MemorySemanticsImageMemoryMask)
                statement("memoryBarrierImage();");
            if (semantics & MemorySemanticsAtomicCounterMemoryMask)
                statement("memoryBarrierAtomicCounter();");
        }
    }

    if (opcode == OpControlBarrier)
        statement("barrier();");
    break;
}

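Again not part of the commit: a self-contained sketch of the scope/semantics to GLSL mapping that the rewritten case above implements. The function name glsl_barriers_for is invented for illustration; the spv:: constants come from the bundled spirv.hpp.

// Illustrative sketch only; mirrors the emission logic above, outside the compiler.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>
#include "spirv.hpp"

static std::vector<std::string> glsl_barriers_for(uint32_t memory, uint32_t semantics)
{
    using namespace spv;
    std::vector<std::string> out;

    if (memory == ScopeWorkgroup) // Only need to consider memory within a group.
    {
        if (semantics == MemorySemanticsWorkgroupMemoryMask)
            out.push_back("memoryBarrierShared();");
        else if (semantics != 0)
            out.push_back("groupMemoryBarrier();");
    }
    else
    {
        const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
                                      MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask;

        if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
            out.push_back("memoryBarrier();"); // Catch-all; memoryBarrier() covers everything.
        else if ((semantics & all_barriers) == all_barriers)
            out.push_back("memoryBarrier();"); // Short-hand instead of four separate barriers.
        else
        {
            if (semantics & MemorySemanticsWorkgroupMemoryMask)
                out.push_back("memoryBarrierShared();");
            if (semantics & MemorySemanticsUniformMemoryMask)
                out.push_back("memoryBarrierBuffer();");
            if (semantics & MemorySemanticsImageMemoryMask)
                out.push_back("memoryBarrierImage();");
            if (semantics & MemorySemanticsAtomicCounterMemoryMask)
                out.push_back("memoryBarrierAtomicCounter();");
        }
    }
    return out;
}

int main()
{
    // Workgroup scope, shared memory only -> memoryBarrierShared();
    for (auto &s : glsl_barriers_for(spv::ScopeWorkgroup, spv::MemorySemanticsWorkgroupMemoryMask))
        std::cout << s << "\n";

    // Device scope, buffer + image memory -> memoryBarrierBuffer(); memoryBarrierImage();
    for (auto &s : glsl_barriers_for(spv::ScopeDevice, spv::MemorySemanticsUniformMemoryMask | spv::MemorySemanticsImageMemoryMask))
        std::cout << s << "\n";
    return 0;
}

For OpControlBarrier the backend then appends barrier(); after whichever memory barriers were chosen, and the OpMemoryBarrier lookahead above means a memory barrier immediately followed by a covering control barrier emits nothing on its own.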
@@ -8350,3 +8420,13 @@ void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_
        }
    }
}

const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
{
    // FIXME: This is kind of hacky. There should be a cleaner way.
    auto offset = uint32_t(&instr - current_emitting_block->ops.data());
    if ((offset + 1) < current_emitting_block->ops.size())
        return &current_emitting_block->ops[offset + 1];
    else
        return nullptr;
}
@@ -509,6 +509,7 @@ protected:
    static std::string sanitize_underscores(const std::string &str);

    bool can_use_io_location(spv::StorageClass storage);
    const Instruction *get_next_instruction_in_block(const Instruction &instr);

private:
    void init()
@@ -3017,16 +3017,6 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
    register_read(ops[1], ops[2], should_forward(ops[2]));
}

const Instruction *CompilerHLSL::get_next_instruction_in_block(const Instruction &instr)
{
    // FIXME: This is kind of hacky. There should be a cleaner way.
    uint32_t offset = uint32_t(&instr - current_emitting_block->ops.data());
    if ((offset + 1) < current_emitting_block->ops.size())
        return &current_emitting_block->ops[offset + 1];
    else
        return nullptr;
}

void CompilerHLSL::emit_instruction(const Instruction &instruction)
{
    auto ops = stream(instruction);
@@ -107,7 +107,6 @@ private:
    void write_access_chain(const SPIRAccessChain &chain, uint32_t value);
    void emit_store(const Instruction &instruction);
    void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op);
    const Instruction *get_next_instruction_in_block(const Instruction &instr);

    void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
                            const std::string &qualifier) override;