Add GroupSync() in HLSL.
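
Emit proper HLSL barrier intrinsics for the SPIR-V barrier opcodes in compute
shaders. In short, the mapping is:

    OpControlBarrier, WorkgroupMemory semantics -> GroupMemoryBarrierWithGroupSync()
    OpControlBarrier, other memory semantics    -> DeviceMemoryBarrierWithGroupSync()
    OpControlBarrier, no memory semantics       -> GroupMemoryBarrierWithGroupSync()
                                                   (HLSL has no standalone GroupSync())
    OpMemoryBarrier,  WorkgroupMemory semantics -> GroupMemoryBarrier()
    OpMemoryBarrier,  other memory semantics    -> DeviceMemoryBarrier()

An OpMemoryBarrier immediately followed by an OpControlBarrier whose semantics
cover it is emitted as a no-op, since a barrier in HLSL always implies a group
sync. This peephole needs to peek at the next instruction in the current block,
hence the new current_emitting_block pointer and the get_next_instruction_in_block()
helper. For example, the GLSL sequence in the new test shader

    memoryBarrierShared();
    barrier();

compiles to a single

    GroupMemoryBarrierWithGroupSync();

in the HLSL reference output.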
parent 85eb972259
commit ae236e7056
reference/shaders-hlsl/comp/shared.comp (new file, +31 lines)
@@ -0,0 +1,31 @@
+const uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u);
+
+ByteAddressBuffer _22 : register(u0);
+RWByteAddressBuffer _44 : register(u1);
+
+static uint3 gl_GlobalInvocationID;
+static uint gl_LocalInvocationIndex;
+struct SPIRV_Cross_Input
+{
+    uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
+    uint gl_LocalInvocationIndex : SV_GroupIndex;
+};
+
+groupshared float sShared[4];
+
+void comp_main()
+{
+    uint ident = gl_GlobalInvocationID.x;
+    float idata = asfloat(_22.Load(ident * 4 + 0));
+    sShared[gl_LocalInvocationIndex] = idata;
+    GroupMemoryBarrierWithGroupSync();
+    _44.Store(ident * 4 + 0, asuint(sShared[(4u - gl_LocalInvocationIndex) - 1u]));
+}
+
+[numthreads(4, 1, 1)]
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
+    gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
+    comp_main();
+}
shaders-hlsl/comp/shared.comp (new file, +27 lines)
@@ -0,0 +1,27 @@
+#version 310 es
+layout(local_size_x = 4) in;
+
+shared float sShared[gl_WorkGroupSize.x];
+
+layout(std430, binding = 0) readonly buffer SSBO
+{
+    float in_data[];
+};
+
+layout(std430, binding = 1) writeonly buffer SSBO2
+{
+    float out_data[];
+};
+
+void main()
+{
+    uint ident = gl_GlobalInvocationID.x;
+    float idata = in_data[ident];
+
+    sShared[gl_LocalInvocationIndex] = idata;
+    memoryBarrierShared();
+    barrier();
+
+    out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u];
+}
+
spirv_glsl.cpp
@@ -4888,6 +4888,14 @@ bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &r
 	return true;
 }
 
+void CompilerGLSL::emit_block_instructions(const SPIRBlock &block)
+{
+	current_emitting_block = &block;
+	for (auto &op : block.ops)
+		emit_instruction(op);
+	current_emitting_block = nullptr;
+}
+
 void CompilerGLSL::emit_instruction(const Instruction &instruction)
 {
 	auto ops = stream(instruction);
@@ -6262,10 +6270,16 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		if (get_entry_point().model == ExecutionModelGLCompute)
 		{
 			uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
+
+			// We cannot forward any loads beyond the memory barrier.
+			if (mem)
+				flush_all_active_variables();
+
 			if (mem == MemorySemanticsWorkgroupMemoryMask)
 				statement("memoryBarrierShared();");
 			else if (mem)
 				statement("memoryBarrier();");
+
 		}
 		statement("barrier();");
 		break;
@@ -7338,8 +7352,7 @@ string CompilerGLSL::emit_continue_block(uint32_t continue_block)
 {
 	propagate_loop_dominators(*block);
 	// Write out all instructions we have in this block.
-	for (auto &op : block->ops)
-		emit_instruction(op);
+	emit_block_instructions(*block);
 
 	// For plain branchless for/while continue blocks.
 	if (block->next_block)
@@ -7410,8 +7423,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 	// If we're trying to create a true for loop,
 	// we need to make sure that all opcodes before branch statement do not actually emit any code.
 	// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
-	for (auto &op : block.ops)
-		emit_instruction(op);
+	emit_block_instructions(block);
 
 	bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
 
@@ -7462,8 +7474,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
 	// If we're trying to create a true for loop,
 	// we need to make sure that all opcodes before branch statement do not actually emit any code.
 	// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
-	for (auto &op : child.ops)
-		emit_instruction(op);
+	emit_block_instructions(child);
 
 	bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
 
@@ -7569,8 +7580,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 	{
 		statement("do");
 		begin_scope();
-		for (auto &op : block.ops)
-			emit_instruction(op);
+
+		emit_block_instructions(block);
 	}
 	else if (block.merge == SPIRBlock::MergeLoop)
 	{
@@ -7582,13 +7593,12 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
 
 		statement("for (;;)");
 		begin_scope();
-		for (auto &op : block.ops)
-			emit_instruction(op);
+
+		emit_block_instructions(block);
 	}
 	else
 	{
-		for (auto &op : block.ops)
-			emit_instruction(op);
+		emit_block_instructions(block);
 	}
 
 	// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
spirv_glsl.hpp
@@ -181,7 +181,12 @@ protected:
 
 	// Virtualize methods which need to be overridden by subclass targets like C++ and such.
 	virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags);
+
+	// Kinda ugly way to let opcodes peek at their neighbor instructions for trivial peephole scenarios.
+	const SPIRBlock *current_emitting_block = nullptr;
+
 	virtual void emit_instruction(const Instruction &instr);
+	void emit_block_instructions(const SPIRBlock &block);
 	virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
 	                          uint32_t count);
 	virtual void emit_header();
spirv_hlsl.cpp
@@ -2404,6 +2404,16 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 	register_read(ops[1], ops[2], should_forward(ops[2]));
 }
 
+const Instruction *CompilerHLSL::get_next_instruction_in_block(const Instruction &instr)
+{
+	// FIXME: This is kind of hacky. There should be a cleaner way.
+	uint32_t offset = uint32_t(&instr - current_emitting_block->ops.data());
+	if ((offset + 1) < current_emitting_block->ops.size())
+		return &current_emitting_block->ops[offset + 1];
+	else
+		return nullptr;
+}
+
 void CompilerHLSL::emit_instruction(const Instruction &instruction)
 {
 	auto ops = stream(instruction);
@@ -2783,6 +2793,52 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		break;
 	}
 
+	case OpMemoryBarrier:
+	{
+		uint32_t mem = get<SPIRConstant>(ops[1]).scalar();
+
+		// If the next instruction is OpControlBarrier and it does what we need, this opcode can be a noop.
+		const Instruction *next = get_next_instruction_in_block(instruction);
+		if (next && next->op == OpControlBarrier)
+		{
+			auto *next_ops = stream(*next);
+			uint32_t next_mem = get<SPIRConstant>(next_ops[2]).scalar();
+			next_mem |= MemorySemanticsWorkgroupMemoryMask; // Barrier in HLSL always implies GroupSync.
+			if ((next_mem & mem) == mem)
+				break;
+		}
+
+		// We cannot forward any loads beyond the memory barrier.
+		if (mem)
+			flush_all_active_variables();
+
+		if (mem == MemorySemanticsWorkgroupMemoryMask)
+			statement("GroupMemoryBarrier();");
+		else if (mem)
+			statement("DeviceMemoryBarrier();");
+		break;
+	}
+
+	case OpControlBarrier:
+	{
+		uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
+
+		// We cannot forward any loads beyond the memory barrier.
+		if (mem)
+			flush_all_active_variables();
+
+		if (mem == MemorySemanticsWorkgroupMemoryMask)
+			statement("GroupMemoryBarrierWithGroupSync();");
+		else if (mem)
+			statement("DeviceMemoryBarrierWithGroupSync();");
+		else
+		{
+			// There is no "GroupSync" standalone function.
+			statement("GroupMemoryBarrierWithGroupSync();");
+		}
+		break;
+	}
+
 	default:
 		CompilerGLSL::emit_instruction(instruction);
 		break;
spirv_hlsl.hpp
@@ -93,6 +93,7 @@ private:
 	std::string read_access_chain(const SPIRAccessChain &chain);
 	void emit_store(const Instruction &instruction);
 	void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op);
+	const Instruction *get_next_instruction_in_block(const Instruction &instr);
 
 	void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
 	                        const std::string &qualifier) override;