Merge pull request #311 from KhronosGroup/hlsl-image-load-store
HLSL image load/store, atomics, shared memory, barriers
This commit is contained in:
commit
b91ddf5acf
90
reference/shaders-hlsl/comp/atomic.comp
Normal file
90
reference/shaders-hlsl/comp/atomic.comp
Normal file
@ -0,0 +1,90 @@
|
||||
RWByteAddressBuffer ssbo : register(u2);
|
||||
RWTexture2D<uint> uImage : register(u0);
|
||||
RWTexture2D<int> iImage : register(u1);
|
||||
|
||||
groupshared int int_atomic;
|
||||
groupshared uint uint_atomic;
|
||||
groupshared int int_atomic_array[1];
|
||||
groupshared uint uint_atomic_array[1];
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint _19;
|
||||
InterlockedAdd(uImage[int2(1, 5)], 1u, _19);
|
||||
uint _27;
|
||||
InterlockedAdd(uImage[int2(1, 5)], 1u, _27);
|
||||
int _28 = int(_27);
|
||||
iImage[int2(1, 6)] = int4(_28, _28, _28, _28).x;
|
||||
uint _32;
|
||||
InterlockedOr(uImage[int2(1, 5)], 1u, _32);
|
||||
uint _34;
|
||||
InterlockedXor(uImage[int2(1, 5)], 1u, _34);
|
||||
uint _36;
|
||||
InterlockedAnd(uImage[int2(1, 5)], 1u, _36);
|
||||
uint _38;
|
||||
InterlockedMin(uImage[int2(1, 5)], 1u, _38);
|
||||
uint _40;
|
||||
InterlockedMax(uImage[int2(1, 5)], 1u, _40);
|
||||
uint _44;
|
||||
InterlockedCompareExchange(uImage[int2(1, 5)], 10u, 2u, _44);
|
||||
int _47;
|
||||
InterlockedAdd(iImage[int2(1, 6)], 1, _47);
|
||||
int _49;
|
||||
InterlockedOr(iImage[int2(1, 6)], 1, _49);
|
||||
int _51;
|
||||
InterlockedXor(iImage[int2(1, 6)], 1, _51);
|
||||
int _53;
|
||||
InterlockedAnd(iImage[int2(1, 6)], 1, _53);
|
||||
int _55;
|
||||
InterlockedMin(iImage[int2(1, 6)], 1, _55);
|
||||
int _57;
|
||||
InterlockedMax(iImage[int2(1, 6)], 1, _57);
|
||||
int _61;
|
||||
InterlockedCompareExchange(iImage[int2(1, 5)], 10, 2, _61);
|
||||
uint _68;
|
||||
ssbo.InterlockedAdd(0, 1u, _68);
|
||||
uint _70;
|
||||
ssbo.InterlockedOr(0, 1u, _70);
|
||||
uint _72;
|
||||
ssbo.InterlockedXor(0, 1u, _72);
|
||||
uint _74;
|
||||
ssbo.InterlockedAnd(0, 1u, _74);
|
||||
uint _76;
|
||||
ssbo.InterlockedMin(0, 1u, _76);
|
||||
uint _78;
|
||||
ssbo.InterlockedMax(0, 1u, _78);
|
||||
uint _80;
|
||||
ssbo.InterlockedExchange(0, 1u, _80);
|
||||
uint _82;
|
||||
ssbo.InterlockedCompareExchange(0, 10u, 2u, _82);
|
||||
int _85;
|
||||
ssbo.InterlockedAdd(4, 1, _85);
|
||||
int _87;
|
||||
ssbo.InterlockedOr(4, 1, _87);
|
||||
int _89;
|
||||
ssbo.InterlockedXor(4, 1, _89);
|
||||
int _91;
|
||||
ssbo.InterlockedAnd(4, 1, _91);
|
||||
int _93;
|
||||
ssbo.InterlockedMin(4, 1, _93);
|
||||
int _95;
|
||||
ssbo.InterlockedMax(4, 1, _95);
|
||||
int _97;
|
||||
ssbo.InterlockedExchange(4, 1, _97);
|
||||
int _99;
|
||||
ssbo.InterlockedCompareExchange(4, 10, 2, _99);
|
||||
int _102;
|
||||
InterlockedAdd(int_atomic, 10, _102);
|
||||
uint _105;
|
||||
InterlockedAdd(uint_atomic, 10u, _105);
|
||||
int _110;
|
||||
InterlockedAdd(int_atomic_array[0], 10, _110);
|
||||
uint _115;
|
||||
InterlockedAdd(uint_atomic_array[0], 10u, _115);
|
||||
}
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main()
|
||||
{
|
||||
comp_main();
|
||||
}
|
65
reference/shaders-hlsl/comp/image.comp
Normal file
65
reference/shaders-hlsl/comp/image.comp
Normal file
@ -0,0 +1,65 @@
|
||||
RWTexture2D<float> uImageInF : register(u0);
|
||||
RWTexture2D<float> uImageOutF : register(u1);
|
||||
RWTexture2D<int> uImageInI : register(u2);
|
||||
RWTexture2D<int> uImageOutI : register(u3);
|
||||
RWTexture2D<uint> uImageInU : register(u4);
|
||||
RWTexture2D<uint> uImageOutU : register(u5);
|
||||
RWBuffer<float> uImageInBuffer : register(u6);
|
||||
RWBuffer<float> uImageOutBuffer : register(u7);
|
||||
RWTexture2D<float2> uImageInF2 : register(u0);
|
||||
RWTexture2D<float2> uImageOutF2 : register(u1);
|
||||
RWTexture2D<int2> uImageInI2 : register(u2);
|
||||
RWTexture2D<int2> uImageOutI2 : register(u3);
|
||||
RWTexture2D<uint2> uImageInU2 : register(u4);
|
||||
RWTexture2D<uint2> uImageOutU2 : register(u5);
|
||||
RWBuffer<float2> uImageInBuffer2 : register(u6);
|
||||
RWBuffer<float2> uImageOutBuffer2 : register(u7);
|
||||
RWTexture2D<float4> uImageInF4 : register(u0);
|
||||
RWTexture2D<float4> uImageOutF4 : register(u1);
|
||||
RWTexture2D<int4> uImageInI4 : register(u2);
|
||||
RWTexture2D<int4> uImageOutI4 : register(u3);
|
||||
RWTexture2D<uint4> uImageInU4 : register(u4);
|
||||
RWTexture2D<uint4> uImageOutU4 : register(u5);
|
||||
RWBuffer<float4> uImageInBuffer4 : register(u6);
|
||||
RWBuffer<float4> uImageOutBuffer4 : register(u7);
|
||||
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||
};
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
float4 f = float4(uImageInF[int2(gl_GlobalInvocationID.xy)]);
|
||||
uImageOutF[int2(gl_GlobalInvocationID.xy)] = f.x;
|
||||
int4 i = int4(uImageInI[int2(gl_GlobalInvocationID.xy)]);
|
||||
uImageOutI[int2(gl_GlobalInvocationID.xy)] = i.x;
|
||||
uint4 u = uint4(uImageInU[int2(gl_GlobalInvocationID.xy)]);
|
||||
uImageOutU[int2(gl_GlobalInvocationID.xy)] = u.x;
|
||||
float4 b = float4(uImageInBuffer[int(gl_GlobalInvocationID.x)]);
|
||||
uImageOutBuffer[int(gl_GlobalInvocationID.x)] = b.x;
|
||||
float4 f2 = uImageInF2[int2(gl_GlobalInvocationID.xy)].xyyy;
|
||||
uImageOutF2[int2(gl_GlobalInvocationID.xy)] = f2.xy;
|
||||
int4 i2 = uImageInI2[int2(gl_GlobalInvocationID.xy)].xyyy;
|
||||
uImageOutI2[int2(gl_GlobalInvocationID.xy)] = i2.xy;
|
||||
uint4 u2 = uImageInU2[int2(gl_GlobalInvocationID.xy)].xyyy;
|
||||
uImageOutU2[int2(gl_GlobalInvocationID.xy)] = u2.xy;
|
||||
float4 b2 = uImageInBuffer2[int(gl_GlobalInvocationID.x)].xyyy;
|
||||
uImageOutBuffer2[int(gl_GlobalInvocationID.x)] = b2.xy;
|
||||
float4 f4 = uImageInF4[int2(gl_GlobalInvocationID.xy)];
|
||||
uImageOutF4[int2(gl_GlobalInvocationID.xy)] = f4;
|
||||
int4 i4 = uImageInI4[int2(gl_GlobalInvocationID.xy)];
|
||||
uImageOutI4[int2(gl_GlobalInvocationID.xy)] = i4;
|
||||
uint4 u4 = uImageInU4[int2(gl_GlobalInvocationID.xy)];
|
||||
uImageOutU4[int2(gl_GlobalInvocationID.xy)] = u4;
|
||||
float4 b4 = uImageInBuffer4[int(gl_GlobalInvocationID.x)];
|
||||
uImageOutBuffer4[int(gl_GlobalInvocationID.x)] = b4;
|
||||
}
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||
comp_main();
|
||||
}
|
31
reference/shaders-hlsl/comp/shared.comp
Normal file
31
reference/shaders-hlsl/comp/shared.comp
Normal file
@ -0,0 +1,31 @@
|
||||
static const uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u);
|
||||
|
||||
ByteAddressBuffer _22 : register(u0);
|
||||
RWByteAddressBuffer _44 : register(u1);
|
||||
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
static uint gl_LocalInvocationIndex;
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||
uint gl_LocalInvocationIndex : SV_GroupIndex;
|
||||
};
|
||||
|
||||
groupshared float sShared[4];
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float idata = asfloat(_22.Load(ident * 4 + 0));
|
||||
sShared[gl_LocalInvocationIndex] = idata;
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
_44.Store(ident * 4 + 0, asuint(sShared[(4u - gl_LocalInvocationIndex) - 1u]));
|
||||
}
|
||||
|
||||
[numthreads(4, 1, 1)]
|
||||
void main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||
gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex;
|
||||
comp_main();
|
||||
}
|
21
reference/shaders-hlsl/vert/texture_buffer.vert
Normal file
21
reference/shaders-hlsl/vert/texture_buffer.vert
Normal file
@ -0,0 +1,21 @@
|
||||
Buffer<float4> uSamp : register(t4);
|
||||
RWBuffer<float4> uSampo : register(u5);
|
||||
|
||||
static float4 gl_Position;
|
||||
struct SPIRV_Cross_Output
|
||||
{
|
||||
float4 gl_Position : SV_Position;
|
||||
};
|
||||
|
||||
void vert_main()
|
||||
{
|
||||
gl_Position = uSamp.Load(10) + uSampo[100];
|
||||
}
|
||||
|
||||
SPIRV_Cross_Output main()
|
||||
{
|
||||
vert_main();
|
||||
SPIRV_Cross_Output stage_output;
|
||||
stage_output.gl_Position = gl_Position;
|
||||
return stage_output;
|
||||
}
|
66
shaders-hlsl/comp/atomic.comp
Normal file
66
shaders-hlsl/comp/atomic.comp
Normal file
@ -0,0 +1,66 @@
|
||||
#version 310 es
|
||||
#extension GL_OES_shader_image_atomic : require
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(r32ui, binding = 0) uniform highp uimage2D uImage;
|
||||
layout(r32i, binding = 1) uniform highp iimage2D iImage;
|
||||
layout(binding = 2, std430) buffer SSBO
|
||||
{
|
||||
uint u32;
|
||||
int i32;
|
||||
} ssbo;
|
||||
|
||||
shared int int_atomic;
|
||||
shared uint uint_atomic;
|
||||
shared int int_atomic_array[1];
|
||||
shared uint uint_atomic_array[1];
|
||||
|
||||
void main()
|
||||
{
|
||||
imageAtomicAdd(uImage, ivec2(1, 5), 1u);
|
||||
|
||||
// Test that we do not invalidate OpImage variables which are loaded from UniformConstant
|
||||
// address space.
|
||||
imageStore(iImage, ivec2(1, 6), ivec4(imageAtomicAdd(uImage, ivec2(1, 5), 1u)));
|
||||
|
||||
imageAtomicOr(uImage, ivec2(1, 5), 1u);
|
||||
imageAtomicXor(uImage, ivec2(1, 5), 1u);
|
||||
imageAtomicAnd(uImage, ivec2(1, 5), 1u);
|
||||
imageAtomicMin(uImage, ivec2(1, 5), 1u);
|
||||
imageAtomicMax(uImage, ivec2(1, 5), 1u);
|
||||
//imageAtomicExchange(uImage, ivec2(1, 5), 1u);
|
||||
imageAtomicCompSwap(uImage, ivec2(1, 5), 10u, 2u);
|
||||
|
||||
imageAtomicAdd(iImage, ivec2(1, 6), 1);
|
||||
imageAtomicOr(iImage, ivec2(1, 6), 1);
|
||||
imageAtomicXor(iImage, ivec2(1, 6), 1);
|
||||
imageAtomicAnd(iImage, ivec2(1, 6), 1);
|
||||
imageAtomicMin(iImage, ivec2(1, 6), 1);
|
||||
imageAtomicMax(iImage, ivec2(1, 6), 1);
|
||||
//imageAtomicExchange(iImage, ivec2(1, 5), 1u);
|
||||
imageAtomicCompSwap(iImage, ivec2(1, 5), 10, 2);
|
||||
|
||||
atomicAdd(ssbo.u32, 1u);
|
||||
atomicOr(ssbo.u32, 1u);
|
||||
atomicXor(ssbo.u32, 1u);
|
||||
atomicAnd(ssbo.u32, 1u);
|
||||
atomicMin(ssbo.u32, 1u);
|
||||
atomicMax(ssbo.u32, 1u);
|
||||
atomicExchange(ssbo.u32, 1u);
|
||||
atomicCompSwap(ssbo.u32, 10u, 2u);
|
||||
|
||||
atomicAdd(ssbo.i32, 1);
|
||||
atomicOr(ssbo.i32, 1);
|
||||
atomicXor(ssbo.i32, 1);
|
||||
atomicAnd(ssbo.i32, 1);
|
||||
atomicMin(ssbo.i32, 1);
|
||||
atomicMax(ssbo.i32, 1);
|
||||
atomicExchange(ssbo.i32, 1);
|
||||
atomicCompSwap(ssbo.i32, 10, 2);
|
||||
|
||||
atomicAdd(int_atomic, 10);
|
||||
atomicAdd(uint_atomic, 10u);
|
||||
atomicAdd(int_atomic_array[0], 10);
|
||||
atomicAdd(uint_atomic_array[0], 10u);
|
||||
}
|
||||
|
69
shaders-hlsl/comp/image.comp
Normal file
69
shaders-hlsl/comp/image.comp
Normal file
@ -0,0 +1,69 @@
|
||||
#version 450
|
||||
layout(local_size_x = 1) in;
|
||||
|
||||
layout(r32f, binding = 0) uniform readonly image2D uImageInF;
|
||||
layout(r32f, binding = 1) uniform writeonly image2D uImageOutF;
|
||||
layout(r32i, binding = 2) uniform readonly iimage2D uImageInI;
|
||||
layout(r32i, binding = 3) uniform writeonly iimage2D uImageOutI;
|
||||
layout(r32ui, binding = 4) uniform readonly uimage2D uImageInU;
|
||||
layout(r32ui, binding = 5) uniform writeonly uimage2D uImageOutU;
|
||||
layout(r32f, binding = 6) uniform readonly imageBuffer uImageInBuffer;
|
||||
layout(r32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer;
|
||||
|
||||
layout(rg32f, binding = 0) uniform readonly image2D uImageInF2;
|
||||
layout(rg32f, binding = 1) uniform writeonly image2D uImageOutF2;
|
||||
layout(rg32i, binding = 2) uniform readonly iimage2D uImageInI2;
|
||||
layout(rg32i, binding = 3) uniform writeonly iimage2D uImageOutI2;
|
||||
layout(rg32ui, binding = 4) uniform readonly uimage2D uImageInU2;
|
||||
layout(rg32ui, binding = 5) uniform writeonly uimage2D uImageOutU2;
|
||||
layout(rg32f, binding = 6) uniform readonly imageBuffer uImageInBuffer2;
|
||||
layout(rg32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer2;
|
||||
|
||||
layout(rgba32f, binding = 0) uniform readonly image2D uImageInF4;
|
||||
layout(rgba32f, binding = 1) uniform writeonly image2D uImageOutF4;
|
||||
layout(rgba32i, binding = 2) uniform readonly iimage2D uImageInI4;
|
||||
layout(rgba32i, binding = 3) uniform writeonly iimage2D uImageOutI4;
|
||||
layout(rgba32ui, binding = 4) uniform readonly uimage2D uImageInU4;
|
||||
layout(rgba32ui, binding = 5) uniform writeonly uimage2D uImageOutU4;
|
||||
layout(rgba32f, binding = 6) uniform readonly imageBuffer uImageInBuffer4;
|
||||
layout(rgba32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer4;
|
||||
|
||||
void main()
|
||||
{
|
||||
vec4 f = imageLoad(uImageInF, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutF, ivec2(gl_GlobalInvocationID.xy), f);
|
||||
|
||||
ivec4 i = imageLoad(uImageInI, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutI, ivec2(gl_GlobalInvocationID.xy), i);
|
||||
|
||||
uvec4 u = imageLoad(uImageInU, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutU, ivec2(gl_GlobalInvocationID.xy), u);
|
||||
|
||||
vec4 b = imageLoad(uImageInBuffer, int(gl_GlobalInvocationID.x));
|
||||
imageStore(uImageOutBuffer, int(gl_GlobalInvocationID.x), b);
|
||||
|
||||
vec4 f2 = imageLoad(uImageInF2, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutF2, ivec2(gl_GlobalInvocationID.xy), f2);
|
||||
|
||||
ivec4 i2 = imageLoad(uImageInI2, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutI2, ivec2(gl_GlobalInvocationID.xy), i2);
|
||||
|
||||
uvec4 u2 = imageLoad(uImageInU2, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutU2, ivec2(gl_GlobalInvocationID.xy), u2);
|
||||
|
||||
vec4 b2 = imageLoad(uImageInBuffer2, int(gl_GlobalInvocationID.x));
|
||||
imageStore(uImageOutBuffer2, int(gl_GlobalInvocationID.x), b2);
|
||||
|
||||
vec4 f4 = imageLoad(uImageInF4, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutF4, ivec2(gl_GlobalInvocationID.xy), f4);
|
||||
|
||||
ivec4 i4 = imageLoad(uImageInI4, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutI4, ivec2(gl_GlobalInvocationID.xy), i4);
|
||||
|
||||
uvec4 u4 = imageLoad(uImageInU4, ivec2(gl_GlobalInvocationID.xy));
|
||||
imageStore(uImageOutU4, ivec2(gl_GlobalInvocationID.xy), u4);
|
||||
|
||||
vec4 b4 = imageLoad(uImageInBuffer4, int(gl_GlobalInvocationID.x));
|
||||
imageStore(uImageOutBuffer4, int(gl_GlobalInvocationID.x), b4);
|
||||
}
|
||||
|
27
shaders-hlsl/comp/shared.comp
Normal file
27
shaders-hlsl/comp/shared.comp
Normal file
@ -0,0 +1,27 @@
|
||||
#version 310 es
|
||||
layout(local_size_x = 4) in;
|
||||
|
||||
shared float sShared[gl_WorkGroupSize.x];
|
||||
|
||||
layout(std430, binding = 0) readonly buffer SSBO
|
||||
{
|
||||
float in_data[];
|
||||
};
|
||||
|
||||
layout(std430, binding = 1) writeonly buffer SSBO2
|
||||
{
|
||||
float out_data[];
|
||||
};
|
||||
|
||||
void main()
|
||||
{
|
||||
uint ident = gl_GlobalInvocationID.x;
|
||||
float idata = in_data[ident];
|
||||
|
||||
sShared[gl_LocalInvocationIndex] = idata;
|
||||
memoryBarrierShared();
|
||||
barrier();
|
||||
|
||||
out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u];
|
||||
}
|
||||
|
9
shaders-hlsl/vert/texture_buffer.vert
Normal file
9
shaders-hlsl/vert/texture_buffer.vert
Normal file
@ -0,0 +1,9 @@
|
||||
#version 450
|
||||
|
||||
layout(binding = 4) uniform samplerBuffer uSamp;
|
||||
layout(rgba32f, binding = 5) uniform readonly imageBuffer uSampo;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_Position = texelFetch(uSamp, 10) + imageLoad(uSampo, 100);
|
||||
}
|
@ -1704,17 +1704,15 @@ void CompilerGLSL::replace_fragment_outputs()
|
||||
}
|
||||
}
|
||||
|
||||
string CompilerGLSL::remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr)
|
||||
string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
|
||||
{
|
||||
auto &out_type = get<SPIRType>(result_type);
|
||||
|
||||
if (out_type.vecsize == input_components)
|
||||
return to_expression(expr);
|
||||
return expr;
|
||||
else if (input_components == 1)
|
||||
return join(type_to_glsl(out_type), "(", to_expression(expr), ")");
|
||||
return join(type_to_glsl(out_type), "(", expr, ")");
|
||||
else
|
||||
{
|
||||
auto e = to_enclosed_expression(expr) + ".";
|
||||
auto e = enclose_expression(expr) + ".";
|
||||
// Just clamp the swizzle index if we have more outputs than inputs.
|
||||
for (uint32_t c = 0; c < out_type.vecsize; c++)
|
||||
e += index_to_swizzle(min(c, input_components - 1));
|
||||
@ -4888,6 +4886,14 @@ bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &r
|
||||
return true;
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_block_instructions(const SPIRBlock &block)
|
||||
{
|
||||
current_emitting_block = █
|
||||
for (auto &op : block.ops)
|
||||
emit_instruction(op);
|
||||
current_emitting_block = nullptr;
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
{
|
||||
auto ops = stream(instruction);
|
||||
@ -6095,14 +6101,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
// since ImageRead always returns 4-component vectors and the backing type is opaque.
|
||||
if (!var->remapped_components)
|
||||
SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
|
||||
imgexpr = remap_swizzle(result_type, var->remapped_components, ops[2]);
|
||||
imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
|
||||
}
|
||||
else
|
||||
{
|
||||
// PLS input could have different number of components than what the SPIR expects, swizzle to
|
||||
// the appropriate vector size.
|
||||
uint32_t components = pls_format_to_components(itr->format);
|
||||
imgexpr = remap_swizzle(result_type, components, ops[2]);
|
||||
imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
|
||||
}
|
||||
pure = true;
|
||||
}
|
||||
@ -6143,6 +6149,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
|
||||
}
|
||||
}
|
||||
imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
|
||||
pure = true;
|
||||
}
|
||||
else
|
||||
@ -6160,6 +6167,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
}
|
||||
else
|
||||
imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ")");
|
||||
|
||||
imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
|
||||
pure = false;
|
||||
}
|
||||
|
||||
@ -6208,6 +6217,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
}
|
||||
|
||||
auto &type = expression_type(ops[0]);
|
||||
auto &value_type = expression_type(ops[2]);
|
||||
auto store_type = value_type;
|
||||
store_type.vecsize = 4;
|
||||
|
||||
if (type.image.ms)
|
||||
{
|
||||
uint32_t operands = ops[3];
|
||||
@ -6215,11 +6228,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
|
||||
uint32_t samples = ops[4];
|
||||
statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(samples),
|
||||
", ", to_expression(ops[2]), ");");
|
||||
", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
|
||||
}
|
||||
else
|
||||
statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]),
|
||||
");");
|
||||
statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ",
|
||||
remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
|
||||
|
||||
if (var && variable_storage_is_aliased(*var))
|
||||
flush_all_aliased_variables();
|
||||
@ -6262,6 +6275,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
if (get_entry_point().model == ExecutionModelGLCompute)
|
||||
{
|
||||
uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
|
||||
|
||||
// We cannot forward any loads beyond the memory barrier.
|
||||
if (mem)
|
||||
flush_all_active_variables();
|
||||
|
||||
if (mem == MemorySemanticsWorkgroupMemoryMask)
|
||||
statement("memoryBarrierShared();");
|
||||
else if (mem)
|
||||
@ -7338,8 +7356,7 @@ string CompilerGLSL::emit_continue_block(uint32_t continue_block)
|
||||
{
|
||||
propagate_loop_dominators(*block);
|
||||
// Write out all instructions we have in this block.
|
||||
for (auto &op : block->ops)
|
||||
emit_instruction(op);
|
||||
emit_block_instructions(*block);
|
||||
|
||||
// For plain branchless for/while continue blocks.
|
||||
if (block->next_block)
|
||||
@ -7410,8 +7427,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
|
||||
// If we're trying to create a true for loop,
|
||||
// we need to make sure that all opcodes before branch statement do not actually emit any code.
|
||||
// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
|
||||
for (auto &op : block.ops)
|
||||
emit_instruction(op);
|
||||
emit_block_instructions(block);
|
||||
|
||||
bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
|
||||
|
||||
@ -7462,8 +7478,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method
|
||||
// If we're trying to create a true for loop,
|
||||
// we need to make sure that all opcodes before branch statement do not actually emit any code.
|
||||
// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
|
||||
for (auto &op : child.ops)
|
||||
emit_instruction(op);
|
||||
emit_block_instructions(child);
|
||||
|
||||
bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
|
||||
|
||||
@ -7569,8 +7584,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
{
|
||||
statement("do");
|
||||
begin_scope();
|
||||
for (auto &op : block.ops)
|
||||
emit_instruction(op);
|
||||
|
||||
emit_block_instructions(block);
|
||||
}
|
||||
else if (block.merge == SPIRBlock::MergeLoop)
|
||||
{
|
||||
@ -7582,13 +7597,12 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
|
||||
|
||||
statement("for (;;)");
|
||||
begin_scope();
|
||||
for (auto &op : block.ops)
|
||||
emit_instruction(op);
|
||||
|
||||
emit_block_instructions(block);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto &op : block.ops)
|
||||
emit_instruction(op);
|
||||
emit_block_instructions(block);
|
||||
}
|
||||
|
||||
// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
|
||||
|
@ -181,7 +181,12 @@ protected:
|
||||
|
||||
// Virtualize methods which need to be overridden by subclass targets like C++ and such.
|
||||
virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags);
|
||||
|
||||
// Kinda ugly way to let opcodes peek at their neighbor instructions for trivial peephole scenarios.
|
||||
const SPIRBlock *current_emitting_block = nullptr;
|
||||
|
||||
virtual void emit_instruction(const Instruction &instr);
|
||||
void emit_block_instructions(const SPIRBlock &block);
|
||||
virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
|
||||
uint32_t count);
|
||||
virtual void emit_header();
|
||||
@ -376,7 +381,7 @@ protected:
|
||||
uint32_t *matrix_stride = nullptr);
|
||||
|
||||
const char *index_to_swizzle(uint32_t index);
|
||||
std::string remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr);
|
||||
std::string remap_swizzle(const SPIRType &result_type, uint32_t input_components, const std::string &expr);
|
||||
std::string declare_temporary(uint32_t type, uint32_t id);
|
||||
void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector<std::string> &arglist);
|
||||
std::string to_expression(uint32_t id);
|
||||
|
380
spirv_hlsl.cpp
380
spirv_hlsl.cpp
@ -23,6 +23,134 @@ using namespace spv;
|
||||
using namespace spirv_cross;
|
||||
using namespace std;
|
||||
|
||||
static unsigned image_format_to_components(ImageFormat fmt)
|
||||
{
|
||||
switch (fmt)
|
||||
{
|
||||
case ImageFormatR8:
|
||||
case ImageFormatR16:
|
||||
case ImageFormatR8Snorm:
|
||||
case ImageFormatR16Snorm:
|
||||
case ImageFormatR16f:
|
||||
case ImageFormatR32f:
|
||||
case ImageFormatR8i:
|
||||
case ImageFormatR16i:
|
||||
case ImageFormatR32i:
|
||||
case ImageFormatR8ui:
|
||||
case ImageFormatR16ui:
|
||||
case ImageFormatR32ui:
|
||||
return 1;
|
||||
|
||||
case ImageFormatRg8:
|
||||
case ImageFormatRg16:
|
||||
case ImageFormatRg8Snorm:
|
||||
case ImageFormatRg16Snorm:
|
||||
case ImageFormatRg16f:
|
||||
case ImageFormatRg32f:
|
||||
case ImageFormatRg8i:
|
||||
case ImageFormatRg16i:
|
||||
case ImageFormatRg32i:
|
||||
case ImageFormatRg8ui:
|
||||
case ImageFormatRg16ui:
|
||||
case ImageFormatRg32ui:
|
||||
return 2;
|
||||
|
||||
case ImageFormatR11fG11fB10f:
|
||||
return 3;
|
||||
|
||||
case ImageFormatRgba8:
|
||||
case ImageFormatRgba16:
|
||||
case ImageFormatRgb10A2:
|
||||
case ImageFormatRgba8Snorm:
|
||||
case ImageFormatRgba16Snorm:
|
||||
case ImageFormatRgba16f:
|
||||
case ImageFormatRgba32f:
|
||||
case ImageFormatRgba8i:
|
||||
case ImageFormatRgba16i:
|
||||
case ImageFormatRgba32i:
|
||||
case ImageFormatRgba8ui:
|
||||
case ImageFormatRgba16ui:
|
||||
case ImageFormatRgba32ui:
|
||||
case ImageFormatRgb10a2ui:
|
||||
return 4;
|
||||
|
||||
default:
|
||||
SPIRV_CROSS_THROW("Unrecognized typed image format.");
|
||||
}
|
||||
}
|
||||
|
||||
static string image_format_to_type(ImageFormat fmt)
|
||||
{
|
||||
switch (fmt)
|
||||
{
|
||||
case ImageFormatR8:
|
||||
case ImageFormatR16:
|
||||
return "unorm float";
|
||||
case ImageFormatRg8:
|
||||
case ImageFormatRg16:
|
||||
return "unorm float2";
|
||||
case ImageFormatRgba8:
|
||||
case ImageFormatRgba16:
|
||||
return "unorm float4";
|
||||
case ImageFormatRgb10A2:
|
||||
return "unorm float4";
|
||||
|
||||
case ImageFormatR8Snorm:
|
||||
case ImageFormatR16Snorm:
|
||||
return "snorm float";
|
||||
case ImageFormatRg8Snorm:
|
||||
case ImageFormatRg16Snorm:
|
||||
return "snorm float2";
|
||||
case ImageFormatRgba8Snorm:
|
||||
case ImageFormatRgba16Snorm:
|
||||
return "snorm float4";
|
||||
|
||||
case ImageFormatR16f:
|
||||
case ImageFormatR32f:
|
||||
return "float";
|
||||
case ImageFormatRg16f:
|
||||
case ImageFormatRg32f:
|
||||
return "float2";
|
||||
case ImageFormatRgba16f:
|
||||
case ImageFormatRgba32f:
|
||||
return "float4";
|
||||
|
||||
case ImageFormatR11fG11fB10f:
|
||||
return "float3";
|
||||
|
||||
case ImageFormatR8i:
|
||||
case ImageFormatR16i:
|
||||
case ImageFormatR32i:
|
||||
return "int";
|
||||
case ImageFormatRg8i:
|
||||
case ImageFormatRg16i:
|
||||
case ImageFormatRg32i:
|
||||
return "int2";
|
||||
case ImageFormatRgba8i:
|
||||
case ImageFormatRgba16i:
|
||||
case ImageFormatRgba32i:
|
||||
return "int4";
|
||||
|
||||
case ImageFormatR8ui:
|
||||
case ImageFormatR16ui:
|
||||
case ImageFormatR32ui:
|
||||
return "uint";
|
||||
case ImageFormatRg8ui:
|
||||
case ImageFormatRg16ui:
|
||||
case ImageFormatRg32ui:
|
||||
return "uint2";
|
||||
case ImageFormatRgba8ui:
|
||||
case ImageFormatRgba16ui:
|
||||
case ImageFormatRgba32ui:
|
||||
return "uint4";
|
||||
case ImageFormatRgb10a2ui:
|
||||
return "int4";
|
||||
|
||||
default:
|
||||
SPIRV_CROSS_THROW("Unrecognized typed image format.");
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if an arithmetic operation does not change behavior depending on signedness.
|
||||
static bool opcode_is_sign_invariant(Op opcode)
|
||||
{
|
||||
@ -48,20 +176,26 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
|
||||
{
|
||||
auto &imagetype = get<SPIRType>(type.image.type);
|
||||
const char *dim = nullptr;
|
||||
bool typed_load = false;
|
||||
uint32_t components = 4;
|
||||
|
||||
switch (type.image.dim)
|
||||
{
|
||||
case Dim1D:
|
||||
typed_load = type.image.sampled == 2;
|
||||
dim = "1D";
|
||||
break;
|
||||
case Dim2D:
|
||||
typed_load = type.image.sampled == 2;
|
||||
dim = "2D";
|
||||
break;
|
||||
case Dim3D:
|
||||
typed_load = type.image.sampled == 2;
|
||||
dim = "3D";
|
||||
break;
|
||||
case DimCube:
|
||||
if (type.image.sampled == 2)
|
||||
SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL.");
|
||||
dim = "Cube";
|
||||
break;
|
||||
case DimRect:
|
||||
@ -70,10 +204,7 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
|
||||
if (type.image.sampled == 1)
|
||||
return join("Buffer<", type_to_glsl(imagetype), components, ">");
|
||||
else if (type.image.sampled == 2)
|
||||
{
|
||||
SPIRV_CROSS_THROW("RWBuffer is not implemented yet for HLSL.");
|
||||
//return join("RWBuffer<", type_to_glsl(imagetype), components, ">");
|
||||
}
|
||||
return join("RWBuffer<", image_format_to_type(type.image.format), ">");
|
||||
else
|
||||
SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
|
||||
case DimSubpassData:
|
||||
@ -84,7 +215,9 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
|
||||
}
|
||||
const char *arrayed = type.image.arrayed ? "Array" : "";
|
||||
const char *ms = type.image.ms ? "MS" : "";
|
||||
return join("Texture", dim, ms, arrayed, "<", type_to_glsl(imagetype), components, ">");
|
||||
const char *rw = typed_load ? "RW" : "";
|
||||
return join(rw, "Texture", dim, ms, arrayed, "<",
|
||||
typed_load ? image_format_to_type(type.image.format) : join(type_to_glsl(imagetype), components), ">");
|
||||
}
|
||||
|
||||
string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type)
|
||||
@ -894,7 +1027,19 @@ void CompilerHLSL::emit_resources()
|
||||
if (var.storage != StorageClassOutput)
|
||||
{
|
||||
add_resource_name(var.self);
|
||||
statement("static ", variable_decl(var), ";");
|
||||
|
||||
const char *storage = nullptr;
|
||||
switch (var.storage)
|
||||
{
|
||||
case StorageClassWorkgroup:
|
||||
storage = "groupshared";
|
||||
break;
|
||||
|
||||
default:
|
||||
storage = "static";
|
||||
break;
|
||||
}
|
||||
statement(storage, " ", variable_decl(var), ";");
|
||||
emitted = true;
|
||||
}
|
||||
}
|
||||
@ -1857,10 +2002,16 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
|
||||
switch (type.basetype)
|
||||
{
|
||||
case SPIRType::SampledImage:
|
||||
case SPIRType::Image:
|
||||
space = "t"; // SRV
|
||||
break;
|
||||
|
||||
case SPIRType::Image:
|
||||
if (type.image.sampled == 2)
|
||||
space = "u"; // UAV
|
||||
else
|
||||
space = "t"; // SRV
|
||||
break;
|
||||
|
||||
case SPIRType::Sampler:
|
||||
space = "s";
|
||||
break;
|
||||
@ -2224,6 +2375,101 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)
|
||||
}
|
||||
}
|
||||
|
||||
void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
|
||||
{
|
||||
const char *atomic_op = nullptr;
|
||||
auto value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case OpAtomicISub:
|
||||
atomic_op = "InterlockedAdd";
|
||||
value_expr = join("-", enclose_expression(value_expr));
|
||||
break;
|
||||
|
||||
case OpAtomicSMin:
|
||||
case OpAtomicUMin:
|
||||
atomic_op = "InterlockedMin";
|
||||
break;
|
||||
|
||||
case OpAtomicSMax:
|
||||
case OpAtomicUMax:
|
||||
atomic_op = "InterlockedMax";
|
||||
break;
|
||||
|
||||
case OpAtomicAnd:
|
||||
atomic_op = "InterlockedAnd";
|
||||
break;
|
||||
|
||||
case OpAtomicOr:
|
||||
atomic_op = "InterlockedOr";
|
||||
break;
|
||||
|
||||
case OpAtomicXor:
|
||||
atomic_op = "InterlockedXor";
|
||||
break;
|
||||
|
||||
case OpAtomicIAdd:
|
||||
atomic_op = "InterlockedAdd";
|
||||
break;
|
||||
|
||||
case OpAtomicExchange:
|
||||
atomic_op = "InterlockedExchange";
|
||||
break;
|
||||
|
||||
case OpAtomicCompareExchange:
|
||||
if (length < 8)
|
||||
SPIRV_CROSS_THROW("Not enough data for opcode.");
|
||||
atomic_op = "InterlockedCompareExchange";
|
||||
value_expr = join(to_expression(ops[7]), ", ", value_expr);
|
||||
break;
|
||||
|
||||
default:
|
||||
SPIRV_CROSS_THROW("Unknown atomic opcode.");
|
||||
}
|
||||
|
||||
if (length < 6)
|
||||
SPIRV_CROSS_THROW("Not enough data for opcode.");
|
||||
|
||||
uint32_t result_type = ops[0];
|
||||
uint32_t id = ops[1];
|
||||
forced_temporaries.insert(ops[1]);
|
||||
|
||||
auto &type = get<SPIRType>(result_type);
|
||||
statement(variable_decl(type, to_name(id)), ";");
|
||||
|
||||
auto &data_type = expression_type(ops[2]);
|
||||
auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
|
||||
SPIRType::BaseType expression_type;
|
||||
if (data_type.storage == StorageClassImage || !chain)
|
||||
{
|
||||
statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
|
||||
expression_type = data_type.basetype;
|
||||
}
|
||||
else
|
||||
{
|
||||
// RWByteAddress buffer is always uint in its underlying type.
|
||||
expression_type = SPIRType::UInt;
|
||||
statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ",
|
||||
to_name(id), ");");
|
||||
}
|
||||
|
||||
auto expr = bitcast_expression(type, expression_type, to_name(id));
|
||||
set<SPIRExpression>(id, expr, result_type, true);
|
||||
flush_all_atomic_capable_variables();
|
||||
register_read(ops[1], ops[2], should_forward(ops[2]));
|
||||
}
|
||||
|
||||
// Returns the instruction following `instr` within the block currently being
// emitted, or nullptr if `instr` is the last one.
const Instruction *CompilerHLSL::get_next_instruction_in_block(const Instruction &instr)
{
	// FIXME: This is kind of hacky. There should be a cleaner way.
	auto &block_ops = current_emitting_block->ops;
	auto index = uint32_t(&instr - block_ops.data());
	return (index + 1 < block_ops.size()) ? &block_ops[index + 1] : nullptr;
}
|
||||
|
||||
void CompilerHLSL::emit_instruction(const Instruction &instruction)
|
||||
{
|
||||
auto ops = stream(instruction);
|
||||
@ -2548,6 +2794,126 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
|
||||
break;
|
||||
}
|
||||
|
||||
case OpImageRead:
|
||||
{
|
||||
uint32_t result_type = ops[0];
|
||||
uint32_t id = ops[1];
|
||||
auto *var = maybe_get_backing_variable(ops[2]);
|
||||
auto imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]");
|
||||
|
||||
// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
|
||||
// except that the underlying type changes how the data is interpreted.
|
||||
if (var)
|
||||
imgexpr = remap_swizzle(get<SPIRType>(result_type),
|
||||
image_format_to_components(get<SPIRType>(var->basetype).image.format), imgexpr);
|
||||
|
||||
if (var && var->forwardable)
|
||||
{
|
||||
auto &e = emit_op(result_type, id, imgexpr, true);
|
||||
e.loaded_from = var->self;
|
||||
var->dependees.push_back(id);
|
||||
}
|
||||
else
|
||||
emit_op(result_type, id, imgexpr, false);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpImageWrite:
|
||||
{
|
||||
auto *var = maybe_get_backing_variable(ops[0]);
|
||||
|
||||
// The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4",
|
||||
// except that the underlying type changes how the data is interpreted.
|
||||
auto value_expr = to_expression(ops[2]);
|
||||
if (var)
|
||||
{
|
||||
auto &type = get<SPIRType>(var->basetype);
|
||||
auto narrowed_type = get<SPIRType>(type.image.type);
|
||||
narrowed_type.vecsize = image_format_to_components(type.image.format);
|
||||
value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr);
|
||||
}
|
||||
|
||||
statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";");
|
||||
if (var && variable_storage_is_aliased(*var))
|
||||
flush_all_aliased_variables();
|
||||
break;
|
||||
}
|
||||
|
||||
case OpImageTexelPointer:
|
||||
{
|
||||
uint32_t result_type = ops[0];
|
||||
uint32_t id = ops[1];
|
||||
auto &e =
|
||||
set<SPIRExpression>(id, join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"), result_type, true);
|
||||
|
||||
// When using the pointer, we need to know which variable it is actually loaded from.
|
||||
auto *var = maybe_get_backing_variable(ops[2]);
|
||||
e.loaded_from = var ? var->self : 0;
|
||||
break;
|
||||
}
|
||||
|
||||
case OpAtomicCompareExchange:
|
||||
case OpAtomicExchange:
|
||||
case OpAtomicISub:
|
||||
case OpAtomicSMin:
|
||||
case OpAtomicUMin:
|
||||
case OpAtomicSMax:
|
||||
case OpAtomicUMax:
|
||||
case OpAtomicAnd:
|
||||
case OpAtomicOr:
|
||||
case OpAtomicXor:
|
||||
case OpAtomicIAdd:
|
||||
{
|
||||
emit_atomic(ops, instruction.length, opcode);
|
||||
break;
|
||||
}
|
||||
|
||||
case OpMemoryBarrier:
|
||||
{
|
||||
uint32_t mem = get<SPIRConstant>(ops[1]).scalar();
|
||||
|
||||
// If the next instruction is OpControlBarrier and it does what we need, this opcode can be a noop.
|
||||
const Instruction *next = get_next_instruction_in_block(instruction);
|
||||
if (next && next->op == OpControlBarrier)
|
||||
{
|
||||
auto *next_ops = stream(*next);
|
||||
uint32_t next_mem = get<SPIRConstant>(next_ops[2]).scalar();
|
||||
next_mem |= MemorySemanticsWorkgroupMemoryMask; // Barrier in HLSL always implies GroupSync.
|
||||
if ((next_mem & mem) == mem)
|
||||
break;
|
||||
}
|
||||
|
||||
// We cannot forward any loads beyond the memory barrier.
|
||||
if (mem)
|
||||
flush_all_active_variables();
|
||||
|
||||
if (mem == MemorySemanticsWorkgroupMemoryMask)
|
||||
statement("GroupMemoryBarrier();");
|
||||
else if (mem)
|
||||
statement("DeviceMemoryBarrier();");
|
||||
break;
|
||||
}
|
||||
|
||||
case OpControlBarrier:
|
||||
{
|
||||
uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
|
||||
|
||||
// We cannot forward any loads beyond the memory barrier.
|
||||
if (mem)
|
||||
flush_all_active_variables();
|
||||
|
||||
if (mem == MemorySemanticsWorkgroupMemoryMask)
|
||||
statement("GroupMemoryBarrierWithGroupSync();");
|
||||
else if (mem)
|
||||
statement("DeviceMemoryBarrierWithGroupSync();");
|
||||
else
|
||||
{
|
||||
// There is no "GroupSync" standalone function.
|
||||
statement("GroupMemoryBarrierWithGroupSync();");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
CompilerGLSL::emit_instruction(instruction);
|
||||
break;
|
||||
|
@ -92,6 +92,8 @@ private:
|
||||
void emit_load(const Instruction &instruction);
|
||||
std::string read_access_chain(const SPIRAccessChain &chain);
|
||||
void emit_store(const Instruction &instruction);
|
||||
void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op);
|
||||
const Instruction *get_next_instruction_in_block(const Instruction &instr);
|
||||
|
||||
void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
|
||||
const std::string &qualifier) override;
|
||||
|
Loading…
Reference in New Issue
Block a user