From 044d3c89110f2c11369a152720936f13140b06d6 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 20 Oct 2017 14:56:37 +0200 Subject: [PATCH 1/7] Basic image load store and atomics. --- spirv_hlsl.cpp | 164 ++++++++++++++++++++++++++++++++++++++++++++++++- spirv_hlsl.hpp | 1 + 2 files changed, 162 insertions(+), 3 deletions(-) diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index ff1007c8..d5e76351 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -48,20 +48,26 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type) { auto &imagetype = get(type.image.type); const char *dim = nullptr; + const char *rw = ""; uint32_t components = 4; switch (type.image.dim) { case Dim1D: + rw = type.image.sampled == 2 ? "RW" : ""; dim = "1D"; break; case Dim2D: + rw = type.image.sampled == 2 ? "RW" : ""; dim = "2D"; break; case Dim3D: + rw = type.image.sampled == 2 ? "RW" : ""; dim = "3D"; break; case DimCube: + if (type.image.sampled == 2) + SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL."); dim = "Cube"; break; case DimRect: @@ -84,7 +90,7 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type) } const char *arrayed = type.image.arrayed ? "Array" : ""; const char *ms = type.image.ms ? "MS" : ""; - return join("Texture", dim, ms, arrayed, "<", type_to_glsl(imagetype), components, ">"); + return join(rw, "Texture", dim, ms, arrayed, "<", type_to_glsl(imagetype), components, ">"); } string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type) @@ -894,7 +900,19 @@ void CompilerHLSL::emit_resources() if (var.storage != StorageClassOutput) { add_resource_name(var.self); - statement("static ", variable_decl(var), ";"); + + const char *storage = nullptr; + switch (var.storage) + { + case StorageClassWorkgroup: + storage = "groupshared"; + break; + + default: + storage = "static"; + break; + } + statement(storage, " ", variable_decl(var), ";"); emitted = true; } } @@ -1857,10 +1875,16 @@ string CompilerHLSL::to_resource_binding(const SPIRVariable &var) switch (type.basetype) { case SPIRType::SampledImage: - case SPIRType::Image: space = "t"; // SRV break; + case SPIRType::Image: + if (type.image.sampled == 2) + space = "u"; // UAV + else + space = "t"; // SRV + break; + case SPIRType::Sampler: space = "s"; break; @@ -2224,6 +2248,83 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) } } +void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) +{ + const char *atomic_op = nullptr; + auto value_expr = to_expression(ops[5]); + switch (op) + { + case OpAtomicISub: + atomic_op = "InterlockedAdd"; + value_expr = join("-", enclose_expression(value_expr)); + break; + + case OpAtomicSMin: + case OpAtomicUMin: + atomic_op = "InterlockedMin"; + break; + + case OpAtomicSMax: + case OpAtomicUMax: + atomic_op = "InterlockedMax"; + break; + + case OpAtomicAnd: + atomic_op = "InterlockedAnd"; + break; + + case OpAtomicOr: + atomic_op = "InterlockedOr"; + break; + + case OpAtomicXor: + atomic_op = "InterlockedXor"; + break; + + case OpAtomicIAdd: + atomic_op = "InterlockedAdd"; + break; + + case OpAtomicExchange: + atomic_op = "InterlockedExchange"; + break; + + default: + SPIRV_CROSS_THROW("Unknown atomic opcode."); + } + + if (length < 6) + SPIRV_CROSS_THROW("Not enough data for opcode."); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + forced_temporaries.insert(ops[1]); + + auto &type = get(result_type); + statement(variable_decl(type, to_name(id)), ";"); + + auto &data_type = expression_type(ops[2]); + auto *chain = 
maybe_get(ops[2]); + SPIRType::BaseType expression_type; + if (data_type.storage == StorageClassImage || !chain) + { + statement(atomic_op, "(", to_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");"); + expression_type = data_type.basetype; + } + else + { + // RWByteAddress buffer is always uint in its underlying type. + expression_type = SPIRType::UInt; + statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, + ", ", value_expr, ", ", to_name(id), ");"); + } + + auto expr = bitcast_expression(type, expression_type, to_name(id)); + set(id, expr, result_type, true); + flush_all_atomic_capable_variables(); + register_read(ops[1], ops[2], should_forward(ops[2])); +} + void CompilerHLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); @@ -2548,6 +2649,63 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) break; } + case OpImageRead: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *var = maybe_get_backing_variable(ops[2]); + auto imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"); + + if (var && var->forwardable) + { + auto &e = emit_op(result_type, id, imgexpr, true); + e.loaded_from = var->self; + var->dependees.push_back(id); + } + else + emit_op(result_type, id, imgexpr, false); + break; + } + + case OpImageWrite: + { + auto *var = maybe_get_backing_variable(ops[0]); + statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", to_expression(ops[2]), ";"); + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &e = set(id, join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"), result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : 0; + break; + } + + case OpAtomicCompareExchange: + break; + + case OpAtomicExchange: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicIAdd: + { + emit_atomic(ops, instruction.length, opcode); + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 7a74ab17..d0ed2911 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -92,6 +92,7 @@ private: void emit_load(const Instruction &instruction); std::string read_access_chain(const SPIRAccessChain &chain); void emit_store(const Instruction &instruction); + void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op); void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier) override; From 85eb972259e67d69f51590b890c532164ffc81fd Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 20 Oct 2017 15:43:45 +0200 Subject: [PATCH 2/7] Add some HLSL tests for atomics and image load store. 
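
The tests cover image atomics on RW texture UAVs, buffer atomics on
RWByteAddressBuffer members, and atomics on groupshared variables.
To illustrate the mapping being exercised (excerpted from the reference
output added below; temporary names are whatever the compiler picks),
GLSL atomicAdd(ssbo.u32, 1u) and imageAtomicAdd(uImage, ivec2(1, 5), 1u)
come out as:

    uint _68;
    ssbo.InterlockedAdd(0, 1u, _68);             // byte offset 0 of the SSBO
    uint _19;
    InterlockedAdd(uImage[int2(1, 5)], 1u, _19); // typed UAV atomic

HLSL only has statement-style Interlocked* intrinsics, so the previous
value always comes back through an explicit out-parameter temporary
rather than as an expression.
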
--- reference/shaders-hlsl/comp/atomic.comp | 90 +++++++++++++++++++++ reference/shaders-hlsl/comp/image.comp | 21 +++++ shaders-hlsl/comp/atomic.comp | 66 ++++++++++++++++ shaders-hlsl/comp/image.comp | 12 +++ spirv_hlsl.cpp | 101 +++++++++++++++++++++--- 5 files changed, 278 insertions(+), 12 deletions(-) create mode 100644 reference/shaders-hlsl/comp/atomic.comp create mode 100644 reference/shaders-hlsl/comp/image.comp create mode 100644 shaders-hlsl/comp/atomic.comp create mode 100644 shaders-hlsl/comp/image.comp diff --git a/reference/shaders-hlsl/comp/atomic.comp b/reference/shaders-hlsl/comp/atomic.comp new file mode 100644 index 00000000..52338941 --- /dev/null +++ b/reference/shaders-hlsl/comp/atomic.comp @@ -0,0 +1,90 @@ +RWByteAddressBuffer ssbo : register(u2); +RWTexture2D uImage : register(u0); +RWTexture2D iImage : register(u1); + +groupshared int int_atomic; +groupshared uint uint_atomic; +groupshared int int_atomic_array[1]; +groupshared uint uint_atomic_array[1]; + +void comp_main() +{ + uint _19; + InterlockedAdd(uImage[int2(1, 5)], 1u, _19); + uint _27; + InterlockedAdd(uImage[int2(1, 5)], 1u, _27); + int _28 = int(_27); + iImage[int2(1, 6)] = int4(_28, _28, _28, _28); + uint _32; + InterlockedOr(uImage[int2(1, 5)], 1u, _32); + uint _34; + InterlockedXor(uImage[int2(1, 5)], 1u, _34); + uint _36; + InterlockedAnd(uImage[int2(1, 5)], 1u, _36); + uint _38; + InterlockedMin(uImage[int2(1, 5)], 1u, _38); + uint _40; + InterlockedMax(uImage[int2(1, 5)], 1u, _40); + uint _44; + InterlockedCompareExchange(uImage[int2(1, 5)], 10u, 2u, _44); + int _47; + InterlockedAdd(iImage[int2(1, 6)], 1, _47); + int _49; + InterlockedOr(iImage[int2(1, 6)], 1, _49); + int _51; + InterlockedXor(iImage[int2(1, 6)], 1, _51); + int _53; + InterlockedAnd(iImage[int2(1, 6)], 1, _53); + int _55; + InterlockedMin(iImage[int2(1, 6)], 1, _55); + int _57; + InterlockedMax(iImage[int2(1, 6)], 1, _57); + int _61; + InterlockedCompareExchange(iImage[int2(1, 5)], 10, 2, _61); + uint _68; + ssbo.InterlockedAdd(0, 1u, _68); + uint _70; + ssbo.InterlockedOr(0, 1u, _70); + uint _72; + ssbo.InterlockedXor(0, 1u, _72); + uint _74; + ssbo.InterlockedAnd(0, 1u, _74); + uint _76; + ssbo.InterlockedMin(0, 1u, _76); + uint _78; + ssbo.InterlockedMax(0, 1u, _78); + uint _80; + ssbo.InterlockedExchange(0, 1u, _80); + uint _82; + ssbo.InterlockedCompareExchange(0, 10u, 2u, _82); + int _85; + ssbo.InterlockedAdd(4, 1, _85); + int _87; + ssbo.InterlockedOr(4, 1, _87); + int _89; + ssbo.InterlockedXor(4, 1, _89); + int _91; + ssbo.InterlockedAnd(4, 1, _91); + int _93; + ssbo.InterlockedMin(4, 1, _93); + int _95; + ssbo.InterlockedMax(4, 1, _95); + int _97; + ssbo.InterlockedExchange(4, 1, _97); + int _99; + ssbo.InterlockedCompareExchange(4, 10, 2, _99); + int _102; + InterlockedAdd(int_atomic, 10, _102); + uint _105; + InterlockedAdd(uint_atomic, 10u, _105); + int _110; + InterlockedAdd(int_atomic_array[0], 10, _110); + uint _115; + InterlockedAdd(uint_atomic_array[0], 10u, _115); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/image.comp b/reference/shaders-hlsl/comp/image.comp new file mode 100644 index 00000000..d260adca --- /dev/null +++ b/reference/shaders-hlsl/comp/image.comp @@ -0,0 +1,21 @@ +RWTexture2D uImageIn : register(u0); +RWTexture2D uImageOut : register(u1); + +static uint3 gl_GlobalInvocationID; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; +}; + +void comp_main() +{ + float4 v = 
uImageIn[int2(gl_GlobalInvocationID.xy)]; + uImageOut[int2(gl_GlobalInvocationID.xy)] = v; +} + +[numthreads(1, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + comp_main(); +} diff --git a/shaders-hlsl/comp/atomic.comp b/shaders-hlsl/comp/atomic.comp new file mode 100644 index 00000000..6f69ec72 --- /dev/null +++ b/shaders-hlsl/comp/atomic.comp @@ -0,0 +1,66 @@ +#version 310 es +#extension GL_OES_shader_image_atomic : require +layout(local_size_x = 1) in; + +layout(r32ui, binding = 0) uniform highp uimage2D uImage; +layout(r32i, binding = 1) uniform highp iimage2D iImage; +layout(binding = 2, std430) buffer SSBO +{ + uint u32; + int i32; +} ssbo; + +shared int int_atomic; +shared uint uint_atomic; +shared int int_atomic_array[1]; +shared uint uint_atomic_array[1]; + +void main() +{ + imageAtomicAdd(uImage, ivec2(1, 5), 1u); + + // Test that we do not invalidate OpImage variables which are loaded from UniformConstant + // address space. + imageStore(iImage, ivec2(1, 6), ivec4(imageAtomicAdd(uImage, ivec2(1, 5), 1u))); + + imageAtomicOr(uImage, ivec2(1, 5), 1u); + imageAtomicXor(uImage, ivec2(1, 5), 1u); + imageAtomicAnd(uImage, ivec2(1, 5), 1u); + imageAtomicMin(uImage, ivec2(1, 5), 1u); + imageAtomicMax(uImage, ivec2(1, 5), 1u); + //imageAtomicExchange(uImage, ivec2(1, 5), 1u); + imageAtomicCompSwap(uImage, ivec2(1, 5), 10u, 2u); + + imageAtomicAdd(iImage, ivec2(1, 6), 1); + imageAtomicOr(iImage, ivec2(1, 6), 1); + imageAtomicXor(iImage, ivec2(1, 6), 1); + imageAtomicAnd(iImage, ivec2(1, 6), 1); + imageAtomicMin(iImage, ivec2(1, 6), 1); + imageAtomicMax(iImage, ivec2(1, 6), 1); + //imageAtomicExchange(iImage, ivec2(1, 5), 1u); + imageAtomicCompSwap(iImage, ivec2(1, 5), 10, 2); + + atomicAdd(ssbo.u32, 1u); + atomicOr(ssbo.u32, 1u); + atomicXor(ssbo.u32, 1u); + atomicAnd(ssbo.u32, 1u); + atomicMin(ssbo.u32, 1u); + atomicMax(ssbo.u32, 1u); + atomicExchange(ssbo.u32, 1u); + atomicCompSwap(ssbo.u32, 10u, 2u); + + atomicAdd(ssbo.i32, 1); + atomicOr(ssbo.i32, 1); + atomicXor(ssbo.i32, 1); + atomicAnd(ssbo.i32, 1); + atomicMin(ssbo.i32, 1); + atomicMax(ssbo.i32, 1); + atomicExchange(ssbo.i32, 1); + atomicCompSwap(ssbo.i32, 10, 2); + + atomicAdd(int_atomic, 10); + atomicAdd(uint_atomic, 10u); + atomicAdd(int_atomic_array[0], 10); + atomicAdd(uint_atomic_array[0], 10u); +} + diff --git a/shaders-hlsl/comp/image.comp b/shaders-hlsl/comp/image.comp new file mode 100644 index 00000000..28a39eb8 --- /dev/null +++ b/shaders-hlsl/comp/image.comp @@ -0,0 +1,12 @@ +#version 450 +layout(local_size_x = 1) in; + +layout(r32f, binding = 0) uniform readonly mediump image2D uImageIn; +layout(r32f, binding = 1) uniform writeonly mediump image2D uImageOut; + +void main() +{ + vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v); +} + diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index d5e76351..30fc243b 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -23,6 +23,78 @@ using namespace spv; using namespace spirv_cross; using namespace std; +static string image_format_to_type(ImageFormat fmt) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + return "unorm float"; + case ImageFormatRg8: + case ImageFormatRg16: + return "unorm float2"; + case ImageFormatRgba8: + case ImageFormatRgba16: + return "unorm float4"; + case ImageFormatRgb10A2: + return "unorm float4"; + + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + return "snorm float"; + case 
ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + return "snorm float2"; + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + return "snorm float4"; + + case ImageFormatR16f: + case ImageFormatR32f: + return "float"; + case ImageFormatRg16f: + case ImageFormatRg32f: + return "float2"; + case ImageFormatRgba16f: + case ImageFormatRgba32f: + return "float4"; + + case ImageFormatR11fG11fB10f: + return "float3"; + + case ImageFormatR8i: + case ImageFormatR16i: + case ImageFormatR32i: + return "int"; + case ImageFormatRg8i: + case ImageFormatRg16i: + case ImageFormatRg32i: + return "int2"; + case ImageFormatRgba8i: + case ImageFormatRgba16i: + case ImageFormatRgba32i: + return "int4"; + + case ImageFormatR8ui: + case ImageFormatR16ui: + case ImageFormatR32ui: + return "uint"; + case ImageFormatRg8ui: + case ImageFormatRg16ui: + case ImageFormatRg32ui: + return "uint2"; + case ImageFormatRgba8ui: + case ImageFormatRgba16ui: + case ImageFormatRgba32ui: + return "uint4"; + case ImageFormatRgb10a2ui: + return "int4"; + + default: + SPIRV_CROSS_THROW("Unrecognized typed image format."); + } +} + // Returns true if an arithmetic operation does not change behavior depending on signedness. static bool opcode_is_sign_invariant(Op opcode) { @@ -48,21 +120,21 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type) { auto &imagetype = get(type.image.type); const char *dim = nullptr; - const char *rw = ""; + bool typed_load = false; uint32_t components = 4; switch (type.image.dim) { case Dim1D: - rw = type.image.sampled == 2 ? "RW" : ""; + typed_load = type.image.sampled == 2; dim = "1D"; break; case Dim2D: - rw = type.image.sampled == 2 ? "RW" : ""; + typed_load = type.image.sampled == 2; dim = "2D"; break; case Dim3D: - rw = type.image.sampled == 2 ? "RW" : ""; + typed_load = type.image.sampled == 2; dim = "3D"; break; case DimCube: @@ -76,10 +148,7 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type) if (type.image.sampled == 1) return join("Buffer<", type_to_glsl(imagetype), components, ">"); else if (type.image.sampled == 2) - { - SPIRV_CROSS_THROW("RWBuffer is not implemented yet for HLSL."); - //return join("RWBuffer<", type_to_glsl(imagetype), components, ">"); - } + return join("RWBuffer<", image_format_to_type(imagetype.image.format), components, ">"); else SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); case DimSubpassData: @@ -90,7 +159,9 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type) } const char *arrayed = type.image.arrayed ? "Array" : ""; const char *ms = type.image.ms ? "MS" : ""; - return join(rw, "Texture", dim, ms, arrayed, "<", type_to_glsl(imagetype), components, ">"); + const char *rw = typed_load ? "RW" : ""; + return join(rw, "Texture", dim, ms, arrayed, "<", + typed_load ? image_format_to_type(type.image.format) : join(type_to_glsl(imagetype), components), ">"); } string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type) @@ -2251,7 +2322,8 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) { const char *atomic_op = nullptr; - auto value_expr = to_expression(ops[5]); + auto value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 
6 : 5]); + switch (op) { case OpAtomicISub: @@ -2289,6 +2361,13 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) atomic_op = "InterlockedExchange"; break; + case OpAtomicCompareExchange: + if (length < 8) + SPIRV_CROSS_THROW("Not enough data for opcode."); + atomic_op = "InterlockedCompareExchange"; + value_expr = join(to_expression(ops[7]), ", ", value_expr); + break; + default: SPIRV_CROSS_THROW("Unknown atomic opcode."); } @@ -2689,8 +2768,6 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) } case OpAtomicCompareExchange: - break; - case OpAtomicExchange: case OpAtomicISub: case OpAtomicSMin: From ae236e70563dc48c652cd9fa1d4205a77dce7a46 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 20 Oct 2017 16:18:02 +0200 Subject: [PATCH 3/7] Add GroupSync() in HLSL. --- reference/shaders-hlsl/comp/shared.comp | 31 ++++++++++++++ shaders-hlsl/comp/shared.comp | 27 ++++++++++++ spirv_glsl.cpp | 34 +++++++++------ spirv_glsl.hpp | 5 +++ spirv_hlsl.cpp | 56 +++++++++++++++++++++++++ spirv_hlsl.hpp | 1 + 6 files changed, 142 insertions(+), 12 deletions(-) create mode 100644 reference/shaders-hlsl/comp/shared.comp create mode 100644 shaders-hlsl/comp/shared.comp diff --git a/reference/shaders-hlsl/comp/shared.comp b/reference/shaders-hlsl/comp/shared.comp new file mode 100644 index 00000000..a344b859 --- /dev/null +++ b/reference/shaders-hlsl/comp/shared.comp @@ -0,0 +1,31 @@ +const uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u); + +ByteAddressBuffer _22 : register(u0); +RWByteAddressBuffer _44 : register(u1); + +static uint3 gl_GlobalInvocationID; +static uint gl_LocalInvocationIndex; +struct SPIRV_Cross_Input +{ + uint3 gl_GlobalInvocationID : SV_DispatchThreadID; + uint gl_LocalInvocationIndex : SV_GroupIndex; +}; + +groupshared float sShared[4]; + +void comp_main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = asfloat(_22.Load(ident * 4 + 0)); + sShared[gl_LocalInvocationIndex] = idata; + GroupMemoryBarrierWithGroupSync(); + _44.Store(ident * 4 + 0, asuint(sShared[(4u - gl_LocalInvocationIndex) - 1u])); +} + +[numthreads(4, 1, 1)] +void main(SPIRV_Cross_Input stage_input) +{ + gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID; + gl_LocalInvocationIndex = stage_input.gl_LocalInvocationIndex; + comp_main(); +} diff --git a/shaders-hlsl/comp/shared.comp b/shaders-hlsl/comp/shared.comp new file mode 100644 index 00000000..4deff935 --- /dev/null +++ b/shaders-hlsl/comp/shared.comp @@ -0,0 +1,27 @@ +#version 310 es +layout(local_size_x = 4) in; + +shared float sShared[gl_WorkGroupSize.x]; + +layout(std430, binding = 0) readonly buffer SSBO +{ + float in_data[]; +}; + +layout(std430, binding = 1) writeonly buffer SSBO2 +{ + float out_data[]; +}; + +void main() +{ + uint ident = gl_GlobalInvocationID.x; + float idata = in_data[ident]; + + sShared[gl_LocalInvocationIndex] = idata; + memoryBarrierShared(); + barrier(); + + out_data[ident] = sShared[gl_WorkGroupSize.x - gl_LocalInvocationIndex - 1u]; +} + diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 0d4a6299..79fe18b3 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -4888,6 +4888,14 @@ bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &r return true; } +void CompilerGLSL::emit_block_instructions(const SPIRBlock &block) +{ + current_emitting_block = █ + for (auto &op : block.ops) + emit_instruction(op); + current_emitting_block = nullptr; +} + void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto ops = 
stream(instruction); @@ -6262,10 +6270,16 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (get_entry_point().model == ExecutionModelGLCompute) { uint32_t mem = get(ops[2]).scalar(); + + // We cannot forward any loads beyond the memory barrier. + if (mem) + flush_all_active_variables(); + if (mem == MemorySemanticsWorkgroupMemoryMask) statement("memoryBarrierShared();"); else if (mem) statement("memoryBarrier();"); + } statement("barrier();"); break; @@ -7338,8 +7352,7 @@ string CompilerGLSL::emit_continue_block(uint32_t continue_block) { propagate_loop_dominators(*block); // Write out all instructions we have in this block. - for (auto &op : block->ops) - emit_instruction(op); + emit_block_instructions(*block); // For plain branchless for/while continue blocks. if (block->next_block) @@ -7410,8 +7423,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method // If we're trying to create a true for loop, // we need to make sure that all opcodes before branch statement do not actually emit any code. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. - for (auto &op : block.ops) - emit_instruction(op); + emit_block_instructions(block); bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); @@ -7462,8 +7474,7 @@ bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method // If we're trying to create a true for loop, // we need to make sure that all opcodes before branch statement do not actually emit any code. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. - for (auto &op : child.ops) - emit_instruction(op); + emit_block_instructions(child); bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); @@ -7569,8 +7580,8 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) { statement("do"); begin_scope(); - for (auto &op : block.ops) - emit_instruction(op); + + emit_block_instructions(block); } else if (block.merge == SPIRBlock::MergeLoop) { @@ -7582,13 +7593,12 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) statement("for (;;)"); begin_scope(); - for (auto &op : block.ops) - emit_instruction(op); + + emit_block_instructions(block); } else { - for (auto &op : block.ops) - emit_instruction(op); + emit_block_instructions(block); } // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index b652f713..9f3750c3 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -181,7 +181,12 @@ protected: // Virtualize methods which need to be overridden by subclass targets like C++ and such. virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags); + + // Kinda ugly way to let opcodes peek at their neighbor instructions for trivial peephole scenarios. 
+ const SPIRBlock *current_emitting_block = nullptr; + virtual void emit_instruction(const Instruction &instr); + void emit_block_instructions(const SPIRBlock &block); virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args, uint32_t count); virtual void emit_header(); diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 30fc243b..40fc6065 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -2404,6 +2404,16 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) register_read(ops[1], ops[2], should_forward(ops[2])); } +const Instruction *CompilerHLSL::get_next_instruction_in_block(const Instruction &instr) +{ + // FIXME: This is kind of hacky. There should be a cleaner way. + uint32_t offset = uint32_t(&instr - current_emitting_block->ops.data()); + if ((offset + 1) < current_emitting_block->ops.size()) + return ¤t_emitting_block->ops[offset + 1]; + else + return nullptr; +} + void CompilerHLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); @@ -2783,6 +2793,52 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) break; } + case OpMemoryBarrier: + { + uint32_t mem = get(ops[1]).scalar(); + + // If the next instruction is OpControlBarrier and it does what we need, this opcode can be a noop. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_mem = get(next_ops[2]).scalar(); + next_mem |= MemorySemanticsWorkgroupMemoryMask; // Barrier in HLSL always implies GroupSync. + if ((next_mem & mem) == mem) + break; + } + + // We cannot forward any loads beyond the memory barrier. + if (mem) + flush_all_active_variables(); + + if (mem == MemorySemanticsWorkgroupMemoryMask) + statement("GroupMemoryBarrier();"); + else if (mem) + statement("DeviceMemoryBarrier();"); + break; + } + + case OpControlBarrier: + { + uint32_t mem = get(ops[2]).scalar(); + + // We cannot forward any loads beyond the memory barrier. + if (mem) + flush_all_active_variables(); + + if (mem == MemorySemanticsWorkgroupMemoryMask) + statement("GroupMemoryBarrierWithGroupSync();"); + else if (mem) + statement("DeviceMemoryBarrierWithGroupSync();"); + else + { + // There is no "GroupSync" standalone function. + statement("GroupMemoryBarrierWithGroupSync();"); + } + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index d0ed2911..087345de 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -93,6 +93,7 @@ private: std::string read_access_chain(const SPIRAccessChain &chain); void emit_store(const Instruction &instruction); void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op); + const Instruction *get_next_instruction_in_block(const Instruction &instr); void emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier) override; From f4f497dc85130660c79d78f9d5a216089492bab2 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 20 Oct 2017 16:18:17 +0200 Subject: [PATCH 4/7] Run format_all.sh. 
--- spirv_glsl.cpp | 1 - spirv_hlsl.cpp | 7 ++++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 79fe18b3..f5ec339e 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -6279,7 +6279,6 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) statement("memoryBarrierShared();"); else if (mem) statement("memoryBarrier();"); - } statement("barrier();"); break; diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 40fc6065..2599c9d8 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -2394,8 +2394,8 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op) { // RWByteAddress buffer is always uint in its underlying type. expression_type = SPIRType::UInt; - statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, - ", ", value_expr, ", ", to_name(id), ");"); + statement(chain->base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr, ", ", + to_name(id), ");"); } auto expr = bitcast_expression(type, expression_type, to_name(id)); @@ -2769,7 +2769,8 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto &e = set(id, join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"), result_type, true); + auto &e = + set(id, join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"), result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); From ab3f114120bc47d853cfb6e5a76cb2b45aff8330 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 24 Oct 2017 09:23:29 +0200 Subject: [PATCH 5/7] Use remap_swizzle for image load-store more actively. --- spirv_glsl.cpp | 27 ++++++++++++++++----------- spirv_glsl.hpp | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index f5ec339e..97d42ca9 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -1704,17 +1704,15 @@ void CompilerGLSL::replace_fragment_outputs() } } -string CompilerGLSL::remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr) +string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) { - auto &out_type = get(result_type); - if (out_type.vecsize == input_components) - return to_expression(expr); + return expr; else if (input_components == 1) - return join(type_to_glsl(out_type), "(", to_expression(expr), ")"); + return join(type_to_glsl(out_type), "(", expr, ")"); else { - auto e = to_enclosed_expression(expr) + "."; + auto e = enclose_expression(expr) + "."; // Just clamp the swizzle index if we have more outputs than inputs. for (uint32_t c = 0; c < out_type.vecsize; c++) e += index_to_swizzle(min(c, input_components - 1)); @@ -6103,14 +6101,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // since ImageRead always returns 4-component vectors and the backing type is opaque. if (!var->remapped_components) SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); - imgexpr = remap_swizzle(result_type, var->remapped_components, ops[2]); + imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); } else { // PLS input could have different number of components than what the SPIR expects, swizzle to // the appropriate vector size. 
uint32_t components = pls_format_to_components(itr->format); - imgexpr = remap_swizzle(result_type, components, ops[2]); + imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); } pure = true; } @@ -6151,6 +6149,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); } } + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = true; } else @@ -6168,6 +6167,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } else imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ")"); + + imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = false; } @@ -6216,6 +6217,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } auto &type = expression_type(ops[0]); + auto &value_type = expression_type(ops[2]); + auto store_type = value_type; + store_type.vecsize = 4; + if (type.image.ms) { uint32_t operands = ops[3]; @@ -6223,11 +6228,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); uint32_t samples = ops[4]; statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(samples), - ", ", to_expression(ops[2]), ");"); + ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); } else - statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), - ");"); + statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index 9f3750c3..2c806dd7 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -381,7 +381,7 @@ protected: uint32_t *matrix_stride = nullptr); const char *index_to_swizzle(uint32_t index); - std::string remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr); + std::string remap_swizzle(const SPIRType &result_type, uint32_t input_components, const std::string &expr); std::string declare_temporary(uint32_t type, uint32_t id); void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector &arglist); std::string to_expression(uint32_t id); From a95295cb231355537b07c302e1d59073870c5050 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 24 Oct 2017 09:52:12 +0200 Subject: [PATCH 6/7] Remap swizzle for HLSL and RWBuffer. 
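
Unlike GLSL, where imageLoad()/imageStore() always work on 4-component
vectors, the element type of an HLSL RWTexture2D/RWBuffer follows the
declared image format, so loads must be widened back to the 4-component
SPIR-V result type and stored values narrowed to the format's component
count. For r32f and rg32f images the emitted code looks like this
(excerpted from the updated reference output):

    float4 f = float4(uImageInF[int2(gl_GlobalInvocationID.xy)]);  // scalar load widened
    uImageOutF[int2(gl_GlobalInvocationID.xy)] = f.x;              // store narrowed to one component
    float4 f2 = uImageInF2[int2(gl_GlobalInvocationID.xy)].xyyy;   // two-component load swizzled up
    uImageOutF2[int2(gl_GlobalInvocationID.xy)] = f2.xy;
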
--- reference/shaders-hlsl/comp/atomic.comp | 2 +- reference/shaders-hlsl/comp/image.comp | 52 ++++++++++++- .../shaders-hlsl/vert/texture_buffer.vert | 21 +++++ shaders-hlsl/comp/image.comp | 65 +++++++++++++++- shaders-hlsl/vert/texture_buffer.vert | 9 +++ spirv_hlsl.cpp | 78 ++++++++++++++++++- 6 files changed, 216 insertions(+), 11 deletions(-) create mode 100644 reference/shaders-hlsl/vert/texture_buffer.vert create mode 100644 shaders-hlsl/vert/texture_buffer.vert diff --git a/reference/shaders-hlsl/comp/atomic.comp b/reference/shaders-hlsl/comp/atomic.comp index 52338941..382d4298 100644 --- a/reference/shaders-hlsl/comp/atomic.comp +++ b/reference/shaders-hlsl/comp/atomic.comp @@ -14,7 +14,7 @@ void comp_main() uint _27; InterlockedAdd(uImage[int2(1, 5)], 1u, _27); int _28 = int(_27); - iImage[int2(1, 6)] = int4(_28, _28, _28, _28); + iImage[int2(1, 6)] = int4(_28, _28, _28, _28).x; uint _32; InterlockedOr(uImage[int2(1, 5)], 1u, _32); uint _34; diff --git a/reference/shaders-hlsl/comp/image.comp b/reference/shaders-hlsl/comp/image.comp index d260adca..cb084e22 100644 --- a/reference/shaders-hlsl/comp/image.comp +++ b/reference/shaders-hlsl/comp/image.comp @@ -1,5 +1,27 @@ -RWTexture2D uImageIn : register(u0); -RWTexture2D uImageOut : register(u1); +RWTexture2D uImageInF : register(u0); +RWTexture2D uImageOutF : register(u1); +RWTexture2D uImageInI : register(u2); +RWTexture2D uImageOutI : register(u3); +RWTexture2D uImageInU : register(u4); +RWTexture2D uImageOutU : register(u5); +RWBuffer uImageInBuffer : register(u6); +RWBuffer uImageOutBuffer : register(u7); +RWTexture2D uImageInF2 : register(u0); +RWTexture2D uImageOutF2 : register(u1); +RWTexture2D uImageInI2 : register(u2); +RWTexture2D uImageOutI2 : register(u3); +RWTexture2D uImageInU2 : register(u4); +RWTexture2D uImageOutU2 : register(u5); +RWBuffer uImageInBuffer2 : register(u6); +RWBuffer uImageOutBuffer2 : register(u7); +RWTexture2D uImageInF4 : register(u0); +RWTexture2D uImageOutF4 : register(u1); +RWTexture2D uImageInI4 : register(u2); +RWTexture2D uImageOutI4 : register(u3); +RWTexture2D uImageInU4 : register(u4); +RWTexture2D uImageOutU4 : register(u5); +RWBuffer uImageInBuffer4 : register(u6); +RWBuffer uImageOutBuffer4 : register(u7); static uint3 gl_GlobalInvocationID; struct SPIRV_Cross_Input @@ -9,8 +31,30 @@ struct SPIRV_Cross_Input void comp_main() { - float4 v = uImageIn[int2(gl_GlobalInvocationID.xy)]; - uImageOut[int2(gl_GlobalInvocationID.xy)] = v; + float4 f = float4(uImageInF[int2(gl_GlobalInvocationID.xy)]); + uImageOutF[int2(gl_GlobalInvocationID.xy)] = f.x; + int4 i = int4(uImageInI[int2(gl_GlobalInvocationID.xy)]); + uImageOutI[int2(gl_GlobalInvocationID.xy)] = i.x; + uint4 u = uint4(uImageInU[int2(gl_GlobalInvocationID.xy)]); + uImageOutU[int2(gl_GlobalInvocationID.xy)] = u.x; + float4 b = float4(uImageInBuffer[int(gl_GlobalInvocationID.x)]); + uImageOutBuffer[int(gl_GlobalInvocationID.x)] = b.x; + float4 f2 = uImageInF2[int2(gl_GlobalInvocationID.xy)].xyyy; + uImageOutF2[int2(gl_GlobalInvocationID.xy)] = f2.xy; + int4 i2 = uImageInI2[int2(gl_GlobalInvocationID.xy)].xyyy; + uImageOutI2[int2(gl_GlobalInvocationID.xy)] = i2.xy; + uint4 u2 = uImageInU2[int2(gl_GlobalInvocationID.xy)].xyyy; + uImageOutU2[int2(gl_GlobalInvocationID.xy)] = u2.xy; + float4 b2 = uImageInBuffer2[int(gl_GlobalInvocationID.x)].xyyy; + uImageOutBuffer2[int(gl_GlobalInvocationID.x)] = b2.xy; + float4 f4 = uImageInF4[int2(gl_GlobalInvocationID.xy)]; + uImageOutF4[int2(gl_GlobalInvocationID.xy)] = f4; + int4 i4 = 
uImageInI4[int2(gl_GlobalInvocationID.xy)]; + uImageOutI4[int2(gl_GlobalInvocationID.xy)] = i4; + uint4 u4 = uImageInU4[int2(gl_GlobalInvocationID.xy)]; + uImageOutU4[int2(gl_GlobalInvocationID.xy)] = u4; + float4 b4 = uImageInBuffer4[int(gl_GlobalInvocationID.x)]; + uImageOutBuffer4[int(gl_GlobalInvocationID.x)] = b4; } [numthreads(1, 1, 1)] diff --git a/reference/shaders-hlsl/vert/texture_buffer.vert b/reference/shaders-hlsl/vert/texture_buffer.vert new file mode 100644 index 00000000..1c92f6fe --- /dev/null +++ b/reference/shaders-hlsl/vert/texture_buffer.vert @@ -0,0 +1,21 @@ +Buffer uSamp : register(t4); +RWBuffer uSampo : register(u5); + +static float4 gl_Position; +struct SPIRV_Cross_Output +{ + float4 gl_Position : SV_Position; +}; + +void vert_main() +{ + gl_Position = uSamp.Load(10) + uSampo[100]; +} + +SPIRV_Cross_Output main() +{ + vert_main(); + SPIRV_Cross_Output stage_output; + stage_output.gl_Position = gl_Position; + return stage_output; +} diff --git a/shaders-hlsl/comp/image.comp b/shaders-hlsl/comp/image.comp index 28a39eb8..218af74d 100644 --- a/shaders-hlsl/comp/image.comp +++ b/shaders-hlsl/comp/image.comp @@ -1,12 +1,69 @@ #version 450 layout(local_size_x = 1) in; -layout(r32f, binding = 0) uniform readonly mediump image2D uImageIn; -layout(r32f, binding = 1) uniform writeonly mediump image2D uImageOut; +layout(r32f, binding = 0) uniform readonly image2D uImageInF; +layout(r32f, binding = 1) uniform writeonly image2D uImageOutF; +layout(r32i, binding = 2) uniform readonly iimage2D uImageInI; +layout(r32i, binding = 3) uniform writeonly iimage2D uImageOutI; +layout(r32ui, binding = 4) uniform readonly uimage2D uImageInU; +layout(r32ui, binding = 5) uniform writeonly uimage2D uImageOutU; +layout(r32f, binding = 6) uniform readonly imageBuffer uImageInBuffer; +layout(r32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer; + +layout(rg32f, binding = 0) uniform readonly image2D uImageInF2; +layout(rg32f, binding = 1) uniform writeonly image2D uImageOutF2; +layout(rg32i, binding = 2) uniform readonly iimage2D uImageInI2; +layout(rg32i, binding = 3) uniform writeonly iimage2D uImageOutI2; +layout(rg32ui, binding = 4) uniform readonly uimage2D uImageInU2; +layout(rg32ui, binding = 5) uniform writeonly uimage2D uImageOutU2; +layout(rg32f, binding = 6) uniform readonly imageBuffer uImageInBuffer2; +layout(rg32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer2; + +layout(rgba32f, binding = 0) uniform readonly image2D uImageInF4; +layout(rgba32f, binding = 1) uniform writeonly image2D uImageOutF4; +layout(rgba32i, binding = 2) uniform readonly iimage2D uImageInI4; +layout(rgba32i, binding = 3) uniform writeonly iimage2D uImageOutI4; +layout(rgba32ui, binding = 4) uniform readonly uimage2D uImageInU4; +layout(rgba32ui, binding = 5) uniform writeonly uimage2D uImageOutU4; +layout(rgba32f, binding = 6) uniform readonly imageBuffer uImageInBuffer4; +layout(rgba32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer4; void main() { - vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy)); - imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v); + vec4 f = imageLoad(uImageInF, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF, ivec2(gl_GlobalInvocationID.xy), f); + + ivec4 i = imageLoad(uImageInI, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI, ivec2(gl_GlobalInvocationID.xy), i); + + uvec4 u = imageLoad(uImageInU, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU, ivec2(gl_GlobalInvocationID.xy), u); + + vec4 b = 
imageLoad(uImageInBuffer, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer, int(gl_GlobalInvocationID.x), b); + + vec4 f2 = imageLoad(uImageInF2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF2, ivec2(gl_GlobalInvocationID.xy), f2); + + ivec4 i2 = imageLoad(uImageInI2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI2, ivec2(gl_GlobalInvocationID.xy), i2); + + uvec4 u2 = imageLoad(uImageInU2, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU2, ivec2(gl_GlobalInvocationID.xy), u2); + + vec4 b2 = imageLoad(uImageInBuffer2, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer2, int(gl_GlobalInvocationID.x), b2); + + vec4 f4 = imageLoad(uImageInF4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutF4, ivec2(gl_GlobalInvocationID.xy), f4); + + ivec4 i4 = imageLoad(uImageInI4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutI4, ivec2(gl_GlobalInvocationID.xy), i4); + + uvec4 u4 = imageLoad(uImageInU4, ivec2(gl_GlobalInvocationID.xy)); + imageStore(uImageOutU4, ivec2(gl_GlobalInvocationID.xy), u4); + + vec4 b4 = imageLoad(uImageInBuffer4, int(gl_GlobalInvocationID.x)); + imageStore(uImageOutBuffer4, int(gl_GlobalInvocationID.x), b4); } diff --git a/shaders-hlsl/vert/texture_buffer.vert b/shaders-hlsl/vert/texture_buffer.vert new file mode 100644 index 00000000..b071e0c9 --- /dev/null +++ b/shaders-hlsl/vert/texture_buffer.vert @@ -0,0 +1,9 @@ +#version 450 + +layout(binding = 4) uniform samplerBuffer uSamp; +layout(rgba32f, binding = 5) uniform readonly imageBuffer uSampo; + +void main() +{ + gl_Position = texelFetch(uSamp, 10) + imageLoad(uSampo, 100); +} diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 2599c9d8..b7dab6fc 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -23,6 +23,62 @@ using namespace spv; using namespace spirv_cross; using namespace std; +static unsigned image_format_to_components(ImageFormat fmt) +{ + switch (fmt) + { + case ImageFormatR8: + case ImageFormatR16: + case ImageFormatR8Snorm: + case ImageFormatR16Snorm: + case ImageFormatR16f: + case ImageFormatR32f: + case ImageFormatR8i: + case ImageFormatR16i: + case ImageFormatR32i: + case ImageFormatR8ui: + case ImageFormatR16ui: + case ImageFormatR32ui: + return 1; + + case ImageFormatRg8: + case ImageFormatRg16: + case ImageFormatRg8Snorm: + case ImageFormatRg16Snorm: + case ImageFormatRg16f: + case ImageFormatRg32f: + case ImageFormatRg8i: + case ImageFormatRg16i: + case ImageFormatRg32i: + case ImageFormatRg8ui: + case ImageFormatRg16ui: + case ImageFormatRg32ui: + return 2; + + case ImageFormatR11fG11fB10f: + return 3; + + case ImageFormatRgba8: + case ImageFormatRgba16: + case ImageFormatRgb10A2: + case ImageFormatRgba8Snorm: + case ImageFormatRgba16Snorm: + case ImageFormatRgba16f: + case ImageFormatRgba32f: + case ImageFormatRgba8i: + case ImageFormatRgba16i: + case ImageFormatRgba32i: + case ImageFormatRgba8ui: + case ImageFormatRgba16ui: + case ImageFormatRgba32ui: + case ImageFormatRgb10a2ui: + return 4; + + default: + SPIRV_CROSS_THROW("Unrecognized typed image format."); + } +} + static string image_format_to_type(ImageFormat fmt) { switch (fmt) @@ -148,7 +204,7 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type) if (type.image.sampled == 1) return join("Buffer<", type_to_glsl(imagetype), components, ">"); else if (type.image.sampled == 2) - return join("RWBuffer<", image_format_to_type(imagetype.image.format), components, ">"); + return join("RWBuffer<", image_format_to_type(type.image.format), ">"); else SPIRV_CROSS_THROW("Sampler 
buffers must be either sampled or unsampled. Cannot deduce in runtime."); case DimSubpassData: @@ -2745,6 +2801,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) auto *var = maybe_get_backing_variable(ops[2]); auto imgexpr = join(to_expression(ops[2]), "[", to_expression(ops[3]), "]"); + // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", + // except that the underlying type changes how the data is interpreted. + if (var) + imgexpr = remap_swizzle(get(result_type), + image_format_to_components(get(var->basetype).image.format), imgexpr); + if (var && var->forwardable) { auto &e = emit_op(result_type, id, imgexpr, true); @@ -2759,7 +2821,19 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpImageWrite: { auto *var = maybe_get_backing_variable(ops[0]); - statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", to_expression(ops[2]), ";"); + + // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", + // except that the underlying type changes how the data is interpreted. + auto value_expr = to_expression(ops[2]); + if (var) + { + auto &type = get(var->basetype); + auto narrowed_type = get(type.image.type); + narrowed_type.vecsize = image_format_to_components(type.image.format); + value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); + } + + statement(to_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); break; From 2ba7ed1c09ef12e54b4ded570f5c8c322fccd2de Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 24 Oct 2017 09:56:08 +0200 Subject: [PATCH 7/7] Fix tests after rebase. --- reference/shaders-hlsl/comp/shared.comp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reference/shaders-hlsl/comp/shared.comp b/reference/shaders-hlsl/comp/shared.comp index a344b859..40ba1e46 100644 --- a/reference/shaders-hlsl/comp/shared.comp +++ b/reference/shaders-hlsl/comp/shared.comp @@ -1,4 +1,4 @@ -const uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u); +static const uint3 gl_WorkGroupSize = uint3(4u, 1u, 1u); ByteAddressBuffer _22 : register(u0); RWByteAddressBuffer _44 : register(u1);