From 234f629e6662f342c9c835b929cbd87945523c4f Mon Sep 17 00:00:00 2001
From: Jamie Reece Wilson
Date: Sat, 17 Aug 2024 16:04:48 +0100
Subject: [PATCH] [*] Fix the wildly inconsistent formatting: just use spaces

---
 spirv_hlsl.cpp | 11828 ++++++++++++++++++++++++------------------------
 1 file changed, 5914 insertions(+), 5914 deletions(-)

diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp
index 031b4d44..784a6373 100644
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@@ -33,364 +33,364 @@ using namespace std;

enum class ImageFormatNormalizedState
{
    None = 0,
    Unorm = 1,
    Snorm = 2
};

static ImageFormatNormalizedState image_format_to_normalized_state(ImageFormat fmt)
{
    switch (fmt)
    {
    case ImageFormatR8:
    case ImageFormatR16:
    case ImageFormatRg8:
    case ImageFormatRg16:
    case ImageFormatRgba8:
    case ImageFormatRgba16:
    case ImageFormatRgb10A2:
        return ImageFormatNormalizedState::Unorm;

    case ImageFormatR8Snorm:
    case ImageFormatR16Snorm:
    case ImageFormatRg8Snorm:
    case ImageFormatRg16Snorm:
    case ImageFormatRgba8Snorm:
    case ImageFormatRgba16Snorm:
        return ImageFormatNormalizedState::Snorm;

    default:
        break;
    }

    return ImageFormatNormalizedState::None;
}

static unsigned image_format_to_components(ImageFormat fmt)
{
    switch (fmt)
    {
    case ImageFormatR8:
    case ImageFormatR16:
    case ImageFormatR8Snorm:
    case ImageFormatR16Snorm:
    case ImageFormatR16f:
    case ImageFormatR32f:
    case ImageFormatR8i:
    case ImageFormatR16i:
    case ImageFormatR32i:
    case ImageFormatR8ui:
    case ImageFormatR16ui:
    case ImageFormatR32ui:
        return 1;

    case ImageFormatRg8:
    case ImageFormatRg16:
    case ImageFormatRg8Snorm:
    case ImageFormatRg16Snorm:
    case ImageFormatRg16f:
    case ImageFormatRg32f:
    case ImageFormatRg8i:
    case ImageFormatRg16i:
    case ImageFormatRg32i:
    case ImageFormatRg8ui:
    case ImageFormatRg16ui:
    case ImageFormatRg32ui:
        return 2;

    case ImageFormatR11fG11fB10f:
        return 3;

    case ImageFormatRgba8:
    case ImageFormatRgba16:
    case ImageFormatRgb10A2:
    case ImageFormatRgba8Snorm:
    case ImageFormatRgba16Snorm:
    case ImageFormatRgba16f:
    case ImageFormatRgba32f:
    case ImageFormatRgba8i:
    case ImageFormatRgba16i:
    case ImageFormatRgba32i:
    case ImageFormatRgba8ui:
    case ImageFormatRgba16ui:
    case ImageFormatRgba32ui:
    case ImageFormatRgb10a2ui:
        return 4;

    case ImageFormatUnknown:
        return 4; // Assume 4.

    default:
        SPIRV_CROSS_THROW("Unrecognized typed image format.");
    }
}
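
// [Editor's illustration, not part of the patch.] Worked examples of the two
// helpers above, plus a hypothetical checker showing how they compose:
//
//   image_format_to_normalized_state(ImageFormatRgba8)    -> Unorm
//   image_format_to_normalized_state(ImageFormatR16Snorm) -> Snorm
//   image_format_to_components(ImageFormatRg32f)          -> 2
//   image_format_to_components(ImageFormatUnknown)        -> 4 (assumed)
//
static bool image_format_is_normalized_vec4(ImageFormat fmt)
{
    // True for e.g. Rgba8 (unorm, 4 components), false for Rg16f (unnormalized, 2 components).
    return image_format_to_normalized_state(fmt) != ImageFormatNormalizedState::None &&
           image_format_to_components(fmt) == 4;
}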

static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
{
    switch (fmt)
    {
    case ImageFormatR8:
    case ImageFormatR16:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "unorm float";
    case ImageFormatRg8:
    case ImageFormatRg16:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "unorm float2";
    case ImageFormatRgba8:
    case ImageFormatRgba16:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "unorm float4";
    case ImageFormatRgb10A2:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "unorm float4";

    case ImageFormatR8Snorm:
    case ImageFormatR16Snorm:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "snorm float";
    case ImageFormatRg8Snorm:
    case ImageFormatRg16Snorm:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "snorm float2";
    case ImageFormatRgba8Snorm:
    case ImageFormatRgba16Snorm:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "snorm float4";

    case ImageFormatR16f:
    case ImageFormatR32f:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "float";
    case ImageFormatRg16f:
    case ImageFormatRg32f:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "float2";
    case ImageFormatRgba16f:
    case ImageFormatRgba32f:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "float4";

    case ImageFormatR11fG11fB10f:
        if (basetype != SPIRType::Float)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "float3";

    case ImageFormatR8i:
    case ImageFormatR16i:
    case ImageFormatR32i:
        if (basetype != SPIRType::Int)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "int";
    case ImageFormatRg8i:
    case ImageFormatRg16i:
    case ImageFormatRg32i:
        if (basetype != SPIRType::Int)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "int2";
    case ImageFormatRgba8i:
    case ImageFormatRgba16i:
    case ImageFormatRgba32i:
        if (basetype != SPIRType::Int)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "int4";

    case ImageFormatR8ui:
    case ImageFormatR16ui:
    case ImageFormatR32ui:
        if (basetype != SPIRType::UInt)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "uint";
    case ImageFormatRg8ui:
    case ImageFormatRg16ui:
    case ImageFormatRg32ui:
        if (basetype != SPIRType::UInt)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "uint2";
    case ImageFormatRgba8ui:
    case ImageFormatRgba16ui:
    case ImageFormatRgba32ui:
        if (basetype != SPIRType::UInt)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "uint4";
    case ImageFormatRgb10a2ui:
        if (basetype != SPIRType::UInt)
            SPIRV_CROSS_THROW("Mismatch in image type and base type of image.");
        return "uint4";

    case ImageFormatUnknown:
        switch (basetype)
        {
        case SPIRType::Float:
            return "float4";
        case SPIRType::Int:
            return "int4";
        case SPIRType::UInt:
            return "uint4";
        default:
            SPIRV_CROSS_THROW("Unsupported base type for image.");
        }

    default:
        SPIRV_CROSS_THROW("Unrecognized typed image format.");
    }
}
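
// [Editor's illustration, not part of the patch.] Worked examples of the
// format -> HLSL element-type mapping implemented above:
//
//   image_format_to_type(ImageFormatRgba8, SPIRType::Float) -> "unorm float4"
//   image_format_to_type(ImageFormatRg16f, SPIRType::Float) -> "float2"
//   image_format_to_type(ImageFormatR32ui, SPIRType::UInt)  -> "uint"
//   image_format_to_type(ImageFormatR32ui, SPIRType::Int)   -> throws: base type mismatch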

string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
{
    auto &imagetype = get<SPIRType>(type.image.type);
    const char *dim = nullptr;
    bool typed_load = false;
    uint32_t components = 4;

    bool force_image_srv = hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(id, DecorationNonWritable);

    switch (type.image.dim)
    {
    case Dim1D:
        typed_load = type.image.sampled == 2;
        dim = "1D";
        break;
    case Dim2D:
        typed_load = type.image.sampled == 2;
        dim = "2D";
        break;
    case Dim3D:
        typed_load = type.image.sampled == 2;
        dim = "3D";
        break;
    case DimCube:
        if (type.image.sampled == 2)
            SPIRV_CROSS_THROW("RWTextureCube does not exist in HLSL.");
        dim = "Cube";
        break;
    case DimRect:
        SPIRV_CROSS_THROW("Rectangle texture support is not yet implemented for HLSL."); // TODO
    case DimBuffer:
        if (type.image.sampled == 1)
            return join("Buffer<", type_to_glsl(imagetype), components, ">");
        else if (type.image.sampled == 2)
        {
            if (interlocked_resources.count(id))
                return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
                            ">");

            typed_load = !force_image_srv && type.image.sampled == 2;

            const char *rw = force_image_srv ? "" : "RW";
            return join(rw, "Buffer<",
                        typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
                                     join(type_to_glsl(imagetype), components),
                        ">");
        }
        else
            SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce this at runtime.");
    case DimSubpassData:
        dim = "2D";
        typed_load = false;
        break;
    default:
        SPIRV_CROSS_THROW("Invalid dimension.");
    }
    const char *arrayed = type.image.arrayed ? "Array" : "";
    const char *ms = type.image.ms ? "MS" : "";
    const char *rw = typed_load && !force_image_srv ? "RW" : "";

    if (force_image_srv)
        typed_load = false;

    if (typed_load && interlocked_resources.count(id))
        rw = "RasterizerOrdered";

    return join(rw, "Texture", dim, ms, arrayed, "<",
                typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
                             join(type_to_glsl(imagetype), components),
                ">");
}
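
// [Editor's illustration, not part of the patch.] Declarations this function
// produces for a few representative image types (assuming the resource is
// neither forced to SRV nor rasterizer-ordered unless stated):
//
//   storage 2D image, Rgba8                 -> "RWTexture2D<unorm float4>"
//   sampled 2D array image of float         -> "Texture2DArray<float4>"
//   storage buffer image, R32f, interlocked -> "RasterizerOrderedBuffer<float>"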
"image" : "texture"; + } + else + res += "sampler"; - switch (type.image.dim) - { - case Dim1D: - res += "1D"; - break; - case Dim2D: - res += "2D"; - break; - case Dim3D: - res += "3D"; - break; - case DimCube: - res += "CUBE"; - break; + switch (type.image.dim) + { + case Dim1D: + res += "1D"; + break; + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "CUBE"; + break; - case DimBuffer: - res += "Buffer"; - break; + case DimBuffer: + res += "Buffer"; + break; - case DimSubpassData: - res += "2D"; - break; - default: - SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported."); - } + case DimSubpassData: + res += "2D"; + break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported."); + } - if (type.image.ms) - res += "MS"; - if (type.image.arrayed) - res += "Array"; + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) + res += "Array"; - return res; + return res; } string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id) { - if (hlsl_options.shader_model <= 30) - return image_type_hlsl_legacy(type, id); - else - return image_type_hlsl_modern(type, id); + if (hlsl_options.shader_model <= 30) + return image_type_hlsl_legacy(type, id); + else + return image_type_hlsl_modern(type, id); } // The optional id parameter indicates the object whose type we are trying @@ -398,4973 +398,4973 @@ string CompilerHLSL::image_type_hlsl(const SPIRType &type, uint32_t id) // depend on a specific object's use of that type. string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) { - // Ignore the pointer type since GLSL doesn't have pointers. + // Ignore the pointer type since GLSL doesn't have pointers. - switch (type.basetype) - { - case SPIRType::Struct: - // Need OpName lookup here to get a "sensible" name for a struct. - if (backend.explicit_struct_type) - return join("struct ", to_name(type.self)); - else - return to_name(type.self); + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); - case SPIRType::Image: - case SPIRType::SampledImage: - return image_type_hlsl(type, id); + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_hlsl(type, id); - case SPIRType::Sampler: - return comparison_ids.count(id) ? "SamplerComparisonState" : "SamplerState"; + case SPIRType::Sampler: + return comparison_ids.count(id) ? 
"SamplerComparisonState" : "SamplerState"; - case SPIRType::Void: - return "void"; + case SPIRType::Void: + return "void"; - default: - break; - } + default: + break; + } - if (type.vecsize == 1 && type.columns == 1) // Scalar builtin - { - switch (type.basetype) - { - case SPIRType::Boolean: - return "bool"; - case SPIRType::Int: - return backend.basic_int_type; - case SPIRType::UInt: - return backend.basic_uint_type; - case SPIRType::AtomicCounter: - return "atomic_uint"; - case SPIRType::Half: - if (hlsl_options.enable_16bit_types) - return "half"; - else - return "min16float"; - case SPIRType::Short: - if (hlsl_options.enable_16bit_types) - return "int16_t"; - else - return "min16int"; - case SPIRType::UShort: - if (hlsl_options.enable_16bit_types) - return "uint16_t"; - else - return "min16uint"; - case SPIRType::Float: - return "float"; - case SPIRType::Double: - return "double"; - case SPIRType::Int64: - if (hlsl_options.shader_model < 60) - SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); - return "int64_t"; - case SPIRType::UInt64: - if (hlsl_options.shader_model < 60) - SPIRV_CROSS_THROW("64-bit integers only supported in SM 6.0."); - return "uint64_t"; - case SPIRType::AccelerationStructure: - return "RaytracingAccelerationStructure"; - case SPIRType::RayQuery: - return "RayQuery"; - default: - return "???"; - } - } - else if (type.vecsize > 1 && type.columns == 1) // Vector builtin - { - switch (type.basetype) - { - case SPIRType::Boolean: - return join("bool", type.vecsize); - case SPIRType::Int: - return join("int", type.vecsize); - case SPIRType::UInt: - return join("uint", type.vecsize); - case SPIRType::Half: - return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.vecsize); - case SPIRType::Short: - return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.vecsize); - case SPIRType::UShort: - return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.vecsize); - case SPIRType::Float: - return join("float", type.vecsize); - case SPIRType::Double: - return join("double", type.vecsize); - case SPIRType::Int64: - return join("i64vec", type.vecsize); - case SPIRType::UInt64: - return join("u64vec", type.vecsize); - default: - return "???"; - } - } - else - { - switch (type.basetype) - { - case SPIRType::Boolean: - return join("bool", type.columns, "x", type.vecsize); - case SPIRType::Int: - return join("int", type.columns, "x", type.vecsize); - case SPIRType::UInt: - return join("uint", type.columns, "x", type.vecsize); - case SPIRType::Half: - return join(hlsl_options.enable_16bit_types ? "half" : "min16float", type.columns, "x", type.vecsize); - case SPIRType::Short: - return join(hlsl_options.enable_16bit_types ? "int16_t" : "min16int", type.columns, "x", type.vecsize); - case SPIRType::UShort: - return join(hlsl_options.enable_16bit_types ? "uint16_t" : "min16uint", type.columns, "x", type.vecsize); - case SPIRType::Float: - return join("float", type.columns, "x", type.vecsize); - case SPIRType::Double: - return join("double", type.columns, "x", type.vecsize); - // Matrix types not supported for int64/uint64. 

void CompilerHLSL::emit_header()
{
    for (auto &header : header_lines)
        statement(header);

    if (header_lines.size() > 0)
    {
        statement("");
    }
}

void CompilerHLSL::emit_interface_block_globally(const SPIRVariable &var)
{
    add_resource_name(var.self);

    // The global copies of I/O variables should not contain interpolation qualifiers.
    // These are emitted inside the interface structs.
    auto &flags = ir.meta[var.self].decoration.decoration_flags;
    auto old_flags = flags;
    flags.reset();
    statement("static ", variable_decl(var), ";");
    flags = old_flags;
}
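
// [Editor's illustration, not part of the patch.] For a SPIR-V output such as
// "layout(location = 0) out vec4 FragColor;", the pass above emits only a
// plain private global:
//
//   static float4 FragColor;
//
// The semantic binding (e.g. SV_Target0) is attached later, on the member of
// the generated interface struct, not on this global.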

const char *CompilerHLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
{
    // Input and output variables are handled specially in HLSL backend.
    // The variables are declared as global, private variables, and do not need any qualifiers.
    if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
        var.storage == StorageClassPushConstant)
    {
        return "uniform ";
    }

    return "";
}

void CompilerHLSL::emit_builtin_outputs_in_struct()
{
    auto &execution = get_entry_point();

    bool legacy = hlsl_options.shader_model <= 30;
    active_output_builtins.for_each_bit([&](uint32_t i) {
        const char *type = nullptr;
        const char *semantic = nullptr;
        auto builtin = static_cast<BuiltIn>(i);
        switch (builtin)
        {
        case BuiltInPosition:
            type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4";
            semantic = legacy ? "POSITION" : "SV_Position";
            break;
"POSITION" : "SV_Position"; + break; - case BuiltInSampleMask: - if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment) - SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher."); - type = "uint"; - semantic = "SV_Coverage"; - break; + case BuiltInSampleMask: + if (hlsl_options.shader_model < 41 || execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Sample Mask output is only supported in PS 4.1 or higher."); + type = "uint"; + semantic = "SV_Coverage"; + break; - case BuiltInFragDepth: - type = "float"; - if (legacy) - { - semantic = "DEPTH"; - } - else - { - if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater)) - semantic = "SV_DepthGreaterEqual"; - else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess)) - semantic = "SV_DepthLessEqual"; - else - semantic = "SV_Depth"; - } - break; + case BuiltInFragDepth: + type = "float"; + if (legacy) + { + semantic = "DEPTH"; + } + else + { + if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthGreater)) + semantic = "SV_DepthGreaterEqual"; + else if (hlsl_options.shader_model >= 50 && execution.flags.get(ExecutionModeDepthLess)) + semantic = "SV_DepthLessEqual"; + else + semantic = "SV_Depth"; + } + break; - case BuiltInClipDistance: - { - static const char *types[] = { "float", "float2", "float3", "float4" }; + case BuiltInClipDistance: + { + static const char *types[] = { "float", "float2", "float3", "float4" }; - // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. - if (execution.model == ExecutionModelMeshEXT) - { - if (clip_distance_count > 4) - SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders."); + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. + if (execution.model == ExecutionModelMeshEXT) + { + if (clip_distance_count > 4) + SPIRV_CROSS_THROW("Clip distance count > 4 not supported for mesh shaders."); - if (clip_distance_count == 1) - { - // Avoids having to hack up access_chain code. Makes it trivially indexable. - statement("float gl_ClipDistance[1] : SV_ClipDistance;"); - } - else - { - // Replace array with vector directly, avoids any weird fixup path. - statement(types[clip_distance_count - 1], " gl_ClipDistance : SV_ClipDistance;"); - } - } - else - { - for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) - { - uint32_t to_declare = clip_distance_count - clip; - if (to_declare > 4) - to_declare = 4; + if (clip_distance_count == 1) + { + // Avoids having to hack up access_chain code. Makes it trivially indexable. + statement("float gl_ClipDistance[1] : SV_ClipDistance;"); + } + else + { + // Replace array with vector directly, avoids any weird fixup path. 

        case BuiltInCullDistance:
        {
            static const char *types[] = { "float", "float2", "float3", "float4" };

            // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
            if (execution.model == ExecutionModelMeshEXT)
            {
                if (cull_distance_count > 4)
                    SPIRV_CROSS_THROW("Cull distance count > 4 not supported for mesh shaders.");

                if (cull_distance_count == 1)
                {
                    // Avoids having to hack up access_chain code. Makes it trivially indexable.
                    statement("float gl_CullDistance[1] : SV_CullDistance;");
                }
                else
                {
                    // Replace array with vector directly, avoids any weird fixup path.
                    statement(types[cull_distance_count - 1], " gl_CullDistance : SV_CullDistance;");
                }
            }
            else
            {
                for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
                {
                    uint32_t to_declare = cull_distance_count - cull;
                    if (to_declare > 4)
                        to_declare = 4;

                    uint32_t semantic_index = cull / 4;

                    statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassOutput), semantic_index,
                              " : SV_CullDistance", semantic_index, ";");
                }
            }
            break;
        }

        case BuiltInPointSize:
            // If point_size_compat is enabled, just ignore PointSize.
            // PointSize does not exist in HLSL, but some code bases might want to be able to use these shaders,
            // even if it means working around the missing feature.
            if (legacy)
            {
                type = "float";
                semantic = "PSIZE";
            }
            else if (!hlsl_options.point_size_compat)
                SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
            break;

        case BuiltInLayer:
        case BuiltInPrimitiveId:
        case BuiltInViewportIndex:
        case BuiltInPrimitiveShadingRateKHR:
        case BuiltInCullPrimitiveEXT:
            // Per-primitive attributes are handled separately.
            break;

        case BuiltInPrimitivePointIndicesEXT:
        case BuiltInPrimitiveLineIndicesEXT:
        case BuiltInPrimitiveTriangleIndicesEXT:
            // The meshlet local-index buffer is handled separately.
            break;

        default:
            SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
        }

        if (type && semantic)
            statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
    });
}

void CompilerHLSL::emit_builtin_primitive_outputs_in_struct()
{
    active_output_builtins.for_each_bit([&](uint32_t i) {
        const char *type = nullptr;
        const char *semantic = nullptr;
        auto builtin = static_cast<BuiltIn>(i);
        switch (builtin)
        {
        case BuiltInLayer:
        {
            if (hlsl_options.shader_model < 50)
                SPIRV_CROSS_THROW("Render target array index output is only supported in SM 5.0 or higher.");
            type = "uint";
            semantic = "SV_RenderTargetArrayIndex";
            break;
        }

        case BuiltInPrimitiveId:
            type = "uint";
            semantic = "SV_PrimitiveID";
            break;

        case BuiltInViewportIndex:
            type = "uint";
            semantic = "SV_ViewportArrayIndex";
            break;

        case BuiltInPrimitiveShadingRateKHR:
            type = "uint";
            semantic = "SV_ShadingRate";
            break;

        case BuiltInCullPrimitiveEXT:
            type = "bool";
            semantic = "SV_CullPrimitive";
            break;

        default:
            break;
        }

        if (type && semantic)
            statement(type, " ", builtin_to_glsl(builtin, StorageClassOutput), " : ", semantic, ";");
    });
}
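
// [Editor's illustration, not part of the patch.] A vertex shader writing
// gl_Position plus two clip distances would see output-struct members like:
//
//   float4 gl_Position : SV_Position;
//   float2 gl_ClipDistance0 : SV_ClipDistance0;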

void CompilerHLSL::emit_builtin_inputs_in_struct()
{
    bool legacy = hlsl_options.shader_model <= 30;
    active_input_builtins.for_each_bit([&](uint32_t i) {
        const char *type = nullptr;
        const char *semantic = nullptr;
        auto builtin = static_cast<BuiltIn>(i);
        switch (builtin)
        {
        case BuiltInFragCoord:
            type = "float4";
            semantic = legacy ? "VPOS" : "SV_Position";
            break;

        case BuiltInVertexId:
        case BuiltInVertexIndex:
            if (legacy)
                SPIRV_CROSS_THROW("Vertex index not supported in SM 3.0 or lower.");
            type = "uint";
            semantic = "SV_VertexID";
            break;

        case BuiltInPrimitiveId:
            if (legacy)
                SPIRV_CROSS_THROW("Primitive ID not supported in SM 3.0 or lower.");
            type = "uint";
            semantic = "SV_PrimitiveID";
            break;

        case BuiltInInstanceId:
        case BuiltInInstanceIndex:
            if (legacy)
                SPIRV_CROSS_THROW("Instance index not supported in SM 3.0 or lower.");
            type = "uint";
            semantic = "SV_InstanceID";
            break;

        case BuiltInSampleId:
            if (legacy)
                SPIRV_CROSS_THROW("Sample ID not supported in SM 3.0 or lower.");
            type = "uint";
            semantic = "SV_SampleIndex";
            break;

        case BuiltInSampleMask:
            if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
                SPIRV_CROSS_THROW("Sample Mask input is only supported in PS 5.0 or higher.");
            type = "uint";
            semantic = "SV_Coverage";
            break;

        case BuiltInGlobalInvocationId:
            type = "uint3";
            semantic = "SV_DispatchThreadID";
            break;

        case BuiltInLocalInvocationId:
            type = "uint3";
            semantic = "SV_GroupThreadID";
            break;

        case BuiltInLocalInvocationIndex:
            type = "uint";
            semantic = "SV_GroupIndex";
            break;

        case BuiltInWorkgroupId:
            type = "uint3";
            semantic = "SV_GroupID";
            break;

        case BuiltInFrontFacing:
            if (legacy)
            {
                type = "float";
                semantic = "VFACE";
            }
            else
            {
                type = "bool";
                semantic = "SV_IsFrontFace";
            }
            break;
SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher."); - type = "uint"; - semantic = "SV_ViewID"; - break; + case BuiltInViewIndex: + if (hlsl_options.shader_model < 61 || (get_entry_point().model != ExecutionModelVertex && get_entry_point().model != ExecutionModelFragment)) + SPIRV_CROSS_THROW("View Index input is only supported in VS and PS 6.1 or higher."); + type = "uint"; + semantic = "SV_ViewID"; + break; - case BuiltInNumWorkgroups: - case BuiltInSubgroupSize: - case BuiltInSubgroupLocalInvocationId: - case BuiltInSubgroupEqMask: - case BuiltInSubgroupLtMask: - case BuiltInSubgroupLeMask: - case BuiltInSubgroupGtMask: - case BuiltInSubgroupGeMask: - // Handled specially. - break; + case BuiltInNumWorkgroups: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + case BuiltInSubgroupEqMask: + case BuiltInSubgroupLtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupGeMask: + // Handled specially. + break; - case BuiltInBaseVertex: - if (hlsl_options.shader_model >= 68) - { - type = "uint"; - semantic = "SV_StartVertexLocation"; - } - break; + case BuiltInBaseVertex: + if (hlsl_options.shader_model >= 68) + { + type = "uint"; + semantic = "SV_StartVertexLocation"; + } + break; - case BuiltInBaseInstance: - if (hlsl_options.shader_model >= 68) - { - type = "uint"; - semantic = "SV_StartInstanceLocation"; - } - break; + case BuiltInBaseInstance: + if (hlsl_options.shader_model >= 68) + { + type = "uint"; + semantic = "SV_StartInstanceLocation"; + } + break; - case BuiltInHelperInvocation: - if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) - SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher."); - break; + case BuiltInHelperInvocation: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher."); + break; - case BuiltInClipDistance: - // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. - for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) - { - uint32_t to_declare = clip_distance_count - clip; - if (to_declare > 4) - to_declare = 4; + case BuiltInClipDistance: + // HLSL is a bit weird here, use SV_ClipDistance0, SV_ClipDistance1 and so on with vectors. + for (uint32_t clip = 0; clip < clip_distance_count; clip += 4) + { + uint32_t to_declare = clip_distance_count - clip; + if (to_declare > 4) + to_declare = 4; - uint32_t semantic_index = clip / 4; + uint32_t semantic_index = clip / 4; - static const char *types[] = { "float", "float2", "float3", "float4" }; - statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index, - " : SV_ClipDistance", semantic_index, ";"); - } - break; + static const char *types[] = { "float", "float2", "float3", "float4" }; + statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index, + " : SV_ClipDistance", semantic_index, ";"); + } + break; - case BuiltInCullDistance: - // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. - for (uint32_t cull = 0; cull < cull_distance_count; cull += 4) - { - uint32_t to_declare = cull_distance_count - cull; - if (to_declare > 4) - to_declare = 4; + case BuiltInCullDistance: + // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors. 

        case BuiltInCullDistance:
            // HLSL is a bit weird here, use SV_CullDistance0, SV_CullDistance1 and so on with vectors.
            for (uint32_t cull = 0; cull < cull_distance_count; cull += 4)
            {
                uint32_t to_declare = cull_distance_count - cull;
                if (to_declare > 4)
                    to_declare = 4;

                uint32_t semantic_index = cull / 4;

                static const char *types[] = { "float", "float2", "float3", "float4" };
                statement(types[to_declare - 1], " ", builtin_to_glsl(builtin, StorageClassInput), semantic_index,
                          " : SV_CullDistance", semantic_index, ";");
            }
            break;

        case BuiltInPointCoord:
            // PointCoord is not supported, but provide a way to just ignore that, similar to PointSize.
            if (hlsl_options.point_coord_compat)
                break;
            else
                SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");

        case BuiltInLayer:
            if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment)
                SPIRV_CROSS_THROW("Render target array index input is only supported in PS 5.0 or higher.");
            type = "uint";
            semantic = "SV_RenderTargetArrayIndex";
            break;

        default:
            SPIRV_CROSS_THROW("Unsupported builtin in HLSL.");
        }

        if (type && semantic)
            statement(type, " ", builtin_to_glsl(builtin, StorageClassInput), " : ", semantic, ";");
    });
}
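
// [Editor's illustration, not part of the patch.] A compute shader reading
// gl_GlobalInvocationID and gl_WorkGroupID gets input-struct members mapped
// per the switch above:
//
//   uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
//   uint3 gl_WorkGroupID : SV_GroupID;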

uint32_t CompilerHLSL::type_to_consumed_locations(const SPIRType &type) const
{
    // TODO: Need to verify correctness.
    uint32_t elements = 0;

    if (type.basetype == SPIRType::Struct)
    {
        for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
            elements += type_to_consumed_locations(get<SPIRType>(type.member_types[i]));
    }
    else
    {
        uint32_t array_multiplier = 1;
        for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
        {
            if (type.array_size_literal[i])
                array_multiplier *= type.array[i];
            else
                array_multiplier *= evaluate_constant_u32(type.array[i]);
        }
        elements += array_multiplier * type.columns;
    }
    return elements;
}

string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags)
{
    string res;
    //if (flags & (1ull << DecorationSmooth))
    //    res += "linear ";
    if (flags.get(DecorationFlat))
        res += "nointerpolation ";
    if (flags.get(DecorationNoPerspective))
        res += "noperspective ";
    if (flags.get(DecorationCentroid))
        res += "centroid ";
    if (flags.get(DecorationPatch))
        res += "patch "; // Seems to be different in actual HLSL.
    if (flags.get(DecorationSample))
        res += "sample ";
    if (flags.get(DecorationInvariant) && backend.support_precise_qualifier)
        res += "precise "; // Not supported?

    return res;
}

std::string CompilerHLSL::to_semantic(uint32_t location, ExecutionModel em, StorageClass sc)
{
    if (em == ExecutionModelVertex && sc == StorageClassInput)
    {
        // We have a vertex attribute - we should look at remapping it if the user provided
        // vertex attribute hints.
        for (auto &attribute : remap_vertex_attributes)
            if (attribute.location == location)
                return attribute.semantic;
    }

    // Not a vertex attribute, or no remap_vertex_attributes entry.
    return join("TEXCOORD", location);
}
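
// [Editor's illustration, not part of the patch.] Location accounting in
// type_to_consumed_locations():
//
//   float4      -> 1 location  (one column, no array)
//   float4x4    -> 4 locations (four columns)
//   float2[3]   -> 3 locations (array multiplier 3 x one column)
//   float4x4[2] -> 8 locations (2 x 4 columns)
//
// And to_semantic(3, ...) yields "TEXCOORD3" unless a user-supplied vertex
// attribute remap overrides it.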
- auto &type = get(var.basetype); - bool is_block = has_decoration(type.self, DecorationBlock); - auto *c = maybe_get(var.initializer); - if (is_block && c) - return constant_expression(*c); - else - return CompilerGLSL::to_initializer_expression(var); + // We cannot emit static const initializer for block constants for practical reasons, + // so just inline the initializer. + // FIXME: There is a theoretical problem here if someone tries to composite extract + // into this initializer since we don't declare it properly, but that is somewhat non-sensical. + auto &type = get(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + auto *c = maybe_get(var.initializer); + if (is_block && c) + return constant_expression(*c); + else + return CompilerGLSL::to_initializer_expression(var); } void CompilerHLSL::emit_interface_block_member_in_struct(const SPIRVariable &var, uint32_t member_index, uint32_t location, std::unordered_set &active_locations) { - auto &execution = get_entry_point(); - auto type = get(var.basetype); - auto semantic = to_semantic(location, execution.model, var.storage); - auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index)); - auto &mbr_type = get(type.member_types[member_index]); + auto &execution = get_entry_point(); + auto type = get(var.basetype); + auto semantic = to_semantic(location, execution.model, var.storage); + auto mbr_name = join(to_name(type.self), "_", to_member_name(type, member_index)); + auto &mbr_type = get(type.member_types[member_index]); - statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)), - type_to_glsl(mbr_type), - " ", mbr_name, type_to_array_glsl(mbr_type, var.self), - " : ", semantic, ";"); + statement(to_interpolation_qualifiers(get_member_decoration_bitset(type.self, member_index)), + type_to_glsl(mbr_type), + " ", mbr_name, type_to_array_glsl(mbr_type, var.self), + " : ", semantic, ";"); - // Structs and arrays should consume more locations. - uint32_t consumed_locations = type_to_consumed_locations(mbr_type); - for (uint32_t i = 0; i < consumed_locations; i++) - active_locations.insert(location + i); + // Structs and arrays should consume more locations. + uint32_t consumed_locations = type_to_consumed_locations(mbr_type); + for (uint32_t i = 0; i < consumed_locations; i++) + active_locations.insert(location + i); } void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set &active_locations) { - auto &execution = get_entry_point(); - auto type = get(var.basetype); + auto &execution = get_entry_point(); + auto type = get(var.basetype); - string binding; - bool use_location_number = true; - bool need_matrix_unroll = false; - bool legacy = hlsl_options.shader_model <= 30; - if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) - { - // Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1. - uint32_t index = get_decoration(var.self, DecorationIndex); - uint32_t location = get_decoration(var.self, DecorationLocation); + string binding; + bool use_location_number = true; + bool need_matrix_unroll = false; + bool legacy = hlsl_options.shader_model <= 30; + if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) + { + // Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1. 

void CompilerHLSL::emit_interface_block_in_struct(const SPIRVariable &var, unordered_set<uint32_t> &active_locations)
{
    auto &execution = get_entry_point();
    auto type = get<SPIRType>(var.basetype);

    string binding;
    bool use_location_number = true;
    bool need_matrix_unroll = false;
    bool legacy = hlsl_options.shader_model <= 30;
    if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput)
    {
        // Dual-source blending is achieved in HLSL by emitting to SV_Target0 and 1.
        uint32_t index = get_decoration(var.self, DecorationIndex);
        uint32_t location = get_decoration(var.self, DecorationLocation);

        if (index != 0 && location != 0)
            SPIRV_CROSS_THROW("Dual-source blending is only supported on MRT #0 in HLSL.");

        binding = join(legacy ? "COLOR" : "SV_Target", location + index);
        use_location_number = false;
        if (legacy) // COLOR must be a four-component vector on legacy shader model targets (HLSL ERR_COLOR_4COMP)
            type.vecsize = 4;
    }
    else if (var.storage == StorageClassInput && execution.model == ExecutionModelVertex)
    {
        need_matrix_unroll = true;
        if (legacy) // Inputs must be floating-point in legacy targets.
            type.basetype = SPIRType::Float;
    }

    const auto get_vacant_location = [&]() -> uint32_t {
        for (uint32_t i = 0; i < 64; i++)
            if (!active_locations.count(i))
                return i;
        SPIRV_CROSS_THROW("All locations from 0 to 63 are exhausted.");
    };

    auto name = to_name(var.self);
    if (use_location_number)
    {
        uint32_t location_number;

        // If an explicit location exists, use it with TEXCOORD[N] semantic.
        // Otherwise, pick a vacant location.
        if (has_decoration(var.self, DecorationLocation))
            location_number = get_decoration(var.self, DecorationLocation);
        else
            location_number = get_vacant_location();

        // Allow semantic remap if specified.
        auto semantic = to_semantic(location_number, execution.model, var.storage);

        if (need_matrix_unroll && type.columns > 1)
        {
            if (!type.array.empty())
                SPIRV_CROSS_THROW("Arrays of matrices used as input/output. This is not supported.");
+ for (uint32_t i = 0; i < type.columns; i++) + { + SPIRType newtype = type; + newtype.columns = 1; - string effective_semantic; - if (hlsl_options.flatten_matrix_vertex_input_semantics) - effective_semantic = to_semantic(location_number, execution.model, var.storage); - else - effective_semantic = join(semantic, "_", i); + string effective_semantic; + if (hlsl_options.flatten_matrix_vertex_input_semantics) + effective_semantic = to_semantic(location_number, execution.model, var.storage); + else + effective_semantic = join(semantic, "_", i); - statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), - variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";"); - active_locations.insert(location_number++); - } - } - else - { - auto decl_type = type; - if (execution.model == ExecutionModelMeshEXT) - { - decl_type.array.erase(decl_type.array.begin()); - decl_type.array_size_literal.erase(decl_type.array_size_literal.begin()); - } - statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ", - semantic, ";"); + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), + variable_decl(newtype, join(name, "_", i)), " : ", effective_semantic, ";"); + active_locations.insert(location_number++); + } + } + else + { + auto decl_type = type; + if (execution.model == ExecutionModelMeshEXT) + { + decl_type.array.erase(decl_type.array.begin()); + decl_type.array_size_literal.erase(decl_type.array_size_literal.begin()); + } + statement(to_interpolation_qualifiers(get_decoration_bitset(var.self)), variable_decl(decl_type, name), " : ", + semantic, ";"); - // Structs and arrays should consume more locations. - uint32_t consumed_locations = type_to_consumed_locations(decl_type); - for (uint32_t i = 0; i < consumed_locations; i++) - active_locations.insert(location_number + i); - } - } - else - { - statement(variable_decl(type, name), " : ", binding, ";"); - } + // Structs and arrays should consume more locations. + uint32_t consumed_locations = type_to_consumed_locations(decl_type); + for (uint32_t i = 0; i < consumed_locations; i++) + active_locations.insert(location_number + i); + } + } + else + { + statement(variable_decl(type, name), " : ", binding, ";"); + } } std::string CompilerHLSL::builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) { - switch (builtin) - { - case BuiltInVertexId: - return "gl_VertexID"; - case BuiltInInstanceId: - return "gl_InstanceID"; - case BuiltInNumWorkgroups: - { - if (!num_workgroups_builtin) - SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. " - "Cannot emit code for this builtin."); + switch (builtin) + { + case BuiltInVertexId: + return "gl_VertexID"; + case BuiltInInstanceId: + return "gl_InstanceID"; + case BuiltInNumWorkgroups: + { + if (!num_workgroups_builtin) + SPIRV_CROSS_THROW("NumWorkgroups builtin is used, but remap_num_workgroups_builtin() was not called. " + "Cannot emit code for this builtin."); - auto &var = get(num_workgroups_builtin); - auto &type = get(var.basetype); - auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0)); - ParsedIR::sanitize_underscores(ret); - return ret; - } - case BuiltInPointCoord: - // Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set. 
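        // Illustrative: every gl_PointCoord read collapses to the constant below,
        // e.g. 'float2 pc = float2(0.5f, 0.5f);' -- the real per-fragment value is lost.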
- return "float2(0.5f, 0.5f)"; - case BuiltInSubgroupLocalInvocationId: - return "WaveGetLaneIndex()"; - case BuiltInSubgroupSize: - return "WaveGetLaneCount()"; - case BuiltInHelperInvocation: - return "IsHelperLane()"; + auto &var = get(num_workgroups_builtin); + auto &type = get(var.basetype); + auto ret = join(to_name(num_workgroups_builtin), "_", get_member_name(type.self, 0)); + ParsedIR::sanitize_underscores(ret); + return ret; + } + case BuiltInPointCoord: + // Crude hack, but there is no real alternative. This path is only enabled if point_coord_compat is set. + return "float2(0.5f, 0.5f)"; + case BuiltInSubgroupLocalInvocationId: + return "WaveGetLaneIndex()"; + case BuiltInSubgroupSize: + return "WaveGetLaneCount()"; + case BuiltInHelperInvocation: + return "IsHelperLane()"; - default: - return CompilerGLSL::builtin_to_glsl(builtin, storage); - } + default: + return CompilerGLSL::builtin_to_glsl(builtin, storage); + } } void CompilerHLSL::emit_builtin_variables() { - Bitset builtins = active_input_builtins; - builtins.merge_or(active_output_builtins); + Bitset builtins = active_input_builtins; + builtins.merge_or(active_output_builtins); - std::unordered_map builtin_to_initializer; + std::unordered_map builtin_to_initializer; - // We need to declare sample mask with the same type that module declares it. - // Sample mask is somewhat special in that SPIR-V has an array, and we can copy that array, so we need to - // match sign. - SPIRType::BaseType sample_mask_in_basetype = SPIRType::Void; - SPIRType::BaseType sample_mask_out_basetype = SPIRType::Void; + // We need to declare sample mask with the same type that module declares it. + // Sample mask is somewhat special in that SPIR-V has an array, and we can copy that array, so we need to + // match sign. 
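    // Illustrative: a module that declares the output mask as int[1] must yield
    //   static int gl_SampleMask[1];
    // while an unsigned declaration must yield 'static uint gl_SampleMask[1];'.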
+ SPIRType::BaseType sample_mask_in_basetype = SPIRType::Void; + SPIRType::BaseType sample_mask_out_basetype = SPIRType::Void; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - if (!is_builtin_variable(var)) - return; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + if (!is_builtin_variable(var)) + return; - auto &type = this->get(var.basetype); - auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + auto &type = this->get(var.basetype); + auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); - if (var.storage == StorageClassInput && builtin == BuiltInSampleMask) - sample_mask_in_basetype = type.basetype; - else if (var.storage == StorageClassOutput && builtin == BuiltInSampleMask) - sample_mask_out_basetype = type.basetype; + if (var.storage == StorageClassInput && builtin == BuiltInSampleMask) + sample_mask_in_basetype = type.basetype; + else if (var.storage == StorageClassOutput && builtin == BuiltInSampleMask) + sample_mask_out_basetype = type.basetype; - if (var.initializer && var.storage == StorageClassOutput) - { - auto *c = this->maybe_get(var.initializer); - if (!c) - return; + if (var.initializer && var.storage == StorageClassOutput) + { + auto *c = this->maybe_get(var.initializer); + if (!c) + return; - if (type.basetype == SPIRType::Struct) - { - uint32_t member_count = uint32_t(type.member_types.size()); - for (uint32_t i = 0; i < member_count; i++) - { - if (has_member_decoration(type.self, i, DecorationBuiltIn)) - { - builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] = - c->subconstants[i]; - } - } - } - else if (has_decoration(var.self, DecorationBuiltIn)) - { - builtin_to_initializer[builtin] = var.initializer; - } - } - }); + if (type.basetype == SPIRType::Struct) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationBuiltIn)) + { + builtin_to_initializer[get_member_decoration(type.self, i, DecorationBuiltIn)] = + c->subconstants[i]; + } + } + } + else if (has_decoration(var.self, DecorationBuiltIn)) + { + builtin_to_initializer[builtin] = var.initializer; + } + } + }); - // Emit global variables for the interface variables which are statically used by the shader. - builtins.for_each_bit([&](uint32_t i) { - const char *type = nullptr; - auto builtin = static_cast(i); - uint32_t array_size = 0; + // Emit global variables for the interface variables which are statically used by the shader. 
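    // Illustrative: a fragment shader using FragCoord and FragDepth would get, e.g.
    //   static float4 gl_FragCoord;
    //   static float gl_FragDepth;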
+ builtins.for_each_bit([&](uint32_t i) { + const char *type = nullptr; + auto builtin = static_cast(i); + uint32_t array_size = 0; - string init_expr; - auto init_itr = builtin_to_initializer.find(builtin); - if (init_itr != builtin_to_initializer.end()) - init_expr = join(" = ", to_expression(init_itr->second)); + string init_expr; + auto init_itr = builtin_to_initializer.find(builtin); + if (init_itr != builtin_to_initializer.end()) + init_expr = join(" = ", to_expression(init_itr->second)); - if (get_execution_model() == ExecutionModelMeshEXT) - { - if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || - builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId || - builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT || - builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT || - builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) - { - return; - } - } + if (get_execution_model() == ExecutionModelMeshEXT) + { + if (builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance || builtin == BuiltInLayer || builtin == BuiltInPrimitiveId || + builtin == BuiltInViewportIndex || builtin == BuiltInCullPrimitiveEXT || + builtin == BuiltInPrimitiveShadingRateKHR || builtin == BuiltInPrimitivePointIndicesEXT || + builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) + { + return; + } + } - switch (builtin) - { - case BuiltInFragCoord: - case BuiltInPosition: - type = "float4"; - break; + switch (builtin) + { + case BuiltInFragCoord: + case BuiltInPosition: + type = "float4"; + break; - case BuiltInFragDepth: - type = "float"; - break; + case BuiltInFragDepth: + type = "float"; + break; - case BuiltInVertexId: - case BuiltInVertexIndex: - case BuiltInInstanceIndex: - type = "int"; - if (hlsl_options.support_nonzero_base_vertex_base_instance || hlsl_options.shader_model >= 68) - base_vertex_info.used = true; - break; + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + type = "int"; + if (hlsl_options.support_nonzero_base_vertex_base_instance || hlsl_options.shader_model >= 68) + base_vertex_info.used = true; + break; - case BuiltInBaseVertex: - case BuiltInBaseInstance: - type = "int"; - base_vertex_info.used = true; - break; + case BuiltInBaseVertex: + case BuiltInBaseInstance: + type = "int"; + base_vertex_info.used = true; + break; - case BuiltInInstanceId: - case BuiltInSampleId: - type = "int"; - break; + case BuiltInInstanceId: + case BuiltInSampleId: + type = "int"; + break; - case BuiltInPointSize: - if (hlsl_options.point_size_compat || hlsl_options.shader_model <= 30) - { - // Just emit the global variable, it will be ignored. - type = "float"; - break; - } - else - SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); + case BuiltInPointSize: + if (hlsl_options.point_size_compat || hlsl_options.shader_model <= 30) + { + // Just emit the global variable, it will be ignored. 
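                // Illustrative: this emits 'static float gl_PointSize;', which no
                // HLSL semantic consumes, so writes to it are effectively discarded.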
+ type = "float"; + break; + } + else + SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); - case BuiltInGlobalInvocationId: - case BuiltInLocalInvocationId: - case BuiltInWorkgroupId: - type = "uint3"; - break; + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + type = "uint3"; + break; - case BuiltInLocalInvocationIndex: - type = "uint"; - break; + case BuiltInLocalInvocationIndex: + type = "uint"; + break; - case BuiltInFrontFacing: - type = "bool"; - break; + case BuiltInFrontFacing: + type = "bool"; + break; - case BuiltInNumWorkgroups: - case BuiltInPointCoord: - // Handled specially. - break; + case BuiltInNumWorkgroups: + case BuiltInPointCoord: + // Handled specially. + break; - case BuiltInSubgroupLocalInvocationId: - case BuiltInSubgroupSize: - if (hlsl_options.shader_model < 60) - SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); - break; + case BuiltInSubgroupLocalInvocationId: + case BuiltInSubgroupSize: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); + break; - case BuiltInSubgroupEqMask: - case BuiltInSubgroupLtMask: - case BuiltInSubgroupLeMask: - case BuiltInSubgroupGtMask: - case BuiltInSubgroupGeMask: - if (hlsl_options.shader_model < 60) - SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); - type = "uint4"; - break; + case BuiltInSubgroupEqMask: + case BuiltInSubgroupLtMask: + case BuiltInSubgroupLeMask: + case BuiltInSubgroupGtMask: + case BuiltInSubgroupGeMask: + if (hlsl_options.shader_model < 60) + SPIRV_CROSS_THROW("Need SM 6.0 for Wave ops."); + type = "uint4"; + break; - case BuiltInHelperInvocation: - if (hlsl_options.shader_model < 50) - SPIRV_CROSS_THROW("Need SM 5.0 for Helper Invocation."); - break; + case BuiltInHelperInvocation: + if (hlsl_options.shader_model < 50) + SPIRV_CROSS_THROW("Need SM 5.0 for Helper Invocation."); + break; - case BuiltInClipDistance: - array_size = clip_distance_count; - type = "float"; - break; + case BuiltInClipDistance: + array_size = clip_distance_count; + type = "float"; + break; - case BuiltInCullDistance: - array_size = cull_distance_count; - type = "float"; - break; + case BuiltInCullDistance: + array_size = cull_distance_count; + type = "float"; + break; - case BuiltInSampleMask: - if (active_input_builtins.get(BuiltInSampleMask)) - type = sample_mask_in_basetype == SPIRType::UInt ? "uint" : "int"; - else - type = sample_mask_out_basetype == SPIRType::UInt ? "uint" : "int"; - array_size = 1; - break; + case BuiltInSampleMask: + if (active_input_builtins.get(BuiltInSampleMask)) + type = sample_mask_in_basetype == SPIRType::UInt ? "uint" : "int"; + else + type = sample_mask_out_basetype == SPIRType::UInt ? "uint" : "int"; + array_size = 1; + break; - case BuiltInPrimitiveId: - case BuiltInViewIndex: - case BuiltInLayer: - type = "uint"; - break; + case BuiltInPrimitiveId: + case BuiltInViewIndex: + case BuiltInLayer: + type = "uint"; + break; - case BuiltInViewportIndex: - case BuiltInPrimitiveShadingRateKHR: - case BuiltInPrimitiveLineIndicesEXT: - case BuiltInCullPrimitiveEXT: - type = "uint"; - break; + case BuiltInViewportIndex: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInCullPrimitiveEXT: + type = "uint"; + break; - default: - SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); - } + default: + SPIRV_CROSS_THROW(join("Unsupported builtin in HLSL: ", unsigned(builtin))); + } - StorageClass storage = active_input_builtins.get(i) ? 
StorageClassInput : StorageClassOutput; + StorageClass storage = active_input_builtins.get(i) ? StorageClassInput : StorageClassOutput; - if (type) - { - if (array_size) - statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";"); - else - statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";"); - } + if (type) + { + if (array_size) + statement("static ", type, " ", builtin_to_glsl(builtin, storage), "[", array_size, "]", init_expr, ";"); + else + statement("static ", type, " ", builtin_to_glsl(builtin, storage), init_expr, ";"); + } - // SampleMask can be both in and out with sample builtin, in this case we have already - // declared the input variable and we need to add the output one now. - if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i)) - { - type = sample_mask_out_basetype == SPIRType::UInt ? "uint" : "int"; - if (array_size) - statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), "[", array_size, "]", init_expr, ";"); - else - statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";"); - } - }); + // SampleMask can be both in and out with sample builtin, in this case we have already + // declared the input variable and we need to add the output one now. + if (builtin == BuiltInSampleMask && storage == StorageClassInput && this->active_output_builtins.get(i)) + { + type = sample_mask_out_basetype == SPIRType::UInt ? "uint" : "int"; + if (array_size) + statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), "[", array_size, "]", init_expr, ";"); + else + statement("static ", type, " ", this->builtin_to_glsl(builtin, StorageClassOutput), init_expr, ";"); + } + }); - if (base_vertex_info.used && hlsl_options.shader_model < 68) - { - string binding_info; - if (base_vertex_info.explicit_binding) - { - binding_info = join(" : register(b", base_vertex_info.register_index); - if (base_vertex_info.register_space) - binding_info += join(", space", base_vertex_info.register_space); - binding_info += ")"; - } - statement("cbuffer SPIRV_Cross_VertexInfo", binding_info); - begin_scope(); - statement("int SPIRV_Cross_BaseVertex;"); - statement("int SPIRV_Cross_BaseInstance;"); - end_scope_decl(); - statement(""); - } + if (base_vertex_info.used && hlsl_options.shader_model < 68) + { + string binding_info; + if (base_vertex_info.explicit_binding) + { + binding_info = join(" : register(b", base_vertex_info.register_index); + if (base_vertex_info.register_space) + binding_info += join(", space", base_vertex_info.register_space); + binding_info += ")"; + } + statement("cbuffer SPIRV_Cross_VertexInfo", binding_info); + begin_scope(); + statement("int SPIRV_Cross_BaseVertex;"); + statement("int SPIRV_Cross_BaseInstance;"); + end_scope_decl(); + statement(""); + } } void CompilerHLSL::set_hlsl_aux_buffer_binding(HLSLAuxBinding binding, uint32_t register_index, uint32_t register_space) { - if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) - { - base_vertex_info.explicit_binding = true; - base_vertex_info.register_space = register_space; - base_vertex_info.register_index = register_index; - } + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + { + base_vertex_info.explicit_binding = true; + base_vertex_info.register_space = register_space; + base_vertex_info.register_index = register_index; + } } void CompilerHLSL::unset_hlsl_aux_buffer_binding(HLSLAuxBinding binding) { - 
if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) - base_vertex_info.explicit_binding = false; + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + base_vertex_info.explicit_binding = false; } bool CompilerHLSL::is_hlsl_aux_buffer_binding_used(HLSLAuxBinding binding) const { - if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) - return base_vertex_info.used; - else - return false; + if (binding == HLSL_AUX_BINDING_BASE_VERTEX_INSTANCE) + return base_vertex_info.used; + else + return false; } void CompilerHLSL::emit_composite_constants() { - // HLSL cannot declare structs or arrays inline, so we must move them out to - // global constants directly. - bool emitted = false; + // HLSL cannot declare structs or arrays inline, so we must move them out to + // global constants directly. + bool emitted = false; - ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { - if (c.specialization) - return; + ir.for_each_typed_id([&](uint32_t, SPIRConstant &c) { + if (c.specialization) + return; - auto &type = this->get(c.constant_type); + auto &type = this->get(c.constant_type); - if (type.basetype == SPIRType::Struct && is_builtin_type(type)) - return; + if (type.basetype == SPIRType::Struct && is_builtin_type(type)) + return; - if (type.basetype == SPIRType::Struct || !type.array.empty()) - { - add_resource_name(c.self); - auto name = to_name(c.self); - statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); - emitted = true; - } - }); + if (type.basetype == SPIRType::Struct || !type.array.empty()) + { + add_resource_name(c.self); + auto name = to_name(c.self); + statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); + emitted = true; + } + }); - if (emitted) - statement(""); + if (emitted) + statement(""); } void CompilerHLSL::emit_specialization_constants_and_structs() { - bool emitted = false; - SpecializationConstant wg_x, wg_y, wg_z; - ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + bool emitted = false; + SpecializationConstant wg_x, wg_y, wg_z; + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); - std::unordered_set io_block_types; - ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - auto &type = this->get(var.basetype); - if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) && - !var.remapped_variable && type.pointer && !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self) && - has_decoration(type.self, DecorationBlock)) - { - io_block_types.insert(type.self); - } - }); + std::unordered_set io_block_types; + ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { + auto &type = this->get(var.basetype); + if ((var.storage == StorageClassInput || var.storage == StorageClassOutput) && + !var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self) && + has_decoration(type.self, DecorationBlock)) + { + io_block_types.insert(type.self); + } + }); - auto loop_lock = ir.create_loop_hard_lock(); - for (auto &id_ : ir.ids_for_constant_undef_or_type) - { - auto &id = ir.ids[id_]; + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_undef_or_type) + { + auto &id = ir.ids[id_]; - if (id.get_type() == TypeConstant) - { - auto &c = id.get(); + if (id.get_type() == TypeConstant) + { + auto &c = id.get(); - if (c.self == workgroup_size_id) - { - statement("static const uint3 
gl_WorkGroupSize = ", - constant_expression(get(workgroup_size_id)), ";"); - emitted = true; - } - else if (c.specialization) - { - auto &type = get(c.constant_type); - add_resource_name(c.self); - auto name = to_name(c.self); + if (c.self == workgroup_size_id) + { + statement("static const uint3 gl_WorkGroupSize = ", + constant_expression(get(workgroup_size_id)), ";"); + emitted = true; + } + else if (c.specialization) + { + auto &type = get(c.constant_type); + add_resource_name(c.self); + auto name = to_name(c.self); - if (has_decoration(c.self, DecorationSpecId)) - { - // HLSL does not support specialization constants, so fallback to macros. - c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + if (has_decoration(c.self, DecorationSpecId)) + { + // HLSL does not support specialization constants, so fallback to macros. + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); - statement("#ifndef ", c.specialization_constant_macro_name); - statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); - statement("#endif"); - statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); - } - else - statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); + statement("#ifndef ", c.specialization_constant_macro_name); + statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); + statement("#endif"); + statement("static const ", variable_decl(type, name), " = ", c.specialization_constant_macro_name, ";"); + } + else + statement("static const ", variable_decl(type, name), " = ", constant_expression(c), ";"); - emitted = true; - } - } - else if (id.get_type() == TypeConstantOp) - { - auto &c = id.get(); - auto &type = get(c.basetype); - add_resource_name(c.self); - auto name = to_name(c.self); - statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); - emitted = true; - } - else if (id.get_type() == TypeType) - { - auto &type = id.get(); - bool is_non_io_block = has_decoration(type.self, DecorationBlock) && - io_block_types.count(type.self) == 0; - bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock); - if (type.basetype == SPIRType::Struct && type.array.empty() && - !type.pointer && !is_non_io_block && !is_buffer_block) - { - if (emitted) - statement(""); - emitted = false; + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + auto &c = id.get(); + auto &type = get(c.basetype); + add_resource_name(c.self); + auto name = to_name(c.self); + statement("static const ", variable_decl(type, name), " = ", constant_op_expression(c), ";"); + emitted = true; + } + else if (id.get_type() == TypeType) + { + auto &type = id.get(); + bool is_non_io_block = has_decoration(type.self, DecorationBlock) && + io_block_types.count(type.self) == 0; + bool is_buffer_block = has_decoration(type.self, DecorationBufferBlock); + if (type.basetype == SPIRType::Struct && type.array.empty() && + !type.pointer && !is_non_io_block && !is_buffer_block) + { + if (emitted) + statement(""); + emitted = false; - emit_struct(type); - } - } - else if (id.get_type() == TypeUndef) - { - auto &undef = id.get(); - auto &type = this->get(undef.basetype); - // OpUndef can be void for some reason ... 
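            // Illustrative ('_42' is a placeholder name): a non-void undef becomes
            //   static int _42;   // or 'static int _42 = 0;' with force_zero_initialized_variables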
- if (type.basetype == SPIRType::Void) - return; + emit_struct(type); + } + } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get(); + auto &type = this->get(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; - string initializer; - if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) - initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); - statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); - emitted = true; - } - } + statement("static ", variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); + emitted = true; + } + } - if (emitted) - statement(""); + if (emitted) + statement(""); } void CompilerHLSL::replace_illegal_names() { - static const unordered_set keywords = { - // Additional HLSL specific keywords. - // From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords - "AppendStructuredBuffer", "asm", "asm_fragment", - "BlendState", "bool", "break", "Buffer", "ByteAddressBuffer", - "case", "cbuffer", "centroid", "class", "column_major", "compile", - "compile_fragment", "CompileShader", "const", "continue", "ComputeShader", - "ConsumeStructuredBuffer", - "default", "DepthStencilState", "DepthStencilView", "discard", "do", - "double", "DomainShader", "dword", - "else", "export", "false", "float", "for", "fxgroup", - "GeometryShader", "groupshared", "half", "HullShader", - "indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface", - "line", "lineadj", "linear", "LineStream", - "matrix", "min16float", "min10float", "min16int", "min16uint", - "namespace", "nointerpolation", "noperspective", "NULL", - "out", "OutputPatch", - "payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point", - "PointStream", "precise", "RasterizerState", "RenderTargetView", - "return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer", - "RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D", - "RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState", - "SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state", - "static", "string", "struct", "switch", "StructuredBuffer", "tbuffer", - "technique", "technique10", "technique11", "texture", "Texture1D", - "Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray", - "Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle", - "triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned", - "vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while", - }; + static const unordered_set keywords = { + // Additional HLSL specific keywords. 
+ // From https://docs.microsoft.com/en-US/windows/win32/direct3dhlsl/dx-graphics-hlsl-appendix-keywords + "AppendStructuredBuffer", "asm", "asm_fragment", + "BlendState", "bool", "break", "Buffer", "ByteAddressBuffer", + "case", "cbuffer", "centroid", "class", "column_major", "compile", + "compile_fragment", "CompileShader", "const", "continue", "ComputeShader", + "ConsumeStructuredBuffer", + "default", "DepthStencilState", "DepthStencilView", "discard", "do", + "double", "DomainShader", "dword", + "else", "export", "false", "float", "for", "fxgroup", + "GeometryShader", "groupshared", "half", "HullShader", + "indices", "if", "in", "inline", "inout", "InputPatch", "int", "interface", + "line", "lineadj", "linear", "LineStream", + "matrix", "min16float", "min10float", "min16int", "min16uint", + "namespace", "nointerpolation", "noperspective", "NULL", + "out", "OutputPatch", + "payload", "packoffset", "pass", "pixelfragment", "PixelShader", "point", + "PointStream", "precise", "RasterizerState", "RenderTargetView", + "return", "register", "row_major", "RWBuffer", "RWByteAddressBuffer", + "RWStructuredBuffer", "RWTexture1D", "RWTexture1DArray", "RWTexture2D", + "RWTexture2DArray", "RWTexture3D", "sample", "sampler", "SamplerState", + "SamplerComparisonState", "shared", "snorm", "stateblock", "stateblock_state", + "static", "string", "struct", "switch", "StructuredBuffer", "tbuffer", + "technique", "technique10", "technique11", "texture", "Texture1D", + "Texture1DArray", "Texture2D", "Texture2DArray", "Texture2DMS", "Texture2DMSArray", + "Texture3D", "TextureCube", "TextureCubeArray", "true", "typedef", "triangle", + "triangleadj", "TriangleStream", "uint", "uniform", "unorm", "unsigned", + "vector", "vertexfragment", "VertexShader", "vertices", "void", "volatile", "while", + }; - CompilerGLSL::replace_illegal_names(keywords); - CompilerGLSL::replace_illegal_names(); + CompilerGLSL::replace_illegal_names(keywords); + CompilerGLSL::replace_illegal_names(); } SPIRType::BaseType CompilerHLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type) { - switch (builtin) - { - case BuiltInSampleMask: - // We declare sample mask array with module type, so always use default_type here. - return default_type; - default: - return CompilerGLSL::get_builtin_basetype(builtin, default_type); - } + switch (builtin) + { + case BuiltInSampleMask: + // We declare sample mask array with module type, so always use default_type here. 
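        // Illustrative: if the module declares the mask as int[1], the static
        // 'int gl_SampleMask[1]' and all copies through it stay signed; forcing a
        // fixed sign here would produce mismatched assignments.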
+ return default_type; + default: + return CompilerGLSL::get_builtin_basetype(builtin, default_type); + } } void CompilerHLSL::emit_resources() { - auto &execution = get_entry_point(); - - replace_illegal_names(); - - switch (execution.model) - { - case ExecutionModelGeometry: - case ExecutionModelTessellationControl: - case ExecutionModelTessellationEvaluation: - case ExecutionModelMeshEXT: - fixup_implicit_builtin_block_names(execution.model); - break; - - default: - break; - } - - emit_specialization_constants_and_structs(); - emit_composite_constants(); - - bool emitted = false; - - // Output UBOs and SSBOs - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - - bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform; - bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || - ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); - - if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && - has_block_flags) - { - emit_buffer_block(var); - emitted = true; - } - }); - - // Output push constant blocks - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && - !is_hidden_variable(var)) - { - emit_push_constant_block(var); - emitted = true; - } - }); - - if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30 && - active_output_builtins.get(BuiltInPosition)) - { - statement("uniform float4 gl_HalfPixel;"); - emitted = true; - } - - bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30; - - // Output Uniform Constants (values, samplers, images, etc). - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - - // If we're remapping separate samplers and images, only emit the combined samplers. - if (skip_separate_image_sampler) - { - // Sampler buffers are always used without a sampler, and they will also work in regular D3D. - bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; - bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; - bool separate_sampler = type.basetype == SPIRType::Sampler; - if (!sampler_buffer && (separate_image || separate_sampler)) - return; - } - - if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable && - type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) && - !is_hidden_variable(var)) - { - emit_uniform(var); - emitted = true; - } - }); - - if (emitted) - statement(""); - emitted = false; - - // Emit builtin input and output variables here. - emit_builtin_variables(); - - if (execution.model != ExecutionModelMeshEXT) - { - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - - if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && - (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self)) - { - // Builtin variables are handled separately. 
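            // Illustrative (hypothetical name): an 'in float4 vColor' interface
            // variable is declared as a file-scope 'static float4 vColor;' here,
            // and the entry-point wrapper copies SPIRV_Cross_Input into it.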
- emit_interface_block_globally(var); - emitted = true; - } - }); - } - - if (emitted) - statement(""); - emitted = false; - - require_input = false; - require_output = false; - unordered_set active_inputs; - unordered_set active_outputs; - - struct IOVariable - { - const SPIRVariable *var; - uint32_t location; - uint32_t block_member_index; - bool block; - }; - - SmallVector input_variables; - SmallVector output_variables; - - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); - - if (var.storage != StorageClassInput && var.storage != StorageClassOutput) - return; - - if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self)) - { - if (block) - { - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) - { - uint32_t location = get_declared_member_location(var, i, false); - if (var.storage == StorageClassInput) - input_variables.push_back({ &var, location, i, true }); - else - output_variables.push_back({ &var, location, i, true }); - } - } - else - { - uint32_t location = get_decoration(var.self, DecorationLocation); - if (var.storage == StorageClassInput) - input_variables.push_back({ &var, location, 0, false }); - else - output_variables.push_back({ &var, location, 0, false }); - } - } - }); - - const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool { - // Sort input and output variables based on, from more robust to less robust: - // - Location - // - Variable has a location - // - Name comparison - // - Variable has a name - // - Fallback: ID - bool has_location_a = a.block || has_decoration(a.var->self, DecorationLocation); - bool has_location_b = b.block || has_decoration(b.var->self, DecorationLocation); - - if (has_location_a && has_location_b) - return a.location < b.location; - else if (has_location_a && !has_location_b) - return true; - else if (!has_location_a && has_location_b) - return false; - - const auto &name1 = to_name(a.var->self); - const auto &name2 = to_name(b.var->self); - - if (name1.empty() && name2.empty()) - return a.var->self < b.var->self; - else if (name1.empty()) - return true; - else if (name2.empty()) - return false; - - return name1.compare(name2) < 0; - }; - - auto input_builtins = active_input_builtins; - input_builtins.clear(BuiltInNumWorkgroups); - input_builtins.clear(BuiltInPointCoord); - input_builtins.clear(BuiltInSubgroupSize); - input_builtins.clear(BuiltInSubgroupLocalInvocationId); - input_builtins.clear(BuiltInSubgroupEqMask); - input_builtins.clear(BuiltInSubgroupLtMask); - input_builtins.clear(BuiltInSubgroupLeMask); - input_builtins.clear(BuiltInSubgroupGtMask); - input_builtins.clear(BuiltInSubgroupGeMask); - - if (!input_variables.empty() || !input_builtins.empty()) - { - require_input = true; - statement("struct SPIRV_Cross_Input"); - - begin_scope(); - sort(input_variables.begin(), input_variables.end(), variable_compare); - for (auto &var : input_variables) - { - if (var.block) - emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_inputs); - else - emit_interface_block_in_struct(*var.var, active_inputs); - } - emit_builtin_inputs_in_struct(); - end_scope_decl(); - statement(""); - } - - const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT; - if (!output_variables.empty() || !active_output_builtins.empty()) - { - sort(output_variables.begin(), output_variables.end(), 
variable_compare); - require_output = !is_mesh_shader; - - statement(is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output"); - begin_scope(); - for (auto &var : output_variables) - { - if (is_per_primitive_variable(*var.var)) - continue; - if (var.block && is_mesh_shader && var.block_member_index != 0) - continue; - if (var.block && !is_mesh_shader) - emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); - else - emit_interface_block_in_struct(*var.var, active_outputs); - } - emit_builtin_outputs_in_struct(); - if (!is_mesh_shader) - emit_builtin_primitive_outputs_in_struct(); - end_scope_decl(); - statement(""); - - if (is_mesh_shader) - { - statement("struct gl_MeshPerPrimitiveEXT"); - begin_scope(); - for (auto &var : output_variables) - { - if (!is_per_primitive_variable(*var.var)) - continue; - if (var.block && var.block_member_index != 0) - continue; - - emit_interface_block_in_struct(*var.var, active_outputs); - } - emit_builtin_primitive_outputs_in_struct(); - end_scope_decl(); - statement(""); - } - } - - // Global variables. - for (auto global : global_variables) - { - auto &var = get(global); - if (is_hidden_variable(var, true)) - continue; - - if (var.storage == StorageClassTaskPayloadWorkgroupEXT && is_mesh_shader) - continue; - - if (var.storage != StorageClassOutput) - { - if (!variable_is_lut(var)) - { - add_resource_name(var.self); - - const char *storage = nullptr; - switch (var.storage) - { - case StorageClassWorkgroup: - case StorageClassTaskPayloadWorkgroupEXT: - storage = "groupshared"; - break; - - default: - storage = "static"; - break; - } - - string initializer; - if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && - !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) - { - initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); - } - statement(storage, " ", variable_decl(var), initializer, ";"); - - emitted = true; - } - } - } - - if (emitted) - statement(""); - - if (requires_op_fmod) - { - static const char *types[] = { - "float", - "float2", - "float3", - "float4", - }; - - for (auto &type : types) - { - statement(type, " mod(", type, " x, ", type, " y)"); - begin_scope(); - statement("return x - y * floor(x / y);"); - end_scope(); - statement(""); - } - } - - emit_texture_size_variants(required_texture_size_variants.srv, "4", false, ""); - for (uint32_t norm = 0; norm < 3; norm++) - { - for (uint32_t comp = 0; comp < 4; comp++) - { - static const char *qualifiers[] = { "", "unorm ", "snorm " }; - static const char *vecsizes[] = { "", "2", "3", "4" }; - emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true, - qualifiers[norm]); - } - } - - if (requires_fp16_packing) - { - // HLSL does not pack into a single word sadly :( - statement("uint spvPackHalf2x16(float2 value)"); - begin_scope(); - statement("uint2 Packed = f32tof16(value);"); - statement("return Packed.x | (Packed.y << 16);"); - end_scope(); - statement(""); - - statement("float2 spvUnpackHalf2x16(uint value)"); - begin_scope(); - statement("return f16tof32(uint2(value & 0xffff, value >> 16));"); - end_scope(); - statement(""); - } - - if (requires_uint2_packing) - { - statement("uint64_t spvPackUint2x32(uint2 value)"); - begin_scope(); - statement("return (uint64_t(value.y) << 32) | uint64_t(value.x);"); - end_scope(); - statement(""); - - statement("uint2 
spvUnpackUint2x32(uint64_t value)"); - begin_scope(); - statement("uint2 Unpacked;"); - statement("Unpacked.x = uint(value & 0xffffffff);"); - statement("Unpacked.y = uint(value >> 32);"); - statement("return Unpacked;"); - end_scope(); - statement(""); - } - - if (requires_explicit_fp16_packing) - { - // HLSL does not pack into a single word sadly :( - statement("uint spvPackFloat2x16(min16float2 value)"); - begin_scope(); - statement("uint2 Packed = f32tof16(value);"); - statement("return Packed.x | (Packed.y << 16);"); - end_scope(); - statement(""); - - statement("min16float2 spvUnpackFloat2x16(uint value)"); - begin_scope(); - statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));"); - end_scope(); - statement(""); - } - - // HLSL does not seem to have builtins for these operation, so roll them by hand ... - if (requires_unorm8_packing) - { - statement("uint spvPackUnorm4x8(float4 value)"); - begin_scope(); - statement("uint4 Packed = uint4(round(saturate(value) * 255.0));"); - statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);"); - end_scope(); - statement(""); - - statement("float4 spvUnpackUnorm4x8(uint value)"); - begin_scope(); - statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);"); - statement("return float4(Packed) / 255.0;"); - end_scope(); - statement(""); - } - - if (requires_snorm8_packing) - { - statement("uint spvPackSnorm4x8(float4 value)"); - begin_scope(); - statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;"); - statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));"); - end_scope(); - statement(""); - - statement("float4 spvUnpackSnorm4x8(uint value)"); - begin_scope(); - statement("int SignedValue = int(value);"); - statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;"); - statement("return clamp(float4(Packed) / 127.0, -1.0, 1.0);"); - end_scope(); - statement(""); - } - - if (requires_unorm16_packing) - { - statement("uint spvPackUnorm2x16(float2 value)"); - begin_scope(); - statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));"); - statement("return Packed.x | (Packed.y << 16);"); - end_scope(); - statement(""); - - statement("float2 spvUnpackUnorm2x16(uint value)"); - begin_scope(); - statement("uint2 Packed = uint2(value & 0xffff, value >> 16);"); - statement("return float2(Packed) / 65535.0;"); - end_scope(); - statement(""); - } - - if (requires_snorm16_packing) - { - statement("uint spvPackSnorm2x16(float2 value)"); - begin_scope(); - statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;"); - statement("return uint(Packed.x | (Packed.y << 16));"); - end_scope(); - statement(""); - - statement("float2 spvUnpackSnorm2x16(uint value)"); - begin_scope(); - statement("int SignedValue = int(value);"); - statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;"); - statement("return clamp(float2(Packed) / 32767.0, -1.0, 1.0);"); - end_scope(); - statement(""); - } - - if (requires_bitfield_insert) - { - static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; - for (auto &type : types) - { - statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); - begin_scope(); - statement("uint Mask = Count == 32 ? 
0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); - statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); - end_scope(); - statement(""); - } - } - - if (requires_bitfield_extract) - { - static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; - for (auto &type : unsigned_types) - { - statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)"); - begin_scope(); - statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);"); - statement("return (Base >> Offset) & Mask;"); - end_scope(); - statement(""); - } - - // In this overload, we will have to do sign-extension, which we will emulate by shifting up and down. - static const char *signed_types[] = { "int", "int2", "int3", "int4" }; - for (auto &type : signed_types) - { - statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)"); - begin_scope(); - statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);"); - statement(type, " Masked = (Base >> Offset) & Mask;"); - statement("int ExtendShift = (32 - Count) & 31;"); - statement("return (Masked << ExtendShift) >> ExtendShift;"); - end_scope(); - statement(""); - } - } - - if (requires_inverse_2x2) - { - statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); - statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); - statement("float2x2 spvInverse(float2x2 m)"); - begin_scope(); - statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)"); - statement_no_indent(""); - statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); - statement("adj[0][0] = m[1][1];"); - statement("adj[0][1] = -m[0][1];"); - statement_no_indent(""); - statement("adj[1][0] = -m[1][0];"); - statement("adj[1][1] = m[0][0];"); - statement_no_indent(""); - statement("// Calculate the determinant as a combination of the cofactors of the first row."); - statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);"); - statement_no_indent(""); - statement("// Divide the classical adjoint matrix by the determinant."); - statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); - statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); - end_scope(); - statement(""); - } - - if (requires_inverse_3x3) - { - statement("// Returns the determinant of a 2x2 matrix."); - statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); - begin_scope(); - statement("return a1 * b2 - b1 * a2;"); - end_scope(); - statement_no_indent(""); - statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); - statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); - statement("float3x3 spvInverse(float3x3 m)"); - begin_scope(); - statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)"); - statement_no_indent(""); - statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); - statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);"); - statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);"); - statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);"); - statement_no_indent(""); - statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);"); - statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);"); - statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);"); - statement_no_indent(""); - statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);"); - statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);"); - statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);"); - statement_no_indent(""); - statement("// Calculate the determinant as a combination of the cofactors of the first row."); - statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);"); - statement_no_indent(""); - statement("// Divide the classical adjoint matrix by the determinant."); - statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); - statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); - end_scope(); - statement(""); - } - - if (requires_inverse_4x4) - { - if (!requires_inverse_3x3) - { - statement("// Returns the determinant of a 2x2 matrix."); - statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); - begin_scope(); - statement("return a1 * b2 - b1 * a2;"); - end_scope(); - statement(""); - } - - statement("// Returns the determinant of a 3x3 matrix."); - statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " - "float c2, float c3)"); - begin_scope(); - statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * " - "spvDet2x2(a2, a3, " - "b2, b3);"); - end_scope(); - statement_no_indent(""); - statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); - statement("// adjoint and dividing by the determinant. 
The contents of the matrix are changed."); - statement("float4x4 spvInverse(float4x4 m)"); - begin_scope(); - statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); - statement_no_indent(""); - statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); - statement( - "adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " - "m[3][3]);"); - statement( - "adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " - "m[3][3]);"); - statement( - "adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " - "m[3][3]);"); - statement( - "adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " - "m[2][3]);"); - statement_no_indent(""); - statement( - "adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " - "m[3][3]);"); - statement( - "adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " - "m[3][3]);"); - statement( - "adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " - "m[3][3]);"); - statement( - "adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " - "m[2][3]);"); - statement_no_indent(""); - statement( - "adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " - "m[3][3]);"); - statement( - "adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " - "m[3][3]);"); - statement( - "adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " - "m[3][3]);"); - statement( - "adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " - "m[2][3]);"); - statement_no_indent(""); - statement( - "adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " - "m[3][2]);"); - statement( - "adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " - "m[3][2]);"); - statement( - "adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " - "m[3][2]);"); - statement( - "adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " - "m[2][2]);"); - statement_no_indent(""); - statement("// Calculate the determinant as a combination of the cofactors of the first row."); - statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] " - "* m[3][0]);"); - statement_no_indent(""); - statement("// Divide the classical adjoint matrix by the determinant."); - statement("// If determinant is zero, matrix is not invertable, so leave it unchanged."); - statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;"); - end_scope(); - statement(""); - } - - if (requires_scalar_reflect) - { - // FP16/FP64? No templates in HLSL. - statement("float spvReflect(float i, float n)"); - begin_scope(); - statement("return i - 2.0 * dot(n, i) * n;"); - end_scope(); - statement(""); - } - - if (requires_scalar_refract) - { - // FP16/FP64? No templates in HLSL. 
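        // Illustrative: the helper below mirrors GLSL refract() for scalars, e.g.
        //   spvRefract(1.0, -1.0, 0.5) == 1.0   // k = 1.0 >= 0.0, so the refracted ray is returned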
- statement("float spvRefract(float i, float n, float eta)"); - begin_scope(); - statement("float NoI = n * i;"); - statement("float NoI2 = NoI * NoI;"); - statement("float k = 1.0 - eta * eta * (1.0 - NoI2);"); - statement("if (k < 0.0)"); - begin_scope(); - statement("return 0.0;"); - end_scope(); - statement("else"); - begin_scope(); - statement("return eta * i - (eta * NoI + sqrt(k)) * n;"); - end_scope(); - end_scope(); - statement(""); - } - - if (requires_scalar_faceforward) - { - // FP16/FP64? No templates in HLSL. - statement("float spvFaceForward(float n, float i, float nref)"); - begin_scope(); - statement("return i * nref < 0.0 ? n : -n;"); - end_scope(); - statement(""); - } - - for (TypeID type_id : composite_selection_workaround_types) - { - // Need out variable since HLSL does not support returning arrays. - auto &type = get(type_id); - auto type_str = type_to_glsl(type); - auto type_arr_str = type_to_array_glsl(type, 0); - statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ", - type_str, " true_val", type_arr_str, ", ", - type_str, " false_val", type_arr_str, ")"); - begin_scope(); - statement("if (cond)"); - begin_scope(); - statement("out_value = true_val;"); - end_scope(); - statement("else"); - begin_scope(); - statement("out_value = false_val;"); - end_scope(); - end_scope(); - statement(""); - } - - if (is_mesh_shader && options.vertex.flip_vert_y) - { - statement("float4 spvFlipVertY(float4 v)"); - begin_scope(); - statement("return float4(v.x, -v.y, v.z, v.w);"); - end_scope(); - statement(""); - statement("float spvFlipVertY(float v)"); - begin_scope(); - statement("return -v;"); - end_scope(); - statement(""); - } + auto &execution = get_entry_point(); + + replace_illegal_names(); + + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); + break; + + default: + break; + } + + emit_specialization_constants_and_structs(); + emit_composite_constants(); + + bool emitted = false; + + // Output UBOs and SSBOs + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform; + bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && + has_block_flags) + { + emit_buffer_block(var); + emitted = true; + } + }); + + // Output push constant blocks + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && + !is_hidden_variable(var)) + { + emit_push_constant_block(var); + emitted = true; + } + }); + + if (execution.model == ExecutionModelVertex && hlsl_options.shader_model <= 30 && + active_output_builtins.get(BuiltInPosition)) + { + statement("uniform float4 gl_HalfPixel;"); + emitted = true; + } + + bool skip_separate_image_sampler = !combined_image_samplers.empty() || hlsl_options.shader_model <= 30; + + // Output Uniform Constants (values, samplers, images, etc). 
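    // Illustrative ('uTex' is a hypothetical name): a sampled image typically
    // lands here as, e.g.
    //   Texture2D<float4> uTex : register(t0);
    //   SamplerState _uTex_sampler : register(s0);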
+ ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + // If we're remapping separate samplers and images, only emit the combined samplers. + if (skip_separate_image_sampler) + { + // Sampler buffers are always used without a sampler, and they will also work in regular D3D. + bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (!sampler_buffer && (separate_image || separate_sampler)) + return; + } + + if (var.storage != StorageClassFunction && !is_builtin_variable(var) && !var.remapped_variable && + type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) && + !is_hidden_variable(var)) + { + emit_uniform(var); + emitted = true; + } + }); + + if (emitted) + statement(""); + emitted = false; + + // Emit builtin input and output variables here. + emit_builtin_variables(); + + if (execution.model != ExecutionModelMeshEXT) + { + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + + if (var.storage != StorageClassFunction && !var.remapped_variable && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + // Builtin variables are handled separately. + emit_interface_block_globally(var); + emitted = true; + } + }); + } + + if (emitted) + statement(""); + emitted = false; + + require_input = false; + require_output = false; + unordered_set active_inputs; + unordered_set active_outputs; + + struct IOVariable + { + const SPIRVariable *var; + uint32_t location; + uint32_t block_member_index; + bool block; + }; + + SmallVector input_variables; + SmallVector output_variables; + + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + + if (var.storage != StorageClassInput && var.storage != StorageClassOutput) + return; + + if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + if (block) + { + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + uint32_t location = get_declared_member_location(var, i, false); + if (var.storage == StorageClassInput) + input_variables.push_back({ &var, location, i, true }); + else + output_variables.push_back({ &var, location, i, true }); + } + } + else + { + uint32_t location = get_decoration(var.self, DecorationLocation); + if (var.storage == StorageClassInput) + input_variables.push_back({ &var, location, 0, false }); + else + output_variables.push_back({ &var, location, 0, false }); + } + } + }); + + const auto variable_compare = [&](const IOVariable &a, const IOVariable &b) -> bool { + // Sort input and output variables based on, from more robust to less robust: + // - Location + // - Variable has a location + // - Name comparison + // - Variable has a name + // - Fallback: ID + bool has_location_a = a.block || has_decoration(a.var->self, DecorationLocation); + bool has_location_b = b.block || has_decoration(b.var->self, DecorationLocation); + + if (has_location_a && has_location_b) + return a.location < b.location; + else if (has_location_a && !has_location_b) + return true; + else if (!has_location_a && has_location_b) 
+ return false; + + const auto &name1 = to_name(a.var->self); + const auto &name2 = to_name(b.var->self); + + if (name1.empty() && name2.empty()) + return a.var->self < b.var->self; + else if (name1.empty()) + return true; + else if (name2.empty()) + return false; + + return name1.compare(name2) < 0; + }; + + auto input_builtins = active_input_builtins; + input_builtins.clear(BuiltInNumWorkgroups); + input_builtins.clear(BuiltInPointCoord); + input_builtins.clear(BuiltInSubgroupSize); + input_builtins.clear(BuiltInSubgroupLocalInvocationId); + input_builtins.clear(BuiltInSubgroupEqMask); + input_builtins.clear(BuiltInSubgroupLtMask); + input_builtins.clear(BuiltInSubgroupLeMask); + input_builtins.clear(BuiltInSubgroupGtMask); + input_builtins.clear(BuiltInSubgroupGeMask); + + if (!input_variables.empty() || !input_builtins.empty()) + { + require_input = true; + statement("struct SPIRV_Cross_Input"); + + begin_scope(); + sort(input_variables.begin(), input_variables.end(), variable_compare); + for (auto &var : input_variables) + { + if (var.block) + emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_inputs); + else + emit_interface_block_in_struct(*var.var, active_inputs); + } + emit_builtin_inputs_in_struct(); + end_scope_decl(); + statement(""); + } + + const bool is_mesh_shader = execution.model == ExecutionModelMeshEXT; + if (!output_variables.empty() || !active_output_builtins.empty()) + { + sort(output_variables.begin(), output_variables.end(), variable_compare); + require_output = !is_mesh_shader; + + statement(is_mesh_shader ? "struct gl_MeshPerVertexEXT" : "struct SPIRV_Cross_Output"); + begin_scope(); + for (auto &var : output_variables) + { + if (is_per_primitive_variable(*var.var)) + continue; + if (var.block && is_mesh_shader && var.block_member_index != 0) + continue; + if (var.block && !is_mesh_shader) + emit_interface_block_member_in_struct(*var.var, var.block_member_index, var.location, active_outputs); + else + emit_interface_block_in_struct(*var.var, active_outputs); + } + emit_builtin_outputs_in_struct(); + if (!is_mesh_shader) + emit_builtin_primitive_outputs_in_struct(); + end_scope_decl(); + statement(""); + + if (is_mesh_shader) + { + statement("struct gl_MeshPerPrimitiveEXT"); + begin_scope(); + for (auto &var : output_variables) + { + if (!is_per_primitive_variable(*var.var)) + continue; + if (var.block && var.block_member_index != 0) + continue; + + emit_interface_block_in_struct(*var.var, active_outputs); + } + emit_builtin_primitive_outputs_in_struct(); + end_scope_decl(); + statement(""); + } + } + + // Global variables. 
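For reference, the comparator above degrades gracefully when locations or names are missing. A minimal standalone C++ sketch of the same ordering rules follows; the struct, names and sample data are hypothetical and not part of this file:

    #include <algorithm>
    #include <cstdint>
    #include <string>
    #include <vector>

    // Same precedence as variable_compare: location, then presence of a
    // location, then name, then presence of a name, then ID as a tie-breaker.
    struct Entry
    {
        uint32_t id;
        std::string name; // empty when the variable is unnamed
        bool has_location;
        uint32_t location;
    };

    static bool io_order(const Entry &a, const Entry &b)
    {
        if (a.has_location && b.has_location)
            return a.location < b.location;
        if (a.has_location != b.has_location)
            return a.has_location; // variables with locations sort first
        if (a.name.empty() && b.name.empty())
            return a.id < b.id;
        if (a.name.empty() != b.name.empty())
            return a.name.empty(); // unnamed variables sort first, as above
        return a.name < b.name;
    }

    int main()
    {
        std::vector<Entry> vars = { { 7, "uv", false, 0 }, { 3, "", false, 0 }, { 9, "color", true, 1 } };
        std::sort(vars.begin(), vars.end(), io_order); // -> color (loc 1), then unnamed id 3, then uv
    }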
+ for (auto global : global_variables)
+ {
+ auto &var = get<SPIRVariable>(global);
+ if (is_hidden_variable(var, true))
+ continue;
+
+ if (var.storage == StorageClassTaskPayloadWorkgroupEXT && is_mesh_shader)
+ continue;
+
+ if (var.storage != StorageClassOutput)
+ {
+ if (!variable_is_lut(var))
+ {
+ add_resource_name(var.self);
+
+ const char *storage = nullptr;
+ switch (var.storage)
+ {
+ case StorageClassWorkgroup:
+ case StorageClassTaskPayloadWorkgroupEXT:
+ storage = "groupshared";
+ break;
+
+ default:
+ storage = "static";
+ break;
+ }
+
+ string initializer;
+ if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate &&
+ !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var)))
+ {
+ initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var)));
+ }
+ statement(storage, " ", variable_decl(var), initializer, ";");
+
+ emitted = true;
+ }
+ }
+ }
+
+ if (emitted)
+ statement("");
+
+ if (requires_op_fmod)
+ {
+ static const char *types[] = {
+ "float",
+ "float2",
+ "float3",
+ "float4",
+ };
+
+ for (auto &type : types)
+ {
+ statement(type, " mod(", type, " x, ", type, " y)");
+ begin_scope();
+ statement("return x - y * floor(x / y);");
+ end_scope();
+ statement("");
+ }
+ }
+
+ emit_texture_size_variants(required_texture_size_variants.srv, "4", false, "");
+ for (uint32_t norm = 0; norm < 3; norm++)
+ {
+ for (uint32_t comp = 0; comp < 4; comp++)
+ {
+ static const char *qualifiers[] = { "", "unorm ", "snorm " };
+ static const char *vecsizes[] = { "", "2", "3", "4" };
+ emit_texture_size_variants(required_texture_size_variants.uav[norm][comp], vecsizes[comp], true,
+ qualifiers[norm]);
+ }
+ }
+
+ if (requires_fp16_packing)
+ {
+ // HLSL does not pack into a single word sadly :(
+ statement("uint spvPackHalf2x16(float2 value)");
+ begin_scope();
+ statement("uint2 Packed = f32tof16(value);");
+ statement("return Packed.x | (Packed.y << 16);");
+ end_scope();
+ statement("");
+
+ statement("float2 spvUnpackHalf2x16(uint value)");
+ begin_scope();
+ statement("return f16tof32(uint2(value & 0xffff, value >> 16));");
+ end_scope();
+ statement("");
+ }
+
+ if (requires_uint2_packing)
+ {
+ statement("uint64_t spvPackUint2x32(uint2 value)");
+ begin_scope();
+ statement("return (uint64_t(value.y) << 32) | uint64_t(value.x);");
+ end_scope();
+ statement("");
+
+ statement("uint2 spvUnpackUint2x32(uint64_t value)");
+ begin_scope();
+ statement("uint2 Unpacked;");
+ statement("Unpacked.x = uint(value & 0xffffffff);");
+ statement("Unpacked.y = uint(value >> 32);");
+ statement("return Unpacked;");
+ end_scope();
+ statement("");
+ }
+
+ if (requires_explicit_fp16_packing)
+ {
+ // HLSL does not pack into a single word sadly :(
+ statement("uint spvPackFloat2x16(min16float2 value)");
+ begin_scope();
+ statement("uint2 Packed = f32tof16(value);");
+ statement("return Packed.x | (Packed.y << 16);");
+ end_scope();
+ statement("");
+
+ statement("min16float2 spvUnpackFloat2x16(uint value)");
+ begin_scope();
+ statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));");
+ end_scope();
+ statement("");
+ }
+
+ // HLSL does not seem to have builtins for these operations, so roll them by hand ...
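For reference, the 64-bit packing helpers emitted just above have these exact semantics in plain C++. This is a hypothetical host-side round-trip check, not part of the generated output:

    #include <cassert>
    #include <cstdint>

    // Mirrors spvPackUint2x32 / spvUnpackUint2x32: x goes in the low word,
    // y in the high word.
    static uint64_t pack_uint2x32(uint32_t x, uint32_t y)
    {
        return (uint64_t(y) << 32) | uint64_t(x);
    }

    static void unpack_uint2x32(uint64_t v, uint32_t &x, uint32_t &y)
    {
        x = uint32_t(v & 0xffffffffu);
        y = uint32_t(v >> 32);
    }

    int main()
    {
        uint32_t x = 0, y = 0;
        unpack_uint2x32(pack_uint2x32(0xdeadbeefu, 0x1234u), x, y);
        assert(x == 0xdeadbeefu && y == 0x1234u); // round-trips exactly
    }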
+ if (requires_unorm8_packing) + { + statement("uint spvPackUnorm4x8(float4 value)"); + begin_scope(); + statement("uint4 Packed = uint4(round(saturate(value) * 255.0));"); + statement("return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24);"); + end_scope(); + statement(""); + + statement("float4 spvUnpackUnorm4x8(uint value)"); + begin_scope(); + statement("uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);"); + statement("return float4(Packed) / 255.0;"); + end_scope(); + statement(""); + } + + if (requires_snorm8_packing) + { + statement("uint spvPackSnorm4x8(float4 value)"); + begin_scope(); + statement("int4 Packed = int4(round(clamp(value, -1.0, 1.0) * 127.0)) & 0xff;"); + statement("return uint(Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24));"); + end_scope(); + statement(""); + + statement("float4 spvUnpackSnorm4x8(uint value)"); + begin_scope(); + statement("int SignedValue = int(value);"); + statement("int4 Packed = int4(SignedValue << 24, SignedValue << 16, SignedValue << 8, SignedValue) >> 24;"); + statement("return clamp(float4(Packed) / 127.0, -1.0, 1.0);"); + end_scope(); + statement(""); + } + + if (requires_unorm16_packing) + { + statement("uint spvPackUnorm2x16(float2 value)"); + begin_scope(); + statement("uint2 Packed = uint2(round(saturate(value) * 65535.0));"); + statement("return Packed.x | (Packed.y << 16);"); + end_scope(); + statement(""); + + statement("float2 spvUnpackUnorm2x16(uint value)"); + begin_scope(); + statement("uint2 Packed = uint2(value & 0xffff, value >> 16);"); + statement("return float2(Packed) / 65535.0;"); + end_scope(); + statement(""); + } + + if (requires_snorm16_packing) + { + statement("uint spvPackSnorm2x16(float2 value)"); + begin_scope(); + statement("int2 Packed = int2(round(clamp(value, -1.0, 1.0) * 32767.0)) & 0xffff;"); + statement("return uint(Packed.x | (Packed.y << 16));"); + end_scope(); + statement(""); + + statement("float2 spvUnpackSnorm2x16(uint value)"); + begin_scope(); + statement("int SignedValue = int(value);"); + statement("int2 Packed = int2(SignedValue << 16, SignedValue) >> 16;"); + statement("return clamp(float2(Packed) / 32767.0, -1.0, 1.0);"); + end_scope(); + statement(""); + } + + if (requires_bitfield_insert) + { + static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; + for (auto &type : types) + { + statement(type, " spvBitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); + begin_scope(); + statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); + statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); + end_scope(); + statement(""); + } + } + + if (requires_bitfield_extract) + { + static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; + for (auto &type : unsigned_types) + { + statement(type, " spvBitfieldUExtract(", type, " Base, uint Offset, uint Count)"); + begin_scope(); + statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);"); + statement("return (Base >> Offset) & Mask;"); + end_scope(); + statement(""); + } + + // In this overload, we will have to do sign-extension, which we will emulate by shifting up and down. + static const char *signed_types[] = { "int", "int2", "int3", "int4" }; + for (auto &type : signed_types) + { + statement(type, " spvBitfieldSExtract(", type, " Base, int Offset, int Count)"); + begin_scope(); + statement("int Mask = Count == 32 ? 
-1 : ((1 << Count) - 1);");
+ statement(type, " Masked = (Base >> Offset) & Mask;");
+ statement("int ExtendShift = (32 - Count) & 31;");
+ statement("return (Masked << ExtendShift) >> ExtendShift;");
+ end_scope();
+ statement("");
+ }
+ }
+
+ if (requires_inverse_2x2)
+ {
+ statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
+ statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
+ statement("float2x2 spvInverse(float2x2 m)");
+ begin_scope();
+ statement("float2x2 adj; // The adjoint matrix (inverse after dividing by determinant)");
+ statement_no_indent("");
+ statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
+ statement("adj[0][0] = m[1][1];");
+ statement("adj[0][1] = -m[0][1];");
+ statement_no_indent("");
+ statement("adj[1][0] = -m[1][0];");
+ statement("adj[1][1] = m[0][0];");
+ statement_no_indent("");
+ statement("// Calculate the determinant as a combination of the cofactors of the first row.");
+ statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]);");
+ statement_no_indent("");
+ statement("// Divide the classical adjoint matrix by the determinant.");
+ statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
+ statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
+ end_scope();
+ statement("");
+ }
+
+ if (requires_inverse_3x3)
+ {
+ statement("// Returns the determinant of a 2x2 matrix.");
+ statement("float spvDet2x2(float a1, float a2, float b1, float b2)");
+ begin_scope();
+ statement("return a1 * b2 - b1 * a2;");
+ end_scope();
+ statement_no_indent("");
+ statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical");
+ statement("// adjoint and dividing by the determinant. The contents of the matrix are changed.");
+ statement("float3x3 spvInverse(float3x3 m)");
+ begin_scope();
+ statement("float3x3 adj; // The adjoint matrix (inverse after dividing by determinant)");
+ statement_no_indent("");
+ statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix.");
+ statement("adj[0][0] = spvDet2x2(m[1][1], m[1][2], m[2][1], m[2][2]);");
+ statement("adj[0][1] = -spvDet2x2(m[0][1], m[0][2], m[2][1], m[2][2]);");
+ statement("adj[0][2] = spvDet2x2(m[0][1], m[0][2], m[1][1], m[1][2]);");
+ statement_no_indent("");
+ statement("adj[1][0] = -spvDet2x2(m[1][0], m[1][2], m[2][0], m[2][2]);");
+ statement("adj[1][1] = spvDet2x2(m[0][0], m[0][2], m[2][0], m[2][2]);");
+ statement("adj[1][2] = -spvDet2x2(m[0][0], m[0][2], m[1][0], m[1][2]);");
+ statement_no_indent("");
+ statement("adj[2][0] = spvDet2x2(m[1][0], m[1][1], m[2][0], m[2][1]);");
+ statement("adj[2][1] = -spvDet2x2(m[0][0], m[0][1], m[2][0], m[2][1]);");
+ statement("adj[2][2] = spvDet2x2(m[0][0], m[0][1], m[1][0], m[1][1]);");
+ statement_no_indent("");
+ statement("// Calculate the determinant as a combination of the cofactors of the first row.");
+ statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * m[1][0]) + (adj[0][2] * m[2][0]);");
+ statement_no_indent("");
+ statement("// Divide the classical adjoint matrix by the determinant.");
+ statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
+ statement("return (det != 0.0f) ? 
(adj * (1.0f / det)) : m;"); + end_scope(); + statement(""); + } + + if (requires_inverse_4x4) + { + if (!requires_inverse_3x3) + { + statement("// Returns the determinant of a 2x2 matrix."); + statement("float spvDet2x2(float a1, float a2, float b1, float b2)"); + begin_scope(); + statement("return a1 * b2 - b1 * a2;"); + end_scope(); + statement(""); + } + + statement("// Returns the determinant of a 3x3 matrix."); + statement("float spvDet3x3(float a1, float a2, float a3, float b1, float b2, float b3, float c1, " + "float c2, float c3)"); + begin_scope(); + statement("return a1 * spvDet2x2(b2, b3, c2, c3) - b1 * spvDet2x2(a2, a3, c2, c3) + c1 * " + "spvDet2x2(a2, a3, " + "b2, b3);"); + end_scope(); + statement_no_indent(""); + statement("// Returns the inverse of a matrix, by using the algorithm of calculating the classical"); + statement("// adjoint and dividing by the determinant. The contents of the matrix are changed."); + statement("float4x4 spvInverse(float4x4 m)"); + begin_scope(); + statement("float4x4 adj; // The adjoint matrix (inverse after dividing by determinant)"); + statement_no_indent(""); + statement("// Create the transpose of the cofactors, as the classical adjoint of the matrix."); + statement( + "adj[0][0] = spvDet3x3(m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][1] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[2][1], m[2][2], m[2][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][2] = spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[3][1], m[3][2], " + "m[3][3]);"); + statement( + "adj[0][3] = -spvDet3x3(m[0][1], m[0][2], m[0][3], m[1][1], m[1][2], m[1][3], m[2][1], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[1][0] = -spvDet3x3(m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement( + "adj[1][1] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[2][0], m[2][2], m[2][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement( + "adj[1][2] = -spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[3][0], m[3][2], " + "m[3][3]);"); + statement( + "adj[1][3] = spvDet3x3(m[0][0], m[0][2], m[0][3], m[1][0], m[1][2], m[1][3], m[2][0], m[2][2], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[2][0] = spvDet3x3(m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][1] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[2][0], m[2][1], m[2][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][2] = spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[3][0], m[3][1], " + "m[3][3]);"); + statement( + "adj[2][3] = -spvDet3x3(m[0][0], m[0][1], m[0][3], m[1][0], m[1][1], m[1][3], m[2][0], m[2][1], " + "m[2][3]);"); + statement_no_indent(""); + statement( + "adj[3][0] = -spvDet3x3(m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][1] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[2][0], m[2][1], m[2][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][2] = -spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[3][0], m[3][1], " + "m[3][2]);"); + statement( + "adj[3][3] = spvDet3x3(m[0][0], m[0][1], m[0][2], m[1][0], m[1][1], m[1][2], m[2][0], m[2][1], " + "m[2][2]);"); + statement_no_indent(""); + statement("// Calculate the determinant as a combination of the cofactors of the first row."); + statement("float det = (adj[0][0] * m[0][0]) + (adj[0][1] * 
m[1][0]) + (adj[0][2] * m[2][0]) + (adj[0][3] "
+ "* m[3][0]);");
+ statement_no_indent("");
+ statement("// Divide the classical adjoint matrix by the determinant.");
+ statement("// If determinant is zero, matrix is not invertible, so leave it unchanged.");
+ statement("return (det != 0.0f) ? (adj * (1.0f / det)) : m;");
+ end_scope();
+ statement("");
+ }
+
+ if (requires_scalar_reflect)
+ {
+ // FP16/FP64? No templates in HLSL.
+ statement("float spvReflect(float i, float n)");
+ begin_scope();
+ statement("return i - 2.0 * dot(n, i) * n;");
+ end_scope();
+ statement("");
+ }
+
+ if (requires_scalar_refract)
+ {
+ // FP16/FP64? No templates in HLSL.
+ statement("float spvRefract(float i, float n, float eta)");
+ begin_scope();
+ statement("float NoI = n * i;");
+ statement("float NoI2 = NoI * NoI;");
+ statement("float k = 1.0 - eta * eta * (1.0 - NoI2);");
+ statement("if (k < 0.0)");
+ begin_scope();
+ statement("return 0.0;");
+ end_scope();
+ statement("else");
+ begin_scope();
+ statement("return eta * i - (eta * NoI + sqrt(k)) * n;");
+ end_scope();
+ end_scope();
+ statement("");
+ }
+
+ if (requires_scalar_faceforward)
+ {
+ // FP16/FP64? No templates in HLSL.
+ statement("float spvFaceForward(float n, float i, float nref)");
+ begin_scope();
+ statement("return i * nref < 0.0 ? n : -n;");
+ end_scope();
+ statement("");
+ }
+
+ for (TypeID type_id : composite_selection_workaround_types)
+ {
+ // Need out variable since HLSL does not support returning arrays.
+ auto &type = get<SPIRType>(type_id);
+ auto type_str = type_to_glsl(type);
+ auto type_arr_str = type_to_array_glsl(type, 0);
+ statement("void spvSelectComposite(out ", type_str, " out_value", type_arr_str, ", bool cond, ",
+ type_str, " true_val", type_arr_str, ", ",
+ type_str, " false_val", type_arr_str, ")");
+ begin_scope();
+ statement("if (cond)");
+ begin_scope();
+ statement("out_value = true_val;");
+ end_scope();
+ statement("else");
+ begin_scope();
+ statement("out_value = false_val;");
+ end_scope();
+ end_scope();
+ statement("");
+ }
+
+ if (is_mesh_shader && options.vertex.flip_vert_y)
+ {
+ statement("float4 spvFlipVertY(float4 v)");
+ begin_scope();
+ statement("return float4(v.x, -v.y, v.z, v.w);");
+ end_scope();
+ statement("");
+ statement("float spvFlipVertY(float v)");
+ begin_scope();
+ statement("return -v;");
+ end_scope();
+ statement("");
+ }
}

void CompilerHLSL::emit_texture_size_variants(uint64_t variant_mask, const char *vecsize_qualifier, bool uav,
const char *type_qualifier)
{
- if (variant_mask == 0)
- return;
+ if (variant_mask == 0)
+ return;

- static const char *types[QueryTypeCount] = { "float", "int", "uint" };
- static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray",
- "Texture3D", "Buffer", "TextureCube", "TextureCubeArray",
- "Texture2DMS", "Texture2DMSArray" };
+ static const char *types[QueryTypeCount] = { "float", "int", "uint" };
+ static const char *dims[QueryDimCount] = { "Texture1D", "Texture1DArray", "Texture2D", "Texture2DArray",
+ "Texture3D", "Buffer", "TextureCube", "TextureCubeArray",
+ "Texture2DMS", "Texture2DMSArray" };

- static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false };
+ static const bool has_lod[QueryDimCount] = { true, true, true, true, true, false, true, true, false, false };

- static const char *ret_types[QueryDimCount] = {
- "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3",
- };
+ static const char 
*ret_types[QueryDimCount] = { + "uint", "uint2", "uint2", "uint3", "uint3", "uint", "uint2", "uint3", "uint2", "uint3", + }; - static const uint32_t return_arguments[QueryDimCount] = { - 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, - }; + static const uint32_t return_arguments[QueryDimCount] = { + 1, 2, 2, 3, 3, 1, 2, 3, 2, 3, + }; - for (uint32_t index = 0; index < QueryDimCount; index++) - { - for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++) - { - uint32_t bit = 16 * type_index + index; - uint64_t mask = 1ull << bit; + for (uint32_t index = 0; index < QueryDimCount; index++) + { + for (uint32_t type_index = 0; type_index < QueryTypeCount; type_index++) + { + uint32_t bit = 16 * type_index + index; + uint64_t mask = 1ull << bit; - if ((variant_mask & mask) == 0) - continue; + if ((variant_mask & mask) == 0) + continue; - statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""), - dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ", - (uav ? "" : "uint Level, "), "out uint Param)"); - begin_scope(); - statement(ret_types[index], " ret;"); - switch (return_arguments[index]) - { - case 1: - if (has_lod[index] && !uav) - statement("Tex.GetDimensions(Level, ret.x, Param);"); - else - { - statement("Tex.GetDimensions(ret.x);"); - statement("Param = 0u;"); - } - break; - case 2: - if (has_lod[index] && !uav) - statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); - else if (!uav) - statement("Tex.GetDimensions(ret.x, ret.y, Param);"); - else - { - statement("Tex.GetDimensions(ret.x, ret.y);"); - statement("Param = 0u;"); - } - break; - case 3: - if (has_lod[index] && !uav) - statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); - else if (!uav) - statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); - else - { - statement("Tex.GetDimensions(ret.x, ret.y, ret.z);"); - statement("Param = 0u;"); - } - break; - } + statement(ret_types[index], " spv", (uav ? "Image" : "Texture"), "Size(", (uav ? "RW" : ""), + dims[index], "<", type_qualifier, types[type_index], vecsize_qualifier, "> Tex, ", + (uav ? 
"" : "uint Level, "), "out uint Param)"); + begin_scope(); + statement(ret_types[index], " ret;"); + switch (return_arguments[index]) + { + case 1: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, Param);"); + else + { + statement("Tex.GetDimensions(ret.x);"); + statement("Param = 0u;"); + } + break; + case 2: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, ret.y, Param);"); + else if (!uav) + statement("Tex.GetDimensions(ret.x, ret.y, Param);"); + else + { + statement("Tex.GetDimensions(ret.x, ret.y);"); + statement("Param = 0u;"); + } + break; + case 3: + if (has_lod[index] && !uav) + statement("Tex.GetDimensions(Level, ret.x, ret.y, ret.z, Param);"); + else if (!uav) + statement("Tex.GetDimensions(ret.x, ret.y, ret.z, Param);"); + else + { + statement("Tex.GetDimensions(ret.x, ret.y, ret.z);"); + statement("Param = 0u;"); + } + break; + } - statement("return ret;"); - end_scope(); - statement(""); - } - } + statement("return ret;"); + end_scope(); + statement(""); + } + } } void CompilerHLSL::analyze_meshlet_writes() { - uint32_t id_per_vertex = 0; - uint32_t id_per_primitive = 0; - bool need_per_primitive = false; - bool need_per_vertex = false; + uint32_t id_per_vertex = 0; + uint32_t id_per_primitive = 0; + bool need_per_primitive = false; + bool need_per_vertex = false; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); - if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) - { - auto flags = get_buffer_block_flags(var.self); - if (flags.get(DecorationPerPrimitiveEXT)) - id_per_primitive = var.self; - else - id_per_vertex = var.self; - } - else if (var.storage == StorageClassOutput) - { - Bitset flags; - if (block) - flags = get_buffer_block_flags(var.self); - else - flags = get_decoration_bitset(var.self); + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == StorageClassOutput && block && is_builtin_variable(var)) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + id_per_primitive = var.self; + else + id_per_vertex = var.self; + } + else if (var.storage == StorageClassOutput) + { + Bitset flags; + if (block) + flags = get_buffer_block_flags(var.self); + else + flags = get_decoration_bitset(var.self); - if (flags.get(DecorationPerPrimitiveEXT)) - need_per_primitive = true; - else - need_per_vertex = true; - } - }); + if (flags.get(DecorationPerPrimitiveEXT)) + need_per_primitive = true; + else + need_per_vertex = true; + } + }); - // If we have per-primitive outputs, and no per-primitive builtins, - // empty version of gl_MeshPerPrimitiveEXT will be emitted. - // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block. + // If we have per-primitive outputs, and no per-primitive builtins, + // empty version of gl_MeshPerPrimitiveEXT will be emitted. + // If we don't use block IO for vertex output, we'll also need to synthesize the PerVertex block. 
- const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t {
- auto &execution = get_entry_point();
+ const auto generate_block = [&](const char *block_name, const char *instance_name, bool per_primitive) -> uint32_t {
+ auto &execution = get_entry_point();

- uint32_t op_type = ir.increase_bound_by(4);
- uint32_t op_arr = op_type + 1;
- uint32_t op_ptr = op_type + 2;
- uint32_t op_var = op_type + 3;
+ uint32_t op_type = ir.increase_bound_by(4);
+ uint32_t op_arr = op_type + 1;
+ uint32_t op_ptr = op_type + 2;
+ uint32_t op_var = op_type + 3;

- auto &type = set<SPIRType>(op_type, OpTypeStruct);
- type.basetype = SPIRType::Struct;
- set_name(op_type, block_name);
- set_decoration(op_type, DecorationBlock);
- if (per_primitive)
- set_decoration(op_type, DecorationPerPrimitiveEXT);
+ auto &type = set<SPIRType>(op_type, OpTypeStruct);
+ type.basetype = SPIRType::Struct;
+ set_name(op_type, block_name);
+ set_decoration(op_type, DecorationBlock);
+ if (per_primitive)
+ set_decoration(op_type, DecorationPerPrimitiveEXT);

- auto &arr = set<SPIRType>(op_arr, type);
- arr.parent_type = type.self;
- arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices);
- arr.array_size_literal.push_back(true);
+ auto &arr = set<SPIRType>(op_arr, type);
+ arr.parent_type = type.self;
+ arr.array.push_back(per_primitive ? execution.output_primitives : execution.output_vertices);
+ arr.array_size_literal.push_back(true);

- auto &ptr = set<SPIRType>(op_ptr, arr);
- ptr.parent_type = arr.self;
- ptr.pointer = true;
- ptr.pointer_depth++;
- ptr.storage = StorageClassOutput;
- set_decoration(op_ptr, DecorationBlock);
- set_name(op_ptr, block_name);
+ auto &ptr = set<SPIRType>(op_ptr, arr);
+ ptr.parent_type = arr.self;
+ ptr.pointer = true;
+ ptr.pointer_depth++;
+ ptr.storage = StorageClassOutput;
+ set_decoration(op_ptr, DecorationBlock);
+ set_name(op_ptr, block_name);

- auto &var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput);
- if (per_primitive)
- set_decoration(op_var, DecorationPerPrimitiveEXT);
- set_name(op_var, instance_name);
- execution.interface_variables.push_back(var.self);
+ auto &var = set<SPIRVariable>(op_var, op_ptr, StorageClassOutput);
+ if (per_primitive)
+ set_decoration(op_var, DecorationPerPrimitiveEXT);
+ set_name(op_var, instance_name);
+ execution.interface_variables.push_back(var.self);

- return op_var;
- };
+ return op_var;
+ };

- if (id_per_vertex == 0 && need_per_vertex)
- id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false);
- if (id_per_primitive == 0 && need_per_primitive)
- id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true);
+ if (id_per_vertex == 0 && need_per_vertex)
+ id_per_vertex = generate_block("gl_MeshPerVertexEXT", "gl_MeshVerticesEXT", false);
+ if (id_per_primitive == 0 && need_per_primitive)
+ id_per_primitive = generate_block("gl_MeshPerPrimitiveEXT", "gl_MeshPrimitivesEXT", true);

- unordered_set<uint32_t> processed_func_ids;
- analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids);
+ unordered_set<uint32_t> processed_func_ids;
+ analyze_meshlet_writes(ir.default_entry_point, id_per_vertex, id_per_primitive, processed_func_ids);
}

void CompilerHLSL::analyze_meshlet_writes(uint32_t func_id, uint32_t id_per_vertex, uint32_t id_per_primitive,
std::unordered_set<uint32_t> &processed_func_ids)
{
- // Avoid processing a function more than once
+ // Avoid processing 
a function more than once
+ if (processed_func_ids.find(func_id) != processed_func_ids.end())
+ return;
+ processed_func_ids.insert(func_id);

- auto &func = get<SPIRFunction>(func_id);
- // Recursively establish global args added to functions on which we depend.
- for (auto& block : func.blocks)
- {
- auto &b = get<SPIRBlock>(block);
- for (auto &i : b.ops)
- {
- auto ops = stream(i);
- auto op = static_cast<Op>(i.op);
+ auto &func = get<SPIRFunction>(func_id);
+ // Recursively establish global args added to functions on which we depend.
+ for (auto& block : func.blocks)
+ {
+ auto &b = get<SPIRBlock>(block);
+ for (auto &i : b.ops)
+ {
+ auto ops = stream(i);
+ auto op = static_cast<Op>(i.op);

- switch (op)
- {
- case OpFunctionCall:
- {
- // Then recurse into the function itself to extract globals used internally in the function
- uint32_t inner_func_id = ops[2];
- analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids);
- auto &inner_func = get<SPIRFunction>(inner_func_id);
- for (auto &iarg : inner_func.arguments)
- {
- if (!iarg.alias_global_variable)
- continue;
+ switch (op)
+ {
+ case OpFunctionCall:
+ {
+ // Then recurse into the function itself to extract globals used internally in the function
+ uint32_t inner_func_id = ops[2];
+ analyze_meshlet_writes(inner_func_id, id_per_vertex, id_per_primitive, processed_func_ids);
+ auto &inner_func = get<SPIRFunction>(inner_func_id);
+ for (auto &iarg : inner_func.arguments)
+ {
+ if (!iarg.alias_global_variable)
+ continue;

- bool already_declared = false;
- for (auto &arg : func.arguments)
- {
- if (arg.id == iarg.id)
- {
- already_declared = true;
- break;
- }
- }
+ bool already_declared = false;
+ for (auto &arg : func.arguments)
+ {
+ if (arg.id == iarg.id)
+ {
+ already_declared = true;
+ break;
+ }
+ }

- if (!already_declared)
- {
- // basetype is effectively ignored here since we declare the argument
- // with explicit types. Just pass down a valid type.
- func.arguments.push_back({ expression_type_id(iarg.id), iarg.id,
- iarg.read_count, iarg.write_count, true });
- }
- }
- break;
- }
+ if (!already_declared)
+ {
+ // basetype is effectively ignored here since we declare the argument
+ // with explicit types. Just pass down a valid type.
+ func.arguments.push_back({ expression_type_id(iarg.id), iarg.id,
+ iarg.read_count, iarg.write_count, true });
+ }
+ }
+ break;
+ }

- case OpStore:
- case OpLoad:
- case OpInBoundsAccessChain:
- case OpAccessChain:
- case OpPtrAccessChain:
- case OpInBoundsPtrAccessChain:
- case OpArrayLength:
- {
- auto *var = maybe_get<SPIRVariable>(ops[op == OpStore ? 0 : 2]);
- if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT))
- {
- bool already_declared = false;
- auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn));
+ case OpStore:
+ case OpLoad:
+ case OpInBoundsAccessChain:
+ case OpAccessChain:
+ case OpPtrAccessChain:
+ case OpInBoundsPtrAccessChain:
+ case OpArrayLength:
+ {
+ auto *var = maybe_get<SPIRVariable>(ops[op == OpStore ? 0 : 2]);
+ if (var && (var->storage == StorageClassOutput || var->storage == StorageClassTaskPayloadWorkgroupEXT))
+ {
+ bool already_declared = false;
+ auto builtin_type = BuiltIn(get_decoration(var->self, DecorationBuiltIn));

- uint32_t var_id = var->self;
- if (var->storage != StorageClassTaskPayloadWorkgroupEXT &&
- builtin_type != BuiltInPrimitivePointIndicesEXT &&
- builtin_type != BuiltInPrimitiveLineIndicesEXT &&
- builtin_type != BuiltInPrimitiveTriangleIndicesEXT)
- {
- var_id = is_per_primitive_variable(*var) ? 
id_per_primitive : id_per_vertex; - } + uint32_t var_id = var->self; + if (var->storage != StorageClassTaskPayloadWorkgroupEXT && + builtin_type != BuiltInPrimitivePointIndicesEXT && + builtin_type != BuiltInPrimitiveLineIndicesEXT && + builtin_type != BuiltInPrimitiveTriangleIndicesEXT) + { + var_id = is_per_primitive_variable(*var) ? id_per_primitive : id_per_vertex; + } - for (auto &arg : func.arguments) - { - if (arg.id == var_id) - { - already_declared = true; - break; - } - } + for (auto &arg : func.arguments) + { + if (arg.id == var_id) + { + already_declared = true; + break; + } + } - if (!already_declared) - { - // basetype is effectively ignored here since we declare the argument - // with explicit types. Just pass down a valid type. - uint32_t type_id = expression_type_id(var_id); - if (var->storage == StorageClassTaskPayloadWorkgroupEXT) - func.arguments.push_back({ type_id, var_id, 1u, 0u, true }); - else - func.arguments.push_back({ type_id, var_id, 1u, 1u, true }); - } - } - break; - } + if (!already_declared) + { + // basetype is effectively ignored here since we declare the argument + // with explicit types. Just pass down a valid type. + uint32_t type_id = expression_type_id(var_id); + if (var->storage == StorageClassTaskPayloadWorkgroupEXT) + func.arguments.push_back({ type_id, var_id, 1u, 0u, true }); + else + func.arguments.push_back({ type_id, var_id, 1u, 1u, true }); + } + } + break; + } - default: - break; - } - } - } + default: + break; + } + } + } } string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) { - auto &flags = get_member_decoration_bitset(type.self, index); + auto &flags = get_member_decoration_bitset(type.self, index); - // HLSL can emit row_major or column_major decoration in any struct. - // Do not try to merge combined decorations for children like in GLSL. + // HLSL can emit row_major or column_major decoration in any struct. + // Do not try to merge combined decorations for children like in GLSL. - // Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major". - // The way to deal with this is to multiply everything in inverse order, and reverse the memory layout. - if (flags.get(DecorationColMajor)) - return "row_major "; - else if (flags.get(DecorationRowMajor)) - return "column_major "; + // Flip the convention. HLSL is a bit odd in that the memory layout is column major ... but the language API is "row-major". + // The way to deal with this is to multiply everything in inverse order, and reverse the memory layout. 
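// Concretely (a hedged illustration, not from this file): a SPIR-V ColMajor
// matrix is therefore emitted as "row_major" in HLSL and vice versa. The bytes
// in memory stay put; what SPIR-V computed as M * v is emitted with the
// operands in the opposite order, so the reversed layout and the reversed
// multiply cancel out.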
+ if (flags.get(DecorationColMajor)) + return "row_major "; + else if (flags.get(DecorationRowMajor)) + return "column_major "; - return ""; + return ""; } void CompilerHLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier, uint32_t base_offset) { - auto &membertype = get(member_type_id); + auto &membertype = get(member_type_id); - Bitset memberflags; - auto &memb = ir.meta[type.self].members; - if (index < memb.size()) - memberflags = memb[index].decoration_flags; + Bitset memberflags; + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; - string packing_offset; - bool is_push_constant = type.storage == StorageClassPushConstant; + string packing_offset; + bool is_push_constant = type.storage == StorageClassPushConstant; - if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) && - has_member_decoration(type.self, index, DecorationOffset)) - { - uint32_t offset = memb[index].offset - base_offset; - if (offset & 3) - SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL."); + if ((has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) || is_push_constant) && + has_member_decoration(type.self, index, DecorationOffset)) + { + uint32_t offset = memb[index].offset - base_offset; + if (offset & 3) + SPIRV_CROSS_THROW("Cannot pack on tighter bounds than 4 bytes in HLSL."); - static const char *packing_swizzle[] = { "", ".y", ".z", ".w" }; - packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")"); - } + static const char *packing_swizzle[] = { "", ".y", ".z", ".w" }; + packing_offset = join(" : packoffset(c", offset / 16, packing_swizzle[(offset & 15) >> 2], ")"); + } - statement(layout_for_member(type, index), qualifier, - variable_decl(membertype, to_member_name(type, index)), packing_offset, ";"); + statement(layout_for_member(type, index), qualifier, + variable_decl(membertype, to_member_name(type, index)), packing_offset, ";"); } void CompilerHLSL::emit_rayquery_function(const char *commited, const char *candidate, const uint32_t *ops) { - flush_variable_declaration(ops[0]); - uint32_t is_commited = evaluate_constant_u32(ops[3]); - emit_op(ops[0], ops[1], join(to_expression(ops[2]), is_commited ? commited : candidate), false); + flush_variable_declaration(ops[0]); + uint32_t is_commited = evaluate_constant_u32(ops[3]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), is_commited ? 
commited : candidate), false); } void CompilerHLSL::emit_mesh_tasks(SPIRBlock &block) { - if (block.mesh.payload != 0) - { - statement("DispatchMesh(", to_unpacked_expression(block.mesh.groups[0]), ", ", to_unpacked_expression(block.mesh.groups[1]), ", ", - to_unpacked_expression(block.mesh.groups[2]), ", ", to_unpacked_expression(block.mesh.payload), ");"); - } - else - { - SPIRV_CROSS_THROW("Amplification shader in HLSL must have payload"); - } + if (block.mesh.payload != 0) + { + statement("DispatchMesh(", to_unpacked_expression(block.mesh.groups[0]), ", ", to_unpacked_expression(block.mesh.groups[1]), ", ", + to_unpacked_expression(block.mesh.groups[2]), ", ", to_unpacked_expression(block.mesh.payload), ");"); + } + else + { + SPIRV_CROSS_THROW("Amplification shader in HLSL must have payload"); + } } void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) { - auto &type = get(var.basetype); + auto &type = get(var.basetype); - bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock); + bool is_uav = var.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock); - if (flattened_buffer_blocks.count(var.self)) - { - emit_buffer_block_flattened(var); - } - else if (is_uav) - { - Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); - bool is_coherent = flags.get(DecorationCoherent) && !is_readonly; - bool is_interlocked = interlocked_resources.count(var.self) > 0; + if (flattened_buffer_blocks.count(var.self)) + { + emit_buffer_block_flattened(var); + } + else if (is_uav) + { + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); + bool is_coherent = flags.get(DecorationCoherent) && !is_readonly; + bool is_interlocked = interlocked_resources.count(var.self) > 0; - auto to_structuredbuffer_subtype_name = [this](const SPIRType &parent_type) -> std::string - { - if (parent_type.basetype == SPIRType::Struct && parent_type.member_types.size() == 1) - { - // Use type of first struct member as a StructuredBuffer will have only one '._m0' field in SPIR-V - const auto &member0_type = this->get(parent_type.member_types.front()); - return this->type_to_glsl(member0_type); - } - else - { - // Otherwise, this StructuredBuffer only has a basic subtype, e.g. StructuredBuffer - return this->type_to_glsl(parent_type); - } - }; + auto to_structuredbuffer_subtype_name = [this](const SPIRType &parent_type) -> std::string + { + if (parent_type.basetype == SPIRType::Struct && parent_type.member_types.size() == 1) + { + // Use type of first struct member as a StructuredBuffer will have only one '._m0' field in SPIR-V + const auto &member0_type = this->get(parent_type.member_types.front()); + return this->type_to_glsl(member0_type); + } + else + { + // Otherwise, this StructuredBuffer only has a basic subtype, e.g. StructuredBuffer + return this->type_to_glsl(parent_type); + } + }; - std::string type_name; - if (is_user_type_structured(var.self)) - type_name = join(is_readonly ? "" : is_interlocked ? "RasterizerOrdered" : "RW", "StructuredBuffer<", to_structuredbuffer_subtype_name(type), ">"); - else - type_name = is_readonly ? "ByteAddressBuffer" : is_interlocked ? "RasterizerOrderedByteAddressBuffer" : "RWByteAddressBuffer"; + std::string type_name; + if (is_user_type_structured(var.self)) + type_name = join(is_readonly ? 
"" : is_interlocked ? "RasterizerOrdered" : "RW", "StructuredBuffer<", to_structuredbuffer_subtype_name(type), ">"); + else + type_name = is_readonly ? "ByteAddressBuffer" : is_interlocked ? "RasterizerOrderedByteAddressBuffer" : "RWByteAddressBuffer"; - add_resource_name(var.self); - statement(is_coherent ? "globallycoherent " : "", type_name, " ", to_name(var.self), type_to_array_glsl(type, var.self), - to_resource_binding(var), ";"); - } - else - { - if (type.array.empty()) - { - // Flatten the top-level struct so we can use packoffset, - // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs. - flattened_structs[var.self] = false; + add_resource_name(var.self); + statement(is_coherent ? "globallycoherent " : "", type_name, " ", to_name(var.self), type_to_array_glsl(type, var.self), + to_resource_binding(var), ";"); + } + else + { + if (type.array.empty()) + { + // Flatten the top-level struct so we can use packoffset, + // this restriction is similar to GLSL where layout(offset) is not possible on sub-structs. + flattened_structs[var.self] = false; - // Prefer the block name if possible. - auto buffer_name = to_name(type.self, false); - if (ir.meta[type.self].decoration.alias.empty() || - resource_names.find(buffer_name) != end(resource_names) || - block_names.find(buffer_name) != end(block_names)) - { - buffer_name = get_block_fallback_name(var.self); - } + // Prefer the block name if possible. + auto buffer_name = to_name(type.self, false); + if (ir.meta[type.self].decoration.alias.empty() || + resource_names.find(buffer_name) != end(resource_names) || + block_names.find(buffer_name) != end(block_names)) + { + buffer_name = get_block_fallback_name(var.self); + } - add_variable(block_names, resource_names, buffer_name); + add_variable(block_names, resource_names, buffer_name); - // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. - // This cannot conflict with anything else, so we're safe now. - if (buffer_name.empty()) - buffer_name = join("_", get(var.basetype).self, "_", var.self); + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + if (buffer_name.empty()) + buffer_name = join("_", get(var.basetype).self, "_", var.self); - uint32_t failed_index = 0; - if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index)) - set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); - else - { - SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ", - failed_index, " (name: ", to_member_name(type, failed_index), - ") cannot be expressed with either HLSL packing layout or packoffset.")); - } + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + else + { + SPIRV_CROSS_THROW(join("cbuffer ID ", var.self, " (name: ", buffer_name, "), member index ", + failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } - block_names.insert(buffer_name); + block_names.insert(buffer_name); - // Save for post-reflection later. - declared_block_names[var.self] = buffer_name; + // Save for post-reflection later. 
+ declared_block_names[var.self] = buffer_name;

- type.member_name_cache.clear();
- // var.self can be used as a backup name for the block name,
- // so we need to make sure we don't disturb the name here on a recompile.
- // It will need to be reset if we have to recompile.
- preserve_alias_on_reset(var.self);
- add_resource_name(var.self);
- statement("cbuffer ", buffer_name, to_resource_binding(var));
- begin_scope();
+ type.member_name_cache.clear();
+ // var.self can be used as a backup name for the block name,
+ // so we need to make sure we don't disturb the name here on a recompile.
+ // It will need to be reset if we have to recompile.
+ preserve_alias_on_reset(var.self);
+ add_resource_name(var.self);
+ statement("cbuffer ", buffer_name, to_resource_binding(var));
+ begin_scope();

- uint32_t i = 0;
- for (auto &member : type.member_types)
- {
- add_member_name(type, i);
- auto backup_name = get_member_name(type.self, i);
- auto member_name = to_member_name(type, i);
- member_name = join(to_name(var.self), "_", member_name);
- ParsedIR::sanitize_underscores(member_name);
- set_member_name(type.self, i, member_name);
- emit_struct_member(type, member, i, "");
- set_member_name(type.self, i, backup_name);
- i++;
- }
+ uint32_t i = 0;
+ for (auto &member : type.member_types)
+ {
+ add_member_name(type, i);
+ auto backup_name = get_member_name(type.self, i);
+ auto member_name = to_member_name(type, i);
+ member_name = join(to_name(var.self), "_", member_name);
+ ParsedIR::sanitize_underscores(member_name);
+ set_member_name(type.self, i, member_name);
+ emit_struct_member(type, member, i, "");
+ set_member_name(type.self, i, backup_name);
+ i++;
+ }

- end_scope_decl();
- statement("");
- }
- else
- {
- if (hlsl_options.shader_model < 51)
- SPIRV_CROSS_THROW(
- "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1.");
+ end_scope_decl();
+ statement("");
+ }
+ else
+ {
+ if (hlsl_options.shader_model < 51)
+ SPIRV_CROSS_THROW(
+ "Need ConstantBuffer<T> to use arrays of UBOs, but this is only supported in SM 5.1.");

- add_resource_name(type.self);
- add_resource_name(var.self);
+ add_resource_name(type.self);
+ add_resource_name(var.self);

- // ConstantBuffer does not support packoffset, so it is unusable unless everything aligns as we expect.
- uint32_t failed_index = 0;
- if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index))
- {
- SPIRV_CROSS_THROW(join("HLSL ConstantBuffer ID ", var.self, " (name: ", to_name(type.self),
- "), member index ", failed_index, " (name: ", to_member_name(type, failed_index),
- ") cannot be expressed with normal HLSL packing rules."));
- }
+ // ConstantBuffer does not support packoffset, so it is unusable unless everything aligns as we expect. 
+ uint32_t failed_index = 0; + if (!buffer_is_packing_standard(type, BufferPackingHLSLCbuffer, &failed_index)) + { + SPIRV_CROSS_THROW(join("HLSL ConstantBuffer ID ", var.self, " (name: ", to_name(type.self), + "), member index ", failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with normal HLSL packing rules.")); + } - emit_struct(get(type.self)); - statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type, var.self), - to_resource_binding(var), ";"); - } - } + emit_struct(get(type.self)); + statement("ConstantBuffer<", to_name(type.self), "> ", to_name(var.self), type_to_array_glsl(type, var.self), + to_resource_binding(var), ";"); + } + } } void CompilerHLSL::emit_push_constant_block(const SPIRVariable &var) { - if (flattened_buffer_blocks.count(var.self)) - { - emit_buffer_block_flattened(var); - } - else if (root_constants_layout.empty()) - { - emit_buffer_block(var); - } - else - { - for (const auto &layout : root_constants_layout) - { - auto &type = get(var.basetype); + if (flattened_buffer_blocks.count(var.self)) + { + emit_buffer_block_flattened(var); + } + else if (root_constants_layout.empty()) + { + emit_buffer_block(var); + } + else + { + for (const auto &layout : root_constants_layout) + { + auto &type = get(var.basetype); - uint32_t failed_index = 0; - if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start, - layout.end)) - set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); - else - { - SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")", - ", member index ", failed_index, " (name: ", to_member_name(type, failed_index), - ") cannot be expressed with either HLSL packing layout or packoffset.")); - } + uint32_t failed_index = 0; + if (buffer_is_packing_standard(type, BufferPackingHLSLCbufferPackOffset, &failed_index, layout.start, + layout.end)) + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + else + { + SPIRV_CROSS_THROW(join("Root constant cbuffer ID ", var.self, " (name: ", to_name(type.self), ")", + ", member index ", failed_index, " (name: ", to_member_name(type, failed_index), + ") cannot be expressed with either HLSL packing layout or packoffset.")); + } - flattened_structs[var.self] = false; - type.member_name_cache.clear(); - add_resource_name(var.self); - auto &memb = ir.meta[type.self].members; + flattened_structs[var.self] = false; + type.member_name_cache.clear(); + add_resource_name(var.self); + auto &memb = ir.meta[type.self].members; - statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self), - to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space)); - begin_scope(); + statement("cbuffer SPIRV_CROSS_RootConstant_", to_name(var.self), + to_resource_register(HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT, 'b', layout.binding, layout.space)); + begin_scope(); - // Index of the next field in the generated root constant constant buffer - auto constant_index = 0u; + // Index of the next field in the generated root constant constant buffer + auto constant_index = 0u; - // Iterate over all member of the push constant and check which of the fields - // fit into the given root constant layout. 
- for (auto i = 0u; i < memb.size(); i++) - { - const auto offset = memb[i].offset; - if (layout.start <= offset && offset < layout.end) - { - const auto &member = type.member_types[i]; + // Iterate over all member of the push constant and check which of the fields + // fit into the given root constant layout. + for (auto i = 0u; i < memb.size(); i++) + { + const auto offset = memb[i].offset; + if (layout.start <= offset && offset < layout.end) + { + const auto &member = type.member_types[i]; - add_member_name(type, constant_index); - auto backup_name = get_member_name(type.self, i); - auto member_name = to_member_name(type, i); - member_name = join(to_name(var.self), "_", member_name); - ParsedIR::sanitize_underscores(member_name); - set_member_name(type.self, constant_index, member_name); - emit_struct_member(type, member, i, "", layout.start); - set_member_name(type.self, constant_index, backup_name); + add_member_name(type, constant_index); + auto backup_name = get_member_name(type.self, i); + auto member_name = to_member_name(type, i); + member_name = join(to_name(var.self), "_", member_name); + ParsedIR::sanitize_underscores(member_name); + set_member_name(type.self, constant_index, member_name); + emit_struct_member(type, member, i, "", layout.start); + set_member_name(type.self, constant_index, backup_name); - constant_index++; - } - } + constant_index++; + } + } - end_scope_decl(); - } - } + end_scope_decl(); + } + } } string CompilerHLSL::to_sampler_expression(uint32_t id) { - auto expr = join("_", to_non_uniform_aware_expression(id)); - auto index = expr.find_first_of('['); - if (index == string::npos) - { - return expr + "_sampler"; - } - else - { - // We have an expression like _ident[array], so we cannot tack on _sampler, insert it inside the string instead. - return expr.insert(index, "_sampler"); - } + auto expr = join("_", to_non_uniform_aware_expression(id)); + auto index = expr.find_first_of('['); + if (index == string::npos) + { + return expr + "_sampler"; + } + else + { + // We have an expression like _ident[array], so we cannot tack on _sampler, insert it inside the string instead. + return expr.insert(index, "_sampler"); + } } void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) { - if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty()) - { - set(result_id, result_type, image_id, samp_id); - } - else - { - // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. - emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); - } + if (hlsl_options.shader_model >= 40 && combined_image_samplers.empty()) + { + set(result_id, result_type, image_id, samp_id); + } + else + { + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + } } string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { - string arg_str = CompilerGLSL::to_func_call_arg(arg, id); + string arg_str = CompilerGLSL::to_func_call_arg(arg, id); - if (hlsl_options.shader_model <= 30) - return arg_str; + if (hlsl_options.shader_model <= 30) + return arg_str; - // Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL. - auto &type = expression_type(id); + // Manufacture automatic sampler arg if the arg is a SampledImage texture and we're in modern HLSL. 
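// For example (hypothetical names): a combined image sampler argument "tex"
// is passed on as "tex, _tex_sampler", matching to_sampler_expression above,
// which prefixes "_" and appends "_sampler", inserting the suffix before any
// trailing array subscript.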
+ auto &type = expression_type(id); - // We don't have to consider combined image samplers here via OpSampledImage because - // those variables cannot be passed as arguments to functions. - // Only global SampledImage variables may be used as arguments. - if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) - arg_str += ", " + to_sampler_expression(id); + // We don't have to consider combined image samplers here via OpSampledImage because + // those variables cannot be passed as arguments to functions. + // Only global SampledImage variables may be used as arguments. + if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + arg_str += ", " + to_sampler_expression(id); - return arg_str; + return arg_str; } string CompilerHLSL::get_inner_entry_point_name() const { - auto &execution = get_entry_point(); + auto &execution = get_entry_point(); - if (hlsl_options.use_entry_point_name) - { - auto name = join(execution.name, "_inner"); - ParsedIR::sanitize_underscores(name); - return name; - } + if (hlsl_options.use_entry_point_name) + { + auto name = join(execution.name, "_inner"); + ParsedIR::sanitize_underscores(name); + return name; + } - if (execution.model == ExecutionModelVertex) - return "vert_main"; - else if (execution.model == ExecutionModelFragment) - return "frag_main"; - else if (execution.model == ExecutionModelGLCompute) - return "comp_main"; - else if (execution.model == ExecutionModelMeshEXT) - return "mesh_main"; - else if (execution.model == ExecutionModelTaskEXT) - return "task_main"; - else - SPIRV_CROSS_THROW("Unsupported execution model."); + if (execution.model == ExecutionModelVertex) + return "vert_main"; + else if (execution.model == ExecutionModelFragment) + return "frag_main"; + else if (execution.model == ExecutionModelGLCompute) + return "comp_main"; + else if (execution.model == ExecutionModelMeshEXT) + return "mesh_main"; + else if (execution.model == ExecutionModelTaskEXT) + return "task_main"; + else + SPIRV_CROSS_THROW("Unsupported execution model."); } void CompilerHLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) { - if (func.self != ir.default_entry_point) - add_function_overload(func); + if (func.self != ir.default_entry_point) + add_function_overload(func); - // Avoid shadow declarations. - local_variable_names = resource_names; + // Avoid shadow declarations. + local_variable_names = resource_names; - string decl; + string decl; - auto &type = get(func.return_type); - if (type.array.empty()) - { - decl += flags_to_qualifiers_glsl(type, return_flags); - decl += type_to_glsl(type); - decl += " "; - } - else - { - // We cannot return arrays in HLSL, so "return" through an out variable. - decl = "void "; - } + auto &type = get(func.return_type); + if (type.array.empty()) + { + decl += flags_to_qualifiers_glsl(type, return_flags); + decl += type_to_glsl(type); + decl += " "; + } + else + { + // We cannot return arrays in HLSL, so "return" through an out variable. + decl = "void "; + } - if (func.self == ir.default_entry_point) - { - decl += get_inner_entry_point_name(); - processing_entry_point = true; - } - else - decl += to_name(func.self); + if (func.self == ir.default_entry_point) + { + decl += get_inner_entry_point_name(); + processing_entry_point = true; + } + else + decl += to_name(func.self); - decl += "("; - SmallVector arglist; + decl += "("; + SmallVector arglist; - if (!type.array.empty()) - { - // Fake array returns by writing to an out array instead. 
- string out_argument;
- out_argument += "out ";
- out_argument += type_to_glsl(type);
- out_argument += " ";
- out_argument += "spvReturnValue";
- out_argument += type_to_array_glsl(type, 0);
- arglist.push_back(std::move(out_argument));
- }
+ if (!type.array.empty())
+ {
+ // Fake array returns by writing to an out array instead.
+ string out_argument;
+ out_argument += "out ";
+ out_argument += type_to_glsl(type);
+ out_argument += " ";
+ out_argument += "spvReturnValue";
+ out_argument += type_to_array_glsl(type, 0);
+ arglist.push_back(std::move(out_argument));
+ }

- for (auto &arg : func.arguments)
- {
- // Do not pass in separate images or samplers if we're remapping
- // to combined image samplers.
- if (skip_argument(arg.id))
- continue;
+ for (auto &arg : func.arguments)
+ {
+ // Do not pass in separate images or samplers if we're remapping
+ // to combined image samplers.
+ if (skip_argument(arg.id))
+ continue;

- // Might change the variable name if it already exists in this function.
- // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
- // to use same name for variables.
- // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
- add_local_variable_name(arg.id);
+ // Might change the variable name if it already exists in this function.
+ // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
+ // to use same name for variables.
+ // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
+ add_local_variable_name(arg.id);

- arglist.push_back(argument_decl(arg));
+ arglist.push_back(argument_decl(arg));

- // Flatten a combined sampler to two separate arguments in modern HLSL.
- auto &arg_type = get<SPIRType>(arg.type);
- if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage &&
- arg_type.image.dim != DimBuffer)
- {
- // Manufacture automatic sampler arg for SampledImage texture
- arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ",
- to_sampler_expression(arg.id), type_to_array_glsl(arg_type, arg.id)));
- }
+ // Flatten a combined sampler to two separate arguments in modern HLSL.
+ auto &arg_type = get<SPIRType>(arg.type);
+ if (hlsl_options.shader_model > 30 && arg_type.basetype == SPIRType::SampledImage &&
+ arg_type.image.dim != DimBuffer)
+ {
+ // Manufacture automatic sampler arg for SampledImage texture
+ arglist.push_back(join(is_depth_image(arg_type, arg.id) ? "SamplerComparisonState " : "SamplerState ",
+ to_sampler_expression(arg.id), type_to_array_glsl(arg_type, arg.id)));
+ }

- // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
- auto *var = maybe_get<SPIRVariable>(arg.id);
- if (var)
- var->parameter = &arg;
- }
+ // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
+ auto *var = maybe_get<SPIRVariable>(arg.id);
+ if (var)
+ var->parameter = &arg;
+ }

- for (auto &arg : func.shadow_arguments)
- {
- // Might change the variable name if it already exists in this function.
- // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
- // to use same name for variables.
- // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
- add_local_variable_name(arg.id);
+ for (auto &arg : func.shadow_arguments)
+ {
+ // Might change the variable name if it already exists in this function.
+ // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); - arglist.push_back(argument_decl(arg)); + arglist.push_back(argument_decl(arg)); - // Hold a pointer to the parameter so we can invalidate the readonly field if needed. - auto *var = maybe_get(arg.id); - if (var) - var->parameter = &arg; - } + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get(arg.id); + if (var) + var->parameter = &arg; + } - decl += merge(arglist); - decl += ")"; - statement(decl); + decl += merge(arglist); + decl += ")"; + statement(decl); } void CompilerHLSL::emit_hlsl_entry_point() { - SmallVector arguments; + SmallVector arguments; - if (require_input) - arguments.push_back("SPIRV_Cross_Input stage_input"); + if (require_input) + arguments.push_back("SPIRV_Cross_Input stage_input"); - auto &execution = get_entry_point(); + auto &execution = get_entry_point(); - switch (execution.model) - { - case ExecutionModelTaskEXT: - case ExecutionModelMeshEXT: - case ExecutionModelGLCompute: - { - if (execution.model == ExecutionModelMeshEXT) - { - if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) - statement("[outputtopology(\"triangle\")]"); - else if (execution.flags.get(ExecutionModeOutputLinesEXT)) - statement("[outputtopology(\"line\")]"); - else if (execution.flags.get(ExecutionModeOutputPoints)) - SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX"); + switch (execution.model) + { + case ExecutionModelTaskEXT: + case ExecutionModelMeshEXT: + case ExecutionModelGLCompute: + { + if (execution.model == ExecutionModelMeshEXT) + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + statement("[outputtopology(\"triangle\")]"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + statement("[outputtopology(\"line\")]"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + SPIRV_CROSS_THROW("Topology mode \"points\" is not supported in DirectX"); - auto &func = get(ir.default_entry_point); - for (auto &arg : func.arguments) - { - auto &var = get(arg.id); - auto &base_type = get(var.basetype); - bool block = has_decoration(base_type.self, DecorationBlock); - if (var.storage == StorageClassTaskPayloadWorkgroupEXT) - { - arguments.push_back("in payload " + variable_decl(var)); - } - else if (block) - { - auto flags = get_buffer_block_flags(var.self); - if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT)) - { - arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" + - std::to_string(execution.output_primitives) + "]"); - } - else - { - arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" + - std::to_string(execution.output_vertices) + "]"); - } - } - else - { - if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) - { - arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" + - std::to_string(execution.output_primitives) + "]"); - } - else - { - arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" + - std::to_string(execution.output_primitives) + "]"); - } - } - } - } - SpecializationConstant wg_x, wg_y, wg_z; - get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + auto &func = get(ir.default_entry_point); + for (auto &arg : func.arguments) + { + auto 
&var = get(arg.id); + auto &base_type = get(var.basetype); + bool block = has_decoration(base_type.self, DecorationBlock); + if (var.storage == StorageClassTaskPayloadWorkgroupEXT) + { + arguments.push_back("in payload " + variable_decl(var)); + } + else if (block) + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT) || has_decoration(arg.id, DecorationPerPrimitiveEXT)) + { + arguments.push_back("out primitives gl_MeshPerPrimitiveEXT gl_MeshPrimitivesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out vertices gl_MeshPerVertexEXT gl_MeshVerticesEXT[" + + std::to_string(execution.output_vertices) + "]"); + } + } + else + { + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + { + arguments.push_back("out indices uint3 gl_PrimitiveTriangleIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + else + { + arguments.push_back("out indices uint2 gl_PrimitiveLineIndicesEXT[" + + std::to_string(execution.output_primitives) + "]"); + } + } + } + } + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); - uint32_t x = execution.workgroup_size.x; - uint32_t y = execution.workgroup_size.y; - uint32_t z = execution.workgroup_size.z; + uint32_t x = execution.workgroup_size.x; + uint32_t y = execution.workgroup_size.y; + uint32_t z = execution.workgroup_size.z; - if (!execution.workgroup_size.constant && execution.flags.get(ExecutionModeLocalSizeId)) - { - if (execution.workgroup_size.id_x) - x = get(execution.workgroup_size.id_x).scalar(); - if (execution.workgroup_size.id_y) - y = get(execution.workgroup_size.id_y).scalar(); - if (execution.workgroup_size.id_z) - z = get(execution.workgroup_size.id_z).scalar(); - } + if (!execution.workgroup_size.constant && execution.flags.get(ExecutionModeLocalSizeId)) + { + if (execution.workgroup_size.id_x) + x = get(execution.workgroup_size.id_x).scalar(); + if (execution.workgroup_size.id_y) + y = get(execution.workgroup_size.id_y).scalar(); + if (execution.workgroup_size.id_z) + z = get(execution.workgroup_size.id_z).scalar(); + } - auto x_expr = wg_x.id ? get(wg_x.id).specialization_constant_macro_name : to_string(x); - auto y_expr = wg_y.id ? get(wg_y.id).specialization_constant_macro_name : to_string(y); - auto z_expr = wg_z.id ? get(wg_z.id).specialization_constant_macro_name : to_string(z); + auto x_expr = wg_x.id ? get(wg_x.id).specialization_constant_macro_name : to_string(x); + auto y_expr = wg_y.id ? get(wg_y.id).specialization_constant_macro_name : to_string(y); + auto z_expr = wg_z.id ? get(wg_z.id).specialization_constant_macro_name : to_string(z); - statement("[numthreads(", x_expr, ", ", y_expr, ", ", z_expr, ")]"); - break; - } - case ExecutionModelFragment: - if (execution.flags.get(ExecutionModeEarlyFragmentTests)) - statement("[earlydepthstencil]"); - break; - default: - break; - } + statement("[numthreads(", x_expr, ", ", y_expr, ", ", z_expr, ")]"); + break; + } + case ExecutionModelFragment: + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + statement("[earlydepthstencil]"); + break; + default: + break; + } - const char *entry_point_name; - if (hlsl_options.use_entry_point_name) - entry_point_name = get_entry_point().name.c_str(); - else - entry_point_name = "main"; + const char *entry_point_name; + if (hlsl_options.use_entry_point_name) + entry_point_name = get_entry_point().name.c_str(); + else + entry_point_name = "main"; - statement(require_output ? 
"SPIRV_Cross_Output " : "void ", entry_point_name, "(", merge(arguments), ")"); - begin_scope(); - bool legacy = hlsl_options.shader_model <= 30; + statement(require_output ? "SPIRV_Cross_Output " : "void ", entry_point_name, "(", merge(arguments), ")"); + begin_scope(); + bool legacy = hlsl_options.shader_model <= 30; - // Copy builtins from entry point arguments to globals. - active_input_builtins.for_each_bit([&](uint32_t i) { - auto builtin = builtin_to_glsl(static_cast(i), StorageClassInput); - switch (static_cast(i)) - { - case BuiltInFragCoord: - // VPOS in D3D9 is sampled at integer locations, apply half-pixel offset to be consistent. - // TODO: Do we need an option here? Any reason why a D3D9 shader would be used - // on a D3D10+ system with a different rasterization config? - if (legacy) - statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);"); - else - { - statement(builtin, " = stage_input.", builtin, ";"); - // ZW are undefined in D3D9, only do this fixup here. - statement(builtin, ".w = 1.0 / ", builtin, ".w;"); - } - break; + // Copy builtins from entry point arguments to globals. + active_input_builtins.for_each_bit([&](uint32_t i) { + auto builtin = builtin_to_glsl(static_cast(i), StorageClassInput); + switch (static_cast(i)) + { + case BuiltInFragCoord: + // VPOS in D3D9 is sampled at integer locations, apply half-pixel offset to be consistent. + // TODO: Do we need an option here? Any reason why a D3D9 shader would be used + // on a D3D10+ system with a different rasterization config? + if (legacy) + statement(builtin, " = stage_input.", builtin, " + float4(0.5f, 0.5f, 0.0f, 0.0f);"); + else + { + statement(builtin, " = stage_input.", builtin, ";"); + // ZW are undefined in D3D9, only do this fixup here. + statement(builtin, ".w = 1.0 / ", builtin, ".w;"); + } + break; - case BuiltInFrontFacing: + case BuiltInFrontFacing: { - if (legacy) - statement(builtin, " = bool(stage_input.", builtin, " > 0.0 ? true : false);"); - else - { - statement(builtin, " = int(stage_input.", builtin, ");"); - } - break; + if (legacy) + statement(builtin, " = bool(stage_input.", builtin, " > 0.0 ? true : false);"); + else + { + statement(builtin, " = int(stage_input.", builtin, ");"); + } + break; } - case BuiltInVertexId: - case BuiltInVertexIndex: - case BuiltInInstanceIndex: - // D3D semantics are uint, but shader wants int. - if (hlsl_options.support_nonzero_base_vertex_base_instance || hlsl_options.shader_model >= 68) - { - if (hlsl_options.shader_model >= 68) - { - if (static_cast(i) == BuiltInInstanceIndex) - statement(builtin, " = int(stage_input.", builtin, " + stage_input.gl_BaseInstanceARB);"); - else - statement(builtin, " = int(stage_input.", builtin, " + stage_input.gl_BaseVertexARB);"); - } - else - { - if (static_cast(i) == BuiltInInstanceIndex) - statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseInstance;"); - else - statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseVertex;"); - } - } - else - statement(builtin, " = int(stage_input.", builtin, ");"); - break; + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInInstanceIndex: + // D3D semantics are uint, but shader wants int. 
+ if (hlsl_options.support_nonzero_base_vertex_base_instance || hlsl_options.shader_model >= 68) + { + if (hlsl_options.shader_model >= 68) + { + if (static_cast(i) == BuiltInInstanceIndex) + statement(builtin, " = int(stage_input.", builtin, " + stage_input.gl_BaseInstanceARB);"); + else + statement(builtin, " = int(stage_input.", builtin, " + stage_input.gl_BaseVertexARB);"); + } + else + { + if (static_cast(i) == BuiltInInstanceIndex) + statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseInstance;"); + else + statement(builtin, " = int(stage_input.", builtin, ") + SPIRV_Cross_BaseVertex;"); + } + } + else + statement(builtin, " = int(stage_input.", builtin, ");"); + break; - case BuiltInBaseVertex: - if (hlsl_options.shader_model >= 68) - statement(builtin, " = stage_input.gl_BaseVertexARB;"); - else - statement(builtin, " = SPIRV_Cross_BaseVertex;"); - break; + case BuiltInBaseVertex: + if (hlsl_options.shader_model >= 68) + statement(builtin, " = stage_input.gl_BaseVertexARB;"); + else + statement(builtin, " = SPIRV_Cross_BaseVertex;"); + break; - case BuiltInBaseInstance: - if (hlsl_options.shader_model >= 68) - statement(builtin, " = stage_input.gl_BaseInstanceARB;"); - else - statement(builtin, " = SPIRV_Cross_BaseInstance;"); - break; + case BuiltInBaseInstance: + if (hlsl_options.shader_model >= 68) + statement(builtin, " = stage_input.gl_BaseInstanceARB;"); + else + statement(builtin, " = SPIRV_Cross_BaseInstance;"); + break; - case BuiltInInstanceId: - // D3D semantics are uint, but shader wants int. - statement(builtin, " = int(stage_input.", builtin, ");"); - break; + case BuiltInInstanceId: + // D3D semantics are uint, but shader wants int. + statement(builtin, " = int(stage_input.", builtin, ");"); + break; - case BuiltInSampleMask: - statement(builtin, "[0] = stage_input.", builtin, ";"); - break; + case BuiltInSampleMask: + statement(builtin, "[0] = stage_input.", builtin, ";"); + break; - case BuiltInNumWorkgroups: - case BuiltInPointCoord: - case BuiltInSubgroupSize: - case BuiltInSubgroupLocalInvocationId: - case BuiltInHelperInvocation: - break; + case BuiltInNumWorkgroups: + case BuiltInPointCoord: + case BuiltInSubgroupSize: + case BuiltInSubgroupLocalInvocationId: + case BuiltInHelperInvocation: + break; - case BuiltInSubgroupEqMask: - // Emulate these ... - // No 64-bit in HLSL, so have to do it in 32-bit and unroll. - statement("gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));"); - statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;"); - statement("if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;"); - statement("if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;"); - statement("if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;"); - break; + case BuiltInSubgroupEqMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("gl_SubgroupEqMask = 1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96));"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupEqMask.x = 0;"); + statement("if (WaveGetLaneIndex() >= 64 || WaveGetLaneIndex() < 32) gl_SubgroupEqMask.y = 0;"); + statement("if (WaveGetLaneIndex() >= 96 || WaveGetLaneIndex() < 64) gl_SubgroupEqMask.z = 0;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupEqMask.w = 0;"); + break; - case BuiltInSubgroupGeMask: - // Emulate these ... - // No 64-bit in HLSL, so have to do it in 32-bit and unroll. 
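- // e.g. for lane 40 the statements below produce .x = 0, .y = ~((1u << 8) - 1u) (lanes 40..63),
- // and .z = .w = ~0u, i.e. every lane >= 40 across the 128-bit mask.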
- statement("gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);"); - statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;"); - statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;"); - statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;"); - statement("if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;"); - statement("if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;"); - statement("if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;"); - break; + case BuiltInSubgroupGeMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("gl_SubgroupGeMask = ~((1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u);"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupGeMask.x = 0u;"); + statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupGeMask.y = 0u;"); + statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupGeMask.z = 0u;"); + statement("if (WaveGetLaneIndex() < 32) gl_SubgroupGeMask.y = ~0u;"); + statement("if (WaveGetLaneIndex() < 64) gl_SubgroupGeMask.z = ~0u;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupGeMask.w = ~0u;"); + break; - case BuiltInSubgroupGtMask: - // Emulate these ... - // No 64-bit in HLSL, so have to do it in 32-bit and unroll. - statement("uint gt_lane_index = WaveGetLaneIndex() + 1;"); - statement("gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);"); - statement("if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;"); - statement("if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;"); - statement("if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;"); - statement("if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;"); - statement("if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;"); - statement("if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;"); - statement("if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;"); - break; + case BuiltInSubgroupGtMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("uint gt_lane_index = WaveGetLaneIndex() + 1;"); + statement("gl_SubgroupGtMask = ~((1u << (gt_lane_index - uint4(0, 32, 64, 96))) - 1u);"); + statement("if (gt_lane_index >= 32) gl_SubgroupGtMask.x = 0u;"); + statement("if (gt_lane_index >= 64) gl_SubgroupGtMask.y = 0u;"); + statement("if (gt_lane_index >= 96) gl_SubgroupGtMask.z = 0u;"); + statement("if (gt_lane_index >= 128) gl_SubgroupGtMask.w = 0u;"); + statement("if (gt_lane_index < 32) gl_SubgroupGtMask.y = ~0u;"); + statement("if (gt_lane_index < 64) gl_SubgroupGtMask.z = ~0u;"); + statement("if (gt_lane_index < 96) gl_SubgroupGtMask.w = ~0u;"); + break; - case BuiltInSubgroupLeMask: - // Emulate these ... - // No 64-bit in HLSL, so have to do it in 32-bit and unroll. - statement("uint le_lane_index = WaveGetLaneIndex() + 1;"); - statement("gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;"); - statement("if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;"); - statement("if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;"); - statement("if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;"); - statement("if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;"); - statement("if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;"); - statement("if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;"); - statement("if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;"); - break; + case BuiltInSubgroupLeMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. 
+ statement("uint le_lane_index = WaveGetLaneIndex() + 1;"); + statement("gl_SubgroupLeMask = (1u << (le_lane_index - uint4(0, 32, 64, 96))) - 1u;"); + statement("if (le_lane_index >= 32) gl_SubgroupLeMask.x = ~0u;"); + statement("if (le_lane_index >= 64) gl_SubgroupLeMask.y = ~0u;"); + statement("if (le_lane_index >= 96) gl_SubgroupLeMask.z = ~0u;"); + statement("if (le_lane_index >= 128) gl_SubgroupLeMask.w = ~0u;"); + statement("if (le_lane_index < 32) gl_SubgroupLeMask.y = 0u;"); + statement("if (le_lane_index < 64) gl_SubgroupLeMask.z = 0u;"); + statement("if (le_lane_index < 96) gl_SubgroupLeMask.w = 0u;"); + break; - case BuiltInSubgroupLtMask: - // Emulate these ... - // No 64-bit in HLSL, so have to do it in 32-bit and unroll. - statement("gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;"); - statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;"); - statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;"); - statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;"); - statement("if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;"); - statement("if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;"); - statement("if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;"); - break; + case BuiltInSubgroupLtMask: + // Emulate these ... + // No 64-bit in HLSL, so have to do it in 32-bit and unroll. + statement("gl_SubgroupLtMask = (1u << (WaveGetLaneIndex() - uint4(0, 32, 64, 96))) - 1u;"); + statement("if (WaveGetLaneIndex() >= 32) gl_SubgroupLtMask.x = ~0u;"); + statement("if (WaveGetLaneIndex() >= 64) gl_SubgroupLtMask.y = ~0u;"); + statement("if (WaveGetLaneIndex() >= 96) gl_SubgroupLtMask.z = ~0u;"); + statement("if (WaveGetLaneIndex() < 32) gl_SubgroupLtMask.y = 0u;"); + statement("if (WaveGetLaneIndex() < 64) gl_SubgroupLtMask.z = 0u;"); + statement("if (WaveGetLaneIndex() < 96) gl_SubgroupLtMask.w = 0u;"); + break; - case BuiltInClipDistance: - for (uint32_t clip = 0; clip < clip_distance_count; clip++) - statement("gl_ClipDistance[", clip, "] = stage_input.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], - ";"); - break; + case BuiltInClipDistance: + for (uint32_t clip = 0; clip < clip_distance_count; clip++) + statement("gl_ClipDistance[", clip, "] = stage_input.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], + ";"); + break; - case BuiltInCullDistance: - for (uint32_t cull = 0; cull < cull_distance_count; cull++) - statement("gl_CullDistance[", cull, "] = stage_input.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], - ";"); - break; + case BuiltInCullDistance: + for (uint32_t cull = 0; cull < cull_distance_count; cull++) + statement("gl_CullDistance[", cull, "] = stage_input.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], + ";"); + break; - default: - statement(builtin, " = stage_input.", builtin, ";"); - break; - } - }); + default: + statement(builtin, " = stage_input.", builtin, ";"); + break; + } + }); - // Copy from stage input struct to globals. - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); + // Copy from stage input struct to globals. 
+ ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); - if (var.storage != StorageClassInput) - return; + if (var.storage != StorageClassInput) + return; - bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; + bool need_matrix_unroll = var.storage == StorageClassInput && execution.model == ExecutionModelVertex; - if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self)) - { - if (block) - { - auto type_name = to_name(type.self); - auto var_name = to_name(var.self); - for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) - { - auto mbr_name = to_member_name(type, mbr_idx); - auto flat_name = join(type_name, "_", mbr_name); - statement(var_name, ".", mbr_name, " = stage_input.", flat_name, ";"); - } - } - else - { - auto name = to_name(var.self); - auto &mtype = this->get(var.basetype); - if (need_matrix_unroll && mtype.columns > 1) - { - // Unroll matrices. - for (uint32_t col = 0; col < mtype.columns; col++) - statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); - } - else - { - statement(name, " = stage_input.", name, ";"); - } - } - } - }); + if (!var.remapped_variable && type.pointer && !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + if (block) + { + auto type_name = to_name(type.self); + auto var_name = to_name(var.self); + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) + { + auto mbr_name = to_member_name(type, mbr_idx); + auto flat_name = join(type_name, "_", mbr_name); + statement(var_name, ".", mbr_name, " = stage_input.", flat_name, ";"); + } + } + else + { + auto name = to_name(var.self); + auto &mtype = this->get(var.basetype); + if (need_matrix_unroll && mtype.columns > 1) + { + // Unroll matrices. + for (uint32_t col = 0; col < mtype.columns; col++) + statement(name, "[", col, "] = stage_input.", name, "_", col, ";"); + } + else + { + statement(name, " = stage_input.", name, ";"); + } + } + } + }); - // Run the shader. - if (execution.model == ExecutionModelVertex || - execution.model == ExecutionModelFragment || - execution.model == ExecutionModelGLCompute || - execution.model == ExecutionModelMeshEXT || - execution.model == ExecutionModelTaskEXT) - { - // For mesh shaders, we receive special arguments that we must pass down as function arguments. - // HLSL does not support proper reference types for passing these IO blocks, - // but DXC post-inlining seems to magically fix it up anyways *shrug*. - SmallVector arglist; - auto &func = get(ir.default_entry_point); - // The arguments are marked out, avoid detecting reads and emitting inout. - for (auto &arg : func.arguments) - arglist.push_back(to_expression(arg.id, false)); - statement(get_inner_entry_point_name(), "(", merge(arglist), ");"); - } - else - SPIRV_CROSS_THROW("Unsupported shader stage."); + // Run the shader. + if (execution.model == ExecutionModelVertex || + execution.model == ExecutionModelFragment || + execution.model == ExecutionModelGLCompute || + execution.model == ExecutionModelMeshEXT || + execution.model == ExecutionModelTaskEXT) + { + // For mesh shaders, we receive special arguments that we must pass down as function arguments. 
+ // HLSL does not support proper reference types for passing these IO blocks, + // but DXC post-inlining seems to magically fix it up anyways *shrug*. + SmallVector arglist; + auto &func = get(ir.default_entry_point); + // The arguments are marked out, avoid detecting reads and emitting inout. + for (auto &arg : func.arguments) + arglist.push_back(to_expression(arg.id, false)); + statement(get_inner_entry_point_name(), "(", merge(arglist), ");"); + } + else + SPIRV_CROSS_THROW("Unsupported shader stage."); - // Copy stage outputs. - if (require_output) - { - statement("SPIRV_Cross_Output stage_output;"); + // Copy stage outputs. + if (require_output) + { + statement("SPIRV_Cross_Output stage_output;"); - // Copy builtins from globals to return struct. - active_output_builtins.for_each_bit([&](uint32_t i) { - // PointSize doesn't exist in HLSL SM 4+. - if (i == BuiltInPointSize && !legacy) - return; + // Copy builtins from globals to return struct. + active_output_builtins.for_each_bit([&](uint32_t i) { + // PointSize doesn't exist in HLSL SM 4+. + if (i == BuiltInPointSize && !legacy) + return; - switch (static_cast(i)) - { - case BuiltInClipDistance: - for (uint32_t clip = 0; clip < clip_distance_count; clip++) - statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[", - clip, "];"); - break; + switch (static_cast(i)) + { + case BuiltInClipDistance: + for (uint32_t clip = 0; clip < clip_distance_count; clip++) + statement("stage_output.gl_ClipDistance", clip / 4, ".", "xyzw"[clip & 3], " = gl_ClipDistance[", + clip, "];"); + break; - case BuiltInCullDistance: - for (uint32_t cull = 0; cull < cull_distance_count; cull++) - statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[", - cull, "];"); - break; + case BuiltInCullDistance: + for (uint32_t cull = 0; cull < cull_distance_count; cull++) + statement("stage_output.gl_CullDistance", cull / 4, ".", "xyzw"[cull & 3], " = gl_CullDistance[", + cull, "];"); + break; - case BuiltInSampleMask: - statement("stage_output.gl_SampleMask = gl_SampleMask[0];"); - break; + case BuiltInSampleMask: + statement("stage_output.gl_SampleMask = gl_SampleMask[0];"); + break; - default: - { - auto builtin_expr = builtin_to_glsl(static_cast(i), StorageClassOutput); - statement("stage_output.", builtin_expr, " = ", builtin_expr, ";"); - break; - } - } - }); + default: + { + auto builtin_expr = builtin_to_glsl(static_cast(i), StorageClassOutput); + statement("stage_output.", builtin_expr, " = ", builtin_expr, ";"); + break; + } + } + }); - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { - auto &type = this->get(var.basetype); - bool block = has_decoration(type.self, DecorationBlock); + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + auto &type = this->get(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); - if (var.storage != StorageClassOutput) - return; + if (var.storage != StorageClassOutput) + return; - if (!var.remapped_variable && type.pointer && - !is_builtin_variable(var) && - interface_variable_exists_in_entry_point(var.self)) - { - if (block) - { - // I/O blocks need to flatten output. 
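- // e.g. member "color" of block type "VSOut" in variable "vout" is copied as
- // "stage_output.VSOut_color = vout.color;" (names illustrative).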
- auto type_name = to_name(type.self); - auto var_name = to_name(var.self); - for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) - { - auto mbr_name = to_member_name(type, mbr_idx); - auto flat_name = join(type_name, "_", mbr_name); - statement("stage_output.", flat_name, " = ", var_name, ".", mbr_name, ";"); - } - } - else - { - auto name = to_name(var.self); + if (!var.remapped_variable && type.pointer && + !is_builtin_variable(var) && + interface_variable_exists_in_entry_point(var.self)) + { + if (block) + { + // I/O blocks need to flatten output. + auto type_name = to_name(type.self); + auto var_name = to_name(var.self); + for (uint32_t mbr_idx = 0; mbr_idx < uint32_t(type.member_types.size()); mbr_idx++) + { + auto mbr_name = to_member_name(type, mbr_idx); + auto flat_name = join(type_name, "_", mbr_name); + statement("stage_output.", flat_name, " = ", var_name, ".", mbr_name, ";"); + } + } + else + { + auto name = to_name(var.self); - if (legacy && execution.model == ExecutionModelFragment) - { - string output_filler; - for (uint32_t size = type.vecsize; size < 4; ++size) - output_filler += ", 0.0"; + if (legacy && execution.model == ExecutionModelFragment) + { + string output_filler; + for (uint32_t size = type.vecsize; size < 4; ++size) + output_filler += ", 0.0"; - statement("stage_output.", name, " = float4(", name, output_filler, ");"); - } - else - { - statement("stage_output.", name, " = ", name, ";"); - } - } - } - }); + statement("stage_output.", name, " = float4(", name, output_filler, ");"); + } + else + { + statement("stage_output.", name, " = ", name, ";"); + } + } + } + }); - statement("return stage_output;"); - } + statement("return stage_output;"); + } - end_scope(); + end_scope(); } void CompilerHLSL::emit_fixup() { - if (is_vertex_like_shader() && active_output_builtins.get(BuiltInPosition)) - { - // Do various mangling on the gl_Position. - if (hlsl_options.shader_model <= 30) - { - statement("gl_Position.x = gl_Position.x - gl_HalfPixel.x * " - "gl_Position.w;"); - statement("gl_Position.y = gl_Position.y + gl_HalfPixel.y * " - "gl_Position.w;"); - } + if (is_vertex_like_shader() && active_output_builtins.get(BuiltInPosition)) + { + // Do various mangling on the gl_Position. 
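+ // The SM <= 30 path applies the classic D3D9 half-pixel correction, roughly:
+ // gl_Position.xy += float2(-gl_HalfPixel.x, gl_HalfPixel.y) * gl_Position.w;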
+ if (hlsl_options.shader_model <= 30) + { + statement("gl_Position.x = gl_Position.x - gl_HalfPixel.x * " + "gl_Position.w;"); + statement("gl_Position.y = gl_Position.y + gl_HalfPixel.y * " + "gl_Position.w;"); + } - if (options.vertex.flip_vert_y) - statement("gl_Position.y = -gl_Position.y;"); - if (options.vertex.fixup_clipspace) - statement("gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;"); - } + if (options.vertex.flip_vert_y) + statement("gl_Position.y = -gl_Position.y;"); + if (options.vertex.fixup_clipspace) + statement("gl_Position.z = (gl_Position.z + gl_Position.w) * 0.5;"); + } } void CompilerHLSL::emit_texture_op(const Instruction &i, bool sparse) { - if (sparse) - SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL."); + if (sparse) + SPIRV_CROSS_THROW("Sparse feedback not yet supported in HLSL."); - auto *ops = stream(i); - auto op = static_cast(i.op); - uint32_t length = i.length; + auto *ops = stream(i); + auto op = static_cast(i.op); + uint32_t length = i.length; - SmallVector inherited_expressions; + SmallVector inherited_expressions; - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - VariableID img = ops[2]; - uint32_t coord = ops[3]; - uint32_t dref = 0; - uint32_t comp = 0; - bool gather = false; - bool proj = false; - const uint32_t *opt = nullptr; - auto *combined_image = maybe_get(img); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + VariableID img = ops[2]; + uint32_t coord = ops[3]; + uint32_t dref = 0; + uint32_t comp = 0; + bool gather = false; + bool proj = false; + const uint32_t *opt = nullptr; + auto *combined_image = maybe_get(img); - if (combined_image && has_decoration(img, DecorationNonUniform)) - { - set_decoration(combined_image->image, DecorationNonUniform); - set_decoration(combined_image->sampler, DecorationNonUniform); - } + if (combined_image && has_decoration(img, DecorationNonUniform)) + { + set_decoration(combined_image->image, DecorationNonUniform); + set_decoration(combined_image->sampler, DecorationNonUniform); + } - auto img_expr = to_non_uniform_aware_expression(combined_image ? combined_image->image : img); + auto img_expr = to_non_uniform_aware_expression(combined_image ? 
combined_image->image : img); - inherited_expressions.push_back(coord); + inherited_expressions.push_back(coord); - switch (op) - { - case OpImageSampleDrefImplicitLod: - case OpImageSampleDrefExplicitLod: - dref = ops[4]; - opt = &ops[5]; - length -= 5; - break; + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + break; - case OpImageSampleProjDrefImplicitLod: - case OpImageSampleProjDrefExplicitLod: - dref = ops[4]; - proj = true; - opt = &ops[5]; - length -= 5; - break; + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + dref = ops[4]; + proj = true; + opt = &ops[5]; + length -= 5; + break; - case OpImageDrefGather: - dref = ops[4]; - opt = &ops[5]; - gather = true; - length -= 5; - break; + case OpImageDrefGather: + dref = ops[4]; + opt = &ops[5]; + gather = true; + length -= 5; + break; - case OpImageGather: - comp = ops[4]; - opt = &ops[5]; - gather = true; - length -= 5; - break; + case OpImageGather: + comp = ops[4]; + opt = &ops[5]; + gather = true; + length -= 5; + break; - case OpImageSampleProjImplicitLod: - case OpImageSampleProjExplicitLod: - opt = &ops[4]; - length -= 4; - proj = true; - break; + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + opt = &ops[4]; + length -= 4; + proj = true; + break; - case OpImageQueryLod: - opt = &ops[4]; - length -= 4; - break; + case OpImageQueryLod: + opt = &ops[4]; + length -= 4; + break; - default: - opt = &ops[4]; - length -= 4; - break; - } + default: + opt = &ops[4]; + length -= 4; + break; + } - auto &imgtype = expression_type(img); - uint32_t coord_components = 0; - switch (imgtype.image.dim) - { - case spv::Dim1D: - coord_components = 1; - break; - case spv::Dim2D: - coord_components = 2; - break; - case spv::Dim3D: - coord_components = 3; - break; - case spv::DimCube: - coord_components = 3; - break; - case spv::DimBuffer: - coord_components = 1; - break; - default: - coord_components = 2; - break; - } + auto &imgtype = expression_type(img); + uint32_t coord_components = 0; + switch (imgtype.image.dim) + { + case spv::Dim1D: + coord_components = 1; + break; + case spv::Dim2D: + coord_components = 2; + break; + case spv::Dim3D: + coord_components = 3; + break; + case spv::DimCube: + coord_components = 3; + break; + case spv::DimBuffer: + coord_components = 1; + break; + default: + coord_components = 2; + break; + } - if (dref) - inherited_expressions.push_back(dref); + if (dref) + inherited_expressions.push_back(dref); - if (imgtype.image.arrayed) - coord_components++; + if (imgtype.image.arrayed) + coord_components++; - uint32_t bias = 0; - uint32_t lod = 0; - uint32_t grad_x = 0; - uint32_t grad_y = 0; - uint32_t coffset = 0; - uint32_t offset = 0; - uint32_t coffsets = 0; - uint32_t sample = 0; - uint32_t minlod = 0; - uint32_t flags = 0; + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t minlod = 0; + uint32_t flags = 0; - if (length) - { - flags = opt[0]; - opt++; - length--; - } + if (length) + { + flags = opt[0]; + opt++; + length--; + } - auto test = [&](uint32_t &v, uint32_t flag) { - if (length && (flags & flag)) - { - v = *opt++; - inherited_expressions.push_back(v); - length--; - } - }; + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + inherited_expressions.push_back(v); + length--; + } 
+ }; - test(bias, ImageOperandsBiasMask); - test(lod, ImageOperandsLodMask); - test(grad_x, ImageOperandsGradMask); - test(grad_y, ImageOperandsGradMask); - test(coffset, ImageOperandsConstOffsetMask); - test(offset, ImageOperandsOffsetMask); - test(coffsets, ImageOperandsConstOffsetsMask); - test(sample, ImageOperandsSampleMask); - test(minlod, ImageOperandsMinLodMask); + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + test(minlod, ImageOperandsMinLodMask); - string expr; - string texop; + string expr; + string texop; - if (minlod != 0) - SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL."); + if (minlod != 0) + SPIRV_CROSS_THROW("MinLod texture operand not supported in HLSL."); - if (op == OpImageFetch) - { - if (hlsl_options.shader_model < 40) - { - SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3."); - } - texop += img_expr; - texop += ".Load"; - } - else if (op == OpImageQueryLod) - { - texop += img_expr; - texop += ".CalculateLevelOfDetail"; - } - else - { - auto &imgformat = get(imgtype.image.type); - if (hlsl_options.shader_model < 67 && imgformat.basetype != SPIRType::Float) - { - SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL SM < 6.7."); - } + if (op == OpImageFetch) + { + if (hlsl_options.shader_model < 40) + { + SPIRV_CROSS_THROW("texelFetch is not supported in HLSL shader model 2/3."); + } + texop += img_expr; + texop += ".Load"; + } + else if (op == OpImageQueryLod) + { + texop += img_expr; + texop += ".CalculateLevelOfDetail"; + } + else + { + auto &imgformat = get(imgtype.image.type); + if (hlsl_options.shader_model < 67 && imgformat.basetype != SPIRType::Float) + { + SPIRV_CROSS_THROW("Sampling non-float textures is not supported in HLSL SM < 6.7."); + } - if (hlsl_options.shader_model >= 40) - { - texop += img_expr; + if (hlsl_options.shader_model >= 40) + { + texop += img_expr; - if (is_depth_image(imgtype, img)) - { - if (gather) - { - texop += ".GatherCmp"; - } - else if (lod || grad_x || grad_y) - { - // Assume we want a fixed level, and the only thing we can get in HLSL is SampleCmpLevelZero. 
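- // i.e. an explicit LOD or gradient on a depth-compare sample collapses to mip level 0;
- // other levels are not expressible with this intrinsic.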
- texop += ".SampleCmpLevelZero"; - } - else - texop += ".SampleCmp"; - } - else if (gather) - { - uint32_t comp_num = evaluate_constant_u32(comp); - if (hlsl_options.shader_model >= 50) - { - switch (comp_num) - { - case 0: - texop += ".GatherRed"; - break; - case 1: - texop += ".GatherGreen"; - break; - case 2: - texop += ".GatherBlue"; - break; - case 3: - texop += ".GatherAlpha"; - break; - default: - SPIRV_CROSS_THROW("Invalid component."); - } - } - else - { - if (comp_num == 0) - texop += ".Gather"; - else - SPIRV_CROSS_THROW("HLSL shader model 4 can only gather from the red component."); - } - } - else if (bias) - texop += ".SampleBias"; - else if (grad_x || grad_y) - texop += ".SampleGrad"; - else if (lod) - texop += ".SampleLevel"; - else - texop += ".Sample"; - } - else - { - switch (imgtype.image.dim) - { - case Dim1D: - texop += "tex1D"; - break; - case Dim2D: - texop += "tex2D"; - break; - case Dim3D: - texop += "tex3D"; - break; - case DimCube: - texop += "texCUBE"; - break; - case DimRect: - case DimBuffer: - case DimSubpassData: - SPIRV_CROSS_THROW("Buffer texture support is not yet implemented for HLSL"); // TODO - default: - SPIRV_CROSS_THROW("Invalid dimension."); - } + if (is_depth_image(imgtype, img)) + { + if (gather) + { + texop += ".GatherCmp"; + } + else if (lod || grad_x || grad_y) + { + // Assume we want a fixed level, and the only thing we can get in HLSL is SampleCmpLevelZero. + texop += ".SampleCmpLevelZero"; + } + else + texop += ".SampleCmp"; + } + else if (gather) + { + uint32_t comp_num = evaluate_constant_u32(comp); + if (hlsl_options.shader_model >= 50) + { + switch (comp_num) + { + case 0: + texop += ".GatherRed"; + break; + case 1: + texop += ".GatherGreen"; + break; + case 2: + texop += ".GatherBlue"; + break; + case 3: + texop += ".GatherAlpha"; + break; + default: + SPIRV_CROSS_THROW("Invalid component."); + } + } + else + { + if (comp_num == 0) + texop += ".Gather"; + else + SPIRV_CROSS_THROW("HLSL shader model 4 can only gather from the red component."); + } + } + else if (bias) + texop += ".SampleBias"; + else if (grad_x || grad_y) + texop += ".SampleGrad"; + else if (lod) + texop += ".SampleLevel"; + else + texop += ".Sample"; + } + else + { + switch (imgtype.image.dim) + { + case Dim1D: + texop += "tex1D"; + break; + case Dim2D: + texop += "tex2D"; + break; + case Dim3D: + texop += "tex3D"; + break; + case DimCube: + texop += "texCUBE"; + break; + case DimRect: + case DimBuffer: + case DimSubpassData: + SPIRV_CROSS_THROW("Buffer texture support is not yet implemented for HLSL"); // TODO + default: + SPIRV_CROSS_THROW("Invalid dimension."); + } - if (gather) - SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3."); - if (offset || coffset) - SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3."); + if (gather) + SPIRV_CROSS_THROW("textureGather is not supported in HLSL shader model 2/3."); + if (offset || coffset) + SPIRV_CROSS_THROW("textureOffset is not supported in HLSL shader model 2/3."); - if (grad_x || grad_y) - texop += "grad"; - else if (lod) - texop += "lod"; - else if (bias) - texop += "bias"; - else if (proj || dref) - texop += "proj"; - } - } + if (grad_x || grad_y) + texop += "grad"; + else if (lod) + texop += "lod"; + else if (bias) + texop += "bias"; + else if (proj || dref) + texop += "proj"; + } + } - expr += texop; - expr += "("; - if (hlsl_options.shader_model < 40) - { - if (combined_image) - SPIRV_CROSS_THROW("Separate images/samplers are not supported in HLSL shader model 
2/3."); - expr += to_expression(img); - } - else if (op != OpImageFetch) - { - string sampler_expr; - if (combined_image) - sampler_expr = to_non_uniform_aware_expression(combined_image->sampler); - else - sampler_expr = to_sampler_expression(img); - expr += sampler_expr; - } + expr += texop; + expr += "("; + if (hlsl_options.shader_model < 40) + { + if (combined_image) + SPIRV_CROSS_THROW("Separate images/samplers are not supported in HLSL shader model 2/3."); + expr += to_expression(img); + } + else if (op != OpImageFetch) + { + string sampler_expr; + if (combined_image) + sampler_expr = to_non_uniform_aware_expression(combined_image->sampler); + else + sampler_expr = to_sampler_expression(img); + expr += sampler_expr; + } - auto swizzle = [](uint32_t comps, uint32_t in_comps) -> const char * { - if (comps == in_comps) - return ""; + auto swizzle = [](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; - switch (comps) - { - case 1: - return ".x"; - case 2: - return ".xy"; - case 3: - return ".xyz"; - default: - return ""; - } - }; + switch (comps) + { + case 1: + return ".x"; + case 2: + return ".xy"; + case 3: + return ".xyz"; + default: + return ""; + } + }; - bool forward = should_forward(coord); + bool forward = should_forward(coord); - // The IR can give us more components than we need, so chop them off as needed. - string coord_expr; - auto &coord_type = expression_type(coord); - if (coord_components != coord_type.vecsize) - coord_expr = to_enclosed_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize); - else - coord_expr = to_expression(coord); + // The IR can give us more components than we need, so chop them off as needed. + string coord_expr; + auto &coord_type = expression_type(coord); + if (coord_components != coord_type.vecsize) + coord_expr = to_enclosed_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize); + else + coord_expr = to_expression(coord); - if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" operations which do this for us. - coord_expr = coord_expr + " / " + to_extract_component_expression(coord, coord_components); + if (proj && hlsl_options.shader_model >= 40) // Legacy HLSL has "proj" operations which do this for us. 
+ coord_expr = coord_expr + " / " + to_extract_component_expression(coord, coord_components); - if (hlsl_options.shader_model < 40) - { - if (dref) - { - if (imgtype.image.dim != spv::Dim1D && imgtype.image.dim != spv::Dim2D) - { - SPIRV_CROSS_THROW( - "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3."); - } + if (hlsl_options.shader_model < 40) + { + if (dref) + { + if (imgtype.image.dim != spv::Dim1D && imgtype.image.dim != spv::Dim2D) + { + SPIRV_CROSS_THROW( + "Depth comparison is only supported for 1D and 2D textures in HLSL shader model 2/3."); + } - if (grad_x || grad_y) - SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3."); + if (grad_x || grad_y) + SPIRV_CROSS_THROW("Depth comparison is not supported for grad sampling in HLSL shader model 2/3."); - for (uint32_t size = coord_components; size < 2; ++size) - coord_expr += ", 0.0"; + for (uint32_t size = coord_components; size < 2; ++size) + coord_expr += ", 0.0"; - forward = forward && should_forward(dref); - coord_expr += ", " + to_expression(dref); - } - else if (lod || bias || proj) - { - for (uint32_t size = coord_components; size < 3; ++size) - coord_expr += ", 0.0"; - } + forward = forward && should_forward(dref); + coord_expr += ", " + to_expression(dref); + } + else if (lod || bias || proj) + { + for (uint32_t size = coord_components; size < 3; ++size) + coord_expr += ", 0.0"; + } - if (lod) - { - coord_expr = "float4(" + coord_expr + ", " + to_expression(lod) + ")"; - } - else if (bias) - { - coord_expr = "float4(" + coord_expr + ", " + to_expression(bias) + ")"; - } - else if (proj) - { - coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(coord, coord_components) + ")"; - } - else if (dref) - { - // A "normal" sample gets fed into tex2Dproj as well, because the - // regular tex2D accepts only two coordinates. - coord_expr = "float4(" + coord_expr + ", 1.0)"; - } + if (lod) + { + coord_expr = "float4(" + coord_expr + ", " + to_expression(lod) + ")"; + } + else if (bias) + { + coord_expr = "float4(" + coord_expr + ", " + to_expression(bias) + ")"; + } + else if (proj) + { + coord_expr = "float4(" + coord_expr + ", " + to_extract_component_expression(coord, coord_components) + ")"; + } + else if (dref) + { + // A "normal" sample gets fed into tex2Dproj as well, because the + // regular tex2D accepts only two coordinates. + coord_expr = "float4(" + coord_expr + ", 1.0)"; + } - if (!!lod + !!bias + !!proj > 1) - SPIRV_CROSS_THROW("Legacy HLSL can only use one of lod/bias/proj modifiers."); - } + if (!!lod + !!bias + !!proj > 1) + SPIRV_CROSS_THROW("Legacy HLSL can only use one of lod/bias/proj modifiers."); + } - if (op == OpImageFetch) - { - if (imgtype.image.dim != DimBuffer && !imgtype.image.ms) - coord_expr = - join("int", coord_components + 1, "(", coord_expr, ", ", lod ? to_expression(lod) : string("0"), ")"); - } - else - expr += ", "; - expr += coord_expr; + if (op == OpImageFetch) + { + if (imgtype.image.dim != DimBuffer && !imgtype.image.ms) + coord_expr = + join("int", coord_components + 1, "(", coord_expr, ", ", lod ? 
to_expression(lod) : string("0"), ")"); + } + else + expr += ", "; + expr += coord_expr; - if (dref && hlsl_options.shader_model >= 40) - { - forward = forward && should_forward(dref); - expr += ", "; + if (dref && hlsl_options.shader_model >= 40) + { + forward = forward && should_forward(dref); + expr += ", "; - if (proj) - expr += to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, coord_components); - else - expr += to_expression(dref); - } + if (proj) + expr += to_enclosed_expression(dref) + " / " + to_extract_component_expression(coord, coord_components); + else + expr += to_expression(dref); + } - if (!dref && (grad_x || grad_y)) - { - forward = forward && should_forward(grad_x); - forward = forward && should_forward(grad_y); - expr += ", "; - expr += to_expression(grad_x); - expr += ", "; - expr += to_expression(grad_y); - } + if (!dref && (grad_x || grad_y)) + { + forward = forward && should_forward(grad_x); + forward = forward && should_forward(grad_y); + expr += ", "; + expr += to_expression(grad_x); + expr += ", "; + expr += to_expression(grad_y); + } - if (!dref && lod && hlsl_options.shader_model >= 40 && op != OpImageFetch) - { - forward = forward && should_forward(lod); - expr += ", "; - expr += to_expression(lod); - } + if (!dref && lod && hlsl_options.shader_model >= 40 && op != OpImageFetch) + { + forward = forward && should_forward(lod); + expr += ", "; + expr += to_expression(lod); + } - if (!dref && bias && hlsl_options.shader_model >= 40) - { - forward = forward && should_forward(bias); - expr += ", "; - expr += to_expression(bias); - } + if (!dref && bias && hlsl_options.shader_model >= 40) + { + forward = forward && should_forward(bias); + expr += ", "; + expr += to_expression(bias); + } - if (coffset) - { - forward = forward && should_forward(coffset); - expr += ", "; - expr += to_expression(coffset); - } - else if (offset) - { - forward = forward && should_forward(offset); - expr += ", "; - expr += to_expression(offset); - } + if (coffset) + { + forward = forward && should_forward(coffset); + expr += ", "; + expr += to_expression(coffset); + } + else if (offset) + { + forward = forward && should_forward(offset); + expr += ", "; + expr += to_expression(offset); + } - if (sample) - { - expr += ", "; - expr += to_expression(sample); - } + if (sample) + { + expr += ", "; + expr += to_expression(sample); + } - expr += ")"; + expr += ")"; - if (dref && hlsl_options.shader_model < 40) - expr += ".x"; + if (dref && hlsl_options.shader_model < 40) + expr += ".x"; - if (op == OpImageQueryLod) - { - // This is rather awkward. - // textureQueryLod returns two values, the "accessed level", - // as well as the actual LOD lambda. - // As far as I can tell, there is no way to get the .x component - // according to GLSL spec, and it depends on the sampler itself. - // Just assume X == Y, so we will need to splat the result to a float2. - statement("float _", id, "_tmp = ", expr, ";"); - statement("float2 _", id, " = _", id, "_tmp.xx;"); - set(id, join("_", id), result_type, true); - } - else - { - emit_op(result_type, id, expr, forward, false); - } + if (op == OpImageQueryLod) + { + // This is rather awkward. + // textureQueryLod returns two values, the "accessed level", + // as well as the actual LOD lambda. + // As far as I can tell, there is no way to get the .x component + // according to GLSL spec, and it depends on the sampler itself. + // Just assume X == Y, so we will need to splat the result to a float2. 
+ statement("float _", id, "_tmp = ", expr, ";"); + statement("float2 _", id, " = _", id, "_tmp.xx;"); + set(id, join("_", id), result_type, true); + } + else + { + emit_op(result_type, id, expr, forward, false); + } - for (auto &inherit : inherited_expressions) - inherit_expression_dependencies(id, inherit); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); - switch (op) - { - case OpImageSampleDrefImplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleProjDrefImplicitLod: - register_control_dependent_expression(id); - break; + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); + break; - default: - break; - } + default: + break; + } } string CompilerHLSL::to_resource_binding(const SPIRVariable &var) { - const auto &type = get(var.basetype); + const auto &type = get(var.basetype); - // We can remap push constant blocks, even if they don't have any binding decoration. - if (type.storage != StorageClassPushConstant && !has_decoration(var.self, DecorationBinding)) - return ""; + // We can remap push constant blocks, even if they don't have any binding decoration. + if (type.storage != StorageClassPushConstant && !has_decoration(var.self, DecorationBinding)) + return ""; - char space = '\0'; + char space = '\0'; - HLSLBindingFlagBits resource_flags = HLSL_BINDING_AUTO_NONE_BIT; + HLSLBindingFlagBits resource_flags = HLSL_BINDING_AUTO_NONE_BIT; - switch (type.basetype) - { - case SPIRType::SampledImage: - space = 't'; // SRV - resource_flags = HLSL_BINDING_AUTO_SRV_BIT; - break; + switch (type.basetype) + { + case SPIRType::SampledImage: + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + break; - case SPIRType::Image: - if (type.image.sampled == 2 && type.image.dim != DimSubpassData) - { - if (has_decoration(var.self, DecorationNonWritable) && hlsl_options.nonwritable_uav_texture_as_srv) - { - space = 't'; // SRV - resource_flags = HLSL_BINDING_AUTO_SRV_BIT; - } - else - { - space = 'u'; // UAV - resource_flags = HLSL_BINDING_AUTO_UAV_BIT; - } - } - else - { - space = 't'; // SRV - resource_flags = HLSL_BINDING_AUTO_SRV_BIT; - } - break; + case SPIRType::Image: + if (type.image.sampled == 2 && type.image.dim != DimSubpassData) + { + if (has_decoration(var.self, DecorationNonWritable) && hlsl_options.nonwritable_uav_texture_as_srv) + { + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } + else + { + space = 'u'; // UAV + resource_flags = HLSL_BINDING_AUTO_UAV_BIT; + } + } + else + { + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + } + break; - case SPIRType::Sampler: - space = 's'; - resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT; - break; + case SPIRType::Sampler: + space = 's'; + resource_flags = HLSL_BINDING_AUTO_SAMPLER_BIT; + break; - case SPIRType::AccelerationStructure: - space = 't'; // SRV - resource_flags = HLSL_BINDING_AUTO_SRV_BIT; - break; + case SPIRType::AccelerationStructure: + space = 't'; // SRV + resource_flags = HLSL_BINDING_AUTO_SRV_BIT; + break; - case SPIRType::Struct: - { - auto storage = type.storage; - if (storage == StorageClassUniform) - { - if (has_decoration(type.self, DecorationBufferBlock)) - { - Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); - space = 
is_readonly ? 't' : 'u'; // UAV - resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; - } - else if (has_decoration(type.self, DecorationBlock)) - { - space = 'b'; // Constant buffers - resource_flags = HLSL_BINDING_AUTO_CBV_BIT; - } - } - else if (storage == StorageClassPushConstant) - { - space = 'b'; // Constant buffers - resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; - } - else if (storage == StorageClassStorageBuffer) - { - // UAV or SRV depending on readonly flag. - Bitset flags = ir.get_buffer_block_flags(var); - bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); - space = is_readonly ? 't' : 'u'; - resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; - } + case SPIRType::Struct: + { + auto storage = type.storage; + if (storage == StorageClassUniform) + { + if (has_decoration(type.self, DecorationBufferBlock)) + { + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); + space = is_readonly ? 't' : 'u'; // UAV + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; + } + else if (has_decoration(type.self, DecorationBlock)) + { + space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_CBV_BIT; + } + } + else if (storage == StorageClassPushConstant) + { + space = 'b'; // Constant buffers + resource_flags = HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT; + } + else if (storage == StorageClassStorageBuffer) + { + // UAV or SRV depending on readonly flag. + Bitset flags = ir.get_buffer_block_flags(var); + bool is_readonly = flags.get(DecorationNonWritable) && !is_hlsl_force_storage_buffer_as_uav(var.self); + space = is_readonly ? 't' : 'u'; + resource_flags = is_readonly ? HLSL_BINDING_AUTO_SRV_BIT : HLSL_BINDING_AUTO_UAV_BIT; + } - break; - } - default: - break; - } + break; + } + default: + break; + } - if (!space) - return ""; + if (!space) + return ""; - uint32_t desc_set = - resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantDescriptorSet : 0u; - uint32_t binding = resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantBinding : 0u; + uint32_t desc_set = + resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantDescriptorSet : 0u; + uint32_t binding = resource_flags == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT ? ResourceBindingPushConstantBinding : 0u; - if (has_decoration(var.self, DecorationBinding)) - binding = get_decoration(var.self, DecorationBinding); - if (has_decoration(var.self, DecorationDescriptorSet)) - desc_set = get_decoration(var.self, DecorationDescriptorSet); + if (has_decoration(var.self, DecorationBinding)) + binding = get_decoration(var.self, DecorationBinding); + if (has_decoration(var.self, DecorationDescriptorSet)) + desc_set = get_decoration(var.self, DecorationDescriptorSet); - return to_resource_register(resource_flags, space, binding, desc_set); + return to_resource_register(resource_flags, space, binding, desc_set); } string CompilerHLSL::to_resource_binding_sampler(const SPIRVariable &var) { - // For combined image samplers. - if (!has_decoration(var.self, DecorationBinding)) - return ""; + // For combined image samplers. 
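// Hedged example of the register annotation this resolves to: a sampler with
// DecorationBinding = 3 and DecorationDescriptorSet = 1 would be declared as
//
//   SamplerState _uTex_sampler : register(s3, space1);   // SM >= 5.1
//   SamplerState _uTex_sampler : register(s3);           // older models, no spaces
//
// (variable name illustrative; the space qualifier comes from the descriptor set).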
+ if (!has_decoration(var.self, DecorationBinding)) + return ""; - return to_resource_register(HLSL_BINDING_AUTO_SAMPLER_BIT, 's', get_decoration(var.self, DecorationBinding), - get_decoration(var.self, DecorationDescriptorSet)); + return to_resource_register(HLSL_BINDING_AUTO_SAMPLER_BIT, 's', get_decoration(var.self, DecorationBinding), + get_decoration(var.self, DecorationDescriptorSet)); } void CompilerHLSL::remap_hlsl_resource_binding(HLSLBindingFlagBits type, uint32_t &desc_set, uint32_t &binding) { - auto itr = resource_bindings.find({ get_execution_model(), desc_set, binding }); - if (itr != end(resource_bindings)) - { - auto &remap = itr->second; - remap.second = true; + auto itr = resource_bindings.find({ get_execution_model(), desc_set, binding }); + if (itr != end(resource_bindings)) + { + auto &remap = itr->second; + remap.second = true; - switch (type) - { - case HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT: - case HLSL_BINDING_AUTO_CBV_BIT: - desc_set = remap.first.cbv.register_space; - binding = remap.first.cbv.register_binding; - break; + switch (type) + { + case HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT: + case HLSL_BINDING_AUTO_CBV_BIT: + desc_set = remap.first.cbv.register_space; + binding = remap.first.cbv.register_binding; + break; - case HLSL_BINDING_AUTO_SRV_BIT: - desc_set = remap.first.srv.register_space; - binding = remap.first.srv.register_binding; - break; + case HLSL_BINDING_AUTO_SRV_BIT: + desc_set = remap.first.srv.register_space; + binding = remap.first.srv.register_binding; + break; - case HLSL_BINDING_AUTO_SAMPLER_BIT: - desc_set = remap.first.sampler.register_space; - binding = remap.first.sampler.register_binding; - break; + case HLSL_BINDING_AUTO_SAMPLER_BIT: + desc_set = remap.first.sampler.register_space; + binding = remap.first.sampler.register_binding; + break; - case HLSL_BINDING_AUTO_UAV_BIT: - desc_set = remap.first.uav.register_space; - binding = remap.first.uav.register_binding; - break; + case HLSL_BINDING_AUTO_UAV_BIT: + desc_set = remap.first.uav.register_space; + binding = remap.first.uav.register_binding; + break; - default: - break; - } - } + default: + break; + } + } } string CompilerHLSL::to_resource_register(HLSLBindingFlagBits flag, char space, uint32_t binding, uint32_t space_set) { - if ((flag & resource_binding_flags) == 0) - { - remap_hlsl_resource_binding(flag, space_set, binding); + if ((flag & resource_binding_flags) == 0) + { + remap_hlsl_resource_binding(flag, space_set, binding); - // The push constant block did not have a binding, and there were no remap for it, - // so, declare without register binding. - if (flag == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT && space_set == ResourceBindingPushConstantDescriptorSet) - return ""; + // The push constant block did not have a binding, and there were no remap for it, + // so, declare without register binding. 
+ if (flag == HLSL_BINDING_AUTO_PUSH_CONSTANT_BIT && space_set == ResourceBindingPushConstantDescriptorSet) + return ""; - if (hlsl_options.shader_model >= 51) - return join(" : register(", space, binding, ", space", space_set, ")"); - else - return join(" : register(", space, binding, ")"); - } - else - return ""; + if (hlsl_options.shader_model >= 51) + return join(" : register(", space, binding, ", space", space_set, ")"); + else + return join(" : register(", space, binding, ")"); + } + else + return ""; } void CompilerHLSL::emit_modern_uniform(const SPIRVariable &var) { - auto &type = get(var.basetype); - switch (type.basetype) - { - case SPIRType::SampledImage: - case SPIRType::Image: - { - bool is_coherent = false; - if (type.basetype == SPIRType::Image && type.image.sampled == 2) - is_coherent = has_decoration(var.self, DecorationCoherent); + auto &type = get(var.basetype); + switch (type.basetype) + { + case SPIRType::SampledImage: + case SPIRType::Image: + { + bool is_coherent = false; + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + is_coherent = has_decoration(var.self, DecorationCoherent); - statement(is_coherent ? "globallycoherent " : "", image_type_hlsl_modern(type, var.self), " ", - to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); + statement(is_coherent ? "globallycoherent " : "", image_type_hlsl_modern(type, var.self), " ", + to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); - if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) - { - // For combined image samplers, also emit a combined image sampler. - if (is_depth_image(type, var.self)) - statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self), - to_resource_binding_sampler(var), ";"); - else - statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self), - to_resource_binding_sampler(var), ";"); - } - break; - } + if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + { + // For combined image samplers, also emit a combined image sampler. 
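// For a sampled 2D image, the pair of statement() calls around here emits two
// declarations along these lines (names and bindings are hypothetical):
//
//   Texture2D<float4> uTex : register(t0);
//   SamplerState _uTex_sampler : register(s0);
//
// and a depth/comparison image swaps in SamplerComparisonState, as below.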
+ if (is_depth_image(type, var.self)) + statement("SamplerComparisonState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self), + to_resource_binding_sampler(var), ";"); + else + statement("SamplerState ", to_sampler_expression(var.self), type_to_array_glsl(type, var.self), + to_resource_binding_sampler(var), ";"); + } + break; + } - case SPIRType::Sampler: - if (comparison_ids.count(var.self)) - statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), - ";"); - else - statement("SamplerState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); - break; + case SPIRType::Sampler: + if (comparison_ids.count(var.self)) + statement("SamplerComparisonState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), + ";"); + else + statement("SamplerState ", to_name(var.self), type_to_array_glsl(type, var.self), to_resource_binding(var), ";"); + break; - default: - statement(variable_decl(var), to_resource_binding(var), ";"); - break; - } + default: + statement(variable_decl(var), to_resource_binding(var), ";"); + break; + } } void CompilerHLSL::emit_legacy_uniform(const SPIRVariable &var) { - auto &type = get(var.basetype); - switch (type.basetype) - { - case SPIRType::Sampler: - case SPIRType::Image: - SPIRV_CROSS_THROW("Separate image and samplers not supported in legacy HLSL."); + auto &type = get(var.basetype); + switch (type.basetype) + { + case SPIRType::Sampler: + case SPIRType::Image: + SPIRV_CROSS_THROW("Separate image and samplers not supported in legacy HLSL."); - default: - statement(variable_decl(var), ";"); - break; - } + default: + statement(variable_decl(var), ";"); + break; + } } void CompilerHLSL::emit_uniform(const SPIRVariable &var) { - add_resource_name(var.self); - if (hlsl_options.shader_model >= 40) - emit_modern_uniform(var); - else - emit_legacy_uniform(var); + add_resource_name(var.self); + if (hlsl_options.shader_model >= 40) + emit_modern_uniform(var); + else + emit_legacy_uniform(var); } bool CompilerHLSL::emit_complex_bitcast(uint32_t, uint32_t, uint32_t) { - return false; + return false; } string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { - if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) - return type_to_glsl(out_type); - else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64) - return type_to_glsl(out_type); - else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) - return "asuint"; - else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt) - return type_to_glsl(out_type); - else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64) - return type_to_glsl(out_type); - else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) - return "asint"; - else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) - return "asfloat"; - else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) - return "asfloat"; - else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) - SPIRV_CROSS_THROW("Double to Int64 is not supported in HLSL."); - else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) - SPIRV_CROSS_THROW("Double to UInt64 is not supported in HLSL."); - else if (out_type.basetype == SPIRType::Double && 
in_type.basetype == SPIRType::Int64) - return "asdouble"; - else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) - return "asdouble"; - else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) - { - if (!requires_explicit_fp16_packing) - { - requires_explicit_fp16_packing = true; - force_recompile(); - } - return "spvUnpackFloat2x16"; - } - else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) - { - if (!requires_explicit_fp16_packing) - { - requires_explicit_fp16_packing = true; - force_recompile(); - } - return "spvPackFloat2x16"; - } - else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) - { - if (hlsl_options.shader_model < 40) - SPIRV_CROSS_THROW("Half to UShort requires Shader Model 4."); - return "(" + type_to_glsl(out_type) + ")f32tof16"; - } - else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) - { - if (hlsl_options.shader_model < 40) - SPIRV_CROSS_THROW("UShort to Half requires Shader Model 4."); - return "(" + type_to_glsl(out_type) + ")f16tof32"; - } - else - return ""; + if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + return "asuint"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64) + return type_to_glsl(out_type); + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + return "asint"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + return "asfloat"; + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + return "asfloat"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + SPIRV_CROSS_THROW("Double to Int64 is not supported in HLSL."); + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + SPIRV_CROSS_THROW("Double to UInt64 is not supported in HLSL."); + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "asdouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "asdouble"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + { + if (!requires_explicit_fp16_packing) + { + requires_explicit_fp16_packing = true; + force_recompile(); + } + return "spvUnpackFloat2x16"; + } + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + { + if (!requires_explicit_fp16_packing) + { + requires_explicit_fp16_packing = true; + force_recompile(); + } + return "spvPackFloat2x16"; + } + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("Half to UShort requires Shader Model 4."); + return "(" + type_to_glsl(out_type) + ")f32tof16"; + } + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + { + if (hlsl_options.shader_model < 40) + 
SPIRV_CROSS_THROW("UShort to Half requires Shader Model 4.");
+        return "(" + type_to_glsl(out_type) + ")f16tof32";
+    }
+    else
+        return "";
 }
 
 void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t count)
 {
-    auto op = static_cast<GLSLstd450>(eop);
+    auto op = static_cast<GLSLstd450>(eop);
 
-    // If we need to do implicit bitcasts, make sure we do it with the correct type.
-    uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count);
-    auto int_type = to_signed_basetype(integer_width);
-    auto uint_type = to_unsigned_basetype(integer_width);
+    // If we need to do implicit bitcasts, make sure we do it with the correct type.
+    uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, count);
+    auto int_type = to_signed_basetype(integer_width);
+    auto uint_type = to_unsigned_basetype(integer_width);
 
-    op = get_remapped_glsl_op(op);
+    op = get_remapped_glsl_op(op);
 
-    switch (op)
-    {
-    case GLSLstd450InverseSqrt:
-        emit_unary_func_op(result_type, id, args[0], "rsqrt");
-        break;
+    switch (op)
+    {
+    case GLSLstd450InverseSqrt:
+        emit_unary_func_op(result_type, id, args[0], "rsqrt");
+        break;
 
-    case GLSLstd450Fract:
-        emit_unary_func_op(result_type, id, args[0], "frac");
-        break;
+    case GLSLstd450Fract:
+        emit_unary_func_op(result_type, id, args[0], "frac");
+        break;
 
-    case GLSLstd450RoundEven:
-        if (hlsl_options.shader_model < 40)
-            SPIRV_CROSS_THROW("roundEven is not supported in HLSL shader model 2/3.");
-        emit_unary_func_op(result_type, id, args[0], "round");
-        break;
+    case GLSLstd450RoundEven:
+        if (hlsl_options.shader_model < 40)
+            SPIRV_CROSS_THROW("roundEven is not supported in HLSL shader model 2/3.");
+        emit_unary_func_op(result_type, id, args[0], "round");
+        break;
 
-    case GLSLstd450Trunc:
-        emit_unary_func_op(result_type, id, args[0], "trunc");
-        break;
+    case GLSLstd450Trunc:
+        emit_unary_func_op(result_type, id, args[0], "trunc");
+        break;
 
-    case GLSLstd450Acosh:
-    case GLSLstd450Asinh:
-    case GLSLstd450Atanh:
-        // These are not supported in HLSL, always emulate them.
-        emit_emulated_ahyper_op(result_type, id, args[0], op);
-        break;
+    case GLSLstd450Acosh:
+    case GLSLstd450Asinh:
+    case GLSLstd450Atanh:
+        // These are not supported in HLSL, always emulate them.
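// The emulated inverse hyperbolics follow the standard log identities:
//
//   acosh(x) = log(x + sqrt(x * x - 1))
//   asinh(x) = log(x + sqrt(x * x + 1))
//   atanh(x) = log((1 + x) / (1 - x)) / 2
//
// (shown for reference; the emitted helper may differ cosmetically).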
+ emit_emulated_ahyper_op(result_type, id, args[0], op); + break; - case GLSLstd450FMix: - case GLSLstd450IMix: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp"); - break; + case GLSLstd450FMix: + case GLSLstd450IMix: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp"); + break; - case GLSLstd450Atan2: - emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); - break; + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan2"); + break; - case GLSLstd450Fma: - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mad"); - break; + case GLSLstd450Fma: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mad"); + break; - case GLSLstd450InterpolateAtCentroid: - emit_unary_func_op(result_type, id, args[0], "EvaluateAttributeAtCentroid"); - break; - case GLSLstd450InterpolateAtSample: - emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeAtSample"); - break; - case GLSLstd450InterpolateAtOffset: - emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeSnapped"); - break; + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "EvaluateAttributeAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "EvaluateAttributeSnapped"); + break; - case GLSLstd450PackHalf2x16: - if (!requires_fp16_packing) - { - requires_fp16_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvPackHalf2x16"); - break; + case GLSLstd450PackHalf2x16: + if (!requires_fp16_packing) + { + requires_fp16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackHalf2x16"); + break; - case GLSLstd450UnpackHalf2x16: - if (!requires_fp16_packing) - { - requires_fp16_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvUnpackHalf2x16"); - break; + case GLSLstd450UnpackHalf2x16: + if (!requires_fp16_packing) + { + requires_fp16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackHalf2x16"); + break; - case GLSLstd450PackSnorm4x8: - if (!requires_snorm8_packing) - { - requires_snorm8_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvPackSnorm4x8"); - break; + case GLSLstd450PackSnorm4x8: + if (!requires_snorm8_packing) + { + requires_snorm8_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm4x8"); + break; - case GLSLstd450UnpackSnorm4x8: - if (!requires_snorm8_packing) - { - requires_snorm8_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm4x8"); - break; + case GLSLstd450UnpackSnorm4x8: + if (!requires_snorm8_packing) + { + requires_snorm8_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm4x8"); + break; - case GLSLstd450PackUnorm4x8: - if (!requires_unorm8_packing) - { - requires_unorm8_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvPackUnorm4x8"); - break; + case GLSLstd450PackUnorm4x8: + if (!requires_unorm8_packing) + { + requires_unorm8_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackUnorm4x8"); + break; - 
case GLSLstd450UnpackUnorm4x8: - if (!requires_unorm8_packing) - { - requires_unorm8_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm4x8"); - break; + case GLSLstd450UnpackUnorm4x8: + if (!requires_unorm8_packing) + { + requires_unorm8_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm4x8"); + break; - case GLSLstd450PackSnorm2x16: - if (!requires_snorm16_packing) - { - requires_snorm16_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvPackSnorm2x16"); - break; + case GLSLstd450PackSnorm2x16: + if (!requires_snorm16_packing) + { + requires_snorm16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackSnorm2x16"); + break; - case GLSLstd450UnpackSnorm2x16: - if (!requires_snorm16_packing) - { - requires_snorm16_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm2x16"); - break; + case GLSLstd450UnpackSnorm2x16: + if (!requires_snorm16_packing) + { + requires_snorm16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackSnorm2x16"); + break; - case GLSLstd450PackUnorm2x16: - if (!requires_unorm16_packing) - { - requires_unorm16_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvPackUnorm2x16"); - break; + case GLSLstd450PackUnorm2x16: + if (!requires_unorm16_packing) + { + requires_unorm16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvPackUnorm2x16"); + break; - case GLSLstd450UnpackUnorm2x16: - if (!requires_unorm16_packing) - { - requires_unorm16_packing = true; - force_recompile(); - } - emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm2x16"); - break; + case GLSLstd450UnpackUnorm2x16: + if (!requires_unorm16_packing) + { + requires_unorm16_packing = true; + force_recompile(); + } + emit_unary_func_op(result_type, id, args[0], "spvUnpackUnorm2x16"); + break; - case GLSLstd450PackDouble2x32: - case GLSLstd450UnpackDouble2x32: - SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL."); + case GLSLstd450PackDouble2x32: + case GLSLstd450UnpackDouble2x32: + SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL."); - case GLSLstd450FindILsb: - { - auto basetype = expression_type(args[0]).basetype; - emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype); - break; - } + case GLSLstd450FindILsb: + { + auto basetype = expression_type(args[0]).basetype; + emit_unary_func_op_cast(result_type, id, args[0], "firstbitlow", basetype, basetype); + break; + } - case GLSLstd450FindSMsb: - emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type); - break; + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", int_type, int_type); + break; - case GLSLstd450FindUMsb: - emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", uint_type, uint_type); - break; + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "firstbithigh", uint_type, uint_type); + break; - case GLSLstd450MatrixInverse: - { - auto &type = get(result_type); - if (type.vecsize == 2 && type.columns == 2) - { - if (!requires_inverse_2x2) - { - requires_inverse_2x2 = true; - force_recompile(); - } - } - else if (type.vecsize == 3 && type.columns == 3) - { - if (!requires_inverse_3x3) - { - 
requires_inverse_3x3 = true; - force_recompile(); - } - } - else if (type.vecsize == 4 && type.columns == 4) - { - if (!requires_inverse_4x4) - { - requires_inverse_4x4 = true; - force_recompile(); - } - } - emit_unary_func_op(result_type, id, args[0], "spvInverse"); - break; - } + case GLSLstd450MatrixInverse: + { + auto &type = get(result_type); + if (type.vecsize == 2 && type.columns == 2) + { + if (!requires_inverse_2x2) + { + requires_inverse_2x2 = true; + force_recompile(); + } + } + else if (type.vecsize == 3 && type.columns == 3) + { + if (!requires_inverse_3x3) + { + requires_inverse_3x3 = true; + force_recompile(); + } + } + else if (type.vecsize == 4 && type.columns == 4) + { + if (!requires_inverse_4x4) + { + requires_inverse_4x4 = true; + force_recompile(); + } + } + emit_unary_func_op(result_type, id, args[0], "spvInverse"); + break; + } - case GLSLstd450Normalize: - // HLSL does not support scalar versions here. - if (expression_type(args[0]).vecsize == 1) - { - // Returns -1 or 1 for valid input, sign() does the job. - emit_unary_func_op(result_type, id, args[0], "sign"); - } - else - CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); - break; + case GLSLstd450Normalize: + // HLSL does not support scalar versions here. + if (expression_type(args[0]).vecsize == 1) + { + // Returns -1 or 1 for valid input, sign() does the job. + emit_unary_func_op(result_type, id, args[0], "sign"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; - case GLSLstd450Reflect: - if (get(result_type).vecsize == 1) - { - if (!requires_scalar_reflect) - { - requires_scalar_reflect = true; - force_recompile(); - } - emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); - } - else - CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); - break; + case GLSLstd450Reflect: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_reflect) + { + requires_scalar_reflect = true; + force_recompile(); + } + emit_binary_func_op(result_type, id, args[0], args[1], "spvReflect"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; - case GLSLstd450Refract: - if (get(result_type).vecsize == 1) - { - if (!requires_scalar_refract) - { - requires_scalar_refract = true; - force_recompile(); - } - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); - } - else - CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); - break; + case GLSLstd450Refract: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_refract) + { + requires_scalar_refract = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvRefract"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; - case GLSLstd450FaceForward: - if (get(result_type).vecsize == 1) - { - if (!requires_scalar_faceforward) - { - requires_scalar_faceforward = true; - force_recompile(); - } - emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); - } - else - CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); - break; + case GLSLstd450FaceForward: + if (get(result_type).vecsize == 1) + { + if (!requires_scalar_faceforward) + { + requires_scalar_faceforward = true; + force_recompile(); + } + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "spvFaceForward"); + } + else + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; - case GLSLstd450NMin: - 
CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMin, args, count); - break; + case GLSLstd450NMin: + CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMin, args, count); + break; - case GLSLstd450NMax: - CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMax, args, count); - break; + case GLSLstd450NMax: + CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FMax, args, count); + break; - case GLSLstd450NClamp: - CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FClamp, args, count); - break; + case GLSLstd450NClamp: + CompilerGLSL::emit_glsl_op(result_type, id, GLSLstd450FClamp, args, count); + break; - default: - CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); - break; - } + default: + CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); + break; + } } void CompilerHLSL::read_access_chain_array(const string &lhs, const SPIRAccessChain &chain) { - auto &type = get(chain.basetype); + auto &type = get(chain.basetype); - // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops. - auto ident = get_unique_identifier(); + // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops. + auto ident = get_unique_identifier(); - statement("[unroll]"); - statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", - ident, "++)"); - begin_scope(); - auto subchain = chain; - subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); - subchain.basetype = type.parent_type; - if (!get(subchain.basetype).array.empty()) - subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); - read_access_chain(nullptr, join(lhs, "[", ident, "]"), subchain); - end_scope(); + statement("[unroll]"); + statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", + ident, "++)"); + begin_scope(); + auto subchain = chain; + subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); + subchain.basetype = type.parent_type; + if (!get(subchain.basetype).array.empty()) + subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); + read_access_chain(nullptr, join(lhs, "[", ident, "]"), subchain); + end_scope(); } void CompilerHLSL::read_access_chain_struct(const string &lhs, const SPIRAccessChain &chain) { - auto &type = get(chain.basetype); - auto subchain = chain; - uint32_t member_count = uint32_t(type.member_types.size()); + auto &type = get(chain.basetype); + auto subchain = chain; + uint32_t member_count = uint32_t(type.member_types.size()); - for (uint32_t i = 0; i < member_count; i++) - { - uint32_t offset = type_struct_member_offset(type, i); - subchain.static_index = chain.static_index + offset; - subchain.basetype = type.member_types[i]; + for (uint32_t i = 0; i < member_count; i++) + { + uint32_t offset = type_struct_member_offset(type, i); + subchain.static_index = chain.static_index + offset; + subchain.basetype = type.member_types[i]; - subchain.matrix_stride = 0; - subchain.array_stride = 0; - subchain.row_major_matrix = false; + subchain.matrix_stride = 0; + subchain.array_stride = 0; + subchain.row_major_matrix = false; - auto &member_type = get(subchain.basetype); - if (member_type.columns > 1) - { - subchain.matrix_stride = type_struct_member_matrix_stride(type, i); - subchain.row_major_matrix = has_member_decoration(type.self, 
i, DecorationRowMajor); - } + auto &member_type = get(subchain.basetype); + if (member_type.columns > 1) + { + subchain.matrix_stride = type_struct_member_matrix_stride(type, i); + subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor); + } - if (!member_type.array.empty()) - subchain.array_stride = type_struct_member_array_stride(type, i); + if (!member_type.array.empty()) + subchain.array_stride = type_struct_member_array_stride(type, i); - read_access_chain(nullptr, join(lhs, ".", to_member_name(type, i)), subchain); - } + read_access_chain(nullptr, join(lhs, ".", to_member_name(type, i)), subchain); + } } void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIRAccessChain &chain) { - auto &type = get(chain.basetype); + auto &type = get(chain.basetype); - SPIRType target_type { is_scalar(type) ? OpTypeInt : type.op }; - target_type.basetype = SPIRType::UInt; - target_type.vecsize = type.vecsize; - target_type.columns = type.columns; + SPIRType target_type { is_scalar(type) ? OpTypeInt : type.op }; + target_type.basetype = SPIRType::UInt; + target_type.vecsize = type.vecsize; + target_type.columns = type.columns; - if (!type.array.empty()) - { - read_access_chain_array(lhs, chain); - return; - } - else if (type.basetype == SPIRType::Struct) - { - read_access_chain_struct(lhs, chain); - return; - } - else if (type.width != 32 && !hlsl_options.enable_16bit_types) - SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and " - "native 16-bit types are enabled."); + if (!type.array.empty()) + { + read_access_chain_array(lhs, chain); + return; + } + else if (type.basetype == SPIRType::Struct) + { + read_access_chain_struct(lhs, chain); + return; + } + else if (type.width != 32 && !hlsl_options.enable_16bit_types) + SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported, unless SM 6.2 and " + "native 16-bit types are enabled."); - string base = chain.base; - if (has_decoration(chain.self, DecorationNonUniform)) - convert_non_uniform_expression(base, chain.self); + string base = chain.base; + if (has_decoration(chain.self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain.self); - bool templated_load = hlsl_options.shader_model >= 62; - string load_expr; + bool templated_load = hlsl_options.shader_model >= 62; + string load_expr; - string template_expr; - if (templated_load) - template_expr = join("<", type_to_glsl(type), ">"); + string template_expr; + if (templated_load) + template_expr = join("<", type_to_glsl(type), ">"); - // Load a vector or scalar. - if (type.columns == 1 && !chain.row_major_matrix) - { - const char *load_op = nullptr; - switch (type.vecsize) - { - case 1: - load_op = "Load"; - break; - case 2: - load_op = "Load2"; - break; - case 3: - load_op = "Load3"; - break; - case 4: - load_op = "Load4"; - break; - default: - SPIRV_CROSS_THROW("Unknown vector size."); - } + // Load a vector or scalar. 
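// Hedged sketch of the HLSL this path generates for, say, a float3 read at
// byte offset 16 from a buffer named _buf (names and offsets illustrative):
//
//   asfloat(_buf.Load3(16))     // SM < 6.2: raw uint load, bitcast added later
//   _buf.Load<float3>(16)       // SM >= 6.2: templated load, no bitcast needed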
+ if (type.columns == 1 && !chain.row_major_matrix) + { + const char *load_op = nullptr; + switch (type.vecsize) + { + case 1: + load_op = "Load"; + break; + case 2: + load_op = "Load2"; + break; + case 3: + load_op = "Load3"; + break; + case 4: + load_op = "Load4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } - if (templated_load) - load_op = "Load"; + if (templated_load) + load_op = "Load"; - load_expr = join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")"); - } - else if (type.columns == 1) - { - // Strided load since we are loading a column from a row-major matrix. - if (templated_load) - { - auto scalar_type = type; - scalar_type.vecsize = 1; - scalar_type.columns = 1; - template_expr = join("<", type_to_glsl(scalar_type), ">"); - if (type.vecsize > 1) - load_expr += type_to_glsl(type) + "("; - } - else if (type.vecsize > 1) - { - load_expr = type_to_glsl(target_type); - load_expr += "("; - } + load_expr = join(base, ".", load_op, template_expr, "(", chain.dynamic_index, chain.static_index, ")"); + } + else if (type.columns == 1) + { + // Strided load since we are loading a column from a row-major matrix. + if (templated_load) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + if (type.vecsize > 1) + load_expr += type_to_glsl(type) + "("; + } + else if (type.vecsize > 1) + { + load_expr = type_to_glsl(target_type); + load_expr += "("; + } - for (uint32_t r = 0; r < type.vecsize; r++) - { - load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, - chain.static_index + r * chain.matrix_stride, ")"); - if (r + 1 < type.vecsize) - load_expr += ", "; - } + for (uint32_t r = 0; r < type.vecsize; r++) + { + load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, + chain.static_index + r * chain.matrix_stride, ")"); + if (r + 1 < type.vecsize) + load_expr += ", "; + } - if (type.vecsize > 1) - load_expr += ")"; - } - else if (!chain.row_major_matrix) - { - // Load a matrix, column-major, the easy case. - const char *load_op = nullptr; - switch (type.vecsize) - { - case 1: - load_op = "Load"; - break; - case 2: - load_op = "Load2"; - break; - case 3: - load_op = "Load3"; - break; - case 4: - load_op = "Load4"; - break; - default: - SPIRV_CROSS_THROW("Unknown vector size."); - } + if (type.vecsize > 1) + load_expr += ")"; + } + else if (!chain.row_major_matrix) + { + // Load a matrix, column-major, the easy case. + const char *load_op = nullptr; + switch (type.vecsize) + { + case 1: + load_op = "Load"; + break; + case 2: + load_op = "Load2"; + break; + case 3: + load_op = "Load3"; + break; + case 4: + load_op = "Load4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } - if (templated_load) - { - auto vector_type = type; - vector_type.columns = 1; - template_expr = join("<", type_to_glsl(vector_type), ">"); - load_expr = type_to_glsl(type); - load_op = "Load"; - } - else - { - // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, - // so row-major is technically column-major ... 
- load_expr = type_to_glsl(target_type); - } - load_expr += "("; + if (templated_load) + { + auto vector_type = type; + vector_type.columns = 1; + template_expr = join("<", type_to_glsl(vector_type), ">"); + load_expr = type_to_glsl(type); + load_op = "Load"; + } + else + { + // Note, this loading style in HLSL is *actually* row-major, but we always treat matrices as transposed in this backend, + // so row-major is technically column-major ... + load_expr = type_to_glsl(target_type); + } + load_expr += "("; - for (uint32_t c = 0; c < type.columns; c++) - { - load_expr += join(base, ".", load_op, template_expr, "(", chain.dynamic_index, - chain.static_index + c * chain.matrix_stride, ")"); - if (c + 1 < type.columns) - load_expr += ", "; - } - load_expr += ")"; - } - else - { - // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern - // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ... + for (uint32_t c = 0; c < type.columns; c++) + { + load_expr += join(base, ".", load_op, template_expr, "(", chain.dynamic_index, + chain.static_index + c * chain.matrix_stride, ")"); + if (c + 1 < type.columns) + load_expr += ", "; + } + load_expr += ")"; + } + else + { + // Pick out elements one by one ... Hopefully compilers are smart enough to recognize this pattern + // considering HLSL is "row-major decl", but "column-major" memory layout (basically implicit transpose model, ugh) ... - if (templated_load) - { - load_expr = type_to_glsl(type); - auto scalar_type = type; - scalar_type.vecsize = 1; - scalar_type.columns = 1; - template_expr = join("<", type_to_glsl(scalar_type), ">"); - } - else - load_expr = type_to_glsl(target_type); + if (templated_load) + { + load_expr = type_to_glsl(type); + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } + else + load_expr = type_to_glsl(target_type); - load_expr += "("; + load_expr += "("; - for (uint32_t c = 0; c < type.columns; c++) - { - for (uint32_t r = 0; r < type.vecsize; r++) - { - load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, - chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")"); + for (uint32_t c = 0; c < type.columns; c++) + { + for (uint32_t r = 0; r < type.vecsize; r++) + { + load_expr += join(base, ".Load", template_expr, "(", chain.dynamic_index, + chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ")"); - if ((r + 1 < type.vecsize) || (c + 1 < type.columns)) - load_expr += ", "; - } - } - load_expr += ")"; - } + if ((r + 1 < type.vecsize) || (c + 1 < type.columns)) + load_expr += ", "; + } + } + load_expr += ")"; + } - if (!templated_load) - { - auto bitcast_op = bitcast_glsl_op(type, target_type); - if (!bitcast_op.empty()) - load_expr = join(bitcast_op, "(", load_expr, ")"); - } + if (!templated_load) + { + auto bitcast_op = bitcast_glsl_op(type, target_type); + if (!bitcast_op.empty()) + load_expr = join(bitcast_op, "(", load_expr, ")"); + } - if (lhs.empty()) - { - assert(expr); - *expr = std::move(load_expr); - } - else - statement(lhs, " = ", load_expr, ";"); + if (lhs.empty()) + { + assert(expr); + *expr = std::move(load_expr); + } + else + statement(lhs, " = ", load_expr, ";"); } void CompilerHLSL::emit_load(const Instruction &instruction) { - auto ops = stream(instruction); + auto ops = stream(instruction); - auto *chain = maybe_get(ops[2]); - if (chain) - { - uint32_t 
result_type = ops[0];
-        uint32_t id = ops[1];
-        uint32_t ptr = ops[2];
+    auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
+    if (chain)
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        uint32_t ptr = ops[2];
 
-        auto &type = get<SPIRType>(result_type);
-        bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct;
+        auto &type = get<SPIRType>(result_type);
+        bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct;
 
-        if (composite_load)
-        {
-            // We cannot make this work in one single expression as we might have nested structures and arrays,
-            // so unroll the load to an uninitialized temporary.
-            emit_uninitialized_temporary_expression(result_type, id);
-            read_access_chain(nullptr, to_expression(id), *chain);
-            track_expression_read(chain->self);
-        }
-        else
-        {
-            string load_expr;
-            read_access_chain(&load_expr, "", *chain);
+        if (composite_load)
+        {
+            // We cannot make this work in one single expression as we might have nested structures and arrays,
+            // so unroll the load to an uninitialized temporary.
+            emit_uninitialized_temporary_expression(result_type, id);
+            read_access_chain(nullptr, to_expression(id), *chain);
+            track_expression_read(chain->self);
+        }
+        else
+        {
+            string load_expr;
+            read_access_chain(&load_expr, "", *chain);
 
-            bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
+            bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
 
-            // If we are forwarding this load,
-            // don't register the read to access chain here, defer that to when we actually use the expression,
-            // using the add_implied_read_expression mechanism.
-            if (!forward)
-                track_expression_read(chain->self);
+            // If we are forwarding this load,
+            // don't register the read to access chain here, defer that to when we actually use the expression,
+            // using the add_implied_read_expression mechanism.
+            if (!forward)
+                track_expression_read(chain->self);
 
-            // Do not forward complex load sequences like matrices, structs and arrays.
-            if (type.columns > 1)
-                forward = false;
+            // Do not forward complex load sequences like matrices, structs and arrays.
+            if (type.columns > 1)
+                forward = false;
 
-            auto &e = emit_op(result_type, id, load_expr, forward, true);
-            e.need_transpose = false;
-            register_read(id, ptr, forward);
-            inherit_expression_dependencies(id, ptr);
-            if (forward)
-                add_implied_read_expression(e, chain->self);
-        }
-    }
-    else
-        CompilerGLSL::emit_instruction(instruction);
+            auto &e = emit_op(result_type, id, load_expr, forward, true);
+            e.need_transpose = false;
+            register_read(id, ptr, forward);
+            inherit_expression_dependencies(id, ptr);
+            if (forward)
+                add_implied_read_expression(e, chain->self);
+        }
+    }
+    else
+        CompilerGLSL::emit_instruction(instruction);
 }
 
 void CompilerHLSL::write_access_chain_array(const SPIRAccessChain &chain, uint32_t value, const SmallVector<uint32_t> &composite_chain)
 {
-    auto *ptype = &get<SPIRType>(chain.basetype);
-    while (ptype->pointer)
-    {
-        ptype = &get<SPIRType>(ptype->basetype);
-    }
-    auto &type = *ptype;
+    auto *ptype = &get<SPIRType>(chain.basetype);
+    while (ptype->pointer)
+    {
+        ptype = &get<SPIRType>(ptype->basetype);
+    }
+    auto &type = *ptype;
 
-    // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops.
-    auto ident = get_unique_identifier();
+    // Need to use a reserved identifier here since it might shadow an identifier in the access chain input or other loops.
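// For reference, the loop emitted below unrolls the array store one element
// at a time; with a hypothetical identifier _i and an array_stride of 16 the
// generated HLSL reads roughly:
//
//   [unroll]
//   for (int _i = 0; _i < 4; _i++)
//   {
//       /* store element _i at byte offset (_i * 16 + dynamic_index + static_index) */
//   }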
+ auto ident = get_unique_identifier(); - uint32_t id = ir.increase_bound_by(2); - uint32_t int_type_id = id + 1; - SPIRType int_type { OpTypeInt }; - int_type.basetype = SPIRType::Int; - int_type.width = 32; - set(int_type_id, int_type); - set(id, ident, int_type_id, true); - set_name(id, ident); - suppressed_usage_tracking.insert(id); + uint32_t id = ir.increase_bound_by(2); + uint32_t int_type_id = id + 1; + SPIRType int_type { OpTypeInt }; + int_type.basetype = SPIRType::Int; + int_type.width = 32; + set(int_type_id, int_type); + set(id, ident, int_type_id, true); + set_name(id, ident); + suppressed_usage_tracking.insert(id); - statement("[unroll]"); - statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", - ident, "++)"); - begin_scope(); - auto subchain = chain; - subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); - subchain.basetype = type.parent_type; + statement("[unroll]"); + statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ", + ident, "++)"); + begin_scope(); + auto subchain = chain; + subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); + subchain.basetype = type.parent_type; - // Forcefully allow us to use an ID here by setting MSB. - auto subcomposite_chain = composite_chain; - subcomposite_chain.push_back(0x80000000u | id); + // Forcefully allow us to use an ID here by setting MSB. + auto subcomposite_chain = composite_chain; + subcomposite_chain.push_back(0x80000000u | id); - if (!get(subchain.basetype).array.empty()) - subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); + if (!get(subchain.basetype).array.empty()) + subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride); - write_access_chain(subchain, value, subcomposite_chain); - end_scope(); + write_access_chain(subchain, value, subcomposite_chain); + end_scope(); } void CompilerHLSL::write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value, const SmallVector &composite_chain) { - auto &type = get(chain.basetype); - uint32_t member_count = uint32_t(type.member_types.size()); - auto subchain = chain; + auto &type = get(chain.basetype); + uint32_t member_count = uint32_t(type.member_types.size()); + auto subchain = chain; - auto subcomposite_chain = composite_chain; - subcomposite_chain.push_back(0); + auto subcomposite_chain = composite_chain; + subcomposite_chain.push_back(0); - for (uint32_t i = 0; i < member_count; i++) - { - uint32_t offset = type_struct_member_offset(type, i); - subchain.static_index = chain.static_index + offset; - subchain.basetype = type.member_types[i]; + for (uint32_t i = 0; i < member_count; i++) + { + uint32_t offset = type_struct_member_offset(type, i); + subchain.static_index = chain.static_index + offset; + subchain.basetype = type.member_types[i]; - subchain.matrix_stride = 0; - subchain.array_stride = 0; - subchain.row_major_matrix = false; + subchain.matrix_stride = 0; + subchain.array_stride = 0; + subchain.row_major_matrix = false; - auto &member_type = get(subchain.basetype); - if (member_type.columns > 1) - { - subchain.matrix_stride = type_struct_member_matrix_stride(type, i); - subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor); - } + auto &member_type = get(subchain.basetype); + if (member_type.columns > 1) + { + subchain.matrix_stride = type_struct_member_matrix_stride(type, i); 
+            subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor);
+        }
 
-        if (!member_type.array.empty())
-            subchain.array_stride = type_struct_member_array_stride(type, i);
+        if (!member_type.array.empty())
+            subchain.array_stride = type_struct_member_array_stride(type, i);
 
-        subcomposite_chain.back() = i;
-        write_access_chain(subchain, value, subcomposite_chain);
-    }
+        subcomposite_chain.back() = i;
+        write_access_chain(subchain, value, subcomposite_chain);
+    }
 }
 
 string CompilerHLSL::write_access_chain_value(uint32_t value, const SmallVector<uint32_t> &composite_chain, bool enclose)
 {
-    string ret;
-    if (composite_chain.empty())
-        ret = to_expression(value);
-    else
-    {
-        AccessChainMeta meta;
-        ret = access_chain_internal(value, composite_chain.data(), uint32_t(composite_chain.size()),
-                                    ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, &meta);
-    }
+    string ret;
+    if (composite_chain.empty())
+        ret = to_expression(value);
+    else
+    {
+        AccessChainMeta meta;
+        ret = access_chain_internal(value, composite_chain.data(), uint32_t(composite_chain.size()),
+                                    ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, &meta);
+    }
 
-    if (enclose)
-        ret = enclose_expression(ret);
-    return ret;
+    if (enclose)
+        ret = enclose_expression(ret);
+    return ret;
 }
 
 void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value, const SmallVector<uint32_t> &composite_chain)
 {
-    auto &type = get<SPIRType>(chain.basetype);
+    auto &type = get<SPIRType>(chain.basetype);
 
-    // Make sure we trigger a read of the constituents in the access chain.
-    track_expression_read(chain.self);
+    // Make sure we trigger a read of the constituents in the access chain.
+    track_expression_read(chain.self);
 
-    SPIRType target_type { is_scalar(type) ? OpTypeInt : type.op };
-    target_type.basetype = SPIRType::UInt;
-    target_type.vecsize = type.vecsize;
-    target_type.columns = type.columns;
+    SPIRType target_type { is_scalar(type) ?
OpTypeInt : type.op }; + target_type.basetype = SPIRType::UInt; + target_type.vecsize = type.vecsize; + target_type.columns = type.columns; - if (!type.array.empty()) - { - write_access_chain_array(chain, value, composite_chain); - register_write(chain.self); - return; - } - else if (type.basetype == SPIRType::Struct) - { - write_access_chain_struct(chain, value, composite_chain); - register_write(chain.self); - return; - } - else if (type.width != 32 && !hlsl_options.enable_16bit_types) - SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported, unless SM 6.2 and " - "native 16-bit types are enabled."); + if (!type.array.empty()) + { + write_access_chain_array(chain, value, composite_chain); + register_write(chain.self); + return; + } + else if (type.basetype == SPIRType::Struct) + { + write_access_chain_struct(chain, value, composite_chain); + register_write(chain.self); + return; + } + else if (type.width != 32 && !hlsl_options.enable_16bit_types) + SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported, unless SM 6.2 and " + "native 16-bit types are enabled."); - bool templated_store = hlsl_options.shader_model >= 62; + bool templated_store = hlsl_options.shader_model >= 62; - auto base = chain.base; - if (has_decoration(chain.self, DecorationNonUniform)) - convert_non_uniform_expression(base, chain.self); + auto base = chain.base; + if (has_decoration(chain.self, DecorationNonUniform)) + convert_non_uniform_expression(base, chain.self); - string template_expr; - if (templated_store) - template_expr = join("<", type_to_glsl(type), ">"); + string template_expr; + if (templated_store) + template_expr = join("<", type_to_glsl(type), ">"); - if (type.columns == 1 && !chain.row_major_matrix) - { - const char *store_op = nullptr; - switch (type.vecsize) - { - case 1: - store_op = "Store"; - break; - case 2: - store_op = "Store2"; - break; - case 3: - store_op = "Store3"; - break; - case 4: - store_op = "Store4"; - break; - default: - SPIRV_CROSS_THROW("Unknown vector size."); - } + if (type.columns == 1 && !chain.row_major_matrix) + { + const char *store_op = nullptr; + switch (type.vecsize) + { + case 1: + store_op = "Store"; + break; + case 2: + store_op = "Store2"; + break; + case 3: + store_op = "Store3"; + break; + case 4: + store_op = "Store4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } - auto store_expr = write_access_chain_value(value, composite_chain, false); + auto store_expr = write_access_chain_value(value, composite_chain, false); - if (!templated_store) - { - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - } - else - store_op = "Store"; - statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ", - store_expr, ");"); - } - else if (type.columns == 1) - { - if (templated_store) - { - auto scalar_type = type; - scalar_type.vecsize = 1; - scalar_type.columns = 1; - template_expr = join("<", type_to_glsl(scalar_type), ">"); - } + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } + else + store_op = "Store"; + statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, chain.static_index, ", ", + store_expr, ");"); + } + else if (type.columns == 1) + { + if (templated_store) + { + auto scalar_type = type; + scalar_type.vecsize = 1; 
+ scalar_type.columns = 1; + template_expr = join("<", type_to_glsl(scalar_type), ">"); + } - // Strided store. - for (uint32_t r = 0; r < type.vecsize; r++) - { - auto store_expr = write_access_chain_value(value, composite_chain, true); - if (type.vecsize > 1) - { - store_expr += "."; - store_expr += index_to_swizzle(r); - } - remove_duplicate_swizzle(store_expr); + // Strided store. + for (uint32_t r = 0; r < type.vecsize; r++) + { + auto store_expr = write_access_chain_value(value, composite_chain, true); + if (type.vecsize > 1) + { + store_expr += "."; + store_expr += index_to_swizzle(r); + } + remove_duplicate_swizzle(store_expr); - if (!templated_store) - { - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - } + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } - statement(base, ".Store", template_expr, "(", chain.dynamic_index, - chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");"); - } - } - else if (!chain.row_major_matrix) - { - const char *store_op = nullptr; - switch (type.vecsize) - { - case 1: - store_op = "Store"; - break; - case 2: - store_op = "Store2"; - break; - case 3: - store_op = "Store3"; - break; - case 4: - store_op = "Store4"; - break; - default: - SPIRV_CROSS_THROW("Unknown vector size."); - } + statement(base, ".Store", template_expr, "(", chain.dynamic_index, + chain.static_index + chain.matrix_stride * r, ", ", store_expr, ");"); + } + } + else if (!chain.row_major_matrix) + { + const char *store_op = nullptr; + switch (type.vecsize) + { + case 1: + store_op = "Store"; + break; + case 2: + store_op = "Store2"; + break; + case 3: + store_op = "Store3"; + break; + case 4: + store_op = "Store4"; + break; + default: + SPIRV_CROSS_THROW("Unknown vector size."); + } - if (templated_store) - { - store_op = "Store"; - auto vector_type = type; - vector_type.columns = 1; - template_expr = join("<", type_to_glsl(vector_type), ">"); - } + if (templated_store) + { + store_op = "Store"; + auto vector_type = type; + vector_type.columns = 1; + template_expr = join("<", type_to_glsl(vector_type), ">"); + } - for (uint32_t c = 0; c < type.columns; c++) - { - auto store_expr = join(write_access_chain_value(value, composite_chain, true), "[", c, "]"); + for (uint32_t c = 0; c < type.columns; c++) + { + auto store_expr = join(write_access_chain_value(value, composite_chain, true), "[", c, "]"); - if (!templated_store) - { - auto bitcast_op = bitcast_glsl_op(target_type, type); - if (!bitcast_op.empty()) - store_expr = join(bitcast_op, "(", store_expr, ")"); - } + if (!templated_store) + { + auto bitcast_op = bitcast_glsl_op(target_type, type); + if (!bitcast_op.empty()) + store_expr = join(bitcast_op, "(", store_expr, ")"); + } - statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, - chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");"); - } - } - else - { - if (templated_store) - { - auto scalar_type = type; - scalar_type.vecsize = 1; - scalar_type.columns = 1; - template_expr = join("<", type_to_glsl(scalar_type), ">"); - } + statement(base, ".", store_op, template_expr, "(", chain.dynamic_index, + chain.static_index + c * chain.matrix_stride, ", ", store_expr, ");"); + } + } + else + { + if (templated_store) + { + auto scalar_type = type; + scalar_type.vecsize = 1; + scalar_type.columns = 1; + template_expr = join("<", 
type_to_glsl(scalar_type), ">");
-		}
+    else
+    {
+        if (templated_store)
+        {
+            auto scalar_type = type;
+            scalar_type.vecsize = 1;
+            scalar_type.columns = 1;
+            template_expr = join("<", type_to_glsl(scalar_type), ">");
+        }
 
-		for (uint32_t r = 0; r < type.vecsize; r++)
-		{
-			for (uint32_t c = 0; c < type.columns; c++)
-			{
-				auto store_expr =
-				    join(write_access_chain_value(value, composite_chain, true), "[", c, "].", index_to_swizzle(r));
-				remove_duplicate_swizzle(store_expr);
-				auto bitcast_op = bitcast_glsl_op(target_type, type);
-				if (!bitcast_op.empty())
-					store_expr = join(bitcast_op, "(", store_expr, ")");
-				statement(base, ".Store", template_expr, "(", chain.dynamic_index,
-				          chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");");
-			}
-		}
-	}
+        for (uint32_t r = 0; r < type.vecsize; r++)
+        {
+            for (uint32_t c = 0; c < type.columns; c++)
+            {
+                auto store_expr =
+                    join(write_access_chain_value(value, composite_chain, true), "[", c, "].", index_to_swizzle(r));
+                remove_duplicate_swizzle(store_expr);
+                auto bitcast_op = bitcast_glsl_op(target_type, type);
+                if (!bitcast_op.empty())
+                    store_expr = join(bitcast_op, "(", store_expr, ")");
+                statement(base, ".Store", template_expr, "(", chain.dynamic_index,
+                          chain.static_index + c * (type.width / 8) + r * chain.matrix_stride, ", ", store_expr, ");");
+            }
+        }
+    }
 
-	register_write(chain.self);
+    register_write(chain.self);
 }
 
 void CompilerHLSL::emit_store(const Instruction &instruction)
 {
-	auto ops = stream(instruction);
-	if (options.vertex.flip_vert_y)
-	{
-		auto *expr = maybe_get<SPIRExpression>(ops[0]);
-		if (expr != nullptr && expr->access_meshlet_position_y)
-		{
-			auto lhs = to_dereferenced_expression(ops[0]);
-			auto rhs = to_unpacked_expression(ops[1]);
-			statement(lhs, " = spvFlipVertY(", rhs, ");");
-			register_write(ops[0]);
-			return;
-		}
-	}
+    auto ops = stream(instruction);
+    if (options.vertex.flip_vert_y)
+    {
+        auto *expr = maybe_get<SPIRExpression>(ops[0]);
+        if (expr != nullptr && expr->access_meshlet_position_y)
+        {
+            auto lhs = to_dereferenced_expression(ops[0]);
+            auto rhs = to_unpacked_expression(ops[1]);
+            statement(lhs, " = spvFlipVertY(", rhs, ");");
+            register_write(ops[0]);
+            return;
+        }
+    }
 
-	auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
-	if (chain)
-		write_access_chain(*chain, ops[1], {});
-	else
-		CompilerGLSL::emit_instruction(instruction);
+    auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
+    if (chain)
+        write_access_chain(*chain, ops[1], {});
+    else
+        CompilerGLSL::emit_instruction(instruction);
 }
 
 void CompilerHLSL::emit_access_chain(const Instruction &instruction)
 {
-	auto ops = stream(instruction);
-	uint32_t length = instruction.length;
+    auto ops = stream(instruction);
+    uint32_t length = instruction.length;
 
-	bool need_byte_access_chain = false;
-	auto &type = expression_type(ops[2]);
-	const auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
+    bool need_byte_access_chain = false;
+    auto &type = expression_type(ops[2]);
+    const auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
 
-	if (chain)
-	{
-		// Keep tacking on an existing access chain.
-		need_byte_access_chain = true;
-	}
-	else if (type.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock))
-	{
-		// If we are starting to poke into an SSBO, we are dealing with ByteAddressBuffers, and we need
-		// to emit SPIRAccessChain rather than a plain SPIRExpression.
-		uint32_t chain_arguments = length - 3;
-		if (chain_arguments > type.array.size())
-			need_byte_access_chain = true;
-	}
+    if (chain)
+    {
+        // Keep tacking on an existing access chain.
+        need_byte_access_chain = true;
+    }
+    else if (type.storage == StorageClassStorageBuffer || has_decoration(type.self, DecorationBufferBlock))
+    {
+        // If we are starting to poke into an SSBO, we are dealing with ByteAddressBuffers, and we need
+        // to emit SPIRAccessChain rather than a plain SPIRExpression.
+        uint32_t chain_arguments = length - 3;
+        if (chain_arguments > type.array.size())
+            need_byte_access_chain = true;
+    }
 
-	if (need_byte_access_chain)
-	{
-		// If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block,
-		// and not array of SSBO.
-		uint32_t to_plain_buffer_length = chain ? 0u : static_cast<uint32_t>(type.array.size());
+    if (need_byte_access_chain)
+    {
+        // If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block,
+        // and not array of SSBO.
+        uint32_t to_plain_buffer_length = chain ? 0u : static_cast<uint32_t>(type.array.size());
 
-		auto *backing_variable = maybe_get_backing_variable(ops[2]);
+        auto *backing_variable = maybe_get_backing_variable(ops[2]);
 
-		if (backing_variable != nullptr && is_user_type_structured(backing_variable->self))
-		{
-			CompilerGLSL::emit_instruction(instruction);
-			return;
-		}
+        if (backing_variable != nullptr && is_user_type_structured(backing_variable->self))
+        {
+            CompilerGLSL::emit_instruction(instruction);
+            return;
+        }
 
-		string base;
-		if (to_plain_buffer_length != 0)
-			base = access_chain(ops[2], &ops[3], to_plain_buffer_length, get<SPIRType>(ops[0]));
-		else if (chain)
-			base = chain->base;
-		else
-			base = to_expression(ops[2]);
+        string base;
+        if (to_plain_buffer_length != 0)
+            base = access_chain(ops[2], &ops[3], to_plain_buffer_length, get<SPIRType>(ops[0]));
+        else if (chain)
+            base = chain->base;
+        else
+            base = to_expression(ops[2]);
 
-		// Start traversing type hierarchy at the proper non-pointer types.
-		auto *basetype = &get_pointee_type(type);
+        // Start traversing type hierarchy at the proper non-pointer types.
+        auto *basetype = &get_pointee_type(type);
 
-		// Traverse the type hierarchy down to the actual buffer types.
-		for (uint32_t i = 0; i < to_plain_buffer_length; i++)
-		{
-			assert(basetype->parent_type);
-			basetype = &get<SPIRType>(basetype->parent_type);
-		}
+        // Traverse the type hierarchy down to the actual buffer types.
+        for (uint32_t i = 0; i < to_plain_buffer_length; i++)
+        {
+            assert(basetype->parent_type);
+            basetype = &get<SPIRType>(basetype->parent_type);
+        }
 
-		uint32_t matrix_stride = 0;
-		uint32_t array_stride = 0;
-		bool row_major_matrix = false;
+        uint32_t matrix_stride = 0;
+        uint32_t array_stride = 0;
+        bool row_major_matrix = false;
 
-		// Inherit matrix information.
-		if (chain)
-		{
-			matrix_stride = chain->matrix_stride;
-			row_major_matrix = chain->row_major_matrix;
-			array_stride = chain->array_stride;
-		}
+        // Inherit matrix information.
+        if (chain)
+        {
+            matrix_stride = chain->matrix_stride;
+            row_major_matrix = chain->row_major_matrix;
+            array_stride = chain->array_stride;
+        }
 
-		auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length],
-		                                             length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix,
-		                                             &matrix_stride, &array_stride);
+        auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length],
+                                                     length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix,
+                                                     &matrix_stride, &array_stride);
 
-		auto &e = set<SPIRAccessChain>(ops[1], ops[0], type.storage, base, offsets.first, offsets.second);
-		e.row_major_matrix = row_major_matrix;
-		e.matrix_stride = matrix_stride;
-		e.array_stride = array_stride;
-		e.immutable = should_forward(ops[2]);
-		e.loaded_from = backing_variable ? backing_variable->self : ID(0);
+        auto &e = set<SPIRAccessChain>(ops[1], ops[0], type.storage, base, offsets.first, offsets.second);
+        e.row_major_matrix = row_major_matrix;
+        e.matrix_stride = matrix_stride;
+        e.array_stride = array_stride;
+        e.immutable = should_forward(ops[2]);
+        e.loaded_from = backing_variable ? backing_variable->self : ID(0);
 
-		if (chain)
-		{
-			e.dynamic_index += chain->dynamic_index;
-			e.static_index += chain->static_index;
-		}
+        if (chain)
+        {
+            e.dynamic_index += chain->dynamic_index;
+            e.static_index += chain->static_index;
+        }
 
-		for (uint32_t i = 2; i < length; i++)
-		{
-			inherit_expression_dependencies(ops[1], ops[i]);
-			add_implied_read_expression(e, ops[i]);
-		}
-	}
-	else
-	{
-		CompilerGLSL::emit_instruction(instruction);
-	}
+        for (uint32_t i = 2; i < length; i++)
+        {
+            inherit_expression_dependencies(ops[1], ops[i]);
+            add_implied_read_expression(e, ops[i]);
+        }
+    }
+    else
+    {
+        CompilerGLSL::emit_instruction(instruction);
+    }
 }
 
 void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 {
-	const char *atomic_op = nullptr;
+    const char *atomic_op = nullptr;
 
-	string value_expr;
-	if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore)
-		value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);
+    string value_expr;
+    if (op != OpAtomicIDecrement && op != OpAtomicIIncrement && op != OpAtomicLoad && op != OpAtomicStore)
+        value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);
 
-	bool is_atomic_store = false;
+    bool is_atomic_store = false;
 
-	switch (op)
-	{
-	case OpAtomicIIncrement:
-		atomic_op = "InterlockedAdd";
-		value_expr = "1";
-		break;
+    switch (op)
+    {
+    case OpAtomicIIncrement:
+        atomic_op = "InterlockedAdd";
+        value_expr = "1";
+        break;
 
-	case OpAtomicIDecrement:
-		atomic_op = "InterlockedAdd";
-		value_expr = "-1";
-		break;
+    case OpAtomicIDecrement:
+        atomic_op = "InterlockedAdd";
+        value_expr = "-1";
+        break;
 
-	case OpAtomicLoad:
-		atomic_op = "InterlockedAdd";
-		value_expr = "0";
-		break;
+    case OpAtomicLoad:
+        atomic_op = "InterlockedAdd";
+        value_expr = "0";
+        break;
 
-	case OpAtomicISub:
-		atomic_op = "InterlockedAdd";
-		value_expr = join("-", enclose_expression(value_expr));
-		break;
+    case OpAtomicISub:
+        atomic_op = "InterlockedAdd";
+        value_expr = join("-", enclose_expression(value_expr));
+        break;
 
-	case OpAtomicSMin:
-	case OpAtomicUMin:
-		atomic_op = "InterlockedMin";
-		break;
+    case OpAtomicSMin:
+    case OpAtomicUMin:
+        atomic_op = "InterlockedMin";
+        break;
 
-	case OpAtomicSMax:
-	case OpAtomicUMax:
-		atomic_op = "InterlockedMax";
-		break;
+    case OpAtomicSMax:
+    case OpAtomicUMax:
+        atomic_op = "InterlockedMax";
+        break;
 
-	case OpAtomicAnd:
-		atomic_op = "InterlockedAnd";
-		break;
+    case OpAtomicAnd:
+        atomic_op = "InterlockedAnd";
+        break;
 
-	case OpAtomicOr:
-		atomic_op = "InterlockedOr";
-		break;
+    case OpAtomicOr:
+        atomic_op = "InterlockedOr";
+        break;
 
-	case OpAtomicXor:
-		atomic_op = "InterlockedXor";
-		break;
+    case OpAtomicXor:
+        atomic_op = "InterlockedXor";
+        break;
 
-	case OpAtomicIAdd:
-		atomic_op = "InterlockedAdd";
-		break;
+    case OpAtomicIAdd:
+        atomic_op = "InterlockedAdd";
+        break;
 
-	case OpAtomicExchange:
-		atomic_op = "InterlockedExchange";
-		break;
+    case OpAtomicExchange:
+        atomic_op = "InterlockedExchange";
+        break;
 
-	case OpAtomicStore:
-		atomic_op = "InterlockedExchange";
-		is_atomic_store = true;
-		break;
+    case OpAtomicStore:
+        atomic_op = "InterlockedExchange";
+        is_atomic_store = true;
+        break;
 
-	case OpAtomicCompareExchange:
-		if (length < 8)
-			SPIRV_CROSS_THROW("Not enough data for opcode.");
-		atomic_op = "InterlockedCompareExchange";
-		value_expr = join(to_expression(ops[7]), ", ", value_expr);
-		break;
+    case OpAtomicCompareExchange:
+        if (length < 8)
+            SPIRV_CROSS_THROW("Not enough data for opcode.");
+        atomic_op = "InterlockedCompareExchange";
+        value_expr = join(to_expression(ops[7]), ", ", value_expr);
+        break;
 
-	default:
-		SPIRV_CROSS_THROW("Unknown atomic opcode.");
-	}
+    default:
+        SPIRV_CROSS_THROW("Unknown atomic opcode.");
+    }
 
-	if (is_atomic_store)
-	{
-		auto &data_type = expression_type(ops[0]);
-		auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
+    if (is_atomic_store)
+    {
+        auto &data_type = expression_type(ops[0]);
+        auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
 
-		auto &tmp_id = extra_sub_expressions[ops[0]];
-		if (!tmp_id)
-		{
-			tmp_id = ir.increase_bound_by(1);
-			emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id);
-		}
+        auto &tmp_id = extra_sub_expressions[ops[0]];
+        if (!tmp_id)
+        {
+            tmp_id = ir.increase_bound_by(1);
+            emit_uninitialized_temporary_expression(get_pointee_type(data_type).self, tmp_id);
+        }
 
-		if (data_type.storage == StorageClassImage || !chain)
-		{
-			statement(atomic_op, "(", to_non_uniform_aware_expression(ops[0]), ", ",
-			          to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
-		}
-		else
-		{
-			string base = chain->base;
-			if (has_decoration(chain->self, DecorationNonUniform))
-				convert_non_uniform_expression(base, chain->self);
-			// RWByteAddress buffer is always uint in its underlying type.
-			statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ",
-			          to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
-		}
-	}
-	else
-	{
-		uint32_t result_type = ops[0];
-		uint32_t id = ops[1];
-		forced_temporaries.insert(ops[1]);
+        if (data_type.storage == StorageClassImage || !chain)
+        {
+            statement(atomic_op, "(", to_non_uniform_aware_expression(ops[0]), ", ",
+                      to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
+        }
+        else
+        {
+            string base = chain->base;
+            if (has_decoration(chain->self, DecorationNonUniform))
+                convert_non_uniform_expression(base, chain->self);
+            // RWByteAddress buffer is always uint in its underlying type.
+            statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ",
+                      to_expression(ops[3]), ", ", to_expression(tmp_id), ");");
+        }
+    }
+    else
+    {
+        uint32_t result_type = ops[0];
+        uint32_t id = ops[1];
+        forced_temporaries.insert(ops[1]);
 
-		auto &type = get<SPIRType>(result_type);
-		statement(variable_decl(type, to_name(id)), ";");
+        auto &type = get<SPIRType>(result_type);
+        statement(variable_decl(type, to_name(id)), ";");
 
-		auto &data_type = expression_type(ops[2]);
-		auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
-		SPIRType::BaseType expr_type;
-		if (data_type.storage == StorageClassImage || !chain)
-		{
-			statement(atomic_op, "(", to_non_uniform_aware_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
-			expr_type = data_type.basetype;
-		}
-		else
-		{
-			// RWByteAddress buffer is always uint in its underlying type.
-			string base = chain->base;
-			if (has_decoration(chain->self, DecorationNonUniform))
-				convert_non_uniform_expression(base, chain->self);
-			expr_type = SPIRType::UInt;
-			statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr,
-			          ", ", to_name(id), ");");
-		}
+        auto &data_type = expression_type(ops[2]);
+        auto *chain = maybe_get<SPIRAccessChain>(ops[2]);
+        SPIRType::BaseType expr_type;
+        if (data_type.storage == StorageClassImage || !chain)
+        {
+            statement(atomic_op, "(", to_non_uniform_aware_expression(ops[2]), ", ", value_expr, ", ", to_name(id), ");");
+            expr_type = data_type.basetype;
+        }
+        else
+        {
+            // RWByteAddress buffer is always uint in its underlying type.
+            string base = chain->base;
+            if (has_decoration(chain->self, DecorationNonUniform))
+                convert_non_uniform_expression(base, chain->self);
+            expr_type = SPIRType::UInt;
+            statement(base, ".", atomic_op, "(", chain->dynamic_index, chain->static_index, ", ", value_expr,
+                      ", ", to_name(id), ");");
+        }
 
-		auto expr = bitcast_expression(type, expr_type, to_name(id));
-		set<SPIRExpression>(id, expr, result_type, true);
-	}
-	flush_all_atomic_capable_variables();
+        auto expr = bitcast_expression(type, expr_type, to_name(id));
+        set<SPIRExpression>(id, expr, result_type, true);
+    }
+    flush_all_atomic_capable_variables();
 }
 
 void CompilerHLSL::emit_subgroup_op(const Instruction &i)
 {
-	if (hlsl_options.shader_model < 60)
-		SPIRV_CROSS_THROW("Wave ops requires SM 6.0 or higher.");
+    if (hlsl_options.shader_model < 60)
+        SPIRV_CROSS_THROW("Wave ops requires SM 6.0 or higher.");
 
-	const uint32_t *ops = stream(i);
-	auto op = static_cast<Op>(i.op);
+    const uint32_t *ops = stream(i);
+    auto op = static_cast<Op>(i.op);
 
-	uint32_t result_type = ops[0];
-	uint32_t id = ops[1];
+    uint32_t result_type = ops[0];
+    uint32_t id = ops[1];
 
-	auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
-	if (scope != ScopeSubgroup)
-		SPIRV_CROSS_THROW("Only subgroup scope is supported.");
+    auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
+    if (scope != ScopeSubgroup)
+        SPIRV_CROSS_THROW("Only subgroup scope is supported.");
 
-	const auto make_inclusive_Sum = [&](const string &expr) -> string {
-		return join(expr, " + ", to_expression(ops[4]));
-	};
+    const auto make_inclusive_Sum = [&](const string &expr) -> string {
+        return join(expr, " + ", to_expression(ops[4]));
+    };
 
-	const auto make_inclusive_Product = [&](const string &expr) -> string {
-		return join(expr, " * ", to_expression(ops[4]));
-	};
+    const auto make_inclusive_Product = [&](const string &expr) -> string {
+        return join(expr, " * ", to_expression(ops[4]));
+    };
 
-	// If we need to do implicit bitcasts, make sure we do it with the correct type.
-	uint32_t integer_width = get_integer_width_for_instruction(i);
-	auto int_type = to_signed_basetype(integer_width);
-	auto uint_type = to_unsigned_basetype(integer_width);
+    // If we need to do implicit bitcasts, make sure we do it with the correct type.
+ uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); #define make_inclusive_BitAnd(expr) "" #define make_inclusive_BitOr(expr) "" @@ -5372,1518 +5372,1518 @@ void CompilerHLSL::emit_subgroup_op(const Instruction &i) #define make_inclusive_Min(expr) "" #define make_inclusive_Max(expr) "" - switch (op) - { - case OpGroupNonUniformElect: - emit_op(result_type, id, "WaveIsFirstLane()", true); - break; + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "WaveIsFirstLane()", true); + break; - case OpGroupNonUniformBroadcast: - emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); - break; + case OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); + break; - case OpGroupNonUniformBroadcastFirst: - emit_unary_func_op(result_type, id, ops[3], "WaveReadLaneFirst"); - break; + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "WaveReadLaneFirst"); + break; - case OpGroupNonUniformBallot: - emit_unary_func_op(result_type, id, ops[3], "WaveActiveBallot"); - break; + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveBallot"); + break; - case OpGroupNonUniformInverseBallot: - SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL."); + case OpGroupNonUniformInverseBallot: + SPIRV_CROSS_THROW("Cannot trivially implement InverseBallot in HLSL."); - case OpGroupNonUniformBallotBitExtract: - SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL."); + case OpGroupNonUniformBallotBitExtract: + SPIRV_CROSS_THROW("Cannot trivially implement BallotBitExtract in HLSL."); - case OpGroupNonUniformBallotFindLSB: - SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL."); + case OpGroupNonUniformBallotFindLSB: + SPIRV_CROSS_THROW("Cannot trivially implement BallotFindLSB in HLSL."); - case OpGroupNonUniformBallotFindMSB: - SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL."); + case OpGroupNonUniformBallotFindMSB: + SPIRV_CROSS_THROW("Cannot trivially implement BallotFindMSB in HLSL."); - case OpGroupNonUniformBallotBitCount: - { - auto operation = static_cast(ops[3]); - bool forward = should_forward(ops[4]); - if (operation == GroupOperationReduce) - { - auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(", - to_enclosed_expression(ops[4]), ".y)"); - auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(", - to_enclosed_expression(ops[4]), ".w)"); - emit_op(result_type, id, join(left, " + ", right), forward); - inherit_expression_dependencies(id, ops[4]); - } - else if (operation == GroupOperationInclusiveScan) - { - auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLeMask.x) + countbits(", - to_enclosed_expression(ops[4]), ".y & gl_SubgroupLeMask.y)"); - auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLeMask.z) + countbits(", - to_enclosed_expression(ops[4]), ".w & gl_SubgroupLeMask.w)"); - emit_op(result_type, id, join(left, " + ", right), forward); - if (!active_input_builtins.get(BuiltInSubgroupLeMask)) - { - active_input_builtins.set(BuiltInSubgroupLeMask); - force_recompile_guarantee_forward_progress(); - } - } - else if (operation == GroupOperationExclusiveScan) - { - auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & 
gl_SubgroupLtMask.x) + countbits(", - to_enclosed_expression(ops[4]), ".y & gl_SubgroupLtMask.y)"); - auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLtMask.z) + countbits(", - to_enclosed_expression(ops[4]), ".w & gl_SubgroupLtMask.w)"); - emit_op(result_type, id, join(left, " + ", right), forward); - if (!active_input_builtins.get(BuiltInSubgroupLtMask)) - { - active_input_builtins.set(BuiltInSubgroupLtMask); - force_recompile_guarantee_forward_progress(); - } - } - else - SPIRV_CROSS_THROW("Invalid BitCount operation."); - break; - } + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast(ops[3]); + bool forward = should_forward(ops[4]); + if (operation == GroupOperationReduce) + { + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x) + countbits(", + to_enclosed_expression(ops[4]), ".y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z) + countbits(", + to_enclosed_expression(ops[4]), ".w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + inherit_expression_dependencies(id, ops[4]); + } + else if (operation == GroupOperationInclusiveScan) + { + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLeMask.x) + countbits(", + to_enclosed_expression(ops[4]), ".y & gl_SubgroupLeMask.y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLeMask.z) + countbits(", + to_enclosed_expression(ops[4]), ".w & gl_SubgroupLeMask.w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + if (!active_input_builtins.get(BuiltInSubgroupLeMask)) + { + active_input_builtins.set(BuiltInSubgroupLeMask); + force_recompile_guarantee_forward_progress(); + } + } + else if (operation == GroupOperationExclusiveScan) + { + auto left = join("countbits(", to_enclosed_expression(ops[4]), ".x & gl_SubgroupLtMask.x) + countbits(", + to_enclosed_expression(ops[4]), ".y & gl_SubgroupLtMask.y)"); + auto right = join("countbits(", to_enclosed_expression(ops[4]), ".z & gl_SubgroupLtMask.z) + countbits(", + to_enclosed_expression(ops[4]), ".w & gl_SubgroupLtMask.w)"); + emit_op(result_type, id, join(left, " + ", right), forward); + if (!active_input_builtins.get(BuiltInSubgroupLtMask)) + { + active_input_builtins.set(BuiltInSubgroupLtMask); + force_recompile_guarantee_forward_progress(); + } + } + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } - case OpGroupNonUniformShuffle: - emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); - break; - case OpGroupNonUniformShuffleXor: - { - bool forward = should_forward(ops[3]); - emit_op(ops[0], ops[1], - join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", - "WaveGetLaneIndex() ^ ", to_enclosed_expression(ops[4]), ")"), forward); - inherit_expression_dependencies(ops[1], ops[3]); - break; - } - case OpGroupNonUniformShuffleUp: - { - bool forward = should_forward(ops[3]); - emit_op(ops[0], ops[1], - join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", - "WaveGetLaneIndex() - ", to_enclosed_expression(ops[4]), ")"), forward); - inherit_expression_dependencies(ops[1], ops[3]); - break; - } - case OpGroupNonUniformShuffleDown: - { - bool forward = should_forward(ops[3]); - emit_op(ops[0], ops[1], - join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", - "WaveGetLaneIndex() + ", to_enclosed_expression(ops[4]), ")"), forward); - inherit_expression_dependencies(ops[1], ops[3]); - break; - } + case OpGroupNonUniformShuffle: + 
emit_binary_func_op(result_type, id, ops[3], ops[4], "WaveReadLaneAt"); + break; + case OpGroupNonUniformShuffleXor: + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() ^ ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } + case OpGroupNonUniformShuffleUp: + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() - ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } + case OpGroupNonUniformShuffleDown: + { + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join("WaveReadLaneAt(", to_unpacked_expression(ops[3]), ", ", + "WaveGetLaneIndex() + ", to_enclosed_expression(ops[4]), ")"), forward); + inherit_expression_dependencies(ops[1], ops[3]); + break; + } - case OpGroupNonUniformAll: - emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue"); - break; + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllTrue"); + break; - case OpGroupNonUniformAny: - emit_unary_func_op(result_type, id, ops[3], "WaveActiveAnyTrue"); - break; + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAnyTrue"); + break; - case OpGroupNonUniformAllEqual: - emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllEqual"); - break; + case OpGroupNonUniformAllEqual: + emit_unary_func_op(result_type, id, ops[3], "WaveActiveAllEqual"); + break; - // clang-format off + // clang-format off #define HLSL_GROUP_OP(op, hlsl_op, supports_scan) \ case OpGroupNonUniform##op: \ - { \ - auto operation = static_cast(ops[3]); \ - if (operation == GroupOperationReduce) \ - emit_unary_func_op(result_type, id, ops[4], "WaveActive" #hlsl_op); \ - else if (operation == GroupOperationInclusiveScan && supports_scan) \ + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "WaveActive" #hlsl_op); \ + else if (operation == GroupOperationInclusiveScan && supports_scan) \ { \ - bool forward = should_forward(ops[4]); \ - emit_op(result_type, id, make_inclusive_##hlsl_op (join("WavePrefix" #hlsl_op, "(", to_expression(ops[4]), ")")), forward); \ - inherit_expression_dependencies(id, ops[4]); \ + bool forward = should_forward(ops[4]); \ + emit_op(result_type, id, make_inclusive_##hlsl_op (join("WavePrefix" #hlsl_op, "(", to_expression(ops[4]), ")")), forward); \ + inherit_expression_dependencies(id, ops[4]); \ } \ - else if (operation == GroupOperationExclusiveScan && supports_scan) \ - emit_unary_func_op(result_type, id, ops[4], "WavePrefix" #hlsl_op); \ - else if (operation == GroupOperationClusteredReduce) \ - SPIRV_CROSS_THROW("Cannot trivially implement ClusteredReduce in HLSL."); \ - else \ - SPIRV_CROSS_THROW("Invalid group operation."); \ - break; \ - } + else if (operation == GroupOperationExclusiveScan && supports_scan) \ + emit_unary_func_op(result_type, id, ops[4], "WavePrefix" #hlsl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + SPIRV_CROSS_THROW("Cannot trivially implement ClusteredReduce in HLSL."); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } #define HLSL_GROUP_OP_CAST(op, hlsl_op, type) \ case OpGroupNonUniform##op: \ - { \ - auto operation = static_cast(ops[3]); \ - if (operation 
== GroupOperationReduce) \ - emit_unary_func_op_cast(result_type, id, ops[4], "WaveActive" #hlsl_op, type, type); \ - else \ - SPIRV_CROSS_THROW("Invalid group operation."); \ - break; \ - } + { \ + auto operation = static_cast(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "WaveActive" #hlsl_op, type, type); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } - HLSL_GROUP_OP(FAdd, Sum, true) - HLSL_GROUP_OP(FMul, Product, true) - HLSL_GROUP_OP(FMin, Min, false) - HLSL_GROUP_OP(FMax, Max, false) - HLSL_GROUP_OP(IAdd, Sum, true) - HLSL_GROUP_OP(IMul, Product, true) - HLSL_GROUP_OP_CAST(SMin, Min, int_type) - HLSL_GROUP_OP_CAST(SMax, Max, int_type) - HLSL_GROUP_OP_CAST(UMin, Min, uint_type) - HLSL_GROUP_OP_CAST(UMax, Max, uint_type) - HLSL_GROUP_OP(BitwiseAnd, BitAnd, false) - HLSL_GROUP_OP(BitwiseOr, BitOr, false) - HLSL_GROUP_OP(BitwiseXor, BitXor, false) - HLSL_GROUP_OP_CAST(LogicalAnd, BitAnd, uint_type) - HLSL_GROUP_OP_CAST(LogicalOr, BitOr, uint_type) - HLSL_GROUP_OP_CAST(LogicalXor, BitXor, uint_type) + HLSL_GROUP_OP(FAdd, Sum, true) + HLSL_GROUP_OP(FMul, Product, true) + HLSL_GROUP_OP(FMin, Min, false) + HLSL_GROUP_OP(FMax, Max, false) + HLSL_GROUP_OP(IAdd, Sum, true) + HLSL_GROUP_OP(IMul, Product, true) + HLSL_GROUP_OP_CAST(SMin, Min, int_type) + HLSL_GROUP_OP_CAST(SMax, Max, int_type) + HLSL_GROUP_OP_CAST(UMin, Min, uint_type) + HLSL_GROUP_OP_CAST(UMax, Max, uint_type) + HLSL_GROUP_OP(BitwiseAnd, BitAnd, false) + HLSL_GROUP_OP(BitwiseOr, BitOr, false) + HLSL_GROUP_OP(BitwiseXor, BitXor, false) + HLSL_GROUP_OP_CAST(LogicalAnd, BitAnd, uint_type) + HLSL_GROUP_OP_CAST(LogicalOr, BitOr, uint_type) + HLSL_GROUP_OP_CAST(LogicalXor, BitXor, uint_type) #undef HLSL_GROUP_OP #undef HLSL_GROUP_OP_CAST - // clang-format on + // clang-format on - case OpGroupNonUniformQuadSwap: - { - uint32_t direction = evaluate_constant_u32(ops[4]); - if (direction == 0) - emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX"); - else if (direction == 1) - emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossY"); - else if (direction == 2) - emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossDiagonal"); - else - SPIRV_CROSS_THROW("Invalid quad swap direction."); - break; - } + case OpGroupNonUniformQuadSwap: + { + uint32_t direction = evaluate_constant_u32(ops[4]); + if (direction == 0) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossX"); + else if (direction == 1) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossY"); + else if (direction == 2) + emit_unary_func_op(result_type, id, ops[3], "QuadReadAcrossDiagonal"); + else + SPIRV_CROSS_THROW("Invalid quad swap direction."); + break; + } - case OpGroupNonUniformQuadBroadcast: - { - emit_binary_func_op(result_type, id, ops[3], ops[4], "QuadReadLaneAt"); - break; - } + case OpGroupNonUniformQuadBroadcast: + { + emit_binary_func_op(result_type, id, ops[3], ops[4], "QuadReadLaneAt"); + break; + } - default: - SPIRV_CROSS_THROW("Invalid opcode for subgroup."); - } + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } - register_control_dependent_expression(id); + register_control_dependent_expression(id); } void CompilerHLSL::emit_instruction(const Instruction &instruction) { - auto ops = stream(instruction); - auto opcode = static_cast(instruction.op); + auto ops = stream(instruction); + auto opcode = static_cast(instruction.op); #define HLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) #define 
HLSL_BOP_CAST(op, type) \ - emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false) + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode), false) #define HLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define HLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) #define HLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) #define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) #define HLSL_BFOP_CAST(op, type) \ - emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) #define HLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) #define HLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) - // If we need to do implicit bitcasts, make sure we do it with the correct type. - uint32_t integer_width = get_integer_width_for_instruction(instruction); - auto int_type = to_signed_basetype(integer_width); - auto uint_type = to_unsigned_basetype(integer_width); - - opcode = get_remapped_spirv_op(opcode); - - switch (opcode) - { - case OpAccessChain: - case OpInBoundsAccessChain: - { - emit_access_chain(instruction); - break; - } - case OpBitcast: - { - auto bitcast_type = get_bitcast_type(ops[0], ops[2]); - if (bitcast_type == CompilerHLSL::TypeNormal) - CompilerGLSL::emit_instruction(instruction); - else - { - if (!requires_uint2_packing) - { - requires_uint2_packing = true; - force_recompile(); - } - - if (bitcast_type == CompilerHLSL::TypePackUint2x32) - emit_unary_func_op(ops[0], ops[1], ops[2], "spvPackUint2x32"); - else - emit_unary_func_op(ops[0], ops[1], ops[2], "spvUnpackUint2x32"); - } - - break; - } - - case OpSelect: - { - auto &value_type = expression_type(ops[3]); - if (value_type.basetype == SPIRType::Struct || is_array(value_type)) - { - // HLSL does not support ternary expressions on composites. - // Cannot use branches, since we might be in a continue block - // where explicit control flow is prohibited. - // Emit a helper function where we can use control flow. - TypeID value_type_id = expression_type_id(ops[3]); - auto itr = std::find(composite_selection_workaround_types.begin(), - composite_selection_workaround_types.end(), - value_type_id); - if (itr == composite_selection_workaround_types.end()) - { - composite_selection_workaround_types.push_back(value_type_id); - force_recompile(); - } - emit_uninitialized_temporary_expression(ops[0], ops[1]); - statement("spvSelectComposite(", - to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", - to_expression(ops[3]), ", ", to_expression(ops[4]), ");"); - } - else - CompilerGLSL::emit_instruction(instruction); - break; - } - - case OpStore: - { - emit_store(instruction); - break; - } - - case OpLoad: - { - emit_load(instruction); - break; - } - - case OpMatrixTimesVector: - { - // Matrices are kept in a transposed state all the time, flip multiplication order always. - emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); - break; - } - - case OpVectorTimesMatrix: - { - // Matrices are kept in a transposed state all the time, flip multiplication order always. - emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); - break; - } - - case OpMatrixTimesMatrix: - { - // Matrices are kept in a transposed state all the time, flip multiplication order always. 
- emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); - break; - } - - case OpOuterProduct: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - uint32_t a = ops[2]; - uint32_t b = ops[3]; - - auto &type = get(result_type); - string expr = type_to_glsl_constructor(type); - expr += "("; - for (uint32_t col = 0; col < type.columns; col++) - { - expr += to_enclosed_expression(a); - expr += " * "; - expr += to_extract_component_expression(b, col); - if (col + 1 < type.columns) - expr += ", "; - } - expr += ")"; - emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); - inherit_expression_dependencies(id, a); - inherit_expression_dependencies(id, b); - break; - } - - case OpFMod: - { - if (!requires_op_fmod) - { - requires_op_fmod = true; - force_recompile(); - } - CompilerGLSL::emit_instruction(instruction); - break; - } - - case OpFRem: - emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "fmod"); - break; - - case OpImage: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - auto *combined = maybe_get(ops[2]); - - if (combined) - { - auto &e = emit_op(result_type, id, to_expression(combined->image), true, true); - auto *var = maybe_get_backing_variable(combined->image); - if (var) - e.loaded_from = var->self; - } - else - { - auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); - auto *var = maybe_get_backing_variable(ops[2]); - if (var) - e.loaded_from = var->self; - } - break; - } - - case OpDPdx: - HLSL_UFOP(ddx); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdy: - HLSL_UFOP(ddy); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdxFine: - HLSL_UFOP(ddx_fine); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdyFine: - HLSL_UFOP(ddy_fine); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdxCoarse: - HLSL_UFOP(ddx_coarse); - register_control_dependent_expression(ops[1]); - break; - - case OpDPdyCoarse: - HLSL_UFOP(ddy_coarse); - register_control_dependent_expression(ops[1]); - break; - - case OpFwidth: - case OpFwidthCoarse: - case OpFwidthFine: - HLSL_UFOP(fwidth); - register_control_dependent_expression(ops[1]); - break; - - case OpLogicalNot: - { - auto result_type = ops[0]; - auto id = ops[1]; - auto &type = get(result_type); - - if (type.vecsize > 1) - emit_unrolled_unary_op(result_type, id, ops[2], "!"); - else - HLSL_UOP(!); - break; - } - - case OpIEqual: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); - else - HLSL_BOP_CAST(==, int_type); - break; - } - - case OpLogicalEqual: - case OpFOrdEqual: - case OpFUnordEqual: - { - // HLSL != operator is unordered. - // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. - // isnan() is apparently implemented as x != x as well. - // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual. - // HACK: FUnordEqual will be implemented as FOrdEqual. 
- - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); - else - HLSL_BOP(==); - break; - } - - case OpINotEqual: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); - else - HLSL_BOP_CAST(!=, int_type); - break; - } - - case OpLogicalNotEqual: - case OpFOrdNotEqual: - case OpFUnordNotEqual: - { - // HLSL != operator is unordered. - // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. - // isnan() is apparently implemented as x != x as well. - - // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here. - // We would need to do something like not(UnordEqual), but that cannot be expressed either. - // Adding a lot of NaN checks would be a breaking change from perspective of performance. - // SPIR-V will generally use isnan() checks when this even matters. - // HACK: FOrdNotEqual will be implemented as FUnordEqual. - - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); - else - HLSL_BOP(!=); - break; - } - - case OpUGreaterThan: - case OpSGreaterThan: - { - auto result_type = ops[0]; - auto id = ops[1]; - auto type = opcode == OpUGreaterThan ? uint_type : int_type; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type); - else - HLSL_BOP_CAST(>, type); - break; - } - - case OpFOrdGreaterThan: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown); - else - HLSL_BOP(>); - break; - } - - case OpFUnordGreaterThan: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown); - else - CompilerGLSL::emit_instruction(instruction); - break; - } - - case OpUGreaterThanEqual: - case OpSGreaterThanEqual: - { - auto result_type = ops[0]; - auto id = ops[1]; - - auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type); - else - HLSL_BOP_CAST(>=, type); - break; - } - - case OpFOrdGreaterThanEqual: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown); - else - HLSL_BOP(>=); - break; - } - - case OpFUnordGreaterThanEqual: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown); - else - CompilerGLSL::emit_instruction(instruction); - break; - } - - case OpULessThan: - case OpSLessThan: - { - auto result_type = ops[0]; - auto id = ops[1]; - - auto type = opcode == OpULessThan ? 
uint_type : int_type; - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type); - else - HLSL_BOP_CAST(<, type); - break; - } - - case OpFOrdLessThan: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown); - else - HLSL_BOP(<); - break; - } - - case OpFUnordLessThan: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown); - else - CompilerGLSL::emit_instruction(instruction); - break; - } - - case OpULessThanEqual: - case OpSLessThanEqual: - { - auto result_type = ops[0]; - auto id = ops[1]; - - auto type = opcode == OpULessThanEqual ? uint_type : int_type; - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type); - else - HLSL_BOP_CAST(<=, type); - break; - } - - case OpFOrdLessThanEqual: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown); - else - HLSL_BOP(<=); - break; - } - - case OpFUnordLessThanEqual: - { - auto result_type = ops[0]; - auto id = ops[1]; - - if (expression_type(ops[2]).vecsize > 1) - emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown); - else - CompilerGLSL::emit_instruction(instruction); - break; - } - - case OpImageQueryLod: - emit_texture_op(instruction, false); - break; - - case OpImageQuerySizeLod: - { - auto result_type = ops[0]; - auto id = ops[1]; - - require_texture_query_variant(ops[2]); - auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); - statement("uint ", dummy_samples_levels, ";"); - - auto expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", ", - bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")"); - - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::UInt, expr); - emit_op(result_type, id, expr, true); - break; - } - - case OpImageQuerySize: - { - auto result_type = ops[0]; - auto id = ops[1]; - - require_texture_query_variant(ops[2]); - bool uav = expression_type(ops[2]).image.sampled == 2; - - if (const auto *var = maybe_get_backing_variable(ops[2])) - if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) - uav = false; - - auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); - statement("uint ", dummy_samples_levels, ";"); - - string expr; - if (uav) - expr = join("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", dummy_samples_levels, ")"); - else - expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); - - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::UInt, expr); - emit_op(result_type, id, expr, true); - break; - } - - case OpImageQuerySamples: - case OpImageQueryLevels: - { - auto result_type = ops[0]; - auto id = ops[1]; - - require_texture_query_variant(ops[2]); - bool uav = expression_type(ops[2]).image.sampled == 2; - if (opcode == OpImageQueryLevels && uav) - SPIRV_CROSS_THROW("Cannot query levels for UAV images."); - - if (const auto *var = maybe_get_backing_variable(ops[2])) - if 
(hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) - uav = false; - - // Keep it simple and do not emit special variants to make this look nicer ... - // This stuff is barely, if ever, used. - forced_temporaries.insert(id); - auto &type = get(result_type); - statement(variable_decl(type, to_name(id)), ";"); - - if (uav) - statement("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", to_name(id), ");"); - else - statement("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", to_name(id), ");"); - - auto &restype = get(ops[0]); - auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id)); - set(id, expr, result_type, true); - break; - } - - case OpImageRead: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - auto *var = maybe_get_backing_variable(ops[2]); - auto &type = expression_type(ops[2]); - bool subpass_data = type.image.dim == DimSubpassData; - bool pure = false; - - string imgexpr; - - if (subpass_data) - { - if (hlsl_options.shader_model < 40) - SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3."); - - // Similar to GLSL, implement subpass loads using texelFetch. - if (type.image.ms) - { - uint32_t operands = ops[4]; - if (operands != ImageOperandsSampleMask || instruction.length != 6) - SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); - uint32_t sample = ops[5]; - imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); - } - else - imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); - - pure = true; - } - else - { - imgexpr = join(to_non_uniform_aware_expression(ops[2]), "[", to_expression(ops[3]), "]"); - // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", - // except that the underlying type changes how the data is interpreted. - - bool force_srv = - hlsl_options.nonwritable_uav_texture_as_srv && var && has_decoration(var->self, DecorationNonWritable); - pure = force_srv; - - if (var && !subpass_data && !force_srv) - imgexpr = remap_swizzle(get(result_type), - image_format_to_components(get(var->basetype).image.format), imgexpr); - } - - if (var) - { - bool forward = forced_temporaries.find(id) == end(forced_temporaries); - auto &e = emit_op(result_type, id, imgexpr, forward); - - if (!pure) - { - e.loaded_from = var->self; - if (forward) - var->dependees.push_back(id); - } - } - else - emit_op(result_type, id, imgexpr, false); - - inherit_expression_dependencies(id, ops[2]); - if (type.image.ms) - inherit_expression_dependencies(id, ops[5]); - break; - } - - case OpImageWrite: - { - auto *var = maybe_get_backing_variable(ops[0]); - - // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", - // except that the underlying type changes how the data is interpreted. 
- auto value_expr = to_expression(ops[2]); - if (var) - { - auto &type = get(var->basetype); - auto narrowed_type = get(type.image.type); - narrowed_type.vecsize = image_format_to_components(type.image.format); - value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); - } - - statement(to_non_uniform_aware_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); - if (var && variable_storage_is_aliased(*var)) - flush_all_aliased_variables(); - break; - } - - case OpImageTexelPointer: - { - uint32_t result_type = ops[0]; - uint32_t id = ops[1]; - - auto expr = to_expression(ops[2]); - expr += join("[", to_expression(ops[3]), "]"); - auto &e = set(id, expr, result_type, true); - - // When using the pointer, we need to know which variable it is actually loaded from. - auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : ID(0); - inherit_expression_dependencies(id, ops[3]); - break; - } - - case OpAtomicFAddEXT: - case OpAtomicFMinEXT: - case OpAtomicFMaxEXT: - SPIRV_CROSS_THROW("Floating-point atomics are not supported in HLSL."); - - case OpAtomicCompareExchange: - case OpAtomicExchange: - case OpAtomicISub: - case OpAtomicSMin: - case OpAtomicUMin: - case OpAtomicSMax: - case OpAtomicUMax: - case OpAtomicAnd: - case OpAtomicOr: - case OpAtomicXor: - case OpAtomicIAdd: - case OpAtomicIIncrement: - case OpAtomicIDecrement: - case OpAtomicLoad: - case OpAtomicStore: - { - emit_atomic(ops, instruction.length, opcode); - break; - } - - case OpControlBarrier: - case OpMemoryBarrier: - { - uint32_t memory; - uint32_t semantics; - - if (opcode == OpMemoryBarrier) - { - memory = evaluate_constant_u32(ops[0]); - semantics = evaluate_constant_u32(ops[1]); - } - else - { - memory = evaluate_constant_u32(ops[1]); - semantics = evaluate_constant_u32(ops[2]); - } - - if (memory == ScopeSubgroup) - { - // No Wave-barriers in HLSL. - break; - } - - // We only care about these flags, acquire/release and friends are not relevant to GLSL. - semantics = mask_relevant_memory_semantics(semantics); - - if (opcode == OpMemoryBarrier) - { - // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier - // does what we need, so we avoid redundant barriers. - const Instruction *next = get_next_instruction_in_block(instruction); - if (next && next->op == OpControlBarrier) - { - auto *next_ops = stream(*next); - uint32_t next_memory = evaluate_constant_u32(next_ops[1]); - uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); - next_semantics = mask_relevant_memory_semantics(next_semantics); - - // There is no "just execution barrier" in HLSL. - // If there are no memory semantics for next instruction, we will imply group shared memory is synced. - if (next_semantics == 0) - next_semantics = MemorySemanticsWorkgroupMemoryMask; - - bool memory_scope_covered = false; - if (next_memory == memory) - memory_scope_covered = true; - else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) - { - // If we only care about workgroup memory, either Device or Workgroup scope is fine, - // scope does not have to match. - if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && - (memory == ScopeDevice || memory == ScopeWorkgroup)) - { - memory_scope_covered = true; - } - } - else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) - { - // The control barrier has device scope, but the memory barrier just has workgroup scope. 
- memory_scope_covered = true; - } - - // If we have the same memory scope, and all memory types are covered, we're good. - if (memory_scope_covered && (semantics & next_semantics) == semantics) - break; - } - } - - // We are synchronizing some memory or syncing execution, - // so we cannot forward any loads beyond the memory barrier. - if (semantics || opcode == OpControlBarrier) - { - assert(current_emitting_block); - flush_control_dependent_expressions(current_emitting_block->self); - flush_all_active_variables(); - } - - if (opcode == OpControlBarrier) - { - // We cannot emit just execution barrier, for no memory semantics pick the cheapest option. - if (semantics == MemorySemanticsWorkgroupMemoryMask || semantics == 0) - statement("GroupMemoryBarrierWithGroupSync();"); - else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0) - statement("DeviceMemoryBarrierWithGroupSync();"); - else - statement("AllMemoryBarrierWithGroupSync();"); - } - else - { - if (semantics == MemorySemanticsWorkgroupMemoryMask) - statement("GroupMemoryBarrier();"); - else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0) - statement("DeviceMemoryBarrier();"); - else - statement("AllMemoryBarrier();"); - } - break; - } - - case OpBitFieldInsert: - { - if (!requires_bitfield_insert) - { - requires_bitfield_insert = true; - force_recompile(); - } - - auto expr = join("spvBitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", - to_expression(ops[4]), ", ", to_expression(ops[5]), ")"); - - bool forward = - should_forward(ops[2]) && should_forward(ops[3]) && should_forward(ops[4]) && should_forward(ops[5]); - - auto &restype = get(ops[0]); - expr = bitcast_expression(restype, SPIRType::UInt, expr); - emit_op(ops[0], ops[1], expr, forward); - break; - } - - case OpBitFieldSExtract: - case OpBitFieldUExtract: - { - if (!requires_bitfield_extract) - { - requires_bitfield_extract = true; - force_recompile(); - } - - if (opcode == OpBitFieldSExtract) - HLSL_TFOP(spvBitfieldSExtract); - else - HLSL_TFOP(spvBitfieldUExtract); - break; - } - - case OpBitCount: - { - auto basetype = expression_type(ops[2]).basetype; - emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype); - break; - } - - case OpBitReverse: - HLSL_UFOP(reversebits); - break; - - case OpArrayLength: - { - auto *var = maybe_get_backing_variable(ops[2]); - if (!var) - SPIRV_CROSS_THROW("Array length must point directly to an SSBO block."); - - auto &type = get(var->basetype); - if (!has_decoration(type.self, DecorationBlock) && !has_decoration(type.self, DecorationBufferBlock)) - SPIRV_CROSS_THROW("Array length expression must point to a block type."); - - // This must be 32-bit uint, so we're good to go. - emit_uninitialized_temporary_expression(ops[0], ops[1]); - statement(to_non_uniform_aware_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); - uint32_t offset = type_struct_member_offset(type, ops[3]); - uint32_t stride = type_struct_member_array_stride(type, ops[3]); - statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";"); - break; - } - - case OpIsHelperInvocationEXT: - if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) - SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher."); - // Helper lane state with demote is volatile by nature. - // Do not forward this. 
- emit_op(ops[0], ops[1], "IsHelperLane()", false); - break; - - case OpBeginInvocationInterlockEXT: - case OpEndInvocationInterlockEXT: - if (hlsl_options.shader_model < 51) - SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1."); - break; // Nothing to do in the body - - case OpRayQueryInitializeKHR: - { - flush_variable_declaration(ops[0]); - - std::string ray_desc_name = get_unique_identifier(); - statement("RayDesc ", ray_desc_name, " = {", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", - to_expression(ops[6]), ", ", to_expression(ops[7]), "};"); - - statement(to_expression(ops[0]), ".TraceRayInline(", - to_expression(ops[1]), ", ", // acc structure - to_expression(ops[2]), ", ", // ray flags - to_expression(ops[3]), ", ", // mask - ray_desc_name, ");"); // ray - break; - } - case OpRayQueryProceedKHR: - { - flush_variable_declaration(ops[0]); - emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".Proceed()"), false); - break; - } - case OpRayQueryTerminateKHR: - { - flush_variable_declaration(ops[0]); - statement(to_expression(ops[0]), ".Abort();"); - break; - } - case OpRayQueryGenerateIntersectionKHR: - { - flush_variable_declaration(ops[0]); - statement(to_expression(ops[0]), ".CommitProceduralPrimitiveHit(", to_expression(ops[1]), ");"); - break; - } - case OpRayQueryConfirmIntersectionKHR: - { - flush_variable_declaration(ops[0]); - statement(to_expression(ops[0]), ".CommitNonOpaqueTriangleHit();"); - break; - } - case OpRayQueryGetIntersectionTypeKHR: - { - emit_rayquery_function(".CommittedStatus()", ".CandidateType()", ops); - break; - } - case OpRayQueryGetIntersectionTKHR: - { - emit_rayquery_function(".CommittedRayT()", ".CandidateTriangleRayT()", ops); - break; - } - case OpRayQueryGetIntersectionInstanceCustomIndexKHR: - { - emit_rayquery_function(".CommittedInstanceID()", ".CandidateInstanceID()", ops); - break; - } - case OpRayQueryGetIntersectionInstanceIdKHR: - { - emit_rayquery_function(".CommittedInstanceIndex()", ".CandidateInstanceIndex()", ops); - break; - } - case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: - { - emit_rayquery_function(".CommittedInstanceContributionToHitGroupIndex()", - ".CandidateInstanceContributionToHitGroupIndex()", ops); - break; - } - case OpRayQueryGetIntersectionGeometryIndexKHR: - { - emit_rayquery_function(".CommittedGeometryIndex()", - ".CandidateGeometryIndex()", ops); - break; - } - case OpRayQueryGetIntersectionPrimitiveIndexKHR: - { - emit_rayquery_function(".CommittedPrimitiveIndex()", ".CandidatePrimitiveIndex()", ops); - break; - } - case OpRayQueryGetIntersectionBarycentricsKHR: - { - emit_rayquery_function(".CommittedTriangleBarycentrics()", ".CandidateTriangleBarycentrics()", ops); - break; - } - case OpRayQueryGetIntersectionFrontFaceKHR: - { - emit_rayquery_function(".CommittedTriangleFrontFace()", ".CandidateTriangleFrontFace()", ops); - break; - } - case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: - { - flush_variable_declaration(ops[0]); - emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".CandidateProceduralPrimitiveNonOpaque()"), false); - break; - } - case OpRayQueryGetIntersectionObjectRayDirectionKHR: - { - emit_rayquery_function(".CommittedObjectRayDirection()", ".CandidateObjectRayDirection()", ops); - break; - } - case OpRayQueryGetIntersectionObjectRayOriginKHR: - { - flush_variable_declaration(ops[0]); - emit_rayquery_function(".CommittedObjectRayOrigin()", ".CandidateObjectRayOrigin()", ops); - break; - } - case 
OpRayQueryGetIntersectionObjectToWorldKHR: - { - emit_rayquery_function(".CommittedObjectToWorld4x3()", ".CandidateObjectToWorld4x3()", ops); - break; - } - case OpRayQueryGetIntersectionWorldToObjectKHR: - { - emit_rayquery_function(".CommittedWorldToObject4x3()", ".CandidateWorldToObject4x3()", ops); - break; - } - case OpRayQueryGetRayFlagsKHR: - { - flush_variable_declaration(ops[0]); - emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayFlags()"), false); - break; - } - case OpRayQueryGetRayTMinKHR: - { - flush_variable_declaration(ops[0]); - emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayTMin()"), false); - break; - } - case OpRayQueryGetWorldRayOriginKHR: - { - flush_variable_declaration(ops[0]); - emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayOrigin()"), false); - break; - } - case OpRayQueryGetWorldRayDirectionKHR: - { - flush_variable_declaration(ops[0]); - emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false); - break; - } - case OpSetMeshOutputsEXT: - { - statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); - break; - } - default: - CompilerGLSL::emit_instruction(instruction); - break; - } + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + opcode = get_remapped_spirv_op(opcode); + + switch (opcode) + { + case OpAccessChain: + case OpInBoundsAccessChain: + { + emit_access_chain(instruction); + break; + } + case OpBitcast: + { + auto bitcast_type = get_bitcast_type(ops[0], ops[2]); + if (bitcast_type == CompilerHLSL::TypeNormal) + CompilerGLSL::emit_instruction(instruction); + else + { + if (!requires_uint2_packing) + { + requires_uint2_packing = true; + force_recompile(); + } + + if (bitcast_type == CompilerHLSL::TypePackUint2x32) + emit_unary_func_op(ops[0], ops[1], ops[2], "spvPackUint2x32"); + else + emit_unary_func_op(ops[0], ops[1], ops[2], "spvUnpackUint2x32"); + } + + break; + } + + case OpSelect: + { + auto &value_type = expression_type(ops[3]); + if (value_type.basetype == SPIRType::Struct || is_array(value_type)) + { + // HLSL does not support ternary expressions on composites. + // Cannot use branches, since we might be in a continue block + // where explicit control flow is prohibited. + // Emit a helper function where we can use control flow. + TypeID value_type_id = expression_type_id(ops[3]); + auto itr = std::find(composite_selection_workaround_types.begin(), + composite_selection_workaround_types.end(), + value_type_id); + if (itr == composite_selection_workaround_types.end()) + { + composite_selection_workaround_types.push_back(value_type_id); + force_recompile(); + } + emit_uninitialized_temporary_expression(ops[0], ops[1]); + statement("spvSelectComposite(", + to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ");"); + } + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpStore: + { + emit_store(instruction); + break; + } + + case OpLoad: + { + emit_load(instruction); + break; + } + + case OpMatrixTimesVector: + { + // Matrices are kept in a transposed state all the time, flip multiplication order always. 
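+        // (mul() treats a vector first argument as a row vector, so mul(v, M) on the
+        // transposed matrix yields the same components as column-major M * v.)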
+ emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpVectorTimesMatrix: + { + // Matrices are kept in a transposed state all the time, flip multiplication order always. + emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpMatrixTimesMatrix: + { + // Matrices are kept in a transposed state all the time, flip multiplication order always. + emit_binary_func_op(ops[0], ops[1], ops[3], ops[2], "mul"); + break; + } + + case OpOuterProduct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2]; + uint32_t b = ops[3]; + + auto &type = get(result_type); + string expr = type_to_glsl_constructor(type); + expr += "("; + for (uint32_t col = 0; col < type.columns; col++) + { + expr += to_enclosed_expression(a); + expr += " * "; + expr += to_extract_component_expression(b, col); + if (col + 1 < type.columns) + expr += ", "; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + break; + } + + case OpFMod: + { + if (!requires_op_fmod) + { + requires_op_fmod = true; + force_recompile(); + } + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpFRem: + emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], "fmod"); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *combined = maybe_get(ops[2]); + + if (combined) + { + auto &e = emit_op(result_type, id, to_expression(combined->image), true, true); + auto *var = maybe_get_backing_variable(combined->image); + if (var) + e.loaded_from = var->self; + } + else + { + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + auto *var = maybe_get_backing_variable(ops[2]); + if (var) + e.loaded_from = var->self; + } + break; + } + + case OpDPdx: + HLSL_UFOP(ddx); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdy: + HLSL_UFOP(ddy); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxFine: + HLSL_UFOP(ddx_fine); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyFine: + HLSL_UFOP(ddy_fine); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxCoarse: + HLSL_UFOP(ddx_coarse); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyCoarse: + HLSL_UFOP(ddy_coarse); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + HLSL_UFOP(fwidth); + register_control_dependent_expression(ops[1]); + break; + + case OpLogicalNot: + { + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get(result_type); + + if (type.vecsize > 1) + emit_unrolled_unary_op(result_type, id, ops[2], "!"); + else + HLSL_UOP(!); + break; + } + + case OpIEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); + else + HLSL_BOP_CAST(==, int_type); + break; + } + + case OpLogicalEqual: + case OpFOrdEqual: + case OpFUnordEqual: + { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + // We cannot implement UnordEqual as !(OrdNotEqual), as HLSL cannot express OrdNotEqual. + // HACK: FUnordEqual will be implemented as FOrdEqual. 
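+        // (Deviation: NaN operands will compare false here, while FUnordEqual is
+        // specified to return true for unordered inputs.)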
+ + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "==", false, SPIRType::Unknown); + else + HLSL_BOP(==); + break; + } + + case OpINotEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); + else + HLSL_BOP_CAST(!=, int_type); + break; + } + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + case OpFUnordNotEqual: + { + // HLSL != operator is unordered. + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-float-rules. + // isnan() is apparently implemented as x != x as well. + + // FIXME: FOrdNotEqual cannot be implemented in a crisp and simple way here. + // We would need to do something like not(UnordEqual), but that cannot be expressed either. + // Adding a lot of NaN checks would be a breaking change from perspective of performance. + // SPIR-V will generally use isnan() checks when this even matters. + // HACK: FOrdNotEqual will be implemented as FUnordEqual. + + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "!=", false, SPIRType::Unknown); + else + HLSL_BOP(!=); + break; + } + + case OpUGreaterThan: + case OpSGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + auto type = opcode == OpUGreaterThan ? uint_type : int_type; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, type); + else + HLSL_BOP_CAST(>, type); + break; + } + + case OpFOrdGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", false, SPIRType::Unknown); + else + HLSL_BOP(>); + break; + } + + case OpFUnordGreaterThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, type); + else + HLSL_BOP_CAST(>=, type); + break; + } + + case OpFOrdGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", false, SPIRType::Unknown); + else + HLSL_BOP(>=); + break; + } + + case OpFUnordGreaterThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpULessThan: + case OpSLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpULessThan ? 
uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, type); + else + HLSL_BOP_CAST(<, type); + break; + } + + case OpFOrdLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<", false, SPIRType::Unknown); + else + HLSL_BOP(<); + break; + } + + case OpFUnordLessThan: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">=", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + auto type = opcode == OpULessThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, type); + else + HLSL_BOP_CAST(<=, type); + break; + } + + case OpFOrdLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "<=", false, SPIRType::Unknown); + else + HLSL_BOP(<=); + break; + } + + case OpFUnordLessThanEqual: + { + auto result_type = ops[0]; + auto id = ops[1]; + + if (expression_type(ops[2]).vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], ">", true, SPIRType::Unknown); + else + CompilerGLSL::emit_instruction(instruction); + break; + } + + case OpImageQueryLod: + emit_texture_op(instruction, false); + break; + + case OpImageQuerySizeLod: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(ops[2]); + auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); + statement("uint ", dummy_samples_levels, ";"); + + auto expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", ", + bitcast_expression(SPIRType::UInt, ops[3]), ", ", dummy_samples_levels, ")"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySize: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(ops[2]); + bool uav = expression_type(ops[2]).image.sampled == 2; + + if (const auto *var = maybe_get_backing_variable(ops[2])) + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) + uav = false; + + auto dummy_samples_levels = join(get_fallback_name(id), "_dummy_parameter"); + statement("uint ", dummy_samples_levels, ";"); + + string expr; + if (uav) + expr = join("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", dummy_samples_levels, ")"); + else + expr = join("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", dummy_samples_levels, ")"); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySamples: + case OpImageQueryLevels: + { + auto result_type = ops[0]; + auto id = ops[1]; + + require_texture_query_variant(ops[2]); + bool uav = expression_type(ops[2]).image.sampled == 2; + if (opcode == OpImageQueryLevels && uav) + SPIRV_CROSS_THROW("Cannot query levels for UAV images."); + + if (const auto *var = maybe_get_backing_variable(ops[2])) + if 
(hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var->self, DecorationNonWritable)) + uav = false; + + // Keep it simple and do not emit special variants to make this look nicer ... + // This stuff is barely, if ever, used. + forced_temporaries.insert(id); + auto &type = get(result_type); + statement(variable_decl(type, to_name(id)), ";"); + + if (uav) + statement("spvImageSize(", to_non_uniform_aware_expression(ops[2]), ", ", to_name(id), ");"); + else + statement("spvTextureSize(", to_non_uniform_aware_expression(ops[2]), ", 0u, ", to_name(id), ");"); + + auto &restype = get(ops[0]); + auto expr = bitcast_expression(restype, SPIRType::UInt, to_name(id)); + set(id, expr, result_type, true); + break; + } + + case OpImageRead: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto *var = maybe_get_backing_variable(ops[2]); + auto &type = expression_type(ops[2]); + bool subpass_data = type.image.dim == DimSubpassData; + bool pure = false; + + string imgexpr; + + if (subpass_data) + { + if (hlsl_options.shader_model < 40) + SPIRV_CROSS_THROW("Subpass loads are not supported in HLSL shader model 2/3."); + + // Similar to GLSL, implement subpass loads using texelFetch. + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || instruction.length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); + uint32_t sample = ops[5]; + imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int2(gl_FragCoord.xy), ", to_expression(sample), ")"); + } + else + imgexpr = join(to_non_uniform_aware_expression(ops[2]), ".Load(int3(int2(gl_FragCoord.xy), 0))"); + + pure = true; + } + else + { + imgexpr = join(to_non_uniform_aware_expression(ops[2]), "[", to_expression(ops[3]), "]"); + // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", + // except that the underlying type changes how the data is interpreted. + + bool force_srv = + hlsl_options.nonwritable_uav_texture_as_srv && var && has_decoration(var->self, DecorationNonWritable); + pure = force_srv; + + if (var && !subpass_data && !force_srv) + imgexpr = remap_swizzle(get(result_type), + image_format_to_components(get(var->basetype).image.format), imgexpr); + } + + if (var) + { + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); + + if (!pure) + { + e.loaded_from = var->self; + if (forward) + var->dependees.push_back(id); + } + } + else + emit_op(result_type, id, imgexpr, false); + + inherit_expression_dependencies(id, ops[2]); + if (type.image.ms) + inherit_expression_dependencies(id, ops[5]); + break; + } + + case OpImageWrite: + { + auto *var = maybe_get_backing_variable(ops[0]); + + // The underlying image type in HLSL depends on the image format, unlike GLSL, where all images are "vec4", + // except that the underlying type changes how the data is interpreted. 
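+        // Narrow the stored value to the format's component count first, so the
+        // swizzle matches the UAV's declared element type.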
+ auto value_expr = to_expression(ops[2]); + if (var) + { + auto &type = get(var->basetype); + auto narrowed_type = get(type.image.type); + narrowed_type.vecsize = image_format_to_components(type.image.format); + value_expr = remap_swizzle(narrowed_type, expression_type(ops[2]).vecsize, value_expr); + } + + statement(to_non_uniform_aware_expression(ops[0]), "[", to_expression(ops[1]), "] = ", value_expr, ";"); + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto expr = to_expression(ops[2]); + expr += join("[", to_expression(ops[3]), "]"); + auto &e = set(id, expr, result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpAtomicFAddEXT: + case OpAtomicFMinEXT: + case OpAtomicFMaxEXT: + SPIRV_CROSS_THROW("Floating-point atomics are not supported in HLSL."); + + case OpAtomicCompareExchange: + case OpAtomicExchange: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + case OpAtomicIAdd: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicLoad: + case OpAtomicStore: + { + emit_atomic(ops, instruction.length, opcode); + break; + } + + case OpControlBarrier: + case OpMemoryBarrier: + { + uint32_t memory; + uint32_t semantics; + + if (opcode == OpMemoryBarrier) + { + memory = evaluate_constant_u32(ops[0]); + semantics = evaluate_constant_u32(ops[1]); + } + else + { + memory = evaluate_constant_u32(ops[1]); + semantics = evaluate_constant_u32(ops[2]); + } + + if (memory == ScopeSubgroup) + { + // No Wave-barriers in HLSL. + break; + } + + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = evaluate_constant_u32(next_ops[1]); + uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + // There is no "just execution barrier" in HLSL. + // If there are no memory semantics for next instruction, we will imply group shared memory is synced. + if (next_semantics == 0) + next_semantics = MemorySemanticsWorkgroupMemoryMask; + + bool memory_scope_covered = false; + if (next_memory == memory) + memory_scope_covered = true; + else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) + { + // If we only care about workgroup memory, either Device or Workgroup scope is fine, + // scope does not have to match. + if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && + (memory == ScopeDevice || memory == ScopeWorkgroup)) + { + memory_scope_covered = true; + } + } + else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) + { + // The control barrier has device scope, but the memory barrier just has workgroup scope. 
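+                // A device-scope control barrier is strictly stronger, so the
+                // workgroup-scope memory request is still covered.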
+ memory_scope_covered = true; + } + + // If we have the same memory scope, and all memory types are covered, we're good. + if (memory_scope_covered && (semantics & next_semantics) == semantics) + break; + } + } + + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) + { + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + } + + if (opcode == OpControlBarrier) + { + // We cannot emit just execution barrier, for no memory semantics pick the cheapest option. + if (semantics == MemorySemanticsWorkgroupMemoryMask || semantics == 0) + statement("GroupMemoryBarrierWithGroupSync();"); + else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0) + statement("DeviceMemoryBarrierWithGroupSync();"); + else + statement("AllMemoryBarrierWithGroupSync();"); + } + else + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + statement("GroupMemoryBarrier();"); + else if (semantics != 0 && (semantics & MemorySemanticsWorkgroupMemoryMask) == 0) + statement("DeviceMemoryBarrier();"); + else + statement("AllMemoryBarrier();"); + } + break; + } + + case OpBitFieldInsert: + { + if (!requires_bitfield_insert) + { + requires_bitfield_insert = true; + force_recompile(); + } + + auto expr = join("spvBitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + to_expression(ops[4]), ", ", to_expression(ops[5]), ")"); + + bool forward = + should_forward(ops[2]) && should_forward(ops[3]) && should_forward(ops[4]) && should_forward(ops[5]); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(ops[0], ops[1], expr, forward); + break; + } + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + { + if (!requires_bitfield_extract) + { + requires_bitfield_extract = true; + force_recompile(); + } + + if (opcode == OpBitFieldSExtract) + HLSL_TFOP(spvBitfieldSExtract); + else + HLSL_TFOP(spvBitfieldUExtract); + break; + } + + case OpBitCount: + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "countbits", basetype, basetype); + break; + } + + case OpBitReverse: + HLSL_UFOP(reversebits); + break; + + case OpArrayLength: + { + auto *var = maybe_get_backing_variable(ops[2]); + if (!var) + SPIRV_CROSS_THROW("Array length must point directly to an SSBO block."); + + auto &type = get(var->basetype); + if (!has_decoration(type.self, DecorationBlock) && !has_decoration(type.self, DecorationBufferBlock)) + SPIRV_CROSS_THROW("Array length expression must point to a block type."); + + // This must be 32-bit uint, so we're good to go. + emit_uninitialized_temporary_expression(ops[0], ops[1]); + statement(to_non_uniform_aware_expression(ops[2]), ".GetDimensions(", to_expression(ops[1]), ");"); + uint32_t offset = type_struct_member_offset(type, ops[3]); + uint32_t stride = type_struct_member_array_stride(type, ops[3]); + statement(to_expression(ops[1]), " = (", to_expression(ops[1]), " - ", offset, ") / ", stride, ";"); + break; + } + + case OpIsHelperInvocationEXT: + if (hlsl_options.shader_model < 50 || get_entry_point().model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Helper Invocation input is only supported in PS 5.0 or higher."); + // Helper lane state with demote is volatile by nature. + // Do not forward this. 
+ emit_op(ops[0], ops[1], "IsHelperLane()", false); + break; + + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1."); + break; // Nothing to do in the body + + case OpRayQueryInitializeKHR: + { + flush_variable_declaration(ops[0]); + + std::string ray_desc_name = get_unique_identifier(); + statement("RayDesc ", ray_desc_name, " = {", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), "};"); + + statement(to_expression(ops[0]), ".TraceRayInline(", + to_expression(ops[1]), ", ", // acc structure + to_expression(ops[2]), ", ", // ray flags + to_expression(ops[3]), ", ", // mask + ray_desc_name, ");"); // ray + break; + } + case OpRayQueryProceedKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".Proceed()"), false); + break; + } + case OpRayQueryTerminateKHR: + { + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".Abort();"); + break; + } + case OpRayQueryGenerateIntersectionKHR: + { + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".CommitProceduralPrimitiveHit(", to_expression(ops[1]), ");"); + break; + } + case OpRayQueryConfirmIntersectionKHR: + { + flush_variable_declaration(ops[0]); + statement(to_expression(ops[0]), ".CommitNonOpaqueTriangleHit();"); + break; + } + case OpRayQueryGetIntersectionTypeKHR: + { + emit_rayquery_function(".CommittedStatus()", ".CandidateType()", ops); + break; + } + case OpRayQueryGetIntersectionTKHR: + { + emit_rayquery_function(".CommittedRayT()", ".CandidateTriangleRayT()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: + { + emit_rayquery_function(".CommittedInstanceID()", ".CandidateInstanceID()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceIdKHR: + { + emit_rayquery_function(".CommittedInstanceIndex()", ".CandidateInstanceIndex()", ops); + break; + } + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: + { + emit_rayquery_function(".CommittedInstanceContributionToHitGroupIndex()", + ".CandidateInstanceContributionToHitGroupIndex()", ops); + break; + } + case OpRayQueryGetIntersectionGeometryIndexKHR: + { + emit_rayquery_function(".CommittedGeometryIndex()", + ".CandidateGeometryIndex()", ops); + break; + } + case OpRayQueryGetIntersectionPrimitiveIndexKHR: + { + emit_rayquery_function(".CommittedPrimitiveIndex()", ".CandidatePrimitiveIndex()", ops); + break; + } + case OpRayQueryGetIntersectionBarycentricsKHR: + { + emit_rayquery_function(".CommittedTriangleBarycentrics()", ".CandidateTriangleBarycentrics()", ops); + break; + } + case OpRayQueryGetIntersectionFrontFaceKHR: + { + emit_rayquery_function(".CommittedTriangleFrontFace()", ".CandidateTriangleFrontFace()", ops); + break; + } + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".CandidateProceduralPrimitiveNonOpaque()"), false); + break; + } + case OpRayQueryGetIntersectionObjectRayDirectionKHR: + { + emit_rayquery_function(".CommittedObjectRayDirection()", ".CandidateObjectRayDirection()", ops); + break; + } + case OpRayQueryGetIntersectionObjectRayOriginKHR: + { + flush_variable_declaration(ops[0]); + emit_rayquery_function(".CommittedObjectRayOrigin()", ".CandidateObjectRayOrigin()", ops); + break; + } + case 
OpRayQueryGetIntersectionObjectToWorldKHR: + { + emit_rayquery_function(".CommittedObjectToWorld4x3()", ".CandidateObjectToWorld4x3()", ops); + break; + } + case OpRayQueryGetIntersectionWorldToObjectKHR: + { + emit_rayquery_function(".CommittedWorldToObject4x3()", ".CandidateWorldToObject4x3()", ops); + break; + } + case OpRayQueryGetRayFlagsKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayFlags()"), false); + break; + } + case OpRayQueryGetRayTMinKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".RayTMin()"), false); + break; + } + case OpRayQueryGetWorldRayOriginKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayOrigin()"), false); + break; + } + case OpRayQueryGetWorldRayDirectionKHR: + { + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join(to_expression(ops[2]), ".WorldRayDirection()"), false); + break; + } + case OpSetMeshOutputsEXT: + { + statement("SetMeshOutputCounts(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; + } + default: + CompilerGLSL::emit_instruction(instruction); + break; + } } void CompilerHLSL::require_texture_query_variant(uint32_t var_id) { - if (const auto *var = maybe_get_backing_variable(var_id)) - var_id = var->self; + if (const auto *var = maybe_get_backing_variable(var_id)) + var_id = var->self; - auto &type = expression_type(var_id); - bool uav = type.image.sampled == 2; - if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var_id, DecorationNonWritable)) - uav = false; + auto &type = expression_type(var_id); + bool uav = type.image.sampled == 2; + if (hlsl_options.nonwritable_uav_texture_as_srv && has_decoration(var_id, DecorationNonWritable)) + uav = false; - uint32_t bit = 0; - switch (type.image.dim) - { - case Dim1D: - bit = type.image.arrayed ? Query1DArray : Query1D; - break; + uint32_t bit = 0; + switch (type.image.dim) + { + case Dim1D: + bit = type.image.arrayed ? Query1DArray : Query1D; + break; - case Dim2D: - if (type.image.ms) - bit = type.image.arrayed ? Query2DMSArray : Query2DMS; - else - bit = type.image.arrayed ? Query2DArray : Query2D; - break; + case Dim2D: + if (type.image.ms) + bit = type.image.arrayed ? Query2DMSArray : Query2DMS; + else + bit = type.image.arrayed ? Query2DArray : Query2D; + break; - case Dim3D: - bit = Query3D; - break; + case Dim3D: + bit = Query3D; + break; - case DimCube: - bit = type.image.arrayed ? QueryCubeArray : QueryCube; - break; + case DimCube: + bit = type.image.arrayed ? QueryCubeArray : QueryCube; + break; - case DimBuffer: - bit = QueryBuffer; - break; + case DimBuffer: + bit = QueryBuffer; + break; - default: - SPIRV_CROSS_THROW("Unsupported query type."); - } + default: + SPIRV_CROSS_THROW("Unsupported query type."); + } - switch (get(type.image.type).basetype) - { - case SPIRType::Float: - bit += QueryTypeFloat; - break; + switch (get(type.image.type).basetype) + { + case SPIRType::Float: + bit += QueryTypeFloat; + break; - case SPIRType::Int: - bit += QueryTypeInt; - break; + case SPIRType::Int: + bit += QueryTypeInt; + break; - case SPIRType::UInt: - bit += QueryTypeUInt; - break; + case SPIRType::UInt: + bit += QueryTypeUInt; + break; - default: - SPIRV_CROSS_THROW("Unsupported query type."); - } + default: + SPIRV_CROSS_THROW("Unsupported query type."); + } - auto norm_state = image_format_to_normalized_state(type.image.format); - auto &variant = uav ? 
required_texture_size_variants - .uav[uint32_t(norm_state)][image_format_to_components(type.image.format) - 1] : - required_texture_size_variants.srv; + auto norm_state = image_format_to_normalized_state(type.image.format); + auto &variant = uav ? required_texture_size_variants + .uav[uint32_t(norm_state)][image_format_to_components(type.image.format) - 1] : + required_texture_size_variants.srv; - uint64_t mask = 1ull << bit; - if ((variant & mask) == 0) - { - force_recompile(); - variant |= mask; - } + uint64_t mask = 1ull << bit; + if ((variant & mask) == 0) + { + force_recompile(); + variant |= mask; + } } void CompilerHLSL::set_root_constant_layouts(std::vector layout) { - root_constants_layout = std::move(layout); + root_constants_layout = std::move(layout); } void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &vertex_attributes) { - remap_vertex_attributes.push_back(vertex_attributes); + remap_vertex_attributes.push_back(vertex_attributes); } VariableID CompilerHLSL::remap_num_workgroups_builtin() { - update_active_builtins(); + update_active_builtins(); - if (!active_input_builtins.get(BuiltInNumWorkgroups)) - return 0; + if (!active_input_builtins.get(BuiltInNumWorkgroups)) + return 0; - // Create a new, fake UBO. - uint32_t offset = ir.increase_bound_by(4); + // Create a new, fake UBO. + uint32_t offset = ir.increase_bound_by(4); - uint32_t uint_type_id = offset; - uint32_t block_type_id = offset + 1; - uint32_t block_pointer_type_id = offset + 2; - uint32_t variable_id = offset + 3; + uint32_t uint_type_id = offset; + uint32_t block_type_id = offset + 1; + uint32_t block_pointer_type_id = offset + 2; + uint32_t variable_id = offset + 3; - SPIRType uint_type { OpTypeVector }; - uint_type.basetype = SPIRType::UInt; - uint_type.width = 32; - uint_type.vecsize = 3; - uint_type.columns = 1; - set(uint_type_id, uint_type); + SPIRType uint_type { OpTypeVector }; + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + uint_type.vecsize = 3; + uint_type.columns = 1; + set(uint_type_id, uint_type); - SPIRType block_type { OpTypeStruct }; - block_type.basetype = SPIRType::Struct; - block_type.member_types.push_back(uint_type_id); - set(block_type_id, block_type); - set_decoration(block_type_id, DecorationBlock); - set_member_name(block_type_id, 0, "count"); - set_member_decoration(block_type_id, 0, DecorationOffset, 0); + SPIRType block_type { OpTypeStruct }; + block_type.basetype = SPIRType::Struct; + block_type.member_types.push_back(uint_type_id); + set(block_type_id, block_type); + set_decoration(block_type_id, DecorationBlock); + set_member_name(block_type_id, 0, "count"); + set_member_decoration(block_type_id, 0, DecorationOffset, 0); - SPIRType block_pointer_type = block_type; - block_pointer_type.pointer = true; - block_pointer_type.storage = StorageClassUniform; - block_pointer_type.parent_type = block_type_id; - auto &ptr_type = set(block_pointer_type_id, block_pointer_type); + SPIRType block_pointer_type = block_type; + block_pointer_type.pointer = true; + block_pointer_type.storage = StorageClassUniform; + block_pointer_type.parent_type = block_type_id; + auto &ptr_type = set(block_pointer_type_id, block_pointer_type); - // Preserve self. - ptr_type.self = block_type_id; + // Preserve self. 
+ ptr_type.self = block_type_id; - set(variable_id, block_pointer_type_id, StorageClassUniform); - ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups"; + set(variable_id, block_pointer_type_id, StorageClassUniform); + ir.meta[variable_id].decoration.alias = "SPIRV_Cross_NumWorkgroups"; - num_workgroups_builtin = variable_id; - get_entry_point().interface_variables.push_back(num_workgroups_builtin); - return variable_id; + num_workgroups_builtin = variable_id; + get_entry_point().interface_variables.push_back(num_workgroups_builtin); + return variable_id; } void CompilerHLSL::set_resource_binding_flags(HLSLBindingFlags flags) { - resource_binding_flags = flags; + resource_binding_flags = flags; } void CompilerHLSL::validate_shader_model() { - // Check for nonuniform qualifier. - // Instead of looping over all decorations to find this, just look at capabilities. - for (auto &cap : ir.declared_capabilities) - { - switch (cap) - { - case CapabilityShaderNonUniformEXT: - case CapabilityRuntimeDescriptorArrayEXT: - if (hlsl_options.shader_model < 51) - SPIRV_CROSS_THROW( - "Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex."); - break; + // Check for nonuniform qualifier. + // Instead of looping over all decorations to find this, just look at capabilities. + for (auto &cap : ir.declared_capabilities) + { + switch (cap) + { + case CapabilityShaderNonUniformEXT: + case CapabilityRuntimeDescriptorArrayEXT: + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW( + "Shader model 5.1 or higher is required to use bindless resources or NonUniformResourceIndex."); + break; - case CapabilityVariablePointers: - case CapabilityVariablePointersStorageBuffer: - SPIRV_CROSS_THROW("VariablePointers capability is not supported in HLSL."); + case CapabilityVariablePointers: + case CapabilityVariablePointersStorageBuffer: + SPIRV_CROSS_THROW("VariablePointers capability is not supported in HLSL."); - default: - break; - } - } + default: + break; + } + } - if (ir.addressing_model != AddressingModelLogical) - SPIRV_CROSS_THROW("Only Logical addressing model can be used with HLSL."); + if (ir.addressing_model != AddressingModelLogical) + SPIRV_CROSS_THROW("Only Logical addressing model can be used with HLSL."); - if (hlsl_options.enable_16bit_types && hlsl_options.shader_model < 62) - SPIRV_CROSS_THROW("Need at least shader model 6.2 when enabling native 16-bit type support."); + if (hlsl_options.enable_16bit_types && hlsl_options.shader_model < 62) + SPIRV_CROSS_THROW("Need at least shader model 6.2 when enabling native 16-bit type support."); } string CompilerHLSL::compile() { - ir.fixup_reserved_names(); + ir.fixup_reserved_names(); - // Do not deal with ES-isms like precision, older extensions and such. 
- options.es = false; - options.version = 450; - options.vulkan_semantics = true; - backend.float_literal_suffix = true; - backend.double_literal_suffix = false; - backend.long_long_literal_suffix = true; - backend.uint32_t_literal_suffix = true; - backend.int16_t_literal_suffix = ""; - backend.uint16_t_literal_suffix = "u"; - backend.basic_int_type = "int"; - backend.basic_uint_type = "uint"; - backend.demote_literal = "discard"; - backend.boolean_mix_function = ""; - backend.swizzle_is_function = false; - backend.shared_is_implied = true; - backend.unsized_array_supported = true; - backend.explicit_struct_type = false; - backend.use_initializer_list = true; - backend.use_constructor_splatting = false; - backend.can_swizzle_scalar = true; - backend.can_declare_struct_inline = false; - backend.can_declare_arrays_inline = false; - backend.can_return_array = false; - backend.nonuniform_qualifier = "NonUniformResourceIndex"; - backend.support_case_fallthrough = false; - backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT; - backend.force_gl_in_out_block = backend.force_merged_mesh_block; + // Do not deal with ES-isms like precision, older extensions and such. + options.es = false; + options.version = 450; + options.vulkan_semantics = true; + backend.float_literal_suffix = true; + backend.double_literal_suffix = false; + backend.long_long_literal_suffix = true; + backend.uint32_t_literal_suffix = true; + backend.int16_t_literal_suffix = ""; + backend.uint16_t_literal_suffix = "u"; + backend.basic_int_type = "int"; + backend.basic_uint_type = "uint"; + backend.demote_literal = "discard"; + backend.boolean_mix_function = ""; + backend.swizzle_is_function = false; + backend.shared_is_implied = true; + backend.unsized_array_supported = true; + backend.explicit_struct_type = false; + backend.use_initializer_list = true; + backend.use_constructor_splatting = false; + backend.can_swizzle_scalar = true; + backend.can_declare_struct_inline = false; + backend.can_declare_arrays_inline = false; + backend.can_return_array = false; + backend.nonuniform_qualifier = "NonUniformResourceIndex"; + backend.support_case_fallthrough = false; + backend.force_merged_mesh_block = get_execution_model() == ExecutionModelMeshEXT; + backend.force_gl_in_out_block = backend.force_merged_mesh_block; - // SM 4.1 does not support precise for some reason. - backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; + // SM 4.1 does not support precise for some reason. + backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40; - fixup_anonymous_struct_names(); - fixup_type_alias(); - reorder_type_alias(); - build_function_control_flow_graphs_and_analyze(); - validate_shader_model(); - update_active_builtins(); - analyze_image_and_sampler_usage(); - analyze_interlocked_resource_usage(); - if (get_execution_model() == ExecutionModelMeshEXT) - analyze_meshlet_writes(); + fixup_anonymous_struct_names(); + fixup_type_alias(); + reorder_type_alias(); + build_function_control_flow_graphs_and_analyze(); + validate_shader_model(); + update_active_builtins(); + analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); + if (get_execution_model() == ExecutionModelMeshEXT) + analyze_meshlet_writes(); - // Subpass input needs SV_Position. - if (need_subpass_input) - active_input_builtins.set(BuiltInFragCoord); + // Subpass input needs SV_Position. 
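+    // (The subpass-load emulation in OpImageRead indexes the texture with gl_FragCoord.xy.)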
+ if (need_subpass_input) + active_input_builtins.set(BuiltInFragCoord); - // Need to offset by BaseVertex/BaseInstance in SM 6.8+. - if (hlsl_options.shader_model >= 68) - { - if (active_input_builtins.get(BuiltInVertexIndex)) - active_input_builtins.set(BuiltInBaseVertex); - if (active_input_builtins.get(BuiltInInstanceIndex)) - active_input_builtins.set(BuiltInBaseInstance); - } + // Need to offset by BaseVertex/BaseInstance in SM 6.8+. + if (hlsl_options.shader_model >= 68) + { + if (active_input_builtins.get(BuiltInVertexIndex)) + active_input_builtins.set(BuiltInBaseVertex); + if (active_input_builtins.get(BuiltInInstanceIndex)) + active_input_builtins.set(BuiltInBaseInstance); + } - uint32_t pass_count = 0; - do - { - reset(pass_count); + uint32_t pass_count = 0; + do + { + reset(pass_count); - // Move constructor for this type is broken on GCC 4.9 ... - buffer.reset(); + // Move constructor for this type is broken on GCC 4.9 ... + buffer.reset(); - emit_header(); - emit_resources(); + emit_header(); + emit_resources(); - emit_function(get(ir.default_entry_point), Bitset()); - emit_hlsl_entry_point(); + emit_function(get(ir.default_entry_point), Bitset()); + emit_hlsl_entry_point(); - pass_count++; - } while (is_forcing_recompilation()); + pass_count++; + } while (is_forcing_recompilation()); - // Entry point in HLSL is always main() for the time being. - get_entry_point().name = "main"; + // Entry point in HLSL is always main() for the time being. + get_entry_point().name = "main"; - return buffer.str(); + return buffer.str(); } void CompilerHLSL::emit_block_hints(const SPIRBlock &block) { - switch (block.hint) - { - case SPIRBlock::HintFlatten: - statement("[flatten]"); - break; - case SPIRBlock::HintDontFlatten: - statement("[branch]"); - break; - case SPIRBlock::HintUnroll: - statement("[unroll]"); - break; - case SPIRBlock::HintDontUnroll: - statement("[loop]"); - break; - default: - break; - } + switch (block.hint) + { + case SPIRBlock::HintFlatten: + statement("[flatten]"); + break; + case SPIRBlock::HintDontFlatten: + statement("[branch]"); + break; + case SPIRBlock::HintUnroll: + statement("[unroll]"); + break; + case SPIRBlock::HintDontUnroll: + statement("[loop]"); + break; + default: + break; + } } string CompilerHLSL::get_unique_identifier() { - return join("_", unique_identifier_count++, "ident"); + return join("_", unique_identifier_count++, "ident"); } void CompilerHLSL::add_hlsl_resource_binding(const HLSLResourceBinding &binding) { - StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; - resource_bindings[tuple] = { binding, false }; + StageSetBinding tuple = { binding.stage, binding.desc_set, binding.binding }; + resource_bindings[tuple] = { binding, false }; } bool CompilerHLSL::is_hlsl_resource_binding_used(ExecutionModel model, uint32_t desc_set, uint32_t binding) const { - StageSetBinding tuple = { model, desc_set, binding }; - auto itr = resource_bindings.find(tuple); - return itr != end(resource_bindings) && itr->second.second; + StageSetBinding tuple = { model, desc_set, binding }; + auto itr = resource_bindings.find(tuple); + return itr != end(resource_bindings) && itr->second.second; } CompilerHLSL::BitcastType CompilerHLSL::get_bitcast_type(uint32_t result_type, uint32_t op0) { - auto &rslt_type = get(result_type); - auto &expr_type = expression_type(op0); + auto &rslt_type = get(result_type); + auto &expr_type = expression_type(op0); - if (rslt_type.basetype == SPIRType::BaseType::UInt64 && expr_type.basetype == 
SPIRType::BaseType::UInt && - expr_type.vecsize == 2) - return BitcastType::TypePackUint2x32; - else if (rslt_type.basetype == SPIRType::BaseType::UInt && rslt_type.vecsize == 2 && - expr_type.basetype == SPIRType::BaseType::UInt64) - return BitcastType::TypeUnpackUint64; + if (rslt_type.basetype == SPIRType::BaseType::UInt64 && expr_type.basetype == SPIRType::BaseType::UInt && + expr_type.vecsize == 2) + return BitcastType::TypePackUint2x32; + else if (rslt_type.basetype == SPIRType::BaseType::UInt && rslt_type.vecsize == 2 && + expr_type.basetype == SPIRType::BaseType::UInt64) + return BitcastType::TypeUnpackUint64; - return BitcastType::TypeNormal; + return BitcastType::TypeNormal; } bool CompilerHLSL::is_hlsl_force_storage_buffer_as_uav(ID id) const { - if (hlsl_options.force_storage_buffer_as_uav) - { - return true; - } + if (hlsl_options.force_storage_buffer_as_uav) + { + return true; + } - const uint32_t desc_set = get_decoration(id, spv::DecorationDescriptorSet); - const uint32_t binding = get_decoration(id, spv::DecorationBinding); + const uint32_t desc_set = get_decoration(id, spv::DecorationDescriptorSet); + const uint32_t binding = get_decoration(id, spv::DecorationBinding); - return (force_uav_buffer_bindings.find({ desc_set, binding }) != force_uav_buffer_bindings.end()); + return (force_uav_buffer_bindings.find({ desc_set, binding }) != force_uav_buffer_bindings.end()); } void CompilerHLSL::set_hlsl_force_storage_buffer_as_uav(uint32_t desc_set, uint32_t binding) { - SetBindingPair pair = { desc_set, binding }; - force_uav_buffer_bindings.insert(pair); + SetBindingPair pair = { desc_set, binding }; + force_uav_buffer_bindings.insert(pair); } bool CompilerHLSL::is_user_type_structured(uint32_t id) const { - if (hlsl_options.preserve_structured_buffers) - { - // Compare left hand side of string only as these user types can contain more meta data such as their subtypes, - // e.g. "structuredbuffer:int" - const std::string &user_type = get_decoration_string(id, DecorationUserTypeGOOGLE); - return user_type.compare(0, 16, "structuredbuffer") == 0 || - user_type.compare(0, 18, "rwstructuredbuffer") == 0 || - user_type.compare(0, 33, "rasterizerorderedstructuredbuffer") == 0; - } - return false; + if (hlsl_options.preserve_structured_buffers) + { + // Compare left hand side of string only as these user types can contain more meta data such as their subtypes, + // e.g. "structuredbuffer:int" + const std::string &user_type = get_decoration_string(id, DecorationUserTypeGOOGLE); + return user_type.compare(0, 16, "structuredbuffer") == 0 || + user_type.compare(0, 18, "rwstructuredbuffer") == 0 || + user_type.compare(0, 33, "rasterizerorderedstructuredbuffer") == 0; + } + return false; }
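
Several of the helper mechanisms in this hunk are easier to see from the HLSL side; the sketches below are hedged illustrations of what the backend plausibly emits, with all type and resource names invented for the example. For OpSelect on a struct or array, each recorded type id produces one spvSelectComposite overload; assuming a hypothetical struct Payload, such an overload reduces to:

    // Hypothetical element type; one overload is generated per recorded type id.
    struct Payload { float4 color; int index; };

    // Matches the call shape emitted above: (result, condition, true_value, false_value).
    void spvSelectComposite(out Payload out_value, bool cond, Payload true_val, Payload false_val)
    {
        if (cond)
            out_value = true_val;
        else
            out_value = false_val;
    }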
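
The barrier lowering collapses SPIR-V memory scopes and semantics onto three HLSL intrinsics, with the WithGroupSync forms reserved for OpControlBarrier. A minimal compute-shader sketch of the three choices (the kernel around them is illustrative):

    groupshared uint tile[64];

    [numthreads(64, 1, 1)]
    void main(uint3 gtid : SV_GroupThreadID)
    {
        tile[gtid.x] = gtid.x;
        GroupMemoryBarrierWithGroupSync();   // workgroup-only (or empty) semantics
        DeviceMemoryBarrierWithGroupSync();  // semantics that exclude workgroup memory
        AllMemoryBarrierWithGroupSync();     // mixed semantics fall back to the heaviest form
    }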
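
OpBitFieldInsert and the two extracts set requires_bitfield_* and force a recompile so the matching helper is emitted on the next pass. The helper bodies live elsewhere in the file; the sketch below shows the standard construction for the insert case, so the exact emitted body is an assumption:

    // Hedged sketch of a bitfield-insert helper; the emitted version may differ in detail.
    uint spvBitfieldInsert(uint base, uint insert, uint offset, uint count)
    {
        uint mask = (count == 32u) ? 0xffffffffu : (((1u << count) - 1u) << (offset & 31u));
        return (base & ~mask) | ((insert << offset) & mask);
    }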
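
OpArrayLength reads the buffer's byte size via GetDimensions, then solves for the element count using the member's offset and array stride from the SPIR-V decorations. With an illustrative offset of 16 bytes and stride of 4, the emitted pattern amounts to:

    // Resource name and layout numbers are invented for the example.
    RWByteAddressBuffer ssbo;

    uint runtime_array_length()
    {
        uint size;
        ssbo.GetDimensions(size);   // total buffer size in bytes
        return (size - 16u) / 4u;   // (bytes - member offset) / array stride
    }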
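
require_texture_query_variant records one bit per dimension/arrayed/MS/component-type combination and recompiles when a new bit appears, so only the spvTextureSize and spvImageSize overloads that are actually needed get emitted. One plausible overload for a 2D float SRV, as an assumption about the generated set:

    // Hedged sketch of a single query overload.
    uint2 spvTextureSize(Texture2D<float4> Tex, uint Level, out uint Param)
    {
        uint2 ret;
        Tex.GetDimensions(Level, ret.x, ret.y, Param);
        return ret;
    }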