/*
 * Copyright 2015-2019 Arm Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
// NOTE(review): the system header names below were missing and have been restored
// from what this file uses (max, assert, localeconv, nl_langinfo) -- confirm against upstream.
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <limits>
#include <locale.h>
#include <utility>

#ifndef _WIN32
#include <langinfo.h>
#endif
#include <locale.h>

using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
using namespace std;

// Returns true for the SPIR-V opcodes listed below, which operate on unsigned integers.
static bool is_unsigned_opcode(Op op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case OpShiftRightLogical:
	case OpUGreaterThan:
	case OpUGreaterThanEqual:
	case OpULessThan:
	case OpULessThanEqual:
	case OpUConvert:
	case OpUDiv:
	case OpUMod:
	case OpUMulExtended:
	case OpConvertUToF:
	case OpConvertFToU:
		return true;

	default:
		return false;
	}
}

// Returns true for the GLSL.std.450 extended opcodes listed below, which operate on unsigned integers.
static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
	// Don't have to be exhaustive, only relevant for legacy target checking ...
	switch (op)
	{
	case GLSLstd450UClamp:
	case GLSLstd450UMin:
	case GLSLstd450UMax:
	case GLSLstd450FindUMsb:
		return true;

	default:
		return false;
	}
}

// True for the HLSL cbuffer and std140 packing variants.
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
	case BufferPackingStd140:
	case BufferPackingStd140EnhancedLayout:
		return true;

	default:
		return false;
	}
}

// True for the two HLSL cbuffer packing variants.
static bool packing_is_hlsl(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingHLSLCbuffer:
	case BufferPackingHLSLCbufferPackOffset:
		return true;

	default:
		return false;
	}
}

// False for plain std140, std430, scalar and HLSL cbuffer packing; true for every other packing standard.
static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140:
	case BufferPackingStd430:
	case BufferPackingScalar:
	case BufferPackingHLSLCbuffer:
		return false;

	default:
		return true;
	}
}

// True for the two scalar packing variants.
static bool packing_is_scalar(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingScalar:
	case BufferPackingScalarEnhancedLayout:
		return true;

	default:
		return false;
	}
}

// Maps an EnhancedLayout / PackOffset packing to its plain counterpart; other values are returned unchanged.
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
	switch (packing)
	{
	case BufferPackingStd140EnhancedLayout:
		return BufferPackingStd140;
	case BufferPackingStd430EnhancedLayout:
		return BufferPackingStd430;
	case BufferPackingHLSLCbufferPackOffset:
		return BufferPackingHLSLCbuffer;
	case BufferPackingScalarEnhancedLayout:
		return BufferPackingScalar;
	default:
		return packing;
	}
}

// Sanitizes underscores for GLSL where multiple underscores in a row are not allowed.
// Every run of consecutive '_' characters is collapsed into a single '_'.
string CompilerGLSL::sanitize_underscores(const string &str)
{
	string res;
	res.reserve(str.size());

	bool last_underscore = false;
	for (auto c : str)
	{
		if (c == '_')
		{
			if (last_underscore)
				continue;

			res += c;
			last_underscore = true;
		}
		else
		{
			res += c;
			last_underscore = false;
		}
	}
	return res;
}

// Adopts the ES flag and version from the IR when the source is known, and records the
// radix (decimal point) character of the current locale.
void CompilerGLSL::init()
{
	if (ir.source.known)
	{
		options.es = ir.source.es;
		options.version = ir.source.version;
	}

	// Query the locale to see what the decimal point is.
	// We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale
	// rather than setting locales ourselves. Setting locales in a safe and isolated way is rather
	// tricky.
#ifdef _WIN32
	// On Windows, localeconv uses thread-local storage, so it should be fine.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#elif defined(__ANDROID__) && __ANDROID_API__ < 26
	// nl_langinfo is not supported on this platform, fall back to the worse alternative.
	const struct lconv *conv = localeconv();
	if (conv && conv->decimal_point)
		current_locale_radix_character = *conv->decimal_point;
#else
	// localeconv, the portable function is not MT safe ...
	const char *decimal_point = nl_langinfo(RADIXCHAR);
	if (decimal_point && *decimal_point != '\0')
		current_locale_radix_character = *decimal_point;
#endif
}

// Maps a PLS format to its layout qualifier text. Every non-empty result ends with a
// space so that it can be followed directly by further tokens. Unknown formats yield "".
static const char *to_pls_layout(PlsFormat format)
{
	switch (format)
	{
	case PlsR11FG11FB10F:
		return "layout(r11f_g11f_b10f) ";
	case PlsR32F:
		return "layout(r32f) ";
	case PlsRG16F:
		return "layout(rg16f) ";
	case PlsRGB10A2:
		return "layout(rgb10_a2) ";
	case PlsRGBA8:
		return "layout(rgba8) ";
	case PlsRG16:
		return "layout(rg16) ";

	case PlsRGBA8I:
		// Trailing space added for consistency with every other case.
		return "layout(rgba8i) ";
	case PlsRG16I:
		return "layout(rg16i) ";

	case PlsRGB10A2UI:
		return "layout(rgb10_a2ui) ";
	case PlsRGBA8UI:
		return "layout(rgba8ui) ";
	case PlsRG16UI:
		return "layout(rg16ui) ";
	case PlsR32UI:
		return "layout(r32ui) ";

	default:
		return "";
	}
}

// Maps a PLS format to Float, Int or UInt. Unknown formats are treated as Float.
static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR11FG11FB10F:
	case PlsR32F:
	case PlsRG16F:
	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRG16:
		return SPIRType::Float;

	case PlsRGBA8I:
	case PlsRG16I:
		return SPIRType::Int;

	case PlsRGB10A2UI:
	case PlsRGBA8UI:
	case PlsRG16UI:
	case PlsR32UI:
		return SPIRType::UInt;
	}
}

// Returns the component count (1-4) of a PLS format. Unknown formats are treated as 1 component.
static uint32_t pls_format_to_components(PlsFormat format)
{
	switch (format)
	{
	default:
	case PlsR32F:
	case PlsR32UI:
		return 1;

	case PlsRG16F:
	case PlsRG16:
	case PlsRG16UI:
	case PlsRG16I:
		return 2;

	case PlsR11FG11FB10F:
		return 3;

	case PlsRGB10A2:
	case PlsRGBA8:
	case PlsRGBA8I:
	case PlsRGB10A2UI:
	case PlsRGBA8UI:
		return 4;
	}
}

// Returns the swizzle suffix that selects `vecsize` consecutive components starting at
// component `index`. vecsize must be in [1, 4] and index in [0, 3]; combinations without
// a table entry trip the assertion. A 4-component selection at index 0 yields "".
static const char *vector_swizzle(int vecsize, int index)
{
	static const char *const swizzle[4][4] = {
		{ ".x", ".y", ".z", ".w" },
		{ ".xy", ".yz", ".zw", nullptr },
		{ ".xyz", ".yzw", nullptr, nullptr },
#if defined(__GNUC__) && (__GNUC__ == 9)
		// This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947.
		// This array ends up being compiled as all nullptrs, tripping the assertions below.
		{ "", nullptr, nullptr, "$" },
#else
		{ "", nullptr, nullptr, nullptr },
#endif
	};

	assert(vecsize >= 1 && vecsize <= 4);
	assert(index >= 0 && index < 4);
	assert(swizzle[vecsize - 1][index]);

	return swizzle[vecsize - 1][index];
}

// Clears per-pass compilation state so that another emission pass can start from scratch.
void CompilerGLSL::reset()
{
	// We do some speculative optimizations which should pretty much always work out,
	// but just in case the SPIR-V is rather weird, recompile until it's happy.
	// This typically only means one extra pass.
	clear_force_recompile();

	// Clear invalid expression tracking.
	invalid_expressions.clear();
	current_function = nullptr;

	// Clear temporary usage tracking.
	expression_usage_counts.clear();
	forwarded_temporaries.clear();

	reset_name_caches();

	ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) {
		func.active = false;
		func.flush_undeclared = true;
	});

	ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); });

	// NOTE(review): the template arguments of these two calls were missing and have been
	// restored as SPIRExpression / SPIRAccessChain -- confirm against upstream.
	ir.reset_all_of_type<SPIRExpression>();
	ir.reset_all_of_type<SPIRAccessChain>();

	statement_count = 0;
	indent = 0;
}

// Flags every variable listed in pls_inputs / pls_outputs as remapped.
// Throws if an input is neither an Input variable nor a subpass-data UniformConstant,
// or if an output is not an Output variable.
void CompilerGLSL::remap_pls_variables()
{
	for (auto &input : pls_inputs)
	{
		auto &var = get<SPIRVariable>(input.id);

		bool input_is_target = false;
		if (var.storage == StorageClassUniformConstant)
		{
			auto &type = get<SPIRType>(var.basetype);
			input_is_target = type.image.dim == DimSubpassData;
		}

		if (var.storage != StorageClassInput && !input_is_target)
			SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
		var.remapped_variable = true;
	}

	for (auto &output : pls_outputs)
	{
		auto &var = get<SPIRVariable>(output.id);
		if (var.storage != StorageClassOutput)
			SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
		var.remapped_variable = true;
	}
}

// Requires extensions (or throws for unsupported targets) based on the declared types,
// the entry point's execution model, PLS usage, separate shader objects, the addressing
// model and the declared capabilities.
void CompilerGLSL::find_static_extensions()
{
	ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) {
		if (type.basetype == SPIRType::Double)
		{
			if (options.es)
				SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader_fp64");
		}
		else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
		{
			if (options.es)
				SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
			if (!options.es)
				require_extension_internal("GL_ARB_gpu_shader_int64");
		}
		else if (type.basetype == SPIRType::Half)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
		else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_8bit_storage");
		}
		else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort)
		{
			require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16");
			if (options.vulkan_semantics)
				require_extension_internal("GL_EXT_shader_16bit_storage");
		}
	});

	auto &execution = get_entry_point();
	switch (execution.model)
	{
	case ExecutionModelGLCompute:
		if (!options.es && options.version < 430)
			require_extension_internal("GL_ARB_compute_shader");
		if (options.es && options.version < 310)
			SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
		break;

	case ExecutionModelGeometry:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_geometry_shader");
		if (!options.es && options.version < 150)
			require_extension_internal("GL_ARB_geometry_shader4");

		if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
		{
			// Instanced GS is part of 400 core or this extension.
			if (!options.es && options.version < 400)
				require_extension_internal("GL_ARB_gpu_shader5");
		}
		break;

	case ExecutionModelTessellationEvaluation:
	case ExecutionModelTessellationControl:
		if (options.es && options.version < 320)
			require_extension_internal("GL_EXT_tessellation_shader");
		if (!options.es && options.version < 400)
			require_extension_internal("GL_ARB_tessellation_shader");
		break;

	case ExecutionModelRayGenerationNV:
	case ExecutionModelIntersectionNV:
	case ExecutionModelAnyHitNV:
	case ExecutionModelClosestHitNV:
	case ExecutionModelMissNV:
	case ExecutionModelCallableNV:
		if (options.es || options.version < 460)
			SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above.");
		require_extension_internal("GL_NV_ray_tracing");
		break;

	default:
		break;
	}

	if (!pls_inputs.empty() || !pls_outputs.empty())
		require_extension_internal("GL_EXT_shader_pixel_local_storage");

	if (options.separate_shader_objects && !options.es && options.version < 410)
		require_extension_internal("GL_ARB_separate_shader_objects");

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		if (!options.vulkan_semantics)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
		if (options.es && options.version < 320)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320.");
		else if (!options.es && options.version < 450)
			SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450.");
		require_extension_internal("GL_EXT_buffer_reference");
	}
	else if (ir.addressing_model != AddressingModelLogical)
	{
		SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
	}

	// Check for nonuniform qualifier.
	// Instead of looping over all decorations to find this, just look at capabilities.
	for (auto &cap : ir.declared_capabilities)
	{
		bool nonuniform_indexing = false;
		switch (cap)
		{
		case CapabilityShaderNonUniformEXT:
		case CapabilityRuntimeDescriptorArrayEXT:
			if (!options.vulkan_semantics)
				SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL.");
			require_extension_internal("GL_EXT_nonuniform_qualifier");
			nonuniform_indexing = true;
			break;

		default:
			break;
		}

		// One hit is enough, the extension only needs to be required once.
		if (nonuniform_indexing)
			break;
	}
}

// Runs the analysis passes once, then emits header, resources and the entry point
// repeatedly until no recompilation is forced. Throws if a fourth pass would be needed.
// Returns the contents of the output buffer.
string CompilerGLSL::compile()
{
	if (options.vulkan_semantics)
		backend.allow_precision_qualifiers = true;
	backend.force_gl_in_out_block = true;
	backend.supports_extensions = true;

	// Scan the SPIR-V to find trivial uses of extensions.
	fixup_type_alias();
	reorder_type_alias();
	build_function_control_flow_graphs_and_analyze();
	find_static_extensions();
	fixup_image_load_store_access();
	update_active_builtins();
	analyze_image_and_sampler_usage();

	// Shaders might cast unrelated data to pointers of non-block types.
	// Find all such instances and make sure we can cast the pointers to a synthesized block type.
	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		analyze_non_block_pointer_types();

	uint32_t pass_count = 0;
	do
	{
		if (pass_count >= 3)
			SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");

		reset();

		buffer.reset();

		emit_header();
		emit_resources();

		emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset());

		pass_count++;
	} while (is_forcing_recompilation());

	// Entry point in GLSL is always main().
	get_entry_point().name = "main";

	return buffer.str();
}

// Returns whatever is currently held in the output buffer.
std::string CompilerGLSL::get_partial_source()
{
	return buffer.str();
}

// Appends the local_size_x/y/z layout arguments to `arguments`.
// For a dimension backed by a specialization constant (non-zero id), Vulkan GLSL gets
// "local_size_*_id = <constant_id>" and other targets get the constant's macro name.
// Otherwise the literal size from the entry point is used.
void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x,
                                        const SpecializationConstant &wg_y, const SpecializationConstant &wg_z)
{
	auto &execution = get_entry_point();

	if (wg_x.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_x_id = ", wg_x.constant_id));
		else
			arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_x = ", execution.workgroup_size.x));

	if (wg_y.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_y_id = ", wg_y.constant_id));
		else
			arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_y = ", execution.workgroup_size.y));

	if (wg_z.id)
	{
		if (options.vulkan_semantics)
			arguments.push_back(join("local_size_z_id = ", wg_z.constant_id));
		else
			arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name));
	}
	else
		arguments.push_back(join("local_size_z = ", execution.workgroup_size.z));
}

// Emits the shader preamble: #version, #extension directives, user header lines,
// default precision statements (ES fragment shaders) and the global layout(...) in/out
// declarations derived from the entry point's execution model and modes.
void CompilerGLSL::emit_header()
{
	auto &execution = get_entry_point();
	statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

	if (!options.es && options.version < 420)
	{
		// Needed for binding = # on UBOs, etc.
		if (options.enable_420pack_extension)
		{
			statement("#ifdef GL_ARB_shading_language_420pack");
			statement("#extension GL_ARB_shading_language_420pack : require");
			statement("#endif");
		}
		// Needed for: layout(early_fragment_tests) in;
		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			require_extension_internal("GL_ARB_shader_image_load_store");
	}

	// Needed for: layout(post_depth_coverage) in;
	if (execution.flags.get(ExecutionModePostDepthCoverage))
		require_extension_internal("GL_ARB_post_depth_coverage");

	for (auto &ext : forced_extensions)
	{
		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
		{
			// Special case, this extension has a potential fallback to another vendor extension in normal GLSL.
			// GL_AMD_gpu_shader_half_float is a superset, so try that first.
			statement("#if defined(GL_AMD_gpu_shader_half_float)");
			statement("#extension GL_AMD_gpu_shader_half_float : require");
			if (!options.vulkan_semantics)
			{
				statement("#elif defined(GL_NV_gpu_shader5)");
				statement("#extension GL_NV_gpu_shader5 : require");
			}
			else
			{
				statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)");
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require");
			}
			statement("#else");
			statement("#error No extension available for FP16.");
			statement("#endif");
		}
		else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16")
		{
			if (options.vulkan_semantics)
				statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require");
			else
			{
				statement("#if defined(GL_AMD_gpu_shader_int16)");
				statement("#extension GL_AMD_gpu_shader_int16 : require");
				statement("#else");
				statement("#error No extension available for Int16.");
				statement("#endif");
			}
		}
		else
			statement("#extension ", ext, " : require");
	}

	for (auto &header : header_lines)
		statement(header);

	// Arguments collected for the global "layout(...) in;" and "layout(...) out;" statements.
	SmallVector<string> inputs;
	SmallVector<string> outputs;

	switch (execution.model)
	{
	case ExecutionModelGeometry:
		outputs.push_back(join("max_vertices = ", execution.output_vertices));
		if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
			inputs.push_back(join("invocations = ", execution.invocations));
		if (execution.flags.get(ExecutionModeInputPoints))
			inputs.push_back("points");
		if (execution.flags.get(ExecutionModeInputLines))
			inputs.push_back("lines");
		if (execution.flags.get(ExecutionModeInputLinesAdjacency))
			inputs.push_back("lines_adjacency");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
			inputs.push_back("triangles_adjacency");
		if (execution.flags.get(ExecutionModeOutputTriangleStrip))
			outputs.push_back("triangle_strip");
		if (execution.flags.get(ExecutionModeOutputPoints))
			outputs.push_back("points");
		if (execution.flags.get(ExecutionModeOutputLineStrip))
			outputs.push_back("line_strip");
		break;

	case ExecutionModelTessellationControl:
		if (execution.flags.get(ExecutionModeOutputVertices))
			outputs.push_back(join("vertices = ", execution.output_vertices));
		break;

	case ExecutionModelTessellationEvaluation:
		if (execution.flags.get(ExecutionModeQuads))
			inputs.push_back("quads");
		if (execution.flags.get(ExecutionModeTriangles))
			inputs.push_back("triangles");
		if (execution.flags.get(ExecutionModeIsolines))
			inputs.push_back("isolines");
		if (execution.flags.get(ExecutionModePointMode))
			inputs.push_back("point_mode");

		if (!execution.flags.get(ExecutionModeIsolines))
		{
			if (execution.flags.get(ExecutionModeVertexOrderCw))
				inputs.push_back("cw");
			if (execution.flags.get(ExecutionModeVertexOrderCcw))
				inputs.push_back("ccw");
		}

		if (execution.flags.get(ExecutionModeSpacingFractionalEven))
			inputs.push_back("fractional_even_spacing");
		if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
			inputs.push_back("fractional_odd_spacing");
		if (execution.flags.get(ExecutionModeSpacingEqual))
			inputs.push_back("equal_spacing");
		break;

	case ExecutionModelGLCompute:
	{
		if (execution.workgroup_size.constant != 0)
		{
			SpecializationConstant wg_x, wg_y, wg_z;
			get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

			// If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro
			// declarations before we can emit the work group size.
			if (options.vulkan_semantics || ((wg_x.id == 0) && (wg_y.id == 0) && (wg_z.id == 0)))
				build_workgroup_size(inputs, wg_x, wg_y, wg_z);
		}
		else
		{
			inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
			inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
			inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
		}
		break;
	}

	case ExecutionModelFragment:
		if (options.es)
		{
			switch (options.fragment.default_float_precision)
			{
			case Options::Lowp:
				statement("precision lowp float;");
				break;

			case Options::Mediump:
				statement("precision mediump float;");
				break;

			case Options::Highp:
				statement("precision highp float;");
				break;

			default:
				break;
			}

			switch (options.fragment.default_int_precision)
			{
			case Options::Lowp:
				statement("precision lowp int;");
				break;

			case Options::Mediump:
				statement("precision mediump int;");
				break;

			case Options::Highp:
				statement("precision highp int;");
				break;

			default:
				break;
			}
		}

		if (execution.flags.get(ExecutionModeEarlyFragmentTests))
			inputs.push_back("early_fragment_tests");
		if (execution.flags.get(ExecutionModePostDepthCoverage))
			inputs.push_back("post_depth_coverage");

		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
			statement("layout(depth_greater) out float gl_FragDepth;");
		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
			statement("layout(depth_less) out float gl_FragDepth;");

		break;

	default:
		break;
	}

	if (!inputs.empty())
		statement("layout(", merge(inputs), ") in;");
	if (!outputs.empty())
		statement("layout(", merge(outputs), ") out;");

	statement("");
}

// True if `type` is a struct with no members.
bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
	return type.basetype == SPIRType::Struct && type.member_types.empty();
}

// Emits the declaration of a struct type together with all of its members.
// Nothing is emitted for a type alias unless the aliased type is marked as repacked.
void CompilerGLSL::emit_struct(SPIRType &type)
{
	// Struct types can be stamped out multiple times
	// with just different offsets, matrix layouts, etc ...
	// Type-punning with these types is legal, which complicates things
	// when we are storing struct and array types in an SSBO for example.
	// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
	if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked))
		return;

	add_resource_name(type.self);
	auto name = type_to_glsl(type);

	statement(!backend.explicit_struct_type ? "struct " : "", name);
	begin_scope();

	type.member_name_cache.clear();

	uint32_t i = 0;
	bool emitted = false;
	for (auto &member : type.member_types)
	{
		add_member_name(type, i);
		emit_struct_member(type, member, i);
		i++;
		emitted = true;
	}

	// Don't declare empty structs in GLSL, this is not allowed.
	if (type_is_empty(type) && !backend.supports_empty_struct)
	{
		statement("int empty_struct_member;");
		emitted = true;
	}

	if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget))
		emit_struct_padding_target(type);

	end_scope_decl();

	if (emitted)
		statement("");
}

// Builds the space-terminated list of interpolation-related qualifiers for the set decoration flags.
string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
	string res;
	//if (flags & (1ull << DecorationSmooth))
	//    res += "smooth ";
	if (flags.get(DecorationFlat))
		res += "flat ";
	if (flags.get(DecorationNoPerspective))
		res += "noperspective ";
	if (flags.get(DecorationCentroid))
		res += "centroid ";
	if (flags.get(DecorationPatch))
		res += "patch ";
	if (flags.get(DecorationSample))
		res += "sample ";
	if (flags.get(DecorationInvariant))
		res += "invariant ";
	if (flags.get(DecorationExplicitInterpAMD))
		res += "__explicitInterpAMD ";

	return res;
}

// Returns the "layout(...) " prefix for member `index` of block type `type`.
// Returns "" on legacy targets, for non-block types, for out-of-range indices and when
// no layout attribute applies. Throws if a Component decoration cannot be expressed on
// the current target.
string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
	if (is_legacy())
		return "";

	bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
	                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
	if (!is_block)
		return "";

	auto &memb = ir.meta[type.self].members;
	if (index >= memb.size())
		return "";
	auto &dec = memb[index];

	SmallVector<string> attr;

	// We can only apply layouts on members in block interfaces.
	// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
	// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
	// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
	//
	// We would like to go from (SPIR-V style):
	//
	// struct Foo { layout(row_major) mat4 matrix; };
	// buffer UBO { Foo foo; };
	//
	// to
	//
	// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
	// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
	auto flags = combined_decoration_for_member(type, index);

	if (flags.get(DecorationRowMajor))
		attr.push_back("row_major");
	// We don't emit any global layouts, so column_major is default.
	//if (flags & (1ull << DecorationColMajor))
	//    attr.push_back("column_major");

	if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
		attr.push_back(join("location = ", dec.location));

	// Can only declare component if we can declare location.
	if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true))
	{
		if (!options.es)
		{
			if (options.version < 440 && options.version >= 140)
				require_extension_internal("GL_ARB_enhanced_layouts");
			else if (options.version < 140)
				SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40.");
			attr.push_back(join("component = ", dec.component));
		}
		else
			SPIRV_CROSS_THROW("Component decoration is not supported in ES targets.");
	}

	// SPIRVCrossDecorationExplicitOffset marks that we need to emit offset qualifiers.
	// This is only done selectively in GLSL as needed.
	if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) &&
	    dec.decoration_flags.get(DecorationOffset))
		attr.push_back(join("offset = ", dec.offset));

	if (attr.empty())
		return "";

	string res = "layout(";
	res += merge(attr);
	res += ") ";
	return res;
}

// Maps a SPIR-V image format to its GLSL format qualifier.
// Returns nullptr for ImageFormatUnknown and unrecognized formats.
// Throws when targeting ES and the format is desktop-only.
const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
	if (options.es && is_desktop_only_format(format))
		SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");

	switch (format)
	{
	case ImageFormatRgba32f:
		return "rgba32f";
	case ImageFormatRgba16f:
		return "rgba16f";
	case ImageFormatR32f:
		return "r32f";
	case ImageFormatRgba8:
		return "rgba8";
	case ImageFormatRgba8Snorm:
		return "rgba8_snorm";
	case ImageFormatRg32f:
		return "rg32f";
	case ImageFormatRg16f:
		return "rg16f";
	case ImageFormatRgba32i:
		return "rgba32i";
	case ImageFormatRgba16i:
		return "rgba16i";
	case ImageFormatR32i:
		return "r32i";
	case ImageFormatRgba8i:
		return "rgba8i";
	case ImageFormatRg32i:
		return "rg32i";
	case ImageFormatRg16i:
		return "rg16i";
	case ImageFormatRgba32ui:
		return "rgba32ui";
	case ImageFormatRgba16ui:
		return "rgba16ui";
	case ImageFormatR32ui:
		return "r32ui";
	case ImageFormatRgba8ui:
		return "rgba8ui";
	case ImageFormatRg32ui:
		return "rg32ui";
	case ImageFormatRg16ui:
		return "rg16ui";
	case ImageFormatR11fG11fB10f:
		return "r11f_g11f_b10f";
	case ImageFormatR16f:
		return "r16f";
	case ImageFormatRgb10A2:
		return "rgb10_a2";
	case ImageFormatR8:
		return "r8";
	case ImageFormatRg8:
		return "rg8";
	case ImageFormatR16:
		return "r16";
	case ImageFormatRg16:
		return "rg16";
	case ImageFormatRgba16:
		return "rgba16";
	case ImageFormatR16Snorm:
		return "r16_snorm";
	case ImageFormatRg16Snorm:
		return "rg16_snorm";
	case ImageFormatRgba16Snorm:
		return "rgba16_snorm";
	case ImageFormatR8Snorm:
		return "r8_snorm";
	case ImageFormatRg8Snorm:
		return "rg8_snorm";
	case ImageFormatR8ui:
		return "r8ui";
	case ImageFormatRg8ui:
		return "rg8ui";
	case ImageFormatR16ui:
		return "r16ui";
	case ImageFormatRgb10a2ui:
		return "rgb10_a2ui";
	case ImageFormatR8i:
		return "r8i";
	case ImageFormatRg8i:
		return "rg8i";
	case ImageFormatR16i:
		return "r16i";
	default:
	case ImageFormatUnknown:
		return nullptr;
	}
}

// Returns the size in bytes (8, 4, 2 or 1) of the scalar base type of `type`.
// Throws for base types that are not listed. The packing argument is unused.
uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
{
	switch (type.basetype)
	{
	case SPIRType::Double:
	case SPIRType::Int64:
	case SPIRType::UInt64:
		return 8;
	case SPIRType::Float:
	case SPIRType::Int:
	case SPIRType::UInt:
		return 4;
	case SPIRType::Half:
	case SPIRType::Short:
	case SPIRType::UShort:
		return 2;
	case SPIRType::SByte:
	case SPIRType::UByte:
		return 1;

	default:
		SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
	}
}

// Computes the alignment in bytes of `type` under the given packing standard.
// `flags` supplies the member decorations (row/column major) used for matrices.
// Throws if no rule matches, or if a PhysicalStorageBufferEXT type is not a pointer
// or the addressing model is not PhysicalStorageBuffer64EXT.
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
                                                BufferPackingStandard packing)
{
	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (type.storage == StorageClassPhysicalStorageBufferEXT)
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
		{
			if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
				return 16;
			else
				return 8;
		}
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}

	if (!type.array.empty())
	{
		uint32_t minimum_alignment = 1;
		if (packing_is_vec4_padded(packing))
			minimum_alignment = 16;

		// Walk up the parent chain until we reach the non-array element type.
		auto *tmp = &get<SPIRType>(type.parent_type);
		while (!tmp->array.empty())
			tmp = &get<SPIRType>(tmp->parent_type);

		// Get the alignment of the base type, then maybe round up.
		return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
	}

	if (type.basetype == SPIRType::Struct)
	{
		// Rule 9. Structs alignments are maximum alignment of its members.
		uint32_t alignment = 1;
		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			alignment =
			    max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
		}

		// In std140, struct alignment is rounded up to 16.
		if (packing_is_vec4_padded(packing))
			alignment = max(alignment, 16u);

		return alignment;
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		// Alignment requirement for scalar block layout is always the alignment for the most basic component.
		if (packing_is_scalar(packing))
			return base_alignment;

		// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
		// a vec4, this is handled outside since that part knows our current offset.
		if (type.columns == 1 && packing_is_hlsl(packing))
			return base_alignment;

		// From 7.6.2.2 in GL 4.5 core spec.
		// Rule 1
		if (type.vecsize == 1 && type.columns == 1)
			return base_alignment;

		// Rule 2
		if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
			return type.vecsize * base_alignment;

		// Rule 3
		if (type.vecsize == 3 && type.columns == 1)
			return 4 * base_alignment;

		// Rule 4 implied. Alignment does not change in std430.

		// Rule 5. Column-major matrices are stored as arrays of
		// vectors.
		if (flags.get(DecorationColMajor) && type.columns > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.vecsize == 3)
				return 4 * base_alignment;
			else
				return type.vecsize * base_alignment;
		}

		// Rule 6 implied.

		// Rule 7.
		if (flags.get(DecorationRowMajor) && type.vecsize > 1)
		{
			if (packing_is_vec4_padded(packing))
				return 4 * base_alignment;
			else if (type.columns == 3)
				return 4 * base_alignment;
			else
				return type.columns * base_alignment;
		}

		// Rule 8 implied.
	}

	SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
}

// Computes the stride in bytes between consecutive elements of array type `type`.
uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
                                                   BufferPackingStandard packing)
{
	// Array stride is equal to aligned size of the underlying type.
	uint32_t parent = type.parent_type;
	assert(parent);

	auto &tmp = get<SPIRType>(parent);

	uint32_t size = type_to_packed_size(tmp, flags, packing);
	if (tmp.array.empty())
	{
		// Round the element size up to the next multiple of the array's alignment.
		uint32_t alignment = type_to_packed_alignment(type, flags, packing);
		return (size + alignment - 1) & ~(alignment - 1);
	}
	else
	{
		// For multidimensional arrays, array stride always matches size of subtype.
		// The alignment cannot change because multidimensional arrays are basically N * M array elements.
		return size;
	}
}

// Computes the size in bytes occupied by `type` under the given packing standard.
// Arrays are element count times array stride; struct members are laid out one after
// another, each at its aligned offset.
uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
{
	if (!type.array.empty())
	{
		return to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing);
	}

	// If using PhysicalStorageBufferEXT storage class, this is a pointer,
	// and is 64-bit.
	if (type.storage == StorageClassPhysicalStorageBufferEXT)
	{
		if (!type.pointer)
			SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");

		if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
			return 8;
		else
			SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
	}

	uint32_t size = 0;

	if (type.basetype == SPIRType::Struct)
	{
		uint32_t pad_alignment = 1;

		for (uint32_t i = 0; i < type.member_types.size(); i++)
		{
			auto member_flags = ir.meta[type.self].members[i].decoration_flags;
			auto &member_type = get<SPIRType>(type.member_types[i]);

			uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
			uint32_t alignment = max(packed_alignment, pad_alignment);

			// The next member following a struct member is aligned to the base alignment of the struct that came before.
			// GL 4.5 spec, 7.6.2.2.
			if (member_type.basetype == SPIRType::Struct)
				pad_alignment = packed_alignment;
			else
				pad_alignment = 1;

			size = (size + alignment - 1) & ~(alignment - 1);
			size += type_to_packed_size(member_type, member_flags, packing);
		}
	}
	else
	{
		const uint32_t base_alignment = type_to_packed_base_size(type, packing);

		if (packing_is_scalar(packing))
		{
			size = type.vecsize * type.columns * base_alignment;
		}
		else
		{
			if (type.columns == 1)
				size = type.vecsize * base_alignment;

			if (flags.get(DecorationColMajor) && type.columns > 1)
			{
				if (packing_is_vec4_padded(packing))
					size = type.columns * 4 * base_alignment;
				else if (type.vecsize == 3)
					size = type.columns * 4 * base_alignment;
				else
					size = type.columns * type.vecsize * base_alignment;
			}

			if (flags.get(DecorationRowMajor) && type.vecsize > 1)
			{
				if (packing_is_vec4_padded(packing))
					size = type.vecsize * 4 * base_alignment;
				else if (type.columns == 3)
					size = type.vecsize * 4 * base_alignment;
				else
					size = type.vecsize * type.columns * base_alignment;
			}
		}
	}

	return size;
}

bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
                                              uint32_t start_offset, uint32_t end_offset)
{
	// This is very tricky and error prone, but try to be exhaustive and correct here.
	// SPIR-V doesn't directly say if we're using std430 or std140.
	// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
	// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
	// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
	//
	// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
	// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
	//
	// The only two differences between std140 and std430 are related to padding alignment/array stride
	// in arrays and structs. In std140 they take minimum vec4 alignment.
	// std430 only removes the vec4 requirement.

	uint32_t offset = 0;
	uint32_t pad_alignment = 1;

	bool is_top_level_block =
	    has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock);

	for (uint32_t i = 0; i < type.member_types.size(); i++)
	{
		auto &memb_type = get<SPIRType>(type.member_types[i]);
		auto member_flags = ir.meta[type.self].members[i].decoration_flags;

		// Verify alignment rules.
		uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);

		// This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g:
		// layout(constant_id = 0) const int s = 10;
		// const int S = s + 5; // SpecConstantOp
		// buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here,
		// we would need full implementation of compile-time constant folding. :(
		// If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant
		// for our analysis (e.g. unsized arrays).
		// This lets us simply ignore that there are spec constant op sized arrays in our buffers.
		// Querying size of this member will fail, so just don't call it unless we have to.
		//
		// This is likely "best effort" we can support without going into unacceptably complicated workarounds.
		bool member_can_be_unsized =
		    is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty();

		uint32_t packed_size = 0;
		if (!member_can_be_unsized)
			packed_size = type_to_packed_size(memb_type, member_flags, packing);

		// We only need to care about this if we have non-array types which can straddle the vec4 boundary.
		if (packing_is_hlsl(packing))
		{
			// If a member straddles across a vec4 boundary, alignment is actually vec4.
			uint32_t begin_word = offset / 16;
			uint32_t end_word = (offset + packed_size - 1) / 16;
			if (begin_word != end_word)
				packed_alignment = max(packed_alignment, 16u);
		}

		uint32_t alignment = max(packed_alignment, pad_alignment);
		offset = (offset + alignment - 1) & ~(alignment - 1);

		// Field is not in the specified range anymore and we can ignore any further fields.
		if (offset >= end_offset)
			break;

		// The next member following a struct member is aligned to the base alignment of the struct that came before.
		// GL 4.5 spec, 7.6.2.2.
		if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer)
			pad_alignment = packed_alignment;
		else
			pad_alignment = 1;

		// Only care about packing if we are in the given range
		if (offset >= start_offset)
		{
			uint32_t actual_offset = type_struct_member_offset(type, i);

			// We only care about offsets in std140, std430, etc ...
			// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
			if (!packing_has_flexible_offset(packing))
			{
				if (actual_offset != offset) // This cannot be the packing we're looking for.
					return false;
			}
			else if ((actual_offset & (alignment - 1)) != 0)
			{
				// We still need to verify that alignment rules are observed, even if we have explicit offset.
				return false;
			}

			// Verify array stride rules.
			if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
			                                    type_struct_member_array_stride(type, i))
				return false;

			// Verify that sub-structs also follow packing rules.
			// We cannot use enhanced layouts on substructs, so they better be up to spec.
			auto substruct_packing = packing_to_substruct_packing(packing);

			if (!memb_type.pointer && !memb_type.member_types.empty() &&
			    !buffer_is_packing_standard(memb_type, substruct_packing))
			{
				return false;
			}
		}

		// Bump size.
offset += packed_size; } return true; } bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block) { // Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL. // Be very explicit here about how to solve the issue. if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) || (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput)) { uint32_t minimum_desktop_version = block ? 440 : 410; // ARB_enhanced_layouts vs ARB_separate_shader_objects ... if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects) return false; else if (options.es && options.version < 310) return false; } if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) || (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput)) { if (options.es && options.version < 300) return false; else if (!options.es && options.version < 330) return false; } if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant) { if (options.es && options.version < 310) return false; else if (!options.es && options.version < 430) return false; } return true; } string CompilerGLSL::layout_for_variable(const SPIRVariable &var) { // FIXME: Come up with a better solution for when to disable layouts. // Having layouts depend on extensions as well as which types // of layouts are used. For now, the simple solution is to just disable // layouts for legacy versions. 
if (is_legacy()) return ""; SmallVector attr; auto &dec = ir.meta[var.self].decoration; auto &type = get(var.basetype); auto &flags = dec.decoration_flags; auto typeflags = ir.meta[type.self].decoration.decoration_flags; if (options.vulkan_semantics && var.storage == StorageClassPushConstant) attr.push_back("push_constant"); else if (var.storage == StorageClassShaderRecordBufferNV) attr.push_back("shaderRecordNV"); if (flags.get(DecorationRowMajor)) attr.push_back("row_major"); if (flags.get(DecorationColMajor)) attr.push_back("column_major"); if (options.vulkan_semantics) { if (flags.get(DecorationInputAttachmentIndex)) attr.push_back(join("input_attachment_index = ", dec.input_attachment)); } bool is_block = has_decoration(type.self, DecorationBlock); if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block)) { Bitset combined_decoration; for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++) combined_decoration.merge_or(combined_decoration_for_member(type, i)); // If our members have location decorations, we don't need to // emit location decorations at the top as well (looks weird). if (!combined_decoration.get(DecorationLocation)) attr.push_back(join("location = ", dec.location)); } // Can only declare Component if we can declare location. if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) { if (!options.es) { if (options.version < 440 && options.version >= 140) require_extension_internal("GL_ARB_enhanced_layouts"); else if (options.version < 140) SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); attr.push_back(join("component = ", dec.component)); } else SPIRV_CROSS_THROW("Component decoration is not supported in ES targets."); } if (flags.get(DecorationIndex)) attr.push_back(join("index = ", dec.index)); // Do not emit set = decoration in regular GLSL output, but // we need to preserve it in Vulkan GLSL mode. 
if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferNV) { if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics) attr.push_back(join("set = ", dec.set)); } bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock); // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ... bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140); // pretend no UBOs when options say so if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms) can_use_buffer_blocks = false; bool can_use_binding; if (options.es) can_use_binding = options.version >= 310; else can_use_binding = options.enable_420pack_extension || (options.version >= 420); // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30. if (!can_use_buffer_blocks && var.storage == StorageClassUniform) can_use_binding = false; if (var.storage == StorageClassShaderRecordBufferNV) can_use_binding = false; if (can_use_binding && flags.get(DecorationBinding)) attr.push_back(join("binding = ", dec.binding)); if (flags.get(DecorationOffset)) attr.push_back(join("offset = ", dec.offset)); // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. // If SPIR-V does not comply with either layout, we cannot really work around it. 
if (can_use_buffer_blocks && (ubo_block || emulated_ubo)) { attr.push_back(buffer_to_packing_standard(type, false)); } else if (can_use_buffer_blocks && (push_constant_block || ssbo_block)) { attr.push_back(buffer_to_packing_standard(type, true)); } // For images, the type itself adds a layout qualifer. // Only emit the format for storage images. if (type.basetype == SPIRType::Image && type.image.sampled == 2) { const char *fmt = format_to_glsl(type.image.format); if (fmt) attr.push_back(fmt); } if (attr.empty()) return ""; string res = "layout("; res += merge(attr); res += ") "; return res; } string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, bool support_std430_without_scalar_layout) { if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430)) return "std430"; else if (buffer_is_packing_standard(type, BufferPackingStd140)) return "std140"; else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar)) { require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } else if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { if (options.es && !options.vulkan_semantics) SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " "not support GL_ARB_enhanced_layouts."); if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std430"; } else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) { // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. 
// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. if (options.es && !options.vulkan_semantics) SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " "not support GL_ARB_enhanced_layouts."); if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension_internal("GL_ARB_enhanced_layouts"); set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); return "std140"; } else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout)) { set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "scalar"; } else if (!support_std430_without_scalar_layout && options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingStd430)) { // UBOs can support std430 with GL_EXT_scalar_block_layout. require_extension_internal("GL_EXT_scalar_block_layout"); return "std430"; } else if (!support_std430_without_scalar_layout && options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { // UBOs can support std430 with GL_EXT_scalar_block_layout. set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); require_extension_internal("GL_EXT_scalar_block_layout"); return "std430"; } else { SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced " "layouts. 
You can try flattening this block to support a more flexible layout."); } } void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) { if (flattened_buffer_blocks.count(var.self)) emit_buffer_block_flattened(var); else if (options.vulkan_semantics) emit_push_constant_block_vulkan(var); else if (options.emit_push_constant_as_uniform_buffer) emit_buffer_block_native(var); else emit_push_constant_block_glsl(var); } void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) { emit_buffer_block(var); } void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) { // OpenGL has no concept of push constant blocks, implement it as a uniform struct. auto &type = get(var.basetype); auto &flags = ir.meta[var.self].decoration.decoration_flags; flags.clear(DecorationBinding); flags.clear(DecorationDescriptorSet); #if 0 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " "Remap to location with reflection API first or disable these decorations."); #endif // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. 
auto &block_flags = ir.meta[type.self].decoration.decoration_flags; bool block_flag = block_flags.get(DecorationBlock); block_flags.clear(DecorationBlock); emit_struct(type); if (block_flag) block_flags.set(DecorationBlock); emit_uniform(var); statement(""); } void CompilerGLSL::emit_buffer_block(const SPIRVariable &var) { auto &type = get(var.basetype); bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock); if (flattened_buffer_blocks.count(var.self)) emit_buffer_block_flattened(var); else if (is_legacy() || (!options.es && options.version == 130) || (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)) emit_buffer_block_legacy(var); else emit_buffer_block_native(var); } void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) { auto &type = get(var.basetype); bool ssbo = var.storage == StorageClassStorageBuffer || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); if (ssbo) SPIRV_CROSS_THROW("SSBOs not supported in legacy targets."); // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. auto &block_flags = ir.meta[type.self].decoration.decoration_flags; bool block_flag = block_flags.get(DecorationBlock); block_flags.clear(DecorationBlock); emit_struct(type); if (block_flag) block_flags.set(DecorationBlock); emit_uniform(var); statement(""); } void CompilerGLSL::emit_buffer_reference_block(SPIRType &type, bool forward_declaration) { string buffer_name; if (forward_declaration) { // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... // Allow aliased name since we might be declaring the block twice. Once with buffer reference (forward declared) and one proper declaration. // The names must match up. 
buffer_name = to_name(type.self, false); // Shaders never use the block by interface name, so we don't // have to track this other than updating name caches. // If we have a collision for any reason, just fallback immediately. if (ir.meta[type.self].decoration.alias.empty() || block_ssbo_names.find(buffer_name) != end(block_ssbo_names) || resource_names.find(buffer_name) != end(resource_names)) { buffer_name = join("_", type.self); } // Make sure we get something unique for both global name scope and block name scope. // See GLSL 4.5 spec: section 4.3.9 for details. add_variable(block_ssbo_names, resource_names, buffer_name); // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. // This cannot conflict with anything else, so we're safe now. // We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope. if (buffer_name.empty()) buffer_name = join("_", type.self); block_names.insert(buffer_name); block_ssbo_names.insert(buffer_name); } else if (type.basetype != SPIRType::Struct) buffer_name = type_to_glsl(type); else buffer_name = to_name(type.self, false); if (!forward_declaration) { if (type.basetype == SPIRType::Struct) statement("layout(buffer_reference, ", buffer_to_packing_standard(type, true), ") buffer ", buffer_name); else statement("layout(buffer_reference) buffer ", buffer_name); begin_scope(); if (type.basetype == SPIRType::Struct) { type.member_name_cache.clear(); uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); emit_struct_member(type, member, i); i++; } } else { auto &pointee_type = get_pointee_type(type); statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type), ";"); } end_scope_decl(); statement(""); } else { statement("layout(buffer_reference) buffer ", buffer_name, ";"); } } void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) { auto &type = get(var.basetype); Bitset flags = 
ir.get_buffer_block_flags(var); bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferNV || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); bool is_restrict = ssbo && flags.get(DecorationRestrict); bool is_writeonly = ssbo && flags.get(DecorationNonReadable); bool is_readonly = ssbo && flags.get(DecorationNonWritable); bool is_coherent = ssbo && flags.get(DecorationCoherent); // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... auto buffer_name = to_name(type.self, false); auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names; // Shaders never use the block by interface name, so we don't // have to track this other than updating name caches. // If we have a collision for any reason, just fallback immediately. if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) || resource_names.find(buffer_name) != end(resource_names)) { buffer_name = get_block_fallback_name(var.self); } // Make sure we get something unique for both global name scope and block name scope. // See GLSL 4.5 spec: section 4.3.9 for details. add_variable(block_namespace, resource_names, buffer_name); // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. // This cannot conflict with anything else, so we're safe now. // We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope. if (buffer_name.empty()) buffer_name = join("_", get(var.basetype).self, "_", var.self); block_names.insert(buffer_name); block_namespace.insert(buffer_name); // Save for post-reflection later. declared_block_names[var.self] = buffer_name; statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "", is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? 
"buffer " : "uniform ", buffer_name); begin_scope(); type.member_name_cache.clear(); uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); emit_struct_member(type, member, i); i++; } // var.self can be used as a backup name for the block name, // so we need to make sure we don't disturb the name here on a recompile. // It will need to be reset if we have to recompile. preserve_alias_on_reset(var.self); add_resource_name(var.self); end_scope_decl(to_name(var.self) + type_to_array_glsl(type)); statement(""); } void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) { auto &type = get(var.basetype); // Block names should never alias. auto buffer_name = to_name(type.self, false); size_t buffer_size = (get_declared_struct_size(type) + 15) / 16; SPIRType::BaseType basic_type; if (get_common_basic_type(type, basic_type)) { SPIRType tmp; tmp.basetype = basic_type; tmp.vecsize = 4; if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt) SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint."); auto flags = ir.get_buffer_block_flags(var); statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[", buffer_size, "];"); } else SPIRV_CROSS_THROW("All basic types in a flattened block must be the same."); } const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) { auto &execution = get_entry_point(); if (var.storage == StorageClassInput || var.storage == StorageClassOutput) { if (is_legacy() && execution.model == ExecutionModelVertex) return var.storage == StorageClassInput ? "attribute " : "varying "; else if (is_legacy() && execution.model == ExecutionModelFragment) return "varying "; // Fragment outputs are renamed so they never hit this case. else return var.storage == StorageClassInput ? 
"in " : "out "; } else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || var.storage == StorageClassPushConstant) { return "uniform "; } else if (var.storage == StorageClassRayPayloadNV) { return "rayPayloadNV "; } else if (var.storage == StorageClassIncomingRayPayloadNV) { return "rayPayloadInNV "; } else if (var.storage == StorageClassHitAttributeNV) { return "hitAttributeNV "; } else if (var.storage == StorageClassCallableDataNV) { return "callableDataNV "; } else if (var.storage == StorageClassIncomingCallableDataNV) { return "callableDataInNV "; } return ""; } void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) { auto &type = get(var.basetype); if (!type.array.empty()) SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); auto old_flags = ir.meta[type.self].decoration.decoration_flags; // Emit the members as if they are part of a block to get all qualifiers. ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock); type.member_name_cache.clear(); uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); auto &membertype = get(member); if (membertype.basetype == SPIRType::Struct) SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables."); // Pass in the varying qualifier here so it will appear in the correct declaration order. // Replace member name while emitting it so it encodes both struct name and member name. // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, // which is not allowed. auto backup_name = get_member_name(type.self, i); auto member_name = to_member_name(type, i); set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name))); emit_struct_member(type, member, i, qual); // Restore member name. 
set_member_name(type.self, i, member_name); i++; } ir.meta[type.self].decoration.decoration_flags = old_flags; // Treat this variable as flattened from now on. flattened_structs.insert(var.self); } void CompilerGLSL::emit_interface_block(const SPIRVariable &var) { auto &type = get(var.basetype); // Either make it plain in/out or in/out blocks depending on what shader is doing ... bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); const char *qual = to_storage_qualifiers_glsl(var); if (block) { // ESSL earlier than 310 and GLSL earlier than 150 did not support // I/O variables which are struct types. // To support this, flatten the struct into separate varyings instead. if ((options.es && options.version < 310) || (!options.es && options.version < 150)) { // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). emit_flattened_io_block(var, qual); } else { if (options.es && options.version < 320) { // Geometry and tessellation extensions imply this extension. if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader")) require_extension_internal("GL_EXT_shader_io_blocks"); } // Block names should never alias. auto block_name = to_name(type.self, false); // The namespace for I/O blocks is separate from other variables in GLSL. auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names; // Shaders never use the block by interface name, so we don't // have to track this other than updating name caches. if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace)) block_name = get_fallback_name(type.self); else block_namespace.insert(block_name); // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. // This cannot conflict with anything else, so we're safe now. 
if (block_name.empty()) block_name = join("_", get(var.basetype).self, "_", var.self); // Instance names cannot alias block names. resource_names.insert(block_name); statement(layout_for_variable(var), qual, block_name); begin_scope(); type.member_name_cache.clear(); uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); emit_struct_member(type, member, i); i++; } add_resource_name(var.self); end_scope_decl(join(to_name(var.self), type_to_array_glsl(type))); statement(""); } } else { // ESSL earlier than 310 and GLSL earlier than 150 did not support // I/O variables which are struct types. // To support this, flatten the struct into separate varyings instead. if (type.basetype == SPIRType::Struct && ((options.es && options.version < 310) || (!options.es && options.version < 150))) { emit_flattened_io_block(var, qual); } else { add_resource_name(var.self); statement(layout_for_variable(var), to_qualifiers_glsl(var.self), variable_decl(type, to_name(var.self), var.self), ";"); // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). 
if (var.storage == StorageClassOutput && var.initializer) { auto &entry_func = this->get(ir.default_entry_point); entry_func.fixup_hooks_in.push_back( [&]() { statement(to_name(var.self), " = ", to_expression(var.initializer), ";"); }); } } } } void CompilerGLSL::emit_uniform(const SPIRVariable &var) { auto &type = get(var.basetype); if (type.basetype == SPIRType::Image && type.image.sampled == 2) { if (!options.es && options.version < 420) require_extension_internal("GL_ARB_shader_image_load_store"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store."); } add_resource_name(var.self); statement(layout_for_variable(var), variable_decl(var), ";"); } string CompilerGLSL::constant_value_macro_name(uint32_t id) { return join("SPIRV_CROSS_CONSTANT_ID_", id); } void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant) { auto &type = get(constant.basetype); auto name = to_name(constant.self); statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";"); } void CompilerGLSL::emit_constant(const SPIRConstant &constant) { auto &type = get(constant.constant_type); auto name = to_name(constant.self); SpecializationConstant wg_x, wg_y, wg_z; uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // This specialization constant is implicitly declared by emitting layout() in; if (constant.self == workgroup_size_id) return; // These specialization constants are implicitly declared by emitting layout() in; // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration // later can use macro overrides for work group size. 
bool is_workgroup_size_constant = constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id; if (options.vulkan_semantics && is_workgroup_size_constant) { // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout(). return; } else if (!options.vulkan_semantics && is_workgroup_size_constant && !has_decoration(constant.self, DecorationSpecId)) { // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros. return; } // Only scalars have constant IDs. if (has_decoration(constant.self, DecorationSpecId)) { if (options.vulkan_semantics) { statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); } else { const string ¯o_name = constant.specialization_constant_macro_name; statement("#ifndef ", macro_name); statement("#define ", macro_name, " ", constant_expression(constant)); statement("#endif"); // For workgroup size constants, only emit the macros. 
if (!is_workgroup_size_constant) statement("const ", variable_decl(type, name), " = ", macro_name, ";"); } } else { statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); } } void CompilerGLSL::emit_entry_point_declarations() { } void CompilerGLSL::replace_illegal_names() { // clang-format off static const unordered_set keywords = { "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh", "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement", "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor", "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse", "ceil", "cos", "cosh", "cross", "degrees", "dFdx", "dFdxCoarse", "dFdxFine", "dFdy", "dFdyCoarse", "dFdyFine", "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2", "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract", "frexp", "fwidth", "fwidthCoarse", "fwidthFine", "greaterThan", "greaterThanEqual", "groupMemoryBarrier", "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor", "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample", "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2", "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared", "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual", "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8", "packUint2x16", "packUint4x16", 
"packUnorm2x16", "packUnorm4x8", "pow", "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step", "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets", "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad", "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize", "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16", "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow", "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer", "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard", "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4", "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float", "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray", "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube", "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect", "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant", "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect", "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", 
"layout", "long", "lowp", "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump", "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly", "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow", "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray", "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer", "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static", "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D", "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube", "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray", "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube", "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", "while", "writeonly", }; // clang-format on ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { if (!is_hidden_variable(var)) { auto &m = ir.meta[var.self].decoration; if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords)) m.alias = join("_", m.alias); } }); } void CompilerGLSL::replace_fragment_output(SPIRVariable &var) { auto &m = ir.meta[var.self].decoration; uint32_t location = 0; if (m.decoration_flags.get(DecorationLocation)) location = m.location; // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will // do the access chain part of this for us. 
auto &type = get(var.basetype); if (type.array.empty()) { // Redirect the write to a specific render target in legacy GLSL. m.alias = join("gl_FragData[", location, "]"); if (is_legacy_es() && location != 0) require_extension_internal("GL_EXT_draw_buffers"); } else if (type.array.size() == 1) { // If location is non-zero, we probably have to add an offset. // This gets really tricky since we'd have to inject an offset in the access chain. // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now. m.alias = "gl_FragData"; if (location != 0) SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. " "This is unimplemented in SPIRV-Cross."); if (is_legacy_es()) require_extension_internal("GL_EXT_draw_buffers"); } else SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL."); var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is. } void CompilerGLSL::replace_fragment_outputs() { ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput) replace_fragment_output(var); }); } string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) { if (out_type.vecsize == input_components) return expr; else if (input_components == 1 && !backend.can_swizzle_scalar) return join(type_to_glsl(out_type), "(", expr, ")"); else { // FIXME: This will not work with packed expressions. auto e = enclose_expression(expr) + "."; // Just clamp the swizzle index if we have more outputs than inputs. 
for (uint32_t c = 0; c < out_type.vecsize; c++)
			e += index_to_swizzle(min(c, input_components - 1));
		if (backend.swizzle_is_function && out_type.vecsize > 1)
			e += "()";

		remove_duplicate_swizzle(e);
		return e;
	}
}

// Emits the pixel local storage interface blocks.
// Throws unless the entry point is a fragment shader, the target is ES and the version is at least 300.
// One "__pixel_local_inEXT _PLSIn" block is emitted for pls_inputs and one
// "__pixel_local_outEXT _PLSOut" block for pls_outputs; empty lists emit nothing.
void CompilerGLSL::emit_pls()
{
	auto &execution = get_entry_point();
	if (execution.model != ExecutionModelFragment)
		SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");

	if (!options.es)
		SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");

	if (options.version < 300)
		SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");

	if (!pls_inputs.empty())
	{
		statement("__pixel_local_inEXT _PLSIn");
		begin_scope();
		for (auto &input : pls_inputs)
			statement(pls_decl(input), ";");
		end_scope_decl();
		// Blank line separating this block from whatever is emitted next.
		statement("");
	}

	if (!pls_outputs.empty())
	{
		statement("__pixel_local_outEXT _PLSOut");
		begin_scope();
		for (auto &output : pls_outputs)
			statement(pls_decl(output), ";");
		end_scope_decl();
		statement("");
	}
}

// For every variable whose expression type is an image, marks the variable as both
// NonWritable and NonReadable when neither decoration is present yet.
void CompilerGLSL::fixup_image_load_store_access()
{
	ir.for_each_typed_id([&](uint32_t var, const SPIRVariable &) {
		auto &vartype = expression_type(var);
		if (vartype.basetype == SPIRType::Image)
		{
			// Older glslangValidator does not emit required qualifiers here.
			// Solve this by making the image access as restricted as possible and loosen up if we need to.
			// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.

			auto &flags = ir.meta[var].decoration.decoration_flags;
			if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
			{
				flags.set(DecorationNonWritable);
				flags.set(DecorationNonReadable);
			}
		}
	});
}

// Redeclares the gl_PerVertex block for the given storage class ("in" or "out").
// Builtins are collected either from a Block-decorated builtin variable (at most one is allowed,
// otherwise this throws) or, if no such block exists, from loose global builtin variables.
// Only gl_Position, gl_PointSize, gl_ClipDistance and gl_CullDistance are emitted.
// Nothing is emitted when no relevant builtin was found.
void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
{
	Bitset emitted_builtins;
	Bitset global_builtins;
	const SPIRVariable *block_var = nullptr;
	bool emitted_block = false;
	bool builtin_array = false;

	// Need to use declared size in the type.
	// These variables might have been declared, but not statically used, so we haven't deduced their size yet.
	uint32_t cull_distance_size = 0;
	uint32_t clip_distance_size = 0;

	ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get(var.basetype);
		bool block = has_decoration(type.self, DecorationBlock);
		Bitset builtins;

		if (var.storage == storage && block && is_builtin_variable(var))
		{
			// Builtin interface block: inspect each member; index tracks the member position
			// so the matching member type can be looked up for the array size.
			uint32_t index = 0;
			for (auto &m : ir.meta[type.self].members)
			{
				if (m.builtin)
				{
					builtins.set(m.builtin_type);
					if (m.builtin_type == BuiltInCullDistance)
						cull_distance_size = this->get(type.member_types[index]).array.front();
					else if (m.builtin_type == BuiltInClipDistance)
						clip_distance_size = this->get(type.member_types[index]).array.front();
				}
				index++;
			}
		}
		else if (var.storage == storage && !block && is_builtin_variable(var))
		{
			// While we're at it, collect all declared global builtins (HLSL mostly ...).
			auto &m = ir.meta[var.self].decoration;
			if (m.builtin)
			{
				global_builtins.set(m.builtin_type);
				if (m.builtin_type == BuiltInCullDistance)
					cull_distance_size = type.array.front();
				else if (m.builtin_type == BuiltInClipDistance)
					clip_distance_size = type.array.front();
			}
		}

		// Only a builtin block populates `builtins`; everything below handles that block.
		if (builtins.empty())
			return;

		if (emitted_block)
			SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");

		emitted_builtins = builtins;
		emitted_block = true;
		builtin_array = !type.array.empty();
		block_var = &var;
	});

	// Keep only the four builtins which can be members of gl_PerVertex.
	global_builtins = Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));

	// Try to collect all other declared builtins.
	if (!emitted_block)
		emitted_builtins = global_builtins;

	// Can't declare an empty interface block.
	if (emitted_builtins.empty())
		return;

	if (storage == StorageClassOutput)
		statement("out gl_PerVertex");
	else
		statement("in gl_PerVertex");

	begin_scope();
	if (emitted_builtins.get(BuiltInPosition))
		statement("vec4 gl_Position;");
	if (emitted_builtins.get(BuiltInPointSize))
		statement("float gl_PointSize;");
	if (emitted_builtins.get(BuiltInClipDistance))
		statement("float gl_ClipDistance[", clip_distance_size, "];");
	if (emitted_builtins.get(BuiltInCullDistance))
		statement("float gl_CullDistance[", cull_distance_size, "];");

	bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
	if (builtin_array)
	{
		// Make sure the array has a supported name in the code.
		if (storage == StorageClassOutput)
			set_name(block_var->self, "gl_out");
		else if (storage == StorageClassInput)
			set_name(block_var->self, "gl_in");

		// Tessellation control outputs are sized by the entry point's output vertex count;
		// other tessellation arrays use gl_MaxPatchVertices, everything else is unsized.
		if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
			end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
		else
			end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]"));
	}
	else
		end_scope_decl();
	statement("");
}

// Emits a variable declaration for every SPIRUndef id, followed by a blank line if any was emitted.
void CompilerGLSL::declare_undefined_values()
{
	bool emitted = false;
	ir.for_each_typed_id([&](uint32_t, const SPIRUndef &undef) {
		statement(variable_decl(this->get(undef.basetype), to_name(undef.self), undef.self), ";");
		emitted = true;
	});

	if (emitted)
		statement("");
}

// Returns true when the variable is statically assigned, has a non-zero static expression,
// is remapped, and that static expression is a constant flagged as is_used_as_lut.
bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const
{
	bool statically_assigned = var.statically_assigned && var.static_expression != 0 && var.remapped_variable;

	if (statically_assigned)
	{
		auto *constant = maybe_get(var.static_expression);
		if (constant && constant->is_used_as_lut)
			return true;
	}

	return false;
}

// Emits all global-scope declarations of the shader in a fixed order; see the
// section comments in the body for the individual stages.
void CompilerGLSL::emit_resources()
{
	auto &execution = get_entry_point();

	replace_illegal_names();

	// Legacy GL uses gl_FragData[], redeclare all fragment outputs
	// with builtins.
if (execution.model == ExecutionModelFragment && is_legacy())
		replace_fragment_outputs();

	// Emit PLS blocks if we have such variables.
	if (!pls_inputs.empty() || !pls_outputs.empty())
		emit_pls();

	// Emit custom gl_PerVertex for SSO compatibility.
	if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
	{
		switch (execution.model)
		{
		// These stages get both an input and an output gl_PerVertex block.
		case ExecutionModelGeometry:
		case ExecutionModelTessellationControl:
		case ExecutionModelTessellationEvaluation:
			emit_declared_builtin_block(StorageClassInput, execution.model);
			emit_declared_builtin_block(StorageClassOutput, execution.model);
			break;

		// Vertex shaders only get the output block.
		case ExecutionModelVertex:
			emit_declared_builtin_block(StorageClassOutput, execution.model);
			break;

		default:
			break;
		}
	}
	else
	{
		// Need to redeclare clip/cull distance with explicit size to use them.
		// SPIR-V mandates these builtins have a size declared.
		const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
		if (clip_distance_count != 0)
			statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
		if (cull_distance_count != 0)
			statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
		if (clip_distance_count != 0 || cull_distance_count != 0)
			statement("");
	}

	if (position_invariant)
	{
		statement("invariant gl_Position;");
		statement("");
	}

	// Tracks whether the current section emitted anything, so a separating blank line
	// is only written after non-empty sections.
	bool emitted = false;

	// If emitted Vulkan GLSL,
	// emit specialization constants as actual floats,
	// spec op expressions will redirect to the constant name.
	//
	{
		auto loop_lock = ir.create_loop_hard_lock();
		for (auto &id_ : ir.ids_for_constant_or_type)
		{
			auto &id = ir.ids[id_];

			if (id.get_type() == TypeConstant)
			{
				auto &c = id.get();

				// Only specialization constants and constants used as LUTs are declared here.
				bool needs_declaration = c.specialization || c.is_used_as_lut;

				if (needs_declaration)
				{
					// Without Vulkan semantics, a specialization constant gets a macro name
					// derived from its SpecId decoration.
					if (!options.vulkan_semantics && c.specialization)
					{
						c.specialization_constant_macro_name =
						    constant_value_macro_name(get_decoration(c.self, DecorationSpecId));
					}
					emit_constant(c);
					emitted = true;
				}
			}
			else if (id.get_type() == TypeConstantOp)
			{
				emit_specialization_constant_op(id.get());
				emitted = true;
			}
			else if (id.get_type() == TypeType)
			{
				// Plain structs only: no arrays, no pointers, and no Block / BufferBlock types.
				auto &type = id.get();
				if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
				    (!ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
				     !ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
				{
					// Separate the struct from any constants emitted just before it.
					if (emitted)
						statement("");
					emitted = false;

					emit_struct(type);
				}
			}
		}
	}

	if (emitted)
		statement("");

	// If we needed to declare work group size late, check here.
	// If the work group size depends on a specialization constant, we need to declare the layout() block
	// after constants (and their macros) have been declared.
	if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics &&
	    execution.workgroup_size.constant != 0)
	{
		SpecializationConstant wg_x, wg_y, wg_z;
		get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);

		if ((wg_x.id != 0) || (wg_y.id != 0) || (wg_z.id != 0))
		{
			SmallVector inputs;
			build_workgroup_size(inputs, wg_x, wg_y, wg_z);
			statement("layout(", merge(inputs), ") in;");
			statement("");
		}
	}

	emitted = false;

	if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
	{
		for (auto type : physical_storage_non_block_pointer_types)
		{
			emit_buffer_reference_block(get(type), false);
		}

		// Output buffer reference blocks.
		// Do this in two stages, one with forward declaration,
		// and one without. Buffer reference blocks can reference themselves
		// to support things like linked lists.
		ir.for_each_typed_id([&](uint32_t, SPIRType &type) {
			bool has_block_flags = has_decoration(type.self, DecorationBlock);
			if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
			    type.storage == StorageClassPhysicalStorageBufferEXT)
			{
				emit_buffer_reference_block(type, true);
			}
		});

		ir.for_each_typed_id([&](uint32_t, SPIRType &type) {
			bool has_block_flags = has_decoration(type.self, DecorationBlock);
			if (has_block_flags && type.pointer && type.pointer_depth == 1 && !type_is_array_of_pointers(type) &&
			    type.storage == StorageClassPhysicalStorageBufferEXT)
			{
				emit_buffer_reference_block(type, false);
			}
		});
	}

	// Output UBOs and SSBOs
	ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get(var.basetype);

		bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform ||
		                        type.storage == StorageClassShaderRecordBufferNV;
		bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
		                       ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);

		if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
		    has_block_flags)
		{
			emit_buffer_block(var);
		}
	});

	// Output push constant blocks
	ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get(var.basetype);
		if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
		    !is_hidden_variable(var))
		{
			emit_push_constant_block(var);
		}
	});

	bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;

	// Output Uniform Constants (values, samplers, images, etc).
	ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get(var.basetype);

		// If we're remapping separate samplers and images, only emit the combined samplers.
		if (skip_separate_image_sampler)
		{
			// Sampler buffers are always used without a sampler, and they will also work in regular GL.
			bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
			bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
			bool separate_sampler = type.basetype == SPIRType::Sampler;
			if (!sampler_buffer && (separate_image || separate_sampler))
				return;
		}

		if (var.storage != StorageClassFunction && type.pointer &&
		    (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter ||
		     type.storage == StorageClassRayPayloadNV || type.storage == StorageClassIncomingRayPayloadNV ||
		     type.storage == StorageClassCallableDataNV || type.storage == StorageClassIncomingCallableDataNV ||
		     type.storage == StorageClassHitAttributeNV) &&
		    !is_hidden_variable(var))
		{
			emit_uniform(var);
			emitted = true;
		}
	});

	if (emitted)
		statement("");
	emitted = false;

	// Output in/out interfaces.
	ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) {
		auto &type = this->get(var.basetype);

		if (var.storage != StorageClassFunction && type.pointer &&
		    (var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
		    interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var))
		{
			emit_interface_block(var);
			emitted = true;
		}
		else if (is_builtin_variable(var))
		{
			// For gl_InstanceIndex emulation on GLES, the API user needs to
			// supply this uniform.
			if (options.vertex.support_nonzero_base_instance &&
			    ir.meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics)
			{
				statement("uniform int SPIRV_Cross_BaseInstance;");
				emitted = true;
			}
		}
	});

	// Global variables.
	// Output-storage globals and variables which are LUTs are not declared here.
	for (auto global : global_variables)
	{
		auto &var = get(global);
		if (var.storage != StorageClassOutput)
		{
			if (!variable_is_lut(var))
			{
				add_resource_name(var.self);
				statement(variable_decl(var), ";");
				emitted = true;
			}
		}
	}

	if (emitted)
		statement("");

	declare_undefined_values();
}

// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation of the arg ID.
// Subclasses may override to modify the return value.
string CompilerGLSL::to_func_call_arg(uint32_t id)
{
	// Make sure that we use the name of the original variable, and not the parameter alias.
	uint32_t name_id = id;
	auto *var = maybe_get(id);
	if (var && var->basevariable)
		name_id = var->basevariable;
	return to_expression(name_id);
}

// Records the ID as a forced temporary and requests another compilation pass.
void CompilerGLSL::handle_invalid_expression(uint32_t id)
{
	// We tried to read an invalidated expression.
	// This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
	forced_temporaries.insert(id);
	force_recompile();
}

// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
// GLSL does not support packed formats, so simply return the expression.
// Subclasses that do will override.
string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool)
{
	return expr_str;
}

// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all.
// Strips the outermost parentheses in place; expressions shorter than two characters or not
// wrapped in '(' ... ')' are left untouched.
void CompilerGLSL::strip_enclosed_expression(string &expr)
{
	if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
		return;

	// Have to make sure that our first and last parens actually enclose everything inside it.
uint32_t paren_count = 0; for (auto &c : expr) { if (c == '(') paren_count++; else if (c == ')') { paren_count--; // If we hit 0 and this is not the final char, our first and final parens actually don't // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). if (paren_count == 0 && &c != &expr.back()) return; } } expr.erase(expr.size() - 1, 1); expr.erase(begin(expr)); } string CompilerGLSL::enclose_expression(const string &expr) { bool need_parens = false; // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back // unary expressions. if (!expr.empty()) { auto c = expr.front(); if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') need_parens = true; } if (!need_parens) { uint32_t paren_count = 0; for (auto c : expr) { if (c == '(' || c == '[') paren_count++; else if (c == ')' || c == ']') { assert(paren_count); paren_count--; } else if (c == ' ' && paren_count == 0) { need_parens = true; break; } } assert(paren_count == 0); } // If this expression contains any spaces which are not enclosed by parentheses, // we need to enclose it so we can treat the whole string as an expression. // This happens when two expressions have been part of a binary op earlier. if (need_parens) return join('(', expr, ')'); else return expr; } string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) { // If this expression starts with an address-of operator ('&'), then // just return the part after the operator. // TODO: Strip parens if unnecessary? 
if (expr.front() == '&') return expr.substr(1); else if (backend.native_pointers) return join('*', expr); else if (expr_type.storage == StorageClassPhysicalStorageBufferEXT && expr_type.basetype != SPIRType::Struct && expr_type.pointer_depth == 1) { return join(enclose_expression(expr), ".value"); } else return expr; } string CompilerGLSL::address_of_expression(const std::string &expr) { if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') { // If we have an expression which looks like (*foo), taking the address of it is the same as stripping // the first two and last characters. We might have to enclose the expression. // This doesn't work for cases like (*foo + 10), // but this is an r-value expression which we cannot take the address of anyways. return enclose_expression(expr.substr(2, expr.size() - 3)); } else if (expr.front() == '*') { // If this expression starts with a dereference operator ('*'), then // just return the part after the operator. return expr.substr(1); } else return join('&', enclose_expression(expr)); } // Just like to_expression except that we enclose the expression inside parentheses if needed. string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) { return enclose_expression(to_expression(id, register_expression_read)); } // Used explicitly when we want to read a row-major expression, but without any transpose shenanigans. // need_transpose must be forced to false. string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) { return unpack_expression_type(to_expression(id), expression_type(id), get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true); } string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) { // If we need to transpose, it will also take care of unpacking rules. 
auto *e = maybe_get(id); bool need_transpose = e && e->need_transpose; bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); if (!need_transpose && (is_remapped || is_packed)) { return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); } else return to_expression(id, register_expression_read); } string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) { // If we need to transpose, it will also take care of unpacking rules. auto *e = maybe_get(id); bool need_transpose = e && e->need_transpose; bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); if (!need_transpose && (is_remapped || is_packed)) { return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id), get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); } else return to_enclosed_expression(id, register_expression_read); } string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) { auto &type = expression_type(id); if (type.pointer && should_dereference(id)) return dereference_expression(type, to_enclosed_expression(id, register_expression_read)); else return to_expression(id, register_expression_read); } string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) { auto &type = expression_type(id); if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) return address_of_expression(to_enclosed_expression(id, register_expression_read)); else return 
to_unpacked_expression(id, register_expression_read); } string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) { auto &type = expression_type(id); if (type.pointer && expression_is_lvalue(id) && !should_dereference(id)) return address_of_expression(to_enclosed_expression(id, register_expression_read)); else return to_enclosed_unpacked_expression(id, register_expression_read); } string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) { auto expr = to_enclosed_expression(id); if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked)) return join(expr, "[", index, "]"); else return join(expr, ".", index_to_swizzle(index)); } string CompilerGLSL::to_rerolled_array_expression(const string &base_expr, const SPIRType &type) { uint32_t size = to_array_size_literal(type); auto &parent = get(type.parent_type); string expr = "{ "; for (uint32_t i = 0; i < size; i++) { auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); if (parent.array.empty()) expr += subexpr; else expr += to_rerolled_array_expression(subexpr, parent); if (i + 1 < size) expr += ", "; } expr += " }"; return expr; } string CompilerGLSL::to_composite_constructor_expression(uint32_t id) { auto &type = expression_type(id); if (!backend.array_is_value_type && !type.array.empty()) { // For this case, we need to "re-roll" an array initializer from a temporary. // We cannot simply pass the array directly, since it decays to a pointer and it cannot // participate in a struct initializer. E.g. // float arr[2] = { 1.0, 2.0 }; // Foo foo = { arr }; must be transformed to // Foo foo = { { arr[0], arr[1] } }; // The array sizes cannot be deduced from specialization constants since we cannot use any loops. // We're only triggering one read of the array expression, but this is fine since arrays have to be declared // as temporaries anyways. 
return to_rerolled_array_expression(to_enclosed_expression(id), type);
	}
	else
		return to_expression(id);
}

// Returns the textual expression for an ID.
// Reading an invalidated expression (or one depending on an invalidated expression) schedules a
// recompile via handle_invalid_expression(). When register_expression_read is set, the read is
// tracked. The result then depends on the kind of ID: expression, constant, spec constant op,
// variable, or a plain name for everything else. Combined image samplers and access chains throw.
string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
{
	auto itr = invalid_expressions.find(id);
	if (itr != end(invalid_expressions))
		handle_invalid_expression(id);

	if (ir.ids[id].get_type() == TypeExpression)
	{
		// We might have a more complex chain of dependencies.
		// A possible scenario is that we
		//
		// %1 = OpLoad
		// %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1.
		// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
		// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
		// %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
		//
		// However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
		// and see that we should not forward reads of the original variable.
		auto &expr = get(id);
		for (uint32_t dep : expr.expression_dependencies)
			if (invalid_expressions.find(dep) != end(invalid_expressions))
				handle_invalid_expression(dep);
	}

	if (register_expression_read)
		track_expression_read(id);

	switch (ir.ids[id].get_type())
	{
	case TypeExpression:
	{
		auto &e = get(id);
		// An expression with a base is the enclosed base expression followed by its own suffix.
		if (e.base_expression)
			return to_enclosed_expression(e.base_expression) + e.expression;
		else if (e.need_transpose && !e.access_chain)
		{
			// This should not be reached for access chains, since we always deal explicitly with transpose state
			// when consuming an access chain expression.
			uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID);
			bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked);
			return convert_row_major_matrix(e.expression, get(e.expression_type), physical_type_id, is_packed);
		}
		else
		{
			if (is_forcing_recompilation())
			{
				// During first compilation phase, certain expression patterns can trigger exponential growth of memory.
				// Avoid this by returning dummy expressions during this phase.
				// Do not use empty expressions here, because those are sentinels for other cases.
				return "_";
			}
			else
				return e.expression;
		}
	}

	case TypeConstant:
	{
		auto &c = get(id);
		auto &type = get(c.constant_type);

		// WorkGroupSize may be a constant.
		auto &dec = ir.meta[c.self].decoration;
		if (dec.builtin)
			return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
		// Constants which are declared by name are referenced by name instead of being inlined.
		else if (c.specialization)
			return to_name(id);
		else if (c.is_used_as_lut)
			return to_name(id);
		else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
			return to_name(id);
		else if (!type.array.empty() && !backend.can_declare_arrays_inline)
			return to_name(id);
		else
			return constant_expression(c);
	}

	case TypeConstantOp:
		return to_name(id);

	case TypeVariable:
	{
		auto &var = get(id);
		// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
		// the variable has not been declared yet.
		if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
			return to_expression(var.static_expression);
		else if (var.deferred_declaration)
		{
			// First use of a deferred variable: clear the flag and return its declaration.
			var.deferred_declaration = false;
			return variable_decl(var);
		}
		else if (flattened_structs.count(id))
		{
			return load_flattened_struct(var);
		}
		else
		{
			auto &dec = ir.meta[var.self].decoration;
			if (dec.builtin)
				return builtin_to_glsl(dec.builtin_type, var.storage);
			else
				return to_name(id);
		}
	}

	case TypeCombinedImageSampler:
		// This type should never be taken the expression of directly.
// The intention is that texture sampling functions will extract the image and samplers // separately and take their expressions as needed. // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler // expression ala sampler2D(texture, sampler). SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); case TypeAccessChain: // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. SPIRV_CROSS_THROW("Access chains have no default expression representation."); default: return to_name(id); } } string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) { auto &type = get(cop.basetype); bool binary = false; bool unary = false; string op; if (is_legacy() && is_unsigned_opcode(cop.opcode)) SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); // TODO: Find a clean way to reuse emit_instruction. switch (cop.opcode) { case OpSConvert: case OpUConvert: case OpFConvert: op = type_to_glsl_constructor(type); break; #define GLSL_BOP(opname, x) \ case Op##opname: \ binary = true; \ op = x; \ break #define GLSL_UOP(opname, x) \ case Op##opname: \ unary = true; \ op = x; \ break GLSL_UOP(SNegate, "-"); GLSL_UOP(Not, "~"); GLSL_BOP(IAdd, "+"); GLSL_BOP(ISub, "-"); GLSL_BOP(IMul, "*"); GLSL_BOP(SDiv, "/"); GLSL_BOP(UDiv, "/"); GLSL_BOP(UMod, "%"); GLSL_BOP(SMod, "%"); GLSL_BOP(ShiftRightLogical, ">>"); GLSL_BOP(ShiftRightArithmetic, ">>"); GLSL_BOP(ShiftLeftLogical, "<<"); GLSL_BOP(BitwiseOr, "|"); GLSL_BOP(BitwiseXor, "^"); GLSL_BOP(BitwiseAnd, "&"); GLSL_BOP(LogicalOr, "||"); GLSL_BOP(LogicalAnd, "&&"); GLSL_UOP(LogicalNot, "!"); GLSL_BOP(LogicalEqual, "=="); GLSL_BOP(LogicalNotEqual, "!="); GLSL_BOP(IEqual, "=="); GLSL_BOP(INotEqual, "!="); GLSL_BOP(ULessThan, "<"); GLSL_BOP(SLessThan, "<"); GLSL_BOP(ULessThanEqual, "<="); GLSL_BOP(SLessThanEqual, "<="); GLSL_BOP(UGreaterThan, ">"); GLSL_BOP(SGreaterThan, ">"); 
GLSL_BOP(UGreaterThanEqual, ">="); GLSL_BOP(SGreaterThanEqual, ">="); case OpSelect: { if (cop.arguments.size() < 3) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); // This one is pretty annoying. It's triggered from // uint(bool), int(bool) from spec constants. // In order to preserve its compile-time constness in Vulkan GLSL, // we need to reduce the OpSelect expression back to this simplified model. // If we cannot, fail. if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) { // Implement as a simple cast down below. } else { // Implement a ternary and pray the compiler understands it :) return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); } break; } case OpVectorShuffle: { string expr = type_to_glsl_constructor(type); expr += "("; uint32_t left_components = expression_type(cop.arguments[0]).vecsize; string left_arg = to_enclosed_expression(cop.arguments[0]); string right_arg = to_enclosed_expression(cop.arguments[1]); for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) { uint32_t index = cop.arguments[i]; if (index >= left_components) expr += right_arg + "." + "xyzw"[index - left_components]; else expr += left_arg + "." + "xyzw"[index]; if (i + 1 < uint32_t(cop.arguments.size())) expr += ", "; } expr += ")"; return expr; } case OpCompositeExtract: { auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); return expr; } case OpCompositeInsert: SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported."); default: // Some opcodes are unimplemented here, these are currently not possible to test from glslang. 
SPIRV_CROSS_THROW("Unimplemented spec constant op."); } uint32_t bit_width = 0; if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) bit_width = expression_type(cop.arguments[0]).width; SPIRType::BaseType input_type; bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); switch (cop.opcode) { case OpIEqual: case OpINotEqual: input_type = to_signed_basetype(bit_width); break; case OpSLessThan: case OpSLessThanEqual: case OpSGreaterThan: case OpSGreaterThanEqual: case OpSMod: case OpSDiv: case OpShiftRightArithmetic: case OpSConvert: case OpSNegate: input_type = to_signed_basetype(bit_width); break; case OpULessThan: case OpULessThanEqual: case OpUGreaterThan: case OpUGreaterThanEqual: case OpUMod: case OpUDiv: case OpShiftRightLogical: case OpUConvert: input_type = to_unsigned_basetype(bit_width); break; default: input_type = type.basetype; break; } #undef GLSL_BOP #undef GLSL_UOP if (binary) { if (cop.arguments.size() < 2) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); string cast_op0; string cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], cop.arguments[1], skip_cast_if_equal_type); if (type.basetype != input_type && type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; auto expr = bitcast_glsl_op(type, expected_type); expr += '('; expr += join(cast_op0, " ", op, " ", cast_op1); expr += ')'; return expr; } else return join("(", cast_op0, " ", op, " ", cast_op1, ")"); } else if (unary) { if (cop.arguments.size() < 1) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); // Auto-bitcast to result type as needed. // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. 
return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); } else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) { if (cop.arguments.size() < 1) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); auto &arg_type = expression_type(cop.arguments[0]); if (arg_type.width < type.width && input_type != arg_type.basetype) { auto expected = arg_type; expected.basetype = input_type; return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")"); } else return join(op, "(", to_expression(cop.arguments[0]), ")"); } else { if (cop.arguments.size() < 1) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); return join(op, "(", to_expression(cop.arguments[0]), ")"); } } string CompilerGLSL::constant_expression(const SPIRConstant &c) { auto &type = get(c.constant_type); if (type.pointer) { return backend.null_pointer_literal; } else if (!c.subconstants.empty()) { // Handles Arrays and structures. string res; if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && type.array.empty()) { res = type_to_glsl_constructor(type) + "{ "; } else if (backend.use_initializer_list) { res = "{ "; } else { res = type_to_glsl_constructor(type) + "("; } for (auto &elem : c.subconstants) { auto &subc = get(elem); if (subc.specialization) res += to_name(elem); else res += constant_expression(subc); if (&elem != &c.subconstants.back()) res += ", "; } res += backend.use_initializer_list ? " }" : ")"; return res; } else if (c.columns() == 1) { return constant_expression_vector(c, 0); } else { string res = type_to_glsl(get(c.constant_type)) + "("; for (uint32_t col = 0; col < c.columns(); col++) { if (c.specialization_constant_id(col) != 0) res += to_name(c.specialization_constant_id(col)); else res += constant_expression_vector(c, col); if (col + 1 < c.columns()) res += ", "; } res += ")"; return res; } } #ifdef _MSC_VER // sprintf warning. // We cannot rely on snprintf existing because, ..., MSVC. 
#pragma warning(push) #pragma warning(disable : 4996) #endif string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; float float_value = c.scalar_f16(col, row); // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots // of complicated workarounds, just value-cast to the half type always. if (std::isnan(float_value) || std::isinf(float_value)) { SPIRType type; type.basetype = SPIRType::Half; type.vecsize = 1; type.columns = 1; if (float_value == numeric_limits::infinity()) res = join(type_to_glsl(type), "(1.0 / 0.0)"); else if (float_value == -numeric_limits::infinity()) res = join(type_to_glsl(type), "(-1.0 / 0.0)"); else if (std::isnan(float_value)) res = join(type_to_glsl(type), "(0.0 / 0.0)"); else SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } else { SPIRType type; type.basetype = SPIRType::Half; type.vecsize = 1; type.columns = 1; res = join(type_to_glsl(type), "(", convert_to_string(float_value, current_locale_radix_character), ")"); } return res; } string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; float float_value = c.scalar_f32(col, row); if (std::isnan(float_value) || std::isinf(float_value)) { // Use special representation. 
if (!is_legacy()) { SPIRType out_type; SPIRType in_type; out_type.basetype = SPIRType::Float; in_type.basetype = SPIRType::UInt; out_type.vecsize = 1; in_type.vecsize = 1; out_type.width = 32; in_type.width = 32; char print_buffer[32]; sprintf(print_buffer, "0x%xu", c.scalar(col, row)); res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); } else { if (float_value == numeric_limits::infinity()) { if (backend.float_literal_suffix) res = "(1.0f / 0.0f)"; else res = "(1.0 / 0.0)"; } else if (float_value == -numeric_limits::infinity()) { if (backend.float_literal_suffix) res = "(-1.0f / 0.0f)"; else res = "(-1.0 / 0.0)"; } else if (std::isnan(float_value)) { if (backend.float_literal_suffix) res = "(0.0f / 0.0f)"; else res = "(0.0 / 0.0)"; } else SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } } else { res = convert_to_string(float_value, current_locale_radix_character); if (backend.float_literal_suffix) res += "f"; } return res; } std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; double double_value = c.scalar_f64(col, row); if (std::isnan(double_value) || std::isinf(double_value)) { // Use special representation. if (!is_legacy()) { SPIRType out_type; SPIRType in_type; out_type.basetype = SPIRType::Double; in_type.basetype = SPIRType::UInt64; out_type.vecsize = 1; in_type.vecsize = 1; out_type.width = 64; in_type.width = 64; uint64_t u64_value = c.scalar_u64(col, row); if (options.es) SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile."); require_extension_internal("GL_ARB_gpu_shader_int64"); char print_buffer[64]; sprintf(print_buffer, "0x%llx%s", static_cast(u64_value), backend.long_long_literal_suffix ? 
"ull" : "ul"); res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")"); } else { if (options.es) SPIRV_CROSS_THROW("FP64 not supported in ES profile."); if (options.version < 400) require_extension_internal("GL_ARB_gpu_shader_fp64"); if (double_value == numeric_limits::infinity()) { if (backend.double_literal_suffix) res = "(1.0lf / 0.0lf)"; else res = "(1.0 / 0.0)"; } else if (double_value == -numeric_limits::infinity()) { if (backend.double_literal_suffix) res = "(-1.0lf / 0.0lf)"; else res = "(-1.0 / 0.0)"; } else if (std::isnan(double_value)) { if (backend.double_literal_suffix) res = "(0.0lf / 0.0lf)"; else res = "(0.0 / 0.0)"; } else SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); } } else { res = convert_to_string(double_value, current_locale_radix_character); if (backend.double_literal_suffix) res += "lf"; } return res; } #ifdef _MSC_VER #pragma warning(pop) #endif string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) { auto type = get(c.constant_type); type.columns = 1; auto scalar_type = type; scalar_type.vecsize = 1; string res; bool splat = backend.use_constructor_splatting && c.vector_size() > 1; bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; if (!type_is_floating_point(type)) { // Cannot swizzle literal integers as a special case. swizzle_splat = false; } if (splat || swizzle_splat) { // Cannot use constant splatting if we have specialization constants somewhere in the vector. 
for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.specialization_constant_id(vector, i) != 0) { splat = false; swizzle_splat = false; break; } } } if (splat || swizzle_splat) { if (type.width == 64) { uint64_t ident = c.scalar_u64(vector, 0); for (uint32_t i = 1; i < c.vector_size(); i++) { if (ident != c.scalar_u64(vector, i)) { splat = false; swizzle_splat = false; break; } } } else { uint32_t ident = c.scalar(vector, 0); for (uint32_t i = 1; i < c.vector_size(); i++) { if (ident != c.scalar(vector, i)) { splat = false; swizzle_splat = false; } } } } if (c.vector_size() > 1 && !swizzle_splat) res += type_to_glsl(type) + "("; switch (type.basetype) { case SPIRType::Half: if (splat || swizzle_splat) { res += convert_half_to_string(c, vector, 0); if (swizzle_splat) res = remap_swizzle(get(c.constant_type), 1, res); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else res += convert_half_to_string(c, vector, i); if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Float: if (splat || swizzle_splat) { res += convert_float_to_string(c, vector, 0); if (swizzle_splat) res = remap_swizzle(get(c.constant_type), 1, res); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else res += convert_float_to_string(c, vector, i); if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Double: if (splat || swizzle_splat) { res += convert_double_to_string(c, vector, 0); if (swizzle_splat) res = remap_swizzle(get(c.constant_type), 1, res); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else res += convert_double_to_string(c, vector, i); if (i + 
1 < c.vector_size()) res += ", "; } } break; case SPIRType::Int64: if (splat) { res += convert_to_string(c.scalar_i64(vector, 0)); if (backend.long_long_literal_suffix) res += "ll"; else res += "l"; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar_i64(vector, i)); if (backend.long_long_literal_suffix) res += "ll"; else res += "l"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UInt64: if (splat) { res += convert_to_string(c.scalar_u64(vector, 0)); if (backend.long_long_literal_suffix) res += "ull"; else res += "ul"; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar_u64(vector, i)); if (backend.long_long_literal_suffix) res += "ull"; else res += "ul"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UInt: if (splat) { res += convert_to_string(c.scalar(vector, 0)); if (is_legacy()) { // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. if (c.scalar_i32(vector, 0) < 0) SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); } else if (backend.uint32_t_literal_suffix) res += "u"; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar(vector, i)); if (is_legacy()) { // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
if (c.scalar_i32(vector, i) < 0) SPIRV_CROSS_THROW( "Tried to convert uint literal into int, but this made the literal negative."); } else if (backend.uint32_t_literal_suffix) res += "u"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Int: if (splat) res += convert_to_string(c.scalar_i32(vector, 0)); else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else res += convert_to_string(c.scalar_i32(vector, i)); if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UShort: if (splat) { res += convert_to_string(c.scalar(vector, 0)); if (is_legacy()) { // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. if (c.scalar_i16(vector, 0) < 0) SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); } else res += backend.uint16_t_literal_suffix; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar(vector, i)); if (is_legacy()) { // Fake unsigned constant literals with signed ones if possible. // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. 
if (c.scalar_i16(vector, i) < 0) SPIRV_CROSS_THROW( "Tried to convert uint literal into int, but this made the literal negative."); } else res += backend.uint16_t_literal_suffix; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Short: if (splat) { res += convert_to_string(c.scalar_i16(vector, 0)); res += backend.int16_t_literal_suffix; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar_i16(vector, i)); res += backend.int16_t_literal_suffix; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UByte: if (splat) { res += convert_to_string(c.scalar_u8(vector, 0)); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += type_to_glsl(scalar_type); res += "("; res += convert_to_string(c.scalar_u8(vector, i)); res += ")"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::SByte: if (splat) { res += convert_to_string(c.scalar_i8(vector, 0)); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += type_to_glsl(scalar_type); res += "("; res += convert_to_string(c.scalar_i8(vector, i)); res += ")"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Boolean: if (splat) res += c.scalar(vector, 0) ? "true" : "false"; else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else res += c.scalar(vector, i) ? 
"true" : "false"; if (i + 1 < c.vector_size()) res += ", "; } } break; default: SPIRV_CROSS_THROW("Invalid constant expression basetype."); } if (c.vector_size() > 1 && !swizzle_splat) res += ")"; return res; } SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) { forced_temporaries.insert(id); emit_uninitialized_temporary(type, id); return set(id, to_name(id), type, true); } void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) { // If we're declaring temporaries inside continue blocks, // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. if (current_continue_block && !hoisted_temporaries.count(result_id)) { auto &header = get(current_continue_block->loop_dominator); if (find_if(begin(header.declare_temporary), end(header.declare_temporary), [result_type, result_id](const pair &tmp) { return tmp.first == result_type && tmp.second == result_id; }) == end(header.declare_temporary)) { header.declare_temporary.emplace_back(result_type, result_id); hoisted_temporaries.insert(result_id); force_recompile(); } } else if (hoisted_temporaries.count(result_id) == 0) { auto &type = get(result_type); auto &flags = ir.meta[result_id].decoration.decoration_flags; // The result_id has not been made into an expression yet, so use flags interface. add_local_variable_name(result_id); statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), ";"); } } string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) { auto &type = get(result_type); auto &flags = ir.meta[result_id].decoration.decoration_flags; // If we're declaring temporaries inside continue blocks, // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. 
if (current_continue_block && !hoisted_temporaries.count(result_id)) { auto &header = get(current_continue_block->loop_dominator); if (find_if(begin(header.declare_temporary), end(header.declare_temporary), [result_type, result_id](const pair &tmp) { return tmp.first == result_type && tmp.second == result_id; }) == end(header.declare_temporary)) { header.declare_temporary.emplace_back(result_type, result_id); hoisted_temporaries.insert(result_id); force_recompile(); } return join(to_name(result_id), " = "); } else if (hoisted_temporaries.count(result_id)) { // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. return join(to_name(result_id), " = "); } else { // The result_id has not been made into an expression yet, so use flags interface. add_local_variable_name(result_id); return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); } } bool CompilerGLSL::expression_is_forwarded(uint32_t id) { return forwarded_temporaries.find(id) != end(forwarded_temporaries); } SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, bool suppress_usage_tracking) { if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) { // Just forward it without temporary. // If the forward is trivial, we do not force flushing to temporary for this expression. if (!suppress_usage_tracking) forwarded_temporaries.insert(result_id); return set(result_id, rhs, result_type, true); } else { // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). 
statement(declare_temporary(result_type, result_id), rhs, ";"); return set(result_id, to_name(result_id), result_type, true); } } void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { bool forward = should_forward(op0); emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { bool forward = should_forward(op0) && should_forward(op1); emit_op(result_type, result_id, join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) { auto &type = get(result_type); auto expr = type_to_glsl_constructor(type); expr += '('; for (uint32_t i = 0; i < type.vecsize; i++) { // Make sure to call to_expression multiple times to ensure // that these expressions are properly flushed to temporaries if needed. expr += op; expr += to_extract_component_expression(operand, i); if (i + 1 < type.vecsize) expr += ", "; } expr += ')'; emit_op(result_type, result_id, expr, should_forward(operand)); inherit_expression_dependencies(result_id, operand); } void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { auto &type = get(result_type); auto expr = type_to_glsl_constructor(type); expr += '('; for (uint32_t i = 0; i < type.vecsize; i++) { // Make sure to call to_expression multiple times to ensure // that these expressions are properly flushed to temporaries if needed. 
expr += to_extract_component_expression(op0, i); expr += ' '; expr += op; expr += ' '; expr += to_extract_component_expression(op1, i); if (i + 1 < type.vecsize) expr += ", "; } expr += ')'; emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) { auto &type0 = expression_type(op0); auto &type1 = expression_type(op1); // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected // since equality test is exactly the same. bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); // Create a fake type so we can bitcast to it. // We only deal with regular arithmetic types here like int, uints and so on. SPIRType expected_type; expected_type.basetype = input_type; expected_type.vecsize = type0.vecsize; expected_type.columns = type0.columns; expected_type.width = type0.width; if (cast) { cast_op0 = bitcast_glsl(expected_type, op0); cast_op1 = bitcast_glsl(expected_type, op1); } else { // If we don't cast, our actual input type is that of the first (or second) argument. 
cast_op0 = to_enclosed_unpacked_expression(op0); cast_op1 = to_enclosed_unpacked_expression(op1); input_type = type0.basetype; } return expected_type; } void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) { string cast_op0, cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); auto &out_type = get(result_type); // We might have casted away from the result type, so bitcast again. // For example, arithmetic right shift with uint inputs. // Special case boolean outputs since relational opcodes output booleans instead of int/uint. string expr; if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(cast_op0, " ", op, " ", cast_op1); expr += ')'; } else expr += join(cast_op0, " ", op, " ", cast_op1); emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { bool forward = should_forward(op0); emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { bool forward = should_forward(op0) && should_forward(op1); emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, 
uint32_t result_id, uint32_t op0, const char *op, SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) { auto &out_type = get(result_type); auto &expr_type = expression_type(op0); auto expected_type = out_type; // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. expected_type.basetype = input_type; expected_type.width = expr_type.width; string cast_op = expr_type.basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); string expr; if (out_type.basetype != expected_result_type) { expected_type.basetype = expected_result_type; expected_type.width = out_type.width; expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op, ")"); expr += ')'; } else { expr += join(op, "(", cast_op, ")"); } emit_op(result_type, result_id, expr, should_forward(op0)); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op, SPIRType::BaseType input_type) { auto &out_type = get(result_type); auto expected_type = out_type; expected_type.basetype = input_type; string cast_op0 = expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); string cast_op1 = expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1); string cast_op2 = expression_type(op2).basetype != input_type ? 
bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2); string expr; if (out_type.basetype != input_type) { expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); expr += ')'; } else { expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); } emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); } void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) { string cast_op0, cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); auto &out_type = get(result_type); // Special case boolean outputs since relational opcodes output booleans instead of int/uint. 
string expr; if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); expr += ')'; } else { expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); } emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op) { bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); } void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, const char *op) { bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); inherit_expression_dependencies(result_id, op3); } // EXT_shader_texture_lod only concerns fragment shaders so lod tex functions // are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex // functions in vertex shaders so we revert those back to plain calls when // the lod is a constant value of zero. 
bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod) { auto &execution = get_entry_point(); bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment; if (!allowed && lod != 0) { auto *lod_constant = maybe_get(lod); if (!lod_constant || lod_constant->scalar_f32() != 0.0f) { SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders."); } } return allowed; } string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod, uint32_t tex) { const char *type; switch (imgtype.image.dim) { case spv::Dim1D: type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; break; case spv::Dim2D: type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; break; case spv::Dim3D: type = "3D"; break; case spv::DimCube: type = "Cube"; break; case spv::DimRect: type = "2DRect"; break; case spv::DimBuffer: type = "Buffer"; break; case spv::DimSubpassData: type = "2D"; break; default: type = ""; break; } bool use_explicit_lod = check_explicit_lod_allowed(lod); if (op == "textureLod" || op == "textureProjLod" || op == "textureGrad" || op == "textureProjGrad") { if (is_legacy_es()) { if (use_explicit_lod) require_extension_internal("GL_EXT_shader_texture_lod"); } else if (is_legacy()) require_extension_internal("GL_ARB_shader_texture_lod"); } if (op == "textureLodOffset" || op == "textureProjLodOffset") { if (is_legacy_es()) SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES")); require_extension_internal("GL_EXT_gpu_shader4"); } // GLES has very limited support for shadow samplers. 
// Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, // everything else can just throw if (image_is_comparison(imgtype, tex) && is_legacy_es()) { if (op == "texture" || op == "textureProj") require_extension_internal("GL_EXT_shadow_samplers"); else SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); } bool is_es_and_depth = is_legacy_es() && image_is_comparison(imgtype, tex); std::string type_prefix = image_is_comparison(imgtype, tex) ? "shadow" : "texture"; if (op == "texture") return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); else if (op == "textureLod") { if (use_explicit_lod) return join(type_prefix, type, is_legacy_es() ? "LodEXT" : "Lod"); else return join(type_prefix, type); } else if (op == "textureProj") return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj"); else if (op == "textureGrad") return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad"); else if (op == "textureProjLod") { if (use_explicit_lod) return join(type_prefix, type, is_legacy_es() ? "ProjLodEXT" : "ProjLod"); else return join(type_prefix, type, "Proj"); } else if (op == "textureLodOffset") { if (use_explicit_lod) return join(type_prefix, type, "LodOffset"); else return join(type_prefix, type); } else if (op == "textureProjGrad") return join(type_prefix, type, is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? 
"ProjGradARB" : "ProjGrad"); else if (op == "textureProjLodOffset") { if (use_explicit_lod) return join(type_prefix, type, "ProjLodOffset"); else return join(type_prefix, type, "ProjOffset"); } else { SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); } } bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) { auto *cleft = maybe_get(left); auto *cright = maybe_get(right); auto &lerptype = expression_type(lerp); // If our targets aren't constants, we cannot use construction. if (!cleft || !cright) return false; // If our targets are spec constants, we cannot use construction. if (cleft->specialization || cright->specialization) return false; // We can only use trivial construction if we have a scalar // (should be possible to do it for vectors as well, but that is overkill for now). if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1) return false; // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. 
bool ret = false; switch (type.basetype) { case SPIRType::Short: case SPIRType::UShort: ret = cleft->scalar_u16() == 0 && cright->scalar_u16() == 1; break; case SPIRType::Int: case SPIRType::UInt: ret = cleft->scalar() == 0 && cright->scalar() == 1; break; case SPIRType::Half: ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f; break; case SPIRType::Float: ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f; break; case SPIRType::Double: ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0; break; case SPIRType::Int64: case SPIRType::UInt64: ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1; break; default: break; } if (ret) op = type_to_glsl_constructor(type); return ret; } string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, uint32_t false_value) { string expr; auto &lerptype = expression_type(select); if (lerptype.vecsize == 1) expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ", to_enclosed_pointer_expression(false_value)); else { auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; expr = type_to_glsl_constructor(restype); expr += "("; for (uint32_t i = 0; i < restype.vecsize; i++) { expr += swiz(select, i); expr += " ? "; expr += swiz(true_value, i); expr += " : "; expr += swiz(false_value, i); if (i + 1 < restype.vecsize) expr += ", "; } expr += ")"; } return expr; } void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) { auto &lerptype = expression_type(lerp); auto &restype = get(result_type); // If this results in a variable pointer, assume it may be written through. 
if (restype.pointer) { register_write(left); register_write(right); } string mix_op; bool has_boolean_mix = *backend.boolean_mix_function && ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); // Cannot use boolean mix when the lerp argument is just one boolean, // fall back to regular trinary statements. if (lerptype.vecsize == 1) has_boolean_mix = false; // If we can reduce the mix to a simple cast, do so. // This helps for cases like int(bool), uint(bool) which is implemented with // OpSelect bool 1 0. if (trivial_mix) { emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); } else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) { // Boolean mix not supported on desktop without extension. // Was added in OpenGL 4.5 with ES 3.1 compat. // // Could use GL_EXT_shader_integer_mix on desktop at least, // but Apple doesn't support it. :( // Just implement it as ternary expressions. auto expr = to_ternary_expression(get(result_type), lerp, right, left); emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); inherit_expression_dependencies(id, left); inherit_expression_dependencies(id, right); inherit_expression_dependencies(id, lerp); } else if (lerptype.basetype == SPIRType::Boolean) emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function); else emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); } string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id) { // Keep track of the array indices we have used to load the image. // We'll need to use the same array index into the combined image sampler array. 
auto image_expr = to_expression(image_id); string array_expr; auto array_index = image_expr.find_first_of('['); if (array_index != string::npos) array_expr = image_expr.substr(array_index, string::npos); auto &args = current_function->arguments; // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect // all possible combinations into new sampler2D uniforms. auto *image = maybe_get_backing_variable(image_id); auto *samp = maybe_get_backing_variable(samp_id); if (image) image_id = image->self; if (samp) samp_id = samp->self; auto image_itr = find_if(begin(args), end(args), [image_id](const SPIRFunction::Parameter ¶m) { return param.id == image_id; }); auto sampler_itr = find_if(begin(args), end(args), [samp_id](const SPIRFunction::Parameter ¶m) { return param.id == samp_id; }); if (image_itr != end(args) || sampler_itr != end(args)) { // If any parameter originates from a parameter, we will find it in our argument list. bool global_image = image_itr == end(args); bool global_sampler = sampler_itr == end(args); uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args)); uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args)); auto &combined = current_function->combined_parameters; auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && p.sampler_id == sid; }); if (itr != end(combined)) return to_expression(itr->id) + array_expr; else { SPIRV_CROSS_THROW( "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used " "before compile() was called?"); } } else { // For global sampler2D, look directly at the global remapping table. 
auto &mapping = combined_image_samplers; auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { return combined.image_id == image_id && combined.sampler_id == samp_id; }); if (itr != end(combined_image_samplers)) return to_expression(itr->combined_id) + array_expr; else { SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " "before compile() was called?"); } } } void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) { if (options.vulkan_semantics && combined_image_samplers.empty()) { emit_binary_func_op(result_type, result_id, image_id, samp_id, type_to_glsl(get(result_type), result_id).c_str()); // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. forwarded_temporaries.erase(result_id); } else { // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. 
emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); } } static inline bool image_opcode_is_sample_no_dref(Op op) { switch (op) { case OpImageSampleExplicitLod: case OpImageSampleImplicitLod: case OpImageSampleProjExplicitLod: case OpImageSampleProjImplicitLod: case OpImageFetch: case OpImageRead: case OpImageSparseSampleExplicitLod: case OpImageSparseSampleImplicitLod: case OpImageSparseSampleProjExplicitLod: case OpImageSparseSampleProjImplicitLod: case OpImageSparseFetch: case OpImageSparseRead: return true; default: return false; } } void CompilerGLSL::emit_texture_op(const Instruction &i) { auto *ops = stream(i); auto op = static_cast(i.op); uint32_t length = i.length; SmallVector inherited_expressions; uint32_t result_type_id = ops[0]; uint32_t id = ops[1]; uint32_t img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; bool gather = false; bool proj = false; bool fetch = false; const uint32_t *opt = nullptr; auto &result_type = get(result_type_id); inherited_expressions.push_back(coord); // Make sure non-uniform decoration is back-propagated to where it needs to be. 
if (has_decoration(img, DecorationNonUniformEXT)) propagate_nonuniform_qualifier(img); switch (op) { case OpImageSampleDrefImplicitLod: case OpImageSampleDrefExplicitLod: dref = ops[4]; opt = &ops[5]; length -= 5; break; case OpImageSampleProjDrefImplicitLod: case OpImageSampleProjDrefExplicitLod: dref = ops[4]; opt = &ops[5]; length -= 5; proj = true; break; case OpImageDrefGather: dref = ops[4]; opt = &ops[5]; length -= 5; gather = true; break; case OpImageGather: comp = ops[4]; opt = &ops[5]; length -= 5; gather = true; break; case OpImageFetch: case OpImageRead: // Reads == fetches in Metal (other langs will not get here) opt = &ops[4]; length -= 4; fetch = true; break; case OpImageSampleProjImplicitLod: case OpImageSampleProjExplicitLod: opt = &ops[4]; length -= 4; proj = true; break; default: opt = &ops[4]; length -= 4; break; } // Bypass pointers because we need the real image struct auto &type = expression_type(img); auto &imgtype = get(type.self); uint32_t coord_components = 0; switch (imgtype.image.dim) { case spv::Dim1D: coord_components = 1; break; case spv::Dim2D: coord_components = 2; break; case spv::Dim3D: coord_components = 3; break; case spv::DimCube: coord_components = 3; break; case spv::DimBuffer: coord_components = 1; break; default: coord_components = 2; break; } if (dref) inherited_expressions.push_back(dref); if (proj) coord_components++; if (imgtype.image.arrayed) coord_components++; uint32_t bias = 0; uint32_t lod = 0; uint32_t grad_x = 0; uint32_t grad_y = 0; uint32_t coffset = 0; uint32_t offset = 0; uint32_t coffsets = 0; uint32_t sample = 0; uint32_t minlod = 0; uint32_t flags = 0; if (length) { flags = *opt++; length--; } auto test = [&](uint32_t &v, uint32_t flag) { if (length && (flags & flag)) { v = *opt++; inherited_expressions.push_back(v); length--; } }; test(bias, ImageOperandsBiasMask); test(lod, ImageOperandsLodMask); test(grad_x, ImageOperandsGradMask); test(grad_y, ImageOperandsGradMask); test(coffset, 
ImageOperandsConstOffsetMask); test(offset, ImageOperandsOffsetMask); test(coffsets, ImageOperandsConstOffsetsMask); test(sample, ImageOperandsSampleMask); test(minlod, ImageOperandsMinLodMask); string expr; bool forward = false; expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset), (!!grad_x || !!grad_y), !!dref, lod, minlod); expr += "("; expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod, coffset, offset, bias, comp, sample, minlod, &forward); expr += ")"; // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. if (is_legacy() && image_is_comparison(imgtype, img)) expr += ".r"; // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. // Remap back to 4 components as sampling opcodes expect. if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) { bool image_is_depth = false; const auto *combined = maybe_get(img); uint32_t image_id = combined ? combined->image : img; if (combined && image_is_comparison(imgtype, combined->image)) image_is_depth = true; else if (image_is_comparison(imgtype, img)) image_is_depth = true; // We must also check the backing variable for the image. // We might have loaded an OpImage, and used that handle for two different purposes. // Once with comparison, once without. auto *image_variable = maybe_get_backing_variable(image_id); if (image_variable && image_is_comparison(get(image_variable->basetype), image_variable->self)) image_is_depth = true; if (image_is_depth) expr = remap_swizzle(result_type, 1, expr); } if (!backend.support_small_type_sampling_result && result_type.width < 32) { // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. 
expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); } // Deals with reads from MSL. We might need to downconvert to fewer components. if (op == OpImageRead) expr = remap_swizzle(result_type, 4, expr); emit_op(result_type_id, id, expr, forward); for (auto &inherit : inherited_expressions) inherit_expression_dependencies(id, inherit); switch (op) { case OpImageSampleDrefImplicitLod: case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleProjDrefImplicitLod: register_control_dependent_expression(id); break; default: break; } } bool CompilerGLSL::expression_is_constant_null(uint32_t id) const { auto *c = maybe_get(id); if (!c) return false; return c->constant_is_null(); } // Returns the function name for a texture sampling function for the specified image and sampling characteristics. // For some subclasses, the function is a method on the specified image. string CompilerGLSL::to_function_name(uint32_t tex, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool, uint32_t lod, uint32_t minlod) { if (minlod != 0) SPIRV_CROSS_THROW("Sparse texturing not yet supported."); string fname; // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. // To emulate this, we will have to use textureGrad with a constant gradient of 0. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. bool workaround_lod_array_shadow_as_grad = false; if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && image_is_comparison(imgtype, tex) && lod) { if (!expression_is_constant_null(lod)) { SPIRV_CROSS_THROW( "textureLod on sampler2DArrayShadow is not constant 0.0. 
This cannot be expressed in GLSL."); } workaround_lod_array_shadow_as_grad = true; } if (is_fetch) fname += "texelFetch"; else { fname += "texture"; if (is_gather) fname += "Gather"; if (has_array_offsets) fname += "Offsets"; if (is_proj) fname += "Proj"; if (has_grad || workaround_lod_array_shadow_as_grad) fname += "Grad"; if (!!lod && !workaround_lod_array_shadow_as_grad) fname += "Lod"; } if (has_offset) fname += "Offset"; return is_legacy() ? legacy_tex_op(fname, imgtype, lod, tex) : fname; } std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) { auto *var = maybe_get_backing_variable(id); // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. if (var) { auto &type = get(var->basetype); if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) { if (options.vulkan_semantics) { // Newer glslang supports this extension to deal with texture2D as argument to texture functions. if (dummy_sampler_id) SPIRV_CROSS_THROW("Vulkan GLSL should not have a dummy sampler for combining."); require_extension_internal("GL_EXT_samplerless_texture_functions"); } else { if (!dummy_sampler_id) SPIRV_CROSS_THROW( "Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?"); return to_combined_image_sampler(id, dummy_sampler_id); } } } return to_expression(id); } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. 
string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/, bool *p_forward) { string farg_str; if (is_fetch) farg_str = convert_separate_image_to_expression(img); else farg_str = to_expression(img); bool swizz_func = backend.swizzle_is_function; auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { if (comps == in_comps) return ""; switch (comps) { case 1: return ".x"; case 2: return swizz_func ? ".xy()" : ".xy"; case 3: return swizz_func ? ".xyz()" : ".xyz"; default: return ""; } }; bool forward = should_forward(coord); // The IR can give us more components than we need, so chop them off as needed. auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize); // Only enclose the UV expression if needed. auto coord_expr = (*swizzle_expr == '\0') ? to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr); // texelFetch only takes int, not uint. auto &coord_type = expression_type(coord); if (coord_type.basetype == SPIRType::UInt) { auto expected_type = coord_type; expected_type.vecsize = coord_components; expected_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); } // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. // To emulate this, we will have to use textureGrad with a constant gradient of 0. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. 
bool workaround_lod_array_shadow_as_grad = ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && image_is_comparison(imgtype, img) && lod; if (dref) { forward = forward && should_forward(dref); // SPIR-V splits dref and coordinate. if (is_gather || coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. { farg_str += ", "; farg_str += to_expression(coord); farg_str += ", "; farg_str += to_expression(dref); } else if (is_proj) { // Have to reshuffle so we get vec4(coord, dref, proj), special case. // Other shading languages splits up the arguments for coord and compare value like SPIR-V. // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. farg_str += ", vec4("; if (imgtype.image.dim == Dim1D) { // Could reuse coord_expr, but we will mess up the temporary usage checking. farg_str += to_enclosed_expression(coord) + ".x"; farg_str += ", "; farg_str += "0.0, "; farg_str += to_expression(dref); farg_str += ", "; farg_str += to_enclosed_expression(coord) + ".y)"; } else if (imgtype.image.dim == Dim2D) { // Could reuse coord_expr, but we will mess up the temporary usage checking. farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy"); farg_str += ", "; farg_str += to_expression(dref); farg_str += ", "; farg_str += to_enclosed_expression(coord) + ".z)"; } else SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); } else { // Create a composite which merges coord/dref into a single vector. 
auto type = expression_type(coord); type.vecsize = coord_components + 1; farg_str += ", "; farg_str += type_to_glsl_constructor(type); farg_str += "("; farg_str += coord_expr; farg_str += ", "; farg_str += to_expression(dref); farg_str += ")"; } } else { farg_str += ", "; farg_str += coord_expr; } if (grad_x || grad_y) { forward = forward && should_forward(grad_x); forward = forward && should_forward(grad_y); farg_str += ", "; farg_str += to_expression(grad_x); farg_str += ", "; farg_str += to_expression(grad_y); } if (lod) { if (workaround_lod_array_shadow_as_grad) { // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. // Implementing this as plain texture() is not safe on some implementations. if (imgtype.image.dim == Dim2D) farg_str += ", vec2(0.0), vec2(0.0)"; else if (imgtype.image.dim == DimCube) farg_str += ", vec3(0.0), vec3(0.0)"; } else { if (check_explicit_lod_allowed(lod)) { forward = forward && should_forward(lod); farg_str += ", "; auto &lod_expr_type = expression_type(lod); // Lod expression for TexelFetch in GLSL must be int, and only int. if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms && lod_expr_type.basetype != SPIRType::Int) { farg_str += join("int(", to_expression(lod), ")"); } else { farg_str += to_expression(lod); } } } } else if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) { // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. 
farg_str += ", 0"; } if (coffset) { forward = forward && should_forward(coffset); farg_str += ", "; farg_str += to_expression(coffset); } else if (offset) { forward = forward && should_forward(offset); farg_str += ", "; farg_str += to_expression(offset); } if (bias) { forward = forward && should_forward(bias); farg_str += ", "; farg_str += to_expression(bias); } if (comp) { forward = forward && should_forward(comp); farg_str += ", "; farg_str += to_expression(comp); } if (sample) { farg_str += ", "; farg_str += to_expression(sample); } *p_forward = forward; return farg_str; } void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) { auto op = static_cast(eop); if (is_legacy() && is_unsigned_glsl_opcode(op)) SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); // If we need to do implicit bitcasts, make sure we do it with the correct type. uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); switch (op) { // FP fiddling case GLSLstd450Round: emit_unary_func_op(result_type, id, args[0], "round"); break; case GLSLstd450RoundEven: if ((options.es && options.version >= 300) || (!options.es && options.version >= 130)) emit_unary_func_op(result_type, id, args[0], "roundEven"); else SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up."); break; case GLSLstd450Trunc: emit_unary_func_op(result_type, id, args[0], "trunc"); break; case GLSLstd450SAbs: emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); break; case GLSLstd450FAbs: emit_unary_func_op(result_type, id, args[0], "abs"); break; case GLSLstd450SSign: emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); break; case GLSLstd450FSign: emit_unary_func_op(result_type, id, args[0], "sign"); break; case GLSLstd450Floor: 
emit_unary_func_op(result_type, id, args[0], "floor"); break; case GLSLstd450Ceil: emit_unary_func_op(result_type, id, args[0], "ceil"); break; case GLSLstd450Fract: emit_unary_func_op(result_type, id, args[0], "fract"); break; case GLSLstd450Radians: emit_unary_func_op(result_type, id, args[0], "radians"); break; case GLSLstd450Degrees: emit_unary_func_op(result_type, id, args[0], "degrees"); break; case GLSLstd450Fma: if ((!options.es && options.version < 400) || (options.es && options.version < 320)) { auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ", to_enclosed_expression(args[2])); emit_op(result_type, id, expr, should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); for (uint32_t i = 0; i < 3; i++) inherit_expression_dependencies(id, args[i]); } else emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); break; case GLSLstd450Modf: register_call_out_argument(args[1]); forced_temporaries.insert(id); emit_binary_func_op(result_type, id, args[0], args[1], "modf"); break; case GLSLstd450ModfStruct: { forced_temporaries.insert(id); auto &type = get(result_type); emit_uninitialized_temporary_expression(result_type, id); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", to_expression(id), ".", to_member_name(type, 1), ");"); break; } // Minmax case GLSLstd450UMin: emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); break; case GLSLstd450SMin: emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false); break; case GLSLstd450FMin: emit_binary_func_op(result_type, id, args[0], args[1], "min"); break; case GLSLstd450FMax: emit_binary_func_op(result_type, id, args[0], args[1], "max"); break; case GLSLstd450UMax: emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); break; case GLSLstd450SMax: emit_binary_func_op_cast(result_type, id, 
args[0], args[1], "max", int_type, false); break; case GLSLstd450FClamp: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); break; case GLSLstd450UClamp: emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type); break; case GLSLstd450SClamp: emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); break; // Trig case GLSLstd450Sin: emit_unary_func_op(result_type, id, args[0], "sin"); break; case GLSLstd450Cos: emit_unary_func_op(result_type, id, args[0], "cos"); break; case GLSLstd450Tan: emit_unary_func_op(result_type, id, args[0], "tan"); break; case GLSLstd450Asin: emit_unary_func_op(result_type, id, args[0], "asin"); break; case GLSLstd450Acos: emit_unary_func_op(result_type, id, args[0], "acos"); break; case GLSLstd450Atan: emit_unary_func_op(result_type, id, args[0], "atan"); break; case GLSLstd450Sinh: emit_unary_func_op(result_type, id, args[0], "sinh"); break; case GLSLstd450Cosh: emit_unary_func_op(result_type, id, args[0], "cosh"); break; case GLSLstd450Tanh: emit_unary_func_op(result_type, id, args[0], "tanh"); break; case GLSLstd450Asinh: emit_unary_func_op(result_type, id, args[0], "asinh"); break; case GLSLstd450Acosh: emit_unary_func_op(result_type, id, args[0], "acosh"); break; case GLSLstd450Atanh: emit_unary_func_op(result_type, id, args[0], "atanh"); break; case GLSLstd450Atan2: emit_binary_func_op(result_type, id, args[0], args[1], "atan"); break; // Exponentials case GLSLstd450Pow: emit_binary_func_op(result_type, id, args[0], args[1], "pow"); break; case GLSLstd450Exp: emit_unary_func_op(result_type, id, args[0], "exp"); break; case GLSLstd450Log: emit_unary_func_op(result_type, id, args[0], "log"); break; case GLSLstd450Exp2: emit_unary_func_op(result_type, id, args[0], "exp2"); break; case GLSLstd450Log2: emit_unary_func_op(result_type, id, args[0], "log2"); break; case GLSLstd450Sqrt: emit_unary_func_op(result_type, id, args[0], "sqrt"); break; case 
GLSLstd450InverseSqrt: emit_unary_func_op(result_type, id, args[0], "inversesqrt"); break; // Matrix math case GLSLstd450Determinant: emit_unary_func_op(result_type, id, args[0], "determinant"); break; case GLSLstd450MatrixInverse: emit_unary_func_op(result_type, id, args[0], "inverse"); break; // Lerping case GLSLstd450FMix: case GLSLstd450IMix: { emit_mix_op(result_type, id, args[0], args[1], args[2]); break; } case GLSLstd450Step: emit_binary_func_op(result_type, id, args[0], args[1], "step"); break; case GLSLstd450SmoothStep: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); break; // Packing case GLSLstd450Frexp: register_call_out_argument(args[1]); forced_temporaries.insert(id); emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); break; case GLSLstd450FrexpStruct: { forced_temporaries.insert(id); auto &type = get(result_type); emit_uninitialized_temporary_expression(result_type, id); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", to_expression(id), ".", to_member_name(type, 1), ");"); break; } case GLSLstd450Ldexp: emit_binary_func_op(result_type, id, args[0], args[1], "ldexp"); break; case GLSLstd450PackSnorm4x8: emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); break; case GLSLstd450PackUnorm4x8: emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); break; case GLSLstd450PackSnorm2x16: emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); break; case GLSLstd450PackUnorm2x16: emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); break; case GLSLstd450PackHalf2x16: emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); break; case GLSLstd450UnpackSnorm4x8: emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); break; case GLSLstd450UnpackUnorm4x8: emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); break; case GLSLstd450UnpackSnorm2x16: emit_unary_func_op(result_type, id, args[0], 
"unpackSnorm2x16"); break; case GLSLstd450UnpackUnorm2x16: emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); break; case GLSLstd450UnpackHalf2x16: emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); break; case GLSLstd450PackDouble2x32: emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); break; case GLSLstd450UnpackDouble2x32: emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); break; // Vector math case GLSLstd450Length: emit_unary_func_op(result_type, id, args[0], "length"); break; case GLSLstd450Distance: emit_binary_func_op(result_type, id, args[0], args[1], "distance"); break; case GLSLstd450Cross: emit_binary_func_op(result_type, id, args[0], args[1], "cross"); break; case GLSLstd450Normalize: emit_unary_func_op(result_type, id, args[0], "normalize"); break; case GLSLstd450FaceForward: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); break; case GLSLstd450Reflect: emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); break; case GLSLstd450Refract: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); break; // Bit-fiddling case GLSLstd450FindILsb: // findLSB always returns int. emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type); break; case GLSLstd450FindSMsb: emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); break; case GLSLstd450FindUMsb: emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, int_type); // findMSB always returns int. 
break; // Multisampled varying case GLSLstd450InterpolateAtCentroid: emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); break; case GLSLstd450InterpolateAtSample: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); break; case GLSLstd450InterpolateAtOffset: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); break; case GLSLstd450NMin: case GLSLstd450NMax: { emit_nminmax_op(result_type, id, args[0], args[1], op); break; } case GLSLstd450NClamp: { // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. uint32_t &max_id = extra_sub_expressions[id | 0x80000000u]; if (!max_id) max_id = ir.increase_bound_by(1); // Inherit precision qualifiers. ir.meta[max_id] = ir.meta[id]; emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); break; } default: statement("// unimplemented GLSL op ", eop); break; } } void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) { // Need to emulate this call. uint32_t &ids = extra_sub_expressions[id]; if (!ids) { ids = ir.increase_bound_by(5); auto btype = get(result_type); btype.basetype = SPIRType::Boolean; set(ids, btype); } uint32_t btype_id = ids + 0; uint32_t left_nan_id = ids + 1; uint32_t right_nan_id = ids + 2; uint32_t tmp_id = ids + 3; uint32_t mixed_first_id = ids + 4; // Inherit precision qualifiers. ir.meta[tmp_id] = ir.meta[id]; ir.meta[mixed_first_id] = ir.meta[id]; emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? 
"min" : "max"); emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); } void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension_internal("GL_AMD_shader_ballot"); enum AMDShaderBallot { SwizzleInvocationsAMD = 1, SwizzleInvocationsMaskedAMD = 2, WriteInvocationAMD = 3, MbcntAMD = 4 }; auto op = static_cast(eop); switch (op) { case SwizzleInvocationsAMD: emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); register_control_dependent_expression(id); break; case SwizzleInvocationsMaskedAMD: emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); register_control_dependent_expression(id); break; case WriteInvocationAMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); register_control_dependent_expression(id); break; case MbcntAMD: emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); register_control_dependent_expression(id); break; default: statement("// unimplemented SPV AMD shader ballot op ", eop); break; } } void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); enum AMDShaderExplicitVertexParameter { InterpolateAtVertexAMD = 1 }; auto op = static_cast(eop); switch (op) { case InterpolateAtVertexAMD: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD"); break; default: statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); break; } } void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension_internal("GL_AMD_shader_trinary_minmax"); enum AMDShaderTrinaryMinMax { FMin3AMD = 1, 
UMin3AMD = 2, SMin3AMD = 3, FMax3AMD = 4, UMax3AMD = 5, SMax3AMD = 6, FMid3AMD = 7, UMid3AMD = 8, SMid3AMD = 9 }; auto op = static_cast(eop); switch (op) { case FMin3AMD: case UMin3AMD: case SMin3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); break; case FMax3AMD: case UMax3AMD: case SMax3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); break; case FMid3AMD: case UMid3AMD: case SMid3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); break; default: statement("// unimplemented SPV AMD shader trinary minmax op ", eop); break; } } void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension_internal("GL_AMD_gcn_shader"); enum AMDGCNShader { CubeFaceIndexAMD = 1, CubeFaceCoordAMD = 2, TimeAMD = 3 }; auto op = static_cast(eop); switch (op) { case CubeFaceIndexAMD: emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); break; case CubeFaceCoordAMD: emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); break; case TimeAMD: { string expr = "timeAMD()"; emit_op(result_type, id, expr, true); register_control_dependent_expression(id); break; } default: statement("// unimplemented SPV AMD gcn shader op ", eop); break; } } void CompilerGLSL::emit_subgroup_op(const Instruction &i) { const uint32_t *ops = stream(i); auto op = static_cast(i.op); if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); switch (op) { case OpGroupNonUniformElect: require_extension_internal("GL_KHR_shader_subgroup_basic"); break; case OpGroupNonUniformBroadcast: case OpGroupNonUniformBroadcastFirst: case OpGroupNonUniformBallot: case OpGroupNonUniformInverseBallot: case OpGroupNonUniformBallotBitExtract: case OpGroupNonUniformBallotBitCount: case OpGroupNonUniformBallotFindLSB: case OpGroupNonUniformBallotFindMSB: 
require_extension_internal("GL_KHR_shader_subgroup_ballot"); break; case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: require_extension_internal("GL_KHR_shader_subgroup_shuffle"); break; case OpGroupNonUniformShuffleUp: case OpGroupNonUniformShuffleDown: require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); break; case OpGroupNonUniformAll: case OpGroupNonUniformAny: case OpGroupNonUniformAllEqual: require_extension_internal("GL_KHR_shader_subgroup_vote"); break; case OpGroupNonUniformFAdd: case OpGroupNonUniformFMul: case OpGroupNonUniformFMin: case OpGroupNonUniformFMax: case OpGroupNonUniformIAdd: case OpGroupNonUniformIMul: case OpGroupNonUniformSMin: case OpGroupNonUniformSMax: case OpGroupNonUniformUMin: case OpGroupNonUniformUMax: case OpGroupNonUniformBitwiseAnd: case OpGroupNonUniformBitwiseOr: case OpGroupNonUniformBitwiseXor: { auto operation = static_cast(ops[3]); if (operation == GroupOperationClusteredReduce) { require_extension_internal("GL_KHR_shader_subgroup_clustered"); } else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || operation == GroupOperationReduce) { require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); } else SPIRV_CROSS_THROW("Invalid group operation."); break; } case OpGroupNonUniformQuadSwap: case OpGroupNonUniformQuadBroadcast: require_extension_internal("GL_KHR_shader_subgroup_quad"); break; default: SPIRV_CROSS_THROW("Invalid opcode for subgroup."); } uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto scope = static_cast(get(ops[2]).scalar()); if (scope != ScopeSubgroup) SPIRV_CROSS_THROW("Only subgroup scope is supported."); switch (op) { case OpGroupNonUniformElect: emit_op(result_type, id, "subgroupElect()", true); break; case OpGroupNonUniformBroadcast: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); break; case OpGroupNonUniformBroadcastFirst: emit_unary_func_op(result_type, id, ops[3], 
"subgroupBroadcastFirst"); break; case OpGroupNonUniformBallot: emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); break; case OpGroupNonUniformInverseBallot: emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); break; case OpGroupNonUniformBallotBitExtract: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); break; case OpGroupNonUniformBallotFindLSB: emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); break; case OpGroupNonUniformBallotFindMSB: emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); break; case OpGroupNonUniformBallotBitCount: { auto operation = static_cast(ops[3]); if (operation == GroupOperationReduce) emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); else if (operation == GroupOperationInclusiveScan) emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); else if (operation == GroupOperationExclusiveScan) emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); else SPIRV_CROSS_THROW("Invalid BitCount operation."); break; } case OpGroupNonUniformShuffle: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); break; case OpGroupNonUniformShuffleXor: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); break; case OpGroupNonUniformShuffleUp: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); break; case OpGroupNonUniformShuffleDown: emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); break; case OpGroupNonUniformAll: emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); break; case OpGroupNonUniformAny: emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); break; case OpGroupNonUniformAllEqual: emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); break; // clang-format off #define GLSL_GROUP_OP(op, glsl_op) \ case OpGroupNonUniform##op: \ { \ auto operation = 
static_cast(ops[3]); \ if (operation == GroupOperationReduce) \ emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ else if (operation == GroupOperationInclusiveScan) \ emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ else if (operation == GroupOperationExclusiveScan) \ emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ else if (operation == GroupOperationClusteredReduce) \ emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ else \ SPIRV_CROSS_THROW("Invalid group operation."); \ break; \ } GLSL_GROUP_OP(FAdd, Add) GLSL_GROUP_OP(FMul, Mul) GLSL_GROUP_OP(FMin, Min) GLSL_GROUP_OP(FMax, Max) GLSL_GROUP_OP(IAdd, Add) GLSL_GROUP_OP(IMul, Mul) GLSL_GROUP_OP(SMin, Min) GLSL_GROUP_OP(SMax, Max) GLSL_GROUP_OP(UMin, Min) GLSL_GROUP_OP(UMax, Max) GLSL_GROUP_OP(BitwiseAnd, And) GLSL_GROUP_OP(BitwiseOr, Or) GLSL_GROUP_OP(BitwiseXor, Xor) #undef GLSL_GROUP_OP // clang-format on case OpGroupNonUniformQuadSwap: { uint32_t direction = get(ops[4]).scalar(); if (direction == 0) emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); else if (direction == 1) emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); else if (direction == 2) emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); else SPIRV_CROSS_THROW("Invalid quad swap direction."); break; } case OpGroupNonUniformQuadBroadcast: { emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); break; } default: SPIRV_CROSS_THROW("Invalid opcode for subgroup."); } register_control_dependent_expression(id); } string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { // OpBitcast can deal with pointers. 
if (out_type.pointer || in_type.pointer) return type_to_glsl(out_type); if (out_type.basetype == in_type.basetype) return ""; assert(out_type.basetype != SPIRType::Boolean); assert(in_type.basetype != SPIRType::Boolean); bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); bool same_size_cast = out_type.width == in_type.width; // Trivial bitcast case, casts between integers. if (integral_cast && same_size_cast) return type_to_glsl(out_type); // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) return "unpack8"; else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) return "pack16"; else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) return "pack32"; // Floating <-> Integer special casts. Just have to enumerate all cases. :( // 16-bit, 32-bit and 64-bit floats. if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) { if (is_legacy_es()) SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); else if (!options.es && options.version < 330) require_extension_internal("GL_ARB_shader_bit_encoding"); return "floatBitsToUint"; } else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) { if (is_legacy_es()) SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); else if (!options.es && options.version < 330) require_extension_internal("GL_ARB_shader_bit_encoding"); return "floatBitsToInt"; } else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) { if (is_legacy_es()) SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); else if (!options.es && options.version < 330) require_extension_internal("GL_ARB_shader_bit_encoding"); return "uintBitsToFloat"; } else if (out_type.basetype == SPIRType::Float && in_type.basetype == 
SPIRType::Int) { if (is_legacy_es()) SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); else if (!options.es && options.version < 330) require_extension_internal("GL_ARB_shader_bit_encoding"); return "intBitsToFloat"; } else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) return "doubleBitsToInt64"; else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) return "doubleBitsToUint64"; else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) return "int64BitsToDouble"; else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) return "uint64BitsToDouble"; else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) return "float16BitsToInt16"; else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) return "float16BitsToUint16"; else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) return "int16BitsToFloat16"; else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) return "uint16BitsToFloat16"; // And finally, some even more special purpose casts. 
if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) return "packUint2x32"; else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) return "unpackFloat2x16"; else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) return "packFloat2x16"; else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) return "packInt2x16"; else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) return "unpackInt2x16"; else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) return "packUint2x16"; else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) return "unpackUint2x16"; else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) return "packInt4x16"; else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) return "unpackInt4x16"; else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) return "packUint4x16"; else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) return "unpackUint4x16"; return ""; } string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) { auto op = bitcast_glsl_op(result_type, expression_type(argument)); if (op.empty()) return to_enclosed_unpacked_expression(argument); else return join(op, "(", to_unpacked_expression(argument), ")"); } std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) { auto expr = to_expression(arg); auto &src_type = expression_type(arg); if (src_type.basetype != target_type) { auto target = src_type; 
target.basetype = target_type; expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); } return expr; } std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, const std::string &expr) { if (target_type.basetype == expr_type) return expr; auto src_type = target_type; src_type.basetype = expr_type; return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); } string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) { switch (builtin) { case BuiltInPosition: return "gl_Position"; case BuiltInPointSize: return "gl_PointSize"; case BuiltInClipDistance: return "gl_ClipDistance"; case BuiltInCullDistance: return "gl_CullDistance"; case BuiltInVertexId: if (options.vulkan_semantics) SPIRV_CROSS_THROW( "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics."); return "gl_VertexID"; case BuiltInInstanceId: if (options.vulkan_semantics) SPIRV_CROSS_THROW( "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics."); return "gl_InstanceID"; case BuiltInVertexIndex: if (options.vulkan_semantics) return "gl_VertexIndex"; else return "gl_VertexID"; // gl_VertexID already has the base offset applied. case BuiltInInstanceIndex: if (options.vulkan_semantics) return "gl_InstanceIndex"; else if (options.vertex.support_nonzero_base_instance) return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. 
else return "gl_InstanceID"; case BuiltInPrimitiveId: if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) return "gl_PrimitiveIDIn"; else return "gl_PrimitiveID"; case BuiltInInvocationId: return "gl_InvocationID"; case BuiltInLayer: return "gl_Layer"; case BuiltInViewportIndex: return "gl_ViewportIndex"; case BuiltInTessLevelOuter: return "gl_TessLevelOuter"; case BuiltInTessLevelInner: return "gl_TessLevelInner"; case BuiltInTessCoord: return "gl_TessCoord"; case BuiltInFragCoord: return "gl_FragCoord"; case BuiltInPointCoord: return "gl_PointCoord"; case BuiltInFrontFacing: return "gl_FrontFacing"; case BuiltInFragDepth: return "gl_FragDepth"; case BuiltInNumWorkgroups: return "gl_NumWorkGroups"; case BuiltInWorkgroupSize: return "gl_WorkGroupSize"; case BuiltInWorkgroupId: return "gl_WorkGroupID"; case BuiltInLocalInvocationId: return "gl_LocalInvocationID"; case BuiltInGlobalInvocationId: return "gl_GlobalInvocationID"; case BuiltInLocalInvocationIndex: return "gl_LocalInvocationIndex"; case BuiltInHelperInvocation: return "gl_HelperInvocation"; case BuiltInBaseVertex: if (options.es) SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); if (options.version < 460) { require_extension_internal("GL_ARB_shader_draw_parameters"); return "gl_BaseVertexARB"; } return "gl_BaseVertex"; case BuiltInBaseInstance: if (options.es) SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); if (options.version < 460) { require_extension_internal("GL_ARB_shader_draw_parameters"); return "gl_BaseInstanceARB"; } return "gl_BaseInstance"; case BuiltInDrawIndex: if (options.es) SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); if (options.version < 460) { require_extension_internal("GL_ARB_shader_draw_parameters"); return "gl_DrawIDARB"; } return "gl_DrawID"; case BuiltInSampleId: if (options.es && options.version < 320) require_extension_internal("GL_OES_sample_variables"); if (!options.es && options.version < 400) 
SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400."); return "gl_SampleID"; case BuiltInSampleMask: if (options.es && options.version < 320) require_extension_internal("GL_OES_sample_variables"); if (!options.es && options.version < 400) SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400."); if (storage == StorageClassInput) return "gl_SampleMaskIn"; else return "gl_SampleMask"; case BuiltInSamplePosition: if (options.es && options.version < 320) require_extension_internal("GL_OES_sample_variables"); if (!options.es && options.version < 400) SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400."); return "gl_SamplePosition"; case BuiltInViewIndex: if (options.vulkan_semantics) { require_extension_internal("GL_EXT_multiview"); return "gl_ViewIndex"; } else { require_extension_internal("GL_OVR_multiview2"); return "gl_ViewID_OVR"; } case BuiltInNumSubgroups: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_basic"); return "gl_NumSubgroups"; case BuiltInSubgroupId: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_basic"); return "gl_SubgroupID"; case BuiltInSubgroupSize: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_basic"); return "gl_SubgroupSize"; case BuiltInSubgroupLocalInvocationId: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_basic"); return "gl_SubgroupInvocationID"; case BuiltInSubgroupEqMask: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_ballot"); return "gl_SubgroupEqMask"; case BuiltInSubgroupGeMask: if (!options.vulkan_semantics) 
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_ballot"); return "gl_SubgroupGeMask"; case BuiltInSubgroupGtMask: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_ballot"); return "gl_SubgroupGtMask"; case BuiltInSubgroupLeMask: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_ballot"); return "gl_SubgroupLeMask"; case BuiltInSubgroupLtMask: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup."); require_extension_internal("GL_KHR_shader_subgroup_ballot"); return "gl_SubgroupLtMask"; case BuiltInLaunchIdNV: return "gl_LaunchIDNV"; case BuiltInLaunchSizeNV: return "gl_LaunchSizeNV"; case BuiltInWorldRayOriginNV: return "gl_WorldRayOriginNV"; case BuiltInWorldRayDirectionNV: return "gl_WorldRayDirectionNV"; case BuiltInObjectRayOriginNV: return "gl_ObjectRayOriginNV"; case BuiltInObjectRayDirectionNV: return "gl_ObjectRayDirectionNV"; case BuiltInRayTminNV: return "gl_RayTminNV"; case BuiltInRayTmaxNV: return "gl_RayTmaxNV"; case BuiltInInstanceCustomIndexNV: return "gl_InstanceCustomIndexNV"; case BuiltInObjectToWorldNV: return "gl_ObjectToWorldNV"; case BuiltInWorldToObjectNV: return "gl_WorldToObjectNV"; case BuiltInHitTNV: return "gl_HitTNV"; case BuiltInHitKindNV: return "gl_HitKindNV"; case BuiltInIncomingRayFlagsNV: return "gl_IncomingRayFlagsNV"; case BuiltInBaryCoordNV: { if (options.es && options.version < 320) SPIRV_CROSS_THROW("gl_BaryCoordNV requires ESSL 320."); else if (!options.es && options.version < 450) SPIRV_CROSS_THROW("gl_BaryCoordNV requires GLSL 450."); require_extension_internal("GL_NV_fragment_shader_barycentric"); return "gl_BaryCoordNV"; } case BuiltInBaryCoordNoPerspNV: { if (options.es && options.version < 320) SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires ESSL 
320."); else if (!options.es && options.version < 450) SPIRV_CROSS_THROW("gl_BaryCoordNoPerspNV requires GLSL 450."); require_extension_internal("GL_NV_fragment_shader_barycentric"); return "gl_BaryCoordNoPerspNV"; } case BuiltInFragStencilRefEXT: { if (!options.es) { require_extension_internal("GL_ARB_shader_stencil_export"); return "gl_FragStencilRefARB"; } else SPIRV_CROSS_THROW("Stencil export not supported in GLES."); } case BuiltInDeviceIndex: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Need Vulkan semantics for device group support."); require_extension_internal("GL_EXT_device_group"); return "gl_DeviceIndex"; default: return join("gl_BuiltIn_", convert_to_string(builtin)); } } const char *CompilerGLSL::index_to_swizzle(uint32_t index) { switch (index) { case 0: return "x"; case 1: return "y"; case 2: return "z"; case 3: return "w"; default: SPIRV_CROSS_THROW("Swizzle index out of range"); } } string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, AccessChainFlags flags, AccessChainMeta *meta) { string expr; bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; if (!chain_only) expr = to_enclosed_expression(base, register_expression_read); // Start traversing type hierarchy at the proper non-pointer types, // but keep type_id referencing the original pointer for use below. uint32_t type_id = expression_type_id(base); if (!backend.native_pointers) { if (ptr_chain) SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain."); // Wrapped buffer reference pointer types will need to poke into the internal "value" member before // continuing the access chain. 
if (should_dereference(base)) { auto &type = get(type_id); expr = dereference_expression(type, expr); } } const auto *type = &get_pointee_type(type_id); bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked); uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID); bool is_invariant = has_decoration(base, DecorationInvariant); bool pending_array_enclose = false; bool dimension_flatten = false; const auto append_index = [&](uint32_t index) { expr += "["; // If we are indexing into an array of SSBOs or UBOs, we need to index it with a non-uniform qualifier. bool nonuniform_index = has_decoration(index, DecorationNonUniformEXT) && (has_decoration(type->self, DecorationBlock) || has_decoration(type->self, DecorationBufferBlock)); if (nonuniform_index) { expr += backend.nonuniform_qualifier; expr += "("; } if (index_is_literal) expr += convert_to_string(index); else expr += to_expression(index, register_expression_read); if (nonuniform_index) expr += ")"; expr += "]"; }; for (uint32_t i = 0; i < count; i++) { uint32_t index = indices[i]; // Pointer chains if (ptr_chain && i == 0) { // If we are flattening multidimensional arrays, only create opening bracket on first // array index. if (options.flatten_multidimensional_arrays) { dimension_flatten = type->array.size() >= 1; pending_array_enclose = dimension_flatten; if (pending_array_enclose) expr += "["; } if (options.flatten_multidimensional_arrays && dimension_flatten) { // If we are flattening multidimensional arrays, do manual stride computation. 
if (index_is_literal) expr += convert_to_string(index); else expr += to_enclosed_expression(index, register_expression_read); for (auto j = uint32_t(type->array.size()); j; j--) { expr += " * "; expr += enclose_expression(to_array_size(*type, j - 1)); } if (type->array.empty()) pending_array_enclose = false; else expr += " + "; if (!pending_array_enclose) expr += "]"; } else { append_index(index); } if (type->basetype == SPIRType::ControlPointArray) { type_id = type->parent_type; type = &get(type_id); } access_chain_is_arrayed = true; } // Arrays else if (!type->array.empty()) { // If we are flattening multidimensional arrays, only create opening bracket on first // array index. if (options.flatten_multidimensional_arrays && !pending_array_enclose) { dimension_flatten = type->array.size() > 1; pending_array_enclose = dimension_flatten; if (pending_array_enclose) expr += "["; } assert(type->parent_type); auto *var = maybe_get(base); if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && !has_decoration(type->self, DecorationBlock)) { // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. // Normally, these variables live in blocks when compiled from GLSL, // but HLSL seems to just emit straight arrays here. // We must pretend this access goes through gl_in/gl_out arrays // to be able to access certain builtins as arrays. auto builtin = ir.meta[base].decoration.builtin_type; switch (builtin) { // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. 
// case BuiltInClipDistance: case BuiltInPosition: case BuiltInPointSize: if (var->storage == StorageClassInput) expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); else if (var->storage == StorageClassOutput) expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); else append_index(index); break; default: append_index(index); break; } } else if (options.flatten_multidimensional_arrays && dimension_flatten) { // If we are flattening multidimensional arrays, do manual stride computation. auto &parent_type = get(type->parent_type); if (index_is_literal) expr += convert_to_string(index); else expr += to_enclosed_expression(index, register_expression_read); for (auto j = uint32_t(parent_type.array.size()); j; j--) { expr += " * "; expr += enclose_expression(to_array_size(parent_type, j - 1)); } if (parent_type.array.empty()) pending_array_enclose = false; else expr += " + "; if (!pending_array_enclose) expr += "]"; } else { append_index(index); } type_id = type->parent_type; type = &get(type_id); access_chain_is_arrayed = true; } // For structs, the index refers to a constant, which indexes into the members. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. 
else if (type->basetype == SPIRType::Struct) { if (!index_is_literal) index = get(index).scalar(); if (index >= type->member_types.size()) SPIRV_CROSS_THROW("Member index is out of bounds!"); BuiltIn builtin; if (is_member_builtin(*type, index, &builtin)) { if (access_chain_is_arrayed) { expr += "."; expr += builtin_to_glsl(builtin, type->storage); } else expr = builtin_to_glsl(builtin, type->storage); } else { // If the member has a qualified name, use it as the entire chain string qual_mbr_name = get_member_qualified_name(type_id, index); if (!qual_mbr_name.empty()) expr = qual_mbr_name; else expr += to_member_reference(base, *type, index, ptr_chain); } if (has_member_decoration(type->self, index, DecorationInvariant)) is_invariant = true; is_packed = member_is_packed_physical_type(*type, index); if (member_is_remapped_physical_type(*type, index)) physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID); else physical_type = 0; row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); type = &get(type->member_types[index]); } // Matrix -> Vector else if (type->columns > 1) { // If we have a row-major matrix here, we need to defer any transpose in case this access chain // is used to store a column. We can resolve it right here and now if we access a scalar directly, // by flipping indexing order of the matrix. expr += "["; if (index_is_literal) expr += convert_to_string(index); else expr += to_expression(index, register_expression_read); expr += "]"; type_id = type->parent_type; type = &get(type_id); } // Vector -> Scalar else if (type->vecsize > 1) { string deferred_index; if (row_major_matrix_needs_conversion) { // Flip indexing order. 
auto column_index = expr.find_last_of('['); if (column_index != string::npos) { deferred_index = expr.substr(column_index); expr.resize(column_index); } } if (index_is_literal && !is_packed && !row_major_matrix_needs_conversion) { expr += "."; expr += index_to_swizzle(index); } else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion) { auto &c = get(index); expr += "."; expr += index_to_swizzle(c.scalar()); } else if (index_is_literal) { // For packed vectors, we can only access them as an array, not by swizzle. expr += join("[", index, "]"); } else { expr += "["; expr += to_expression(index, register_expression_read); expr += "]"; } expr += deferred_index; row_major_matrix_needs_conversion = false; is_packed = false; physical_type = 0; type_id = type->parent_type; type = &get(type_id); } else if (!backend.allow_truncated_access_chain) SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); } if (pending_array_enclose) { SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " "but the access chain was terminated in the middle of a multidimensional array. 
" "This is not supported."); } if (meta) { meta->need_transpose = row_major_matrix_needs_conversion; meta->storage_is_packed = is_packed; meta->storage_is_invariant = is_invariant; meta->storage_physical_type = physical_type; } return expr; } string CompilerGLSL::to_flattened_struct_member(const SPIRVariable &var, uint32_t index) { auto &type = get(var.basetype); return sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, index))); } string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, AccessChainMeta *meta, bool ptr_chain) { if (flattened_buffer_blocks.count(base)) { uint32_t matrix_stride = 0; bool need_transpose = false; flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, ptr_chain); if (meta) { meta->need_transpose = target_type.columns > 1 && need_transpose; meta->storage_is_packed = false; } return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose); } else if (flattened_structs.count(base) && count > 0) { AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; if (ptr_chain) flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); if (meta) { meta->need_transpose = false; meta->storage_is_packed = false; } return sanitize_underscores(join(to_name(base), "_", chain)); } else { AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; if (ptr_chain) flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; return access_chain_internal(base, indices, count, flags, meta); } } string CompilerGLSL::load_flattened_struct(SPIRVariable &var) { auto expr = type_to_glsl_constructor(get(var.basetype)); expr += '('; auto &type = get(var.basetype); for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { if (i) expr += ", "; // Flatten the varyings. 
// Apply name transformation for flattened I/O blocks. expr += to_flattened_struct_member(var, i); } expr += ')'; return expr; } void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value) { // We're trying to store a structure which has been flattened. // Need to copy members one by one. auto rhs = to_expression(value); // Store result locally. // Since we're declaring a variable potentially multiple times here, // store the variable in an isolated scope. begin_scope(); statement(variable_decl_function_local(var), " = ", rhs, ";"); auto &type = get(var.basetype); for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { // Flatten the varyings. // Apply name transformation for flattened I/O blocks. auto lhs = sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, i))); rhs = join(to_name(var.self), ".", to_member_name(type, i)); statement(lhs, " = ", rhs, ";"); } end_scope(); } std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, bool need_transpose) { if (!target_type.array.empty()) SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); else if (target_type.basetype == SPIRType::Struct) return flattened_access_chain_struct(base, indices, count, target_type, offset); else if (target_type.columns > 1) return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); else return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose); } std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset) { std::string expr; expr += type_to_glsl_constructor(target_type); expr += "("; for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) { if (i != 0) expr += ", "; const SPIRType 
&member_type = get(target_type.member_types[i]); uint32_t member_offset = type_struct_member_offset(target_type, i); // The access chain terminates at the struct, so we need to find matrix strides and row-major information // ahead of time. bool need_transpose = false; uint32_t matrix_stride = 0; if (member_type.columns > 1) { need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor); matrix_stride = type_struct_member_matrix_stride(target_type, i); } auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, need_transpose); // Cannot forward transpositions, so resolve them here. if (need_transpose) expr += convert_row_major_matrix(tmp, member_type, 0, false); else expr += tmp; } expr += ")"; return expr; } std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, bool need_transpose) { assert(matrix_stride); SPIRType tmp_type = target_type; if (need_transpose) swap(tmp_type.vecsize, tmp_type.columns); std::string expr; expr += type_to_glsl_constructor(tmp_type); expr += "("; for (uint32_t i = 0; i < tmp_type.columns; i++) { if (i != 0) expr += ", "; expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, /* need_transpose= */ false); } expr += ")"; return expr; } std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, bool need_transpose) { auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); auto buffer_name = to_name(expression_type(base).self); if (need_transpose) { std::string expr; if (target_type.vecsize > 1) { expr += type_to_glsl_constructor(target_type); expr += "("; } for (uint32_t i = 0; i < target_type.vecsize; ++i) { if (i != 0) 
expr += ", "; uint32_t component_offset = result.second + i * matrix_stride; assert(component_offset % (target_type.width / 8) == 0); uint32_t index = component_offset / (target_type.width / 8); expr += buffer_name; expr += "["; expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + expr += convert_to_string(index / 4); expr += "]"; expr += vector_swizzle(1, index % 4); } if (target_type.vecsize > 1) { expr += ")"; } return expr; } else { assert(result.second % (target_type.width / 8) == 0); uint32_t index = result.second / (target_type.width / 8); std::string expr; expr += buffer_name; expr += "["; expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + expr += convert_to_string(index / 4); expr += "]"; expr += vector_swizzle(target_type.vecsize, index % 4); return expr; } } std::pair CompilerGLSL::flattened_access_chain_offset( const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, bool *need_transpose, uint32_t *out_matrix_stride, bool ptr_chain) { // Start traversing type hierarchy at the proper non-pointer types. const auto *type = &get_pointee_type(basetype); // This holds the type of the current pointer which we are traversing through. // We always start out from a struct type which is the block. // This is primarily used to reflect the array strides and matrix strides later. // For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set // accordingly as members of structs are accessed. assert(type->basetype == SPIRType::Struct); uint32_t type_id = 0; std::string expr; // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; uint32_t matrix_stride = out_matrix_stride ? 
*out_matrix_stride : 0; for (uint32_t i = 0; i < count; i++) { uint32_t index = indices[i]; // Pointers if (ptr_chain && i == 0) { // Here, the pointer type will be decorated with an array stride. uint32_t array_stride = get_decoration(basetype.self, DecorationArrayStride); if (!array_stride) SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); auto *constant = maybe_get(index); if (constant) { // Constant array access. offset += constant->scalar() * array_stride; } else { // Dynamic array access. if (array_stride % word_stride) { SPIRV_CROSS_THROW( "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index); expr += " * "; expr += convert_to_string(array_stride / word_stride); expr += " + "; } // Type ID is unchanged. } // Arrays else if (!type->array.empty()) { // Here, the type_id will be a type ID for the array type itself. uint32_t array_stride = get_decoration(type_id, DecorationArrayStride); if (!array_stride) SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); auto *constant = maybe_get(index); if (constant) { // Constant array access. offset += constant->scalar() * array_stride; } else { // Dynamic array access. if (array_stride % word_stride) { SPIRV_CROSS_THROW( "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " "This cannot be flattened. 
Try using std140 layout instead."); } expr += to_enclosed_expression(index, false); expr += " * "; expr += convert_to_string(array_stride / word_stride); expr += " + "; } uint32_t parent_type = type->parent_type; type = &get(parent_type); type_id = parent_type; // Type ID now refers to the array type with one less dimension. } // For structs, the index refers to a constant, which indexes into the members. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. else if (type->basetype == SPIRType::Struct) { index = get(index).scalar(); if (index >= type->member_types.size()) SPIRV_CROSS_THROW("Member index is out of bounds!"); offset += type_struct_member_offset(*type, index); type_id = type->member_types[index]; auto &struct_type = *type; type = &get(type->member_types[index]); if (type->columns > 1) { matrix_stride = type_struct_member_matrix_stride(struct_type, index); row_major_matrix_needs_conversion = combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); } else row_major_matrix_needs_conversion = false; } // Matrix -> Vector else if (type->columns > 1) { auto *constant = maybe_get(index); if (constant) { index = get(index).scalar(); offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); } else { uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; // Dynamic array access. if (indexing_stride % word_stride) { SPIRV_CROSS_THROW( "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. " "Likely culprit here is a row-major matrix being accessed dynamically. " "This cannot be flattened. 
Try using std140 layout instead."); } expr += to_enclosed_expression(index, false); expr += " * "; expr += convert_to_string(indexing_stride / word_stride); expr += " + "; } uint32_t parent_type = type->parent_type; type = &get(type->parent_type); type_id = parent_type; } // Vector -> Scalar else if (type->vecsize > 1) { auto *constant = maybe_get(index); if (constant) { index = get(index).scalar(); offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); } else { uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); // Dynamic array access. if (indexing_stride % word_stride) { SPIRV_CROSS_THROW( "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. " "This cannot be flattened in legacy targets."); } expr += to_enclosed_expression(index, false); expr += " * "; expr += convert_to_string(indexing_stride / word_stride); expr += " + "; } uint32_t parent_type = type->parent_type; type = &get(type->parent_type); type_id = parent_type; } else SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); } if (need_transpose) *need_transpose = row_major_matrix_needs_conversion; if (out_matrix_stride) *out_matrix_stride = matrix_stride; return std::make_pair(expr, offset); } bool CompilerGLSL::should_dereference(uint32_t id) { const auto &type = expression_type(id); // Non-pointer expressions don't need to be dereferenced. if (!type.pointer) return false; // Handles shouldn't be dereferenced either. if (!expression_is_lvalue(id)) return false; // If id is a variable but not a phi variable, we should not dereference it. if (auto *var = maybe_get(id)) return var->phi_variable; // If id is an access chain, we should not dereference it. if (auto *expr = maybe_get(id)) return !expr->access_chain; // Otherwise, we should dereference this pointer expression. 
return true; } bool CompilerGLSL::should_forward(uint32_t id) { // If id is a variable we will try to forward it regardless of force_temporary check below // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL auto *var = maybe_get(id); if (var && var->forwardable) return true; // For debugging emit temporary variables for all expressions if (options.force_temporary) return false; // Immutable expression can always be forwarded. if (is_immutable(id)) return true; return false; } void CompilerGLSL::track_expression_read(uint32_t id) { switch (ir.ids[id].get_type()) { case TypeExpression: { auto &e = get(id); for (auto implied_read : e.implied_read_expressions) track_expression_read(implied_read); break; } case TypeAccessChain: { auto &e = get(id); for (auto implied_read : e.implied_read_expressions) track_expression_read(implied_read); break; } default: break; } // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. if (expression_is_forwarded(id)) { auto &v = expression_usage_counts[id]; v++; if (v >= 2) { //if (v == 2) // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); forced_temporaries.insert(id); // Force a recompile after this pass to avoid forwarding this variable. force_recompile(); } } } bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) { if (forced_temporaries.find(id) != end(forced_temporaries)) return false; for (uint32_t i = 0; i < num_args; i++) if (!should_forward(args[i])) return false; // We need to forward globals as well. 
if (!pure) { for (auto global : global_variables) if (!should_forward(global)) return false; for (auto aliased : aliased_variables) if (!should_forward(aliased)) return false; } return true; } void CompilerGLSL::register_impure_function_call() { // Impure functions can modify globals and aliased variables, so invalidate them as well. for (auto global : global_variables) flush_dependees(get(global)); for (auto aliased : aliased_variables) flush_dependees(get(aliased)); } void CompilerGLSL::register_call_out_argument(uint32_t id) { register_write(id); auto *var = maybe_get(id); if (var) flush_variable_declaration(var->self); } string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) { // These variables are always function local, // so make sure we emit the variable without storage qualifiers. // Some backends will inject custom variables locally in a function // with a storage qualifier which is not function-local. auto old_storage = var.storage; var.storage = StorageClassFunction; auto expr = variable_decl(var); var.storage = old_storage; return expr; } void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) { if (var.allocate_temporary_copy) { auto &type = get(var.basetype); auto &flags = get_decoration_bitset(var.self); statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); } } void CompilerGLSL::flush_variable_declaration(uint32_t id) { auto *var = maybe_get(id); if (var && var->deferred_declaration) { statement(variable_decl_function_local(*var), ";"); emit_variable_temporary_copies(*var); var->deferred_declaration = false; } } bool CompilerGLSL::remove_duplicate_swizzle(string &op) { auto pos = op.find_last_of('.'); if (pos == string::npos || pos == 0) return false; string final_swiz = op.substr(pos + 1, string::npos); if (backend.swizzle_is_function) { if (final_swiz.size() < 2) return false; if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") 
final_swiz.erase(final_swiz.size() - 2, string::npos); else return false; } // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. // If so, and previous swizzle is of same length, // we can drop the final swizzle altogether. for (uint32_t i = 0; i < final_swiz.size(); i++) { static const char expected[] = { 'x', 'y', 'z', 'w' }; if (i >= 4 || final_swiz[i] != expected[i]) return false; } auto prevpos = op.find_last_of('.', pos - 1); if (prevpos == string::npos) return false; prevpos++; // Make sure there are only swizzles here ... for (auto i = prevpos; i < pos; i++) { if (op[i] < 'w' || op[i] > 'z') { // If swizzles are foo.xyz() like in C++ backend for example, check for that. if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') break; return false; } } // If original swizzle is large enough, just carve out the components we need. // E.g. foobar.wyx.xy will turn into foobar.wy. if (pos - prevpos >= final_swiz.size()) { op.erase(prevpos + final_swiz.size(), string::npos); // Add back the function call ... if (backend.swizzle_is_function) op += "()"; } return true; } // Optimizes away vector swizzles where we have something like // vec3 foo; // foo.xyz <-- swizzle expression does nothing. // This is a very common pattern after OpCompositeCombine. bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) { auto pos = op.find_last_of('.'); if (pos == string::npos || pos == 0) return false; string final_swiz = op.substr(pos + 1, string::npos); if (backend.swizzle_is_function) { if (final_swiz.size() < 2) return false; if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") final_swiz.erase(final_swiz.size() - 2, string::npos); else return false; } // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. // If so, and previous swizzle is of same length, // we can drop the final swizzle altogether. 
for (uint32_t i = 0; i < final_swiz.size(); i++) { static const char expected[] = { 'x', 'y', 'z', 'w' }; if (i >= 4 || final_swiz[i] != expected[i]) return false; } auto &type = expression_type(base); // Sanity checking ... assert(type.columns == 1 && type.array.empty()); if (type.vecsize == final_swiz.size()) op.erase(pos, string::npos); return true; } string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) { uint32_t base = 0; string op; string subop; // Can only merge swizzles for vectors. auto &type = get(return_type); bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; bool swizzle_optimization = false; for (uint32_t i = 0; i < length; i++) { auto *e = maybe_get(elems[i]); // If we're merging another scalar which belongs to the same base // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) { // Only supposed to be used for vector swizzle -> scalar. assert(!e->expression.empty() && e->expression.front() == '.'); subop += e->expression.substr(1, string::npos); swizzle_optimization = true; } else { // We'll likely end up with duplicated swizzles, e.g. // foobar.xyz.xyz from patterns like // OpVectorShuffle // OpCompositeExtract x 3 // OpCompositeConstruct 3x + other scalar. // Just modify op in-place. if (swizzle_optimization) { if (backend.swizzle_is_function) subop += "()"; // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. // Essentially, we can only remove one set of swizzles, since that's what we have control over ... 
// Case 1: // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. // Case 2: // foo.xyz: Duplicate swizzle won't kick in. // If foo is vec3, we can remove xyz, giving just foo. if (!remove_duplicate_swizzle(subop)) remove_unity_swizzle(base, subop); // Strips away redundant parens if we created them during component extraction. strip_enclosed_expression(subop); swizzle_optimization = false; op += subop; } else op += subop; if (i) op += ", "; subop = to_composite_constructor_expression(elems[i]); } base = e ? e->base_expression : 0; } if (swizzle_optimization) { if (backend.swizzle_is_function) subop += "()"; if (!remove_duplicate_swizzle(subop)) remove_unity_swizzle(base, subop); // Strips away redundant parens if we created them during component extraction. strip_enclosed_expression(subop); } op += subop; return op; } bool CompilerGLSL::skip_argument(uint32_t id) const { if (!combined_image_samplers.empty() || !options.vulkan_semantics) { auto &type = expression_type(id); if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) return true; } return false; } bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) { // Do this with strings because we have a very clear pattern we can check for and it avoids // adding lots of special cases to the code emission. if (rhs.size() < lhs.size() + 3) return false; // Do not optimize matrices. They are a bit awkward to reason about in general // (in which order does operation happen?), and it does not work on MSL anyways. if (type.vecsize > 1 && type.columns > 1) return false; auto index = rhs.find(lhs); if (index != 0) return false; // TODO: Shift operators, but it's not important for now. auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); if (op != lhs.size() + 1) return false; // Check that the op is followed by space. 
This excludes && and ||. if (rhs[op + 1] != ' ') return false; char bop = rhs[op]; auto expr = rhs.substr(lhs.size() + 3); // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. // Find some common patterns which are equivalent. if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) statement(lhs, bop, bop, ";"); else statement(lhs, " ", bop, "= ", expr, ";"); return true; } void CompilerGLSL::register_control_dependent_expression(uint32_t expr) { if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) return; assert(current_emitting_block); current_emitting_block->invalidate_expressions.push_back(expr); } void CompilerGLSL::emit_block_instructions(SPIRBlock &block) { current_emitting_block = █ for (auto &op : block.ops) emit_instruction(op); current_emitting_block = nullptr; } void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) { if (forwarded_temporaries.count(expr.self)) { forced_temporaries.insert(expr.self); force_recompile(); } for (auto &dependent : expr.expression_dependencies) disallow_forwarding_in_expression_chain(get(dependent)); } void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) { // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary // in one translation unit, but not another, e.g. due to multiple use of an expression. // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent // expressions to be temporaries. // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough // for all reasonable uses of invariant. 
if (!has_decoration(store_id, DecorationInvariant)) return; auto *expr = maybe_get(value_id); if (!expr) return; disallow_forwarding_in_expression_chain(*expr); } void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) { auto rhs = to_pointer_expression(rhs_expression); // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. if (!rhs.empty()) { handle_store_to_invariant_variable(lhs_expression, rhs_expression); auto lhs = to_dereferenced_expression(lhs_expression); // We might need to bitcast in order to store to a builtin. bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression)); // Tries to optimize assignments like " = op expr". // While this is purely cosmetic, this is important for legacy ESSL where loop // variable increments must be in either i++ or i += const-expr. // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) statement(lhs, " = ", rhs, ";"); register_write(lhs_expression); } } uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const { if (instr.length < 3) return 32; auto *ops = stream(instr); switch (instr.op) { case OpSConvert: case OpConvertSToF: case OpUConvert: case OpConvertUToF: case OpIEqual: case OpINotEqual: case OpSLessThan: case OpSLessThanEqual: case OpSGreaterThan: case OpSGreaterThanEqual: return expression_type(ops[2]).width; default: { // We can look at result type which is more robust. 
auto *type = maybe_get(ops[0]); if (type && type_is_integral(*type)) return type->width; else return 32; } } } uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const { if (length < 1) return 32; switch (op) { case GLSLstd450SAbs: case GLSLstd450SSign: case GLSLstd450UMin: case GLSLstd450SMin: case GLSLstd450UMax: case GLSLstd450SMax: case GLSLstd450UClamp: case GLSLstd450SClamp: case GLSLstd450FindSMsb: case GLSLstd450FindUMsb: return expression_type(ops[0]).width; default: { // We don't need to care about other opcodes, just return 32. return 32; } } } void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); auto opcode = static_cast(instruction.op); uint32_t length = instruction.length; #define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) #define GLSL_BOP_CAST(op, type) \ emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) #define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) #define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) #define GLSL_BFOP_CAST(op, type) \ emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) #define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) #define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) // If we need to do implicit bitcasts, make sure we do it with the correct type. 
uint32_t integer_width = get_integer_width_for_instruction(instruction); auto int_type = to_signed_basetype(integer_width); auto uint_type = to_unsigned_basetype(integer_width); switch (opcode) { // Dealing with memory case OpLoad: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; flush_variable_declaration(ptr); // If we're loading from memory that cannot be changed by the shader, // just forward the expression directly to avoid needless temporaries. // If an expression is mutable and forwardable, we speculate that it is immutable. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); // If loading a non-native row-major matrix, mark the expression as need_transpose. bool need_transpose = false; bool old_need_transpose = false; auto *ptr_expression = maybe_get(ptr); if (ptr_expression && ptr_expression->need_transpose) { old_need_transpose = true; ptr_expression->need_transpose = false; need_transpose = true; } else if (is_non_native_row_major_matrix(ptr)) need_transpose = true; // If we are forwarding this load, // don't register the read to access chain here, defer that to when we actually use the expression, // using the add_implied_read_expression mechanism. auto expr = to_dereferenced_expression(ptr, !forward); // We might need to bitcast in order to load from a builtin. bitcast_from_builtin_load(ptr, expr, get(result_type)); // We might be trying to load a gl_Position[N], where we should be // doing float4[](gl_in[i].gl_Position, ...) instead. // Similar workarounds are required for input arrays in tessellation. unroll_array_from_complex_load(id, ptr, expr); auto &type = get(result_type); // Shouldn't need to check for ID, but current glslang codegen requires it in some cases // when loading Image/Sampler descriptors. It does not hurt to check ID as well. 
if (has_decoration(id, DecorationNonUniformEXT) || has_decoration(ptr, DecorationNonUniformEXT)) { propagate_nonuniform_qualifier(ptr); convert_non_uniform_expression(type, expr); } if (ptr_expression) ptr_expression->need_transpose = old_need_transpose; // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. // However, if we try to load a complex, composite object from a flattened buffer, // we should avoid emitting the same code over and over and lower the result to a temporary. bool usage_tracking = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0 && (type.basetype == SPIRType::Struct || (type.columns > 1)); SPIRExpression *e = nullptr; if (!backend.array_is_value_type && !type.array.empty() && !forward) { // Complicated load case where we need to make a copy of ptr, but we cannot, because // it is an array, and our backend does not support arrays as value types. // Emit the temporary, and copy it explicitly. e = &emit_uninitialized_temporary_expression(result_type, id); emit_array_copy(to_expression(id), ptr); } else e = &emit_op(result_type, id, expr, forward, !usage_tracking); e->need_transpose = need_transpose; register_read(id, ptr, forward); // Pass through whether the result is of a packed type and the physical type ID. 
if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked)) set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)) { set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)); } inherit_expression_dependencies(id, ptr); if (forward) add_implied_read_expression(*e, ptr); break; } case OpInBoundsAccessChain: case OpAccessChain: case OpPtrAccessChain: { auto *var = maybe_get(ops[2]); if (var) flush_variable_declaration(var->self); // If the base is immutable, the access chain pointer must also be. // If an expression is mutable and forwardable, we speculate that it is immutable. AccessChainMeta meta; bool ptr_chain = opcode == OpPtrAccessChain; auto e = access_chain(ops[2], &ops[3], length - 3, get(ops[0]), &meta, ptr_chain); auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); auto *backing_variable = maybe_get_backing_variable(ops[2]); expr.loaded_from = backing_variable ? backing_variable->self : ops[2]; expr.need_transpose = meta.need_transpose; expr.access_chain = true; // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. 
if (meta.storage_is_packed) set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); if (meta.storage_physical_type != 0) set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(ops[1], DecorationInvariant); for (uint32_t i = 2; i < length; i++) { inherit_expression_dependencies(ops[1], ops[i]); add_implied_read_expression(expr, ops[i]); } break; } case OpStore: { auto *var = maybe_get(ops[0]); if (has_decoration(ops[0], DecorationNonUniformEXT)) propagate_nonuniform_qualifier(ops[0]); if (var && var->statically_assigned) var->static_expression = ops[1]; else if (var && var->loop_variable && !var->loop_variable_enable) var->static_expression = ops[1]; else if (var && var->remapped_variable) { // Skip the write. } else if (var && flattened_structs.count(ops[0])) { store_flattened_struct(*var, ops[1]); register_write(ops[0]); } else { emit_store_statement(ops[0], ops[1]); } // Storing a pointer results in a variable pointer, so we must conservatively assume // we can write through it. if (expression_type(ops[1]).pointer) register_write(ops[1]); break; } case OpArrayLength: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); set(id, join(type_to_glsl(get(result_type)), "(", e, ".length())"), result_type, true); break; } // Function calls case OpFunctionCall: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t func = ops[2]; const auto *arg = &ops[3]; length -= 3; auto &callee = get(func); auto &return_type = get(callee.return_type); bool pure = function_is_pure(callee); bool callee_has_out_variables = false; bool emit_return_value_as_argument = false; // Invalidate out variables passed to functions since they can be OpStore'd to. 
for (uint32_t i = 0; i < length; i++) { if (callee.arguments[i].write_count) { register_call_out_argument(arg[i]); callee_has_out_variables = true; } flush_variable_declaration(arg[i]); } if (!return_type.array.empty() && !backend.can_return_array) { callee_has_out_variables = true; emit_return_value_as_argument = true; } if (!pure) register_impure_function_call(); string funexpr; SmallVector arglist; funexpr += to_name(func) + "("; if (emit_return_value_as_argument) { statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); arglist.push_back(to_name(id)); } for (uint32_t i = 0; i < length; i++) { // Do not pass in separate images or samplers if we're remapping // to combined image samplers. if (skip_argument(arg[i])) continue; arglist.push_back(to_func_call_arg(arg[i])); } for (auto &combined : callee.combined_parameters) { uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id]; uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id]; arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); } append_global_func_args(callee, length, arglist); funexpr += merge(arglist); funexpr += ")"; // Check for function call constraints. check_function_call_constraints(arg, length); if (return_type.basetype != SPIRType::Void) { // If the function actually writes to an out variable, // take the conservative route and do not forward. // The problem is that we might not read the function // result (and emit the function) before an out variable // is read (common case when return value is ignored! // In order to avoid start tracking invalid variables, // just avoid the forwarding problem altogether. 
bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && (forced_temporaries.find(id) == end(forced_temporaries)); if (emit_return_value_as_argument) { statement(funexpr, ";"); set(id, to_name(id), result_type, true); } else emit_op(result_type, id, funexpr, forward); // Function calls are implicit loads from all variables in question. // Set dependencies for them. for (uint32_t i = 0; i < length; i++) register_read(id, arg[i], forward); // If we're going to forward the temporary result, // put dependencies on every variable that must not change. if (forward) register_global_read_dependencies(callee, id); } else statement(funexpr, ";"); break; } // Composite munging case OpCompositeConstruct: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; const auto *const elems = &ops[2]; length -= 2; bool forward = true; for (uint32_t i = 0; i < length; i++) forward = forward && should_forward(elems[i]); auto &out_type = get(result_type); auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; // Only splat if we have vector constructors. // Arrays and structs must be initialized properly in full. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; bool splat = false; bool swizzle_splat = false; if (in_type) { splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) { // Cannot swizzle literal integers as a special case. 
swizzle_splat = false; } } if (splat || swizzle_splat) { uint32_t input = elems[0]; for (uint32_t i = 0; i < length; i++) { if (input != elems[i]) { splat = false; swizzle_splat = false; } } } if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) forward = false; if (!out_type.array.empty() && !backend.can_declare_arrays_inline) forward = false; if (type_is_empty(out_type) && !backend.supports_empty_struct) forward = false; string constructor_op; if (backend.use_initializer_list && composite) { // Only use this path if we are building composites. // This path cannot be used for arithmetic. if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) constructor_op += type_to_glsl_constructor(get(result_type)); constructor_op += "{ "; if (type_is_empty(out_type) && !backend.supports_empty_struct) constructor_op += "0"; else if (splat) constructor_op += to_expression(elems[0]); else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += " }"; } else if (swizzle_splat && !composite) { constructor_op = remap_swizzle(get(result_type), 1, to_expression(elems[0])); } else { constructor_op = type_to_glsl_constructor(get(result_type)) + "("; if (type_is_empty(out_type) && !backend.supports_empty_struct) constructor_op += "0"; else if (splat) constructor_op += to_expression(elems[0]); else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += ")"; } if (!constructor_op.empty()) { emit_op(result_type, id, constructor_op, forward); for (uint32_t i = 0; i < length; i++) inherit_expression_dependencies(id, elems[i]); } break; } case OpVectorInsertDynamic: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t vec = ops[2]; uint32_t comp = ops[3]; uint32_t index = ops[4]; flush_variable_declaration(vec); // Make a copy, then use access chain to store the variable. 
statement(declare_temporary(result_type, id), to_expression(vec), ";"); set(id, to_name(id), result_type, true); auto chain = access_chain_internal(id, &index, 1, 0, nullptr); statement(chain, " = ", to_expression(comp), ";"); break; } case OpVectorExtractDynamic: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); emit_op(result_type, id, expr, should_forward(ops[2])); inherit_expression_dependencies(id, ops[2]); inherit_expression_dependencies(id, ops[3]); break; } case OpCompositeExtract: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; length -= 3; auto &type = get(result_type); // We can only split the expression here if our expression is forwarded as a temporary. bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. auto &composite_type = expression_type(ops[2]); if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty()) allow_base_expression = false; // Packed expressions cannot be split up. if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked)) allow_base_expression = false; AccessChainMeta meta; SPIRExpression *e = nullptr; // Only apply this optimization if result is scalar. if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) { // We want to split the access chain from the base. // This is so we can later combine different CompositeExtract results // with CompositeConstruct without emitting code like // // vec3 temp = texture(...).xyz // vec4(temp.x, temp.y, temp.z, 1.0). // // when we actually wanted to emit this // vec4(texture(...).xyz, 1.0). // // Including the base will prevent this and would trigger multiple reads // from expression causing it to be forced to an actual temporary in GLSL. 
auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT, &meta); e = &emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); inherit_expression_dependencies(id, ops[2]); e->base_expression = ops[2]; } else { auto expr = access_chain_internal(ops[2], &ops[3], length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); e = &emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); inherit_expression_dependencies(id, ops[2]); } // Pass through some meta information to the loaded expression. // We can still end up loading a buffer type to a variable, then CompositeExtract from it // instead of loading everything through an access chain. e->need_transpose = meta.need_transpose; if (meta.storage_is_packed) set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); if (meta.storage_physical_type != 0) set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); if (meta.storage_is_invariant) set_decoration(id, DecorationInvariant); break; } case OpCompositeInsert: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t obj = ops[2]; uint32_t composite = ops[3]; const auto *elems = &ops[4]; length -= 4; flush_variable_declaration(composite); // Make a copy, then use access chain to store the variable. 
statement(declare_temporary(result_type, id), to_expression(composite), ";"); set(id, to_name(id), result_type, true); auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); statement(chain, " = ", to_expression(obj), ";"); break; } case OpCopyMemory: { uint32_t lhs = ops[0]; uint32_t rhs = ops[1]; if (lhs != rhs) { flush_variable_declaration(lhs); flush_variable_declaration(rhs); statement(to_expression(lhs), " = ", to_expression(rhs), ";"); register_write(lhs); } break; } case OpCopyObject: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t rhs = ops[2]; bool pointer = get(result_type).pointer; auto *chain = maybe_get(rhs); if (chain) { // Cannot lower to a SPIRExpression, just copy the object. auto &e = set(id, *chain); e.self = id; } else if (expression_is_lvalue(rhs) && !pointer) { // Need a copy. // For pointer types, we copy the pointer itself. statement(declare_temporary(result_type, id), to_expression(rhs), ";"); set(id, to_name(id), result_type, true); inherit_expression_dependencies(id, rhs); } else { // RHS expression is immutable, so just forward it. // Copying these things really make no sense, but // seems to be allowed anyways. auto &e = set(id, to_expression(rhs), result_type, true); if (pointer) { auto *var = maybe_get_backing_variable(rhs); e.loaded_from = var ? var->self : 0; } // If we're copying an access chain, need to inherit the read expressions. auto *rhs_expr = maybe_get(rhs); if (rhs_expr) e.implied_read_expressions = rhs_expr->implied_read_expressions; } break; } case OpVectorShuffle: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t vec0 = ops[2]; uint32_t vec1 = ops[3]; const auto *elems = &ops[4]; length -= 4; auto &type0 = expression_type(vec0); // If we have the undefined swizzle index -1, we need to swizzle in undefined data, // or in our case, T(0). 
bool shuffle = false; for (uint32_t i = 0; i < length; i++) if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) shuffle = true; // Cannot use swizzles with packed expressions, force shuffle path. if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked)) shuffle = true; string expr; bool should_fwd, trivial_forward; if (shuffle) { should_fwd = should_forward(vec0) && should_forward(vec1); trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1); // Constructor style and shuffling from two different vectors. SmallVector args; for (uint32_t i = 0; i < length; i++) { if (elems[i] == 0xffffffffu) { // Use a constant 0 here. // We could use the first component or similar, but then we risk propagating // a value we might not need, and bog down codegen. SPIRConstant c; c.constant_type = type0.parent_type; assert(type0.parent_type != 0); args.push_back(constant_expression(c)); } else if (elems[i] >= type0.vecsize) args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); else args.push_back(to_extract_component_expression(vec0, elems[i])); } expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); } else { should_fwd = should_forward(vec0); trivial_forward = !expression_is_forwarded(vec0); // We only source from first vector, so can use swizzle. // If the vector is packed, unpack it before applying a swizzle (needed for MSL) expr += to_enclosed_unpacked_expression(vec0); expr += "."; for (uint32_t i = 0; i < length; i++) { assert(elems[i] != 0xffffffffu); expr += index_to_swizzle(elems[i]); } if (backend.swizzle_is_function && length > 1) expr += "()"; } // A shuffle is trivial in that it doesn't actually *do* anything. // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. 
emit_op(result_type, id, expr, should_fwd, trivial_forward); inherit_expression_dependencies(id, vec0); inherit_expression_dependencies(id, vec1); break; } // ALU case OpIsNan: GLSL_UFOP(isnan); break; case OpIsInf: GLSL_UFOP(isinf); break; case OpSNegate: case OpFNegate: GLSL_UOP(-); break; case OpIAdd: { // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. auto type = get(ops[0]).basetype; GLSL_BOP_CAST(+, type); break; } case OpFAdd: GLSL_BOP(+); break; case OpISub: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(-, type); break; } case OpFSub: GLSL_BOP(-); break; case OpIMul: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(*, type); break; } case OpVectorTimesMatrix: case OpMatrixTimesVector: { // If the matrix needs transpose, just flip the multiply order. auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 2 : 3]); if (e && e->need_transpose) { e->need_transpose = false; string expr; if (opcode == OpMatrixTimesVector) expr = join(to_enclosed_expression(ops[3]), " * ", enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); else expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", to_enclosed_expression(ops[2])); bool forward = should_forward(ops[2]) && should_forward(ops[3]); emit_op(ops[0], ops[1], expr, forward); e->need_transpose = true; inherit_expression_dependencies(ops[1], ops[2]); inherit_expression_dependencies(ops[1], ops[3]); } else GLSL_BOP(*); break; } case OpMatrixTimesMatrix: { auto *a = maybe_get(ops[2]); auto *b = maybe_get(ops[3]); // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. // a^T * b^T = (b * a)^T. 
if (a && b && a->need_transpose && b->need_transpose) { a->need_transpose = false; b->need_transpose = false; auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); bool forward = should_forward(ops[2]) && should_forward(ops[3]); auto &e = emit_op(ops[0], ops[1], expr, forward); e.need_transpose = true; a->need_transpose = true; b->need_transpose = true; inherit_expression_dependencies(ops[1], ops[2]); inherit_expression_dependencies(ops[1], ops[3]); } else GLSL_BOP(*); break; } case OpFMul: case OpMatrixTimesScalar: case OpVectorTimesScalar: GLSL_BOP(*); break; case OpOuterProduct: GLSL_BFOP(outerProduct); break; case OpDot: GLSL_BFOP(dot); break; case OpTranspose: GLSL_UFOP(transpose); break; case OpSRem: { uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; // Needs special handling. bool forward = should_forward(op0) && should_forward(op1); auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); emit_op(result_type, result_id, expr, forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); break; } case OpSDiv: GLSL_BOP_CAST(/, int_type); break; case OpUDiv: GLSL_BOP_CAST(/, uint_type); break; case OpIAddCarry: case OpISubBorrow: { if (options.es && options.version < 310) SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); else if (!options.es && options.version < 400) SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; auto &type = get(result_type); emit_uninitialized_temporary_expression(result_type, result_id); const char *op = opcode == OpIAddCarry ? 
"uaddCarry" : "usubBorrow"; statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); break; } case OpUMulExtended: case OpSMulExtended: { if (options.es && options.version < 310) SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); else if (!options.es && options.version < 400) SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; forced_temporaries.insert(result_id); auto &type = get(result_type); emit_uninitialized_temporary_expression(result_type, result_id); const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); break; } case OpFDiv: GLSL_BOP(/); break; case OpShiftRightLogical: GLSL_BOP_CAST(>>, uint_type); break; case OpShiftRightArithmetic: GLSL_BOP_CAST(>>, int_type); break; case OpShiftLeftLogical: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(<<, type); break; } case OpBitwiseOr: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(|, type); break; } case OpBitwiseXor: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(^, type); break; } case OpBitwiseAnd: { auto type = get(ops[0]).basetype; GLSL_BOP_CAST(&, type); break; } case OpNot: GLSL_UOP(~); break; case OpUMod: GLSL_BOP_CAST(%, uint_type); break; case OpSMod: GLSL_BOP_CAST(%, int_type); break; case OpFMod: GLSL_BFOP(mod); break; case OpFRem: { if (is_legacy()) SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. 
A workaround is " "needed for legacy."); uint32_t result_type = ops[0]; uint32_t result_id = ops[1]; uint32_t op0 = ops[2]; uint32_t op1 = ops[3]; // Needs special handling. bool forward = should_forward(op0) && should_forward(op1); auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); emit_op(result_type, result_id, expr, forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); break; } // Relational case OpAny: GLSL_UFOP(any); break; case OpAll: GLSL_UFOP(all); break; case OpSelect: emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); break; case OpLogicalOr: { // No vector variant in GLSL for logical OR. auto result_type = ops[0]; auto id = ops[1]; auto &type = get(result_type); if (type.vecsize > 1) emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||"); else GLSL_BOP(||); break; } case OpLogicalAnd: { // No vector variant in GLSL for logical AND. auto result_type = ops[0]; auto id = ops[1]; auto &type = get(result_type); if (type.vecsize > 1) emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&"); else GLSL_BOP(&&); break; } case OpLogicalNot: { auto &type = get(ops[0]); if (type.vecsize > 1) GLSL_UFOP(not); else GLSL_UOP(!); break; } case OpIEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(equal, int_type); else GLSL_BOP_CAST(==, int_type); break; } case OpLogicalEqual: case OpFOrdEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(equal); else GLSL_BOP(==); break; } case OpINotEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(notEqual, int_type); else GLSL_BOP_CAST(!=, int_type); break; } case OpLogicalNotEqual: case OpFOrdNotEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(notEqual); else GLSL_BOP(!=); break; } case OpUGreaterThan: case OpSGreaterThan: { auto type = opcode == OpUGreaterThan ? 
SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(greaterThan, type); else GLSL_BOP_CAST(>, type); break; } case OpFOrdGreaterThan: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(greaterThan); else GLSL_BOP(>); break; } case OpUGreaterThanEqual: case OpSGreaterThanEqual: { auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(greaterThanEqual, type); else GLSL_BOP_CAST(>=, type); break; } case OpFOrdGreaterThanEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(greaterThanEqual); else GLSL_BOP(>=); break; } case OpULessThan: case OpSLessThan: { auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(lessThan, type); else GLSL_BOP_CAST(<, type); break; } case OpFOrdLessThan: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(lessThan); else GLSL_BOP(<); break; } case OpULessThanEqual: case OpSLessThanEqual: { auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP_CAST(lessThanEqual, type); else GLSL_BOP_CAST(<=, type); break; } case OpFOrdLessThanEqual: { if (expression_type(ops[2]).vecsize > 1) GLSL_BFOP(lessThanEqual); else GLSL_BOP(<=); break; } // Conversion case OpSConvert: case OpConvertSToF: case OpUConvert: case OpConvertUToF: { auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type; uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto &type = get(result_type); auto &arg_type = expression_type(ops[2]); auto func = type_to_glsl_constructor(type); // If we're sign-extending or zero-extending, we need to make sure we cast from the correct type. // For truncation, it does not matter, so don't emit useless casts. 
if (arg_type.width < type.width) emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype); else emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } case OpConvertFToU: case OpConvertFToS: { // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto &type = get(result_type); auto expected_type = type; auto &float_type = expression_type(ops[2]); expected_type.basetype = opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width); auto func = type_to_glsl_constructor(expected_type); emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype); break; } case OpFConvert: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto func = type_to_glsl_constructor(get(result_type)); emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } case OpBitcast: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t arg = ops[2]; auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); emit_unary_func_op(result_type, id, arg, op.c_str()); break; } case OpQuantizeToF16: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t arg = ops[2]; string op; auto &type = get(result_type); switch (type.vecsize) { case 1: op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); break; case 2: op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); break; case 3: { auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); op = join("vec3(", op0, ", ", op1, ")"); break; } case 4: { auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); op = join("vec4(", op0, ", ", op1, ")"); break; } default: 
SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); } emit_op(result_type, id, op, should_forward(arg)); inherit_expression_dependencies(id, arg); break; } // Derivatives case OpDPdx: GLSL_UFOP(dFdx); if (is_legacy_es()) require_extension_internal("GL_OES_standard_derivatives"); register_control_dependent_expression(ops[1]); break; case OpDPdy: GLSL_UFOP(dFdy); if (is_legacy_es()) require_extension_internal("GL_OES_standard_derivatives"); register_control_dependent_expression(ops[1]); break; case OpDPdxFine: GLSL_UFOP(dFdxFine); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpDPdyFine: GLSL_UFOP(dFdyFine); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpDPdxCoarse: if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } GLSL_UFOP(dFdxCoarse); if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpDPdyCoarse: GLSL_UFOP(dFdyCoarse); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; case OpFwidth: GLSL_UFOP(fwidth); if (is_legacy_es()) require_extension_internal("GL_OES_standard_derivatives"); register_control_dependent_expression(ops[1]); break; case OpFwidthCoarse: GLSL_UFOP(fwidthCoarse); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); 
register_control_dependent_expression(ops[1]); break; case OpFwidthFine: GLSL_UFOP(fwidthFine); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension_internal("GL_ARB_derivative_control"); register_control_dependent_expression(ops[1]); break; // Bitfield case OpBitFieldInsert: // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. GLSL_QFOP(bitfieldInsert); break; case OpBitFieldSExtract: case OpBitFieldUExtract: // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. GLSL_TFOP(bitfieldExtract); break; case OpBitReverse: GLSL_UFOP(bitfieldReverse); break; case OpBitCount: GLSL_UFOP(bitCount); break; // Atomics case OpAtomicExchange: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; // Ignore semantics for now, probably only relevant to CL. uint32_t val = ops[5]; const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; forced_temporaries.insert(id); emit_binary_func_op(result_type, id, ptr, val, op); flush_all_atomic_capable_variables(); break; } case OpAtomicCompareExchange: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; uint32_t val = ops[6]; uint32_t comp = ops[7]; const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; forced_temporaries.insert(id); emit_trinary_func_op(result_type, id, ptr, comp, val, op); flush_all_atomic_capable_variables(); break; } case OpAtomicLoad: flush_all_atomic_capable_variables(); // FIXME: Image? // OpAtomicLoad seems to only be relevant for atomic counters. 
forced_temporaries.insert(ops[1]); GLSL_UFOP(atomicCounter); break; case OpAtomicStore: SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore."); case OpAtomicIIncrement: case OpAtomicIDecrement: { forced_temporaries.insert(ops[1]); auto &type = expression_type(ops[2]); if (type.storage == StorageClassAtomicCounter) { // Legacy GLSL stuff, not sure if this is relevant to support. if (opcode == OpAtomicIIncrement) GLSL_UFOP(atomicCounterIncrement); else GLSL_UFOP(atomicCounterDecrement); } else { bool atomic_image = check_atomic_image(ops[2]); bool unsigned_type = (type.basetype == SPIRType::UInt) || (atomic_image && get(type.image.type).basetype == SPIRType::UInt); const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; const char *increment = nullptr; if (opcode == OpAtomicIIncrement && unsigned_type) increment = "1u"; else if (opcode == OpAtomicIIncrement) increment = "1"; else if (unsigned_type) increment = "uint(-1)"; else increment = "-1"; emit_op(ops[0], ops[1], join(op, "(", to_expression(ops[2]), ", ", increment, ")"), false); } flush_all_atomic_capable_variables(); break; } case OpAtomicIAdd: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); break; } case OpAtomicISub: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; forced_temporaries.insert(ops[1]); auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); flush_all_atomic_capable_variables(); break; } case OpAtomicSMin: case OpAtomicUMin: { const char *op = check_atomic_image(ops[2]) ? 
"imageAtomicMin" : "atomicMin"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); break; } case OpAtomicSMax: case OpAtomicUMax: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); break; } case OpAtomicAnd: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); break; } case OpAtomicOr: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); break; } case OpAtomicXor: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); break; } // Geometry shaders case OpEmitVertex: statement("EmitVertex();"); break; case OpEndPrimitive: statement("EndPrimitive();"); break; case OpEmitStreamVertex: statement("EmitStreamVertex();"); break; case OpEndStreamPrimitive: statement("EndStreamPrimitive();"); break; // Textures case OpImageSampleExplicitLod: case OpImageSampleProjExplicitLod: case OpImageSampleDrefExplicitLod: case OpImageSampleProjDrefExplicitLod: case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleDrefImplicitLod: case OpImageSampleProjDrefImplicitLod: case OpImageFetch: case OpImageGather: case OpImageDrefGather: // Gets a bit hairy, so move this to a separate instruction. emit_texture_op(instruction); break; case OpImage: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; // Suppress usage tracking. 
auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); // When using the image, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); e.loaded_from = var ? var->self : 0; break; } case OpImageQueryLod: { if (!options.es && options.version < 400) { require_extension_internal("GL_ARB_texture_query_lod"); // For some reason, the ARB spec is all-caps. GLSL_BFOP(textureQueryLOD); } else if (options.es) SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); else GLSL_BFOP(textureQueryLod); register_control_dependent_expression(ops[1]); break; } case OpImageQueryLevels: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (!options.es && options.version < 430) require_extension_internal("GL_ARB_texture_query_levels"); if (options.es) SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); break; } case OpImageQuerySamples: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; string expr; if (type.image.sampled == 2) expr = join("imageSamples(", to_expression(ops[2]), ")"); else expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); break; } case OpSampledImage: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_sampled_image_op(result_type, id, ops[2], ops[3]); inherit_expression_dependencies(id, ops[2]); inherit_expression_dependencies(id, ops[3]); break; } case OpImageQuerySizeLod: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ", ", bitcast_expression(SPIRType::Int, ops[3]), 
")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); break; } // Image load/store case OpImageRead: { // We added Nonreadable speculatively to the OpImage variable due to glslangValidator // not adding the proper qualifiers. // If it turns out we need to read the image after all, remove the qualifier and recompile. auto *var = maybe_get_backing_variable(ops[2]); if (var) { auto &flags = ir.meta[var->self].decoration.decoration_flags; if (flags.get(DecorationNonReadable)) { flags.clear(DecorationNonReadable); force_recompile(); } } uint32_t result_type = ops[0]; uint32_t id = ops[1]; bool pure; string imgexpr; auto &type = expression_type(ops[2]); if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code { if (type.image.ms) SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); auto itr = find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); if (itr == end(pls_inputs)) { // For non-PLS inputs, we rely on subpass type remapping information to get it right // since ImageRead always returns 4-component vectors and the backing type is opaque. if (!var->remapped_components) SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); } else { // PLS input could have different number of components than what the SPIR expects, swizzle to // the appropriate vector size. uint32_t components = pls_format_to_components(itr->format); imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); } pure = true; } else if (type.image.dim == DimSubpassData) { if (options.vulkan_semantics) { // With Vulkan semantics, use the proper Vulkan GLSL construct. 
if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW( "Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); } else imgexpr = join("subpassLoad(", to_expression(ops[2]), ")"); } else { if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW( "Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", to_expression(samples), ")"); } else { // Implement subpass loads via texture barrier style sampling. imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); } } imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = true; } else { // imageLoad only accepts int coords, not uint. auto coord_expr = to_expression(ops[3]); auto target_coord_type = expression_type(ops[3]); target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); // Plain image load/store. 
if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); } else imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")"); imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = false; } if (var && var->forwardable) { bool forward = forced_temporaries.find(id) == end(forced_temporaries); auto &e = emit_op(result_type, id, imgexpr, forward); // We only need to track dependencies if we're reading from image load/store. if (!pure) { e.loaded_from = var->self; if (forward) var->dependees.push_back(id); } } else emit_op(result_type, id, imgexpr, false); inherit_expression_dependencies(id, ops[2]); if (type.image.ms) inherit_expression_dependencies(id, ops[5]); break; } case OpImageTexelPointer: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto &e = set(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); e.loaded_from = var ? var->self : 0; break; } case OpImageWrite: { // We added Nonwritable speculatively to the OpImage variable due to glslangValidator // not adding the proper qualifiers. // If it turns out we need to write to the image after all, remove the qualifier and recompile. auto *var = maybe_get_backing_variable(ops[0]); if (var) { auto &flags = ir.meta[var->self].decoration.decoration_flags; if (flags.get(DecorationNonWritable)) { flags.clear(DecorationNonWritable); force_recompile(); } } auto &type = expression_type(ops[0]); auto &value_type = expression_type(ops[2]); auto store_type = value_type; store_type.vecsize = 4; // imageStore only accepts int coords, not uint. 
auto coord_expr = to_expression(ops[1]); auto target_coord_type = expression_type(ops[1]); target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); if (type.image.ms) { uint32_t operands = ops[3]; if (operands != ImageOperandsSampleMask || length != 5) SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); uint32_t samples = ops[4]; statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); } else statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); break; } case OpImageQuerySize: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.basetype == SPIRType::Image) { string expr; if (type.image.sampled == 2) { // The size of an image is always constant. expr = join("imageSize(", to_expression(ops[2]), ")"); } else { // This path is hit for samplerBuffers and multisampled images which do not have LOD. 
expr = join("textureSize(", convert_separate_image_to_expression(ops[2]), ")"); } auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); } else SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); break; } // Compute case OpControlBarrier: case OpMemoryBarrier: { uint32_t execution_scope = 0; uint32_t memory; uint32_t semantics; if (opcode == OpMemoryBarrier) { memory = get(ops[0]).scalar(); semantics = get(ops[1]).scalar(); } else { execution_scope = get(ops[0]).scalar(); memory = get(ops[1]).scalar(); semantics = get(ops[2]).scalar(); } if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) { if (!options.vulkan_semantics) SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics."); require_extension_internal("GL_KHR_shader_subgroup_basic"); } if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) { // Control shaders only have barriers, and it implies memory barriers. if (opcode == OpControlBarrier) statement("barrier();"); break; } // We only care about these flags, acquire/release and friends are not relevant to GLSL. semantics = mask_relevant_memory_semantics(semantics); if (opcode == OpMemoryBarrier) { // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier // does what we need, so we avoid redundant barriers. 
const Instruction *next = get_next_instruction_in_block(instruction); if (next && next->op == OpControlBarrier) { auto *next_ops = stream(*next); uint32_t next_memory = get(next_ops[1]).scalar(); uint32_t next_semantics = get(next_ops[2]).scalar(); next_semantics = mask_relevant_memory_semantics(next_semantics); bool memory_scope_covered = false; if (next_memory == memory) memory_scope_covered = true; else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) { // If we only care about workgroup memory, either Device or Workgroup scope is fine, // scope does not have to match. if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && (memory == ScopeDevice || memory == ScopeWorkgroup)) { memory_scope_covered = true; } } else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) { // The control barrier has device scope, but the memory barrier just has workgroup scope. memory_scope_covered = true; } // If we have the same memory scope, and all memory types are covered, we're good. if (memory_scope_covered && (semantics & next_semantics) == semantics) break; } } // We are synchronizing some memory or syncing execution, // so we cannot forward any loads beyond the memory barrier. if (semantics || opcode == OpControlBarrier) { assert(current_emitting_block); flush_control_dependent_expressions(current_emitting_block->self); flush_all_active_variables(); } if (memory == ScopeWorkgroup) // Only need to consider memory within a group { if (semantics == MemorySemanticsWorkgroupMemoryMask) statement("memoryBarrierShared();"); else if (semantics != 0) statement("groupMemoryBarrier();"); } else if (memory == ScopeSubgroup) { const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) { // These are not relevant for GLSL, but assume it means memoryBarrier(). 
// memoryBarrier() does everything, so no need to test anything else. statement("subgroupMemoryBarrier();"); } else if ((semantics & all_barriers) == all_barriers) { // Short-hand instead of emitting 3 barriers. statement("subgroupMemoryBarrier();"); } else { // Pick out individual barriers. if (semantics & MemorySemanticsWorkgroupMemoryMask) statement("subgroupMemoryBarrierShared();"); if (semantics & MemorySemanticsUniformMemoryMask) statement("subgroupMemoryBarrierBuffer();"); if (semantics & MemorySemanticsImageMemoryMask) statement("subgroupMemoryBarrierImage();"); } } else { const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask; if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) { // These are not relevant for GLSL, but assume it means memoryBarrier(). // memoryBarrier() does everything, so no need to test anything else. statement("memoryBarrier();"); } else if ((semantics & all_barriers) == all_barriers) { // Short-hand instead of emitting 4 barriers. statement("memoryBarrier();"); } else { // Pick out individual barriers. 
if (semantics & MemorySemanticsWorkgroupMemoryMask) statement("memoryBarrierShared();"); if (semantics & MemorySemanticsUniformMemoryMask) statement("memoryBarrierBuffer();"); if (semantics & MemorySemanticsImageMemoryMask) statement("memoryBarrierImage();"); if (semantics & MemorySemanticsAtomicCounterMemoryMask) statement("memoryBarrierAtomicCounter();"); } } if (opcode == OpControlBarrier) { if (execution_scope == ScopeSubgroup) statement("subgroupBarrier();"); else statement("barrier();"); } break; } case OpExtInst: { uint32_t extension_set = ops[2]; if (get(extension_set).ext == SPIRExtension::GLSL) { emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot) { emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) { emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) { emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader) { emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_debug_info) { break; // Ignore SPIR-V debug information extended instructions. } else { statement("// unimplemented ext op ", instruction.op); break; } break; } // Legacy sub-group stuff ... 
case OpSubgroupBallotKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; string expr; expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); emit_op(result_type, id, expr, should_forward(ops[2])); require_extension_internal("GL_ARB_shader_ballot"); inherit_expression_dependencies(id, ops[2]); register_control_dependent_expression(ops[1]); break; } case OpSubgroupFirstInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); require_extension_internal("GL_ARB_shader_ballot"); register_control_dependent_expression(ops[1]); break; } case OpSubgroupReadInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); require_extension_internal("GL_ARB_shader_ballot"); register_control_dependent_expression(ops[1]); break; } case OpSubgroupAllKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); require_extension_internal("GL_ARB_shader_group_vote"); register_control_dependent_expression(ops[1]); break; } case OpSubgroupAnyKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); require_extension_internal("GL_ARB_shader_group_vote"); register_control_dependent_expression(ops[1]); break; } case OpSubgroupAllEqualKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); require_extension_internal("GL_ARB_shader_group_vote"); register_control_dependent_expression(ops[1]); break; } case OpGroupIAddNonUniformAMD: case OpGroupFAddNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); require_extension_internal("GL_AMD_shader_ballot"); 
register_control_dependent_expression(ops[1]); break; } case OpGroupFMinNonUniformAMD: case OpGroupUMinNonUniformAMD: case OpGroupSMinNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); require_extension_internal("GL_AMD_shader_ballot"); register_control_dependent_expression(ops[1]); break; } case OpGroupFMaxNonUniformAMD: case OpGroupUMaxNonUniformAMD: case OpGroupSMaxNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); require_extension_internal("GL_AMD_shader_ballot"); register_control_dependent_expression(ops[1]); break; } case OpFragmentMaskFetchAMD: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.image.dim == spv::DimSubpassData) { emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); } else { emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); } require_extension_internal("GL_AMD_shader_fragment_mask"); break; } case OpFragmentFetchAMD: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.image.dim == spv::DimSubpassData) { emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); } else { emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); } require_extension_internal("GL_AMD_shader_fragment_mask"); break; } // Vulkan 1.1 sub-group stuff ... 
case OpGroupNonUniformElect: case OpGroupNonUniformBroadcast: case OpGroupNonUniformBroadcastFirst: case OpGroupNonUniformBallot: case OpGroupNonUniformInverseBallot: case OpGroupNonUniformBallotBitExtract: case OpGroupNonUniformBallotBitCount: case OpGroupNonUniformBallotFindLSB: case OpGroupNonUniformBallotFindMSB: case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: case OpGroupNonUniformShuffleUp: case OpGroupNonUniformShuffleDown: case OpGroupNonUniformAll: case OpGroupNonUniformAny: case OpGroupNonUniformAllEqual: case OpGroupNonUniformFAdd: case OpGroupNonUniformIAdd: case OpGroupNonUniformFMul: case OpGroupNonUniformIMul: case OpGroupNonUniformFMin: case OpGroupNonUniformFMax: case OpGroupNonUniformSMin: case OpGroupNonUniformSMax: case OpGroupNonUniformUMin: case OpGroupNonUniformUMax: case OpGroupNonUniformBitwiseAnd: case OpGroupNonUniformBitwiseOr: case OpGroupNonUniformBitwiseXor: case OpGroupNonUniformQuadSwap: case OpGroupNonUniformQuadBroadcast: emit_subgroup_op(instruction); break; case OpFUnordEqual: GLSL_BFOP(unsupported_FUnordEqual); break; case OpFUnordNotEqual: GLSL_BFOP(unsupported_FUnordNotEqual); break; case OpFUnordLessThan: GLSL_BFOP(unsupported_FUnordLessThan); break; case OpFUnordGreaterThan: GLSL_BFOP(unsupported_FUnordGreaterThan); break; case OpFUnordLessThanEqual: GLSL_BFOP(unsupported_FUnordLessThanEqual); break; case OpFUnordGreaterThanEqual: GLSL_BFOP(unsupported_FUnordGreaterThanEqual); break; case OpReportIntersectionNV: statement("reportIntersectionNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); break; case OpIgnoreIntersectionNV: statement("ignoreIntersectionNV();"); break; case OpTerminateRayNV: statement("terminateRayNV();"); break; case OpTraceNV: statement("traceNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", to_expression(ops[6]), ", ", to_expression(ops[7]), ", 
", to_expression(ops[8]), ", ", to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); break; case OpExecuteCallableNV: statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); break; case OpConvertUToPtr: { auto &type = get(ops[0]); if (type.storage != StorageClassPhysicalStorageBufferEXT) SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); auto op = type_to_glsl(type); emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); break; } case OpConvertPtrToU: { auto &type = get(ops[0]); auto &ptr_type = expression_type(ops[2]); if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); auto op = type_to_glsl(type); emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); break; } case OpUndef: // Undefined value has been declared. break; case OpLine: { emit_line_directive(ops[0], ops[1]); break; } case OpNoLine: break; case OpDemoteToHelperInvocationEXT: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); require_extension_internal("GL_EXT_demote_to_helper_invocation"); statement(backend.demote_literal, ";"); break; case OpIsHelperInvocationEXT: if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); require_extension_internal("GL_EXT_demote_to_helper_invocation"); emit_op(ops[0], ops[1], "helperInvocationEXT()", false); break; default: statement("// unimplemented op ", instruction.op); break; } } // Appends function arguments, mapped from global variables, beyond the specified arg index. // This is used when a function call uses fewer arguments than the function defines. 
// This situation may occur if the function signature has been dynamically modified to // extract global variables referenced from within the function, and convert them to // function arguments. This is necessary for shader languages that do not support global // access to shader input content from within a function (eg. Metal). Each additional // function args uses the name of the global variable. Function nesting will modify the // functions and function calls all the way up the nesting chain. void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist) { auto &args = func.arguments; uint32_t arg_cnt = uint32_t(args.size()); for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) { auto &arg = args[arg_idx]; assert(arg.alias_global_variable); // If the underlying variable needs to be declared // (ie. a local variable with deferred declaration), do so now. uint32_t var_id = get(arg.id).basevariable; if (var_id) flush_variable_declaration(var_id); arglist.push_back(to_func_call_arg(arg.id)); } } string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) { auto &memb = ir.meta[type.self].members; if (index < memb.size() && !memb[index].alias.empty()) return memb[index].alias; else return join("_m", index); } string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) { return join(".", to_member_name(type, index)); } void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) { auto &memb = ir.meta[type.self].members; if (index < memb.size() && !memb[index].alias.empty()) { auto &name = memb[index].alias; if (name.empty()) return; // Reserved for temporaries. 
if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) { name.clear(); return; } update_name_cache(type.member_name_cache, name); } } // Checks whether the ID is a row_major matrix that requires conversion before use bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) { // Natively supported row-major matrices do not need to be converted. // Legacy targets do not support row major. if (backend.native_row_major_matrix && !is_legacy()) return false; // Non-matrix or column-major matrix types do not need to be converted. if (!has_decoration(id, DecorationRowMajor)) return false; // Only square row-major matrices can be converted at this time. // Converting non-square matrices will require defining custom GLSL function that // swaps matrix elements while retaining the original dimensional form of the matrix. const auto type = expression_type(id); if (type.columns != type.vecsize) SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); return true; } // Checks whether the member is a row_major matrix that requires conversion before use bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) { // Natively supported row-major matrices do not need to be converted. if (backend.native_row_major_matrix && !is_legacy()) return false; // Non-matrix or column-major matrix types do not need to be converted. if (!has_member_decoration(type.self, index, DecorationRowMajor)) return false; // Only square row-major matrices can be converted at this time. // Converting non-square matrices will require defining custom GLSL function that // swaps matrix elements while retaining the original dimensional form of the matrix. const auto mbr_type = get(type.member_types[index]); if (mbr_type.columns != mbr_type.vecsize) SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); return true; } // Checks if we need to remap physical type IDs when declaring the type ina buffer. 
bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const { return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); } // Checks whether the member is in packed data type, that might need to be unpacked. bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const { return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked); } // Wraps the expression string in a function call that converts the // row_major matrix result of the expression to a column_major matrix. // Base implementation uses the standard library transpose() function. // Subclasses may override to use a different function. string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, bool /*is_packed*/) { strip_enclosed_expression(exp_str); if (!is_matrix(exp_type)) { auto column_index = exp_str.find_last_of('['); if (column_index == string::npos) return exp_str; auto column_expr = exp_str.substr(column_index); exp_str.resize(column_index); auto transposed_expr = type_to_glsl_constructor(exp_type) + "("; // Storing a column to a row-major matrix. Unroll the write. for (uint32_t c = 0; c < exp_type.vecsize; c++) { transposed_expr += join(exp_str, '[', c, ']', column_expr); if (c + 1 < exp_type.vecsize) transposed_expr += ", "; } transposed_expr += ")"; return transposed_expr; } else return join("transpose(", exp_str, ")"); } string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) { string type_name = type_to_glsl(type, id); remap_variable_type_name(type, name, type_name); return join(type_name, " ", name, type_to_array_glsl(type)); } // Emit a structure member. Subclasses may override to modify output, // or to dynamically add a padding member if needed. 
void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier, uint32_t) { auto &membertype = get(member_type_id); Bitset memberflags; auto &memb = ir.meta[type.self].members; if (index < memb.size()) memberflags = memb[index].decoration_flags; string qualifiers; bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); if (is_block) qualifiers = to_interpolation_qualifiers(memberflags); statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), variable_decl(membertype, to_member_name(type, index)), ";"); } void CompilerGLSL::emit_struct_padding_target(const SPIRType &) { } const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) { // GL_EXT_buffer_reference variables can be marked as restrict. if (flags.get(DecorationRestrictPointerEXT)) return "restrict "; // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) return ""; if (options.es) { auto &execution = get_entry_point(); if (flags.get(DecorationRelaxedPrecision)) { bool implied_fmediump = type.basetype == SPIRType::Float && options.fragment.default_float_precision == Options::Mediump && execution.model == ExecutionModelFragment; bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && options.fragment.default_int_precision == Options::Mediump && execution.model == ExecutionModelFragment; return implied_fmediump || implied_imediump ? 
"" : "mediump "; } else { bool implied_fhighp = type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && ((options.fragment.default_int_precision == Options::Highp && execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); return implied_fhighp || implied_ihighp ? "" : "highp "; } } else if (backend.allow_precision_qualifiers) { // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. // The default is highp however, so only emit mediump in the rare case that a shader has these. if (flags.get(DecorationRelaxedPrecision)) return "mediump "; else return ""; } else return ""; } const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) { auto &type = expression_type(id); bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es; if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) { // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. 
auto &result_type = get(type.image.type); if (result_type.width < 32) return "mediump "; } return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); } string CompilerGLSL::to_qualifiers_glsl(uint32_t id) { auto &flags = ir.meta[id].decoration.decoration_flags; string res; auto *var = maybe_get(id); if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) res += "shared "; res += to_interpolation_qualifiers(flags); if (var) res += to_storage_qualifiers_glsl(*var); auto &type = expression_type(id); if (type.image.dim != DimSubpassData && type.image.sampled == 2) { if (flags.get(DecorationCoherent)) res += "coherent "; if (flags.get(DecorationRestrict)) res += "restrict "; if (flags.get(DecorationNonWritable)) res += "readonly "; if (flags.get(DecorationNonReadable)) res += "writeonly "; } res += to_precision_qualifiers_glsl(id); return res; } string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) { // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... auto &type = expression_type(arg.id); const char *direction = ""; if (type.pointer) { if (arg.write_count && arg.read_count) direction = "inout "; else if (arg.write_count) direction = "out "; } return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); } string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) { return to_expression(var.initializer); } string CompilerGLSL::variable_decl(const SPIRVariable &variable) { // Ignore the pointer type since GLSL doesn't have pointers. 
auto &type = get_variable_data_type(variable); if (type.pointer_depth > 1) SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); if (variable.loop_variable && variable.static_expression) { uint32_t expr = variable.static_expression; if (ir.ids[expr].get_type() != TypeUndef) res += join(" = ", to_expression(variable.static_expression)); } else if (variable.initializer) { uint32_t expr = variable.initializer; if (ir.ids[expr].get_type() != TypeUndef) res += join(" = ", to_initializer_expression(variable)); } return res; } const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) { auto &flags = ir.meta[variable.self].decoration.decoration_flags; if (flags.get(DecorationRelaxedPrecision)) return "mediump "; else return "highp "; } string CompilerGLSL::pls_decl(const PlsRemap &var) { auto &variable = get(var.id); SPIRType type; type.vecsize = pls_format_to_components(var.format); type.basetype = pls_format_to_basetype(var.format); return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", to_name(variable.self)); } uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const { return to_array_size_literal(type, uint32_t(type.array.size() - 1)); } uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const { assert(type.array.size() == type.array_size_literal.size()); if (type.array_size_literal[index]) { return type.array[index]; } else { // Use the default spec constant value. // This is the best we can do. uint32_t array_size_id = type.array[index]; // Explicitly check for this case. The error message you would get (bad cast) makes no sense otherwise. if (ir.ids[array_size_id].get_type() == TypeConstantOp) SPIRV_CROSS_THROW("An array size was found to be an OpSpecConstantOp. 
This is not supported since " "SPIRV-Cross cannot deduce the actual size here."); uint32_t array_size = get(array_size_id).scalar(); return array_size; } } string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) { assert(type.array.size() == type.array_size_literal.size()); // Tessellation control and evaluation shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. // Opt for unsized as it's the more "correct" variant to use. if (type.storage == StorageClassInput && (get_entry_point().model == ExecutionModelTessellationControl || get_entry_point().model == ExecutionModelTessellationEvaluation)) return ""; auto &size = type.array[index]; if (!type.array_size_literal[index]) return to_expression(size); else if (size) return convert_to_string(size); else if (!backend.unsized_array_supported) { // For runtime-sized arrays, we can work around // lack of standard support for this by simply having // a single element array. // // Runtime length arrays must always be the last element // in an interface block. return "1"; } else return ""; } string CompilerGLSL::type_to_array_glsl(const SPIRType &type) { if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) { // We are using a wrapped pointer type, and we should not emit any array declarations here. return ""; } if (type.array.empty()) return ""; if (options.flatten_multidimensional_arrays) { string res; res += "["; for (auto i = uint32_t(type.array.size()); i; i--) { res += enclose_expression(to_array_size(type, i - 1)); if (i > 1) res += " * "; } res += "]"; return res; } else { if (type.array.size() > 1) { if (!options.es && options.version < 430) require_extension_internal("GL_ARB_arrays_of_arrays"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. 
" "Try using --flatten-multidimensional-arrays or set " "options.flatten_multidimensional_arrays to true."); } string res; for (auto i = uint32_t(type.array.size()); i; i--) { res += "["; res += to_array_size(type, i - 1); res += "]"; } return res; } } string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id) { auto &imagetype = get(type.image.type); string res; switch (imagetype.basetype) { case SPIRType::Int: case SPIRType::Short: case SPIRType::SByte: res = "i"; break; case SPIRType::UInt: case SPIRType::UShort: case SPIRType::UByte: res = "u"; break; default: break; } // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) return res + "subpassInput" + (type.image.ms ? "MS" : ""); // If we're emulating subpassInput with samplers, force sampler2D // so we don't have to specify format. if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) { // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. if (type.image.dim == DimBuffer && type.image.sampled == 1) res += "sampler"; else res += type.image.sampled == 2 ? 
"image" : "texture"; } else res += "sampler"; switch (type.image.dim) { case Dim1D: res += "1D"; break; case Dim2D: res += "2D"; break; case Dim3D: res += "3D"; break; case DimCube: res += "Cube"; break; case DimRect: if (options.es) SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); if (is_legacy_desktop()) require_extension_internal("GL_ARB_texture_rectangle"); res += "2DRect"; break; case DimBuffer: if (options.es && options.version < 320) require_extension_internal("GL_OES_texture_buffer"); else if (!options.es && options.version < 300) require_extension_internal("GL_EXT_texture_buffer_object"); res += "Buffer"; break; case DimSubpassData: res += "2D"; break; default: SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); } if (type.image.ms) res += "MS"; if (type.image.arrayed) { if (is_legacy_desktop()) require_extension_internal("GL_EXT_texture_array"); res += "Array"; } // "Shadow" state in GLSL only exists for samplers and combined image samplers. if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && image_is_comparison(type, id)) { res += "Shadow"; } return res; } string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) { if (type.array.size() > 1) { if (options.flatten_multidimensional_arrays) SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]()."); else if (!options.es && options.version < 430) require_extension_internal("GL_ARB_arrays_of_arrays"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); } auto e = type_to_glsl(type); for (uint32_t i = 0; i < type.array.size(); i++) e += "[]"; return e; } // The optional id parameter indicates the object whose type we are trying // to find the description for. It is optional. Most type descriptions do not // depend on a specific object's use of that type. 
string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) { if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) { // Need to create a magic type name which compacts the entire type information. string name = type_to_glsl(get_pointee_type(type)); for (size_t i = 0; i < type.array.size(); i++) { if (type.array_size_literal[i]) name += join(type.array[i], "_"); else name += join("id", type.array[i], "_"); } name += "Pointer"; return name; } switch (type.basetype) { case SPIRType::Struct: // Need OpName lookup here to get a "sensible" name for a struct. if (backend.explicit_struct_type) return join("struct ", to_name(type.self)); else return to_name(type.self); case SPIRType::Image: case SPIRType::SampledImage: return image_type_glsl(type, id); case SPIRType::Sampler: // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing // this distinction into the type system. return comparison_ids.count(id) ? 
"samplerShadow" : "sampler"; case SPIRType::AccelerationStructureNV: return "accelerationStructureNV"; case SPIRType::Void: return "void"; default: break; } if (type.basetype == SPIRType::UInt && is_legacy()) SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); if (type.vecsize == 1 && type.columns == 1) // Scalar builtin { switch (type.basetype) { case SPIRType::Boolean: return "bool"; case SPIRType::SByte: return backend.basic_int8_type; case SPIRType::UByte: return backend.basic_uint8_type; case SPIRType::Short: return backend.basic_int16_type; case SPIRType::UShort: return backend.basic_uint16_type; case SPIRType::Int: return backend.basic_int_type; case SPIRType::UInt: return backend.basic_uint_type; case SPIRType::AtomicCounter: return "atomic_uint"; case SPIRType::Half: return "float16_t"; case SPIRType::Float: return "float"; case SPIRType::Double: return "double"; case SPIRType::Int64: return "int64_t"; case SPIRType::UInt64: return "uint64_t"; default: return "???"; } } else if (type.vecsize > 1 && type.columns == 1) // Vector builtin { switch (type.basetype) { case SPIRType::Boolean: return join("bvec", type.vecsize); case SPIRType::SByte: return join("i8vec", type.vecsize); case SPIRType::UByte: return join("u8vec", type.vecsize); case SPIRType::Short: return join("i16vec", type.vecsize); case SPIRType::UShort: return join("u16vec", type.vecsize); case SPIRType::Int: return join("ivec", type.vecsize); case SPIRType::UInt: return join("uvec", type.vecsize); case SPIRType::Half: return join("f16vec", type.vecsize); case SPIRType::Float: return join("vec", type.vecsize); case SPIRType::Double: return join("dvec", type.vecsize); case SPIRType::Int64: return join("i64vec", type.vecsize); case SPIRType::UInt64: return join("u64vec", type.vecsize); default: return "???"; } } else if (type.vecsize == type.columns) // Simple Matrix builtin { switch (type.basetype) { case SPIRType::Boolean: return join("bmat", type.vecsize); case 
SPIRType::Int: return join("imat", type.vecsize); case SPIRType::UInt: return join("umat", type.vecsize); case SPIRType::Half: return join("f16mat", type.vecsize); case SPIRType::Float: return join("mat", type.vecsize); case SPIRType::Double: return join("dmat", type.vecsize); // Matrix types not supported for int64/uint64. default: return "???"; } } else { switch (type.basetype) { case SPIRType::Boolean: return join("bmat", type.columns, "x", type.vecsize); case SPIRType::Int: return join("imat", type.columns, "x", type.vecsize); case SPIRType::UInt: return join("umat", type.columns, "x", type.vecsize); case SPIRType::Half: return join("f16mat", type.columns, "x", type.vecsize); case SPIRType::Float: return join("mat", type.columns, "x", type.vecsize); case SPIRType::Double: return join("dmat", type.columns, "x", type.vecsize); // Matrix types not supported for int64/uint64. default: return "???"; } } } void CompilerGLSL::add_variable(unordered_set &variables_primary, const unordered_set &variables_secondary, string &name) { if (name.empty()) return; // Reserved for temporaries. if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) { name.clear(); return; } // Avoid double underscores. 
name = sanitize_underscores(name); update_name_cache(variables_primary, variables_secondary, name); } void CompilerGLSL::add_local_variable_name(uint32_t id) { add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); } void CompilerGLSL::add_resource_name(uint32_t id) { add_variable(resource_names, block_names, ir.meta[id].decoration.alias); } void CompilerGLSL::add_header_line(const std::string &line) { header_lines.push_back(line); } bool CompilerGLSL::has_extension(const std::string &ext) const { auto itr = find(begin(forced_extensions), end(forced_extensions), ext); return itr != end(forced_extensions); } void CompilerGLSL::require_extension(const std::string &ext) { if (!has_extension(ext)) forced_extensions.push_back(ext); } void CompilerGLSL::require_extension_internal(const string &ext) { if (backend.supports_extensions && !has_extension(ext)) { forced_extensions.push_back(ext); force_recompile(); } } void CompilerGLSL::flatten_buffer_block(uint32_t id) { auto &var = get(id); auto &type = get(var.basetype); auto name = to_name(type.self, false); auto &flags = ir.meta[type.self].decoration.decoration_flags; if (!type.array.empty()) SPIRV_CROSS_THROW(name + " is an array of UBOs."); if (type.basetype != SPIRType::Struct) SPIRV_CROSS_THROW(name + " is not a struct."); if (!flags.get(DecorationBlock)) SPIRV_CROSS_THROW(name + " is not a block."); if (type.member_types.empty()) SPIRV_CROSS_THROW(name + " is an empty struct."); flattened_buffer_blocks.insert(id); } bool CompilerGLSL::check_atomic_image(uint32_t id) { auto &type = expression_type(id); if (type.storage == StorageClassImage) { if (options.es && options.version < 320) require_extension_internal("GL_OES_shader_image_atomic"); auto *var = maybe_get_backing_variable(id); if (var) { auto &flags = ir.meta[var->self].decoration.decoration_flags; if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable)) { flags.clear(DecorationNonWritable); 
flags.clear(DecorationNonReadable); force_recompile(); } } return true; } else return false; } void CompilerGLSL::add_function_overload(const SPIRFunction &func) { Hasher hasher; for (auto &arg : func.arguments) { // Parameters can vary with pointer type or not, // but that will not change the signature in GLSL/HLSL, // so strip the pointer type before hashing. uint32_t type_id = get_pointee_type_id(arg.type); auto &type = get(type_id); if (!combined_image_samplers.empty()) { // If we have combined image samplers, we cannot really trust the image and sampler arguments // we pass down to callees, because they may be shuffled around. // Ignore these arguments, to make sure that functions need to differ in some other way // to be considered different overloads. if (type.basetype == SPIRType::SampledImage || (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) { continue; } } hasher.u32(type_id); } uint64_t types_hash = hasher.get(); auto function_name = to_name(func.self); auto itr = function_overloads.find(function_name); if (itr != end(function_overloads)) { // There exists a function with this name already. auto &overloads = itr->second; if (overloads.count(types_hash) != 0) { // Overload conflict, assign a new name. add_resource_name(func.self); function_overloads[to_name(func.self)].insert(types_hash); } else { // Can reuse the name. overloads.insert(types_hash); } } else { // First time we see this function name. add_resource_name(func.self); function_overloads[to_name(func.self)].insert(types_hash); } } void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) { if (func.self != ir.default_entry_point) add_function_overload(func); // Avoid shadow declarations. 
local_variable_names = resource_names; string decl; auto &type = get(func.return_type); decl += flags_to_qualifiers_glsl(type, return_flags); decl += type_to_glsl(type); decl += type_to_array_glsl(type); decl += " "; if (func.self == ir.default_entry_point) { decl += "main"; processing_entry_point = true; } else decl += to_name(func.self); decl += "("; SmallVector arglist; for (auto &arg : func.arguments) { // Do not pass in separate images or samplers if we're remapping // to combined image samplers. if (skip_argument(arg.id)) continue; // Might change the variable name if it already exists in this function. // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation // to use same name for variables. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. add_local_variable_name(arg.id); arglist.push_back(argument_decl(arg)); // Hold a pointer to the parameter so we can invalidate the readonly field if needed. auto *var = maybe_get(arg.id); if (var) var->parameter = &arg; } for (auto &arg : func.shadow_arguments) { // Might change the variable name if it already exists in this function. // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation // to use same name for variables. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. add_local_variable_name(arg.id); arglist.push_back(argument_decl(arg)); // Hold a pointer to the parameter so we can invalidate the readonly field if needed. auto *var = maybe_get(arg.id); if (var) var->parameter = &arg; } decl += merge(arglist); decl += ")"; statement(decl); } void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) { // Avoid potential cycles. if (func.active) return; func.active = true; // If we depend on a function, emit that function before we emit our own function. 
for (auto block : func.blocks) { auto &b = get(block); for (auto &i : b.ops) { auto ops = stream(i); auto op = static_cast(i.op); if (op == OpFunctionCall) { // Recursively emit functions which are called. uint32_t id = ops[2]; emit_function(get(id), ir.meta[ops[1]].decoration.decoration_flags); } } } if (func.entry_line.file_id != 0) emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); emit_function_prototype(func, return_flags); begin_scope(); if (func.self == ir.default_entry_point) emit_entry_point_declarations(); current_function = &func; auto &entry_block = get(func.entry_block); sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); for (auto &array : func.constant_arrays_needed_on_stack) { auto &c = get(array); auto &type = get(c.constant_type); statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); } for (auto &v : func.local_variables) { auto &var = get(v); var.deferred_declaration = false; if (var.storage == StorageClassWorkgroup) { // Special variable type which cannot have initializer, // need to be declared as standalone variables. // Comes from MSL which can push global variables as local variables in main function. add_local_variable_name(var.self); statement(variable_decl(var), ";"); var.deferred_declaration = false; } else if (var.storage == StorageClassPrivate) { // These variables will not have had their CFG usage analyzed, so move it to the entry block. // Comes from MSL which can push global variables as local variables in main function. // We could just declare them right now, but we would miss out on an important initialization case which is // LUT declaration in MSL. // If we don't declare the variable when it is assigned we're forced to go through a helper function // which copies elements one by one. 
add_local_variable_name(var.self); auto &dominated = entry_block.dominated_variables; if (find(begin(dominated), end(dominated), var.self) == end(dominated)) entry_block.dominated_variables.push_back(var.self); var.deferred_declaration = true; } else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) { // No need to declare this variable, it has a static expression. var.deferred_declaration = false; } else if (expression_is_lvalue(v)) { add_local_variable_name(var.self); if (var.initializer) statement(variable_decl_function_local(var), ";"); else { // Don't declare variable until first use to declutter the GLSL output quite a lot. // If we don't touch the variable before first branch, // declare it then since we need variable declaration to be in top scope. var.deferred_declaration = true; } } else { // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. // For these types (non-lvalue), we enforce forwarding through a shadowed variable. // This means that when we OpStore to these variables, we just write in the expression ID directly. // This breaks any kind of branching, since the variable must be statically assigned. // Branching on samplers and images would be pretty much impossible to fake in GLSL. var.statically_assigned = true; } var.loop_variable_enable = false; // Loop variables are never declared outside their for-loop, so block any implicit declaration. if (var.loop_variable) var.deferred_declaration = false; } // Enforce declaration order for regression testing purposes. 
for (auto &block_id : func.blocks) { auto &block = get(block_id); sort(begin(block.dominated_variables), end(block.dominated_variables)); } for (auto &line : current_function->fixup_hooks_in) line(); emit_block_chain(entry_block); end_scope(); processing_entry_point = false; statement(""); // Make sure deferred declaration state for local variables is cleared when we are done with function. // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. for (auto &v : func.local_variables) { auto &var = get(v); var.deferred_declaration = false; } } void CompilerGLSL::emit_fixup() { auto &execution = get_entry_point(); if (execution.model == ExecutionModelVertex) { if (options.vertex.fixup_clipspace) { const char *suffix = backend.float_literal_suffix ? "f" : ""; statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); } if (options.vertex.flip_vert_y) statement("gl_Position.y = -gl_Position.y;"); } } bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to) { auto &child = get(to); for (auto &phi : child.phi_variables) if (phi.parent == from) return true; return false; } void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) { auto &child = get(to); if (child.ignore_phi_from_block == from) return; unordered_set temporary_phi_variables; for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) { auto &phi = *itr; if (phi.parent == from) { auto &var = get(phi.function_variable); // A Phi variable might be a loop variable, so flush to static expression. if (var.loop_variable && !var.loop_variable_enable) var.static_expression = phi.local_variable; else { flush_variable_declaration(phi.function_variable); // Check if we are going to write to a Phi variable that another statement will read from // as part of another Phi node in our target block. // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. 
// This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. bool need_saved_temporary = find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { return future_phi.local_variable == phi.function_variable && future_phi.parent == from; }) != end(child.phi_variables); if (need_saved_temporary) { // Need to make sure we declare the phi variable with a copy at the right scope. // We cannot safely declare a temporary here since we might be inside a continue block. if (!var.allocate_temporary_copy) { var.allocate_temporary_copy = true; force_recompile(); } statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";"); temporary_phi_variables.insert(phi.function_variable); } // This might be called in continue block, so make sure we // use this to emit ESSL 1.0 compliant increments/decrements. auto lhs = to_expression(phi.function_variable); string rhs; if (temporary_phi_variables.count(phi.local_variable)) rhs = join("_", phi.local_variable, "_copy"); else rhs = to_pointer_expression(phi.local_variable); if (!optimize_read_modify_write(get(var.basetype), lhs, rhs)) statement(lhs, " = ", rhs, ";"); } register_write(phi.function_variable); } } } void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) { auto &to_block = get(to); if (from == to) return; assert(is_continue(to)); if (to_block.complex_continue) { // Just emit the whole block chain as is. auto usage_counts = expression_usage_counts; auto invalid = invalid_expressions; emit_block_chain(to_block); // Expression usage counts and invalid expressions // are moot after returning from the continue block. // Since we emit the same block multiple times, // we don't want to invalidate ourselves. 
expression_usage_counts = usage_counts; invalid_expressions = invalid; } else { auto &from_block = get(from); bool outside_control_flow = false; uint32_t loop_dominator = 0; // FIXME: Refactor this to not use the old loop_dominator tracking. if (from_block.merge_block) { // If we are a loop header, we don't set the loop dominator, // so just use "self" here. loop_dominator = from; } else if (from_block.loop_dominator != SPIRBlock::NoDominator) { loop_dominator = from_block.loop_dominator; } if (loop_dominator != 0) { auto &dominator = get(loop_dominator); // For non-complex continue blocks, we implicitly branch to the continue block // by having the continue block be part of the loop header in for (; ; continue-block). outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); } // Some simplification for for-loops. We always end up with a useless continue; // statement since we branch to a loop block. // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block, // we can avoid writing out an explicit continue statement. // Similar optimization to return statements if we know we're outside flow control. if (!outside_control_flow) statement("continue;"); } } void CompilerGLSL::branch(uint32_t from, uint32_t to) { flush_phi(from, to); flush_control_dependent_expressions(from); flush_all_active_variables(); // This is only a continue if we branch to our loop dominator. if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get(from).loop_dominator == to) { // This can happen if we had a complex continue block which was emitted. // Once the continue block tries to branch to the loop header, just emit continue; // and end the chain here. statement("continue;"); } else if (is_break(to)) { // Very dirty workaround. // Switch constructs are able to break, but they cannot break out of a loop at the same time. 
// Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, // write to the ladder here, and defer the break. // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. if (current_emitting_switch && is_loop_break(to) && current_emitting_switch->loop_dominator != ~0u && get(current_emitting_switch->loop_dominator).merge_block == to) { if (!current_emitting_switch->need_ladder_break) { force_recompile(); current_emitting_switch->need_ladder_break = true; } statement("_", current_emitting_switch->self, "_ladder_break = true;"); } statement("break;"); } else if (is_continue(to) || (from == to)) { // For from == to case can happen for a do-while loop which branches into itself. // We don't mark these cases as continue blocks, but the only possible way to branch into // ourselves is through means of continue blocks. branch_to_continue(from, to); } else if (!is_conditional(to)) emit_block_chain(get(to)); // It is important that we check for break before continue. // A block might serve two purposes, a break block for the inner scope, and // a continue block in the outer scope. // Inner scope always takes precedence. } void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block) { // If we branch directly to a selection merge target, we don't really need a code path. 
bool true_sub = !is_conditional(true_block); bool false_sub = !is_conditional(false_block); if (true_sub) { emit_block_hints(get(from)); statement("if (", to_expression(cond), ")"); begin_scope(); branch(from, true_block); end_scope(); if (false_sub || is_continue(false_block) || is_break(false_block)) { statement("else"); begin_scope(); branch(from, false_block); end_scope(); } else if (flush_phi_required(from, false_block)) { statement("else"); begin_scope(); flush_phi(from, false_block); end_scope(); } } else if (false_sub && !true_sub) { // Only need false path, use negative conditional. emit_block_hints(get(from)); statement("if (!", to_enclosed_expression(cond), ")"); begin_scope(); branch(from, false_block); end_scope(); if (is_continue(true_block) || is_break(true_block)) { statement("else"); begin_scope(); branch(from, true_block); end_scope(); } else if (flush_phi_required(from, true_block)) { statement("else"); begin_scope(); flush_phi(from, true_block); end_scope(); } } } // FIXME: This currently cannot handle complex continue blocks // as in do-while. // This should be seen as a "trivial" continue block. string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) { auto *block = &get(continue_block); // While emitting the continue block, declare_temporary will check this // if we have to emit temporaries. current_continue_block = block; SmallVector statements; // Capture all statements into our list. auto *old = redirect_statement; redirect_statement = &statements; // Stamp out all blocks one after each other. while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) { // Write out all instructions we have in this block. emit_block_instructions(*block); // For plain branchless for/while continue blocks. if (block->next_block) { flush_phi(continue_block, block->next_block); block = &get(block->next_block); } // For do while blocks. The last block will be a select block. 
else if (block->true_block && follow_true_block) { flush_phi(continue_block, block->true_block); block = &get(block->true_block); } else if (block->false_block && follow_false_block) { flush_phi(continue_block, block->false_block); block = &get(block->false_block); } else { SPIRV_CROSS_THROW("Invalid continue block detected!"); } } // Restore old pointer. redirect_statement = old; // Somewhat ugly, strip off the last ';' since we use ',' instead. // Ideally, we should select this behavior in statement(). for (auto &s : statements) { if (!s.empty() && s.back() == ';') s.erase(s.size() - 1, 1); } current_continue_block = nullptr; return merge(statements); } void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) { // While loops do not take initializers, so declare all of them outside. for (auto &loop_var : block.loop_variables) { auto &var = get(loop_var); statement(variable_decl(var), ";"); } } string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) { if (block.loop_variables.empty()) return ""; bool same_types = for_loop_initializers_are_same_type(block); // We can only declare for loop initializers if all variables are of same type. // If we cannot do this, declare individual variables before the loop header. // We might have a loop variable candidate which was not assigned to for some reason. uint32_t missing_initializers = 0; for (auto &variable : block.loop_variables) { uint32_t expr = get(variable).static_expression; // Sometimes loop variables are initialized with OpUndef, but we can just declare // a plain variable without initializer in this case. 
if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) missing_initializers++; } if (block.loop_variables.size() == 1 && missing_initializers == 0) { return variable_decl(get(block.loop_variables.front())); } else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) { for (auto &loop_var : block.loop_variables) statement(variable_decl(get(loop_var)), ";"); return ""; } else { // We have a mix of loop variables, either ones with a clear initializer, or ones without. // Separate the two streams. string expr; for (auto &loop_var : block.loop_variables) { uint32_t static_expr = get(loop_var).static_expression; if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) { statement(variable_decl(get(loop_var)), ";"); } else { auto &var = get(loop_var); auto &type = get_variable_data_type(var); if (expr.empty()) { // For loop initializers are of the form (block.true_block), get(block.merge_block))) condition = join("!", enclose_expression(condition)); statement("while (", condition, ")"); break; } default: block.disable_block_optimization = true; force_recompile(); begin_scope(); // We'll see an end_scope() later. return false; } begin_scope(); return true; } else { block.disable_block_optimization = true; force_recompile(); begin_scope(); // We'll see an end_scope() later. return false; } } else if (method == SPIRBlock::MergeToDirectForLoop) { auto &child = get(block.next_block); // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. flush_undeclared_variables(child); uint32_t current_count = statement_count; // If we're trying to create a true for loop, // we need to make sure that all opcodes before branch statement do not actually emit any code. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. 
emit_block_instructions(child); bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); if (current_count == statement_count && condition_is_temporary) { uint32_t target_block = child.true_block; switch (continue_type) { case SPIRBlock::ForLoop: { // Important that we do this in this order because // emitting the continue block can invalidate the condition expression. auto initializer = emit_for_loop_initializers(block); auto condition = to_expression(child.condition); // Condition might have to be inverted. if (execution_is_noop(get(child.true_block), get(block.merge_block))) { condition = join("!", enclose_expression(condition)); target_block = child.false_block; } auto continue_block = emit_continue_block(block.continue_block, false, false); emit_block_hints(block); statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); break; } case SPIRBlock::WhileLoop: { emit_while_loop_initializers(block); emit_block_hints(block); auto condition = to_expression(child.condition); // Condition might have to be inverted. if (execution_is_noop(get(child.true_block), get(block.merge_block))) { condition = join("!", enclose_expression(condition)); target_block = child.false_block; } statement("while (", condition, ")"); break; } default: block.disable_block_optimization = true; force_recompile(); begin_scope(); // We'll see an end_scope() later. return false; } begin_scope(); branch(child.self, target_block); return true; } else { block.disable_block_optimization = true; force_recompile(); begin_scope(); // We'll see an end_scope() later. return false; } } else return false; } void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) { for (auto &v : block.dominated_variables) flush_variable_declaration(v); } void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries) { // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. 
// Need to sort these to ensure that reference output is stable. sort(begin(temporaries), end(temporaries), [](const pair &a, const pair &b) { return a.second < b.second; }); for (auto &tmp : temporaries) { add_local_variable_name(tmp.second); auto &flags = ir.meta[tmp.second].decoration.decoration_flags; auto &type = get(tmp.first); statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";"); hoisted_temporaries.insert(tmp.second); forced_temporaries.insert(tmp.second); // The temporary might be read from before it's assigned, set up the expression now. set(tmp.second, to_name(tmp.second), tmp.first, true); } } void CompilerGLSL::emit_block_chain(SPIRBlock &block) { bool select_branch_to_true_block = false; bool select_branch_to_false_block = false; bool skip_direct_branch = false; bool emitted_loop_header_variables = false; bool force_complex_continue_block = false; emit_hoisted_temporaries(block.declare_temporary); SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; if (block.continue_block) continue_type = continue_block_type(get(block.continue_block)); // If we have loop variables, stop masking out access to the variable now. for (auto var_id : block.loop_variables) { auto &var = get(var_id); var.loop_variable_enable = true; // We're not going to declare the variable directly, so emit a copy here. emit_variable_temporary_copies(var); } // Remember deferred declaration state. We will restore it before returning. SmallVector rearm_dominated_variables(block.dominated_variables.size()); for (size_t i = 0; i < block.dominated_variables.size(); i++) { uint32_t var_id = block.dominated_variables[i]; auto &var = get(var_id); rearm_dominated_variables[i] = var.deferred_declaration; } // This is the method often used by spirv-opt to implement loops. // The loop header goes straight into the continue block. 
// However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, // it *MUST* be used in the continue block. This loop method will not work. if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) { flush_undeclared_variables(block); if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) { if (execution_is_noop(get(block.true_block), get(block.merge_block))) select_branch_to_false_block = true; else select_branch_to_true_block = true; emitted_loop_header_variables = true; force_complex_continue_block = true; } } // This is the older loop behavior in glslang which branches to loop body directly from the loop header. else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) { flush_undeclared_variables(block); if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) { // The body of while, is actually just the true (or false) block, so always branch there unconditionally. if (execution_is_noop(get(block.true_block), get(block.merge_block))) select_branch_to_false_block = true; else select_branch_to_true_block = true; emitted_loop_header_variables = true; } } // This is the newer loop behavior in glslang which branches from Loop header directly to // a new block, which in turn has a OpBranchSelection without a selection merge. else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) { flush_undeclared_variables(block); if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) { skip_direct_branch = true; emitted_loop_header_variables = true; } } else if (continue_type == SPIRBlock::DoWhileLoop) { flush_undeclared_variables(block); emit_while_loop_initializers(block); emitted_loop_header_variables = true; // We have some temporaries where the loop header is the dominator. // We risk a case where we have code like: // for (;;) { create-temporary; break; } consume-temporary; // so force-declare temporaries here. 
emit_hoisted_temporaries(block.potential_declare_temporary); statement("do"); begin_scope(); emit_block_instructions(block); } else if (block.merge == SPIRBlock::MergeLoop) { flush_undeclared_variables(block); emit_while_loop_initializers(block); emitted_loop_header_variables = true; // We have a generic loop without any distinguishable pattern like for, while or do while. get(block.continue_block).complex_continue = true; continue_type = SPIRBlock::ComplexLoop; // We have some temporaries where the loop header is the dominator. // We risk a case where we have code like: // for (;;) { create-temporary; break; } consume-temporary; // so force-declare temporaries here. emit_hoisted_temporaries(block.potential_declare_temporary); statement("for (;;)"); begin_scope(); emit_block_instructions(block); } else { emit_block_instructions(block); } // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem // as writes to said loop variables might have been masked out, we need a recompile. if (!emitted_loop_header_variables && !block.loop_variables.empty()) { force_recompile(); for (auto var : block.loop_variables) get(var).loop_variable = false; block.loop_variables.clear(); } flush_undeclared_variables(block); bool emit_next_block = true; // Handle end of block. switch (block.terminator) { case SPIRBlock::Direct: // True when emitting complex continue block. if (block.loop_dominator == block.next_block) { branch(block.self, block.next_block); emit_next_block = false; } // True if MergeToDirectForLoop succeeded. else if (skip_direct_branch) emit_next_block = false; else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) { branch(block.self, block.next_block); emit_next_block = false; } break; case SPIRBlock::Select: // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. 
if (select_branch_to_true_block) { if (force_complex_continue_block) { assert(block.true_block == block.continue_block); // We're going to emit a continue block directly here, so make sure it's marked as complex. auto &complex_continue = get(block.continue_block).complex_continue; bool old_complex = complex_continue; complex_continue = true; branch(block.self, block.true_block); complex_continue = old_complex; } else branch(block.self, block.true_block); } else if (select_branch_to_false_block) { if (force_complex_continue_block) { assert(block.false_block == block.continue_block); // We're going to emit a continue block directly here, so make sure it's marked as complex. auto &complex_continue = get(block.continue_block).complex_continue; bool old_complex = complex_continue; complex_continue = true; branch(block.self, block.false_block); complex_continue = old_complex; } else branch(block.self, block.false_block); } else branch(block.self, block.condition, block.true_block, block.false_block); break; case SPIRBlock::MultiSelect: { auto &type = expression_type(block.condition); bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || type.basetype == SPIRType::UByte; if (block.merge == SPIRBlock::MergeNone) SPIRV_CROSS_THROW("Switch statement is not structured"); if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) { // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. 
SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); } const char *label_suffix = ""; if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) label_suffix = "u"; else if (type.basetype == SPIRType::UShort) label_suffix = backend.uint16_t_literal_suffix; else if (type.basetype == SPIRType::Short) label_suffix = backend.int16_t_literal_suffix; SPIRBlock *old_emitting_switch = current_emitting_switch; current_emitting_switch = █ if (block.need_ladder_break) statement("bool _", block.self, "_ladder_break = false;"); // Find all unique case constructs. unordered_map> case_constructs; SmallVector block_declaration_order; SmallVector literals_to_merge; // If a switch case branches to the default block for some reason, we can just remove that literal from consideration // and let the default: block handle it. // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. for (auto &c : block.cases) { if (c.block != block.next_block && c.block != block.default_block) { if (!case_constructs.count(c.block)) block_declaration_order.push_back(c.block); case_constructs[c.block].push_back(c.value); } else if (c.block == block.next_block && block.default_block != block.next_block) { // We might have to flush phi inside specific case labels. // If we can piggyback on default:, do so instead. literals_to_merge.push_back(c.value); } } // Empty literal array -> default. if (block.default_block != block.next_block) { auto &default_block = get(block.default_block); // We need to slide in the default block somewhere in this chain // if there are fall-through scenarios since the default is declared separately in OpSwitch. // Only consider trivial fall-through cases here. 
size_t num_blocks = block_declaration_order.size(); bool injected_block = false; for (size_t i = 0; i < num_blocks; i++) { auto &case_block = get(block_declaration_order[i]); if (execution_is_direct_branch(case_block, default_block)) { // Fallthrough to default block, we must inject the default block here. block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); injected_block = true; break; } else if (execution_is_direct_branch(default_block, case_block)) { // Default case is falling through to another case label, we must inject the default block here. block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); injected_block = true; break; } } // Order does not matter. if (!injected_block) block_declaration_order.push_back(block.default_block); case_constructs[block.default_block] = {}; } size_t num_blocks = block_declaration_order.size(); const auto to_case_label = [](uint32_t literal, bool is_unsigned_case) -> string { return is_unsigned_case ? convert_to_string(literal) : convert_to_string(int32_t(literal)); }; // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, // we need to flush phi nodes outside the switch block in a branch, // and skip any Phi handling inside the case label to make fall-through work as expected. // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this // inside the case label if at all possible. for (size_t i = 1; i < num_blocks; i++) { if (flush_phi_required(block.self, block_declaration_order[i]) && flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) { uint32_t target_block = block_declaration_order[i]; // Make sure we flush Phi, it might have been marked to be ignored earlier. 
get(target_block).ignore_phi_from_block = 0; auto &literals = case_constructs[target_block]; if (literals.empty()) { // Oh boy, gotta make a complete negative test instead! o.o // Find all possible literals that would *not* make us enter the default block. // If none of those literals match, we flush Phi ... SmallVector conditions; for (size_t j = 0; j < num_blocks; j++) { auto &negative_literals = case_constructs[block_declaration_order[j]]; for (auto &case_label : negative_literals) conditions.push_back(join(to_enclosed_expression(block.condition), " != ", to_case_label(case_label, unsigned_case))); } statement("if (", merge(conditions, " && "), ")"); begin_scope(); flush_phi(block.self, target_block); end_scope(); } else { SmallVector conditions; conditions.reserve(literals.size()); for (auto &case_label : literals) conditions.push_back(join(to_enclosed_expression(block.condition), " == ", to_case_label(case_label, unsigned_case))); statement("if (", merge(conditions, " || "), ")"); begin_scope(); flush_phi(block.self, target_block); end_scope(); } // Mark the block so that we don't flush Phi from header to case label. get(target_block).ignore_phi_from_block = block.self; } } emit_block_hints(block); statement("switch (", to_expression(block.condition), ")"); begin_scope(); for (size_t i = 0; i < num_blocks; i++) { uint32_t target_block = block_declaration_order[i]; auto &literals = case_constructs[target_block]; if (literals.empty()) { // Default case. statement("default:"); } else { for (auto &case_literal : literals) { // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); } } auto &case_block = get(target_block); if (backend.support_case_fallthrough && i + 1 < num_blocks && execution_is_direct_branch(case_block, get(block_declaration_order[i + 1]))) { // We will fall through here, so just terminate the block chain early. 
// We still need to deal with Phi potentially. // No need for a stack-like thing here since we only do fall-through when there is a // single trivial branch to fall-through target.. current_emitting_switch_fallthrough = true; } else current_emitting_switch_fallthrough = false; begin_scope(); branch(block.self, target_block); end_scope(); current_emitting_switch_fallthrough = false; } // Might still have to flush phi variables if we branch from loop header directly to merge target. if (flush_phi_required(block.self, block.next_block)) { if (block.default_block == block.next_block || !literals_to_merge.empty()) { for (auto &case_literal : literals_to_merge) statement("case ", to_case_label(case_literal, unsigned_case), label_suffix, ":"); if (block.default_block == block.next_block) statement("default:"); begin_scope(); flush_phi(block.self, block.next_block); statement("break;"); end_scope(); } } end_scope(); if (block.need_ladder_break) { statement("if (_", block.self, "_ladder_break)"); begin_scope(); statement("break;"); end_scope(); } current_emitting_switch = old_emitting_switch; break; } case SPIRBlock::Return: for (auto &line : current_function->fixup_hooks_out) line(); if (processing_entry_point) emit_fixup(); if (block.return_value) { auto &type = expression_type(block.return_value); if (!type.array.empty() && !backend.can_return_array) { // If we cannot return arrays, we will have a special out argument we can write to instead. // The backend is responsible for setting this up, and redirection the return values as appropriate. if (ir.ids[block.return_value].get_type() != TypeUndef) emit_array_copy("SPIRV_Cross_return_value", block.return_value); if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || block.loop_dominator != SPIRBlock::NoDominator) { statement("return;"); } } else { // OpReturnValue can return Undef, so don't emit anything for this case. 
if (ir.ids[block.return_value].get_type() != TypeUndef) statement("return ", to_expression(block.return_value), ";"); } } // If this block is the very final block and not called from control flow, // we do not need an explicit return which looks out of place. Just end the function here. // In the very weird case of for(;;) { return; } executing return is unconditional, // but we actually need a return here ... else if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || block.loop_dominator != SPIRBlock::NoDominator) { statement("return;"); } break; case SPIRBlock::Kill: statement(backend.discard_literal, ";"); break; case SPIRBlock::Unreachable: emit_next_block = false; break; default: SPIRV_CROSS_THROW("Unimplemented block terminator."); } if (block.next_block && emit_next_block) { // If we hit this case, we're dealing with an unconditional branch, which means we will output // that block after this. If we had selection merge, we already flushed phi variables. if (block.merge != SPIRBlock::MergeSelection) flush_phi(block.self, block.next_block); // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. if (!current_emitting_switch_fallthrough) { // For merge selects we might have ignored the fact that a merge target // could have been a break; or continue; // We will need to deal with it here. if (is_loop_break(block.next_block)) { // Cannot check for just break, because switch statements will also use break. 
assert(block.merge == SPIRBlock::MergeSelection); statement("break;"); } else if (is_continue(block.next_block)) { assert(block.merge == SPIRBlock::MergeSelection); branch_to_continue(block.self, block.next_block); } else if (block.self != block.next_block) emit_block_chain(get(block.next_block)); } } if (block.merge == SPIRBlock::MergeLoop) { if (continue_type == SPIRBlock::DoWhileLoop) { // Make sure that we run the continue block to get the expressions set, but this // should become an empty string. // We have no fallbacks if we cannot forward everything to temporaries ... const auto &continue_block = get(block.continue_block); bool positive_test = execution_is_noop(get(continue_block.true_block), get(continue_block.loop_dominator)); uint32_t current_count = statement_count; auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); if (statement_count != current_count) { // The DoWhile block has side effects, force ComplexLoop pattern next pass. get(block.continue_block).complex_continue = true; force_recompile(); } // Might have to invert the do-while test here. auto condition = to_expression(continue_block.condition); if (!positive_test) condition = join("!", enclose_expression(condition)); end_scope_decl(join("while (", condition, ")")); } else end_scope(); // We cannot break out of two loops at once, so don't check for break; here. // Using block.self as the "from" block isn't quite right, but it has the same scope // and dominance structure, so it's fine. if (is_continue(block.merge_block)) branch_to_continue(block.self, block.merge_block); else emit_block_chain(get(block.merge_block)); } // Forget about control dependent expressions now. block.invalidate_expressions.clear(); // After we return, we must be out of scope, so if we somehow have to re-emit this function, // re-declare variables if necessary. 
assert(rearm_dominated_variables.size() == block.dominated_variables.size()); for (size_t i = 0; i < block.dominated_variables.size(); i++) { uint32_t var = block.dominated_variables[i]; get(var).deferred_declaration = rearm_dominated_variables[i]; } // Just like for deferred declaration, we need to forget about loop variable enable // if our block chain is reinstantiated later. for (auto &var_id : block.loop_variables) get(var_id).loop_variable_enable = false; } void CompilerGLSL::begin_scope() { statement("{"); indent++; } void CompilerGLSL::end_scope() { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("}"); } void CompilerGLSL::end_scope_decl() { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("};"); } void CompilerGLSL::end_scope_decl(const string &decl) { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("} ", decl, ";"); } void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) { // If our variable is remapped, and we rely on type-remapping information as // well, then we cannot pass the variable as a function parameter. // Fixing this is non-trivial without stamping out variants of the same function, // so for now warn about this and suggest workarounds instead. for (uint32_t i = 0; i < length; i++) { auto *var = maybe_get(args[i]); if (!var || !var->remapped_variable) continue; auto &type = get(var->basetype); if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) { SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " "This will not work correctly because type-remapping information is lost. 
" "To workaround, please consider not passing the subpass input as a function parameter, " "or use in/out variables instead which do not need type remapping information."); } } } const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) { // FIXME: This is kind of hacky. There should be a cleaner way. auto offset = uint32_t(&instr - current_emitting_block->ops.data()); if ((offset + 1) < current_emitting_block->ops.size()) return ¤t_emitting_block->ops[offset + 1]; else return nullptr; } uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) { return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); } void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id) { statement(lhs, " = ", to_expression(rhs_id), ";"); } void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) { if (!backend.force_gl_in_out_block) return; // This path is only relevant for GL backends. auto *var = maybe_get(source_id); if (!var) return; if (var->storage != StorageClassInput) return; auto &type = get_variable_data_type(*var); if (type.array.empty()) return; auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); bool is_builtin = is_builtin_variable(*var) && (builtin == BuiltInPointSize || builtin == BuiltInPosition); bool is_tess = is_tessellation_shader(); // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. // We must unroll the array load. // For builtins, we couldn't catch this case normally, // because this is resolved in the OpAccessChain in most cases. // If we load the entire array, we have no choice but to unroll here. 
if (is_builtin || is_tess) { auto new_expr = join("_", target_id, "_unrolled"); statement(variable_decl(type, new_expr, target_id), ";"); string array_expr; if (type.array_size_literal.front()) { array_expr = convert_to_string(type.array.front()); if (type.array.front() == 0) SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); } else array_expr = to_expression(type.array.front()); // The array size might be a specialization constant, so use a for-loop instead. statement("for (int i = 0; i < int(", array_expr, "); i++)"); begin_scope(); if (is_builtin) statement(new_expr, "[i] = gl_in[i].", expr, ";"); else statement(new_expr, "[i] = ", expr, "[i];"); end_scope(); expr = move(new_expr); } } void CompilerGLSL::bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) { auto *var = maybe_get_backing_variable(source_id); if (var) source_id = var->self; // Only interested in standalone builtin variables. if (!has_decoration(source_id, DecorationBuiltIn)) return; auto builtin = static_cast(get_decoration(source_id, DecorationBuiltIn)); auto expected_type = expr_type.basetype; // TODO: Fill in for more builtins. 
switch (builtin) { case BuiltInLayer: case BuiltInPrimitiveId: case BuiltInViewportIndex: case BuiltInInstanceId: case BuiltInInstanceIndex: case BuiltInVertexId: case BuiltInVertexIndex: case BuiltInSampleId: case BuiltInBaseVertex: case BuiltInBaseInstance: case BuiltInDrawIndex: case BuiltInFragStencilRefEXT: expected_type = SPIRType::Int; break; case BuiltInGlobalInvocationId: case BuiltInLocalInvocationId: case BuiltInWorkgroupId: case BuiltInLocalInvocationIndex: case BuiltInWorkgroupSize: case BuiltInNumWorkgroups: expected_type = SPIRType::UInt; break; default: break; } if (expected_type != expr_type.basetype) expr = bitcast_expression(expr_type, expected_type, expr); } void CompilerGLSL::bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) { // Only interested in standalone builtin variables. if (!has_decoration(target_id, DecorationBuiltIn)) return; auto builtin = static_cast(get_decoration(target_id, DecorationBuiltIn)); auto expected_type = expr_type.basetype; // TODO: Fill in for more builtins. switch (builtin) { case BuiltInLayer: case BuiltInPrimitiveId: case BuiltInViewportIndex: case BuiltInFragStencilRefEXT: expected_type = SPIRType::Int; break; default: break; } if (expected_type != expr_type.basetype) { auto type = expr_type; type.basetype = expected_type; expr = bitcast_expression(type, expr_type.basetype, expr); } } void CompilerGLSL::convert_non_uniform_expression(const SPIRType &type, std::string &expr) { if (*backend.nonuniform_qualifier == '\0') return; // Handle SPV_EXT_descriptor_indexing. if (type.basetype == SPIRType::Sampler || type.basetype == SPIRType::SampledImage || type.basetype == SPIRType::Image) { // The image/sampler ID must be declared as non-uniform. // However, it is not legal GLSL to have // nonuniformEXT(samplers[index]), so we must move the nonuniform qualifier // to the array indexing, like // samplers[nonuniformEXT(index)]. 
// While the access chain will generally be nonuniformEXT, it's not necessarily so, // so we might have to fixup the OpLoad-ed expression late. auto start_array_index = expr.find_first_of('['); auto end_array_index = expr.find_last_of(']'); // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's // nothing we can do here to express that. if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) return; start_array_index++; expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", expr.substr(start_array_index, end_array_index - start_array_index), ")", expr.substr(end_array_index, string::npos)); } } void CompilerGLSL::emit_block_hints(const SPIRBlock &) { } void CompilerGLSL::preserve_alias_on_reset(uint32_t id) { preserved_aliases[id] = get_name(id); } void CompilerGLSL::reset_name_caches() { for (auto &preserved : preserved_aliases) set_name(preserved.first, preserved.second); preserved_aliases.clear(); resource_names.clear(); block_input_names.clear(); block_output_names.clear(); block_ubo_names.clear(); block_ssbo_names.clear(); block_names.clear(); function_overloads.clear(); } void CompilerGLSL::fixup_type_alias() { // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. // FIXME: Multiple alias types which are both block-like will be awkward, for now, it's best to just drop the type // alias if the slave type is a block type. ir.for_each_typed_id([&](uint32_t self, SPIRType &type) { if (type.type_alias && type_is_block_like(type)) { // Become the master. 
ir.for_each_typed_id([&](uint32_t other_id, SPIRType &other_type) { if (other_id == type.self) return; if (other_type.type_alias == type.type_alias) other_type.type_alias = type.self; }); this->get(type.type_alias).type_alias = self; type.type_alias = 0; } }); ir.for_each_typed_id([&](uint32_t, SPIRType &type) { if (type.type_alias && type_is_block_like(type)) { // This is not allowed, drop the type_alias. type.type_alias = 0; } }); } void CompilerGLSL::reorder_type_alias() { // Reorder declaration of types so that the master of the type alias is always emitted first. // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. auto loop_lock = ir.create_loop_hard_lock(); auto &type_ids = ir.ids_for_type[TypeType]; for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) { auto &type = get(*alias_itr); if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) { // We will skip declaring this type, so make sure the type_alias type comes before. auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias); assert(master_itr != end(type_ids)); if (alias_itr < master_itr) { // Must also swap the type order for the constant-type joined array. auto &joined_types = ir.ids_for_constant_or_type; auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); assert(alt_alias_itr != end(joined_types)); assert(alt_master_itr != end(joined_types)); swap(*alias_itr, *master_itr); swap(*alt_alias_itr, *alt_master_itr); } } } } void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) { // If we are redirecting statements, ignore the line directive. // Common case here is continue blocks. 
if (redirect_statement) return; if (options.emit_line_directives) { require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); statement_no_indent("#line ", line_literal, " \"", get(file_id).str, "\""); } } void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id) { // SPIR-V might only tag the very last ID with NonUniformEXT, but for codegen, // we need to know NonUniformEXT a little earlier, when the resource is actually loaded. // Back-propagate the qualifier based on the expression dependency chain. if (!has_decoration(id, DecorationNonUniformEXT)) { set_decoration(id, DecorationNonUniformEXT); force_recompile(); } auto *e = maybe_get(id); auto *combined = maybe_get(id); auto *chain = maybe_get(id); if (e) { for (auto &expr : e->expression_dependencies) propagate_nonuniform_qualifier(expr); for (auto &expr : e->implied_read_expressions) propagate_nonuniform_qualifier(expr); } else if (combined) { propagate_nonuniform_qualifier(combined->image); propagate_nonuniform_qualifier(combined->sampler); } else if (chain) { for (auto &expr : chain->implied_read_expressions) propagate_nonuniform_qualifier(expr); } }