/* * Copyright 2015-2018 ARM Limited * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "spirv_glsl.hpp" #include "GLSL.std.450.h" #include "spirv_common.hpp" #include #include #include using namespace spv; using namespace spirv_cross; using namespace std; static bool packing_is_vec4_padded(BufferPackingStandard packing) { switch (packing) { case BufferPackingHLSLCbuffer: case BufferPackingHLSLCbufferPackOffset: case BufferPackingStd140: case BufferPackingStd140EnhancedLayout: return true; default: return false; } } static bool packing_is_hlsl(BufferPackingStandard packing) { switch (packing) { case BufferPackingHLSLCbuffer: case BufferPackingHLSLCbufferPackOffset: return true; default: return false; } } static bool packing_has_flexible_offset(BufferPackingStandard packing) { switch (packing) { case BufferPackingStd140: case BufferPackingStd430: case BufferPackingHLSLCbuffer: return false; default: return true; } } static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing) { switch (packing) { case BufferPackingStd140EnhancedLayout: return BufferPackingStd140; case BufferPackingStd430EnhancedLayout: return BufferPackingStd430; case BufferPackingHLSLCbufferPackOffset: return BufferPackingHLSLCbuffer; default: return packing; } } // Sanitizes underscores for GLSL where multiple underscores in a row are not allowed. string CompilerGLSL::sanitize_underscores(const string &str) { string res; res.reserve(str.size()); bool last_underscore = false; for (auto c : str) { if (c == '_') { if (last_underscore) continue; res += c; last_underscore = true; } else { res += c; last_underscore = false; } } return res; } // Returns true if an arithmetic operation does not change behavior depending on signedness. static bool glsl_opcode_is_sign_invariant(Op opcode) { switch (opcode) { case OpIEqual: case OpINotEqual: case OpISub: case OpIAdd: case OpIMul: case OpShiftLeftLogical: case OpBitwiseOr: case OpBitwiseXor: case OpBitwiseAnd: return true; default: return false; } } static const char *to_pls_layout(PlsFormat format) { switch (format) { case PlsR11FG11FB10F: return "layout(r11f_g11f_b10f) "; case PlsR32F: return "layout(r32f) "; case PlsRG16F: return "layout(rg16f) "; case PlsRGB10A2: return "layout(rgb10_a2) "; case PlsRGBA8: return "layout(rgba8) "; case PlsRG16: return "layout(rg16) "; case PlsRGBA8I: return "layout(rgba8i)"; case PlsRG16I: return "layout(rg16i) "; case PlsRGB10A2UI: return "layout(rgb10_a2ui) "; case PlsRGBA8UI: return "layout(rgba8ui) "; case PlsRG16UI: return "layout(rg16ui) "; case PlsR32UI: return "layout(r32ui) "; default: return ""; } } static SPIRType::BaseType pls_format_to_basetype(PlsFormat format) { switch (format) { default: case PlsR11FG11FB10F: case PlsR32F: case PlsRG16F: case PlsRGB10A2: case PlsRGBA8: case PlsRG16: return SPIRType::Float; case PlsRGBA8I: case PlsRG16I: return SPIRType::Int; case PlsRGB10A2UI: case PlsRGBA8UI: case PlsRG16UI: case PlsR32UI: return SPIRType::UInt; } } static uint32_t pls_format_to_components(PlsFormat format) { switch (format) { default: case PlsR32F: case PlsR32UI: return 1; case PlsRG16F: case PlsRG16: case PlsRG16UI: case PlsRG16I: return 2; case PlsR11FG11FB10F: return 3; case PlsRGB10A2: case PlsRGBA8: case PlsRGBA8I: case PlsRGB10A2UI: case PlsRGBA8UI: return 4; } } static const char *vector_swizzle(int vecsize, int index) { static const char *swizzle[4][4] = { { ".x", ".y", ".z", ".w" }, { ".xy", ".yz", ".zw" }, { ".xyz", ".yzw" }, { "" } }; assert(vecsize >= 1 && vecsize <= 4); assert(index >= 0 && index < 4); assert(swizzle[vecsize - 1][index]); return swizzle[vecsize - 1][index]; } void CompilerGLSL::reset() { // We do some speculative optimizations which should pretty much always work out, // but just in case the SPIR-V is rather weird, recompile until it's happy. // This typically only means one extra pass. force_recompile = false; // Clear invalid expression tracking. invalid_expressions.clear(); current_function = nullptr; // Clear temporary usage tracking. expression_usage_counts.clear(); forwarded_temporaries.clear(); resource_names.clear(); for (auto &id : ids) { if (id.get_type() == TypeVariable) { // Clear unflushed dependees. id.get().dependees.clear(); } else if (id.get_type() == TypeExpression) { // And remove all expressions. id.reset(); } else if (id.get_type() == TypeFunction) { // Reset active state for all functions. id.get().active = false; id.get().flush_undeclared = true; } } statement_count = 0; indent = 0; } void CompilerGLSL::remap_pls_variables() { for (auto &input : pls_inputs) { auto &var = get(input.id); bool input_is_target = false; if (var.storage == StorageClassUniformConstant) { auto &type = get(var.basetype); input_is_target = type.image.dim == DimSubpassData; } if (var.storage != StorageClassInput && !input_is_target) SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs."); var.remapped_variable = true; } for (auto &output : pls_outputs) { auto &var = get(output.id); if (var.storage != StorageClassOutput) SPIRV_CROSS_THROW("Can only use out variables for PLS outputs."); var.remapped_variable = true; } } void CompilerGLSL::find_static_extensions() { for (auto &id : ids) { if (id.get_type() == TypeType) { auto &type = id.get(); if (type.basetype == SPIRType::Double) { if (options.es) SPIRV_CROSS_THROW("FP64 not supported in ES profile."); if (!options.es && options.version < 400) require_extension("GL_ARB_gpu_shader_fp64"); } if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) { if (options.es) SPIRV_CROSS_THROW("64-bit integers not supported in ES profile."); if (!options.es) require_extension("GL_ARB_gpu_shader_int64"); } } } auto &execution = get_entry_point(); switch (execution.model) { case ExecutionModelGLCompute: if (!options.es && options.version < 430) require_extension("GL_ARB_compute_shader"); if (options.es && options.version < 310) SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders."); break; case ExecutionModelGeometry: if (options.es && options.version < 320) require_extension("GL_EXT_geometry_shader"); if (!options.es && options.version < 150) require_extension("GL_ARB_geometry_shader4"); if ((execution.flags & (1ull << ExecutionModeInvocations)) && execution.invocations != 1) { // Instanced GS is part of 400 core or this extension. if (!options.es && options.version < 400) require_extension("GL_ARB_gpu_shader5"); } break; case ExecutionModelTessellationEvaluation: case ExecutionModelTessellationControl: if (options.es && options.version < 320) require_extension("GL_EXT_tessellation_shader"); if (!options.es && options.version < 400) require_extension("GL_ARB_tessellation_shader"); break; default: break; } if (!pls_inputs.empty() || !pls_outputs.empty()) require_extension("GL_EXT_shader_pixel_local_storage"); if (options.separate_shader_objects && !options.es && options.version < 410) require_extension("GL_ARB_separate_shader_objects"); } string CompilerGLSL::compile() { // Force a classic "C" locale, reverts when function returns ClassicLocale classic_locale; if (options.vulkan_semantics) backend.allow_precision_qualifiers = true; backend.force_gl_in_out_block = true; // Scan the SPIR-V to find trivial uses of extensions. find_static_extensions(); fixup_image_load_store_access(); update_active_builtins(); analyze_image_and_sampler_usage(); uint32_t pass_count = 0; do { if (pass_count >= 3) SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!"); reset(); // Move constructor for this type is broken on GCC 4.9 ... buffer = unique_ptr(new ostringstream()); emit_header(); emit_resources(); emit_function(get(entry_point), 0); pass_count++; } while (force_recompile); // Entry point in GLSL is always main(). get_entry_point().name = "main"; return buffer->str(); } std::string CompilerGLSL::get_partial_source() { return buffer ? buffer->str() : "No compiled source available yet."; } void CompilerGLSL::emit_header() { auto &execution = get_entry_point(); statement("#version ", options.version, options.es && options.version > 100 ? " es" : ""); if (!options.es && options.version < 420) { // Needed for binding = # on UBOs, etc. if (options.enable_420pack_extension) { statement("#ifdef GL_ARB_shading_language_420pack"); statement("#extension GL_ARB_shading_language_420pack : require"); statement("#endif"); } // Needed for: layout(early_fragment_tests) in; if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests)) require_extension("GL_ARB_shader_image_load_store"); } for (auto &ext : forced_extensions) statement("#extension ", ext, " : require"); for (auto &header : header_lines) statement(header); vector inputs; vector outputs; switch (execution.model) { case ExecutionModelGeometry: outputs.push_back(join("max_vertices = ", execution.output_vertices)); if ((execution.flags & (1ull << ExecutionModeInvocations)) && execution.invocations != 1) inputs.push_back(join("invocations = ", execution.invocations)); if (execution.flags & (1ull << ExecutionModeInputPoints)) inputs.push_back("points"); if (execution.flags & (1ull << ExecutionModeInputLines)) inputs.push_back("lines"); if (execution.flags & (1ull << ExecutionModeInputLinesAdjacency)) inputs.push_back("lines_adjacency"); if (execution.flags & (1ull << ExecutionModeTriangles)) inputs.push_back("triangles"); if (execution.flags & (1ull << ExecutionModeInputTrianglesAdjacency)) inputs.push_back("triangles_adjacency"); if (execution.flags & (1ull << ExecutionModeOutputTriangleStrip)) outputs.push_back("triangle_strip"); if (execution.flags & (1ull << ExecutionModeOutputPoints)) outputs.push_back("points"); if (execution.flags & (1ull << ExecutionModeOutputLineStrip)) outputs.push_back("line_strip"); break; case ExecutionModelTessellationControl: if (execution.flags & (1ull << ExecutionModeOutputVertices)) outputs.push_back(join("vertices = ", execution.output_vertices)); break; case ExecutionModelTessellationEvaluation: if (execution.flags & (1ull << ExecutionModeQuads)) inputs.push_back("quads"); if (execution.flags & (1ull << ExecutionModeTriangles)) inputs.push_back("triangles"); if (execution.flags & (1ull << ExecutionModeIsolines)) inputs.push_back("isolines"); if (execution.flags & (1ull << ExecutionModePointMode)) inputs.push_back("point_mode"); if ((execution.flags & (1ull << ExecutionModeIsolines)) == 0) { if (execution.flags & (1ull << ExecutionModeVertexOrderCw)) inputs.push_back("cw"); if (execution.flags & (1ull << ExecutionModeVertexOrderCcw)) inputs.push_back("ccw"); } if (execution.flags & (1ull << ExecutionModeSpacingFractionalEven)) inputs.push_back("fractional_even_spacing"); if (execution.flags & (1ull << ExecutionModeSpacingFractionalOdd)) inputs.push_back("fractional_odd_spacing"); if (execution.flags & (1ull << ExecutionModeSpacingEqual)) inputs.push_back("equal_spacing"); break; case ExecutionModelGLCompute: { if (execution.workgroup_size.constant != 0) { SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); if (wg_x.id) { if (options.vulkan_semantics) inputs.push_back(join("local_size_x_id = ", wg_x.constant_id)); else inputs.push_back(join("local_size_x = ", get(wg_x.id).scalar())); } else inputs.push_back(join("local_size_x = ", execution.workgroup_size.x)); if (wg_y.id) { if (options.vulkan_semantics) inputs.push_back(join("local_size_y_id = ", wg_y.constant_id)); else inputs.push_back(join("local_size_y = ", get(wg_y.id).scalar())); } else inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); if (wg_z.id) { if (options.vulkan_semantics) inputs.push_back(join("local_size_z_id = ", wg_z.constant_id)); else inputs.push_back(join("local_size_z = ", get(wg_z.id).scalar())); } else inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); } else { inputs.push_back(join("local_size_x = ", execution.workgroup_size.x)); inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); } break; } case ExecutionModelFragment: if (options.es) { switch (options.fragment.default_float_precision) { case Options::Lowp: statement("precision lowp float;"); break; case Options::Mediump: statement("precision mediump float;"); break; case Options::Highp: statement("precision highp float;"); break; default: break; } switch (options.fragment.default_int_precision) { case Options::Lowp: statement("precision lowp int;"); break; case Options::Mediump: statement("precision mediump int;"); break; case Options::Highp: statement("precision highp int;"); break; default: break; } } if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests)) inputs.push_back("early_fragment_tests"); if (execution.flags & (1ull << ExecutionModeDepthGreater)) inputs.push_back("depth_greater"); if (execution.flags & (1ull << ExecutionModeDepthLess)) inputs.push_back("depth_less"); break; default: break; } if (!inputs.empty()) statement("layout(", merge(inputs), ") in;"); if (!outputs.empty()) statement("layout(", merge(outputs), ") out;"); statement(""); } bool CompilerGLSL::type_is_empty(const SPIRType &type) { return type.basetype == SPIRType::Struct && type.member_types.empty(); } void CompilerGLSL::emit_struct(SPIRType &type) { // Struct types can be stamped out multiple times // with just different offsets, matrix layouts, etc ... // Type-punning with these types is legal, which complicates things // when we are storing struct and array types in an SSBO for example. if (type.type_alias != 0) return; // Don't declare empty structs in GLSL, this is not allowed. // Empty structs is a corner case of HLSL output, and only sensible thing to do is avoiding to declare // these types. if (type_is_empty(type)) return; add_resource_name(type.self); auto name = type_to_glsl(type); statement(!backend.explicit_struct_type ? "struct " : "", name); begin_scope(); type.member_name_cache.clear(); uint32_t i = 0; bool emitted = false; for (auto &member : type.member_types) { add_member_name(type, i); emit_struct_member(type, member, i); i++; emitted = true; } end_scope_decl(); if (emitted) statement(""); } uint64_t CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index) { uint64_t flags = 0; auto &memb = meta[type.self].members; if (index >= memb.size()) return 0; auto &dec = memb[index]; // If our type is a struct, traverse all the members as well recursively. flags |= dec.decoration_flags; for (uint32_t i = 0; i < type.member_types.size(); i++) flags |= combined_decoration_for_member(get(type.member_types[i]), i); return flags; } string CompilerGLSL::to_interpolation_qualifiers(uint64_t flags) { string res; //if (flags & (1ull << DecorationSmooth)) // res += "smooth "; if (flags & (1ull << DecorationFlat)) res += "flat "; if (flags & (1ull << DecorationNoPerspective)) res += "noperspective "; if (flags & (1ull << DecorationCentroid)) res += "centroid "; if (flags & (1ull << DecorationPatch)) res += "patch "; if (flags & (1ull << DecorationSample)) res += "sample "; if (flags & (1ull << DecorationInvariant)) res += "invariant "; return res; } string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) { if (is_legacy()) return ""; bool is_block = (meta[type.self].decoration.decoration_flags & ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0; if (!is_block) return ""; auto &memb = meta[type.self].members; if (index >= memb.size()) return ""; auto &dec = memb[index]; vector attr; // We can only apply layouts on members in block interfaces. // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL. // // We would like to go from (SPIR-V style): // // struct Foo { layout(row_major) mat4 matrix; }; // buffer UBO { Foo foo; }; // // to // // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations. // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level. auto flags = combined_decoration_for_member(type, index); if (flags & (1ull << DecorationRowMajor)) attr.push_back("row_major"); // We don't emit any global layouts, so column_major is default. //if (flags & (1ull << DecorationColMajor)) // attr.push_back("column_major"); if ((dec.decoration_flags & (1ull << DecorationLocation)) != 0 && can_use_io_location(type.storage)) attr.push_back(join("location = ", dec.location)); // DecorationCPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. // This is only done selectively in GLSL as needed. if (has_decoration(type.self, DecorationCPacked) && (dec.decoration_flags & (1ull << DecorationOffset)) != 0) attr.push_back(join("offset = ", dec.offset)); if (attr.empty()) return ""; string res = "layout("; res += merge(attr); res += ") "; return res; } const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) { auto check_desktop = [this] { if (options.es) SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile."); }; switch (format) { case ImageFormatRgba32f: return "rgba32f"; case ImageFormatRgba16f: return "rgba16f"; case ImageFormatR32f: return "r32f"; case ImageFormatRgba8: return "rgba8"; case ImageFormatRgba8Snorm: return "rgba8_snorm"; case ImageFormatRg32f: return "rg32f"; case ImageFormatRg16f: return "rg16f"; case ImageFormatRgba32i: return "rgba32i"; case ImageFormatRgba16i: return "rgba16i"; case ImageFormatR32i: return "r32i"; case ImageFormatRgba8i: return "rgba8i"; case ImageFormatRg32i: return "rg32i"; case ImageFormatRg16i: return "rg16i"; case ImageFormatRgba32ui: return "rgba32ui"; case ImageFormatRgba16ui: return "rgba16ui"; case ImageFormatR32ui: return "r32ui"; case ImageFormatRgba8ui: return "rgba8ui"; case ImageFormatRg32ui: return "rg32ui"; case ImageFormatRg16ui: return "rg16ui"; // Desktop-only formats case ImageFormatR11fG11fB10f: check_desktop(); return "r11f_g11f_b10f"; case ImageFormatR16f: check_desktop(); return "r16f"; case ImageFormatRgb10A2: check_desktop(); return "rgb10_a2"; case ImageFormatR8: check_desktop(); return "r8"; case ImageFormatRg8: check_desktop(); return "rg8"; case ImageFormatR16: check_desktop(); return "r16"; case ImageFormatRg16: check_desktop(); return "rg16"; case ImageFormatRgba16: check_desktop(); return "rgba16"; case ImageFormatR16Snorm: check_desktop(); return "r16_snorm"; case ImageFormatRg16Snorm: check_desktop(); return "rg16_snorm"; case ImageFormatRgba16Snorm: check_desktop(); return "rgba16_snorm"; case ImageFormatR8Snorm: check_desktop(); return "r8_snorm"; case ImageFormatRg8Snorm: check_desktop(); return "rg8_snorm"; case ImageFormatR8ui: check_desktop(); return "r8ui"; case ImageFormatRg8ui: check_desktop(); return "rg8ui"; case ImageFormatR16ui: check_desktop(); return "r16ui"; case ImageFormatRgb10a2ui: check_desktop(); return "rgb10_a2ui"; case ImageFormatR8i: check_desktop(); return "r8i"; case ImageFormatRg8i: check_desktop(); return "rg8i"; case ImageFormatR16i: check_desktop(); return "r16i"; default: case ImageFormatUnknown: return nullptr; } } uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard) { switch (type.basetype) { case SPIRType::Double: case SPIRType::Int64: case SPIRType::UInt64: return 8; default: return 4; } } uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t flags, BufferPackingStandard packing) { const uint32_t base_alignment = type_to_packed_base_size(type, packing); if (!type.array.empty()) { uint32_t minimum_alignment = 1; if (packing_is_vec4_padded(packing)) minimum_alignment = 16; auto *tmp = &get(type.parent_type); while (!tmp->array.empty()) tmp = &get(tmp->parent_type); // Get the alignment of the base type, then maybe round up. return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing)); } if (type.basetype == SPIRType::Struct) { // Rule 9. Structs alignments are maximum alignment of its members. uint32_t alignment = 0; for (uint32_t i = 0; i < type.member_types.size(); i++) { auto member_flags = meta[type.self].members.at(i).decoration_flags; alignment = max(alignment, type_to_packed_alignment(get(type.member_types[i]), member_flags, packing)); } // In std140, struct alignment is rounded up to 16. if (packing_is_vec4_padded(packing)) alignment = max(alignment, 16u); return alignment; } else { // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle // a vec4, this is handled outside since that part knows our current offset. if (type.columns == 1 && packing_is_hlsl(packing)) return base_alignment; // From 7.6.2.2 in GL 4.5 core spec. // Rule 1 if (type.vecsize == 1 && type.columns == 1) return base_alignment; // Rule 2 if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1) return type.vecsize * base_alignment; // Rule 3 if (type.vecsize == 3 && type.columns == 1) return 4 * base_alignment; // Rule 4 implied. Alignment does not change in std430. // Rule 5. Column-major matrices are stored as arrays of // vectors. if ((flags & (1ull << DecorationColMajor)) && type.columns > 1) { if (packing_is_vec4_padded(packing)) return 4 * base_alignment; else if (type.vecsize == 3) return 4 * base_alignment; else return type.vecsize * base_alignment; } // Rule 6 implied. // Rule 7. if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1) { if (packing_is_vec4_padded(packing)) return 4 * base_alignment; else if (type.columns == 3) return 4 * base_alignment; else return type.columns * base_alignment; } // Rule 8 implied. } SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?"); } uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, uint64_t flags, BufferPackingStandard packing) { // Array stride is equal to aligned size of the underlying type. uint32_t parent = type.parent_type; assert(parent); auto &tmp = get(parent); uint32_t size = type_to_packed_size(tmp, flags, packing); if (tmp.array.empty()) { uint32_t alignment = type_to_packed_alignment(type, flags, packing); return (size + alignment - 1) & ~(alignment - 1); } else { // For multidimensional arrays, array stride always matches size of subtype. // The alignment cannot change because multidimensional arrays are basically N * M array elements. return size; } } uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags, BufferPackingStandard packing) { if (!type.array.empty()) { return to_array_size_literal(type, uint32_t(type.array.size()) - 1) * type_to_packed_array_stride(type, flags, packing); } const uint32_t base_alignment = type_to_packed_base_size(type, packing); uint32_t size = 0; if (type.basetype == SPIRType::Struct) { uint32_t pad_alignment = 1; for (uint32_t i = 0; i < type.member_types.size(); i++) { auto member_flags = meta[type.self].members.at(i).decoration_flags; auto &member_type = get(type.member_types[i]); uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing); uint32_t alignment = max(packed_alignment, pad_alignment); // The next member following a struct member is aligned to the base alignment of the struct that came before. // GL 4.5 spec, 7.6.2.2. if (member_type.basetype == SPIRType::Struct) pad_alignment = packed_alignment; else pad_alignment = 1; size = (size + alignment - 1) & ~(alignment - 1); size += type_to_packed_size(member_type, member_flags, packing); } } else { if (type.columns == 1) size = type.vecsize * base_alignment; if ((flags & (1ull << DecorationColMajor)) && type.columns > 1) { if (packing_is_vec4_padded(packing)) size = type.columns * 4 * base_alignment; else if (type.vecsize == 3) size = type.columns * 4 * base_alignment; else size = type.columns * type.vecsize * base_alignment; } if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1) { if (packing_is_vec4_padded(packing)) size = type.vecsize * 4 * base_alignment; else if (type.columns == 3) size = type.vecsize * 4 * base_alignment; else size = type.vecsize * type.columns * base_alignment; } } return size; } bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, uint32_t start_offset, uint32_t end_offset) { // This is very tricky and error prone, but try to be exhaustive and correct here. // SPIR-V doesn't directly say if we're using std430 or std140. // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters), // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information. // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing). // // It is almost certain that we're using std430, but it gets tricky with arrays in particular. // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430. // // The only two differences between std140 and std430 are related to padding alignment/array stride // in arrays and structs. In std140 they take minimum vec4 alignment. // std430 only removes the vec4 requirement. uint32_t offset = 0; uint32_t pad_alignment = 1; for (uint32_t i = 0; i < type.member_types.size(); i++) { auto &memb_type = get(type.member_types[i]); auto member_flags = meta[type.self].members.at(i).decoration_flags; // Verify alignment rules. uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing); uint32_t packed_size = type_to_packed_size(memb_type, member_flags, packing); if (packing_is_hlsl(packing)) { // If a member straddles across a vec4 boundary, alignment is actually vec4. uint32_t begin_word = offset / 16; uint32_t end_word = (offset + packed_size - 1) / 16; if (begin_word != end_word) packed_alignment = max(packed_alignment, 16u); } uint32_t alignment = max(packed_alignment, pad_alignment); offset = (offset + alignment - 1) & ~(alignment - 1); // Field is not in the specified range anymore and we can ignore any further fields. if (offset >= end_offset) break; // The next member following a struct member is aligned to the base alignment of the struct that came before. // GL 4.5 spec, 7.6.2.2. if (memb_type.basetype == SPIRType::Struct) pad_alignment = packed_alignment; else pad_alignment = 1; // Only care about packing if we are in the given range if (offset >= start_offset) { // We only care about offsets in std140, std430, etc ... // For EnhancedLayout variants, we have the flexibility to choose our own offsets. if (!packing_has_flexible_offset(packing)) { uint32_t actual_offset = type_struct_member_offset(type, i); if (actual_offset != offset) // This cannot be the packing we're looking for. return false; } // Verify array stride rules. if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) != type_struct_member_array_stride(type, i)) return false; // Verify that sub-structs also follow packing rules. // We cannot use enhanced layouts on substructs, so they better be up to spec. auto substruct_packing = packing_to_substruct_packing(packing); if (!memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing)) return false; } // Bump size. offset += packed_size; } return true; } bool CompilerGLSL::can_use_io_location(StorageClass storage) { // Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL. // Be very explicit here about how to solve the issue. if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) || (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput)) { if (!options.es && options.version < 410 && !options.separate_shader_objects) return false; else if (options.es && options.version < 310) return false; } if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) || (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput)) { if (options.es && options.version < 300) return false; else if (!options.es && options.version < 330) return false; } return true; } string CompilerGLSL::layout_for_variable(const SPIRVariable &var) { // FIXME: Come up with a better solution for when to disable layouts. // Having layouts depend on extensions as well as which types // of layouts are used. For now, the simple solution is to just disable // layouts for legacy versions. if (is_legacy()) return ""; vector attr; auto &dec = meta[var.self].decoration; auto &type = get(var.basetype); auto flags = dec.decoration_flags; auto typeflags = meta[type.self].decoration.decoration_flags; if (options.vulkan_semantics && var.storage == StorageClassPushConstant) attr.push_back("push_constant"); if (flags & (1ull << DecorationRowMajor)) attr.push_back("row_major"); if (flags & (1ull << DecorationColMajor)) attr.push_back("column_major"); if (options.vulkan_semantics) { if (flags & (1ull << DecorationInputAttachmentIndex)) attr.push_back(join("input_attachment_index = ", dec.input_attachment)); } if ((flags & (1ull << DecorationLocation)) != 0 && can_use_io_location(var.storage)) { uint64_t combined_decoration = 0; for (uint32_t i = 0; i < meta[type.self].members.size(); i++) combined_decoration |= combined_decoration_for_member(type, i); // If our members have location decorations, we don't need to // emit location decorations at the top as well (looks weird). if ((combined_decoration & (1ull << DecorationLocation)) == 0) attr.push_back(join("location = ", dec.location)); } // set = 0 is the default. Do not emit set = decoration in regular GLSL output, but // we should preserve it in Vulkan GLSL mode. if (var.storage != StorageClassPushConstant) { if ((flags & (1ull << DecorationDescriptorSet)) && (dec.set != 0 || options.vulkan_semantics)) attr.push_back(join("set = ", dec.set)); } bool can_use_binding; if (options.es) can_use_binding = options.version >= 310; else can_use_binding = options.enable_420pack_extension || (options.version >= 420); if (can_use_binding && (flags & (1ull << DecorationBinding))) attr.push_back(join("binding = ", dec.binding)); if (flags & (1ull << DecorationOffset)) attr.push_back(join("offset = ", dec.offset)); bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; bool ssbo_block = var.storage == StorageClassStorageBuffer || (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBufferBlock))); // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. // If SPIR-V does not comply with either layout, we cannot really work around it. if (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBlock))) { if (buffer_is_packing_standard(type, BufferPackingStd140)) attr.push_back("std140"); else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) { attr.push_back("std140"); // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. if (options.es && !options.vulkan_semantics) SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " "not support GL_ARB_enhanced_layouts."); if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension("GL_ARB_enhanced_layouts"); // This is a very last minute to check for this, but use this unused decoration to mark that we should emit // explicit offsets for this block type. // layout_for_variable() will be called before the actual buffer emit. // The alternative is a full pass before codegen where we deduce this decoration, // but then we are just doing the exact same work twice, and more complexity. set_decoration(type.self, DecorationCPacked); } else { SPIRV_CROSS_THROW("Uniform buffer cannot be expressed as std140, even with enhanced layouts. You can try " "flattening this block to " "support a more flexible layout."); } } else if (push_constant_block || ssbo_block) { if (buffer_is_packing_standard(type, BufferPackingStd430)) attr.push_back("std430"); else if (buffer_is_packing_standard(type, BufferPackingStd140)) attr.push_back("std140"); else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) { attr.push_back("std140"); // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. if (options.es && !options.vulkan_semantics) SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " "not support GL_ARB_enhanced_layouts."); if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension("GL_ARB_enhanced_layouts"); set_decoration(type.self, DecorationCPacked); } else if (buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) { attr.push_back("std430"); if (options.es && !options.vulkan_semantics) SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " "not support GL_ARB_enhanced_layouts."); if (!options.es && !options.vulkan_semantics && options.version < 440) require_extension("GL_ARB_enhanced_layouts"); set_decoration(type.self, DecorationCPacked); } else { SPIRV_CROSS_THROW("Buffer block cannot be expressed as neither std430 nor std140, even with enhanced " "layouts. You can try flattening this block to support a more flexible layout."); } } // For images, the type itself adds a layout qualifer. // Only emit the format for storage images. if (type.basetype == SPIRType::Image && type.image.sampled == 2) { const char *fmt = format_to_glsl(type.image.format); if (fmt) attr.push_back(fmt); } if (attr.empty()) return ""; string res = "layout("; res += merge(attr); res += ") "; return res; } void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) { if (flattened_buffer_blocks.count(var.self)) emit_buffer_block_flattened(var); else if (options.vulkan_semantics) emit_push_constant_block_vulkan(var); else emit_push_constant_block_glsl(var); } void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) { emit_buffer_block(var); } void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) { // OpenGL has no concept of push constant blocks, implement it as a uniform struct. auto &type = get(var.basetype); auto &flags = meta[var.self].decoration.decoration_flags; flags &= ~((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)); #if 0 if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " "Remap to location with reflection API first or disable these decorations."); #endif // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. auto &block_flags = meta[type.self].decoration.decoration_flags; uint64_t block_flag = block_flags & (1ull << DecorationBlock); block_flags &= ~block_flag; emit_struct(type); block_flags |= block_flag; emit_uniform(var); statement(""); } void CompilerGLSL::emit_buffer_block(const SPIRVariable &var) { if (flattened_buffer_blocks.count(var.self)) emit_buffer_block_flattened(var); else if (is_legacy()) emit_buffer_block_legacy(var); else emit_buffer_block_native(var); } void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) { auto &type = get(var.basetype); bool ssbo = var.storage == StorageClassStorageBuffer || ((meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0); if (ssbo) SPIRV_CROSS_THROW("SSBOs not supported in legacy targets."); // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. auto &block_flags = meta[type.self].decoration.decoration_flags; uint64_t block_flag = block_flags & (1ull << DecorationBlock); block_flags &= ~block_flag; emit_struct(type); block_flags |= block_flag; emit_uniform(var); statement(""); } void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) { auto &type = get(var.basetype); uint64_t flags = get_buffer_block_flags(var); bool ssbo = var.storage == StorageClassStorageBuffer || ((meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0); bool is_restrict = ssbo && (flags & (1ull << DecorationRestrict)) != 0; bool is_writeonly = ssbo && (flags & (1ull << DecorationNonReadable)) != 0; bool is_readonly = ssbo && (flags & (1ull << DecorationNonWritable)) != 0; bool is_coherent = ssbo && (flags & (1ull << DecorationCoherent)) != 0; // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... auto buffer_name = to_name(type.self, false); // Shaders never use the block by interface name, so we don't // have to track this other than updating name caches. if (meta[type.self].decoration.alias.empty() || resource_names.find(buffer_name) != end(resource_names)) buffer_name = get_block_fallback_name(var.self); // Make sure we get something unique. add_variable(resource_names, buffer_name); // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. // This cannot conflict with anything else, so we're safe now. if (buffer_name.empty()) buffer_name = join("_", get(var.basetype).self, "_", var.self); // Save for post-reflection later. declared_block_names[var.self] = buffer_name; statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "", is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ", buffer_name); begin_scope(); type.member_name_cache.clear(); uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); emit_struct_member(type, member, i); i++; } add_resource_name(var.self); end_scope_decl(to_name(var.self) + type_to_array_glsl(type)); statement(""); } void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) { auto &type = get(var.basetype); // Block names should never alias. auto buffer_name = to_name(type.self, false); size_t buffer_size = (get_declared_struct_size(type) + 15) / 16; SPIRType::BaseType basic_type; if (get_common_basic_type(type, basic_type)) { SPIRType tmp; tmp.basetype = basic_type; tmp.vecsize = 4; if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt) SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint."); auto flags = get_buffer_block_flags(var); statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[", buffer_size, "];"); } else SPIRV_CROSS_THROW("All basic types in a flattened block must be the same."); } const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) { auto &execution = get_entry_point(); if (var.storage == StorageClassInput || var.storage == StorageClassOutput) { if (is_legacy() && execution.model == ExecutionModelVertex) return var.storage == StorageClassInput ? "attribute " : "varying "; else if (is_legacy() && execution.model == ExecutionModelFragment) return "varying "; // Fragment outputs are renamed so they never hit this case. else return var.storage == StorageClassInput ? "in " : "out "; } else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || var.storage == StorageClassPushConstant) { return "uniform "; } return ""; } void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) { auto &type = get(var.basetype); if (!type.array.empty()) SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); auto old_flags = meta[type.self].decoration.decoration_flags; // Emit the members as if they are part of a block to get all qualifiers. meta[type.self].decoration.decoration_flags |= 1ull << DecorationBlock; type.member_name_cache.clear(); uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); auto &membertype = get(member); if (membertype.basetype == SPIRType::Struct) SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables."); // Pass in the varying qualifier here so it will appear in the correct declaration order. // Replace member name while emitting it so it encodes both struct name and member name. // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, // which is not allowed. auto backup_name = get_member_name(type.self, i); auto member_name = to_member_name(type, i); set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name))); emit_struct_member(type, member, i, qual); // Restore member name. set_member_name(type.self, i, member_name); i++; } meta[type.self].decoration.decoration_flags = old_flags; // Treat this variable as flattened from now on. flattened_structs.insert(var.self); } void CompilerGLSL::emit_interface_block(const SPIRVariable &var) { auto &type = get(var.basetype); // Either make it plain in/out or in/out blocks depending on what shader is doing ... bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0; const char *qual = to_storage_qualifiers_glsl(var); if (block) { // ESSL earlier than 310 and GLSL earlier than 150 did not support // I/O variables which are struct types. // To support this, flatten the struct into separate varyings instead. if ((options.es && options.version < 310) || (!options.es && options.version < 150)) { // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). emit_flattened_io_block(var, qual); } else { if (options.es && options.version < 320) { // Geometry and tessellation extensions imply this extension. if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader")) require_extension("GL_EXT_shader_io_blocks"); } // Block names should never alias. auto block_name = to_name(type.self, false); // Shaders never use the block by interface name, so we don't // have to track this other than updating name caches. if (resource_names.find(block_name) != end(resource_names)) block_name = get_fallback_name(type.self); else resource_names.insert(block_name); statement(layout_for_variable(var), qual, block_name); begin_scope(); type.member_name_cache.clear(); uint32_t i = 0; for (auto &member : type.member_types) { add_member_name(type, i); emit_struct_member(type, member, i); i++; } add_resource_name(var.self); end_scope_decl(join(to_name(var.self), type_to_array_glsl(type))); statement(""); } } else { // ESSL earlier than 310 and GLSL earlier than 150 did not support // I/O variables which are struct types. // To support this, flatten the struct into separate varyings instead. if (type.basetype == SPIRType::Struct && ((options.es && options.version < 310) || (!options.es && options.version < 150))) { emit_flattened_io_block(var, qual); } else { add_resource_name(var.self); statement(layout_for_variable(var), variable_decl(var), ";"); } } } void CompilerGLSL::emit_uniform(const SPIRVariable &var) { auto &type = get(var.basetype); if (type.basetype == SPIRType::Image && type.image.sampled == 2) { if (!options.es && options.version < 420) require_extension("GL_ARB_shader_image_load_store"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store."); } add_resource_name(var.self); statement(layout_for_variable(var), variable_decl(var), ";"); } void CompilerGLSL::emit_specialization_constant(const SPIRConstant &constant) { auto &type = get(constant.constant_type); auto name = to_name(constant.self); SpecializationConstant wg_x, wg_y, wg_z; uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); if (constant.self == workgroup_size_id || constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id) { // These specialization constants are implicitly declared by emitting layout() in; return; } // Only scalars have constant IDs. if (has_decoration(constant.self, DecorationSpecId)) { statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); } else { statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); } } void CompilerGLSL::replace_illegal_names() { // clang-format off static const unordered_set keywords = { "active", "asm", "atomic_uint", "attribute", "bool", "break", "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard", "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4", "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float", "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray", "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube", "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect", "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant", "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect", "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "line", "linear", "long", "lowp", "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "matrix", "mediump", "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "point", "precision", "public", "readonly", "resource", "restrict", "return", "row_major", "sample", "sampler", "sampler1D", "sampler1DArray", "sampler1DArrayShadow", "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray", "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer", "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "short", "sizeof", "smooth", "static", "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D", "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube", "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray", "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube", "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", "volatile", "while", "writeonly", "texture" }; // clang-format on for (auto &id : ids) { if (id.get_type() == TypeVariable) { auto &var = id.get(); if (!is_hidden_variable(var)) { auto &m = meta[var.self].decoration; if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords)) m.alias = join("_", m.alias); } } } } void CompilerGLSL::replace_fragment_output(SPIRVariable &var) { auto &m = meta[var.self].decoration; uint32_t location = 0; if (m.decoration_flags & (1ull << DecorationLocation)) location = m.location; // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will // do the access chain part of this for us. auto &type = get(var.basetype); if (type.array.empty()) { // Redirect the write to a specific render target in legacy GLSL. m.alias = join("gl_FragData[", location, "]"); if (is_legacy_es() && location != 0) require_extension("GL_EXT_draw_buffers"); } else if (type.array.size() == 1) { // If location is non-zero, we probably have to add an offset. // This gets really tricky since we'd have to inject an offset in the access chain. // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now. m.alias = "gl_FragData"; if (location != 0) SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. " "This is unimplemented in SPIRV-Cross."); if (is_legacy_es()) require_extension("GL_EXT_draw_buffers"); } else SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL."); var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is. } void CompilerGLSL::replace_fragment_outputs() { for (auto &id : ids) { if (id.get_type() == TypeVariable) { auto &var = id.get(); auto &type = get(var.basetype); if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput) replace_fragment_output(var); } } } string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) { if (out_type.vecsize == input_components) return expr; else if (input_components == 1 && !backend.can_swizzle_scalar) return join(type_to_glsl(out_type), "(", expr, ")"); else { auto e = enclose_expression(expr) + "."; // Just clamp the swizzle index if we have more outputs than inputs. for (uint32_t c = 0; c < out_type.vecsize; c++) e += index_to_swizzle(min(c, input_components - 1)); if (backend.swizzle_is_function && out_type.vecsize > 1) e += "()"; remove_duplicate_swizzle(e); return e; } } void CompilerGLSL::emit_pls() { auto &execution = get_entry_point(); if (execution.model != ExecutionModelFragment) SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders."); if (!options.es) SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES."); if (options.version < 300) SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above."); if (!pls_inputs.empty()) { statement("__pixel_local_inEXT _PLSIn"); begin_scope(); for (auto &input : pls_inputs) statement(pls_decl(input), ";"); end_scope_decl(); statement(""); } if (!pls_outputs.empty()) { statement("__pixel_local_outEXT _PLSOut"); begin_scope(); for (auto &output : pls_outputs) statement(pls_decl(output), ";"); end_scope_decl(); statement(""); } } void CompilerGLSL::fixup_image_load_store_access() { for (auto &id : ids) { if (id.get_type() != TypeVariable) continue; uint32_t var = id.get().self; auto &vartype = expression_type(var); if (vartype.basetype == SPIRType::Image) { // Older glslangValidator does not emit required qualifiers here. // Solve this by making the image access as restricted as possible and loosen up if we need to. // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. auto &flags = meta.at(var).decoration.decoration_flags; static const uint64_t NoWrite = 1ull << DecorationNonWritable; static const uint64_t NoRead = 1ull << DecorationNonReadable; if ((flags & (NoWrite | NoRead)) == 0) flags |= NoRead | NoWrite; } } } void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model) { uint64_t emitted_builtins = 0; uint64_t global_builtins = 0; const SPIRVariable *block_var = nullptr; bool emitted_block = false; bool builtin_array = false; for (auto &id : ids) { if (id.get_type() != TypeVariable) continue; auto &var = id.get(); auto &type = get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); uint64_t builtins = 0; if (var.storage == storage && block && is_builtin_variable(var)) { for (auto &m : meta[type.self].members) if (m.builtin) builtins |= 1ull << m.builtin_type; } else if (var.storage == storage && !block && is_builtin_variable(var)) { // While we're at it, collect all declared global builtins (HLSL mostly ...). auto &m = meta[var.self].decoration; if (m.builtin) global_builtins |= 1ull << m.builtin_type; } if (!builtins) continue; if (emitted_block) SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block."); emitted_builtins = builtins; emitted_block = true; builtin_array = !type.array.empty(); block_var = &var; } global_builtins &= (1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance); // Try to collect all other declared builtins. if (!emitted_block) emitted_builtins = global_builtins; // Can't declare an empty interface block. if (!emitted_builtins) return; if (storage == StorageClassOutput) statement("out gl_PerVertex"); else statement("in gl_PerVertex"); begin_scope(); if (emitted_builtins & (1ull << BuiltInPosition)) statement("vec4 gl_Position;"); if (emitted_builtins & (1ull << BuiltInPointSize)) statement("float gl_PointSize;"); if (emitted_builtins & (1ull << BuiltInClipDistance)) statement("float gl_ClipDistance[];"); // TODO: Do we need a fixed array size here? if (emitted_builtins & (1ull << BuiltInCullDistance)) statement("float gl_CullDistance[];"); // TODO: Do we need a fixed array size here? bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl; if (builtin_array) { // Make sure the array has a supported name in the code. if (storage == StorageClassOutput) set_name(block_var->self, "gl_out"); else if (storage == StorageClassInput) set_name(block_var->self, "gl_in"); if (model == ExecutionModelTessellationControl && storage == StorageClassOutput) end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]")); else end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]")); } else end_scope_decl(); statement(""); } void CompilerGLSL::declare_undefined_values() { bool emitted = false; for (auto &id : ids) { if (id.get_type() != TypeUndef) continue; auto &undef = id.get(); statement(variable_decl(get(undef.basetype), to_name(undef.self), undef.self), ";"); emitted = true; } if (emitted) statement(""); } void CompilerGLSL::emit_resources() { auto &execution = get_entry_point(); replace_illegal_names(); // Legacy GL uses gl_FragData[], redeclare all fragment outputs // with builtins. if (execution.model == ExecutionModelFragment && is_legacy()) replace_fragment_outputs(); // Emit PLS blocks if we have such variables. if (!pls_inputs.empty() || !pls_outputs.empty()) emit_pls(); // Emit custom gl_PerVertex for SSO compatibility. if (options.separate_shader_objects && !options.es) { switch (execution.model) { case ExecutionModelGeometry: case ExecutionModelTessellationControl: case ExecutionModelTessellationEvaluation: emit_declared_builtin_block(StorageClassInput, execution.model); emit_declared_builtin_block(StorageClassOutput, execution.model); break; case ExecutionModelVertex: emit_declared_builtin_block(StorageClassOutput, execution.model); break; default: break; } } bool emitted = false; // If emitted Vulkan GLSL, // emit specialization constants as actual floats, // spec op expressions will redirect to the constant name. // // TODO: If we have the fringe case that we create a spec constant which depends on a struct type, // we'll have to deal with that, but there's currently no known way to express that. if (options.vulkan_semantics) { for (auto &id : ids) { if (id.get_type() == TypeConstant) { auto &c = id.get(); if (!c.specialization) continue; emit_specialization_constant(c); emitted = true; } } } if (emitted) statement(""); emitted = false; // Output all basic struct types which are not Block or BufferBlock as these are declared inplace // when such variables are instantiated. for (auto &id : ids) { if (id.get_type() == TypeType) { auto &type = id.get(); if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer && (meta[type.self].decoration.decoration_flags & ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0) { emit_struct(type); } } } // Output UBOs and SSBOs for (auto &id : ids) { if (id.get_type() == TypeVariable) { auto &var = id.get(); auto &type = get(var.basetype); bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform; bool has_block_flags = (meta[type.self].decoration.decoration_flags & ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0; if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && has_block_flags) { emit_buffer_block(var); } } } // Output push constant blocks for (auto &id : ids) { if (id.get_type() == TypeVariable) { auto &var = id.get(); auto &type = get(var.basetype); if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && !is_hidden_variable(var)) { emit_push_constant_block(var); } } } bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics; // Output Uniform Constants (values, samplers, images, etc). for (auto &id : ids) { if (id.get_type() == TypeVariable) { auto &var = id.get(); auto &type = get(var.basetype); // If we're remapping separate samplers and images, only emit the combined samplers. if (skip_separate_image_sampler) { // Sampler buffers are always used without a sampler, and they will also work in regular GL. bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; bool separate_sampler = type.basetype == SPIRType::Sampler; if (!sampler_buffer && (separate_image || separate_sampler)) continue; } if (var.storage != StorageClassFunction && type.pointer && (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) && !is_hidden_variable(var)) { emit_uniform(var); emitted = true; } } } if (emitted) statement(""); emitted = false; // Output in/out interfaces. for (auto &id : ids) { if (id.get_type() == TypeVariable) { auto &var = id.get(); auto &type = get(var.basetype); // HLSL output from glslang may emit interface variables which are "empty". // Just avoid declaring them. if (type_is_empty(type)) continue; if (var.storage != StorageClassFunction && type.pointer && (var.storage == StorageClassInput || var.storage == StorageClassOutput) && interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var)) { emit_interface_block(var); emitted = true; } else if (is_builtin_variable(var)) { // For gl_InstanceIndex emulation on GLES, the API user needs to // supply this uniform. if (meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics) { statement("uniform int SPIRV_Cross_BaseInstance;"); emitted = true; } } } } // Global variables. for (auto global : global_variables) { auto &var = get(global); if (var.storage != StorageClassOutput) { add_resource_name(var.self); statement(variable_decl(var), ";"); emitted = true; } } if (emitted) statement(""); declare_undefined_values(); } // Returns a string representation of the ID, usable as a function arg. // Default is to simply return the expression representation fo the arg ID. // Subclasses may override to modify the return value. string CompilerGLSL::to_func_call_arg(uint32_t id) { return to_expression(id); } void CompilerGLSL::handle_invalid_expression(uint32_t id) { // We tried to read an invalidated expression. // This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated. forced_temporaries.insert(id); force_recompile = true; } // Converts the format of the current expression from packed to unpacked, // by wrapping the expression in a constructor of the appropriate type. // GLSL does not support packed formats, so simply return the expression. // Subclasses that do will override string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &) { return expr_str; } // Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. void CompilerGLSL::strip_enclosed_expression(string &expr) { if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') return; // Have to make sure that our first and last parens actually enclose everything inside it. uint32_t paren_count = 0; for (auto &c : expr) { if (c == '(') paren_count++; else if (c == ')') { paren_count--; // If we hit 0 and this is not the final char, our first and final parens actually don't // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). if (paren_count == 0 && &c != &expr.back()) return; } } expr.erase(expr.size() - 1, 1); expr.erase(begin(expr)); } string CompilerGLSL::enclose_expression(const string &expr) { bool need_parens = false; // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back // unary expressions. if (!expr.empty()) { auto c = expr.front(); if (c == '-' || c == '+' || c == '!' || c == '~') need_parens = true; } if (!need_parens) { uint32_t paren_count = 0; for (auto c : expr) { if (c == '(') paren_count++; else if (c == ')') { assert(paren_count); paren_count--; } else if (c == ' ' && paren_count == 0) { need_parens = true; break; } } assert(paren_count == 0); } // If this expression contains any spaces which are not enclosed by parentheses, // we need to enclose it so we can treat the whole string as an expression. // This happens when two expressions have been part of a binary op earlier. if (need_parens) return join('(', expr, ')'); else return expr; } // Just like to_expression except that we enclose the expression inside parentheses if needed. string CompilerGLSL::to_enclosed_expression(uint32_t id) { return enclose_expression(to_expression(id)); } string CompilerGLSL::to_expression(uint32_t id) { auto itr = invalid_expressions.find(id); if (itr != end(invalid_expressions)) handle_invalid_expression(id); if (ids[id].get_type() == TypeExpression) { // We might have a more complex chain of dependencies. // A possible scenario is that we // // %1 = OpLoad // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. // // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, // and see that we should not forward reads of the original variable. auto &expr = get(id); for (uint32_t dep : expr.expression_dependencies) if (invalid_expressions.find(dep) != end(invalid_expressions)) handle_invalid_expression(dep); } track_expression_read(id); switch (ids[id].get_type()) { case TypeExpression: { auto &e = get(id); if (e.base_expression) return to_enclosed_expression(e.base_expression) + e.expression; else if (e.need_transpose) { bool is_packed = has_decoration(id, DecorationCPacked); return convert_row_major_matrix(e.expression, get(e.expression_type), is_packed); } else { if (force_recompile) { // During first compilation phase, certain expression patterns can trigger exponential growth of memory. // Avoid this by returning dummy expressions during this phase. // Do not use empty expressions here, because those are sentinels for other cases. return "_"; } else return e.expression; } } case TypeConstant: { auto &c = get(id); auto &type = get(c.constant_type); // WorkGroupSize may be a constant. auto &dec = meta[c.self].decoration; if (dec.builtin) return builtin_to_glsl(dec.builtin_type, StorageClassGeneric); else if (c.specialization && options.vulkan_semantics) return to_name(id); else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) return to_name(id); else if (!type.array.empty() && !backend.can_declare_arrays_inline) return to_name(id); else return constant_expression(c); } case TypeConstantOp: return constant_op_expression(get(id)); case TypeVariable: { auto &var = get(id); // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, // the variable has not been declared yet. if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) return to_expression(var.static_expression); else if (var.deferred_declaration) { var.deferred_declaration = false; return variable_decl(var); } else if (flattened_structs.count(id)) { return load_flattened_struct(var); } else { auto &dec = meta[var.self].decoration; if (dec.builtin) return builtin_to_glsl(dec.builtin_type, var.storage); else return to_name(id); } } case TypeCombinedImageSampler: // This type should never be taken the expression of directly. // The intention is that texture sampling functions will extract the image and samplers // separately and take their expressions as needed. // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler // expression ala sampler2D(texture, sampler). SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); case TypeAccessChain: // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. SPIRV_CROSS_THROW("Access chains have no default expression representation."); default: return to_name(id); } } string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) { auto &type = get(cop.basetype); bool binary = false; bool unary = false; string op; // TODO: Find a clean way to reuse emit_instruction. switch (cop.opcode) { case OpSConvert: case OpUConvert: case OpFConvert: op = type_to_glsl_constructor(type); break; #define BOP(opname, x) \ case Op##opname: \ binary = true; \ op = x; \ break #define UOP(opname, x) \ case Op##opname: \ unary = true; \ op = x; \ break UOP(SNegate, "-"); UOP(Not, "~"); BOP(IAdd, "+"); BOP(ISub, "-"); BOP(IMul, "*"); BOP(SDiv, "/"); BOP(UDiv, "/"); BOP(UMod, "%"); BOP(SMod, "%"); BOP(ShiftRightLogical, ">>"); BOP(ShiftRightArithmetic, ">>"); BOP(ShiftLeftLogical, "<<"); BOP(BitwiseOr, "|"); BOP(BitwiseXor, "^"); BOP(BitwiseAnd, "&"); BOP(LogicalOr, "||"); BOP(LogicalAnd, "&&"); UOP(LogicalNot, "!"); BOP(LogicalEqual, "=="); BOP(LogicalNotEqual, "!="); BOP(IEqual, "=="); BOP(INotEqual, "!="); BOP(ULessThan, "<"); BOP(SLessThan, "<"); BOP(ULessThanEqual, "<="); BOP(SLessThanEqual, "<="); BOP(UGreaterThan, ">"); BOP(SGreaterThan, ">"); BOP(UGreaterThanEqual, ">="); BOP(SGreaterThanEqual, ">="); case OpSelect: { if (cop.arguments.size() < 3) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); // This one is pretty annoying. It's triggered from // uint(bool), int(bool) from spec constants. // In order to preserve its compile-time constness in Vulkan GLSL, // we need to reduce the OpSelect expression back to this simplified model. // If we cannot, fail. if (!to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) { SPIRV_CROSS_THROW( "Cannot implement specialization constant op OpSelect. " "Need trivial select implementation which can be resolved to a simple cast from boolean."); } break; } default: // Some opcodes are unimplemented here, these are currently not possible to test from glslang. SPIRV_CROSS_THROW("Unimplemented spec constant op."); } SPIRType::BaseType input_type; bool skip_cast_if_equal_type = glsl_opcode_is_sign_invariant(cop.opcode); switch (cop.opcode) { case OpIEqual: case OpINotEqual: input_type = SPIRType::Int; break; default: input_type = type.basetype; break; } #undef BOP #undef UOP if (binary) { if (cop.arguments.size() < 2) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); string cast_op0; string cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], cop.arguments[1], skip_cast_if_equal_type); if (type.basetype != input_type && type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; auto expr = bitcast_glsl_op(type, expected_type); expr += '('; expr += join(cast_op0, " ", op, " ", cast_op1); expr += ')'; return expr; } else return join("(", cast_op0, " ", op, " ", cast_op1, ")"); } else if (unary) { if (cop.arguments.size() < 1) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); // Auto-bitcast to result type as needed. // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); } else { if (cop.arguments.size() < 1) SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); return join(op, "(", to_expression(cop.arguments[0]), ")"); } } string CompilerGLSL::constant_expression(const SPIRConstant &c) { if (!c.subconstants.empty()) { // Handles Arrays and structures. string res; if (backend.use_initializer_list) res = "{ "; else res = type_to_glsl_constructor(get(c.constant_type)) + "("; for (auto &elem : c.subconstants) { auto &subc = get(elem); if (subc.specialization && options.vulkan_semantics) res += to_name(elem); else res += constant_expression(subc); if (&elem != &c.subconstants.back()) res += ", "; } res += backend.use_initializer_list ? " }" : ")"; return res; } else if (c.columns() == 1) { return constant_expression_vector(c, 0); } else { string res = type_to_glsl(get(c.constant_type)) + "("; for (uint32_t col = 0; col < c.columns(); col++) { if (options.vulkan_semantics && c.specialization_constant_id(col) != 0) res += to_name(c.specialization_constant_id(col)); else res += constant_expression_vector(c, col); if (col + 1 < c.columns()) res += ", "; } res += ")"; return res; } } string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) { auto type = get(c.constant_type); type.columns = 1; string res; bool splat = backend.use_constructor_splatting && c.vector_size() > 1; bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Double) { // Cannot swizzle literal integers as a special case. swizzle_splat = false; } if (splat || swizzle_splat) { // Cannot use constant splatting if we have specialization constants somewhere in the vector. for (uint32_t i = 0; i < c.vector_size(); i++) { if (options.vulkan_semantics && c.specialization_constant_id(vector, i) != 0) { splat = false; swizzle_splat = false; break; } } } if (splat || swizzle_splat) { if (type.width == 64) { uint64_t ident = c.scalar_u64(vector, 0); for (uint32_t i = 1; i < c.vector_size(); i++) { if (ident != c.scalar_u64(vector, i)) { splat = false; swizzle_splat = false; break; } } } else { uint32_t ident = c.scalar(vector, 0); for (uint32_t i = 1; i < c.vector_size(); i++) { if (ident != c.scalar(vector, i)) { splat = false; swizzle_splat = false; } } } } if (c.vector_size() > 1 && !swizzle_splat) res += type_to_glsl(type) + "("; switch (type.basetype) { case SPIRType::Float: if (splat || swizzle_splat) { res += convert_to_string(c.scalar_f32(vector, 0)); if (backend.float_literal_suffix) res += "f"; if (swizzle_splat) res = remap_swizzle(get(c.constant_type), 1, res); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else res += convert_to_string(c.scalar_f32(vector, i)); if (backend.float_literal_suffix) res += "f"; if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Double: if (splat || swizzle_splat) { res += convert_to_string(c.scalar_f64(vector, 0)); if (backend.double_literal_suffix) res += "lf"; if (swizzle_splat) res = remap_swizzle(get(c.constant_type), 1, res); } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar_f64(vector, i)); if (backend.double_literal_suffix) res += "lf"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Int64: if (splat) { res += convert_to_string(c.scalar_i64(vector, 0)); if (backend.long_long_literal_suffix) res += "ll"; else res += "l"; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar_i64(vector, i)); if (backend.long_long_literal_suffix) res += "ll"; else res += "l"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UInt64: if (splat) { res += convert_to_string(c.scalar_u64(vector, 0)); if (backend.long_long_literal_suffix) res += "ull"; else res += "ul"; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar_u64(vector, i)); if (backend.long_long_literal_suffix) res += "ull"; else res += "ul"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::UInt: if (splat) { res += convert_to_string(c.scalar(vector, 0)); if (backend.uint32_t_literal_suffix) res += "u"; } else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else { res += convert_to_string(c.scalar(vector, i)); if (backend.uint32_t_literal_suffix) res += "u"; } if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Int: if (splat) res += convert_to_string(c.scalar_i32(vector, 0)); else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else res += convert_to_string(c.scalar_i32(vector, i)); if (i + 1 < c.vector_size()) res += ", "; } } break; case SPIRType::Boolean: if (splat) res += c.scalar(vector, 0) ? "true" : "false"; else { for (uint32_t i = 0; i < c.vector_size(); i++) { if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) res += to_name(c.specialization_constant_id(vector, i)); else res += c.scalar(vector, i) ? "true" : "false"; if (i + 1 < c.vector_size()) res += ", "; } } break; default: SPIRV_CROSS_THROW("Invalid constant expression basetype."); } if (c.vector_size() > 1 && !swizzle_splat) res += ")"; return res; } string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) { auto &type = get(result_type); auto flags = meta[result_id].decoration.decoration_flags; // If we're declaring temporaries inside continue blocks, // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. if (current_continue_block && !hoisted_temporaries.count(result_id)) { auto &header = get(current_continue_block->loop_dominator); if (find_if(begin(header.declare_temporary), end(header.declare_temporary), [result_type, result_id](const pair &tmp) { return tmp.first == result_type && tmp.second == result_id; }) == end(header.declare_temporary)) { header.declare_temporary.emplace_back(result_type, result_id); hoisted_temporaries.insert(result_id); force_recompile = true; } return join(to_name(result_id), " = "); } else if (hoisted_temporaries.count(result_id)) { // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. return join(to_name(result_id), " = "); } else { // The result_id has not been made into an expression yet, so use flags interface. return join(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); } } bool CompilerGLSL::expression_is_forwarded(uint32_t id) { return forwarded_temporaries.find(id) != end(forwarded_temporaries); } SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, bool suppress_usage_tracking) { if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) { // Just forward it without temporary. // If the forward is trivial, we do not force flushing to temporary for this expression. if (!suppress_usage_tracking) forwarded_temporaries.insert(result_id); return set(result_id, rhs, result_type, true); } else { // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). statement(declare_temporary(result_type, result_id), rhs, ";"); return set(result_id, to_name(result_id), result_type, true); } } void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { bool forward = should_forward(op0); emit_op(result_type, result_id, join(op, to_enclosed_expression(op0)), forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { bool forward = should_forward(op0) && should_forward(op1); emit_op(result_type, result_id, join(to_enclosed_expression(op0), " ", op, " ", to_enclosed_expression(op1)), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) { auto &type = get(result_type); auto expr = type_to_glsl_constructor(type); expr += '('; for (uint32_t i = 0; i < type.vecsize; i++) { // Make sure to call to_expression multiple times to ensure // that these expressions are properly flushed to temporaries if needed. expr += op; expr += to_enclosed_expression(operand); expr += '.'; expr += index_to_swizzle(i); if (i + 1 < type.vecsize) expr += ", "; } expr += ')'; emit_op(result_type, result_id, expr, should_forward(operand)); inherit_expression_dependencies(result_id, operand); } void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { auto &type = get(result_type); auto expr = type_to_glsl_constructor(type); expr += '('; for (uint32_t i = 0; i < type.vecsize; i++) { // Make sure to call to_expression multiple times to ensure // that these expressions are properly flushed to temporaries if needed. expr += to_enclosed_expression(op0); expr += '.'; expr += index_to_swizzle(i); expr += ' '; expr += op; expr += ' '; expr += to_enclosed_expression(op1); expr += '.'; expr += index_to_swizzle(i); if (i + 1 < type.vecsize) expr += ", "; } expr += ')'; emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) { auto &type0 = expression_type(op0); auto &type1 = expression_type(op1); // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected // since equality test is exactly the same. bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); // Create a fake type so we can bitcast to it. // We only deal with regular arithmetic types here like int, uints and so on. SPIRType expected_type; expected_type.basetype = input_type; expected_type.vecsize = type0.vecsize; expected_type.columns = type0.columns; expected_type.width = type0.width; if (cast) { cast_op0 = bitcast_glsl(expected_type, op0); cast_op1 = bitcast_glsl(expected_type, op1); } else { // If we don't cast, our actual input type is that of the first (or second) argument. cast_op0 = to_enclosed_expression(op0); cast_op1 = to_enclosed_expression(op1); input_type = type0.basetype; } return expected_type; } void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) { string cast_op0, cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); auto &out_type = get(result_type); // We might have casted away from the result type, so bitcast again. // For example, arithmetic right shift with uint inputs. // Special case boolean outputs since relational opcodes output booleans instead of int/uint. string expr; if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(cast_op0, " ", op, " ", cast_op1); expr += ')'; } else expr += join(cast_op0, " ", op, " ", cast_op1); emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); } void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) { bool forward = should_forward(op0); emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), forward); inherit_expression_dependencies(result_id, op0); } void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) { bool forward = should_forward(op0) && should_forward(op1); emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); } void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) { string cast_op0, cast_op1; auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); auto &out_type = get(result_type); // Special case boolean outputs since relational opcodes output booleans instead of int/uint. string expr; if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) { expected_type.basetype = input_type; expr = bitcast_glsl_op(out_type, expected_type); expr += '('; expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); expr += ')'; } else { expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); } emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); } void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op) { bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); } void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, const char *op) { bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ", to_expression(op3), ")"), forward); inherit_expression_dependencies(result_id, op0); inherit_expression_dependencies(result_id, op1); inherit_expression_dependencies(result_id, op2); inherit_expression_dependencies(result_id, op3); } // EXT_shader_texture_lod only concerns fragment shaders so lod tex functions // are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex // functions in vertex shaders so we revert those back to plain calls when // the lod is a constant value of zero. bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod) { auto &execution = get_entry_point(); bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment; if (!allowed && lod != 0) { auto *lod_constant = maybe_get(lod); if (!lod_constant || lod_constant->scalar_f32() != 0.0f) { SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders."); } } return allowed; } string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod) { const char *type; switch (imgtype.image.dim) { case spv::Dim1D: type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; break; case spv::Dim2D: type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; break; case spv::Dim3D: type = "3D"; break; case spv::DimCube: type = "Cube"; break; case spv::DimBuffer: type = "Buffer"; break; case spv::DimSubpassData: type = "2D"; break; default: type = ""; break; } bool use_explicit_lod = check_explicit_lod_allowed(lod); if (op == "textureLod" || op == "textureProjLod") { if (is_legacy_es()) { if (use_explicit_lod) require_extension("GL_EXT_shader_texture_lod"); } else if (is_legacy()) require_extension("GL_ARB_shader_texture_lod"); } if (op == "texture") return join("texture", type); else if (op == "textureLod") { if (use_explicit_lod) return join("texture", type, is_legacy_es() ? "LodEXT" : "Lod"); else return join("texture", type); } else if (op == "textureProj") return join("texture", type, "Proj"); else if (op == "textureProjLod") { if (use_explicit_lod) return join("texture", type, is_legacy_es() ? "ProjLodEXT" : "ProjLod"); else return join("texture", type); } else { SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); } } bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) { auto *cleft = maybe_get(left); auto *cright = maybe_get(right); auto &lerptype = expression_type(lerp); // If our targets aren't constants, we cannot use construction. if (!cleft || !cright) return false; // If our targets are spec constants, we cannot use construction. if (cleft->specialization || cright->specialization) return false; // We can only use trivial construction if we have a scalar // (should be possible to do it for vectors as well, but that is overkill for now). if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1) return false; // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. bool ret = false; switch (type.basetype) { case SPIRType::Int: case SPIRType::UInt: ret = cleft->scalar() == 0 && cright->scalar() == 1; break; case SPIRType::Float: ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f; break; case SPIRType::Double: ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0; break; case SPIRType::Int64: case SPIRType::UInt64: ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1; break; default: break; } if (ret) op = type_to_glsl_constructor(type); return ret; } void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) { auto &lerptype = expression_type(lerp); auto &restype = get(result_type); string mix_op; bool has_boolean_mix = backend.boolean_mix_support && ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); // Cannot use boolean mix when the lerp argument is just one boolean, // fall back to regular trinary statements. if (lerptype.vecsize == 1) has_boolean_mix = false; // If we can reduce the mix to a simple cast, do so. // This helps for cases like int(bool), uint(bool) which is implemented with // OpSelect bool 1 0. if (trivial_mix) { emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); } else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) { // Boolean mix not supported on desktop without extension. // Was added in OpenGL 4.5 with ES 3.1 compat. // // Could use GL_EXT_shader_integer_mix on desktop at least, // but Apple doesn't support it. :( // Just implement it as ternary expressions. string expr; if (lerptype.vecsize == 1) expr = join(to_enclosed_expression(lerp), " ? ", to_enclosed_expression(right), " : ", to_enclosed_expression(left)); else { auto swiz = [this](uint32_t expression, uint32_t i) { return join(to_enclosed_expression(expression), ".", index_to_swizzle(i)); }; expr = type_to_glsl_constructor(restype); expr += "("; for (uint32_t i = 0; i < restype.vecsize; i++) { expr += swiz(lerp, i); expr += " ? "; expr += swiz(right, i); expr += " : "; expr += swiz(left, i); if (i + 1 < restype.vecsize) expr += ", "; } expr += ")"; } emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); } else emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); } string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id) { auto &args = current_function->arguments; // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect // all possible combinations into new sampler2D uniforms. auto *image = maybe_get_backing_variable(image_id); auto *samp = maybe_get_backing_variable(samp_id); if (image) image_id = image->self; if (samp) samp_id = samp->self; auto image_itr = find_if(begin(args), end(args), [image_id](const SPIRFunction::Parameter ¶m) { return param.id == image_id; }); auto sampler_itr = find_if(begin(args), end(args), [samp_id](const SPIRFunction::Parameter ¶m) { return param.id == samp_id; }); if (image_itr != end(args) || sampler_itr != end(args)) { // If any parameter originates from a parameter, we will find it in our argument list. bool global_image = image_itr == end(args); bool global_sampler = sampler_itr == end(args); uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args)); uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args)); auto &combined = current_function->combined_parameters; auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && p.sampler_id == sid; }); if (itr != end(combined)) return to_expression(itr->id); else { SPIRV_CROSS_THROW( "Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used " "before compile() was called?"); } } else { // For global sampler2D, look directly at the global remapping table. auto &mapping = combined_image_samplers; auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { return combined.image_id == image_id && combined.sampler_id == samp_id; }); if (itr != end(combined_image_samplers)) return to_expression(itr->combined_id); else { SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " "before compile() was called?"); } } } void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) { if (options.vulkan_semantics && combined_image_samplers.empty()) { emit_binary_func_op(result_type, result_id, image_id, samp_id, type_to_glsl(get(result_type)).c_str()); } else emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true); } void CompilerGLSL::emit_texture_op(const Instruction &i) { auto ops = stream(i); auto op = static_cast(i.op); uint32_t length = i.length; if (i.offset + length > spirv.size()) SPIRV_CROSS_THROW("Compiler::parse() opcode out of range."); uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; bool gather = false; bool proj = false; bool fetch = false; const uint32_t *opt = nullptr; switch (op) { case OpImageSampleDrefImplicitLod: case OpImageSampleDrefExplicitLod: dref = ops[4]; opt = &ops[5]; length -= 5; break; case OpImageSampleProjDrefImplicitLod: case OpImageSampleProjDrefExplicitLod: dref = ops[4]; opt = &ops[5]; length -= 5; proj = true; break; case OpImageDrefGather: dref = ops[4]; opt = &ops[5]; length -= 5; gather = true; break; case OpImageGather: comp = ops[4]; opt = &ops[5]; length -= 5; gather = true; break; case OpImageFetch: case OpImageRead: // Reads == fetches in Metal (other langs will not get here) opt = &ops[4]; length -= 4; fetch = true; break; case OpImageSampleProjImplicitLod: case OpImageSampleProjExplicitLod: opt = &ops[4]; length -= 4; proj = true; break; default: opt = &ops[4]; length -= 4; break; } // Bypass pointers because we need the real image struct auto &type = expression_type(img); auto &imgtype = get(type.self); uint32_t coord_components = 0; switch (imgtype.image.dim) { case spv::Dim1D: coord_components = 1; break; case spv::Dim2D: coord_components = 2; break; case spv::Dim3D: coord_components = 3; break; case spv::DimCube: coord_components = 3; break; case spv::DimBuffer: coord_components = 1; break; default: coord_components = 2; break; } if (proj) coord_components++; if (imgtype.image.arrayed) coord_components++; uint32_t bias = 0; uint32_t lod = 0; uint32_t grad_x = 0; uint32_t grad_y = 0; uint32_t coffset = 0; uint32_t offset = 0; uint32_t coffsets = 0; uint32_t sample = 0; uint32_t flags = 0; if (length) { flags = *opt++; length--; } auto test = [&](uint32_t &v, uint32_t flag) { if (length && (flags & flag)) { v = *opt++; length--; } }; test(bias, ImageOperandsBiasMask); test(lod, ImageOperandsLodMask); test(grad_x, ImageOperandsGradMask); test(grad_y, ImageOperandsGradMask); test(coffset, ImageOperandsConstOffsetMask); test(offset, ImageOperandsOffsetMask); test(coffsets, ImageOperandsConstOffsetsMask); test(sample, ImageOperandsSampleMask); string expr; bool forward = false; expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset), (!!grad_x || !!grad_y), !!dref, lod); expr += "("; expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod, coffset, offset, bias, comp, sample, &forward); expr += ")"; emit_op(result_type, id, expr, forward); } // Returns the function name for a texture sampling function for the specified image and sampling characteristics. // For some subclasses, the function is a method on the specified image. string CompilerGLSL::to_function_name(uint32_t, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool, uint32_t lod) { string fname; // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. // To emulate this, we will have to use textureGrad with a constant gradient of 0. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. bool workaround_lod_array_shadow_as_grad = false; if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && imgtype.image.depth && lod) { auto *constant_lod = maybe_get(lod); if (!constant_lod || constant_lod->scalar_f32() != 0.0f) SPIRV_CROSS_THROW( "textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL."); workaround_lod_array_shadow_as_grad = true; } if (is_fetch) fname += "texelFetch"; else { fname += "texture"; if (is_gather) fname += "Gather"; if (has_array_offsets) fname += "Offsets"; if (is_proj) fname += "Proj"; if (has_grad || workaround_lod_array_shadow_as_grad) fname += "Grad"; if (!!lod && !workaround_lod_array_shadow_as_grad) fname += "Lod"; } if (has_offset) fname += "Offset"; return is_legacy() ? legacy_tex_op(fname, imgtype, lod) : fname; } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool, bool, bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward) { string farg_str = to_expression(img); bool swizz_func = backend.swizzle_is_function; auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { if (comps == in_comps) return ""; switch (comps) { case 1: return ".x"; case 2: return swizz_func ? ".xy()" : ".xy"; case 3: return swizz_func ? ".xyz()" : ".xyz"; default: return ""; } }; bool forward = should_forward(coord); // The IR can give us more components than we need, so chop them off as needed. auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize); // Only enclose the UV expression if needed. auto coord_expr = (*swizzle_expr == '\0') ? to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr); // texelFetch only takes int, not uint. auto &coord_type = expression_type(coord); if (coord_type.basetype == SPIRType::UInt) { auto expected_type = coord_type; expected_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); } // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. // To emulate this, we will have to use textureGrad with a constant gradient of 0. // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. bool workaround_lod_array_shadow_as_grad = ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && imgtype.image.depth && lod; if (dref) { forward = forward && should_forward(dref); // SPIR-V splits dref and coordinate. if (coord_components == 4) // GLSL also splits the arguments in two. { farg_str += ", "; farg_str += to_expression(coord); farg_str += ", "; farg_str += to_expression(dref); } else if (is_proj) { // Have to reshuffle so we get vec4(coord, dref, proj), special case. // Other shading languages splits up the arguments for coord and compare value like SPIR-V. // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. farg_str += ", vec4("; if (imgtype.image.dim == Dim1D) { // Could reuse coord_expr, but we will mess up the temporary usage checking. farg_str += to_enclosed_expression(coord) + ".x"; farg_str += ", "; farg_str += "0.0, "; farg_str += to_expression(dref); farg_str += ", "; farg_str += to_enclosed_expression(coord) + ".y)"; } else if (imgtype.image.dim == Dim2D) { // Could reuse coord_expr, but we will mess up the temporary usage checking. farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy"); farg_str += ", "; farg_str += to_expression(dref); farg_str += ", "; farg_str += to_enclosed_expression(coord) + ".z)"; } else SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); } else { // Create a composite which merges coord/dref into a single vector. auto type = expression_type(coord); type.vecsize = coord_components + 1; farg_str += ", "; farg_str += type_to_glsl_constructor(type); farg_str += "("; farg_str += coord_expr; farg_str += ", "; farg_str += to_expression(dref); farg_str += ")"; } } else { farg_str += ", "; farg_str += coord_expr; } if (grad_x || grad_y) { forward = forward && should_forward(grad_x); forward = forward && should_forward(grad_y); farg_str += ", "; farg_str += to_expression(grad_x); farg_str += ", "; farg_str += to_expression(grad_y); } if (lod) { if (workaround_lod_array_shadow_as_grad) { // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. // Implementing this as plain texture() is not safe on some implementations. if (imgtype.image.dim == Dim2D) farg_str += ", vec2(0.0), vec2(0.0)"; else if (imgtype.image.dim == DimCube) farg_str += ", vec3(0.0), vec3(0.0)"; } else { if (check_explicit_lod_allowed(lod)) { forward = forward && should_forward(lod); farg_str += ", "; farg_str += to_expression(lod); } } } if (coffset) { forward = forward && should_forward(coffset); farg_str += ", "; farg_str += to_expression(coffset); } else if (offset) { forward = forward && should_forward(offset); farg_str += ", "; farg_str += to_expression(offset); } if (bias) { forward = forward && should_forward(bias); farg_str += ", "; farg_str += to_expression(bias); } if (comp) { forward = forward && should_forward(comp); farg_str += ", "; farg_str += to_expression(comp); } if (sample) { farg_str += ", "; farg_str += to_expression(sample); } *p_forward = forward; return farg_str; } void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { GLSLstd450 op = static_cast(eop); switch (op) { // FP fiddling case GLSLstd450Round: emit_unary_func_op(result_type, id, args[0], "round"); break; case GLSLstd450RoundEven: if ((options.es && options.version >= 300) || (!options.es && options.version >= 130)) emit_unary_func_op(result_type, id, args[0], "roundEven"); else SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up."); break; case GLSLstd450Trunc: emit_unary_func_op(result_type, id, args[0], "trunc"); break; case GLSLstd450SAbs: case GLSLstd450FAbs: emit_unary_func_op(result_type, id, args[0], "abs"); break; case GLSLstd450SSign: case GLSLstd450FSign: emit_unary_func_op(result_type, id, args[0], "sign"); break; case GLSLstd450Floor: emit_unary_func_op(result_type, id, args[0], "floor"); break; case GLSLstd450Ceil: emit_unary_func_op(result_type, id, args[0], "ceil"); break; case GLSLstd450Fract: emit_unary_func_op(result_type, id, args[0], "fract"); break; case GLSLstd450Radians: emit_unary_func_op(result_type, id, args[0], "radians"); break; case GLSLstd450Degrees: emit_unary_func_op(result_type, id, args[0], "degrees"); break; case GLSLstd450Fma: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); break; case GLSLstd450Modf: register_call_out_argument(args[1]); forced_temporaries.insert(id); emit_binary_func_op(result_type, id, args[0], args[1], "modf"); break; case GLSLstd450ModfStruct: { forced_temporaries.insert(id); auto &type = get(result_type); auto flags = meta[id].decoration.decoration_flags; statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";"); set(id, to_name(id), result_type, true); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", to_expression(id), ".", to_member_name(type, 1), ");"); break; } // Minmax case GLSLstd450FMin: case GLSLstd450UMin: case GLSLstd450SMin: emit_binary_func_op(result_type, id, args[0], args[1], "min"); break; case GLSLstd450FMax: case GLSLstd450UMax: case GLSLstd450SMax: emit_binary_func_op(result_type, id, args[0], args[1], "max"); break; case GLSLstd450FClamp: case GLSLstd450UClamp: case GLSLstd450SClamp: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); break; // Trig case GLSLstd450Sin: emit_unary_func_op(result_type, id, args[0], "sin"); break; case GLSLstd450Cos: emit_unary_func_op(result_type, id, args[0], "cos"); break; case GLSLstd450Tan: emit_unary_func_op(result_type, id, args[0], "tan"); break; case GLSLstd450Asin: emit_unary_func_op(result_type, id, args[0], "asin"); break; case GLSLstd450Acos: emit_unary_func_op(result_type, id, args[0], "acos"); break; case GLSLstd450Atan: emit_unary_func_op(result_type, id, args[0], "atan"); break; case GLSLstd450Sinh: emit_unary_func_op(result_type, id, args[0], "sinh"); break; case GLSLstd450Cosh: emit_unary_func_op(result_type, id, args[0], "cosh"); break; case GLSLstd450Tanh: emit_unary_func_op(result_type, id, args[0], "tanh"); break; case GLSLstd450Asinh: emit_unary_func_op(result_type, id, args[0], "asinh"); break; case GLSLstd450Acosh: emit_unary_func_op(result_type, id, args[0], "acosh"); break; case GLSLstd450Atanh: emit_unary_func_op(result_type, id, args[0], "atanh"); break; case GLSLstd450Atan2: emit_binary_func_op(result_type, id, args[0], args[1], "atan"); break; // Exponentials case GLSLstd450Pow: emit_binary_func_op(result_type, id, args[0], args[1], "pow"); break; case GLSLstd450Exp: emit_unary_func_op(result_type, id, args[0], "exp"); break; case GLSLstd450Log: emit_unary_func_op(result_type, id, args[0], "log"); break; case GLSLstd450Exp2: emit_unary_func_op(result_type, id, args[0], "exp2"); break; case GLSLstd450Log2: emit_unary_func_op(result_type, id, args[0], "log2"); break; case GLSLstd450Sqrt: emit_unary_func_op(result_type, id, args[0], "sqrt"); break; case GLSLstd450InverseSqrt: emit_unary_func_op(result_type, id, args[0], "inversesqrt"); break; // Matrix math case GLSLstd450Determinant: emit_unary_func_op(result_type, id, args[0], "determinant"); break; case GLSLstd450MatrixInverse: emit_unary_func_op(result_type, id, args[0], "inverse"); break; // Lerping case GLSLstd450FMix: case GLSLstd450IMix: { emit_mix_op(result_type, id, args[0], args[1], args[2]); break; } case GLSLstd450Step: emit_binary_func_op(result_type, id, args[0], args[1], "step"); break; case GLSLstd450SmoothStep: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); break; // Packing case GLSLstd450Frexp: register_call_out_argument(args[1]); forced_temporaries.insert(id); emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); break; case GLSLstd450FrexpStruct: { forced_temporaries.insert(id); auto &type = get(result_type); auto flags = meta[id].decoration.decoration_flags; statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";"); set(id, to_name(id), result_type, true); statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", to_expression(id), ".", to_member_name(type, 1), ");"); break; } case GLSLstd450Ldexp: emit_binary_func_op(result_type, id, args[0], args[1], "ldexp"); break; case GLSLstd450PackSnorm4x8: emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); break; case GLSLstd450PackUnorm4x8: emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); break; case GLSLstd450PackSnorm2x16: emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); break; case GLSLstd450PackUnorm2x16: emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); break; case GLSLstd450PackHalf2x16: emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); break; case GLSLstd450UnpackSnorm4x8: emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); break; case GLSLstd450UnpackUnorm4x8: emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); break; case GLSLstd450UnpackSnorm2x16: emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); break; case GLSLstd450UnpackUnorm2x16: emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); break; case GLSLstd450UnpackHalf2x16: emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); break; case GLSLstd450PackDouble2x32: emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); break; case GLSLstd450UnpackDouble2x32: emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); break; // Vector math case GLSLstd450Length: emit_unary_func_op(result_type, id, args[0], "length"); break; case GLSLstd450Distance: emit_binary_func_op(result_type, id, args[0], args[1], "distance"); break; case GLSLstd450Cross: emit_binary_func_op(result_type, id, args[0], args[1], "cross"); break; case GLSLstd450Normalize: emit_unary_func_op(result_type, id, args[0], "normalize"); break; case GLSLstd450FaceForward: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); break; case GLSLstd450Reflect: emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); break; case GLSLstd450Refract: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); break; // Bit-fiddling case GLSLstd450FindILsb: emit_unary_func_op(result_type, id, args[0], "findLSB"); break; case GLSLstd450FindSMsb: case GLSLstd450FindUMsb: emit_unary_func_op(result_type, id, args[0], "findMSB"); break; // Multisampled varying case GLSLstd450InterpolateAtCentroid: emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); break; case GLSLstd450InterpolateAtSample: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); break; case GLSLstd450InterpolateAtOffset: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); break; default: statement("// unimplemented GLSL op ", eop); break; } } void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension("GL_AMD_shader_ballot"); enum AMDShaderBallot { SwizzleInvocationsAMD = 1, SwizzleInvocationsMaskedAMD = 2, WriteInvocationAMD = 3, MbcntAMD = 4 }; auto op = static_cast(eop); switch (op) { case SwizzleInvocationsAMD: emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); break; case SwizzleInvocationsMaskedAMD: emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); break; case WriteInvocationAMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); break; case MbcntAMD: emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); break; default: statement("// unimplemented SPV AMD shader ballot op ", eop); break; } } void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension("GL_AMD_shader_explicit_vertex_parameter"); enum AMDShaderExplicitVertexParameter { InterpolateAtVertexAMD = 1 }; auto op = static_cast(eop); switch (op) { case InterpolateAtVertexAMD: emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD"); break; default: statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); break; } } void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension("GL_AMD_shader_trinary_minmax"); enum AMDShaderTrinaryMinMax { FMin3AMD = 1, UMin3AMD = 2, SMin3AMD = 3, FMax3AMD = 4, UMax3AMD = 5, SMax3AMD = 6, FMid3AMD = 7, UMid3AMD = 8, SMid3AMD = 9 }; auto op = static_cast(eop); switch (op) { case FMin3AMD: case UMin3AMD: case SMin3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); break; case FMax3AMD: case UMax3AMD: case SMax3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); break; case FMid3AMD: case UMid3AMD: case SMid3AMD: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); break; default: statement("// unimplemented SPV AMD shader trinary minmax op ", eop); break; } } void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t) { require_extension("GL_AMD_gcn_shader"); enum AMDGCNShader { CubeFaceIndexAMD = 1, CubeFaceCoordAMD = 2, TimeAMD = 3 }; auto op = static_cast(eop); switch (op) { case CubeFaceIndexAMD: emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); break; case CubeFaceCoordAMD: emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); break; case TimeAMD: { string expr = "timeAMD()"; emit_op(result_type, id, expr, true); break; } default: statement("// unimplemented SPV AMD gcn shader op ", eop); break; } } string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int) return type_to_glsl(out_type); else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64) return type_to_glsl(out_type); else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) return "floatBitsToUint"; else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt) return type_to_glsl(out_type); else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64) return type_to_glsl(out_type); else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) return "floatBitsToInt"; else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) return "uintBitsToFloat"; else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) return "intBitsToFloat"; else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) return "doubleBitsToInt64"; else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) return "doubleBitsToUint64"; else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) return "int64BitsToDouble"; else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) return "uint64BitsToDouble"; else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) return "packUint2x32"; else return ""; } string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) { auto op = bitcast_glsl_op(result_type, expression_type(argument)); if (op.empty()) return to_enclosed_expression(argument); else return join(op, "(", to_expression(argument), ")"); } std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) { auto expr = to_expression(arg); auto &src_type = expression_type(arg); if (src_type.basetype != target_type) { auto target = src_type; target.basetype = target_type; expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); } return expr; } std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, const std::string &expr) { if (target_type.basetype == expr_type) return expr; auto src_type = target_type; src_type.basetype = expr_type; return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); } string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) { switch (builtin) { case BuiltInPosition: return "gl_Position"; case BuiltInPointSize: return "gl_PointSize"; case BuiltInClipDistance: return "gl_ClipDistance"; case BuiltInCullDistance: return "gl_CullDistance"; case BuiltInVertexId: if (options.vulkan_semantics) SPIRV_CROSS_THROW( "Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics."); return "gl_VertexID"; case BuiltInInstanceId: if (options.vulkan_semantics) SPIRV_CROSS_THROW( "Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics."); return "gl_InstanceID"; case BuiltInVertexIndex: if (options.vulkan_semantics) return "gl_VertexIndex"; else return "gl_VertexID"; // gl_VertexID already has the base offset applied. case BuiltInInstanceIndex: if (options.vulkan_semantics) return "gl_InstanceIndex"; else return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. case BuiltInPrimitiveId: return "gl_PrimitiveID"; case BuiltInInvocationId: return "gl_InvocationID"; case BuiltInLayer: return "gl_Layer"; case BuiltInViewportIndex: return "gl_ViewportIndex"; case BuiltInTessLevelOuter: return "gl_TessLevelOuter"; case BuiltInTessLevelInner: return "gl_TessLevelInner"; case BuiltInTessCoord: return "gl_TessCoord"; case BuiltInFragCoord: return "gl_FragCoord"; case BuiltInPointCoord: return "gl_PointCoord"; case BuiltInFrontFacing: return "gl_FrontFacing"; case BuiltInFragDepth: return "gl_FragDepth"; case BuiltInNumWorkgroups: return "gl_NumWorkGroups"; case BuiltInWorkgroupSize: return "gl_WorkGroupSize"; case BuiltInWorkgroupId: return "gl_WorkGroupID"; case BuiltInLocalInvocationId: return "gl_LocalInvocationID"; case BuiltInGlobalInvocationId: return "gl_GlobalInvocationID"; case BuiltInLocalInvocationIndex: return "gl_LocalInvocationIndex"; case BuiltInSampleId: if (options.es && options.version < 320) require_extension("GL_OES_sample_variables"); if (!options.es && options.version < 400) SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400."); return "gl_SampleID"; case BuiltInSampleMask: if (options.es && options.version < 320) require_extension("GL_OES_sample_variables"); if (!options.es && options.version < 400) SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400."); if (storage == StorageClassInput) return "gl_SampleMaskIn"; else return "gl_SampleMask"; case BuiltInSamplePosition: if (options.es && options.version < 320) require_extension("GL_OES_sample_variables"); if (!options.es && options.version < 400) SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400."); return "gl_SamplePosition"; case BuiltInViewIndex: if (options.vulkan_semantics) { require_extension("GL_EXT_multiview"); return "gl_ViewIndex"; } else { require_extension("GL_OVR_multiview2"); return "gl_ViewID_OVR"; } default: return join("gl_BuiltIn_", convert_to_string(builtin)); } } const char *CompilerGLSL::index_to_swizzle(uint32_t index) { switch (index) { case 0: return "x"; case 1: return "y"; case 2: return "z"; case 3: return "w"; default: SPIRV_CROSS_THROW("Swizzle index out of range"); } } string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal, bool chain_only, bool *need_transpose, bool *result_is_packed) { string expr; if (!chain_only) expr = to_enclosed_expression(base); uint32_t type_id = expression_type_id(base); const auto *type = &get(type_id); // Start traversing type hierarchy at the proper non-pointer types, // but keep type_id referencing the original pointer for use below. while (type->pointer) { assert(type->parent_type); type = &get(type->parent_type); } bool access_chain_is_arrayed = false; bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); bool is_packed = false; bool pending_array_enclose = false; bool dimension_flatten = false; for (uint32_t i = 0; i < count; i++) { uint32_t index = indices[i]; // Arrays if (!type->array.empty()) { // If we are flattening multidimensional arrays, only create opening bracket on first // array index. if (options.flatten_multidimensional_arrays && !pending_array_enclose) { dimension_flatten = type->array.size() > 1; pending_array_enclose = dimension_flatten; if (pending_array_enclose) expr += "["; } assert(type->parent_type); const auto append_index = [&]() { expr += "["; if (index_is_literal) expr += convert_to_string(index); else expr += to_expression(index); expr += "]"; }; auto *var = maybe_get(base); if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && !has_decoration(type->self, DecorationBlock)) { // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. // Normally, these variables live in blocks when compiled from GLSL, // but HLSL seems to just emit straight arrays here. // We must pretend this access goes through gl_in/gl_out arrays // to be able to access certain builtins as arrays. auto builtin = meta[base].decoration.builtin_type; switch (builtin) { // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. // case BuiltInClipDistance: case BuiltInPosition: case BuiltInPointSize: if (var->storage == StorageClassInput) expr = join("gl_in[", to_expression(index), "].", expr); else if (var->storage == StorageClassOutput) expr = join("gl_out[", to_expression(index), "].", expr); else append_index(); break; default: append_index(); break; } } else if (options.flatten_multidimensional_arrays && dimension_flatten) { // If we are flattening multidimensional arrays, do manual stride computation. auto &parent_type = get(type->parent_type); if (index_is_literal) expr += convert_to_string(index); else expr += to_enclosed_expression(index); for (auto j = uint32_t(parent_type.array.size()); j; j--) { expr += " * "; expr += enclose_expression(to_array_size(parent_type, j - 1)); } if (parent_type.array.empty()) pending_array_enclose = false; else expr += " + "; if (!pending_array_enclose) expr += "]"; } else { append_index(); } type_id = type->parent_type; type = &get(type_id); access_chain_is_arrayed = true; } // For structs, the index refers to a constant, which indexes into the members. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. else if (type->basetype == SPIRType::Struct) { if (!index_is_literal) index = get(index).scalar(); if (index >= type->member_types.size()) SPIRV_CROSS_THROW("Member index is out of bounds!"); BuiltIn builtin; if (is_member_builtin(*type, index, &builtin)) { // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly. // To make this properly work by omitting all OpName opcodes, // we need to infer gl_in or gl_out based on the builtin, and stage. if (access_chain_is_arrayed) { expr += "."; expr += builtin_to_glsl(builtin, type->storage); } else expr = builtin_to_glsl(builtin, type->storage); } else { // If the member has a qualified name, use it as the entire chain string qual_mbr_name = get_member_qualified_name(type_id, index); if (!qual_mbr_name.empty()) expr = qual_mbr_name; else { expr += "."; expr += to_member_name(*type, index); } } is_packed = member_is_packed_type(*type, index); row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); type = &get(type->member_types[index]); } // Matrix -> Vector else if (type->columns > 1) { if (row_major_matrix_needs_conversion) { expr = convert_row_major_matrix(expr, *type, is_packed); row_major_matrix_needs_conversion = false; is_packed = false; } expr += "["; if (index_is_literal) expr += convert_to_string(index); else expr += to_expression(index); expr += "]"; type_id = type->parent_type; type = &get(type_id); } // Vector -> Scalar else if (type->vecsize > 1) { if (is_packed) { expr = unpack_expression_type(expr, *type); is_packed = false; } if (index_is_literal) { expr += "."; expr += index_to_swizzle(index); } else if (ids[index].get_type() == TypeConstant) { auto &c = get(index); expr += "."; expr += index_to_swizzle(c.scalar()); } else { expr += "["; expr += to_expression(index); expr += "]"; } type_id = type->parent_type; type = &get(type_id); } else if (!backend.allow_truncated_access_chain) SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); } if (pending_array_enclose) { SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " "but the access chain was terminated in the middle of a multidimensional array. " "This is not supported."); } if (need_transpose) *need_transpose = row_major_matrix_needs_conversion; if (result_is_packed) *result_is_packed = is_packed; return expr; } string CompilerGLSL::to_flattened_struct_member(const SPIRVariable &var, uint32_t index) { auto &type = get(var.basetype); return sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, index))); } string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, bool *out_need_transpose, bool *result_is_packed) { if (flattened_buffer_blocks.count(base)) { uint32_t matrix_stride = 0; bool need_transpose = false; flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride); if (out_need_transpose) *out_need_transpose = target_type.columns > 1 && need_transpose; if (result_is_packed) *result_is_packed = false; return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose); } else if (flattened_structs.count(base) && count > 0) { auto chain = access_chain_internal(base, indices, count, false, true).substr(1); if (out_need_transpose) *out_need_transpose = false; if (result_is_packed) *result_is_packed = false; return sanitize_underscores(join(to_name(base), "_", chain)); } else { return access_chain_internal(base, indices, count, false, false, out_need_transpose, result_is_packed); } } string CompilerGLSL::load_flattened_struct(SPIRVariable &var) { auto expr = type_to_glsl_constructor(get(var.basetype)); expr += '('; auto &type = get(var.basetype); for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { if (i) expr += ", "; // Flatten the varyings. // Apply name transformation for flattened I/O blocks. expr += to_flattened_struct_member(var, i); } expr += ')'; return expr; } void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value) { // We're trying to store a structure which has been flattened. // Need to copy members one by one. auto rhs = to_expression(value); // Store result locally. // Since we're declaring a variable potentially multiple times here, // store the variable in an isolated scope. begin_scope(); statement(variable_decl_function_local(var), " = ", rhs, ";"); auto &type = get(var.basetype); for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) { // Flatten the varyings. // Apply name transformation for flattened I/O blocks. auto lhs = sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, i))); rhs = join(to_name(var.self), ".", to_member_name(type, i)); statement(lhs, " = ", rhs, ";"); } end_scope(); } std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, bool need_transpose) { if (!target_type.array.empty()) SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); else if (target_type.basetype == SPIRType::Struct) return flattened_access_chain_struct(base, indices, count, target_type, offset); else if (target_type.columns > 1) return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); else return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose); } std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset) { std::string expr; expr += type_to_glsl_constructor(target_type); expr += "("; for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) { if (i != 0) expr += ", "; const SPIRType &member_type = get(target_type.member_types[i]); uint32_t member_offset = type_struct_member_offset(target_type, i); // The access chain terminates at the struct, so we need to find matrix strides and row-major information // ahead of time. bool need_transpose = false; uint32_t matrix_stride = 0; if (member_type.columns > 1) { need_transpose = (combined_decoration_for_member(target_type, i) & (1ull << DecorationRowMajor)) != 0; matrix_stride = type_struct_member_matrix_stride(target_type, i); } auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, need_transpose); // Cannot forward transpositions, so resolve them here. if (need_transpose) expr += convert_row_major_matrix(tmp, member_type, false); else expr += tmp; } expr += ")"; return expr; } std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, bool need_transpose) { assert(matrix_stride); SPIRType tmp_type = target_type; if (need_transpose) swap(tmp_type.vecsize, tmp_type.columns); std::string expr; expr += type_to_glsl_constructor(tmp_type); expr += "("; for (uint32_t i = 0; i < tmp_type.columns; i++) { if (i != 0) expr += ", "; expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, /* need_transpose= */ false); } expr += ")"; return expr; } std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, bool need_transpose) { auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); auto buffer_name = to_name(expression_type(base).self); if (need_transpose) { std::string expr; if (target_type.vecsize > 1) { expr += type_to_glsl_constructor(target_type); expr += "("; } for (uint32_t i = 0; i < target_type.vecsize; ++i) { if (i != 0) expr += ", "; uint32_t component_offset = result.second + i * matrix_stride; assert(component_offset % (target_type.width / 8) == 0); uint32_t index = component_offset / (target_type.width / 8); expr += buffer_name; expr += "["; expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + expr += convert_to_string(index / 4); expr += "]"; expr += vector_swizzle(1, index % 4); } if (target_type.vecsize > 1) { expr += ")"; } return expr; } else { assert(result.second % (target_type.width / 8) == 0); uint32_t index = result.second / (target_type.width / 8); std::string expr; expr += buffer_name; expr += "["; expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + expr += convert_to_string(index / 4); expr += "]"; expr += vector_swizzle(target_type.vecsize, index % 4); return expr; } } std::pair CompilerGLSL::flattened_access_chain_offset(const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, bool *need_transpose, uint32_t *out_matrix_stride) { const auto *type = &basetype; // Start traversing type hierarchy at the proper non-pointer types. while (type->pointer) { assert(type->parent_type); type = &get(type->parent_type); } // This holds the type of the current pointer which we are traversing through. // We always start out from a struct type which is the block. // This is primarily used to reflect the array strides and matrix strides later. // For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set // accordingly as members of structs are accessed. assert(type->basetype == SPIRType::Struct); uint32_t type_id = 0; std::string expr; // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; for (uint32_t i = 0; i < count; i++) { uint32_t index = indices[i]; // Arrays if (!type->array.empty()) { // Here, the type_id will be a type ID for the array type itself. uint32_t array_stride = get_decoration(type_id, DecorationArrayStride); if (!array_stride) SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); auto *constant = maybe_get(index); if (constant) { // Constant array access. offset += constant->scalar() * array_stride; } else { // Dynamic array access. if (array_stride % word_stride) { SPIRV_CROSS_THROW( "Array stride for dynamic indexing must be divisible by the size of a 4-component vector. " "Likely culprit here is a float or vec2 array inside a push constant block which is std430. " "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index); expr += " * "; expr += convert_to_string(array_stride / word_stride); expr += " + "; } uint32_t parent_type = type->parent_type; type = &get(parent_type); type_id = parent_type; // Type ID now refers to the array type with one less dimension. } // For structs, the index refers to a constant, which indexes into the members. // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. else if (type->basetype == SPIRType::Struct) { index = get(index).scalar(); if (index >= type->member_types.size()) SPIRV_CROSS_THROW("Member index is out of bounds!"); offset += type_struct_member_offset(*type, index); type_id = type->member_types[index]; auto &struct_type = *type; type = &get(type->member_types[index]); if (type->columns > 1) { matrix_stride = type_struct_member_matrix_stride(struct_type, index); row_major_matrix_needs_conversion = (combined_decoration_for_member(struct_type, index) & (1ull << DecorationRowMajor)) != 0; } else row_major_matrix_needs_conversion = false; } // Matrix -> Vector else if (type->columns > 1) { auto *constant = maybe_get(index); if (constant) { index = get(index).scalar(); offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); } else { uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; // Dynamic array access. if (indexing_stride % word_stride) { SPIRV_CROSS_THROW( "Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. " "Likely culprit here is a row-major matrix being accessed dynamically. " "This cannot be flattened. Try using std140 layout instead."); } expr += to_enclosed_expression(index); expr += " * "; expr += convert_to_string(indexing_stride / word_stride); expr += " + "; } uint32_t parent_type = type->parent_type; type = &get(type->parent_type); type_id = parent_type; } // Vector -> Scalar else if (type->vecsize > 1) { auto *constant = maybe_get(index); if (constant) { index = get(index).scalar(); offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); } else { uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); // Dynamic array access. if (indexing_stride % word_stride) { SPIRV_CROSS_THROW( "Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. " "This cannot be flattened in legacy targets."); } expr += to_enclosed_expression(index); expr += " * "; expr += convert_to_string(indexing_stride / word_stride); expr += " + "; } uint32_t parent_type = type->parent_type; type = &get(type->parent_type); type_id = parent_type; } else SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); } if (need_transpose) *need_transpose = row_major_matrix_needs_conversion; if (out_matrix_stride) *out_matrix_stride = matrix_stride; return std::make_pair(expr, offset); } bool CompilerGLSL::should_forward(uint32_t id) { // Immutable expression can always be forwarded. // If not immutable, we can speculate about it by forwarding potentially mutable variables. auto *var = maybe_get(id); bool forward = var ? var->forwardable : false; return (is_immutable(id) || forward) && !options.force_temporary; } void CompilerGLSL::track_expression_read(uint32_t id) { // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. if (expression_is_forwarded(id)) { auto &v = expression_usage_counts[id]; v++; if (v >= 2) { //if (v == 2) // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); forced_temporaries.insert(id); // Force a recompile after this pass to avoid forwarding this variable. force_recompile = true; } } } bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) { if (forced_temporaries.find(id) != end(forced_temporaries)) return false; for (uint32_t i = 0; i < num_args; i++) if (!should_forward(args[i])) return false; // We need to forward globals as well. if (!pure) { for (auto global : global_variables) if (!should_forward(global)) return false; for (auto aliased : aliased_variables) if (!should_forward(aliased)) return false; } return true; } void CompilerGLSL::register_impure_function_call() { // Impure functions can modify globals and aliased variables, so invalidate them as well. for (auto global : global_variables) flush_dependees(get(global)); for (auto aliased : aliased_variables) flush_dependees(get(aliased)); } void CompilerGLSL::register_call_out_argument(uint32_t id) { register_write(id); auto *var = maybe_get(id); if (var) flush_variable_declaration(var->self); } string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) { // These variables are always function local, // so make sure we emit the variable without storage qualifiers. // Some backends will inject custom variables locally in a function // with a storage qualifier which is not function-local. auto old_storage = var.storage; var.storage = StorageClassFunction; auto expr = variable_decl(var); var.storage = old_storage; return expr; } void CompilerGLSL::flush_variable_declaration(uint32_t id) { auto *var = maybe_get(id); if (var && var->deferred_declaration) { statement(variable_decl_function_local(*var), ";"); var->deferred_declaration = false; } } bool CompilerGLSL::remove_duplicate_swizzle(string &op) { auto pos = op.find_last_of('.'); if (pos == string::npos || pos == 0) return false; string final_swiz = op.substr(pos + 1, string::npos); if (backend.swizzle_is_function) { if (final_swiz.size() < 2) return false; if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") final_swiz.erase(final_swiz.size() - 2, string::npos); else return false; } // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. // If so, and previous swizzle is of same length, // we can drop the final swizzle altogether. for (uint32_t i = 0; i < final_swiz.size(); i++) { static const char expected[] = { 'x', 'y', 'z', 'w' }; if (i >= 4 || final_swiz[i] != expected[i]) return false; } auto prevpos = op.find_last_of('.', pos - 1); if (prevpos == string::npos) return false; prevpos++; // Make sure there are only swizzles here ... for (auto i = prevpos; i < pos; i++) { if (op[i] < 'w' || op[i] > 'z') { // If swizzles are foo.xyz() like in C++ backend for example, check for that. if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') break; return false; } } // If original swizzle is large enough, just carve out the components we need. // E.g. foobar.wyx.xy will turn into foobar.wy. if (pos - prevpos >= final_swiz.size()) { op.erase(prevpos + final_swiz.size(), string::npos); // Add back the function call ... if (backend.swizzle_is_function) op += "()"; } return true; } // Optimizes away vector swizzles where we have something like // vec3 foo; // foo.xyz <-- swizzle expression does nothing. // This is a very common pattern after OpCompositeCombine. bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) { auto pos = op.find_last_of('.'); if (pos == string::npos || pos == 0) return false; string final_swiz = op.substr(pos + 1, string::npos); if (backend.swizzle_is_function) { if (final_swiz.size() < 2) return false; if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") final_swiz.erase(final_swiz.size() - 2, string::npos); else return false; } // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. // If so, and previous swizzle is of same length, // we can drop the final swizzle altogether. for (uint32_t i = 0; i < final_swiz.size(); i++) { static const char expected[] = { 'x', 'y', 'z', 'w' }; if (i >= 4 || final_swiz[i] != expected[i]) return false; } auto &type = expression_type(base); // Sanity checking ... assert(type.columns == 1 && type.array.empty()); if (type.vecsize == final_swiz.size()) op.erase(pos, string::npos); return true; } string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) { uint32_t base = 0; string op; string subop; // Can only merge swizzles for vectors. auto &type = get(return_type); bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; bool swizzle_optimization = false; for (uint32_t i = 0; i < length; i++) { auto *e = maybe_get(elems[i]); // If we're merging another scalar which belongs to the same base // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) { // Only supposed to be used for vector swizzle -> scalar. assert(!e->expression.empty() && e->expression.front() == '.'); subop += e->expression.substr(1, string::npos); swizzle_optimization = true; } else { // We'll likely end up with duplicated swizzles, e.g. // foobar.xyz.xyz from patterns like // OpVectorShuffle // OpCompositeExtract x 3 // OpCompositeConstruct 3x + other scalar. // Just modify op in-place. if (swizzle_optimization) { if (backend.swizzle_is_function) subop += "()"; // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. // Essentially, we can only remove one set of swizzles, since that's what we have control over ... // Case 1: // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. // Case 2: // foo.xyz: Duplicate swizzle won't kick in. // If foo is vec3, we can remove xyz, giving just foo. if (!remove_duplicate_swizzle(subop)) remove_unity_swizzle(base, subop); // Strips away redundant parens if we created them during component extraction. strip_enclosed_expression(subop); swizzle_optimization = false; op += subop; } else op += subop; if (i) op += ", "; subop = to_expression(elems[i]); } base = e ? e->base_expression : 0; } if (swizzle_optimization) { if (backend.swizzle_is_function) subop += "()"; if (!remove_duplicate_swizzle(subop)) remove_unity_swizzle(base, subop); // Strips away redundant parens if we created them during component extraction. strip_enclosed_expression(subop); } op += subop; return op; } bool CompilerGLSL::skip_argument(uint32_t id) const { if (!combined_image_samplers.empty() || !options.vulkan_semantics) { auto &type = expression_type(id); if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) return true; } return false; } bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &rhs) { // Do this with strings because we have a very clear pattern we can check for and it avoids // adding lots of special cases to the code emission. if (rhs.size() < lhs.size() + 3) return false; auto index = rhs.find(lhs); if (index != 0) return false; // TODO: Shift operators, but it's not important for now. auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); if (op != lhs.size() + 1) return false; // Check that the op is followed by space. This excludes && and ||. if (rhs[op + 1] != ' ') return false; char bop = rhs[op]; auto expr = rhs.substr(lhs.size() + 3); // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. // Find some common patterns which are equivalent. if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) statement(lhs, bop, bop, ";"); else statement(lhs, " ", bop, "= ", expr, ";"); return true; } void CompilerGLSL::emit_block_instructions(const SPIRBlock &block) { current_emitting_block = █ for (auto &op : block.ops) emit_instruction(op); current_emitting_block = nullptr; } void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); auto opcode = static_cast(instruction.op); uint32_t length = instruction.length; #define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) #define BOP_CAST(op, type) \ emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, glsl_opcode_is_sign_invariant(opcode)) #define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) #define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) #define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) #define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) #define BFOP_CAST(op, type) \ emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, glsl_opcode_is_sign_invariant(opcode)) #define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) #define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) switch (opcode) { // Dealing with memory case OpLoad: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; flush_variable_declaration(ptr); // If we're loading from memory that cannot be changed by the shader, // just forward the expression directly to avoid needless temporaries. // If an expression is mutable and forwardable, we speculate that it is immutable. bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); // If loading a non-native row-major matrix, mark the expression as need_transpose. bool need_transpose = false; bool old_need_transpose = false; auto *ptr_expression = maybe_get(ptr); if (ptr_expression && ptr_expression->need_transpose) { old_need_transpose = true; ptr_expression->need_transpose = false; need_transpose = true; } else if (is_non_native_row_major_matrix(ptr)) need_transpose = true; auto expr = to_expression(ptr); if (ptr_expression) ptr_expression->need_transpose = old_need_transpose; // Suppress usage tracking since using same expression multiple times does not imply any extra work. auto &e = emit_op(result_type, id, expr, forward, true); e.need_transpose = need_transpose; register_read(id, ptr, forward); // Pass through whether the result is of a packed type. if (has_decoration(ptr, DecorationCPacked)) set_decoration(id, DecorationCPacked); break; } case OpInBoundsAccessChain: case OpAccessChain: { auto *var = maybe_get(ops[2]); if (var) flush_variable_declaration(var->self); // If the base is immutable, the access chain pointer must also be. // If an expression is mutable and forwardable, we speculate that it is immutable. bool need_transpose, result_is_packed; auto e = access_chain(ops[2], &ops[3], length - 3, get(ops[0]), &need_transpose, &result_is_packed); auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); expr.loaded_from = ops[2]; expr.need_transpose = need_transpose; // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. if (result_is_packed) set_decoration(ops[1], DecorationCPacked); else unset_decoration(ops[1], DecorationCPacked); break; } case OpStore: { auto *var = maybe_get(ops[0]); if (var && var->statically_assigned) var->static_expression = ops[1]; else if (var && var->loop_variable && !var->loop_variable_enable) var->static_expression = ops[1]; else if (var && flattened_structs.count(ops[0])) { store_flattened_struct(*var, ops[1]); register_write(ops[0]); } else { auto rhs = to_expression(ops[1]); // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. if (!rhs.empty()) { auto lhs = to_expression(ops[0]); // Tries to optimize assignments like " = op expr". // While this is purely cosmetic, this is important for legacy ESSL where loop // variable increments must be in either i++ or i += const-expr. // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. if (!optimize_read_modify_write(lhs, rhs)) statement(lhs, " = ", rhs, ";"); register_write(ops[0]); } } break; } case OpArrayLength: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto e = access_chain_internal(ops[2], &ops[3], length - 3, true); set(id, e + ".length()", result_type, true); break; } // Function calls case OpFunctionCall: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t func = ops[2]; const auto *arg = &ops[3]; length -= 3; auto &callee = get(func); auto &return_type = get(callee.return_type); bool pure = function_is_pure(callee); bool callee_has_out_variables = false; bool emit_return_value_as_argument = false; // Invalidate out variables passed to functions since they can be OpStore'd to. for (uint32_t i = 0; i < length; i++) { if (callee.arguments[i].write_count) { register_call_out_argument(arg[i]); callee_has_out_variables = true; } flush_variable_declaration(arg[i]); } if (!return_type.array.empty() && !backend.can_return_array) { callee_has_out_variables = true; emit_return_value_as_argument = true; } if (!pure) register_impure_function_call(); string funexpr; vector arglist; funexpr += to_name(func) + "("; if (emit_return_value_as_argument) { statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";"); arglist.push_back(to_name(id)); } for (uint32_t i = 0; i < length; i++) { // Do not pass in separate images or samplers if we're remapping // to combined image samplers. if (skip_argument(arg[i])) continue; arglist.push_back(to_func_call_arg(arg[i])); } for (auto &combined : callee.combined_parameters) { uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id]; uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id]; auto *image = maybe_get_backing_variable(image_id); if (image) image_id = image->self; auto *samp = maybe_get_backing_variable(sampler_id); if (samp) sampler_id = samp->self; arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); } append_global_func_args(callee, length, arglist); funexpr += merge(arglist); funexpr += ")"; // Check for function call constraints. check_function_call_constraints(arg, length); if (return_type.basetype != SPIRType::Void) { // If the function actually writes to an out variable, // take the conservative route and do not forward. // The problem is that we might not read the function // result (and emit the function) before an out variable // is read (common case when return value is ignored! // In order to avoid start tracking invalid variables, // just avoid the forwarding problem altogether. bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && (forced_temporaries.find(id) == end(forced_temporaries)); if (emit_return_value_as_argument) { statement(funexpr, ";"); set(id, to_name(id), result_type, true); } else emit_op(result_type, id, funexpr, forward); // Function calls are implicit loads from all variables in question. // Set dependencies for them. for (uint32_t i = 0; i < length; i++) register_read(id, arg[i], forward); // If we're going to forward the temporary result, // put dependencies on every variable that must not change. if (forward) register_global_read_dependencies(callee, id); } else statement(funexpr, ";"); break; } // Composite munging case OpCompositeConstruct: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; const auto *elems = &ops[2]; length -= 2; bool forward = true; for (uint32_t i = 0; i < length; i++) forward = forward && should_forward(elems[i]); auto &out_type = get(result_type); if (!length) { if (out_type.basetype == SPIRType::Struct) { // It is technically allowed to make a blank struct, // but we cannot make a meaningful expression out of it in high level languages, // so make it a blank expression. emit_op(result_type, id, "", forward); break; } else SPIRV_CROSS_THROW("Invalid input to OpCompositeConstruct."); } auto &in_type = expression_type(elems[0]); // Only splat if we have vector constructors. // Arrays and structs must be initialized properly in full. bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; bool splat = in_type.vecsize == 1 && in_type.columns == 1 && !composite && backend.use_constructor_splatting; bool swizzle_splat = in_type.vecsize == 1 && in_type.columns == 1 && backend.can_swizzle_scalar; if (ids[elems[0]].get_type() == TypeConstant && (in_type.basetype != SPIRType::Float && in_type.basetype != SPIRType::Double)) { // Cannot swizzle literal integers as a special case. swizzle_splat = false; } if (splat || swizzle_splat) { uint32_t input = elems[0]; for (uint32_t i = 0; i < length; i++) { if (input != elems[i]) { splat = false; swizzle_splat = false; } } } if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) forward = false; if (!out_type.array.empty() && !backend.can_declare_arrays_inline) forward = false; string constructor_op; if (backend.use_initializer_list && composite) { // Only use this path if we are building composites. // This path cannot be used for arithmetic. if (backend.use_typed_initializer_list) constructor_op += type_to_glsl_constructor(get(result_type)); constructor_op += "{ "; if (splat) constructor_op += to_expression(elems[0]); else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += " }"; } else if (swizzle_splat && !composite) { constructor_op = remap_swizzle(get(result_type), 1, to_expression(elems[0])); } else { constructor_op = type_to_glsl_constructor(get(result_type)) + "("; if (splat) constructor_op += to_expression(elems[0]); else constructor_op += build_composite_combiner(result_type, elems, length); constructor_op += ")"; } emit_op(result_type, id, constructor_op, forward); break; } case OpVectorInsertDynamic: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t vec = ops[2]; uint32_t comp = ops[3]; uint32_t index = ops[4]; flush_variable_declaration(vec); // Make a copy, then use access chain to store the variable. statement(declare_temporary(result_type, id), to_expression(vec), ";"); set(id, to_name(id), result_type, true); auto chain = access_chain_internal(id, &index, 1, false); statement(chain, " = ", to_expression(comp), ";"); break; } case OpVectorExtractDynamic: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto expr = access_chain_internal(ops[2], &ops[3], 1, false); emit_op(result_type, id, expr, should_forward(ops[2])); break; } case OpCompositeExtract: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; length -= 3; auto &type = get(result_type); // We can only split the expression here if our expression is forwarded as a temporary. bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. auto &composite_type = expression_type(ops[2]); if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty()) allow_base_expression = false; // Only apply this optimization if result is scalar. if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) { // We want to split the access chain from the base. // This is so we can later combine different CompositeExtract results // with CompositeConstruct without emitting code like // // vec3 temp = texture(...).xyz // vec4(temp.x, temp.y, temp.z, 1.0). // // when we actually wanted to emit this // vec4(texture(...).xyz, 1.0). // // Including the base will prevent this and would trigger multiple reads // from expression causing it to be forced to an actual temporary in GLSL. auto expr = access_chain_internal(ops[2], &ops[3], length, true, true); auto &e = emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2])); e.base_expression = ops[2]; } else { auto expr = access_chain_internal(ops[2], &ops[3], length, true); emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2])); } break; } case OpCompositeInsert: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t obj = ops[2]; uint32_t composite = ops[3]; const auto *elems = &ops[4]; length -= 4; flush_variable_declaration(composite); // Make a copy, then use access chain to store the variable. statement(declare_temporary(result_type, id), to_expression(composite), ";"); set(id, to_name(id), result_type, true); auto chain = access_chain_internal(id, elems, length, true); statement(chain, " = ", to_expression(obj), ";"); break; } case OpCopyMemory: { uint32_t lhs = ops[0]; uint32_t rhs = ops[1]; if (lhs != rhs) { flush_variable_declaration(lhs); flush_variable_declaration(rhs); statement(to_expression(lhs), " = ", to_expression(rhs), ";"); register_write(lhs); } break; } case OpCopyObject: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t rhs = ops[2]; bool pointer = get(result_type).pointer; if (expression_is_lvalue(rhs) && !pointer) { // Need a copy. // For pointer types, we copy the pointer itself. statement(declare_temporary(result_type, id), to_expression(rhs), ";"); set(id, to_name(id), result_type, true); } else { // RHS expression is immutable, so just forward it. // Copying these things really make no sense, but // seems to be allowed anyways. auto &e = set(id, to_expression(rhs), result_type, true); if (pointer) { auto *var = maybe_get_backing_variable(rhs); e.loaded_from = var ? var->self : 0; } } break; } case OpVectorShuffle: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t vec0 = ops[2]; uint32_t vec1 = ops[3]; const auto *elems = &ops[4]; length -= 4; auto &type0 = expression_type(vec0); bool shuffle = false; for (uint32_t i = 0; i < length; i++) if (elems[i] >= type0.vecsize) shuffle = true; string expr; bool should_fwd, trivial_forward; if (shuffle) { should_fwd = should_forward(vec0) && should_forward(vec1); trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1); // Constructor style and shuffling from two different vectors. vector args; for (uint32_t i = 0; i < length; i++) { if (elems[i] >= type0.vecsize) args.push_back(join(to_enclosed_expression(vec1), ".", index_to_swizzle(elems[i] - type0.vecsize))); else args.push_back(join(to_enclosed_expression(vec0), ".", index_to_swizzle(elems[i]))); } expr += join(type_to_glsl_constructor(get(result_type)), "(", merge(args), ")"); } else { should_fwd = should_forward(vec0); trivial_forward = !expression_is_forwarded(vec0); // We only source from first vector, so can use swizzle. // If the vector is packed, unpack it before applying a swizzle (needed for MSL) expr += to_enclosed_expression(vec0); if (has_decoration(vec0, DecorationCPacked)) expr = unpack_expression_type(expr, expression_type(vec0)); expr += "."; for (uint32_t i = 0; i < length; i++) expr += index_to_swizzle(elems[i]); if (backend.swizzle_is_function && length > 1) expr += "()"; } // A shuffle is trivial in that it doesn't actually *do* anything. // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. emit_op(result_type, id, expr, should_fwd, trivial_forward); break; } // ALU case OpIsNan: UFOP(isnan); break; case OpIsInf: UFOP(isinf); break; case OpSNegate: case OpFNegate: UOP(-); break; case OpIAdd: { // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. auto type = get(ops[0]).basetype; BOP_CAST(+, type); break; } case OpFAdd: BOP(+); break; case OpISub: { auto type = get(ops[0]).basetype; BOP_CAST(-, type); break; } case OpFSub: BOP(-); break; case OpIMul: { auto type = get(ops[0]).basetype; BOP_CAST(*, type); break; } case OpVectorTimesMatrix: case OpMatrixTimesVector: { // If the matrix needs transpose, just flip the multiply order. auto *e = maybe_get(ops[opcode == OpMatrixTimesVector ? 2 : 3]); if (e && e->need_transpose) { e->need_transpose = false; emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*"); e->need_transpose = true; } else BOP(*); break; } case OpFMul: case OpMatrixTimesScalar: case OpVectorTimesScalar: case OpMatrixTimesMatrix: BOP(*); break; case OpOuterProduct: BFOP(outerProduct); break; case OpDot: BFOP(dot); break; case OpTranspose: UFOP(transpose); break; case OpSDiv: BOP_CAST(/, SPIRType::Int); break; case OpUDiv: BOP_CAST(/, SPIRType::UInt); break; case OpFDiv: BOP(/); break; case OpShiftRightLogical: BOP_CAST(>>, SPIRType::UInt); break; case OpShiftRightArithmetic: BOP_CAST(>>, SPIRType::Int); break; case OpShiftLeftLogical: { auto type = get(ops[0]).basetype; BOP_CAST(<<, type); break; } case OpBitwiseOr: { auto type = get(ops[0]).basetype; BOP_CAST(|, type); break; } case OpBitwiseXor: { auto type = get(ops[0]).basetype; BOP_CAST (^, type); break; } case OpBitwiseAnd: { auto type = get(ops[0]).basetype; BOP_CAST(&, type); break; } case OpNot: UOP(~); break; case OpUMod: BOP_CAST(%, SPIRType::UInt); break; case OpSMod: BOP_CAST(%, SPIRType::Int); break; case OpFMod: BFOP(mod); break; // Relational case OpAny: UFOP(any); break; case OpAll: UFOP(all); break; case OpSelect: emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); break; case OpLogicalOr: { // No vector variant in GLSL for logical OR. auto result_type = ops[0]; auto id = ops[1]; auto &type = get(result_type); if (type.vecsize > 1) emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||"); else BOP(||); break; } case OpLogicalAnd: { // No vector variant in GLSL for logical AND. auto result_type = ops[0]; auto id = ops[1]; auto &type = get(result_type); if (type.vecsize > 1) emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&"); else BOP(&&); break; } case OpLogicalNot: { auto &type = get(ops[0]); if (type.vecsize > 1) UFOP(not); else UOP(!); break; } case OpIEqual: { if (expression_type(ops[2]).vecsize > 1) BFOP_CAST(equal, SPIRType::Int); else BOP_CAST(==, SPIRType::Int); break; } case OpLogicalEqual: case OpFOrdEqual: { if (expression_type(ops[2]).vecsize > 1) BFOP(equal); else BOP(==); break; } case OpINotEqual: { if (expression_type(ops[2]).vecsize > 1) BFOP_CAST(notEqual, SPIRType::Int); else BOP_CAST(!=, SPIRType::Int); break; } case OpLogicalNotEqual: case OpFOrdNotEqual: { if (expression_type(ops[2]).vecsize > 1) BFOP(notEqual); else BOP(!=); break; } case OpUGreaterThan: case OpSGreaterThan: { auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) BFOP_CAST(greaterThan, type); else BOP_CAST(>, type); break; } case OpFOrdGreaterThan: { if (expression_type(ops[2]).vecsize > 1) BFOP(greaterThan); else BOP(>); break; } case OpUGreaterThanEqual: case OpSGreaterThanEqual: { auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) BFOP_CAST(greaterThanEqual, type); else BOP_CAST(>=, type); break; } case OpFOrdGreaterThanEqual: { if (expression_type(ops[2]).vecsize > 1) BFOP(greaterThanEqual); else BOP(>=); break; } case OpULessThan: case OpSLessThan: { auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) BFOP_CAST(lessThan, type); else BOP_CAST(<, type); break; } case OpFOrdLessThan: { if (expression_type(ops[2]).vecsize > 1) BFOP(lessThan); else BOP(<); break; } case OpULessThanEqual: case OpSLessThanEqual: { auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int; if (expression_type(ops[2]).vecsize > 1) BFOP_CAST(lessThanEqual, type); else BOP_CAST(<=, type); break; } case OpFOrdLessThanEqual: { if (expression_type(ops[2]).vecsize > 1) BFOP(lessThanEqual); else BOP(<=); break; } // Conversion case OpConvertFToU: case OpConvertFToS: case OpConvertSToF: case OpConvertUToF: case OpUConvert: case OpSConvert: case OpFConvert: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto func = type_to_glsl_constructor(get(result_type)); emit_unary_func_op(result_type, id, ops[2], func.c_str()); break; } case OpBitcast: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t arg = ops[2]; auto op = bitcast_glsl_op(get(result_type), expression_type(arg)); emit_unary_func_op(result_type, id, arg, op.c_str()); break; } case OpQuantizeToF16: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t arg = ops[2]; string op; auto &type = get(result_type); switch (type.vecsize) { case 1: op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); break; case 2: op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); break; case 3: { auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); op = join("vec3(", op0, ", ", op1, ")"); break; } case 4: { auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); op = join("vec4(", op0, ", ", op1, ")"); break; } default: SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); } emit_op(result_type, id, op, should_forward(arg)); break; } // Derivatives case OpDPdx: UFOP(dFdx); if (is_legacy_es()) require_extension("GL_OES_standard_derivatives"); break; case OpDPdy: UFOP(dFdy); if (is_legacy_es()) require_extension("GL_OES_standard_derivatives"); break; case OpDPdxFine: UFOP(dFdxFine); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension("GL_ARB_derivative_control"); break; case OpDPdyFine: UFOP(dFdyFine); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension("GL_ARB_derivative_control"); break; case OpDPdxCoarse: if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } UFOP(dFdxCoarse); if (options.version < 450) require_extension("GL_ARB_derivative_control"); break; case OpDPdyCoarse: UFOP(dFdyCoarse); if (options.es) { SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); } if (options.version < 450) require_extension("GL_ARB_derivative_control"); break; case OpFwidth: UFOP(fwidth); if (is_legacy_es()) require_extension("GL_OES_standard_derivatives"); break; // Bitfield case OpBitFieldInsert: // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. QFOP(bitfieldInsert); break; case OpBitFieldSExtract: case OpBitFieldUExtract: // TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary. TFOP(bitfieldExtract); break; case OpBitReverse: UFOP(bitfieldReverse); break; case OpBitCount: UFOP(bitCount); break; // Atomics case OpAtomicExchange: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; // Ignore semantics for now, probably only relevant to CL. uint32_t val = ops[5]; const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; forced_temporaries.insert(id); emit_binary_func_op(result_type, id, ptr, val, op); flush_all_atomic_capable_variables(); break; } case OpAtomicCompareExchange: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; uint32_t ptr = ops[2]; uint32_t val = ops[6]; uint32_t comp = ops[7]; const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; forced_temporaries.insert(id); emit_trinary_func_op(result_type, id, ptr, comp, val, op); flush_all_atomic_capable_variables(); break; } case OpAtomicLoad: flush_all_atomic_capable_variables(); // FIXME: Image? // OpAtomicLoad seems to only be relevant for atomic counters. UFOP(atomicCounter); register_read(ops[1], ops[2], should_forward(ops[2])); break; case OpAtomicStore: SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore."); case OpAtomicIIncrement: forced_temporaries.insert(ops[1]); // FIXME: Image? UFOP(atomicCounterIncrement); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; case OpAtomicIDecrement: forced_temporaries.insert(ops[1]); // FIXME: Image? UFOP(atomicCounterDecrement); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; case OpAtomicIAdd: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; } case OpAtomicISub: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; forced_temporaries.insert(ops[1]); auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; } case OpAtomicSMin: case OpAtomicUMin: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; } case OpAtomicSMax: case OpAtomicUMax: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; } case OpAtomicAnd: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; } case OpAtomicOr: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; } case OpAtomicXor: { const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor"; forced_temporaries.insert(ops[1]); emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op); flush_all_atomic_capable_variables(); register_read(ops[1], ops[2], should_forward(ops[2])); break; } // Geometry shaders case OpEmitVertex: statement("EmitVertex();"); break; case OpEndPrimitive: statement("EndPrimitive();"); break; case OpEmitStreamVertex: statement("EmitStreamVertex();"); break; case OpEndStreamPrimitive: statement("EndStreamPrimitive();"); break; // Textures case OpImageSampleExplicitLod: case OpImageSampleProjExplicitLod: case OpImageSampleDrefExplicitLod: case OpImageSampleProjDrefExplicitLod: case OpImageSampleImplicitLod: case OpImageSampleProjImplicitLod: case OpImageSampleDrefImplicitLod: case OpImageSampleProjDrefImplicitLod: case OpImageFetch: case OpImageGather: case OpImageDrefGather: // Gets a bit hairy, so move this to a separate instruction. emit_texture_op(instruction); break; case OpImage: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto &e = emit_op(result_type, id, to_expression(ops[2]), true); // When using the image, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); e.loaded_from = var ? var->self : 0; break; } case OpImageQueryLod: { if (!options.es && options.version < 400) { require_extension("GL_ARB_texture_query_lod"); // For some reason, the ARB spec is all-caps. BFOP(textureQueryLOD); } else if (options.es) SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile."); else BFOP(textureQueryLod); break; } case OpImageQueryLevels: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (!options.es && options.version < 430) require_extension("GL_ARB_texture_query_levels"); if (options.es) SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); auto expr = join("textureQueryLevels(", to_expression(ops[2]), ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); break; } case OpImageQuerySamples: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; string expr; if (type.image.sampled == 2) expr = join("imageSamples(", to_expression(ops[2]), ")"); else expr = join("textureSamples(", to_expression(ops[2]), ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); break; } case OpSampledImage: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_sampled_image_op(result_type, id, ops[2], ops[3]); break; } case OpImageQuerySizeLod: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto expr = join("textureSize(", to_expression(ops[2]), ", ", bitcast_expression(SPIRType::Int, ops[3]), ")"); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); break; } // Image load/store case OpImageRead: { // We added Nonreadable speculatively to the OpImage variable due to glslangValidator // not adding the proper qualifiers. // If it turns out we need to read the image after all, remove the qualifier and recompile. auto *var = maybe_get_backing_variable(ops[2]); if (var) { auto &flags = meta.at(var->self).decoration.decoration_flags; if (flags & (1ull << DecorationNonReadable)) { flags &= ~(1ull << DecorationNonReadable); force_recompile = true; } } uint32_t result_type = ops[0]; uint32_t id = ops[1]; bool pure; string imgexpr; auto &type = expression_type(ops[2]); if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code { if (type.image.ms) SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); auto itr = find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); if (itr == end(pls_inputs)) { // For non-PLS inputs, we rely on subpass type remapping information to get it right // since ImageRead always returns 4-component vectors and the backing type is opaque. if (!var->remapped_components) SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); imgexpr = remap_swizzle(get(result_type), var->remapped_components, to_expression(ops[2])); } else { // PLS input could have different number of components than what the SPIR expects, swizzle to // the appropriate vector size. uint32_t components = pls_format_to_components(itr->format); imgexpr = remap_swizzle(get(result_type), components, to_expression(ops[2])); } pure = true; } else if (type.image.dim == DimSubpassData) { if (options.vulkan_semantics) { // With Vulkan semantics, use the proper Vulkan GLSL construct. if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW( "Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")"); } else imgexpr = join("subpassLoad(", to_expression(ops[2]), ")"); } else { if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW( "Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", to_expression(samples), ")"); } else { // Implement subpass loads via texture barrier style sampling. imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); } } imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = true; } else { // imageLoad only accepts int coords, not uint. auto coord_expr = to_expression(ops[3]); auto target_coord_type = expression_type(ops[3]); target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); // Plain image load/store. if (type.image.ms) { uint32_t operands = ops[4]; if (operands != ImageOperandsSampleMask || length != 6) SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used."); uint32_t samples = ops[5]; imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); } else imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")"); imgexpr = remap_swizzle(get(result_type), 4, imgexpr); pure = false; } if (var && var->forwardable) { bool forward = forced_temporaries.find(id) == end(forced_temporaries); auto &e = emit_op(result_type, id, imgexpr, forward); // We only need to track dependencies if we're reading from image load/store. if (!pure) { e.loaded_from = var->self; if (forward) var->dependees.push_back(id); } } else emit_op(result_type, id, imgexpr, false); break; } case OpImageTexelPointer: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; auto &e = set(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true); // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); e.loaded_from = var ? var->self : 0; break; } case OpImageWrite: { // We added Nonwritable speculatively to the OpImage variable due to glslangValidator // not adding the proper qualifiers. // If it turns out we need to write to the image after all, remove the qualifier and recompile. auto *var = maybe_get_backing_variable(ops[0]); if (var) { auto &flags = meta.at(var->self).decoration.decoration_flags; if (flags & (1ull << DecorationNonWritable)) { flags &= ~(1ull << DecorationNonWritable); force_recompile = true; } } auto &type = expression_type(ops[0]); auto &value_type = expression_type(ops[2]); auto store_type = value_type; store_type.vecsize = 4; // imageStore only accepts int coords, not uint. auto coord_expr = to_expression(ops[1]); auto target_coord_type = expression_type(ops[1]); target_coord_type.basetype = SPIRType::Int; coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); if (type.image.ms) { uint32_t operands = ops[3]; if (operands != ImageOperandsSampleMask || length != 5) SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); uint32_t samples = ops[4]; statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); } else statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); if (var && variable_storage_is_aliased(*var)) flush_all_aliased_variables(); break; } case OpImageQuerySize: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.basetype == SPIRType::Image) { string expr; if (type.image.sampled == 2) { // The size of an image is always constant. expr = join("imageSize(", to_expression(ops[2]), ")"); } else { // This path is hit for samplerBuffers and multisampled images which do not have LOD. expr = join("textureSize(", to_expression(ops[2]), ")"); } auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::Int, expr); emit_op(result_type, id, expr, true); } else SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); break; } // Compute case OpControlBarrier: case OpMemoryBarrier: { if (get_entry_point().model == ExecutionModelTessellationControl) { // Control shaders only have barriers, and it implies memory barriers. if (opcode == OpControlBarrier) statement("barrier();"); break; } uint32_t memory; uint32_t semantics; if (opcode == OpMemoryBarrier) { memory = get(ops[0]).scalar(); semantics = get(ops[1]).scalar(); } else { memory = get(ops[1]).scalar(); semantics = get(ops[2]).scalar(); } // We only care about these flags, acquire/release and friends are not relevant to GLSL. semantics = mask_relevant_memory_semantics(semantics); if (opcode == OpMemoryBarrier) { // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier // does what we need, so we avoid redundant barriers. const Instruction *next = get_next_instruction_in_block(instruction); if (next && next->op == OpControlBarrier) { auto *next_ops = stream(*next); uint32_t next_memory = get(next_ops[1]).scalar(); uint32_t next_semantics = get(next_ops[2]).scalar(); next_semantics = mask_relevant_memory_semantics(next_semantics); bool memory_scope_covered = false; if (next_memory == memory) memory_scope_covered = true; else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) { // If we only care about workgroup memory, either Device or Workgroup scope is fine, // scope does not have to match. if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && (memory == ScopeDevice || memory == ScopeWorkgroup)) { memory_scope_covered = true; } } else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) { // The control barrier has device scope, but the memory barrier just has workgroup scope. memory_scope_covered = true; } // If we have the same memory scope, and all memory types are covered, we're good. if (memory_scope_covered && (semantics & next_semantics) == semantics) break; } } // We are synchronizing some memory or syncing execution, // so we cannot forward any loads beyond the memory barrier. if (semantics || opcode == OpControlBarrier) flush_all_active_variables(); if (memory == ScopeWorkgroup) // Only need to consider memory within a group { if (semantics == MemorySemanticsWorkgroupMemoryMask) statement("memoryBarrierShared();"); else if (semantics != 0) statement("groupMemoryBarrier();"); } else { const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask; if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) { // These are not relevant for GLSL, but assume it means memoryBarrier(). // memoryBarrier() does everything, so no need to test anything else. statement("memoryBarrier();"); } else if ((semantics & all_barriers) == all_barriers) { // Short-hand instead of emitting 4 barriers. statement("memoryBarrier();"); } else { // Pick out individual barriers. if (semantics & MemorySemanticsWorkgroupMemoryMask) statement("memoryBarrierShared();"); if (semantics & MemorySemanticsUniformMemoryMask) statement("memoryBarrierBuffer();"); if (semantics & MemorySemanticsImageMemoryMask) statement("memoryBarrierImage();"); if (semantics & MemorySemanticsAtomicCounterMemoryMask) statement("memoryBarrierAtomicCounter();"); } } if (opcode == OpControlBarrier) statement("barrier();"); break; } case OpExtInst: { uint32_t extension_set = ops[2]; if (get(extension_set).ext == SPIRExtension::GLSL) { emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot) { emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) { emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) { emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else if (get(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader) { emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); } else { statement("// unimplemented ext op ", instruction.op); break; } break; } case OpSubgroupBallotKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; string expr; expr = join("unpackUint2x32(ballotARB(" + to_expression(ops[2]) + "))"); emit_op(result_type, id, expr, true); require_extension("GL_ARB_shader_ballot"); break; } case OpSubgroupFirstInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); require_extension("GL_ARB_shader_ballot"); break; } case OpSubgroupReadInvocationKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); require_extension("GL_ARB_shader_ballot"); break; } case OpSubgroupAllKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); require_extension("GL_ARB_shader_group_vote"); break; } case OpSubgroupAnyKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); require_extension("GL_ARB_shader_group_vote"); break; } case OpSubgroupAllEqualKHR: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); require_extension("GL_ARB_shader_group_vote"); break; } case OpGroupIAddNonUniformAMD: case OpGroupFAddNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); require_extension("GL_AMD_shader_ballot"); break; } case OpGroupFMinNonUniformAMD: case OpGroupUMinNonUniformAMD: case OpGroupSMinNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); require_extension("GL_AMD_shader_ballot"); break; } case OpGroupFMaxNonUniformAMD: case OpGroupUMaxNonUniformAMD: case OpGroupSMaxNonUniformAMD: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); require_extension("GL_AMD_shader_ballot"); break; } case OpFragmentMaskFetchAMD: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.image.dim == spv::DimSubpassData) { emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); } else { emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); } require_extension("GL_AMD_shader_fragment_mask"); break; } case OpFragmentFetchAMD: { auto &type = expression_type(ops[2]); uint32_t result_type = ops[0]; uint32_t id = ops[1]; if (type.image.dim == spv::DimSubpassData) { emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); } else { emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); } require_extension("GL_AMD_shader_fragment_mask"); break; } default: statement("// unimplemented op ", instruction.op); break; } } // Appends function arguments, mapped from global variables, beyond the specified arg index. // This is used when a function call uses fewer arguments than the function defines. // This situation may occur if the function signature has been dynamically modified to // extract global variables referenced from within the function, and convert them to // function arguments. This is necessary for shader languages that do not support global // access to shader input content from within a function (eg. Metal). Each additional // function args uses the name of the global variable. Function nesting will modify the // functions and function calls all the way up the nesting chain. void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, vector &arglist) { auto &args = func.arguments; uint32_t arg_cnt = uint32_t(args.size()); for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) { auto &arg = args[arg_idx]; assert(arg.alias_global_variable); arglist.push_back(to_func_call_arg(arg.id)); // If the underlying variable needs to be declared // (ie. a local variable with deferred declaration), do so now. uint32_t var_id = get(arg.id).basevariable; if (var_id) flush_variable_declaration(var_id); } } string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) { auto &memb = meta[type.self].members; if (index < memb.size() && !memb[index].alias.empty()) return memb[index].alias; else return join("_m", index); } void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) { auto &memb = meta[type.self].members; if (index < memb.size() && !memb[index].alias.empty()) { auto &name = memb[index].alias; if (name.empty()) return; // Reserved for temporaries. if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) { name.clear(); return; } update_name_cache(type.member_name_cache, name); } } // Checks whether the ID is a row_major matrix that requires conversion before use bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) { // Natively supported row-major matrices do not need to be converted. // Legacy targets do not support row major. if (backend.native_row_major_matrix && !is_legacy()) return false; // Non-matrix or column-major matrix types do not need to be converted. if (!(meta[id].decoration.decoration_flags & (1ull << DecorationRowMajor))) return false; // Only square row-major matrices can be converted at this time. // Converting non-square matrices will require defining custom GLSL function that // swaps matrix elements while retaining the original dimensional form of the matrix. const auto type = expression_type(id); if (type.columns != type.vecsize) SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); return true; } // Checks whether the member is a row_major matrix that requires conversion before use bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) { // Natively supported row-major matrices do not need to be converted. if (backend.native_row_major_matrix && !is_legacy()) return false; // Non-matrix or column-major matrix types do not need to be converted. if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor))) return false; // Only square row-major matrices can be converted at this time. // Converting non-square matrices will require defining custom GLSL function that // swaps matrix elements while retaining the original dimensional form of the matrix. const auto mbr_type = get(type.member_types[index]); if (mbr_type.columns != mbr_type.vecsize) SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); return true; } // Checks whether the member is in packed data type, that might need to be unpacked. // GLSL does not define packed data types, but certain subclasses do. bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const { return has_member_decoration(type.self, index, DecorationCPacked); } // Wraps the expression string in a function call that converts the // row_major matrix result of the expression to a column_major matrix. // Base implementation uses the standard library transpose() function. // Subclasses may override to use a different function. string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/) { strip_enclosed_expression(exp_str); return join("transpose(", exp_str, ")"); } string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) { string type_name = type_to_glsl(type, id); remap_variable_type_name(type, name, type_name); return join(type_name, " ", name, type_to_array_glsl(type)); } // Emit a structure member. Subclasses may override to modify output, // or to dynamically add a padding member if needed. void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const string &qualifier, uint32_t) { auto &membertype = get(member_type_id); uint64_t memberflags = 0; auto &memb = meta[type.self].members; if (index < memb.size()) memberflags = memb[index].decoration_flags; string qualifiers; bool is_block = (meta[type.self].decoration.decoration_flags & ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0; if (is_block) qualifiers = to_interpolation_qualifiers(memberflags); statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_precision_qualifiers_glsl(membertype, memberflags), variable_decl(membertype, to_member_name(type, index)), ";"); } const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags) { // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) return ""; if (options.es) { auto &execution = get_entry_point(); if (flags & (1ull << DecorationRelaxedPrecision)) { bool implied_fmediump = type.basetype == SPIRType::Float && options.fragment.default_float_precision == Options::Mediump && execution.model == ExecutionModelFragment; bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && options.fragment.default_int_precision == Options::Mediump && execution.model == ExecutionModelFragment; return implied_fmediump || implied_imediump ? "" : "mediump "; } else { bool implied_fhighp = type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && ((options.fragment.default_int_precision == Options::Highp && execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment)); return implied_fhighp || implied_ihighp ? "" : "highp "; } } else if (backend.allow_precision_qualifiers) { // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. // The default is highp however, so only emit mediump in the rare case that a shader has these. if (flags & (1ull << DecorationRelaxedPrecision)) { bool can_use_mediump = type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt; return can_use_mediump ? "mediump " : ""; } else return ""; } else return ""; } const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) { return flags_to_precision_qualifiers_glsl(expression_type(id), meta[id].decoration.decoration_flags); } string CompilerGLSL::to_qualifiers_glsl(uint32_t id) { auto flags = meta[id].decoration.decoration_flags; string res; auto *var = maybe_get(id); if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) res += "shared "; res += to_interpolation_qualifiers(flags); if (var) res += to_storage_qualifiers_glsl(*var); auto &type = expression_type(id); if (type.image.dim != DimSubpassData && type.image.sampled == 2) { if (flags & (1ull << DecorationCoherent)) res += "coherent "; if (flags & (1ull << DecorationRestrict)) res += "restrict "; if (flags & (1ull << DecorationNonWritable)) res += "readonly "; if (flags & (1ull << DecorationNonReadable)) res += "writeonly "; } res += to_precision_qualifiers_glsl(id); return res; } string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) { // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... auto &type = expression_type(arg.id); const char *direction = ""; if (type.pointer) { if (arg.write_count && arg.read_count) direction = "inout "; else if (arg.write_count) direction = "out "; } return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); } string CompilerGLSL::variable_decl(const SPIRVariable &variable) { // Ignore the pointer type since GLSL doesn't have pointers. auto &type = get(variable.basetype); auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); if (variable.loop_variable && variable.static_expression) { uint32_t expr = variable.static_expression; if (ids[expr].get_type() != TypeUndef) res += join(" = ", to_expression(variable.static_expression)); } else if (variable.initializer) { uint32_t expr = variable.initializer; if (ids[expr].get_type() != TypeUndef) res += join(" = ", to_expression(variable.initializer)); } return res; } const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) { auto flags = meta[variable.self].decoration.decoration_flags; if (flags & (1ull << DecorationRelaxedPrecision)) return "mediump "; else return "highp "; } string CompilerGLSL::pls_decl(const PlsRemap &var) { auto &variable = get(var.id); SPIRType type; type.vecsize = pls_format_to_components(var.format); type.basetype = pls_format_to_basetype(var.format); return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", to_name(variable.self)); } uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const { assert(type.array.size() == type.array_size_literal.size()); if (!type.array_size_literal[index]) SPIRV_CROSS_THROW("The array size is not a literal, but a specialization constant or spec constant op."); return type.array[index]; } string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) { assert(type.array.size() == type.array_size_literal.size()); // Tessellation control shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays. // Opt for unsized as it's the more "correct" variant to use. if (type.storage == StorageClassInput && get_entry_point().model == ExecutionModelTessellationControl) return ""; auto &size = type.array[index]; if (!type.array_size_literal[index]) return to_expression(size); else if (size) return convert_to_string(size); else if (!backend.flexible_member_array_supported) { // For runtime-sized arrays, we can work around // lack of standard support for this by simply having // a single element array. // // Runtime length arrays must always be the last element // in an interface block. return "1"; } else return ""; } string CompilerGLSL::type_to_array_glsl(const SPIRType &type) { if (type.array.empty()) return ""; if (options.flatten_multidimensional_arrays) { string res; res += "["; for (auto i = uint32_t(type.array.size()); i; i--) { res += enclose_expression(to_array_size(type, i - 1)); if (i > 1) res += " * "; } res += "]"; return res; } else { if (type.array.size() > 1) { if (!options.es && options.version < 430) require_extension("GL_ARB_arrays_of_arrays"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " "Try using --flatten-multidimensional-arrays or set " "options.flatten_multidimensional_arrays to true."); } string res; for (auto i = uint32_t(type.array.size()); i; i--) { res += "["; res += to_array_size(type, i - 1); res += "]"; } return res; } } string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t /* id */) { auto &imagetype = get(type.image.type); string res; switch (imagetype.basetype) { case SPIRType::Int: res = "i"; break; case SPIRType::UInt: res = "u"; break; default: break; } if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) return res + "subpassInput" + (type.image.ms ? "MS" : ""); // If we're emulating subpassInput with samplers, force sampler2D // so we don't have to specify format. if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) { // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. if (type.image.dim == DimBuffer && type.image.sampled == 1) res += "sampler"; else res += type.image.sampled == 2 ? "image" : "texture"; } else res += "sampler"; switch (type.image.dim) { case Dim1D: res += "1D"; break; case Dim2D: res += "2D"; break; case Dim3D: res += "3D"; break; case DimCube: res += "Cube"; break; case DimBuffer: if (options.es && options.version < 320) require_extension("GL_OES_texture_buffer"); else if (!options.es && options.version < 300) require_extension("GL_EXT_texture_buffer_object"); res += "Buffer"; break; case DimSubpassData: res += "2D"; break; default: SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported."); } if (type.image.ms) res += "MS"; if (type.image.arrayed) { if (is_legacy_desktop()) require_extension("GL_EXT_texture_array"); res += "Array"; } // "Shadow" state in GLSL only exists for samplers and combined image samplers. if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && type.image.depth) res += "Shadow"; return res; } string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) { if (type.array.size() > 1) { if (options.flatten_multidimensional_arrays) SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]()."); else if (!options.es && options.version < 430) require_extension("GL_ARB_arrays_of_arrays"); else if (options.es && options.version < 310) SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); } auto e = type_to_glsl(type); for (uint32_t i = 0; i < type.array.size(); i++) e += "[]"; return e; } // The optional id parameter indicates the object whose type we are trying // to find the description for. It is optional. Most type descriptions do not // depend on a specific object's use of that type. string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) { // Ignore the pointer type since GLSL doesn't have pointers. switch (type.basetype) { case SPIRType::Struct: // Need OpName lookup here to get a "sensible" name for a struct. if (backend.explicit_struct_type) return join("struct ", to_name(type.self)); else return to_name(type.self); case SPIRType::Image: case SPIRType::SampledImage: return image_type_glsl(type, id); case SPIRType::Sampler: // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing // this distinction into the type system. return comparison_samplers.count(id) ? "samplerShadow" : "sampler"; case SPIRType::Void: return "void"; default: break; } if (type.vecsize == 1 && type.columns == 1) // Scalar builtin { switch (type.basetype) { case SPIRType::Boolean: return "bool"; case SPIRType::Int: return backend.basic_int_type; case SPIRType::UInt: return backend.basic_uint_type; case SPIRType::AtomicCounter: return "atomic_uint"; case SPIRType::Float: return "float"; case SPIRType::Double: return "double"; case SPIRType::Int64: return "int64_t"; case SPIRType::UInt64: return "uint64_t"; default: return "???"; } } else if (type.vecsize > 1 && type.columns == 1) // Vector builtin { switch (type.basetype) { case SPIRType::Boolean: return join("bvec", type.vecsize); case SPIRType::Int: return join("ivec", type.vecsize); case SPIRType::UInt: return join("uvec", type.vecsize); case SPIRType::Float: return join("vec", type.vecsize); case SPIRType::Double: return join("dvec", type.vecsize); case SPIRType::Int64: return join("i64vec", type.vecsize); case SPIRType::UInt64: return join("u64vec", type.vecsize); default: return "???"; } } else if (type.vecsize == type.columns) // Simple Matrix builtin { switch (type.basetype) { case SPIRType::Boolean: return join("bmat", type.vecsize); case SPIRType::Int: return join("imat", type.vecsize); case SPIRType::UInt: return join("umat", type.vecsize); case SPIRType::Float: return join("mat", type.vecsize); case SPIRType::Double: return join("dmat", type.vecsize); // Matrix types not supported for int64/uint64. default: return "???"; } } else { switch (type.basetype) { case SPIRType::Boolean: return join("bmat", type.columns, "x", type.vecsize); case SPIRType::Int: return join("imat", type.columns, "x", type.vecsize); case SPIRType::UInt: return join("umat", type.columns, "x", type.vecsize); case SPIRType::Float: return join("mat", type.columns, "x", type.vecsize); case SPIRType::Double: return join("dmat", type.columns, "x", type.vecsize); // Matrix types not supported for int64/uint64. default: return "???"; } } } void CompilerGLSL::add_variable(unordered_set &variables, string &name) { if (name.empty()) return; // Reserved for temporaries. if (name[0] == '_' && name.size() >= 2 && isdigit(name[1])) { name.clear(); return; } update_name_cache(variables, name); } void CompilerGLSL::add_variable(unordered_set &variables, uint32_t id) { auto &name = meta[id].decoration.alias; add_variable(variables, name); } void CompilerGLSL::add_local_variable_name(uint32_t id) { add_variable(local_variable_names, id); } void CompilerGLSL::add_resource_name(uint32_t id) { add_variable(resource_names, id); } void CompilerGLSL::add_header_line(const std::string &line) { header_lines.push_back(line); } bool CompilerGLSL::has_extension(const std::string &ext) const { auto itr = find(begin(forced_extensions), end(forced_extensions), ext); return itr != end(forced_extensions); } void CompilerGLSL::require_extension(const string &ext) { if (!has_extension(ext)) { forced_extensions.push_back(ext); force_recompile = true; } } void CompilerGLSL::flatten_buffer_block(uint32_t id) { auto &var = get(id); auto &type = get(var.basetype); auto name = to_name(type.self, false); auto flags = meta.at(type.self).decoration.decoration_flags; if (!type.array.empty()) SPIRV_CROSS_THROW(name + " is an array of UBOs."); if (type.basetype != SPIRType::Struct) SPIRV_CROSS_THROW(name + " is not a struct."); if ((flags & (1ull << DecorationBlock)) == 0) SPIRV_CROSS_THROW(name + " is not a block."); if (type.member_types.empty()) SPIRV_CROSS_THROW(name + " is an empty struct."); flattened_buffer_blocks.insert(id); } bool CompilerGLSL::check_atomic_image(uint32_t id) { auto &type = expression_type(id); if (type.storage == StorageClassImage) { if (options.es && options.version < 320) require_extension("GL_OES_shader_image_atomic"); auto *var = maybe_get_backing_variable(id); if (var) { auto &flags = meta.at(var->self).decoration.decoration_flags; if (flags & ((1ull << DecorationNonWritable) | (1ull << DecorationNonReadable))) { flags &= ~(1ull << DecorationNonWritable); flags &= ~(1ull << DecorationNonReadable); force_recompile = true; } } return true; } else return false; } void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags) { // Avoid shadow declarations. local_variable_names = resource_names; string decl; auto &type = get(func.return_type); decl += flags_to_precision_qualifiers_glsl(type, return_flags); decl += type_to_glsl(type); decl += type_to_array_glsl(type); decl += " "; if (func.self == entry_point) { decl += "main"; processing_entry_point = true; } else decl += to_name(func.self); decl += "("; vector arglist; for (auto &arg : func.arguments) { // Do not pass in separate images or samplers if we're remapping // to combined image samplers. if (skip_argument(arg.id)) continue; // Might change the variable name if it already exists in this function. // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation // to use same name for variables. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. add_local_variable_name(arg.id); arglist.push_back(argument_decl(arg)); // Hold a pointer to the parameter so we can invalidate the readonly field if needed. auto *var = maybe_get(arg.id); if (var) var->parameter = &arg; } for (auto &arg : func.shadow_arguments) { // Might change the variable name if it already exists in this function. // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation // to use same name for variables. // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. add_local_variable_name(arg.id); arglist.push_back(argument_decl(arg)); // Hold a pointer to the parameter so we can invalidate the readonly field if needed. auto *var = maybe_get(arg.id); if (var) var->parameter = &arg; } decl += merge(arglist); decl += ")"; statement(decl); } void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags) { // Avoid potential cycles. if (func.active) return; func.active = true; // If we depend on a function, emit that function before we emit our own function. for (auto block : func.blocks) { auto &b = get(block); for (auto &i : b.ops) { auto ops = stream(i); auto op = static_cast(i.op); if (op == OpFunctionCall) { // Recursively emit functions which are called. uint32_t id = ops[2]; emit_function(get(id), meta[ops[1]].decoration.decoration_flags); } } } emit_function_prototype(func, return_flags); begin_scope(); current_function = &func; auto &entry_block = get(func.entry_block); if (!func.analyzed_variable_scope) { analyze_variable_scope(func); // Check if we can actually use the loop variables we found in analyze_variable_scope. // To use multiple initializers, we need the same type and qualifiers. for (auto block : func.blocks) { auto &b = get(block); if (b.loop_variables.size() < 2) continue; uint64_t flags = get_decoration_mask(b.loop_variables.front()); uint32_t type = get(b.loop_variables.front()).basetype; bool invalid_initializers = false; for (auto loop_variable : b.loop_variables) { if (flags != get_decoration_mask(loop_variable) || type != get(b.loop_variables.front()).basetype) { invalid_initializers = true; break; } } if (invalid_initializers) { for (auto loop_variable : b.loop_variables) get(loop_variable).loop_variable = false; b.loop_variables.clear(); } } func.analyzed_variable_scope = true; } for (auto &v : func.local_variables) { auto &var = get(v); if (expression_is_lvalue(v)) { add_local_variable_name(var.self); if (var.initializer) statement(variable_decl_function_local(var), ";"); else { // Don't declare variable until first use to declutter the GLSL output quite a lot. // If we don't touch the variable before first branch, // declare it then since we need variable declaration to be in top scope. // Never declare empty structs. They have no meaningful representation. auto &type = get(var.basetype); bool empty_struct = type.basetype == SPIRType::Struct && type.member_types.empty(); var.deferred_declaration = !empty_struct; } } else { // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. // For these types (non-lvalue), we enforce forwarding through a shadowed variable. // This means that when we OpStore to these variables, we just write in the expression ID directly. // This breaks any kind of branching, since the variable must be statically assigned. // Branching on samplers and images would be pretty much impossible to fake in GLSL. var.statically_assigned = true; } var.loop_variable_enable = false; // Loop variables are never declared outside their for-loop, so block any implicit declaration. if (var.loop_variable) var.deferred_declaration = false; } entry_block.loop_dominator = SPIRBlock::NoDominator; emit_block_chain(entry_block); end_scope(); processing_entry_point = false; statement(""); } void CompilerGLSL::emit_fixup() { auto &execution = get_entry_point(); if (execution.model == ExecutionModelVertex) { if (options.vertex.fixup_clipspace) { const char *suffix = backend.float_literal_suffix ? "f" : ""; statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); } if (options.vertex.flip_vert_y) statement("gl_Position.y = -gl_Position.y;"); } } bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to) { auto &child = get(to); for (auto &phi : child.phi_variables) if (phi.parent == from) return true; return false; } void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) { auto &child = get(to); for (auto &phi : child.phi_variables) { if (phi.parent == from) { auto &var = get(phi.function_variable); // A Phi variable might be a loop variable, so flush to static expression. if (var.loop_variable && !var.loop_variable_enable) var.static_expression = phi.local_variable; else { flush_variable_declaration(phi.function_variable); // This might be called in continue block, so make sure we // use this to emit ESSL 1.0 compliant increments/decrements. auto lhs = to_expression(phi.function_variable); auto rhs = to_expression(phi.local_variable); if (!optimize_read_modify_write(lhs, rhs)) statement(lhs, " = ", rhs, ";"); } } } } void CompilerGLSL::branch(uint32_t from, uint32_t to) { flush_phi(from, to); flush_all_active_variables(); // This is only a continue if we branch to our loop dominator. if (loop_blocks.find(to) != end(loop_blocks) && get(from).loop_dominator == to) { // This can happen if we had a complex continue block which was emitted. // Once the continue block tries to branch to the loop header, just emit continue; // and end the chain here. statement("continue;"); } else if (is_continue(to)) { auto &to_block = get(to); if (to_block.complex_continue) { // Just emit the whole block chain as is. auto usage_counts = expression_usage_counts; auto invalid = invalid_expressions; emit_block_chain(to_block); // Expression usage counts and invalid expressions // are moot after returning from the continue block. // Since we emit the same block multiple times, // we don't want to invalidate ourselves. expression_usage_counts = usage_counts; invalid_expressions = invalid; } else { auto &from_block = get(from); bool outside_control_flow = false; uint32_t loop_dominator = 0; // FIXME: Refactor this to not use the old loop_dominator tracking. if (from_block.merge_block) { // If we are a loop header, we don't set the loop dominator, // so just use "self" here. loop_dominator = from; } else if (from_block.loop_dominator != SPIRBlock::NoDominator) { loop_dominator = from_block.loop_dominator; } if (loop_dominator != 0) { auto &dominator = get(loop_dominator); // For non-complex continue blocks, we implicitly branch to the continue block // by having the continue block be part of the loop header in for (; ; continue-block). outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); } // Some simplification for for-loops. We always end up with a useless continue; // statement since we branch to a loop block. // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block, // we can avoid writing out an explicit continue statement. // Similar optimization to return statements if we know we're outside flow control. if (!outside_control_flow) statement("continue;"); } } else if (is_break(to)) statement("break;"); else if (!is_conditional(to)) emit_block_chain(get(to)); } void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block) { // If we branch directly to a selection merge target, we don't really need a code path. bool true_sub = !is_conditional(true_block); bool false_sub = !is_conditional(false_block); if (true_sub) { statement("if (", to_expression(cond), ")"); begin_scope(); branch(from, true_block); end_scope(); if (false_sub) { statement("else"); begin_scope(); branch(from, false_block); end_scope(); } else if (flush_phi_required(from, false_block)) { statement("else"); begin_scope(); flush_phi(from, false_block); end_scope(); } } else if (false_sub && !true_sub) { // Only need false path, use negative conditional. statement("if (!", to_expression(cond), ")"); begin_scope(); branch(from, false_block); end_scope(); if (flush_phi_required(from, true_block)) { statement("else"); begin_scope(); flush_phi(from, true_block); end_scope(); } } } void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block) { // Propagate down the loop dominator block, so that dominated blocks can back trace. if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator) { uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator; auto set_dominator = [this](uint32_t self, uint32_t new_dominator) { auto &dominated_block = this->get(self); // If we already have a loop dominator, we're trying to break out to merge targets // which should not update the loop dominator. if (!dominated_block.loop_dominator) dominated_block.loop_dominator = new_dominator; }; // After merging a loop, we inherit the loop dominator always. if (block.merge_block) set_dominator(block.merge_block, block.loop_dominator); if (block.true_block) set_dominator(block.true_block, dominator); if (block.false_block) set_dominator(block.false_block, dominator); if (block.next_block) set_dominator(block.next_block, dominator); for (auto &c : block.cases) set_dominator(c.block, dominator); // In older glslang output continue_block can be == loop header. if (block.continue_block && block.continue_block != block.self) set_dominator(block.continue_block, dominator); } } // FIXME: This currently cannot handle complex continue blocks // as in do-while. // This should be seen as a "trivial" continue block. string CompilerGLSL::emit_continue_block(uint32_t continue_block) { auto *block = &get(continue_block); // While emitting the continue block, declare_temporary will check this // if we have to emit temporaries. current_continue_block = block; vector statements; // Capture all statements into our list. auto *old = redirect_statement; redirect_statement = &statements; // Stamp out all blocks one after each other. while (loop_blocks.find(block->self) == end(loop_blocks)) { propagate_loop_dominators(*block); // Write out all instructions we have in this block. emit_block_instructions(*block); // For plain branchless for/while continue blocks. if (block->next_block) { flush_phi(continue_block, block->next_block); block = &get(block->next_block); } // For do while blocks. The last block will be a select block. else if (block->true_block) { flush_phi(continue_block, block->true_block); block = &get(block->true_block); } } // Restore old pointer. redirect_statement = old; // Somewhat ugly, strip off the last ';' since we use ',' instead. // Ideally, we should select this behavior in statement(). for (auto &s : statements) { if (!s.empty() && s.back() == ';') s.erase(s.size() - 1, 1); } current_continue_block = nullptr; return merge(statements); } string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) { if (block.loop_variables.empty()) return ""; bool same_types = for_loop_initializers_are_same_type(block); // We can only declare for loop initializers if all variables are of same type. // If we cannot do this, declare individual variables before the loop header. // We might have a loop variable candidate which was not assigned to for some reason. uint32_t missing_initializers = 0; for (auto &variable : block.loop_variables) { uint32_t expr = get(variable).static_expression; // Sometimes loop variables are initialized with OpUndef, but we can just declare // a plain variable without initializer in this case. if (expr == 0 || ids[expr].get_type() == TypeUndef) missing_initializers++; } if (block.loop_variables.size() == 1 && missing_initializers == 0) { return variable_decl(get(block.loop_variables.front())); } else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) { for (auto &loop_var : block.loop_variables) statement(variable_decl(get(loop_var)), ";"); return ""; } else { // We have a mix of loop variables, either ones with a clear initializer, or ones without. // Separate the two streams. string expr; for (auto &loop_var : block.loop_variables) { uint32_t static_expr = get(loop_var).static_expression; if (static_expr == 0 || ids[static_expr].get_type() == TypeUndef) { statement(variable_decl(get(loop_var)), ";"); } else { if (expr.empty()) { // For loop initializers are of the form (block.loop_variables.front()); auto &type = get(var.basetype); expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " "); } else expr += ", "; auto &v = get(loop_var); expr += join(to_name(loop_var), " = ", to_expression(v.static_expression)); } } return expr; } } bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block) { if (block.loop_variables.size() <= 1) return true; uint32_t expected = 0; uint64_t expected_flags = 0; for (auto &var : block.loop_variables) { // Don't care about uninitialized variables as they will not be part of the initializers. uint32_t expr = get(var).static_expression; if (expr == 0 || ids[expr].get_type() == TypeUndef) continue; if (expected == 0) { expected = get(var).basetype; expected_flags = get_decoration_mask(var); } else if (expected != get(var).basetype) return false; // Precision flags and things like that must also match. if (expected_flags != get_decoration_mask(var)) return false; } return true; } bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method) { SPIRBlock::ContinueBlockType continue_type = continue_block_type(get(block.continue_block)); if (method == SPIRBlock::MergeToSelectForLoop) { uint32_t current_count = statement_count; // If we're trying to create a true for loop, // we need to make sure that all opcodes before branch statement do not actually emit any code. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. emit_block_instructions(block); bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); // This can work! We only did trivial things which could be forwarded in block body! if (current_count == statement_count && condition_is_temporary) { switch (continue_type) { case SPIRBlock::ForLoop: { // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. flush_undeclared_variables(block); // Important that we do this in this order because // emitting the continue block can invalidate the condition expression. auto initializer = emit_for_loop_initializers(block); auto condition = to_expression(block.condition); auto continue_block = emit_continue_block(block.continue_block); statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); break; } case SPIRBlock::WhileLoop: // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header. flush_undeclared_variables(block); statement("while (", to_expression(block.condition), ")"); break; default: SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); } begin_scope(); return true; } else { block.disable_block_optimization = true; force_recompile = true; begin_scope(); // We'll see an end_scope() later. return false; } } else if (method == SPIRBlock::MergeToDirectForLoop) { auto &child = get(block.next_block); // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. flush_undeclared_variables(child); uint32_t current_count = statement_count; // If we're trying to create a true for loop, // we need to make sure that all opcodes before branch statement do not actually emit any code. // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. emit_block_instructions(child); bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); if (current_count == statement_count && condition_is_temporary) { propagate_loop_dominators(child); switch (continue_type) { case SPIRBlock::ForLoop: { // Important that we do this in this order because // emitting the continue block can invalidate the condition expression. auto initializer = emit_for_loop_initializers(block); auto condition = to_expression(child.condition); auto continue_block = emit_continue_block(block.continue_block); statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); break; } case SPIRBlock::WhileLoop: statement("while (", to_expression(child.condition), ")"); break; default: SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics."); } begin_scope(); branch(child.self, child.true_block); return true; } else { block.disable_block_optimization = true; force_recompile = true; begin_scope(); // We'll see an end_scope() later. return false; } } else return false; } void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) { // Enforce declaration order for regression testing purposes. sort(begin(block.dominated_variables), end(block.dominated_variables)); for (auto &v : block.dominated_variables) { auto &var = get(v); if (var.deferred_declaration) statement(variable_decl(var), ";"); var.deferred_declaration = false; } } void CompilerGLSL::emit_block_chain(SPIRBlock &block) { propagate_loop_dominators(block); bool select_branch_to_true_block = false; bool skip_direct_branch = false; bool emitted_for_loop_header = false; // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. // Need to sort these to ensure that reference output is stable. sort(begin(block.declare_temporary), end(block.declare_temporary), [](const pair &a, const pair &b) { return a.second < b.second; }); for (auto &tmp : block.declare_temporary) { auto flags = meta[tmp.second].decoration.decoration_flags; auto &type = get(tmp.first); statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";"); } SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; if (block.continue_block) continue_type = continue_block_type(get(block.continue_block)); // If we have loop variables, stop masking out access to the variable now. for (auto var : block.loop_variables) get(var).loop_variable_enable = true; // This is the older loop behavior in glslang which branches to loop body directly from the loop header. if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) { flush_undeclared_variables(block); if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) { // The body of while, is actually just the true block, so always branch there unconditionally. select_branch_to_true_block = true; emitted_for_loop_header = true; } } // This is the newer loop behavior in glslang which branches from Loop header directly to // a new block, which in turn has a OpBranchSelection without a selection merge. else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) { flush_undeclared_variables(block); if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) { skip_direct_branch = true; emitted_for_loop_header = true; } } else if (continue_type == SPIRBlock::DoWhileLoop) { flush_undeclared_variables(block); statement("do"); begin_scope(); emit_block_instructions(block); } else if (block.merge == SPIRBlock::MergeLoop) { flush_undeclared_variables(block); // We have a generic loop without any distinguishable pattern like for, while or do while. get(block.continue_block).complex_continue = true; continue_type = SPIRBlock::ComplexLoop; statement("for (;;)"); begin_scope(); emit_block_instructions(block); } else { emit_block_instructions(block); } // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem // as writes to said loop variables might have been masked out, we need a recompile. if (!emitted_for_loop_header && !block.loop_variables.empty()) { force_recompile = true; for (auto var : block.loop_variables) get(var).loop_variable = false; block.loop_variables.clear(); } flush_undeclared_variables(block); bool emit_next_block = true; // Handle end of block. switch (block.terminator) { case SPIRBlock::Direct: // True when emitting complex continue block. if (block.loop_dominator == block.next_block) { branch(block.self, block.next_block); emit_next_block = false; } // True if MergeToDirectForLoop succeeded. else if (skip_direct_branch) emit_next_block = false; else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) { branch(block.self, block.next_block); emit_next_block = false; } break; case SPIRBlock::Select: // True if MergeToSelectForLoop succeeded. if (select_branch_to_true_block) branch(block.self, block.true_block); else branch(block.self, block.condition, block.true_block, block.false_block); break; case SPIRBlock::MultiSelect: { auto &type = expression_type(block.condition); bool uint32_t_case = type.basetype == SPIRType::UInt; statement("switch (", to_expression(block.condition), ")"); begin_scope(); for (auto &c : block.cases) { auto case_value = uint32_t_case ? convert_to_string(uint32_t(c.value)) : convert_to_string(int32_t(c.value)); statement("case ", case_value, ":"); begin_scope(); branch(block.self, c.block); end_scope(); } if (block.default_block != block.next_block) { statement("default:"); begin_scope(); if (is_break(block.default_block)) SPIRV_CROSS_THROW("Cannot break; out of a switch statement and out of a loop at the same time ..."); branch(block.self, block.default_block); end_scope(); } else if (flush_phi_required(block.self, block.next_block)) { statement("default:"); begin_scope(); flush_phi(block.self, block.next_block); statement("break;"); end_scope(); } end_scope(); break; } case SPIRBlock::Return: if (processing_entry_point) emit_fixup(); if (block.return_value) { auto &type = expression_type(block.return_value); if (!type.array.empty() && !backend.can_return_array) { // If we cannot return arrays, we will have a special out argument we can write to instead. // The backend is responsible for setting this up, and redirection the return values as appropriate. if (ids.at(block.return_value).get_type() != TypeUndef) emit_array_copy("SPIRV_Cross_return_value", block.return_value); if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || block.loop_dominator != SPIRBlock::NoDominator) { statement("return;"); } } else { // OpReturnValue can return Undef, so don't emit anything for this case. if (ids.at(block.return_value).get_type() != TypeUndef) statement("return ", to_expression(block.return_value), ";"); } } // If this block is the very final block and not called from control flow, // we do not need an explicit return which looks out of place. Just end the function here. // In the very weird case of for(;;) { return; } executing return is unconditional, // but we actually need a return here ... else if (!block_is_outside_flow_control_from_block(get(current_function->entry_block), block) || block.loop_dominator != SPIRBlock::NoDominator) { statement("return;"); } break; case SPIRBlock::Kill: statement(backend.discard_literal, ";"); break; case SPIRBlock::Unreachable: emit_next_block = false; break; default: SPIRV_CROSS_THROW("Unimplemented block terminator."); } if (block.next_block && emit_next_block) { // If we hit this case, we're dealing with an unconditional branch, which means we will output // that block after this. If we had selection merge, we already flushed phi variables. if (block.merge != SPIRBlock::MergeSelection) flush_phi(block.self, block.next_block); emit_block_chain(get(block.next_block)); } if (block.merge == SPIRBlock::MergeLoop) { if (continue_type == SPIRBlock::DoWhileLoop) { // Make sure that we run the continue block to get the expressions set, but this // should become an empty string. // We have no fallbacks if we cannot forward everything to temporaries ... auto statements = emit_continue_block(block.continue_block); if (!statements.empty()) { // The DoWhile block has side effects, force ComplexLoop pattern next pass. get(block.continue_block).complex_continue = true; force_recompile = true; } end_scope_decl(join("while (", to_expression(get(block.continue_block).condition), ")")); } else end_scope(); flush_phi(block.self, block.merge_block); emit_block_chain(get(block.merge_block)); } } void CompilerGLSL::begin_scope() { statement("{"); indent++; } void CompilerGLSL::end_scope() { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("}"); } void CompilerGLSL::end_scope_decl() { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("};"); } void CompilerGLSL::end_scope_decl(const string &decl) { if (!indent) SPIRV_CROSS_THROW("Popping empty indent stack."); indent--; statement("} ", decl, ";"); } void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) { // If our variable is remapped, and we rely on type-remapping information as // well, then we cannot pass the variable as a function parameter. // Fixing this is non-trivial without stamping out variants of the same function, // so for now warn about this and suggest workarounds instead. for (uint32_t i = 0; i < length; i++) { auto *var = maybe_get(args[i]); if (!var || !var->remapped_variable) continue; auto &type = get(var->basetype); if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) { SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " "This will not work correctly because type-remapping information is lost. " "To workaround, please consider not passing the subpass input as a function parameter, " "or use in/out variables instead which do not need type remapping information."); } } } const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) { // FIXME: This is kind of hacky. There should be a cleaner way. auto offset = uint32_t(&instr - current_emitting_block->ops.data()); if ((offset + 1) < current_emitting_block->ops.size()) return ¤t_emitting_block->ops[offset + 1]; else return nullptr; } uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) { return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); } void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id) { statement(lhs, " = ", to_expression(rhs_id), ";"); }