SPIRV-Cross/spirv_glsl.cpp

/*
 * Copyright 2015-2016 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "spirv_glsl.hpp"
#include "GLSL.std.450.h"
#include <algorithm>
#include <assert.h>

using namespace spv;
using namespace spirv_cross;
using namespace std;

static const char* to_pls_layout(PlsFormat format)
{
    switch (format)
    {
        case PlsR11FG11FB10F: return "layout(r11f_g11f_b10f) ";
        case PlsR32F: return "layout(r32f) ";
        case PlsRG16F: return "layout(rg16f) ";
        case PlsRGB10A2: return "layout(rgb10_a2) ";
        case PlsRGBA8: return "layout(rgba8) ";
        case PlsRG16: return "layout(rg16) ";
        case PlsRGBA8I: return "layout(rgba8i)" ;
        case PlsRG16I: return "layout(rg16i) ";
        case PlsRGB10A2UI: return "layout(rgb10_a2ui) ";
        case PlsRGBA8UI: return "layout(rgba8ui) ";
        case PlsRG16UI: return "layout(rg16ui) ";
        case PlsR32UI: return "layout(r32ui) ";
        default: return "";
    }
}

static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
    switch (format)
    {
        default:
        case PlsR11FG11FB10F:
        case PlsR32F:
        case PlsRG16F:
        case PlsRGB10A2:
        case PlsRGBA8:
        case PlsRG16:
            return SPIRType::Float;

        case PlsRGBA8I:
        case PlsRG16I:
            return SPIRType::Int;

        case PlsRGB10A2UI:
        case PlsRGBA8UI:
        case PlsRG16UI:
        case PlsR32UI:
            return SPIRType::UInt;
    }
}

static uint32_t pls_format_to_components(PlsFormat format)
{
    switch (format)
    {
        default:
        case PlsR32F:
        case PlsR32UI:
            return 1;

        case PlsRG16F:
        case PlsRG16:
        case PlsRG16UI:
        case PlsRG16I:
            return 2;

        case PlsR11FG11FB10F:
            return 3;

        case PlsRGB10A2:
        case PlsRGBA8:
        case PlsRGBA8I:
        case PlsRGB10A2UI:
        case PlsRGBA8UI:
            return 4;
    }
}

void CompilerGLSL::reset()
{
    // We do some speculative optimizations which should pretty much always work out,
    // but just in case the SPIR-V is rather weird, recompile until it's happy.
    // This typically only means one extra pass.
    force_recompile = false;

    // Clear invalid expression tracking.
    invalid_expressions.clear();
    function = nullptr;

    // Clear temporary usage tracking.
    expression_usage_counts.clear();
    forwarded_temporaries.clear();

    // Clear identifier caches
    global_struct_cache.clear();

    for (auto &id : ids)
    {
        // Clear unflushed dependees.
        if (id.get_type() == TypeVariable)
            id.get<SPIRVariable>().dependees.clear();
        // And remove all expressions.
        else if (id.get_type() == TypeExpression)
            id.reset();
        // Reset active state for all functions.
        else if (id.get_type() == TypeFunction)
        {
            id.get<SPIRFunction>().active = false;
            id.get<SPIRFunction>().flush_undeclared = true;
        }
    }

    statement_count = 0;
    indent = 0;
}

void CompilerGLSL::remap_pls_variables()
{
    for (auto &input : pls_inputs)
    {
        auto &var = get<SPIRVariable>(input.id);

        bool input_is_target = false;
        if (var.storage == StorageClassUniformConstant)
        {
            auto &type = get<SPIRType>(var.basetype);
            input_is_target = type.image.dim == DimSubpassData;
        }

        if (var.storage != StorageClassInput && !input_is_target)
            throw CompilerError("Can only use in and target variables for PLS inputs.");
        var.remapped_variable = true;
    }

    for (auto &output : pls_outputs)
    {
        auto &var = get<SPIRVariable>(output.id);
        if (var.storage != StorageClassOutput)
            throw CompilerError("Can only use out variables for PLS outputs.");
        var.remapped_variable = true;
    }
}

string CompilerGLSL::compile()
{
    uint32_t pass_count = 0;
    do
    {
        if (pass_count >= 3)
            throw CompilerError("Over 3 compilation loops detected. Must be a bug!");

        reset();

        // Move constructor for this type is broken on GCC 4.9 ...
        buffer = unique_ptr<ostringstream>(new ostringstream());

        emit_header();
        emit_resources();

        emit_function(get<SPIRFunction>(execution.entry_point), 0);

        pass_count++;
    } while (force_recompile);

    return buffer->str();
}

void CompilerGLSL::emit_header()
{
    statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");

    // Needed for binding = # on UBOs, etc.
    if (!options.es && options.version < 420)
    {
        statement("#ifdef GL_ARB_shading_language_420pack");
        statement("#extension GL_ARB_shading_language_420pack : require");
        statement("#endif");
    }

    for (auto &ext : forced_extensions)
        statement("#extension ", ext, " : require");

    if (!pls_inputs.empty() || !pls_outputs.empty())
        statement("#extension GL_EXT_shader_pixel_local_storage : require");

    vector<string> inputs;
    vector<string> outputs;

    switch (execution.model)
    {
        case ExecutionModelGeometry:
            if (options.es && options.version < 320)
                statement("#extension GL_EXT_geometry_shader : require");
            if (!options.es && options.version < 320)
                statement("#extension GL_ARB_geometry_shader4 : require");
            outputs.push_back(join("max_vertices = ", execution.output_vertices));
            if (execution.flags & (1ull << ExecutionModeInvocations))
                inputs.push_back(join("invocations = ", execution.invocations));
            if (execution.flags & (1ull << ExecutionModeInputPoints))
                inputs.push_back("points");
            if (execution.flags & (1ull << ExecutionModeInputLines))
                inputs.push_back("lines");
            if (execution.flags & (1ull << ExecutionModeInputLinesAdjacency))
                inputs.push_back("lines_adjacency");
            if (execution.flags & (1ull << ExecutionModeTriangles))
                inputs.push_back("triangles");
            if (execution.flags & (1ull << ExecutionModeInputTrianglesAdjacency))
                inputs.push_back("triangles_adjacency");
            if (execution.flags & (1ull << ExecutionModeOutputTriangleStrip))
                outputs.push_back("triangle_strip");
            if (execution.flags & (1ull << ExecutionModeOutputPoints))
                outputs.push_back("points");
            if (execution.flags & (1ull << ExecutionModeOutputLineStrip))
                outputs.push_back("line_strip");
            break;

        case ExecutionModelTessellationControl:
            if (options.es && options.version < 320)
                statement("#extension GL_EXT_tessellation_shader : require");
            if (!options.es && options.version < 400)
                statement("#extension GL_ARB_tessellation_shader : require");
            if (execution.flags & (1ull << ExecutionModeOutputVertices))
                outputs.push_back(join("vertices = ", execution.output_vertices));
            break;

        case ExecutionModelTessellationEvaluation:
            if (options.es && options.version < 320)
                statement("#extension GL_EXT_tessellation_shader : require");
            if (!options.es && options.version < 400)
                statement("#extension GL_ARB_tessellation_shader : require");
            if (execution.flags & (1ull << ExecutionModeQuads))
                inputs.push_back("quads");
            if (execution.flags & (1ull << ExecutionModeIsolines))
                inputs.push_back("isolines");
            if (execution.flags & (1ull << ExecutionModePointMode))
                inputs.push_back("point_mode");
            if (execution.flags & (1ull << ExecutionModeVertexOrderCw))
                inputs.push_back("cw");
            if (execution.flags & (1ull << ExecutionModeVertexOrderCcw))
                inputs.push_back("ccw");
            if (execution.flags & (1ull << ExecutionModeSpacingFractionalEven))
                inputs.push_back("fractional_even_spacing");
            if (execution.flags & (1ull << ExecutionModeSpacingFractionalOdd))
                inputs.push_back("fractional_odd_spacing");
            if (execution.flags & (1ull << ExecutionModeSpacingEqual))
                inputs.push_back("equal_spacing");
            break;

        case ExecutionModelGLCompute:
            if (!options.es && options.version < 430)
                statement("#extension GL_ARB_compute_shader : require");
            if (options.es && options.version < 310)
                throw CompilerError("At least ESSL 3.10 required for compute shaders.");
            inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
            inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
            inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
            break;

        case ExecutionModelFragment:
            if (options.es)
            {
                switch (options.fragment.default_float_precision)
                {
                    case Options::Lowp:
                        statement("precision lowp float;");
                        break;

                    case Options::Mediump:
                        statement("precision mediump float;");
                        break;

                    case Options::Highp:
                        statement("precision highp float;");
                        break;

                    default:
                        break;
                }

                switch (options.fragment.default_int_precision)
                {
                    case Options::Lowp:
                        statement("precision lowp int;");
                        break;

                    case Options::Mediump:
                        statement("precision mediump int;");
                        break;

                    case Options::Highp:
                        statement("precision highp int;");
                        break;

                    default:
                        break;
                }
            }

            if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests))
                inputs.push_back("early_fragment_tests");
            if (execution.flags & (1ull << ExecutionModeDepthGreater))
                inputs.push_back("depth_greater");
            if (execution.flags & (1ull << ExecutionModeDepthLess))
                inputs.push_back("depth_less");

            break;

        default:
            break;
    }

    if (!inputs.empty())
        statement("layout(", merge(inputs), ") in;");
    if (!outputs.empty())
        statement("layout(", merge(outputs), ") out;");

    statement("");
}

void CompilerGLSL::emit_struct(const SPIRType &type)
{
    auto name = type_to_glsl(type);

    // Struct types can be stamped out multiple times
    // with just different offsets, matrix layouts, etc ...
    // Type-punning with these types is legal, which complicates things
    // when we are storing struct and array types in an SSBO for example.
    // For now, detect this duplication via OpName, but ideally we should
    // find proper aliases by inspecting the actual type.
    if (global_struct_cache.find(name) != end(global_struct_cache))
        return;
    update_name_cache(global_struct_cache, name);

    statement("struct ", name);
    begin_scope();

    uint32_t i = 0;
    bool emitted = false;
    for (auto &member : type.member_types)
    {
        auto &membertype = get<SPIRType>(member);
        statement(member_decl(type, membertype, i), ";");
        i++;
        emitted = true;
    }
    end_scope_decl();

    if (emitted)
        statement("");
}

uint64_t CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index)
{
    uint64_t flags = 0;
    auto &memb = meta[type.self].members;
    if (index >= memb.size())
        return 0;
    auto &dec = memb[index];

    // If our type is a sturct, traverse all the members as well recursively.
    flags |= dec.decoration_flags;
    for (uint32_t i = 0; i < type.member_types.size(); i++)
        flags |= combined_decoration_for_member(get<SPIRType>(type.member_types[i]), i);

    return flags;
}

string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
    bool is_block = (meta[type.self].decoration.decoration_flags &
            ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
    if (!is_block)
        return "";

    auto &memb = meta[type.self].members;
    if (index >= memb.size())
        return 0;
    auto &dec = memb[index];

    vector<string> attr;

    // We can only apply layouts on members in block interfaces.
    // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
    // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
    // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
    //
    // We would like to go from (SPIR-V style):
    //
    // struct Foo { layout(row_major) mat4 matrix; };
    // buffer UBO { Foo foo; };
    //
    // to
    //
    // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
    // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
    auto flags = combined_decoration_for_member(type, index);

    if (flags & (1ull << DecorationRowMajor))
        attr.push_back("row_major");
    // We don't emit any global layouts, so column_major is default.
    //if (flags & (1ull << DecorationColMajor))
    //    attr.push_back("column_major");

    if (dec.decoration_flags & (1ull << DecorationLocation))
        attr.push_back(join("location = ", dec.location));

    if (attr.empty())
        return "";

    string res = "layout(";
    res += merge(attr);
    res += ") ";
    return res;
}

const char* CompilerGLSL::format_to_glsl(spv::ImageFormat format)
{
    // Only handle GLES 3.1 compliant types for now ...
    switch (format)
    {
        case ImageFormatRgba32f: return "rgba32f";
        case ImageFormatRgba16f: return "rgba16f";
        case ImageFormatR32f: return "r32f";
        case ImageFormatRgba8: return "rgba8";
        case ImageFormatRgba8Snorm: return "rgba8_snorm";
        case ImageFormatRg32f: return "rg32f";
        case ImageFormatRg16f: return "rg16f";

        case ImageFormatRgba32i: return "rgba32i";
        case ImageFormatRgba16i: return "rgba16i";
        case ImageFormatR32i: return "r32i";
        case ImageFormatRgba8i: return "rgba8i";
        case ImageFormatRg32i: return "rg32i";
        case ImageFormatRg16i: return "rg16i";

        case ImageFormatRgba32ui: return "rgba32ui";
        case ImageFormatRgba16ui: return "rgba16ui";
        case ImageFormatR32ui: return "r32ui";
        case ImageFormatRgba8ui: return "rgba8ui";
        case ImageFormatRg32ui: return "rg32ui";
        case ImageFormatRg16ui: return "rg16ui";

        case ImageFormatUnknown: return nullptr;
        default: return "UNSUPPORTED"; // TODO: Fill in rest.
    }
}

uint32_t CompilerGLSL::type_to_std430_alignment(const SPIRType &type, uint64_t flags)
{
    // float, int and uint all take 4 bytes.
    const uint32_t base_alignment = 4;

    if (type.basetype == SPIRType::Struct)
    {
        // Rule 9. Structs alignments are maximum alignment of its members.
        uint32_t alignment = 0;
        for (uint32_t i = 0; i < type.member_types.size(); i++)
        {
            auto member_flags = meta[type.self].members.at(i).decoration_flags;
            alignment = max(alignment,
                    type_to_std430_alignment(get<SPIRType>(type.member_types[i]), member_flags));
        }

        return alignment;
    }
    else
    {
        // From 7.6.2.2 in GL 4.5 core spec.
        // Rule 1
        if (type.vecsize == 1 && type.columns == 1)
            return base_alignment;

        // Rule 2
        if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
            return type.vecsize * base_alignment;

        // Rule 3
        if (type.vecsize == 3 && type.columns == 1)
            return 4 * base_alignment;

        // Rule 4 implied. Alignment does not change in std430.

        // Rule 5. Column-major matrices are stored as arrays of
        // vectors.
        if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
        {
            if (type.vecsize == 3)
                return 4 * base_alignment;
            else
                return type.vecsize * base_alignment;
        }

        // Rule 6 implied.

        // Rule 7.
        if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
        {
            if (type.columns == 3)
                return 4 * base_alignment;
            else
                return type.columns * base_alignment;
        }

        // Rule 8 implied.
    }

    throw CompilerError("Did not find suitable std430 rule for type. Bogus decorations?");
}

uint32_t CompilerGLSL::type_to_std430_array_stride(const SPIRType &type, uint64_t flags)
{
    // Array stride is equal to aligned size of the underlying type.
    SPIRType tmp = type;
    tmp.array.pop_back();
    uint32_t size = type_to_std430_size(tmp, flags);
    uint32_t alignment = type_to_std430_alignment(tmp, flags);
    return (size + alignment - 1) & ~(alignment - 1);
}

uint32_t CompilerGLSL::type_to_std430_size(const SPIRType &type, uint64_t flags)
{
    if (!type.array.empty())
        return type.array.back() * type_to_std430_array_stride(type, flags);

    // float, int and uint all take 4 bytes.
    const uint32_t base_alignment = 4;
    uint32_t size = 0;

    if (type.basetype == SPIRType::Struct)
    {
        for (uint32_t i = 0; i < type.member_types.size(); i++)
        {
            auto member_flags = meta[type.self].members.at(i).decoration_flags;
            auto &member_type = get<SPIRType>(type.member_types[i]);
            uint32_t alignment = type_to_std430_alignment(member_type, member_flags);
            size = (size + alignment - 1) & ~(alignment - 1);
            size += type_to_std430_size(member_type, member_flags);
        }
    }
    else
    {
        if (type.columns == 1)
            size = type.vecsize * base_alignment;

        if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
        {
            if (type.vecsize == 3)
                size = type.columns * 4 * base_alignment;
            else
                size = type.columns * type.vecsize * base_alignment;
        }

        if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
        {
            if (type.columns == 3)
                size = type.vecsize * 4 * base_alignment;
            else
                size = type.vecsize * type.columns * base_alignment;
        }
    }

    return size;
}

bool CompilerGLSL::ssbo_is_std430_packing(const SPIRType &type)
{
    // This is very tricky and error prone, but try to be exhaustive and correct here.
    // SPIR-V doesn't directly say if we're using std430 or std140.
    // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
    // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
    // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
    //
    // It is almost certain that we're using std430, but it gets tricky with arrays in particular.
    // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
    //
    // The only two differences between std140 and std430 are related to padding alignment/array stride
    // in arrays and structs. In std140 they take minimum vec4 alignment.
    // std430 only removes the vec4 requirement.

    uint32_t offset = 0;

    for (uint32_t i = 0; i < type.member_types.size(); i++)
    {
        auto &memb_type = get<SPIRType>(type.member_types[i]);
        auto member_flags = meta[type.self].members.at(i).decoration_flags;

        // Verify alignment rules.
        uint32_t std430_alignment = type_to_std430_alignment(memb_type, member_flags);
        offset = (offset + std430_alignment - 1) & ~(std430_alignment - 1);

        uint32_t actual_offset = type_struct_member_offset(type, i);
        if (actual_offset != offset) // This cannot be std430.
            return false;

        // Verify array stride rules.
        if (!memb_type.array.empty() &&
                type_to_std430_array_stride(memb_type, member_flags) !=
                type_struct_member_array_stride(type, i))
            return false;

        // Verify that sub-structs also follow std430 rules.
        if (!memb_type.member_types.empty() &&
                !ssbo_is_std430_packing(memb_type))
            return false;

        // Bump size.
        offset += type_to_std430_size(memb_type, member_flags);
    }

    return true;
}

string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
{
    vector<string> attr;

    auto &dec = meta[var.self].decoration;
    auto &type = get<SPIRType>(var.basetype);
    auto flags = dec.decoration_flags;
    auto typeflags = meta[type.self].decoration.decoration_flags;

    if (flags & (1ull << DecorationRowMajor))
        attr.push_back("row_major");
    if (flags & (1ull << DecorationColMajor))
        attr.push_back("column_major");
    if (flags & (1ull << DecorationLocation))
        attr.push_back(join("location = ", dec.location));
    if ((flags & (1ull << DecorationDescriptorSet)) && dec.set != 0) // set = 0 is the default.
        attr.push_back(join("set = ", dec.set));
    if (flags & (1ull << DecorationBinding))
        attr.push_back(join("binding = ", dec.binding));
    if (flags & (1ull << DecorationCoherent))
        attr.push_back("coherent");
    if (flags & (1ull << DecorationOffset))
        attr.push_back(join("offset = ", dec.offset));

    // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
    // If SPIR-V does not comply with either layout, we cannot really work around it.
    if (var.storage == StorageClassUniform &&
            (typeflags & (1ull << DecorationBlock)))
        attr.push_back("std140");

    if (var.storage == StorageClassUniform &&
            (typeflags & (1ull << DecorationBufferBlock)))
        attr.push_back(ssbo_is_std430_packing(type) ? "std430" : "std140");

    // For images, the type itself adds a layout qualifer.
    if (type.basetype == SPIRType::Image)
    {
        const char *fmt = format_to_glsl(type.image.format);
        if (fmt)
            attr.push_back(fmt);
    }

    if (attr.empty())
        return "";

    string res = "layout(";
    res += merge(attr);
    res += ") ";
    return res;
}

void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
{
    // OpenGL has no concept of push constant blocks, implement it as a uniform struct.
    auto &type = get<SPIRType>(var.basetype);

    auto &flags = meta[var.self].decoration.decoration_flags;
    flags &= ~((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet));

#if 0
    if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
        throw CompilerError("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
                            "Remap to location with reflection API first or disable these decorations.");
#endif

    // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
    // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
    auto &block_flags = meta[type.self].decoration.decoration_flags;
    uint64_t block_flag = block_flags & (1ull << DecorationBlock);
    block_flags &= ~block_flag;

    emit_struct(type);

    block_flags |= block_flag;

    emit_uniform(var);
    statement("");
}

void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
{
    auto &type = get<SPIRType>(var.basetype);
    auto ssbo = meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock);
    auto buffer_name = to_name(type.self);
    statement(layout_for_variable(var) + (ssbo ? "buffer " : "uniform ") + buffer_name);
    begin_scope();

    uint32_t i = 0;
    for (auto &member : type.member_types)
    {
        auto &membertype = get<SPIRType>(member);
        statement(member_decl(type, membertype, i), ";");
        i++;
    }

    end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
    statement("");
}

void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
{
    auto &type = get<SPIRType>(var.basetype);

    // Either make it plain in/out or in/out blocks depending on what shader is doing ...
    bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;

    const char *qual = nullptr;
    if (is_legacy() && execution.model == ExecutionModelVertex)
        qual = var.storage == StorageClassInput ? "attribute " : "varying ";
    else if (is_legacy() && execution.model == ExecutionModelFragment)
        qual = "varying "; // Fragment outputs are renamed so they never hit this case.
    else
        qual = var.storage == StorageClassInput ? "in " : "out ";

    if (block)
    {
        statement(layout_for_variable(var), qual, to_name(type.self));
        begin_scope();

        uint32_t i = 0;
        for (auto &member : type.member_types)
        {
            auto &membertype = get<SPIRType>(member);
            statement(member_decl(type, membertype, i), ";");
            i++;
        }

        end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
        statement("");
    }
    else
    {
        statement(layout_for_variable(var), qual, variable_decl(var), ";");
    }
}

void CompilerGLSL::emit_uniform(const SPIRVariable &var)
{
    auto &type = get<SPIRType>(var.basetype);
    if (type.basetype == SPIRType::Image)
    {
        if (!options.es && options.version < 420)
            require_extension("GL_ARB_shader_image_load_store");
        else if (options.es && options.version < 310)
            throw CompilerError("At least ESSL 3.10 required for shader image load store.");
    }

    statement(layout_for_variable(var), "uniform ", variable_decl(var), ";");
}

void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
{
    auto &m = meta[var.self].decoration;
    uint32_t location = 0;
    if (m.decoration_flags & (1ull << DecorationLocation))
        location = m.location;

    m.alias = join("gl_FragData[", location, "]");
    var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
}

void CompilerGLSL::replace_fragment_outputs()
{
    for (auto &id : ids)
    {
        if (id.get_type() == TypeVariable)
        {
            auto &var = id.get<SPIRVariable>();
            auto &type = get<SPIRType>(var.basetype);

            if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput)
                replace_fragment_output(var);
        }
    }
}

string CompilerGLSL::remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr)
{
    auto &out_type = get<SPIRType>(result_type);

    if (out_type.vecsize == input_components)
        return to_expression(expr);
    else if (input_components == 1)
        return join(type_to_glsl(out_type), "(", to_expression(expr), ")");
    else
    {
        auto e = to_expression(expr) + ".";
        // Just clamp the swizzle index if we have more outputs than inputs.
        for (uint32_t c = 0; c < out_type.vecsize; c++)
            e += index_to_swizzle(min(c, input_components - 1));
        if (backend.swizzle_is_function && out_type.vecsize > 1)
            e += "()";
        return e;
    }
}

void CompilerGLSL::emit_pls()
{
    if (execution.model != ExecutionModelFragment)
        throw CompilerError("Pixel local storage only supported in fragment shaders.");

    if (!options.es)
        throw CompilerError("Pixel local storage only supported in OpenGL ES.");

    if (options.version < 300)
        throw CompilerError("Pixel local storage only supported in ESSL 3.0 and above.");

    if (!pls_inputs.empty())
    {
        statement("__pixel_local_inEXT _PLSIn");
        begin_scope();
        for (auto &input : pls_inputs)
            statement(pls_decl(input), ";");
        end_scope_decl();
        statement("");
    }

    if (!pls_outputs.empty())
    {
        statement("__pixel_local_outEXT _PLSOut");
        begin_scope();
        for (auto &output : pls_outputs)
            statement(pls_decl(output), ";");
        end_scope_decl();
        statement("");
    }
}

void CompilerGLSL::emit_resources()
{
    // Legacy GL uses gl_FragData[], redeclare all fragment outputs
    // with builtins.
    if (execution.model == ExecutionModelFragment && is_legacy())
        replace_fragment_outputs();

    // Emit PLS blocks if we have such variables.
    if (!pls_inputs.empty() || !pls_outputs.empty())
        emit_pls();

    // Output all basic struct types which are not Block or BufferBlock as these are declared inplace
    // when such variables are instantiated.
    for (auto &id : ids)
    {
        if (id.get_type() == TypeType)
        {
            auto &type = id.get<SPIRType>();
            if (type.basetype == SPIRType::Struct &&
                    type.array.empty() &&
                    !type.pointer &&
                    (meta[type.self].decoration.decoration_flags & ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0)
            {
                emit_struct(type);
            }
        }
    }

    // Output UBOs and SSBOs
    for (auto &id : ids)
    {
        if (id.get_type() == TypeVariable)
        {
            auto &var = id.get<SPIRVariable>();
            auto &type = get<SPIRType>(var.basetype);

            if (type.pointer && type.storage == StorageClassUniform &&
                    !is_builtin_variable(var) &&
                    (meta[type.self].decoration.decoration_flags & ((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
            {
                emit_buffer_block(var);
            }
        }
    }

    // Output push constant blocks
    for (auto &id : ids)
    {
        if (id.get_type() == TypeVariable)
        {
            auto &var = id.get<SPIRVariable>();
            auto &type = get<SPIRType>(var.basetype);
            if (type.pointer && type.storage == StorageClassPushConstant)
                emit_push_constant_block(var);
        }
    }

    bool emitted = false;

    // Output Uniform Constants (values, samplers, images, etc).
    for (auto &id : ids)
    {
        if (id.get_type() == TypeVariable)
        {
            auto &var = id.get<SPIRVariable>();
            auto &type = get<SPIRType>(var.basetype);

            if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer &&
                    (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter))
            {
                emit_uniform(var);
                emitted = true;
            }
        }
    }

    if (emitted)
        statement("");
    emitted = false;

    // Output in/out interfaces.
    for (auto &id : ids)
    {
        if (id.get_type() == TypeVariable)
        {
            auto &var = id.get<SPIRVariable>();
            auto &type = get<SPIRType>(var.basetype);

            if (!is_builtin_variable(var) &&
                    !var.remapped_variable &&
                    type.pointer &&
                    (var.storage == StorageClassInput || var.storage == StorageClassOutput))
            {
                emit_interface_block(var);
                emitted = true;
            }
            else if (is_builtin_variable(var))
            {
                // For gl_InstanceIndex emulation on GLES, the API user needs to
                // supply this uniform.
                if (meta[var.self].decoration.builtin_type == BuiltInInstanceIndex)
                {
                    statement("uniform int SPIRV_Cross_BaseInstance;");
                    emitted = true;
                }
            }
        }
    }

    // Global variables.
    for (auto global : global_variables)
    {
        auto &var = get<SPIRVariable>(global);
        if (var.storage != StorageClassOutput)
        {
            statement(variable_decl(var), ";");
            emitted = true;
        }
    }

    if (emitted)
        statement("");
}

string CompilerGLSL::to_expression(uint32_t id)
{
    auto itr = invalid_expressions.find(id);
    if (itr != end(invalid_expressions))
    {
        auto &expr = get<SPIRExpression>(id);

        // This expression has been invalidated in the past.
        // Be careful with this expression next pass ...
        // Used for OpCompositeInsert forwarding atm.
        expr.used_while_invalidated = true;

        // We tried to read an invalidated expression.
        // This means we need another pass at compilation, but next time, do not try to forward
        // the variables which caused invalidation to happen in the first place.
        for (auto var : expr.invalidated_by)
        {
            //fprintf(stderr, "Expression %u was invalidated due to variable %u being invalid at read time!\n", id, var);
            get<SPIRVariable>(var).forwardable = false;
        }

        if (expr.invalidated_by.empty() && expr.loaded_from)
        {
            //fprintf(stderr, "Expression %u was invalidated due to variable %u being invalid at read time!\n", id, expr.loaded_from);
            get<SPIRVariable>(expr.loaded_from).forwardable = false;
        }
        force_recompile = true;
    }

    track_expression_read(id);

    switch (ids[id].get_type())
    {
        case TypeExpression:
        {
            auto &e = get<SPIRExpression>(id);
            if (e.base_expression)
                return to_expression(e.base_expression) + e.expression;
            else
                return e.expression;
        }

        case TypeConstant:
            return constant_expression(get<SPIRConstant>(id));

        case TypeVariable:
        {
            auto &var = get<SPIRVariable>(id);
            if (var.statically_assigned)
                return to_expression(var.static_expression);
            else if (var.deferred_declaration)
            {
                var.deferred_declaration = false;
                return variable_decl(var);
            }
            else
            {
                auto &dec = meta[var.self].decoration;
                if (dec.builtin)
                    return builtin_to_glsl(dec.builtin_type);
                else
                    return to_name(id);
            }
        }

        default:
            return to_name(id);
    }
}

string CompilerGLSL::constant_expression(const SPIRConstant &c)
{
    if (!c.subconstants.empty())
    {
        // Handles Arrays and structures.
        string res = type_to_glsl_constructor(get<SPIRType>(c.constant_type)) + "(";
        for (auto &elem : c.subconstants)
        {
            res += constant_expression(get<SPIRConstant>(elem));
            if (&elem != &c.subconstants.back())
                res += ", ";
        }
        res += ")";
        return res;
    }
    else if (c.columns() == 1)
    {
        return constant_expression_vector(c, 0);
    }
    else
    {
        string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
        for (uint32_t col = 0; col < c.columns(); col++)
        {
            res += constant_expression_vector(c, col);
            if (col + 1 < c.columns())
                res += ", ";
        }
        res += ")";
        return res;
    }
}

string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
{
    auto type = get<SPIRType>(c.constant_type);
    type.columns = 1;

    string res;
    if (c.vector_size() > 1)
        res += type_to_glsl(type) + "(";

    bool splat = c.vector_size() > 1;
    if (splat)
    {
        uint32_t ident = c.scalar(vector, 0);
        for (uint32_t i = 1; i < c.vector_size(); i++)
            if (ident != c.scalar(vector, i))
                splat = false;
    }

    switch (type.basetype)
    {
        case SPIRType::Float:
            if (splat)
            {
                res += convert_to_string(c.scalar_f32(vector, 0));
                if (backend.float_literal_suffix)
                    res += "f";
            }
            else
            {
                for (uint32_t i = 0; i < c.vector_size(); i++)
                {
                    res += convert_to_string(c.scalar_f32(vector, i));
                    if (backend.float_literal_suffix)
                        res += "f";
                    if (i + 1 < c.vector_size())
                        res += ", ";
                }
            }
            break;

        case SPIRType::UInt:
            if (splat)
            {
                res += convert_to_string(c.scalar(vector, 0));
                if (backend.uint32_t_literal_suffix)
                    res += "u";
            }
            else
            {
                for (uint32_t i = 0; i < c.vector_size(); i++)
                {
                    res += convert_to_string(c.scalar(vector, i));
                    if (backend.uint32_t_literal_suffix)
                        res += "u";
                    if (i + 1 < c.vector_size())
                        res += ", ";
                }
            }
            break;


        case SPIRType::Int:
            if (splat)
                res += convert_to_string(c.scalar_i32(vector, 0));
            else
            {
                for (uint32_t i = 0; i < c.vector_size(); i++)
                {
                    res += convert_to_string(c.scalar_i32(vector, i));
                    if (i + 1 < c.vector_size())
                        res += ", ";
                }
            }
            break;

        case SPIRType::Bool:
            if (splat)
                res += c.scalar(vector, 0) ? "true" : "false";
            else
            {
                for (uint32_t i = 0; i < c.vector_size(); i++)
                {
                    res += c.scalar(vector, i) ? "true" : "false";
                    if (i + 1 < c.vector_size())
                        res += ", ";
                }
            }
            break;

        default:
            throw CompilerError("Invalid constant expression basetype.");
    }

    if (c.vector_size() > 1)
        res += ")";

    return res;
}

string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
{
    auto &type = get<SPIRType>(result_type);
    auto flags = meta[result_id].decoration.decoration_flags;

    // If we're declaring temporaries inside continue blocks,
    // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
    if (current_continue_block)
    {
        auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
        if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
                    [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
                        return tmp.first == result_type && tmp.second == result_id;
                    }) == end(header.declare_temporary))
        {
            header.declare_temporary.emplace_back(result_type, result_id);
            force_recompile = true;
        }

        return join(to_name(result_id), " = ");
    }
    else
    {
        // The result_id has not been made into an expression yet, so use flags interface.
        return join(flags_to_precision_qualifiers_glsl(type, flags),
                type_to_glsl(type), " ", to_name(result_id), type_to_array_glsl(type), " = ");
    }
}

bool CompilerGLSL::expression_is_forwarded(uint32_t id)
{
    return forwarded_temporaries.find(id) != end(forwarded_temporaries);
}

SPIRExpression& CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs,
        bool forwarding, bool extra_parens, bool suppress_usage_tracking)
{
    if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
    {
        // Just forward it without temporary.
        // If the forward is trivial, we do not force flushing to temporary for this expression.
        if (!suppress_usage_tracking)
            forwarded_temporaries.insert(result_id);

        if (extra_parens)
            return set<SPIRExpression>(result_id, join("(", rhs, ")"), result_type, true);
        else
            return set<SPIRExpression>(result_id, rhs, result_type, true);
    }
    else
    {
        // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
        statement(declare_temporary(result_type, result_id), rhs, ";");
        return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
    }
}

void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
    emit_op(result_type, result_id, join(op, to_expression(op0)), should_forward(op0), true);
}

void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
{
    emit_op(result_type, result_id, join(bitcast_glsl(result_type, op0), " ", op, " ", bitcast_glsl(result_type, op1)),
            should_forward(op0) && should_forward(op1), true);
}

void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
    emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), should_forward(op0), false);
}

void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
{
    emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ")"),
            should_forward(op0) && should_forward(op1), false);
}

void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, const char *op)
{
    emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ")"),
            should_forward(op0) && should_forward(op1) && should_forward(op2), false);
}

void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, const char *op)
{
    emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ", to_expression(op3), ")"),
            should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3), false);
}

string CompilerGLSL::legacy_tex_op(const std::string &op,
        const SPIRType &imgtype)
{
    const char *type;
    switch (imgtype.image.dim)
    {
        case spv::Dim1D: type = "1D"; break;
        case spv::Dim2D: type = "2D"; break;
        case spv::Dim3D: type = "3D"; break;
        case spv::DimCube: type = "Cube"; break;
        case spv::DimBuffer: type = "Buffer"; break;
        case spv::DimSubpassData: type = "2D"; break;
        default: type = ""; break;
    }

    if (op == "texture")
        return join("texture", type);
    else if (op == "textureLod")
        return join("texture", type, "Lod");
    else if (op == "textureProj")
        return join("texture", type, "Proj");
    else if (op == "textureProjLod")
        return join("texture", type, "ProjLod");
    else
        throw CompilerError(join("Unsupported legacy texture op: ", op));
}

void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id,
        uint32_t left, uint32_t right, uint32_t lerp)
{
    auto &lerptype = expression_type(lerp);
    auto &restype = get<SPIRType>(result_type);

    bool has_boolean_mix = (options.es && options.version >= 310) ||
                           (!options.es && options.version >= 450);

    // Boolean mix not supported on desktop without extension.
    // Was added in OpenGL 4.5 with ES 3.1 compat.
    if (!has_boolean_mix && lerptype.basetype == SPIRType::Bool)
    {
        // Could use GL_EXT_shader_integer_mix on desktop at least,
        // but Apple doesn't support it. :(
        // Just implement it as ternary expressions.
        string expr;
        if (lerptype.vecsize == 1)
            expr = join(to_expression(lerp), " ? ", to_expression(right), " : ", to_expression(left));
        else
        {
            auto swiz = [this](uint32_t id, uint32_t i) {
                return join(to_expression(id), ".", index_to_swizzle(i));
            };

            expr = type_to_glsl_constructor(restype);
            expr += "(";
            for (uint32_t i = 0; i < restype.vecsize; i++)
            {
                expr += swiz(lerp, i);
                expr += " ? ";
                expr += swiz(right, i);
                expr += " : ";
                expr += swiz(left, i);
                if (i + 1 < restype.vecsize)
                    expr += ", ";
            }
            expr += ")";
        }

        emit_op(result_type, id, expr,
                should_forward(left) &&
                should_forward(right) &&
                should_forward(lerp),
                false);
    }
    else
        emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
}

void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
    emit_binary_func_op(result_type, result_id, image_id, samp_id, type_to_glsl(get<SPIRType>(result_type)).c_str());
}

void CompilerGLSL::emit_texture_op(const Instruction &i)
{
    auto ops = stream(i);
    auto op = static_cast<Op>(i.op);
    uint32_t length = i.length;

    if (i.offset + length > spirv.size())
        throw CompilerError("Compiler::parse() opcode out of range.");

    uint32_t result_type = ops[0];
    uint32_t id = ops[1];
    uint32_t img = ops[2];
    uint32_t coord = ops[3];
    uint32_t dref = 0;
    uint32_t comp = 0;
    bool gather = false;
    bool proj = false;
    const uint32_t *opt = nullptr;

    switch (op)
    {
        case OpImageSampleDrefImplicitLod:
        case OpImageSampleDrefExplicitLod:
            dref = ops[4];
            opt = &ops[5];
            length -= 5;
            break;

        case OpImageSampleProjDrefImplicitLod:
        case OpImageSampleProjDrefExplicitLod:
            dref = ops[4];
            proj = true;
            opt = &ops[5];
            length -= 5;
            break;

        case OpImageDrefGather:
            dref = ops[4];
            opt = &ops[5];
            gather = true;
            length -= 5;
            break;

        case OpImageGather:
            comp = ops[4];
            opt = &ops[5];
            gather = true;
            length -= 5;
            break;

        case OpImageSampleProjImplicitLod:
        case OpImageSampleProjExplicitLod:
            opt = &ops[4];
            length -= 4;
            proj = true;
            break;

        default:
            opt = &ops[4];
            length -= 4;
            break;
    }

    auto &imgtype = expression_type(img);
    uint32_t coord_components = 0;
    switch (imgtype.image.dim)
    {
        case spv::Dim1D: coord_components = 1; break;
        case spv::Dim2D: coord_components = 2; break;
        case spv::Dim3D: coord_components = 3; break;
        case spv::DimCube: coord_components = 3; break;
        case spv::DimBuffer: coord_components = 1; break;
        default: coord_components = 2; break;
    }

    if (proj)
        coord_components++;
    if (imgtype.image.arrayed)
        coord_components++;

    uint32_t bias = 0;
    uint32_t lod = 0;
    uint32_t grad_x = 0;
    uint32_t grad_y = 0;
    uint32_t coffset = 0;
    uint32_t offset = 0;
    uint32_t coffsets = 0;
    uint32_t sample = 0;
    uint32_t flags = 0;

    if (length)
    {
        flags = opt[0];
        opt++;
        length--;
    }

    auto test = [&](uint32_t &v, uint32_t flag) {
        if (length && (flags & flag))
        {
            v = *opt++;
            length--;
        }
    };

    test(bias, ImageOperandsBiasMask);
    test(lod, ImageOperandsLodMask);
    test(grad_x, ImageOperandsGradMask);
    test(grad_y, ImageOperandsGradMask);
    test(coffset, ImageOperandsConstOffsetMask);
    test(offset, ImageOperandsOffsetMask);
    test(coffsets, ImageOperandsConstOffsetsMask);
    test(sample, ImageOperandsSampleMask);

    string expr;
    string texop;

    if (op == OpImageFetch)
        texop += "texelFetch";
    else
    {
        texop += "texture";

        if (gather)
            texop += "Gather";
        if (coffsets)
            texop += "Offsets";
        if (proj)
            texop += "Proj";
        if (grad_x || grad_y)
            texop += "Grad";
        if (lod)
            texop += "Lod";
    }

    if (coffset || offset)
        texop += "Offset";

    if (is_legacy())
        texop = legacy_tex_op(texop, imgtype);

    expr += texop;
    expr += "(";
    expr += to_expression(img);

    bool swizz_func = backend.swizzle_is_function;
    auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char* {
        if (comps == in_comps)
            return "";

        switch (comps)
        {
            case 1: return ".x";
            case 2: return swizz_func ? ".xy()" : ".xy";
            case 3: return swizz_func ? ".xyz()" :".xyz";
            default: return "";
        }
    };

    bool forward = should_forward(coord);

    // The IR can give us more components than we need, so chop them off as needed.
    auto coord_expr = to_expression(coord) + swizzle(coord_components, expression_type(coord).vecsize);

    // TODO: implement rest ... A bit intensive.

    if (dref)
    {
        forward = forward && should_forward(dref);

        // SPIR-V splits dref and coordinate.
        if (coord_components == 4) // GLSL also splits the arguments in two.
        {
            expr += ", ";
            expr += to_expression(coord);
            expr += ", ";
            expr += to_expression(dref);
        }
        else
        {
            // Create a composite which merges coord/dref into a single vector.
            auto type = expression_type(coord);
            type.vecsize = coord_components + 1;
            expr += ", ";
            expr += type_to_glsl_constructor(type);
            expr += "(";
            expr += coord_expr;
            expr += ", ";
            expr += to_expression(dref);
            expr += ")";
        }
    }
    else
    {
        expr += ", ";
        expr += coord_expr;
    }

    if (grad_x || grad_y)
    {
        forward = forward && should_forward(grad_x);
        forward = forward && should_forward(grad_y);
        expr += ", ";
        expr += to_expression(grad_x);
        expr += ", ";
        expr += to_expression(grad_y);
    }

    if (lod)
    {
        forward = forward && should_forward(lod);
        expr += ", ";
        expr += to_expression(lod);
    }

    if (coffset)
    {
        forward = forward && should_forward(coffset);
        expr += ", ";
        expr += to_expression(coffset);
    }
    else if (offset)
    {
        forward = forward && should_forward(offset);
        expr += ", ";
        expr += to_expression(offset);
    }

    if (bias)
    {
        forward = forward && should_forward(bias);
        expr += ", ";
        expr += to_expression(bias);
    }

    if (comp)
    {
        forward = forward && should_forward(comp);
        expr += ", ";
        expr += to_expression(comp);
    }

    expr += ")";

    emit_op(result_type, id, expr, forward, false);
}

void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t)
{
    GLSLstd450 op = static_cast<GLSLstd450>(eop);

    switch (op)
    {
        // FP fiddling
        case GLSLstd450Round:
        case GLSLstd450RoundEven:
            emit_unary_func_op(result_type, id, args[0], "round");
            break;
        case GLSLstd450Trunc:
            emit_unary_func_op(result_type, id, args[0], "trunc");
            break;
        case GLSLstd450SAbs:
        case GLSLstd450FAbs:
            emit_unary_func_op(result_type, id, args[0], "abs");
            break;
        case GLSLstd450SSign:
        case GLSLstd450FSign:
            emit_unary_func_op(result_type, id, args[0], "sign");
            break;
        case GLSLstd450Floor:
            emit_unary_func_op(result_type, id, args[0], "floor");
            break;
        case GLSLstd450Ceil:
            emit_unary_func_op(result_type, id, args[0], "ceil");
            break;
        case GLSLstd450Fract:
            emit_unary_func_op(result_type, id, args[0], "fract");
            break;
        case GLSLstd450Radians:
            emit_unary_func_op(result_type, id, args[0], "radians");
            break;
        case GLSLstd450Degrees:
            emit_unary_func_op(result_type, id, args[0], "degrees");
            break;
        case GLSLstd450Fma:
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
            break;
        case GLSLstd450Modf:
            register_call_out_argument(args[1]);
            forced_temporaries.insert(id);
            emit_binary_func_op(result_type, id, args[0], args[1], "modf");
            break;

        // Minmax
        case GLSLstd450FMin:
        case GLSLstd450UMin:
        case GLSLstd450SMin:
            emit_binary_func_op(result_type, id, args[0], args[1], "min");
            break;
        case GLSLstd450FMax:
        case GLSLstd450UMax:
        case GLSLstd450SMax:
            emit_binary_func_op(result_type, id, args[0], args[1], "max");
            break;
        case GLSLstd450FClamp:
        case GLSLstd450UClamp:
        case GLSLstd450SClamp:
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
            break;

        // Trig
        case GLSLstd450Sin:
            emit_unary_func_op(result_type, id, args[0], "sin");
            break;
        case GLSLstd450Cos:
            emit_unary_func_op(result_type, id, args[0], "cos");
            break;
        case GLSLstd450Tan:
            emit_unary_func_op(result_type, id, args[0], "tan");
            break;
        case GLSLstd450Asin:
            emit_unary_func_op(result_type, id, args[0], "asin");
            break;
        case GLSLstd450Acos:
            emit_unary_func_op(result_type, id, args[0], "acos");
            break;
        case GLSLstd450Atan:
            emit_unary_func_op(result_type, id, args[0], "atan");
            break;
        case GLSLstd450Sinh:
            emit_unary_func_op(result_type, id, args[0], "sinh");
            break;
        case GLSLstd450Cosh:
            emit_unary_func_op(result_type, id, args[0], "cosh");
            break;
        case GLSLstd450Tanh:
            emit_unary_func_op(result_type, id, args[0], "tanh");
            break;
        case GLSLstd450Asinh:
            emit_unary_func_op(result_type, id, args[0], "asinh");
            break;
        case GLSLstd450Acosh:
            emit_unary_func_op(result_type, id, args[0], "acosh");
            break;
        case GLSLstd450Atanh:
            emit_unary_func_op(result_type, id, args[0], "atanh");
            break;
        case GLSLstd450Atan2:
            emit_binary_func_op(result_type, id, args[0], args[1], "atan");
            break;

        // Exponentials
        case GLSLstd450Pow:
            emit_binary_func_op(result_type, id, args[0], args[1], "pow");
            break;
        case GLSLstd450Exp:
            emit_unary_func_op(result_type, id, args[0], "exp");
            break;
        case GLSLstd450Log:
            emit_unary_func_op(result_type, id, args[0], "log");
            break;
        case GLSLstd450Exp2:
            emit_unary_func_op(result_type, id, args[0], "exp2");
            break;
        case GLSLstd450Log2:
            emit_unary_func_op(result_type, id, args[0], "log2");
            break;
        case GLSLstd450Sqrt:
            emit_unary_func_op(result_type, id, args[0], "sqrt");
            break;
        case GLSLstd450InverseSqrt:
            emit_unary_func_op(result_type, id, args[0], "inversesqrt");
            break;

        // Matrix math
        case GLSLstd450Determinant:
            emit_unary_func_op(result_type, id, args[0], "determinant");
            break;
        case GLSLstd450MatrixInverse:
            emit_unary_func_op(result_type, id, args[0], "inverse");
            break;

        // Lerping
        case GLSLstd450FMix:
        case GLSLstd450IMix:
        {
            emit_mix_op(result_type, id, args[0], args[1], args[2]);
            break;
        }
        case GLSLstd450Step:
            emit_binary_func_op(result_type, id, args[0], args[1], "step");
            break;
        case GLSLstd450SmoothStep:
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
            break;

        // Packing
        case GLSLstd450Frexp:
            register_call_out_argument(args[1]);
            forced_temporaries.insert(id);
            emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
            break;
        case GLSLstd450Ldexp:
            emit_binary_func_op(result_type, id, args[0], args[1], "ldexp");
            break;
        case GLSLstd450PackSnorm4x8:
            emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
            break;
        case GLSLstd450PackUnorm4x8:
            emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
            break;
        case GLSLstd450PackSnorm2x16:
            emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
            break;
        case GLSLstd450PackUnorm2x16:
            emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
            break;
        case GLSLstd450PackHalf2x16:
            emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
            break;
        case GLSLstd450UnpackSnorm4x8:
            emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
            break;
        case GLSLstd450UnpackUnorm4x8:
            emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
            break;
        case GLSLstd450UnpackSnorm2x16:
            emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
            break;
        case GLSLstd450UnpackUnorm2x16:
            emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
            break;
        case GLSLstd450UnpackHalf2x16:
            emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
            break;

        // Vector math
        case GLSLstd450Length:
            emit_unary_func_op(result_type, id, args[0], "length");
            break;
        case GLSLstd450Distance:
            emit_binary_func_op(result_type, id, args[0], args[1], "distance");
            break;
        case GLSLstd450Cross:
            emit_binary_func_op(result_type, id, args[0], args[1], "cross");
            break;
        case GLSLstd450Normalize:
            emit_unary_func_op(result_type, id, args[0], "normalize");
            break;
        case GLSLstd450FaceForward:
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
            break;
        case GLSLstd450Reflect:
            emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
            break;
        case GLSLstd450Refract:
            emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
            break;

        // Bit-fiddling
        case GLSLstd450FindILsb:
            emit_unary_func_op(result_type, id, args[0], "findLSB");
            break;
        case GLSLstd450FindSMsb:
        case GLSLstd450FindUMsb:
            emit_unary_func_op(result_type, id, args[0], "findMSB");
            break;

        // Multisampled varying
        case GLSLstd450InterpolateAtCentroid:
            emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
            break;
        case GLSLstd450InterpolateAtSample:
            emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
            break;
        case GLSLstd450InterpolateAtOffset:
            emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
            break;

        default:
            statement("// unimplemented GLSL op ", eop);
            break;
    }
}

string CompilerGLSL::bitcast_glsl_op(uint32_t result_type, uint32_t argument)
{
    auto &out_type = get<SPIRType>(result_type);
    auto &in_type = expression_type(argument);

    if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
        return type_to_glsl(out_type);
    else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
        return "floatBitsToUint";
    else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
        return type_to_glsl(out_type);
    else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
        return "floatBitsToInt";
    else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
        return "uintBitsToFloat";
    else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
        return "intBitsToFloat";
    else
        return "";
}

string CompilerGLSL::bitcast_glsl(uint32_t result_type, uint32_t argument)
{
    auto op = bitcast_glsl_op(result_type, argument);
    if (op.empty())
        return to_expression(argument);
    else
        return join(op, "(", to_expression(argument), ")");
}

string CompilerGLSL::builtin_to_glsl(BuiltIn builtin)
{
    switch (builtin)
    {
        case BuiltInPosition: return "gl_Position";
        case BuiltInPointSize: return "gl_PointSize";
        case BuiltInVertexId: return "gl_VertexID";
        case BuiltInInstanceId: return "gl_InstanceID";
        case BuiltInVertexIndex: return "gl_VertexID"; // gl_VertexID already has the base offset applied.
        case BuiltInInstanceIndex: return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
        case BuiltInPrimitiveId: return "gl_PrimitiveID";
        case BuiltInInvocationId: return "gl_InvocationID";
        case BuiltInLayer: return "gl_Layer";
        case BuiltInTessLevelOuter: return "gl_TessLevelOuter";
        case BuiltInTessLevelInner: return "gl_TessLevelInner";
        case BuiltInTessCoord: return "gl_TessCoord";
        case BuiltInFragCoord: return "gl_FragCoord";
        case BuiltInPointCoord: return "gl_PointCoord";
        case BuiltInFrontFacing: return "gl_FrontFacing";
        case BuiltInFragDepth: return "gl_FragDepth";
        case BuiltInNumWorkgroups: return "gl_NumWorkGroups";
        case BuiltInWorkgroupSize: return "gl_WorkGroupSize";
        case BuiltInWorkgroupId: return "gl_WorkGroupID";
        case BuiltInLocalInvocationId: return "gl_LocalInvocationID";
        case BuiltInGlobalInvocationId: return "gl_GlobalInvocationID";
        case BuiltInLocalInvocationIndex: return "gl_LocalInvocationIndex";
        default: return "gl_???";
    }
}

const char* CompilerGLSL::index_to_swizzle(uint32_t index)
{
    switch (index)
    {
        case 0: return "x";
        case 1: return "y";
        case 2: return "z";
        case 3: return "w";
        default: throw CompilerError("Swizzle index out of range");
    }
}

string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal, bool chain_only)
{
    string expr;
    if (!chain_only)
        expr = to_expression(base);

    const auto *type = &expression_type(base);

    // For resolving array accesses, etc, keep a local copy for poking.
    SPIRType temp;

    bool access_chain_is_arrayed = false;

    for (uint32_t i = 0; i < count; i++)
    {
        uint32_t index = indices[i];

        // Arrays
        if (!type->array.empty())
        {
            expr += "[";
            if (index_is_literal)
                expr += convert_to_string(index);
            else
                expr += to_expression(index);
            expr += "]";

            // We have to modify the type, so keep a local copy.
            if (&temp != type)
                temp = *type;
            type = &temp;
            temp.array.pop_back();

            access_chain_is_arrayed = true;
        }
        // For structs, the index refers to a constant, which indexes into the members.
        // We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
        else if (type->basetype == SPIRType::Struct)
        {
            if (!index_is_literal)
                index = get<SPIRConstant>(index).scalar();

            if (index >= type->member_types.size())
                throw CompilerError("Member index is out of bounds!");

            BuiltIn builtin;
            if (is_member_builtin(*type, index, &builtin))
            {
                // FIXME: We rely here on OpName on gl_in/gl_out to make this work properly.
                // To make this properly work by omitting all OpName opcodes,
                // we need to infer gl_in or gl_out based on the builtin, and stage.
                if (access_chain_is_arrayed)
                {
                    expr += ".";
                    expr += builtin_to_glsl(builtin);
                }
                else
                    expr = builtin_to_glsl(builtin);
            }
            else
            {
                expr += ".";
                expr += to_member_name(*type, index);
            }
            type = &get<SPIRType>(type->member_types[index]);
        }
        // Matrix -> Vector
        else if (type->columns > 1)
        {
            expr += "[";
            if (index_is_literal)
                expr += convert_to_string(index);
            else
                expr += to_expression(index);
            expr += "]";

            // We have to modify the type, so keep a local copy.
            if (&temp != type)
                temp = *type;
            type = &temp;
            temp.columns = 1;
        }
        // Vector -> Scalar
        else if (type->vecsize > 1)
        {
            if (index_is_literal)
            {
                expr += ".";
                expr += index_to_swizzle(index);
            }
            else if (ids[index].get_type() == TypeConstant)
            {
                auto &c = get<SPIRConstant>(index);
                expr += ".";
                expr += index_to_swizzle(c.scalar());
            }
            else
            {
                expr += "[";
                expr += to_expression(index);
                expr += "]";
            }

            // We have to modify the type, so keep a local copy.
            if (&temp != type)
                temp = *type;
            type = &temp;
            temp.vecsize = 1;
        }
        else
            throw CompilerError("Cannot subdivide a scalar value!");
    }

    return expr;
}

bool CompilerGLSL::should_forward(uint32_t id)
{
    return is_immutable(id) &&
        !options.force_temporary;
}

void CompilerGLSL::track_expression_read(uint32_t id)
{
    // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
    // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
    if (expression_is_forwarded(id))
    {
        auto &v = expression_usage_counts[id];
        v++;

        if (v >= 2)
        {
            //if (v == 2)
            //    fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);

            forced_temporaries.insert(id);
            // Force a recompile after this pass to avoid forwarding this variable.
            force_recompile = true;
        }
    }
}

bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
{
    if (forced_temporaries.find(id) != end(forced_temporaries))
        return false;

    for (uint32_t i = 0; i < num_args; i++)
        if (!should_forward(args[i]))
            return false;

    // We need to forward globals as well.
    if (!pure)
    {
        for (auto global : global_variables)
            if (!should_forward(global))
                return false;
        for (auto aliased : aliased_variables)
            if (!should_forward(aliased))
                return false;
    }

    return true;
}

void CompilerGLSL::register_impure_function_call()
{
    // Impure functions can modify globals and aliased variables, so invalidate them as well.
    for (auto global : global_variables)
        flush_dependees(get<SPIRVariable>(global));
    for (auto aliased : aliased_variables)
        flush_dependees(get<SPIRVariable>(aliased));
}

void CompilerGLSL::register_call_out_argument(uint32_t id)
{
    register_write(id);

    auto *var = maybe_get<SPIRVariable>(id);
    if (var)
        flush_variable_declaration(var->self);
}

void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
    auto *var = maybe_get<SPIRVariable>(id);
    if (var && var->deferred_declaration)
    {
        statement(variable_decl(*var), ";");
        var->deferred_declaration = false;
    }
}

bool CompilerGLSL::remove_duplicate_swizzle(string &op)
{
    auto pos = op.find_last_of('.');
    if (pos == string::npos || pos == 0)
        return false;

    string final_swiz = op.substr(pos + 1, string::npos);

    if (backend.swizzle_is_function)
    {
        if (final_swiz.size() < 2)
            return false;

        if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
            final_swiz.erase(final_swiz.size() - 2, string::npos);
        else
            return false;
    }

    // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
    // If so, and previous swizzle is of same length,
    // we can drop the final swizzle altogether.
    for (uint32_t i = 0; i < final_swiz.size(); i++)
    {
        static const char expected[] = { 'x', 'y', 'z', 'w' };
        if (i >= 4 || final_swiz[i] != expected[i])
            return false;
    }

    auto prevpos = op.find_last_of('.', pos - 1);
    if (prevpos == string::npos)
        return false;

    prevpos++;

    // Make sure there are only swizzles here ...
    for (auto i = prevpos; i < pos; i++)
    {
        if (op[i] < 'w' || op[i] > 'z')
        {
            // If swizzles are foo.xyz() like in C++ backend for example, check for that.
            if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
                break;
            return false;
        }
    }

    // If original swizzle is large enough, just carve out the components we need.
    // E.g. foobar.wyx.xy will turn into foobar.wy.
    if (pos - prevpos >= final_swiz.size())
    {
        op.erase(prevpos + final_swiz.size(), string::npos);

        // Add back the function call ...
        if (backend.swizzle_is_function)
            op += "()";
    }
    return true;
}

// Optimizes away vector swizzles where we have something like
// vec3 foo;
// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeCombine.
bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
{
    auto pos = op.find_last_of('.');
    if (pos == string::npos || pos == 0)
        return false;

    string final_swiz = op.substr(pos + 1, string::npos);

    if (backend.swizzle_is_function)
    {
        if (final_swiz.size() < 2)
            return false;

        if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
            final_swiz.erase(final_swiz.size() - 2, string::npos);
        else
            return false;
    }

    // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
    // If so, and previous swizzle is of same length,
    // we can drop the final swizzle altogether.
    for (uint32_t i = 0; i < final_swiz.size(); i++)
    {
        static const char expected[] = { 'x', 'y', 'z', 'w' };
        if (i >= 4 || final_swiz[i] != expected[i])
            return false;
    }

    auto &type = expression_type(base);

    // Sanity checking ...
    assert(type.columns == 1 && type.array.empty());

    if (type.vecsize == final_swiz.size())
        op.erase(pos, string::npos);
    return true;
}

string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t length)
{
    uint32_t base = 0;
    bool swizzle_optimization = false;
    string op;

    for (uint32_t i = 0; i < length; i++)
    {
        auto *e = maybe_get<SPIRExpression>(elems[i]);

        // If we're merging another scalar which belongs to the same base
        // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
        if (e && e->base_expression && e->base_expression == base)
        {
            // Only supposed to be used for vector swizzle -> scalar.
            assert(!e->expression.empty() && e->expression.front() == '.');
            op += e->expression.substr(1, string::npos);
            swizzle_optimization = true;
        }
        else
        {
            // We'll likely end up with duplicated swizzles, e.g.
            // foobar.xyz.xyz from patterns like
            // OpVectorSwizzle
            // OpCompositeExtract x 3
            // OpCompositeConstruct 3x + other scalar.
            // Just modify op in-place.
            if (swizzle_optimization)
            {
                if (backend.swizzle_is_function)
                    op += "()";

                // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
                // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
                // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
                // Essentially, we can only remove one set of swizzles, since that's what we have control over ...
                // Case 1:
                //  foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
                //               foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
                // Case 2:
                //  foo.xyz: Duplicate swizzle won't kick in.
                //           If foo is vec3, we can remove xyz, giving just foo.
                if (!remove_duplicate_swizzle(op))
                    remove_unity_swizzle(base, op);
                swizzle_optimization = false;
            }

            if (i)
                op += ", ";
            op += to_expression(elems[i]);
        }

        base = e ? e->base_expression : 0;
    }

    if (swizzle_optimization)
    {
        if (backend.swizzle_is_function)
            op += "()";

        if (!remove_duplicate_swizzle(op))
            remove_unity_swizzle(base, op);
    }

    return op;
}

void CompilerGLSL::emit_instruction(const Instruction &i)
{
    auto ops = stream(i);
    auto op = static_cast<Op>(i.op);
    uint32_t length = i.length;

#define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
#define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
#define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
#define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)

    switch (op)
    {
        // Dealing with memory
        case OpLoad:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t ptr = ops[2];

            flush_variable_declaration(ptr);

            // If we're loading from memory that cannot be changed by the shader,
            // just forward the expression directly to avoid needless temporaries.
            if (should_forward(ptr))
            {
                set<SPIRExpression>(id, to_expression(ptr), result_type, true);
                register_read(id, ptr, true);
            }
            else
            {
                // If the variable can be modified after this OpLoad, we cannot just forward the expression.
                // We must read it now and store it in a temporary.
                emit_op(result_type, id, to_expression(ptr), false, false);
                register_read(id, ptr, false);
            }
            break;
        }

        case OpInBoundsAccessChain:
        case OpAccessChain:
        {
            auto *var = maybe_get<SPIRVariable>(ops[2]);
            if (var)
                flush_variable_declaration(var->self);

            // If the base is immutable, the access chain pointer must also be.
            auto e = access_chain(ops[2], &ops[3], length - 3, false);
            auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], is_immutable(ops[2]));
            expr.loaded_from = ops[2];
            break;
        }

        case OpStore:
        {
            auto *var = maybe_get<SPIRVariable>(ops[0]);

            if (var && var->statically_assigned)
                var->static_expression = ops[1];
            else
            {
                auto lhs = to_expression(ops[0]);
                auto rhs = to_expression(ops[1]);

                // It is possible with OpLoad/OpCompositeInsert/OpStore that we get <expr> = <same-expr>.
                // For this case, we don't need to invalidate anything and emit any opcode.
                if (lhs != rhs)
                {
                    register_write(ops[0]);
                    statement(lhs, " = ", rhs, ";");
                }
            }
            break;
        }

        case OpArrayLength:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            auto e = access_chain(ops[2], &ops[3], length - 3, true);
            set<SPIRExpression>(id, e + ".length()", result_type, true);
            break;
        }

        // Function calls
        case OpFunctionCall:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t func = ops[2];
            const auto *arg = &ops[3];
            length -= 3;

            auto &callee = get<SPIRFunction>(func);
            bool pure = function_is_pure(callee);

            bool callee_has_out_variables = false;

            // Invalidate out variables passed to functions since they can be OpStore'd to.
            for (uint32_t i = 0; i < length; i++)
            {
                if (callee.arguments[i].write_count)
                {
                    register_call_out_argument(arg[i]);
                    callee_has_out_variables = true;
                }

                flush_variable_declaration(arg[i]);
            }

            if (!pure)
                register_impure_function_call();

            string funexpr;
            funexpr += to_name(func) + "(";
            for (uint32_t i = 0; i < length; i++)
            {
                funexpr += to_expression(arg[i]);
                if (i + 1 < length)
                    funexpr += ", ";
            }
            funexpr += ")";

            if (get<SPIRType>(result_type).basetype != SPIRType::Void)
            {
                // If the function actually writes to an out variable,
                // take the conservative route and do not forward.
                // The problem is that we might not read the function
                // result (and emit the function) before an out variable
                // is read (common case when return value is ignored!
                // In order to avoid start tracking invalid variables,
                // just avoid the forwarding problem altogether.
                bool forward = args_will_forward(id, arg, length, pure) &&
                    !callee_has_out_variables && pure &&
                    (forced_temporaries.find(id) == end(forced_temporaries));

                emit_op(result_type, id, funexpr, forward, false);

                // Function calls are implicit loads from all variables in question.
                // Set dependencies for them.
                for (uint32_t i = 0; i < length; i++)
                    register_read(id, arg[i], forward);

                // If we're going to forward the temporary result,
                // put dependencies on every variable that must not change.
                if (forward)
                    register_global_read_dependencies(callee, id);
            }
            else
                statement(funexpr, ";");

            break;
        }

        // Composite munging
        case OpCompositeConstruct:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            const auto *elems = &ops[2];
            length -= 2;

            if (!length)
                throw CompilerError("Invalid input to OpCompositeConstruct.");

            bool forward = true;
            for (uint32_t i = 0; i < length; i++)
                forward = forward && should_forward(elems[i]);

            auto &in_type = expression_type(elems[0]);
            bool splat = in_type.vecsize == 1 && in_type.columns == 1;

            if (splat)
            {
                uint32_t input = elems[0];
                for (uint32_t i = 0; i < length; i++)
                    if (input != elems[i])
                        splat = false;
            }

            auto op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
            if (splat)
                op += to_expression(elems[0]);
            else
                op += build_composite_combiner(elems, length);
            op += ")";
            emit_op(result_type, id, op, forward, false);
            break;
        }

        case OpVectorInsertDynamic:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t vec = ops[2];
            uint32_t comp = ops[3];
            uint32_t index = ops[4];

            flush_variable_declaration(vec);

            // Make a copy, then use access chain to store the variable.
            statement(declare_temporary(result_type, id), to_expression(vec), ";");
            set<SPIRExpression>(id, to_name(id), result_type, true);
            auto chain = access_chain(id, &index, 1, false);
            statement(chain, " = ", to_expression(comp), ";");
            break;
        }

        case OpVectorExtractDynamic:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];

            auto expr = access_chain(ops[2], &ops[3], 1, false);
            emit_op(result_type, id, expr, should_forward(ops[2]), false);
            break;
        }

        case OpCompositeExtract:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            length -= 3;

            auto &type = get<SPIRType>(result_type);

            // Only apply this optimization if result is scalar.
            if (should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
            {
                // We want to split the access chain from the base.
                // This is so we can later combine different CompositeExtract results
                // with CompositeConstruct without emitting code like
                //
                // vec3 temp = texture(...).xyz
                // vec4(temp.x, temp.y, temp.z, 1.0).
                //
                // when we actually wanted to emit this
                // vec4(texture(...).xyz, 1.0).
                //
                // Including the base will prevent this and would trigger multiple reads
                // from expression causing it to be forced to an actual temporary in GLSL.
                auto expr = access_chain(ops[2], &ops[3], length, true, true);
                auto &e = emit_op(result_type, id, expr, true, false,
                        !expression_is_forwarded(ops[2]));
                e.base_expression = ops[2];
            }
            else
            {
                auto expr = access_chain(ops[2], &ops[3], length, true);
                emit_op(result_type, id, expr, should_forward(ops[2]), false,
                        !expression_is_forwarded(ops[2]));
            }
            break;
        }

        case OpCompositeInsert:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t obj = ops[2];
            uint32_t composite = ops[3];
            const auto *elems = &ops[4];
            length -= 4;

            flush_variable_declaration(composite);

            auto *expr = maybe_get<SPIRExpression>(id);
            if ((expr && expr->used_while_invalidated) || !should_forward(composite))
            {
                // Make a copy, then use access chain to store the variable.
                statement(declare_temporary(result_type, id), to_expression(composite), ";");
                set<SPIRExpression>(id, to_name(id), result_type, true);
                auto chain = access_chain(id, elems, length, true);
                statement(chain, " = ", to_expression(obj), ";");
            }
            else
            {
                auto chain = access_chain(composite, elems, length, true);
                statement(chain, " = ", to_expression(obj), ";");
                set<SPIRExpression>(id, to_expression(composite), result_type, true);

                register_write(composite);
                register_read(id, composite, true);
                // Invalidate the old expression we inserted into.
                invalid_expressions.insert(composite);
            }
            break;
        }

        case OpCopyObject:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t rhs = ops[2];
            if (expression_is_lvalue(rhs))
            {
                // Need a copy.
                statement(declare_temporary(result_type, id), to_expression(rhs), ";");
                set<SPIRExpression>(id, to_name(id), result_type, true);
            }
            else
            {
                // RHS expression is immutable, so just forward it.
                // Copying these things really make no sense, but
                // seems to be allowed anyways.
                set<SPIRExpression>(id, to_expression(rhs), result_type, true);
            }
            break;
        }

        case OpVectorShuffle:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t vec0 = ops[2];
            uint32_t vec1 = ops[3];
            const auto *elems = &ops[4];
            length -= 4;

            auto &type0 = expression_type(vec0);

            bool shuffle = false;
            for (uint32_t i = 0; i < length; i++)
                if (elems[i] >= type0.vecsize)
                    shuffle = true;

            string expr;
            bool trivial_forward;

            if (shuffle)
            {
                trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);

                // Constructor style and shuffling from two different vectors.
                vector<string> args;
                for (uint32_t i = 0; i < length; i++)
                {
                    if (elems[i] >= type0.vecsize)
                        args.push_back(join(to_expression(vec1), ".", index_to_swizzle(elems[i] - type0.vecsize)));
                    else
                        args.push_back(join(to_expression(vec0), ".", index_to_swizzle(elems[i])));
                }
                expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
            }
            else
            {
                trivial_forward = !expression_is_forwarded(vec0);

                // We only source from first vector, so can use swizzle.
                expr += to_expression(vec0);
                expr += ".";
                for (uint32_t i = 0; i < length; i++)
                    expr += index_to_swizzle(elems[i]);
                if (backend.swizzle_is_function && length > 1)
                    expr += "()";
            }

            // A shuffle is trivial in that it doesn't actually *do* anything.
            // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.

            emit_op(result_type, id, expr, should_forward(vec0) && should_forward(vec1), false, trivial_forward);
            break;
        }

        // ALU
        case OpIsNan:
            UFOP(isnan);
            break;

        case OpIsInf:
            UFOP(isinf);
            break;

        case OpSNegate:
        case OpFNegate:
            UOP(-);
            break;

        case OpIAdd:
        case OpFAdd:
            BOP(+);
            break;

        case OpISub:
        case OpFSub:
            BOP(-);
            break;

        case OpIMul:
        case OpFMul:
        case OpMatrixTimesVector:
        case OpMatrixTimesScalar:
        case OpVectorTimesScalar:
        case OpVectorTimesMatrix:
        case OpMatrixTimesMatrix:
            BOP(*);
            break;

        case OpOuterProduct:
            UFOP(outerProduct);
            break;

        case OpDot:
            BFOP(dot);
            break;

        case OpTranspose:
            UFOP(transpose);
            break;

        case OpSDiv:
        case OpUDiv:
        case OpFDiv:
            BOP(/);
            break;

        // Might need workaround if RightLocal can be used on signed types ...
        case OpShiftRightLogical:
        case OpShiftRightArithmetic:
            BOP(>>);
            break;

        case OpShiftLeftLogical:
            BOP(<<);
            break;

        case OpBitwiseOr:
            BOP(|);
            break;

        case OpBitwiseXor:
            BOP(^);
            break;

        case OpBitwiseAnd:
            BOP(&);
            break;

        case OpNot:
            UOP(~);
            break;

        case OpUMod:
        case OpSMod:
            BOP(%);
            break;

        case OpFMod:
            BFOP(mod);
            break;

        // Relational
        case OpAny:
            UFOP(any);
            break;

        case OpAll:
            UFOP(all);
            break;

        case OpSelect:
            emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
            break;

        case OpLogicalOr:
            BOP(||);
            break;

        case OpLogicalAnd:
            BOP(&&);
            break;

        case OpLogicalNot:
            UOP(!);
            break;

        case OpLogicalEqual:
        case OpIEqual:
        case OpFOrdEqual:
        {
            if (expression_type(ops[2]).vecsize > 1)
                BFOP(equal);
            else
                BOP(==);
            break;
        }

        case OpLogicalNotEqual:
        case OpINotEqual:
        case OpFOrdNotEqual:
        {
            if (expression_type(ops[2]).vecsize > 1)
                BFOP(notEqual);
            else
                BOP(!=);
            break;
        }

        case OpUGreaterThan:
        case OpSGreaterThan:
        case OpFOrdGreaterThan:
        {
            if (expression_type(ops[2]).vecsize > 1)
                BFOP(greaterThan);
            else
                BOP(>);
            break;
        }

        case OpUGreaterThanEqual:
        case OpSGreaterThanEqual:
        case OpFOrdGreaterThanEqual:
        {
            if (expression_type(ops[2]).vecsize > 1)
                BFOP(greaterThanEqual);
            else
                BOP(>=);
            break;
        }

        case OpULessThan:
        case OpSLessThan:
        case OpFOrdLessThan:
        {
            if (expression_type(ops[2]).vecsize > 1)
                BFOP(lessThan);
            else
                BOP(<);
            break;
        }

        case OpULessThanEqual:
        case OpSLessThanEqual:
        case OpFOrdLessThanEqual:
        {
            if (expression_type(ops[2]).vecsize > 1)
                BFOP(lessThanEqual);
            else
                BOP(<=);
            break;
        }

        // Conversion
        case OpConvertFToU:
        case OpConvertFToS:
        case OpConvertSToF:
        case OpConvertUToF:
        case OpUConvert:
        case OpSConvert:
        case OpFConvert:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];

            auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
            emit_unary_func_op(result_type, id, ops[2], func.c_str());
            break;
        }

        case OpBitcast:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t arg = ops[2];

            auto op = bitcast_glsl_op(result_type, arg);
            emit_unary_func_op(result_type, id, arg, op.c_str());
            break;
        }

        // Derivatives
        case OpDPdx:
            UFOP(dFdx);
            break;

        case OpDPdy:
            UFOP(dFdy);
            break;

        case OpFwidth:
            UFOP(fwidth);
            break;

        // Bitfield
        case OpBitFieldInsert:
            QFOP(bitfieldInsert);
            break;

        case OpBitFieldSExtract:
        case OpBitFieldUExtract:
            QFOP(bitfieldExtract);
            break;

        case OpBitReverse:
            UFOP(bitfieldReverse);
            break;

        case OpBitCount:
            UFOP(bitCount);
            break;

        // Atomics
        case OpAtomicExchange:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t ptr = ops[2];
            // Ignore semantics for now, probably only relevant to CL.
            uint32_t val = ops[5];
            const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
            forced_temporaries.insert(id);
            emit_binary_func_op(result_type, id, ptr, val, op);
            flush_all_atomic_capable_variables();
            break;
        }

        case OpAtomicCompareExchange:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            uint32_t ptr = ops[2];
            uint32_t val = ops[6];
            uint32_t comp = ops[7];
            const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";

            forced_temporaries.insert(id);
            emit_trinary_func_op(result_type, id, ptr, comp, val, op);
            flush_all_atomic_capable_variables();
            break;
        }

        case OpAtomicLoad:
            flush_all_atomic_capable_variables();
            // FIXME: Image?
            UFOP(atomicCounter);
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;

        // OpAtomicStore unimplemented. Not sure what would use that.
        // OpAtomicLoad seems to only be relevant for atomic counters.

        case OpAtomicIIncrement:
            forced_temporaries.insert(ops[1]);
            // FIXME: Image?
            UFOP(atomicCounterIncrement);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;

        case OpAtomicIDecrement:
            forced_temporaries.insert(ops[1]);
            // FIXME: Image?
            UFOP(atomicCounterDecrement);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;

        case OpAtomicIAdd:
        {
            const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
            forced_temporaries.insert(ops[1]);
            emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;
        }

        case OpAtomicISub:
        {
            const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
            forced_temporaries.insert(ops[1]);
            auto expr = join(op, "(", to_expression(ops[2]), ", -", to_expression(ops[5]), ")");
            emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]), false);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;
        }

        case OpAtomicSMin:
        case OpAtomicUMin:
        {
            const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
            forced_temporaries.insert(ops[1]);
            emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;
        }

        case OpAtomicSMax:
        case OpAtomicUMax:
        {
            const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
            forced_temporaries.insert(ops[1]);
            emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;
        }

        case OpAtomicAnd:
        {
            const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
            forced_temporaries.insert(ops[1]);
            emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;
        }

        case OpAtomicOr:
        {
            const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
            forced_temporaries.insert(ops[1]);
            emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;
        }

        case OpAtomicXor:
        {
            const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
            forced_temporaries.insert(ops[1]);
            emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
            flush_all_atomic_capable_variables();
            register_read(ops[1], ops[2], should_forward(ops[2]));
            break;
        }

        // Geometry shaders
        case OpEmitVertex:
            statement("EmitVertex();");
            break;

        case OpEndPrimitive:
            statement("EndPrimitive();");
            break;

        case OpEmitStreamVertex:
            statement("EmitStreamVertex();");
            break;

        case OpEndStreamPrimitive:
            statement("EndStreamPrimitive();");
            break;

        // Textures
        case OpImageSampleImplicitLod:
        case OpImageSampleExplicitLod:
        case OpImageSampleProjImplicitLod:
        case OpImageSampleProjExplicitLod:
        case OpImageSampleDrefImplicitLod:
        case OpImageSampleDrefExplicitLod:
        case OpImageSampleProjDrefImplicitLod:
        case OpImageSampleProjDrefExplicitLod:
        case OpImageFetch:
        case OpImageGather:
        case OpImageDrefGather:
            // Gets a bit hairy, so move this to a separate instruction.
            emit_texture_op(i);
            break;

        case OpImage:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            emit_op(result_type, id, to_expression(ops[2]), true, false);
            break;
        }

        case OpSampledImage:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            emit_sampled_image_op(result_type, id, ops[2], ops[3]);
            break;
        }

        case OpImageQuerySizeLod:
            BFOP(textureSize);
            break;

        // Image load/store
        case OpImageRead:
        {
            // We added Nonreadable speculatively to the OpImage variable due to glslangValidator
            // not adding the proper qualifiers.
            // If it turns out we need to read the image after all, remove the qualifier and recompile.
            auto *var = maybe_get_backing_variable(ops[2]);
            if (var)
            {
                auto &flags = meta.at(var->self).decoration.decoration_flags;
                if (flags & (1ull << DecorationNonReadable))
                {
                    flags &= ~(1ull << DecorationNonReadable);
                    force_recompile = true;
                }
            }

            uint32_t result_type = ops[0];
            uint32_t id = ops[1];

            bool pure;
            string imgexpr;
            auto &type = expression_type(ops[2]);

            if (var && var->remapped_variable) // PLS input, just read as-is without any op-code
            {
                // PLS input could have different number of components than what the SPIR expects, swizzle to
                // the appropriate vector size.
                auto itr = find_if(begin(pls_inputs), end(pls_inputs),
                        [var](const PlsRemap &pls) { return pls.id == var->self; });

                if (itr == end(pls_inputs))
                    throw CompilerError("Found PLS remap for OpImageRead, but ID is not a PLS input ...");

                uint32_t components = pls_format_to_components(itr->format);
                imgexpr = remap_swizzle(result_type, components, ops[2]);
                pure = true;
            }
            else if (type.image.dim == DimSubpassData)
            {
                // Implement subpass loads via texture barrier style sampling.
                // Fairly ugly, but should essentially work as a fallback for desktop.
                imgexpr = join("texture(", to_expression(ops[2]),
                        ", gl_FragCoord.xy / vec2(textureSize(", to_expression(ops[2]), ", 0).xy))");
                pure = true;
            }
            else
            {
                // Plain image load/store.
                imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ")");
                pure = false;
            }

            if (var && var->forwardable)
            {
                auto &e = emit_op(result_type, id, imgexpr, true, false);

                // We only need to track dependencies if we're reading from image load/store.
                if (!pure)
                {
                    e.loaded_from = var->self;
                    var->dependees.push_back(id);
                }
            }
            else
                emit_op(result_type, id, imgexpr, false, false);
            break;
        }

        case OpImageTexelPointer:
        {
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];
            auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true);

            auto *var = maybe_get_backing_variable(ops[2]);
            e.loaded_from = var ? var->self : 0;
            break;
        }

        case OpImageWrite:
        {
            // We added Nonwritable speculatively to the OpImage variable due to glslangValidator
            // not adding the proper qualifiers.
            // If it turns out we need to write to the image after all, remove the qualifier and recompile.
            auto *var = maybe_get_backing_variable(ops[0]);
            if (var)
            {
                auto &flags = meta.at(var->self).decoration.decoration_flags;
                if (flags & (1ull << DecorationNonWritable))
                {
                    flags &= ~(1ull << DecorationNonWritable);
                    force_recompile = true;
                }
            }

            statement("imageStore(",
                    to_expression(ops[0]), ", ",
                    to_expression(ops[1]), ", ",
                    to_expression(ops[2]), ");");

            if (var && variable_storage_is_aliased(*var))
                flush_all_aliased_variables();
            break;
        }

        case OpImageQuerySize:
        {
            auto &type = expression_type(ops[2]);
            uint32_t result_type = ops[0];
            uint32_t id = ops[1];

            if (type.basetype == SPIRType::Image)
            {
                // The size of an image is always constant.
                emit_op(result_type, id, join("imageSize(", to_expression(ops[2]), ")"),
                        true, false);
            }
            else
                throw CompilerError("Invalid type for OpImageQuerySize.");
            break;
        }

        // Compute
        case OpControlBarrier:
        {
            // Ignore execution and memory scope.
            if (execution.model == ExecutionModelGLCompute)
            {
                uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
                if (mem == MemorySemanticsWorkgroupMemoryMask)
                    statement("memoryBarrierShared();");
                else if (mem && mem != 4062) // Hacky workaround of glslangValidator which emits "all" barrier for barrier() ...
                    statement("memoryBarrier();");
            }
            statement("barrier();");
            break;
        }

        case OpMemoryBarrier:
        {
            uint32_t mem = get<SPIRConstant>(ops[1]).scalar();

            // We cannot forward any loads beyond the memory barrier.
            if (mem)
                flush_all_active_variables();

            if (mem == MemorySemanticsWorkgroupMemoryMask)
                statement("memoryBarrierShared();");
            else if (mem && mem != 4062) // Hacky workaround of glslangValidator which emits "all" barrier for barrier() ...
                statement("memoryBarrier();");
            break;
        }

        case OpExtInst:
        {
            uint32_t set = ops[2];
            if (get<SPIRExtension>(set).ext != SPIRExtension::GLSL)
            {
                statement("// unimplemented ext op ", i.op);
                break;
            }

            emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
            break;
        }

        default:
            statement("// unimplemented op ", i.op);
            break;
    }
}

string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
{
    auto &memb = meta[type.self].members;
    if (index < memb.size() && !memb[index].alias.empty())
        return memb[index].alias;
    else
        return join("_", index);
}

string CompilerGLSL::member_decl(const SPIRType &type, const SPIRType &membertype, uint32_t index)
{
    uint64_t memberflags = 0;
    auto &memb = meta[type.self].members;
    if (index < memb.size())
        memberflags = memb[index].decoration_flags;

    return join(layout_for_member(type, index),
            flags_to_precision_qualifiers_glsl(membertype, memberflags),
            type_to_glsl(membertype), " ", to_member_name(type, index), type_to_array_glsl(membertype));
}

const char* CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags)
{
    if (options.es)
    {
        // Structs do not have precision qualifiers.
        if (type.basetype != SPIRType::Float &&
                type.basetype != SPIRType::Int &&
                type.basetype != SPIRType::UInt &&
                type.basetype != SPIRType::Image &&
                type.basetype != SPIRType::SampledImage &&
                type.basetype != SPIRType::Sampler)
            return "";

        if (flags & (1ull << DecorationRelaxedPrecision))
        {
            bool implied_fmediump = type.basetype == SPIRType::Float &&
                                    options.fragment.default_float_precision == Options::Mediump &&
                                    execution.model == ExecutionModelFragment;

            bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
                                    options.fragment.default_int_precision == Options::Mediump &&
                                    execution.model == ExecutionModelFragment;

            return implied_fmediump || implied_imediump ? "" : "mediump ";
        }
        else
        {
            bool implied_fhighp = type.basetype == SPIRType::Float &&
                                    ((options.fragment.default_float_precision == Options::Highp &&
                                    execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment));

            bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
                                    ((options.fragment.default_int_precision == Options::Highp &&
                                    execution.model == ExecutionModelFragment) || (execution.model != ExecutionModelFragment));

            return implied_fhighp || implied_ihighp ? "" : "highp ";
        }
    }
    else
        return "";
}

const char* CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
{
    return flags_to_precision_qualifiers_glsl(expression_type(id), meta[id].decoration.decoration_flags);
}

string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
{
    auto flags = meta[id].decoration.decoration_flags;
    string res;

    auto *var = maybe_get<SPIRVariable>(id);

    if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
        res += "shared ";

    res += to_precision_qualifiers_glsl(id);

    //if (flags & (1ull << DecorationSmooth))
    //    res += "smooth ";
    if (flags & (1ull << DecorationFlat))
        res += "flat ";
    if (flags & (1ull << DecorationNoPerspective))
        res += "noperspective ";
    if (flags & (1ull << DecorationPatch))
        res += "patch ";
    if (flags & (1ull << DecorationSample))
        res += "sample ";
    if (flags & (1ull << DecorationInvariant))
        res += "invariant ";

    auto &type = expression_type(id);
    if (type.image.dim != DimSubpassData && type.image.sampled == 2)
    {
        if (flags & (1ull << DecorationNonWritable))
            res += "readonly ";
        if (flags & (1ull << DecorationNonReadable))
            res += "writeonly ";
    }

    return res;
}

string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
{
    // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ...
    // Not sure if argument being pointer type should make the argument inout.
    auto &type = expression_type(arg.id);
    const char *direction = "";

    if (type.pointer)
    {
        if (arg.write_count && arg.read_count)
            direction = "inout ";
        else if (arg.write_count)
            direction = "out ";
    }

    return join(direction, to_qualifiers_glsl(arg.id), type_to_glsl(type), " ",
            to_name(arg.id), type_to_array_glsl(type));
}

string CompilerGLSL::variable_decl(const SPIRVariable &variable)
{
    // Ignore the pointer type since GLSL doesn't have pointers.
    auto &type = get<SPIRType>(variable.basetype);
    auto res = join(to_qualifiers_glsl(variable.self), type_to_glsl(type), " ", to_name(variable.self), type_to_array_glsl(type));
    if (variable.initializer)
        res += join(" = ", to_expression(variable.initializer));
    return res;
}

const char* CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
{
    auto flags = meta[variable.self].decoration.decoration_flags;
    if (flags & (1ull << DecorationRelaxedPrecision))
        return "mediump ";
    else
        return "highp ";
}

string CompilerGLSL::pls_decl(const PlsRemap &var)
{
    auto &variable = get<SPIRVariable>(var.id);

    SPIRType type;
    type.vecsize = pls_format_to_components(var.format);
    type.basetype = pls_format_to_basetype(var.format);

    return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", to_name(variable.self));
}

string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
{
    string res;
    for (auto &size : type.array)
    {
        res += "[";
        if (size)
            res += convert_to_string(size);
        res += "]";
    }
    return res;
}

string CompilerGLSL::image_type_glsl(const SPIRType &type)
{
    auto &imagetype = get<SPIRType>(type.image.type);
    string res;

    switch (imagetype.basetype)
    {
        case SPIRType::Int: res = "i"; break;
        case SPIRType::UInt: res = "u"; break;
        default: break;
    }

    // If we're emulating subpassInput with samplers, force sampler2D
    // so we don't have to specify format.
    if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
        res += type.image.sampled == 2 ? "image" : "texture";
    else
        res += "sampler";

    switch (type.image.dim)
    {
        case Dim1D: res += "1D"; break;
        case Dim2D: res += "2D"; break;
        case Dim3D: res += "3D"; break;
        case DimCube: res += "Cube"; break;

        case DimBuffer:
            if (options.es && options.version < 320)
                require_extension("GL_OES_texture_buffer");
            else if (!options.es && options.version < 300)
                require_extension("GL_EXT_texture_buffer_object");
            res += "Buffer";
            break;

        case DimSubpassData: res += "2D"; break;
        default: throw CompilerError("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported.");
    }

    if (type.image.arrayed)
        res += "Array";
    if (type.image.depth)
        res += "Shadow";
    if (type.image.ms)
        res += "MS";

    return res;
}

string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
{
    auto e = type_to_glsl(type);
    for (uint32_t i = 0; i < type.array.size(); i++)
        e += "[]";
    return e;
}

string CompilerGLSL::type_to_glsl(const SPIRType &type)
{
    // Ignore the pointer type since GLSL doesn't have pointers.

    switch (type.basetype)
    {
        case SPIRType::Struct:
            // Need OpName lookup here to get a "sensible" name for a struct.
            return to_name(type.self);

        case SPIRType::Image:
        case SPIRType::SampledImage:
            return image_type_glsl(type);

        case SPIRType::Sampler:
            // Not really used.
            return "sampler";

        case SPIRType::Void:
            return "void";

        default:
            break;
    }

    if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
    {
        switch (type.basetype)
        {
            case SPIRType::Bool: return "bool";
            case SPIRType::Int: return backend.basic_int_type;
            case SPIRType::UInt: return backend.basic_uint_type;
            case SPIRType::AtomicCounter: return "atomic_uint";
            case SPIRType::Float: return "float";
            default: return "???";
        }
    }
    else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
    {
        switch (type.basetype)
        {
            case SPIRType::Bool: return join("bvec", type.vecsize);
            case SPIRType::Int: return join("ivec", type.vecsize);
            case SPIRType::UInt: return join("uvec", type.vecsize);
            case SPIRType::Float: return join("vec", type.vecsize);
            default: return "???";
        }
    }
    else if (type.vecsize == type.columns) // Simple Matrix builtin
    {
        switch (type.basetype)
        {
            case SPIRType::Bool: return join("bmat", type.vecsize);
            case SPIRType::Int: return join("imat", type.vecsize);
            case SPIRType::UInt: return join("umat", type.vecsize);
            case SPIRType::Float: return join("mat", type.vecsize);
            default: return "???";
        }
    }
    else
    {
        switch (type.basetype)
        {
            case SPIRType::Bool: return join("bmat", type.columns, "x", type.vecsize);
            case SPIRType::Int: return join("imat", type.columns, "x", type.vecsize);
            case SPIRType::UInt: return join("umat", type.columns, "x", type.vecsize);
            case SPIRType::Float: return join("mat", type.columns, "x", type.vecsize);
            default: return "???";
        }
    }
}

void CompilerGLSL::add_local_variable(uint32_t id)
{
    auto &name = meta[id].decoration.alias;
    if (name.empty())
        return;

    // Reserved for temporaries.
    if (name[0] == '_')
    {
        name.clear();
        return;
    }

    update_name_cache(local_variables, name);
}

void CompilerGLSL::require_extension(const string &ext)
{
    if (forced_extensions.find(ext) == end(forced_extensions))
    {
        forced_extensions.insert(ext);
        force_recompile = true;
    }
}

bool CompilerGLSL::check_atomic_image(uint32_t id)
{
    auto &type = expression_type(id);
    if (type.storage == StorageClassImage)
    {
        if (options.es && options.version < 320)
            require_extension("GL_OES_shader_image_atomic");

        auto *var = maybe_get_backing_variable(id);
        if (var)
        {
            auto &flags = meta.at(var->self).decoration.decoration_flags;
            if (flags & ((1ull << DecorationNonWritable) | (1ull << DecorationNonReadable)))
            {
                flags &= ~(1ull << DecorationNonWritable);
                flags &= ~(1ull << DecorationNonReadable);
                force_recompile = true;
            }
        }
        return true;
    }
    else
        return false;
}

void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
{
    local_variables.clear();
    string decl;

    auto &type = get<SPIRType>(func.return_type);
    decl += flags_to_precision_qualifiers_glsl(type, return_flags);
    decl += type_to_glsl(type);
    decl += " ";

    if (func.self == execution.entry_point)
    {
        decl += "main";
        processing_entry_point = true;
    }
    else
        decl += to_name(func.self);

    decl += "(";
    for (auto &arg : func.arguments)
    {
        // Might change the variable name if it already exists in this function.
        // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
        // to use same name for variables.
        // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
        add_local_variable(arg.id);

        decl += argument_decl(arg);
        if (&arg != &func.arguments.back())
            decl += ", ";

        // Hold a pointer to the parameter so we can invalidate the readonly field if needed.
        auto *var = maybe_get<SPIRVariable>(arg.id);
        if (var)
            var->parameter = &arg;
    }

    decl += ")";
    statement(decl);
}

void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
{
    // Avoid potential cycles.
    if (func.active)
        return;
    func.active = true;

    // If we depend on a function, emit that function before we emit our own function.
    for (auto block : func.blocks)
    {
        auto &b = get<SPIRBlock>(block);
        for (auto &i : b.ops)
        {
            auto ops = stream(i);
            auto op = static_cast<Op>(i.op);

            if (op == OpFunctionCall)
            {
                // Recursively emit functions which are called.
                uint32_t func = ops[2];
                emit_function(get<SPIRFunction>(func), meta[ops[1]].decoration.decoration_flags);
            }
        }
    }

    emit_function_prototype(func, return_flags);
    begin_scope();

    function = &func;

    for (auto &v : func.local_variables)
    {
        auto &var = get<SPIRVariable>(v);
        if (expression_is_lvalue(v))
        {
            add_local_variable(var.self);

            if (var.initializer)
                statement(variable_decl(var), ";");
            else
            {
                // Don't declare variable until first use to declutter the GLSL output quite a lot.
                // If we don't touch the variable before first branch,
                // declare it then since we need variable declaration to be in top scope.
                var.deferred_declaration = true;
            }
        }
        else
        {
            // HACK: SPIRV likes to use samplers and images as local variables, but GLSL does not allow
            // this. For these types (non-lvalue), we enforce forwarding through a shadowed variable.
            // This means that when we OpStore to these variables, we just write in the expression ID directly.
            // This breaks any kind of branching, since the variable must be statically assigned.
            // Branching on samplers and images would be pretty much impossible to fake in GLSL.
            var.statically_assigned = true;
        }
    }

    auto &entry_block = get<SPIRBlock>(func.entry_block);
    entry_block.loop_dominator = SPIRBlock::NoDominator;
    emit_block_chain(entry_block);

    end_scope();
    processing_entry_point = false;
    statement("");
}

void CompilerGLSL::emit_fixup()
{
    if (execution.model == ExecutionModelVertex && options.vertex.fixup_clipspace)
    {
        const char *suffix = backend.float_literal_suffix ? "f" : "";
        statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
    }
}

bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to)
{
    auto &child = get<SPIRBlock>(to);
    for (auto &phi : child.phi_variables)
        if (phi.parent == from)
            return true;
    return false;
}

void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
{
    auto &child = get<SPIRBlock>(to);

    for (auto &phi : child.phi_variables)
        if (phi.parent == from)
            statement(to_expression(phi.function_variable), " = ", to_expression(phi.local_variable), ";");
}

void CompilerGLSL::branch(uint32_t from, uint32_t to)
{
    flush_phi(from, to);
    flush_all_active_variables();

    // This is only a continue if we branch to our loop dominator.
    if (loop_block.find(to) != end(loop_block) &&
        get<SPIRBlock>(from).loop_dominator == to)
    {
        // This can happen if we had a complex continue block which was emitted.
        // Once the continue block tries to branch to the loop header, just emit continue;
        // and end the chain here.
        statement("continue;");
    }
    else if (is_continue(to))
    {
        auto &continue_block = get<SPIRBlock>(to);
        if (continue_block.complex_continue)
        {
            // Just emit the whole block chain as is.
            auto usage_counts = expression_usage_counts;
            auto invalid = invalid_expressions;

            emit_block_chain(continue_block);

            // Expression usage counts and invalid expressions
            // are moot after returning from the continue block.
            // Since we emit the same block multiple times,
            // we don't want to invalidate ourselves.
            expression_usage_counts = usage_counts;
            invalid_expressions = invalid;
        }
        else
        {
            auto &block = get<SPIRBlock>(from);
            auto &dominator = get<SPIRBlock>(block.loop_dominator);

            // For non-complex continue blocks, we implicitly branch to the continue block
            // by having the continue block be part of the loop header in for (; ; continue-block).
            bool outside_control_flow = block_is_outside_flow_control_from_block(dominator, block);

            // Some simplification for for-loops. We always end up with a useless continue;
            // statement since we branch to a loop block.
            // Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block,
            // we can avoid writing out an explicit continue statement.
            // Similar optimization to return statements if we know we're outside flow control.
            if (!outside_control_flow)
                statement("continue;");
        }
    }
    else if (is_break(to))
        statement("break;");
    else if (!is_conditional(to))
        emit_block_chain(get<SPIRBlock>(to));
}

void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
{
    // If we branch directly to a selection merge target, we don't really need a code path.
    bool true_sub = !is_conditional(true_block);
    bool false_sub = !is_conditional(false_block);

    if (true_sub)
    {
        statement("if (", to_expression(cond), ")");
        begin_scope();
        branch(from, true_block);
        end_scope();

        if (false_sub)
        {
            statement("else");
            begin_scope();
            branch(from, false_block);
            end_scope();
        }
        else if (flush_phi_required(from, false_block))
        {
            statement("else");
            begin_scope();
            flush_phi(from, false_block);
            end_scope();
        }
    }
    else if (false_sub && !true_sub)
    {
        // Only need false path, use negative conditional.
        statement("if (!", to_expression(cond), ")");
        begin_scope();
        branch(from, false_block);
        end_scope();

        if (flush_phi_required(from, true_block))
        {
            statement("else");
            begin_scope();
            flush_phi(from, true_block);
            end_scope();
        }
    }
}

void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block)
{
    // Propagate down the loop dominator block, so that dominated blocks can back trace.
    if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator)
    {
        uint32_t dominator = block.merge == SPIRBlock::MergeLoop ?
            block.self : block.loop_dominator;

        auto set_dominator = [this](uint32_t self, uint32_t dominator) {
            auto &block = this->get<SPIRBlock>(self);

            // If we already have a loop dominator, we're trying to break out to merge targets
            // which should not update the loop dominator.
            if (!block.loop_dominator)
                block.loop_dominator = dominator;
        };

        // After merging a loop, we inherit the loop dominator always.
        if (block.merge_block)
            set_dominator(block.merge_block, block.loop_dominator);

        if (block.true_block)
            set_dominator(block.true_block, dominator);
        if (block.false_block)
            set_dominator(block.false_block, dominator);
        if (block.next_block)
            set_dominator(block.next_block, dominator);

        for (auto &c : block.cases)
            set_dominator(c.block, dominator);

        // In older glslang output continue_block can be == loop header.
        if (block.continue_block && block.continue_block != block.self)
            set_dominator(block.continue_block, dominator);
    }
}

// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
string CompilerGLSL::emit_continue_block(uint32_t continue_block)
{
    auto *block = &get<SPIRBlock>(continue_block);

    // While emitting the continue block, declare_temporary will check this
    // if we have to emit temporaries.
    current_continue_block = block;

    vector<string> statements;

    // Capture all statements into our list.
    auto *old = redirect_statement;
    redirect_statement = &statements;

    // Stamp out all blocks one after each other.
    while (loop_block.find(block->self) == end(loop_block))
    {
        propagate_loop_dominators(*block);
        // Write out all instructions we have in this block.
        for (auto &op : block->ops)
            emit_instruction(op);

        // For plain branchless for/while continue blocks.
        if (block->next_block)
        {
            flush_phi(continue_block, block->next_block);
            block = &get<SPIRBlock>(block->next_block);
        }
        // For do while blocks. The last block will be a select block.
        else if (block->true_block)
        {
            flush_phi(continue_block, block->true_block);
            block = &get<SPIRBlock>(block->true_block);
        }
    }

    // Restore old pointer.
    redirect_statement = old;

    // Somewhat ugly, strip off the last ';' since we use ',' instead.
    // Ideally, we should select this behavior in statement().
    for (auto &statement : statements)
    {
        if (!statement.empty() && statement.back() == ';')
            statement.pop_back();
    }

    current_continue_block = nullptr;
    return merge(statements);
}

bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
{
    SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));

    if (method == SPIRBlock::MergeToSelectForLoop)
    {
        uint32_t current_count = statement_count;
        // If we're trying to create a true for loop,
        // we need to make sure that all opcodes before branch statement do not actually emit any code.
        // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
        for (auto &op : block.ops)
            emit_instruction(op);

        bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);

        // This can work! We only did trivial things which could be forwarded in block body!
        if (current_count == statement_count && condition_is_temporary)
        {
            switch (continue_type)
            {
                case SPIRBlock::ForLoop:
                    statement("for (; ", to_expression(block.condition), "; ",
                            emit_continue_block(block.continue_block), ")");
                    break;

                case SPIRBlock::WhileLoop:
                    statement("while (", to_expression(block.condition), ")");
                    break;

                default:
                    throw CompilerError("For/while loop detected, but need while/for loop semantics.");
            }

            begin_scope();
            return true;
        }
        else
        {
            block.disable_block_optimization = true;
            force_recompile = true;
            begin_scope(); // We'll see an end_scope() later.
            return false;
        }
    }
    else if (method == SPIRBlock::MergeToDirectForLoop)
    {
        uint32_t current_count = statement_count;
        auto &child = get<SPIRBlock>(block.next_block);

        // If we're trying to create a true for loop,
        // we need to make sure that all opcodes before branch statement do not actually emit any code.
        // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
        for (auto &op : child.ops)
            emit_instruction(op);

        bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);

        if (current_count == statement_count && condition_is_temporary)
        {
            propagate_loop_dominators(child);

            switch (continue_type)
            {
                case SPIRBlock::ForLoop:
                    statement("for (; ", to_expression(child.condition), "; ",
                            emit_continue_block(block.continue_block), ")");
                    break;

                case SPIRBlock::WhileLoop:
                    statement("while (", to_expression(child.condition), ")");
                    break;

                default:
                    throw CompilerError("For/while loop detected, but need while/for loop semantics.");
            }

            begin_scope();
            branch(child.self, child.true_block);
            return true;
        }
        else
        {
            block.disable_block_optimization = true;
            force_recompile = true;
            begin_scope(); // We'll see an end_scope() later.
            return false;
        }
    }
    else
        return false;
}

void CompilerGLSL::flush_undeclared_variables()
{
    // Declare undeclared variables.
    if (function->flush_undeclared)
    {
        for (auto &v : function->local_variables)
        {
            auto &var = get<SPIRVariable>(v);
            if (var.deferred_declaration)
                statement(variable_decl(var), ";");
            var.deferred_declaration = false;
        }
        function->flush_undeclared = false;
    }
}

void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
    propagate_loop_dominators(block);

    bool select_branch_to_true_block = false;
    bool skip_direct_branch = false;

    // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
    for (auto &tmp : block.declare_temporary)
    {
        auto flags = meta[tmp.second].decoration.decoration_flags;
        auto &type = get<SPIRType>(tmp.first);
        statement(flags_to_precision_qualifiers_glsl(type, flags),
                  type_to_glsl(type), " ", to_name(tmp.second), type_to_array_glsl(type), ";");
    }

    SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
    if (block.continue_block)
        continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));

    // This is the older loop behavior in glslang which branches to loop body directly from the loop header.
    if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
    {
        flush_undeclared_variables();
        if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
        {
            // The body of while, is actually just the true block, so always branch there
            // unconditionally.
            select_branch_to_true_block = true;
        }
    }
    // This is the newer loop behavior in glslang which branches from Loop header directly to
    // a new block, which in turn has a OpBranchSelection without a selection merge.
    else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
    {
        flush_undeclared_variables();
        if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
            skip_direct_branch = true;
    }
    else if (continue_type == SPIRBlock::DoWhileLoop)
    {
        statement("do");
        begin_scope();
        for (auto &op : block.ops)
           emit_instruction(op);
    }
    else if (block.merge == SPIRBlock::MergeLoop)
    {
        flush_undeclared_variables();

        // We have a generic loop without any distinguishable pattern like for, while or do while.
        get<SPIRBlock>(block.continue_block).complex_continue = true;
        continue_type = SPIRBlock::ComplexLoop;

        statement("for (;;)");
        begin_scope();
        for (auto &op : block.ops)
           emit_instruction(op);
    }
    else
    {
        for (auto &op : block.ops)
           emit_instruction(op);
    }

    bool emit_next_block = true;

    // Handle end of block.
    switch (block.terminator)
    {
        case SPIRBlock::Direct:
            // True when emitting complex continue block.
            if (block.loop_dominator == block.next_block)
            {
                branch(block.self, block.next_block);
                emit_next_block = false;
            }
            // True if MergeToDirectForLoop succeeded.
            else if (skip_direct_branch)
                emit_next_block = false;
            else if (is_continue(block.next_block) ||
                is_break(block.next_block) ||
                is_conditional(block.next_block))
            {
                branch(block.self, block.next_block);
                emit_next_block = false;
            }
            break;

        case SPIRBlock::Select:
            // True if MergeToSelectForLoop succeeded.
            if (select_branch_to_true_block)
                branch(block.self, block.true_block);
            else
            {
                flush_undeclared_variables();
                branch(block.self, block.condition, block.true_block, block.false_block);
            }
            break;

        case SPIRBlock::MultiSelect:
        {
            flush_undeclared_variables();
            auto &type = expression_type(block.condition);
            bool uint32_t_case = type.basetype == SPIRType::UInt;

            statement("switch (", to_expression(block.condition), ")");
            begin_scope();

            for (auto &c : block.cases)
            {
                auto case_value = uint32_t_case ? convert_to_string(uint32_t(c.value)) : convert_to_string(int32_t(c.value));
                statement("case ", case_value, ":");
                begin_scope();
                branch(block.self, c.block);
                end_scope();
            }

            if (block.default_block != block.next_block)
            {
                statement("default:");
                begin_scope();
                if (is_break(block.default_block))
                    throw CompilerError("Cannot break; out of a switch statement and out of a loop at the same time ...");
                branch(block.self, block.default_block);
                end_scope();
            }
            else if (flush_phi_required(block.self, block.next_block))
            {
                statement("default:");
                begin_scope();
                flush_phi(block.self, block.next_block);
                statement("break;");
                end_scope();
            }

            end_scope();
            break;
        }

        case SPIRBlock::Return:
            if (processing_entry_point)
                emit_fixup();

            if (block.return_value)
            {
                // OpReturnValue can return Undef, so don't emit anything for this case.
                if (ids.at(block.return_value).get_type() != TypeUndef)
                    statement("return ", to_expression(block.return_value), ";");
            }
            // If this block is the very final block and not called from control flow,
            // we do not need an explicit return which looks out of place. Just end the function here.
            // In the very weird case of for(;;) { return; } executing return is unconditional,
            // but we actually need a return here ...
            else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(function->entry_block), block) ||
                    block.loop_dominator != SPIRBlock::NoDominator)
                statement("return;");
            break;

        case SPIRBlock::Kill:
            statement("discard;");
            break;

        default:
            throw CompilerError("Unimplemented block terminator.");
    }

    if (block.next_block && emit_next_block)
    {
        // If we hit this case, we're dealing with an unconditional branch, which means we will output
        // that block after this. If we had selection merge, we already flushed phi variables.
        if (block.merge != SPIRBlock::MergeSelection)
            flush_phi(block.self, block.next_block);
        emit_block_chain(get<SPIRBlock>(block.next_block));
    }

    if (block.merge == SPIRBlock::MergeLoop)
    {
        if (continue_type == SPIRBlock::DoWhileLoop)
        {
            // Make sure that we run the continue block to get the expressions set, but this
            // should become an empty string.
            // We have no fallbacks if we cannot forward everything to temporaries ...
            auto statements = emit_continue_block(block.continue_block);
            if (!statements.empty())
            {
                // The DoWhile block has side effects, force ComplexLoop pattern next pass.
                get<SPIRBlock>(block.continue_block).complex_continue = true;
                force_recompile = true;
            }

            end_scope_decl(join("while (", to_expression(get<SPIRBlock>(block.continue_block).condition), ")"));
        }
        else
            end_scope();

        flush_phi(block.self, block.merge_block);
        emit_block_chain(get<SPIRBlock>(block.merge_block));
    }
}

void CompilerGLSL::begin_scope()
{
    statement("{");
    indent++;
}

void CompilerGLSL::end_scope()
{
    if (!indent)
        throw CompilerError("Popping empty indent stack.");
    indent--;
    statement("}");
}

void CompilerGLSL::end_scope_decl()
{
    if (!indent)
        throw CompilerError("Popping empty indent stack.");
    indent--;
    statement("};");
}

void CompilerGLSL::end_scope_decl(const string &decl)
{
    if (!indent)
        throw CompilerError("Popping empty indent stack.");
    indent--;
    statement("} ", decl, ";");
}