SPIRV-Cross/spirv_glsl.cpp

7788 lines
224 KiB
C++
Raw Normal View History

2016-03-02 17:09:16 +00:00
/*
2017-01-28 08:00:40 +00:00
* Copyright 2015-2017 ARM Limited
2016-03-02 17:09:16 +00:00
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "spirv_glsl.hpp"
2016-03-02 17:09:16 +00:00
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
2016-03-02 17:09:16 +00:00
#include <algorithm>
#include <assert.h>
#include <utility>
2016-03-02 17:09:16 +00:00
using namespace spv;
using namespace spirv_cross;
2016-03-02 17:09:16 +00:00
using namespace std;
2017-10-10 13:23:07 +00:00
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
switch (packing)
{
case BufferPackingHLSLCbuffer:
case BufferPackingHLSLCbufferPackOffset:
case BufferPackingStd140:
case BufferPackingStd140EnhancedLayout:
return true;
default:
return false;
}
}
static bool packing_is_hlsl(BufferPackingStandard packing)
{
switch (packing)
{
case BufferPackingHLSLCbuffer:
case BufferPackingHLSLCbufferPackOffset:
return true;
default:
return false;
}
}
static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
switch (packing)
{
case BufferPackingStd140:
case BufferPackingStd430:
case BufferPackingHLSLCbuffer:
return false;
default:
return true;
}
}
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
switch (packing)
{
case BufferPackingStd140EnhancedLayout:
return BufferPackingStd140;
case BufferPackingStd430EnhancedLayout:
return BufferPackingStd430;
case BufferPackingHLSLCbufferPackOffset:
return BufferPackingHLSLCbuffer;
default:
return packing;
}
}
// Sanitizes underscores for GLSL where multiple underscores in a row are not allowed.
string CompilerGLSL::sanitize_underscores(const string &str)
{
string res;
res.reserve(str.size());
bool last_underscore = false;
for (auto c : str)
{
if (c == '_')
{
if (last_underscore)
continue;
res += c;
last_underscore = true;
}
else
{
res += c;
last_underscore = false;
}
}
return res;
}
// Returns true if an arithmetic operation does not change behavior depending on signedness.
static bool opcode_is_sign_invariant(Op opcode)
{
switch (opcode)
{
case OpIEqual:
case OpINotEqual:
case OpISub:
case OpIAdd:
case OpIMul:
case OpShiftLeftLogical:
case OpBitwiseOr:
case OpBitwiseXor:
case OpBitwiseAnd:
return true;
default:
return false;
}
}
static const char *to_pls_layout(PlsFormat format)
2016-03-02 17:09:16 +00:00
{
switch (format)
{
case PlsR11FG11FB10F:
return "layout(r11f_g11f_b10f) ";
case PlsR32F:
return "layout(r32f) ";
case PlsRG16F:
return "layout(rg16f) ";
case PlsRGB10A2:
return "layout(rgb10_a2) ";
case PlsRGBA8:
return "layout(rgba8) ";
case PlsRG16:
return "layout(rg16) ";
case PlsRGBA8I:
return "layout(rgba8i)";
case PlsRG16I:
return "layout(rg16i) ";
case PlsRGB10A2UI:
return "layout(rgb10_a2ui) ";
case PlsRGBA8UI:
return "layout(rgba8ui) ";
case PlsRG16UI:
return "layout(rg16ui) ";
case PlsR32UI:
return "layout(r32ui) ";
default:
return "";
}
2016-03-02 17:09:16 +00:00
}
static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
switch (format)
{
default:
case PlsR11FG11FB10F:
case PlsR32F:
case PlsRG16F:
case PlsRGB10A2:
case PlsRGBA8:
case PlsRG16:
return SPIRType::Float;
case PlsRGBA8I:
case PlsRG16I:
return SPIRType::Int;
case PlsRGB10A2UI:
case PlsRGBA8UI:
case PlsRG16UI:
case PlsR32UI:
return SPIRType::UInt;
}
2016-03-02 17:09:16 +00:00
}
static uint32_t pls_format_to_components(PlsFormat format)
{
switch (format)
{
default:
case PlsR32F:
case PlsR32UI:
return 1;
case PlsRG16F:
case PlsRG16:
case PlsRG16UI:
case PlsRG16I:
return 2;
case PlsR11FG11FB10F:
return 3;
case PlsRGB10A2:
case PlsRGBA8:
case PlsRGBA8I:
case PlsRGB10A2UI:
case PlsRGBA8UI:
return 4;
}
2016-03-02 17:09:16 +00:00
}
2017-01-20 16:33:59 +00:00
static const char *vector_swizzle(int vecsize, int index)
{
2017-01-20 16:33:59 +00:00
static const char *swizzle[4][4] = {
{ ".x", ".y", ".z", ".w" }, { ".xy", ".yz", ".zw" }, { ".xyz", ".yzw" }, { "" }
};
assert(vecsize >= 1 && vecsize <= 4);
assert(index >= 0 && index < 4);
assert(swizzle[vecsize - 1][index]);
return swizzle[vecsize - 1][index];
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::reset()
{
// We do some speculative optimizations which should pretty much always work out,
// but just in case the SPIR-V is rather weird, recompile until it's happy.
// This typically only means one extra pass.
force_recompile = false;
// Clear invalid expression tracking.
invalid_expressions.clear();
current_function = nullptr;
// Clear temporary usage tracking.
expression_usage_counts.clear();
forwarded_temporaries.clear();
resource_names.clear();
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
// Clear unflushed dependees.
id.get<SPIRVariable>().dependees.clear();
}
else if (id.get_type() == TypeExpression)
{
// And remove all expressions.
id.reset();
}
else if (id.get_type() == TypeFunction)
{
// Reset active state for all functions.
id.get<SPIRFunction>().active = false;
id.get<SPIRFunction>().flush_undeclared = true;
}
}
statement_count = 0;
indent = 0;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::remap_pls_variables()
{
for (auto &input : pls_inputs)
{
auto &var = get<SPIRVariable>(input.id);
bool input_is_target = false;
if (var.storage == StorageClassUniformConstant)
{
auto &type = get<SPIRType>(var.basetype);
input_is_target = type.image.dim == DimSubpassData;
}
if (var.storage != StorageClassInput && !input_is_target)
SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
var.remapped_variable = true;
}
for (auto &output : pls_outputs)
{
auto &var = get<SPIRVariable>(output.id);
if (var.storage != StorageClassOutput)
SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
var.remapped_variable = true;
}
2016-03-02 17:09:16 +00:00
}
2016-07-27 08:59:00 +00:00
void CompilerGLSL::find_static_extensions()
{
for (auto &id : ids)
{
if (id.get_type() == TypeType)
{
auto &type = id.get<SPIRType>();
if (type.basetype == SPIRType::Double)
{
if (options.es)
SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
2016-07-27 08:59:00 +00:00
if (!options.es && options.version < 400)
require_extension("GL_ARB_gpu_shader_fp64");
}
2016-07-27 09:27:00 +00:00
if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
{
if (options.es)
SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
2016-07-27 09:27:00 +00:00
if (!options.es)
require_extension("GL_ARB_gpu_shader_int64");
}
2016-07-27 08:59:00 +00:00
}
}
auto &execution = get_entry_point();
switch (execution.model)
{
case ExecutionModelGLCompute:
if (!options.es && options.version < 430)
require_extension("GL_ARB_compute_shader");
if (options.es && options.version < 310)
SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
break;
case ExecutionModelGeometry:
if (options.es && options.version < 320)
require_extension("GL_EXT_geometry_shader");
2017-06-17 01:06:46 +00:00
if (!options.es && options.version < 150)
require_extension("GL_ARB_geometry_shader4");
if ((execution.flags & (1ull << ExecutionModeInvocations)) && execution.invocations != 1)
{
// Instanced GS is part of 400 core or this extension.
if (!options.es && options.version < 400)
require_extension("GL_ARB_gpu_shader5");
}
break;
case ExecutionModelTessellationEvaluation:
case ExecutionModelTessellationControl:
if (options.es && options.version < 320)
require_extension("GL_EXT_tessellation_shader");
if (!options.es && options.version < 400)
require_extension("GL_ARB_tessellation_shader");
break;
default:
break;
}
if (!pls_inputs.empty() || !pls_outputs.empty())
require_extension("GL_EXT_shader_pixel_local_storage");
if (options.separate_shader_objects && !options.es && options.version < 410)
require_extension("GL_ARB_separate_shader_objects");
2016-07-27 08:59:00 +00:00
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::compile()
{
// Force a classic "C" locale, reverts when function returns
ClassicLocale classic_locale;
2016-07-27 08:59:00 +00:00
// Scan the SPIR-V to find trivial uses of extensions.
find_static_extensions();
fixup_image_load_store_access();
update_active_builtins();
analyze_sampler_comparison_states();
2016-07-27 08:59:00 +00:00
uint32_t pass_count = 0;
do
{
if (pass_count >= 3)
SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
2016-03-02 17:09:16 +00:00
reset();
2016-03-02 17:09:16 +00:00
// Move constructor for this type is broken on GCC 4.9 ...
buffer = unique_ptr<ostringstream>(new ostringstream());
2016-03-02 17:09:16 +00:00
emit_header();
emit_resources();
2016-03-02 17:09:16 +00:00
emit_function(get<SPIRFunction>(entry_point), 0);
2016-03-02 17:09:16 +00:00
pass_count++;
} while (force_recompile);
2016-03-02 17:09:16 +00:00
return buffer->str();
2016-03-02 17:09:16 +00:00
}
std::string CompilerGLSL::get_partial_source()
{
return buffer ? buffer->str() : "No compiled source available yet.";
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::emit_header()
{
auto &execution = get_entry_point();
statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
if (!options.es && options.version < 420)
{
// Needed for binding = # on UBOs, etc.
if (options.enable_420pack_extension)
{
statement("#ifdef GL_ARB_shading_language_420pack");
statement("#extension GL_ARB_shading_language_420pack : require");
statement("#endif");
}
// Needed for: layout(early_fragment_tests) in;
if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests))
require_extension("GL_ARB_shader_image_load_store");
}
for (auto &ext : forced_extensions)
statement("#extension ", ext, " : require");
for (auto &header : header_lines)
statement(header);
vector<string> inputs;
vector<string> outputs;
switch (execution.model)
{
case ExecutionModelGeometry:
outputs.push_back(join("max_vertices = ", execution.output_vertices));
if ((execution.flags & (1ull << ExecutionModeInvocations)) && execution.invocations != 1)
inputs.push_back(join("invocations = ", execution.invocations));
if (execution.flags & (1ull << ExecutionModeInputPoints))
inputs.push_back("points");
if (execution.flags & (1ull << ExecutionModeInputLines))
inputs.push_back("lines");
if (execution.flags & (1ull << ExecutionModeInputLinesAdjacency))
inputs.push_back("lines_adjacency");
if (execution.flags & (1ull << ExecutionModeTriangles))
inputs.push_back("triangles");
if (execution.flags & (1ull << ExecutionModeInputTrianglesAdjacency))
inputs.push_back("triangles_adjacency");
if (execution.flags & (1ull << ExecutionModeOutputTriangleStrip))
outputs.push_back("triangle_strip");
if (execution.flags & (1ull << ExecutionModeOutputPoints))
outputs.push_back("points");
if (execution.flags & (1ull << ExecutionModeOutputLineStrip))
outputs.push_back("line_strip");
break;
case ExecutionModelTessellationControl:
if (execution.flags & (1ull << ExecutionModeOutputVertices))
outputs.push_back(join("vertices = ", execution.output_vertices));
break;
case ExecutionModelTessellationEvaluation:
if (execution.flags & (1ull << ExecutionModeQuads))
inputs.push_back("quads");
if (execution.flags & (1ull << ExecutionModeTriangles))
inputs.push_back("triangles");
if (execution.flags & (1ull << ExecutionModeIsolines))
inputs.push_back("isolines");
if (execution.flags & (1ull << ExecutionModePointMode))
inputs.push_back("point_mode");
if ((execution.flags & (1ull << ExecutionModeIsolines)) == 0)
{
if (execution.flags & (1ull << ExecutionModeVertexOrderCw))
inputs.push_back("cw");
if (execution.flags & (1ull << ExecutionModeVertexOrderCcw))
inputs.push_back("ccw");
}
if (execution.flags & (1ull << ExecutionModeSpacingFractionalEven))
inputs.push_back("fractional_even_spacing");
if (execution.flags & (1ull << ExecutionModeSpacingFractionalOdd))
inputs.push_back("fractional_odd_spacing");
if (execution.flags & (1ull << ExecutionModeSpacingEqual))
inputs.push_back("equal_spacing");
break;
case ExecutionModelGLCompute:
{
if (execution.workgroup_size.constant != 0)
{
SpecializationConstant wg_x, wg_y, wg_z;
get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
if (wg_x.id)
{
if (options.vulkan_semantics)
inputs.push_back(join("local_size_x_id = ", wg_x.constant_id));
else
inputs.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).scalar()));
}
else
inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
if (wg_y.id)
{
if (options.vulkan_semantics)
inputs.push_back(join("local_size_y_id = ", wg_y.constant_id));
else
inputs.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).scalar()));
}
else
inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
if (wg_z.id)
{
if (options.vulkan_semantics)
inputs.push_back(join("local_size_z_id = ", wg_z.constant_id));
else
inputs.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).scalar()));
}
else
inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
else
{
inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
break;
}
case ExecutionModelFragment:
if (options.es)
{
switch (options.fragment.default_float_precision)
{
case Options::Lowp:
statement("precision lowp float;");
break;
case Options::Mediump:
statement("precision mediump float;");
break;
case Options::Highp:
statement("precision highp float;");
break;
default:
break;
}
switch (options.fragment.default_int_precision)
{
case Options::Lowp:
statement("precision lowp int;");
break;
case Options::Mediump:
statement("precision mediump int;");
break;
case Options::Highp:
statement("precision highp int;");
break;
default:
break;
}
}
if (execution.flags & (1ull << ExecutionModeEarlyFragmentTests))
inputs.push_back("early_fragment_tests");
if (execution.flags & (1ull << ExecutionModeDepthGreater))
inputs.push_back("depth_greater");
if (execution.flags & (1ull << ExecutionModeDepthLess))
inputs.push_back("depth_less");
break;
default:
break;
}
if (!inputs.empty())
statement("layout(", merge(inputs), ") in;");
if (!outputs.empty())
statement("layout(", merge(outputs), ") out;");
statement("");
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
return type.basetype == SPIRType::Struct && type.member_types.empty();
}
void CompilerGLSL::emit_struct(SPIRType &type)
2016-03-02 17:09:16 +00:00
{
// Struct types can be stamped out multiple times
// with just different offsets, matrix layouts, etc ...
// Type-punning with these types is legal, which complicates things
// when we are storing struct and array types in an SSBO for example.
if (type.type_alias != 0)
return;
// Don't declare empty structs in GLSL, this is not allowed.
// Empty structs is a corner case of HLSL output, and only sensible thing to do is avoiding to declare
// these types.
if (type_is_empty(type))
return;
add_resource_name(type.self);
auto name = type_to_glsl(type);
statement(!backend.explicit_struct_type ? "struct " : "", name);
begin_scope();
type.member_name_cache.clear();
uint32_t i = 0;
bool emitted = false;
for (auto &member : type.member_types)
{
add_member_name(type, i);
emit_struct_member(type, member, i);
i++;
emitted = true;
}
end_scope_decl();
if (emitted)
statement("");
2016-03-02 17:09:16 +00:00
}
uint64_t CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index)
{
uint64_t flags = 0;
auto &memb = meta[type.self].members;
if (index >= memb.size())
return 0;
auto &dec = memb[index];
// If our type is a struct, traverse all the members as well recursively.
flags |= dec.decoration_flags;
for (uint32_t i = 0; i < type.member_types.size(); i++)
flags |= combined_decoration_for_member(get<SPIRType>(type.member_types[i]), i);
return flags;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::to_interpolation_qualifiers(uint64_t flags)
{
string res;
//if (flags & (1ull << DecorationSmooth))
// res += "smooth ";
if (flags & (1ull << DecorationFlat))
res += "flat ";
if (flags & (1ull << DecorationNoPerspective))
res += "noperspective ";
if (flags & (1ull << DecorationCentroid))
res += "centroid ";
if (flags & (1ull << DecorationPatch))
res += "patch ";
if (flags & (1ull << DecorationSample))
res += "sample ";
if (flags & (1ull << DecorationInvariant))
res += "invariant ";
return res;
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
if (is_legacy())
return "";
bool is_block = (meta[type.self].decoration.decoration_flags &
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
if (!is_block)
return "";
auto &memb = meta[type.self].members;
if (index >= memb.size())
return "";
auto &dec = memb[index];
vector<string> attr;
// We can only apply layouts on members in block interfaces.
// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
//
// We would like to go from (SPIR-V style):
//
// struct Foo { layout(row_major) mat4 matrix; };
// buffer UBO { Foo foo; };
//
// to
//
// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
auto flags = combined_decoration_for_member(type, index);
if (flags & (1ull << DecorationRowMajor))
attr.push_back("row_major");
// We don't emit any global layouts, so column_major is default.
//if (flags & (1ull << DecorationColMajor))
// attr.push_back("column_major");
2017-10-19 12:17:18 +00:00
if ((dec.decoration_flags & (1ull << DecorationLocation)) != 0 && can_use_io_location(type.storage))
attr.push_back(join("location = ", dec.location));
// DecorationCPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
// This is only done selectively in GLSL as needed.
if (has_decoration(type.self, DecorationCPacked) && (dec.decoration_flags & (1ull << DecorationOffset)) != 0)
attr.push_back(join("offset = ", dec.offset));
if (attr.empty())
return "";
string res = "layout(";
res += merge(attr);
res += ") ";
return res;
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
2016-03-02 17:09:16 +00:00
{
2016-07-12 07:35:15 +00:00
auto check_desktop = [this] {
if (options.es)
SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
2016-07-12 07:35:15 +00:00
};
switch (format)
{
case ImageFormatRgba32f:
return "rgba32f";
case ImageFormatRgba16f:
return "rgba16f";
case ImageFormatR32f:
return "r32f";
case ImageFormatRgba8:
return "rgba8";
case ImageFormatRgba8Snorm:
return "rgba8_snorm";
case ImageFormatRg32f:
return "rg32f";
case ImageFormatRg16f:
return "rg16f";
case ImageFormatRgba32i:
return "rgba32i";
case ImageFormatRgba16i:
return "rgba16i";
case ImageFormatR32i:
return "r32i";
case ImageFormatRgba8i:
return "rgba8i";
case ImageFormatRg32i:
return "rg32i";
case ImageFormatRg16i:
return "rg16i";
case ImageFormatRgba32ui:
return "rgba32ui";
case ImageFormatRgba16ui:
return "rgba16ui";
case ImageFormatR32ui:
return "r32ui";
case ImageFormatRgba8ui:
return "rgba8ui";
case ImageFormatRg32ui:
return "rg32ui";
case ImageFormatRg16ui:
return "rg16ui";
2016-07-12 07:35:15 +00:00
// Desktop-only formats
case ImageFormatR11fG11fB10f:
check_desktop();
return "r11f_g11f_b10f";
case ImageFormatR16f:
check_desktop();
return "r16f";
case ImageFormatRgb10A2:
check_desktop();
return "rgb10_a2";
case ImageFormatR8:
check_desktop();
return "r8";
case ImageFormatRg8:
check_desktop();
return "rg8";
case ImageFormatR16:
check_desktop();
return "r16";
case ImageFormatRg16:
check_desktop();
return "rg16";
case ImageFormatRgba16:
check_desktop();
return "rgba16";
case ImageFormatR16Snorm:
check_desktop();
return "r16_snorm";
case ImageFormatRg16Snorm:
check_desktop();
return "rg16_snorm";
case ImageFormatRgba16Snorm:
check_desktop();
return "rgba16_snorm";
case ImageFormatR8Snorm:
check_desktop();
return "r8_snorm";
case ImageFormatRg8Snorm:
check_desktop();
return "rg8_snorm";
case ImageFormatR8ui:
check_desktop();
return "r8ui";
case ImageFormatRg8ui:
check_desktop();
return "rg8ui";
case ImageFormatR16ui:
check_desktop();
return "r16ui";
case ImageFormatRgb10a2ui:
check_desktop();
return "rgb10_a2ui";
case ImageFormatR8i:
check_desktop();
return "r8i";
case ImageFormatRg8i:
check_desktop();
return "rg8i";
case ImageFormatR16i:
check_desktop();
return "r16i";
default:
case ImageFormatUnknown:
return nullptr;
}
2016-03-02 17:09:16 +00:00
}
uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
2016-07-27 08:59:00 +00:00
{
switch (type.basetype)
{
case SPIRType::Double:
2016-07-27 09:27:00 +00:00
case SPIRType::Int64:
case SPIRType::UInt64:
2016-07-27 08:59:00 +00:00
return 8;
default:
return 4;
}
}
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, uint64_t flags, BufferPackingStandard packing)
2016-03-02 17:09:16 +00:00
{
const uint32_t base_alignment = type_to_packed_base_size(type, packing);
2017-10-10 13:23:07 +00:00
if (!type.array.empty())
{
uint32_t minimum_alignment = 1;
if (packing_is_vec4_padded(packing))
minimum_alignment = 16;
auto *tmp = &get<SPIRType>(type.parent_type);
while (!tmp->array.empty())
tmp = &get<SPIRType>(tmp->parent_type);
// Get the alignment of the base type, then maybe round up.
return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
}
if (type.basetype == SPIRType::Struct)
{
// Rule 9. Structs alignments are maximum alignment of its members.
uint32_t alignment = 0;
for (uint32_t i = 0; i < type.member_types.size(); i++)
{
auto member_flags = meta[type.self].members.at(i).decoration_flags;
2017-10-10 08:22:40 +00:00
alignment =
max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
}
// In std140, struct alignment is rounded up to 16.
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
alignment = max(alignment, 16u);
return alignment;
}
else
{
2017-10-10 13:23:07 +00:00
// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
// a vec4, this is handled outside since that part knows our current offset.
if (type.columns == 1 && packing_is_hlsl(packing))
return base_alignment;
// From 7.6.2.2 in GL 4.5 core spec.
// Rule 1
if (type.vecsize == 1 && type.columns == 1)
return base_alignment;
// Rule 2
if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
return type.vecsize * base_alignment;
// Rule 3
if (type.vecsize == 3 && type.columns == 1)
return 4 * base_alignment;
// Rule 4 implied. Alignment does not change in std430.
// Rule 5. Column-major matrices are stored as arrays of
// vectors.
if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
{
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
return 4 * base_alignment;
else if (type.vecsize == 3)
return 4 * base_alignment;
else
return type.vecsize * base_alignment;
}
// Rule 6 implied.
// Rule 7.
if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
{
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
return 4 * base_alignment;
else if (type.columns == 3)
return 4 * base_alignment;
else
return type.columns * base_alignment;
}
// Rule 8 implied.
}
SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
2016-03-02 17:09:16 +00:00
}
uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, uint64_t flags, BufferPackingStandard packing)
2016-03-02 17:09:16 +00:00
{
// Array stride is equal to aligned size of the underlying type.
2017-10-10 13:23:07 +00:00
uint32_t parent = type.parent_type;
assert(parent);
2017-10-10 13:23:07 +00:00
auto &tmp = get<SPIRType>(parent);
2017-10-10 13:23:07 +00:00
uint32_t size = type_to_packed_size(tmp, flags, packing);
if (tmp.array.empty())
{
uint32_t alignment = type_to_packed_alignment(type, flags, packing);
return (size + alignment - 1) & ~(alignment - 1);
}
else
{
// For multidimensional arrays, array stride always matches size of subtype.
// The alignment cannot change because multidimensional arrays are basically N * M array elements.
return size;
}
2016-03-02 17:09:16 +00:00
}
uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, uint64_t flags, BufferPackingStandard packing)
2016-03-02 17:09:16 +00:00
{
if (!type.array.empty())
2017-10-10 13:23:07 +00:00
{
2017-10-10 08:22:40 +00:00
return to_array_size_literal(type, uint32_t(type.array.size()) - 1) *
type_to_packed_array_stride(type, flags, packing);
2017-10-10 13:23:07 +00:00
}
const uint32_t base_alignment = type_to_packed_base_size(type, packing);
uint32_t size = 0;
if (type.basetype == SPIRType::Struct)
{
uint32_t pad_alignment = 1;
for (uint32_t i = 0; i < type.member_types.size(); i++)
{
auto member_flags = meta[type.self].members.at(i).decoration_flags;
auto &member_type = get<SPIRType>(type.member_types[i]);
uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
uint32_t alignment = max(packed_alignment, pad_alignment);
// The next member following a struct member is aligned to the base alignment of the struct that came before.
// GL 4.5 spec, 7.6.2.2.
if (member_type.basetype == SPIRType::Struct)
pad_alignment = packed_alignment;
else
pad_alignment = 1;
size = (size + alignment - 1) & ~(alignment - 1);
size += type_to_packed_size(member_type, member_flags, packing);
}
}
else
{
if (type.columns == 1)
size = type.vecsize * base_alignment;
if ((flags & (1ull << DecorationColMajor)) && type.columns > 1)
{
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
size = type.columns * 4 * base_alignment;
else if (type.vecsize == 3)
size = type.columns * 4 * base_alignment;
else
size = type.columns * type.vecsize * base_alignment;
}
if ((flags & (1ull << DecorationRowMajor)) && type.vecsize > 1)
{
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
size = type.vecsize * 4 * base_alignment;
else if (type.columns == 3)
size = type.vecsize * 4 * base_alignment;
else
size = type.vecsize * type.columns * base_alignment;
}
}
return size;
2016-03-02 17:09:16 +00:00
}
2017-10-10 13:37:53 +00:00
bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing)
2016-03-02 17:09:16 +00:00
{
// This is very tricky and error prone, but try to be exhaustive and correct here.
// SPIR-V doesn't directly say if we're using std430 or std140.
// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
//
// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
//
// The only two differences between std140 and std430 are related to padding alignment/array stride
// in arrays and structs. In std140 they take minimum vec4 alignment.
// std430 only removes the vec4 requirement.
uint32_t offset = 0;
uint32_t pad_alignment = 1;
for (uint32_t i = 0; i < type.member_types.size(); i++)
{
auto &memb_type = get<SPIRType>(type.member_types[i]);
auto member_flags = meta[type.self].members.at(i).decoration_flags;
// Verify alignment rules.
uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
2017-10-10 13:23:07 +00:00
uint32_t packed_size = type_to_packed_size(memb_type, member_flags, packing);
if (packing_is_hlsl(packing))
{
// If a member straddles across a vec4 boundary, alignment is actually vec4.
uint32_t begin_word = offset / 16;
uint32_t end_word = (offset + packed_size - 1) / 16;
if (begin_word != end_word)
packed_alignment = max(packed_alignment, 16u);
}
uint32_t alignment = max(packed_alignment, pad_alignment);
offset = (offset + alignment - 1) & ~(alignment - 1);
// The next member following a struct member is aligned to the base alignment of the struct that came before.
// GL 4.5 spec, 7.6.2.2.
if (memb_type.basetype == SPIRType::Struct)
pad_alignment = packed_alignment;
else
pad_alignment = 1;
2017-10-10 13:23:07 +00:00
// We only care about offsets in std140, std430, etc ...
// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
if (!packing_has_flexible_offset(packing))
{
uint32_t actual_offset = type_struct_member_offset(type, i);
if (actual_offset != offset) // This cannot be the packing we're looking for.
return false;
}
// Verify array stride rules.
if (!memb_type.array.empty() &&
type_to_packed_array_stride(memb_type, member_flags, packing) != type_struct_member_array_stride(type, i))
return false;
// Verify that sub-structs also follow packing rules.
// We cannot use enhanced layouts on substructs, so they better be up to spec.
2017-10-10 13:23:07 +00:00
auto substruct_packing = packing_to_substruct_packing(packing);
2017-10-10 13:37:53 +00:00
if (!memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing))
return false;
// Bump size.
2017-10-10 13:23:07 +00:00
offset += packed_size;
}
return true;
2016-03-02 17:09:16 +00:00
}
2017-10-19 12:17:18 +00:00
bool CompilerGLSL::can_use_io_location(StorageClass storage)
{
// Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL.
// Be very explicit here about how to solve the issue.
if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
(get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
{
if (!options.es && options.version < 410 && !options.separate_shader_objects)
return false;
else if (options.es && options.version < 310)
return false;
}
if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
(get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
{
if (options.es && options.version < 300)
return false;
else if (!options.es && options.version < 330)
return false;
}
return true;
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
{
// FIXME: Come up with a better solution for when to disable layouts.
// Having layouts depend on extensions as well as which types
// of layouts are used. For now, the simple solution is to just disable
// layouts for legacy versions.
if (is_legacy())
return "";
vector<string> attr;
auto &dec = meta[var.self].decoration;
auto &type = get<SPIRType>(var.basetype);
auto flags = dec.decoration_flags;
auto typeflags = meta[type.self].decoration.decoration_flags;
if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
attr.push_back("push_constant");
if (flags & (1ull << DecorationRowMajor))
attr.push_back("row_major");
if (flags & (1ull << DecorationColMajor))
attr.push_back("column_major");
if (options.vulkan_semantics)
{
if (flags & (1ull << DecorationInputAttachmentIndex))
attr.push_back(join("input_attachment_index = ", dec.input_attachment));
}
2017-10-19 12:17:18 +00:00
if ((flags & (1ull << DecorationLocation)) != 0 && can_use_io_location(var.storage))
{
2017-10-19 12:17:18 +00:00
uint64_t combined_decoration = 0;
for (uint32_t i = 0; i < meta[type.self].members.size(); i++)
combined_decoration |= combined_decoration_for_member(type, i);
// If our members have location decorations, we don't need to
// emit location decorations at the top as well (looks weird).
if ((combined_decoration & (1ull << DecorationLocation)) == 0)
attr.push_back(join("location = ", dec.location));
}
// set = 0 is the default. Do not emit set = decoration in regular GLSL output, but
// we should preserve it in Vulkan GLSL mode.
if (var.storage != StorageClassPushConstant)
{
if ((flags & (1ull << DecorationDescriptorSet)) && (dec.set != 0 || options.vulkan_semantics))
attr.push_back(join("set = ", dec.set));
}
bool can_use_binding;
if (options.es)
can_use_binding = options.version >= 310;
else
can_use_binding = options.enable_420pack_extension || (options.version >= 420);
if (can_use_binding && (flags & (1ull << DecorationBinding)))
attr.push_back(join("binding = ", dec.binding));
if (flags & (1ull << DecorationOffset))
attr.push_back(join("offset = ", dec.offset));
bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
bool ssbo_block = var.storage == StorageClassStorageBuffer ||
(var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBufferBlock)));
// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
// If SPIR-V does not comply with either layout, we cannot really work around it.
if (var.storage == StorageClassUniform && (typeflags & (1ull << DecorationBlock)))
{
2017-10-10 13:37:53 +00:00
if (buffer_is_packing_standard(type, BufferPackingStd140))
attr.push_back("std140");
2017-10-10 13:37:53 +00:00
else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
{
2017-10-10 09:30:29 +00:00
attr.push_back("std140");
// Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
if (options.es && !options.vulkan_semantics)
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
"not support GL_ARB_enhanced_layouts.");
if (!options.es && !options.vulkan_semantics && options.version < 440)
require_extension("GL_ARB_enhanced_layouts");
// This is a very last minute to check for this, but use this unused decoration to mark that we should emit
// explicit offsets for this block type.
// layout_for_variable() will be called before the actual buffer emit.
// The alternative is a full pass before codegen where we deduce this decoration,
// but then we are just doing the exact same work twice, and more complexity.
set_decoration(type.self, DecorationCPacked);
}
else
{
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Uniform buffer cannot be expressed as std140, even with enhanced layouts. You can try "
"flattening this block to "
"support a more flexible layout.");
}
}
else if (push_constant_block || ssbo_block)
{
2017-10-10 13:37:53 +00:00
if (buffer_is_packing_standard(type, BufferPackingStd430))
attr.push_back("std430");
2017-10-10 13:37:53 +00:00
else if (buffer_is_packing_standard(type, BufferPackingStd140))
attr.push_back("std140");
2017-10-10 13:37:53 +00:00
else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
{
attr.push_back("std140");
// Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
if (options.es && !options.vulkan_semantics)
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
"not support GL_ARB_enhanced_layouts.");
if (!options.es && !options.vulkan_semantics && options.version < 440)
require_extension("GL_ARB_enhanced_layouts");
set_decoration(type.self, DecorationCPacked);
}
2017-10-10 13:37:53 +00:00
else if (buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
{
attr.push_back("std430");
if (options.es && !options.vulkan_semantics)
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
"not support GL_ARB_enhanced_layouts.");
if (!options.es && !options.vulkan_semantics && options.version < 440)
require_extension("GL_ARB_enhanced_layouts");
set_decoration(type.self, DecorationCPacked);
}
else
{
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Buffer block cannot be expressed as neither std430 nor std140, even with enhanced "
"layouts. You can try flattening this block to support a more flexible layout.");
}
}
// For images, the type itself adds a layout qualifer.
// Only emit the format for storage images.
if (type.basetype == SPIRType::Image && type.image.sampled == 2)
{
const char *fmt = format_to_glsl(type.image.format);
if (fmt)
attr.push_back(fmt);
}
if (attr.empty())
return "";
string res = "layout(";
res += merge(attr);
res += ") ";
return res;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
{
if (flattened_buffer_blocks.count(var.self))
emit_buffer_block_flattened(var);
else if (options.vulkan_semantics)
emit_push_constant_block_vulkan(var);
else
emit_push_constant_block_glsl(var);
}
void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
{
emit_buffer_block(var);
}
void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2016-03-02 17:09:16 +00:00
{
// OpenGL has no concept of push constant blocks, implement it as a uniform struct.
auto &type = get<SPIRType>(var.basetype);
2016-03-02 17:09:16 +00:00
auto &flags = meta[var.self].decoration.decoration_flags;
flags &= ~((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet));
2016-03-02 17:09:16 +00:00
#if 0
if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2016-03-02 17:09:16 +00:00
"Remap to location with reflection API first or disable these decorations.");
#endif
// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
auto &block_flags = meta[type.self].decoration.decoration_flags;
uint64_t block_flag = block_flags & (1ull << DecorationBlock);
block_flags &= ~block_flag;
2016-03-02 17:09:16 +00:00
emit_struct(type);
2016-03-02 17:09:16 +00:00
block_flags |= block_flag;
2016-03-02 17:09:16 +00:00
emit_uniform(var);
statement("");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
{
if (flattened_buffer_blocks.count(var.self))
emit_buffer_block_flattened(var);
else if (is_legacy())
emit_buffer_block_legacy(var);
else
emit_buffer_block_native(var);
}
void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
bool ssbo = var.storage == StorageClassStorageBuffer ||
((meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0);
if (ssbo)
SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
auto &block_flags = meta[type.self].decoration.decoration_flags;
uint64_t block_flag = block_flags & (1ull << DecorationBlock);
block_flags &= ~block_flag;
emit_struct(type);
block_flags |= block_flag;
emit_uniform(var);
statement("");
}
void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2016-03-02 17:09:16 +00:00
{
auto &type = get<SPIRType>(var.basetype);
2017-01-21 09:07:38 +00:00
uint64_t flags = get_buffer_block_flags(var);
bool ssbo = var.storage == StorageClassStorageBuffer ||
((meta[type.self].decoration.decoration_flags & (1ull << DecorationBufferBlock)) != 0);
2017-01-21 09:07:38 +00:00
bool is_restrict = ssbo && (flags & (1ull << DecorationRestrict)) != 0;
bool is_writeonly = ssbo && (flags & (1ull << DecorationNonReadable)) != 0;
bool is_readonly = ssbo && (flags & (1ull << DecorationNonWritable)) != 0;
bool is_coherent = ssbo && (flags & (1ull << DecorationCoherent)) != 0;
add_resource_name(var.self);
// Block names should never alias.
auto buffer_name = to_name(type.self, false);
// Shaders never use the block by interface name, so we don't
// have to track this other than updating name caches.
if (meta[type.self].decoration.alias.empty() || resource_names.find(buffer_name) != end(resource_names))
buffer_name = get_block_fallback_name(var.self);
else
resource_names.insert(buffer_name);
2017-08-28 07:01:03 +00:00
statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
buffer_name);
2017-01-21 09:07:38 +00:00
begin_scope();
type.member_name_cache.clear();
uint32_t i = 0;
for (auto &member : type.member_types)
{
add_member_name(type, i);
emit_struct_member(type, member, i);
i++;
}
end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
statement("");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
auto &type = get<SPIRType>(var.basetype);
// Block names should never alias.
auto buffer_name = to_name(type.self, false);
size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
SPIRType::BaseType basic_type;
if (get_common_basic_type(type, basic_type))
{
SPIRType tmp;
tmp.basetype = basic_type;
2017-01-22 07:53:52 +00:00
tmp.vecsize = 4;
if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
auto flags = get_buffer_block_flags(var);
2017-01-22 08:06:15 +00:00
statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
buffer_size, "];");
}
else
SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2016-03-02 17:09:16 +00:00
{
auto &execution = get_entry_point();
if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
{
if (is_legacy() && execution.model == ExecutionModelVertex)
return var.storage == StorageClassInput ? "attribute " : "varying ";
else if (is_legacy() && execution.model == ExecutionModelFragment)
return "varying "; // Fragment outputs are renamed so they never hit this case.
else
return var.storage == StorageClassInput ? "in " : "out ";
}
else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
var.storage == StorageClassPushConstant)
{
return "uniform ";
}
return "";
}
void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
{
auto &type = get<SPIRType>(var.basetype);
if (!type.array.empty())
SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
auto old_flags = meta[type.self].decoration.decoration_flags;
// Emit the members as if they are part of a block to get all qualifiers.
meta[type.self].decoration.decoration_flags |= 1ull << DecorationBlock;
uint32_t i = 0;
for (auto &member : type.member_types)
{
add_member_name(type, i);
auto &membertype = get<SPIRType>(member);
if (membertype.basetype == SPIRType::Struct)
SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables.");
// Pass in the varying qualifier here so it will appear in the correct declaration order.
// Replace member name while emitting it so it encodes both struct name and member name.
// Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
// which is not allowed.
auto backup_name = get_member_name(type.self, i);
auto member_name = to_member_name(type, i);
set_member_name(type.self, i, sanitize_underscores(join(to_name(type.self), "_", member_name)));
emit_struct_member(type, member, i, qual);
// Restore member name.
set_member_name(type.self, i, member_name);
i++;
}
meta[type.self].decoration.decoration_flags = old_flags;
// Treat this variable as flattened from now on.
flattened_structs.insert(var.self);
}
void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
// Either make it plain in/out or in/out blocks depending on what shader is doing ...
bool block = (meta[type.self].decoration.decoration_flags & (1ull << DecorationBlock)) != 0;
const char *qual = to_storage_qualifiers_glsl(var);
if (block)
{
// ESSL earlier than 310 and GLSL earlier than 150 did not support
// I/O variables which are struct types.
// To support this, flatten the struct into separate varyings instead.
if ((options.es && options.version < 310) || (!options.es && options.version < 150))
{
// I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
// On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
emit_flattened_io_block(var, qual);
}
else
{
if (options.es && options.version < 320)
{
// Geometry and tessellation extensions imply this extension.
if (!forced_extensions.count("GL_EXT_geometry_shader") &&
!forced_extensions.count("GL_EXT_tessellation_shader"))
require_extension("GL_EXT_shader_io_blocks");
}
add_resource_name(var.self);
// Block names should never alias.
auto block_name = to_name(type.self, false);
// Shaders never use the block by interface name, so we don't
// have to track this other than updating name caches.
if (resource_names.find(block_name) != end(resource_names))
block_name = get_fallback_name(type.self);
else
resource_names.insert(block_name);
statement(layout_for_variable(var), qual, block_name);
begin_scope();
type.member_name_cache.clear();
uint32_t i = 0;
for (auto &member : type.member_types)
{
add_member_name(type, i);
emit_struct_member(type, member, i);
i++;
}
end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
statement("");
}
}
else
{
// ESSL earlier than 310 and GLSL earlier than 150 did not support
// I/O variables which are struct types.
// To support this, flatten the struct into separate varyings instead.
if (type.basetype == SPIRType::Struct &&
((options.es && options.version < 310) || (!options.es && options.version < 150)))
{
emit_flattened_io_block(var, qual);
}
else
{
add_resource_name(var.self);
statement(layout_for_variable(var), variable_decl(var), ";");
}
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_uniform(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
if (type.basetype == SPIRType::Image && type.image.sampled == 2)
{
if (!options.es && options.version < 420)
require_extension("GL_ARB_shader_image_load_store");
else if (options.es && options.version < 310)
SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
}
add_resource_name(var.self);
statement(layout_for_variable(var), variable_decl(var), ";");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_specialization_constant(const SPIRConstant &constant)
{
auto &type = get<SPIRType>(constant.constant_type);
auto name = to_name(constant.self);
SpecializationConstant wg_x, wg_y, wg_z;
uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
if (constant.self == workgroup_size_id || constant.self == wg_x.id || constant.self == wg_y.id ||
constant.self == wg_z.id)
{
// These specialization constants are implicitly declared by emitting layout() in;
return;
}
2017-09-27 14:10:29 +00:00
// Only scalars have constant IDs.
if (has_decoration(constant.self, DecorationSpecId))
{
statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
variable_decl(type, name), " = ", constant_expression(constant), ";");
}
else
{
statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
}
}
void CompilerGLSL::replace_illegal_names()
{
2016-10-27 11:55:47 +00:00
// clang-format off
static const unordered_set<string> keywords = {
"active", "asm", "atomic_uint", "attribute", "bool", "break",
"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
"dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
"do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
"for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
"iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
"iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
"image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
"isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precision", "public", "readonly",
"resource", "restrict", "return", "row_major", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
"sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
"sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "short", "sizeof", "smooth", "static",
"struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
"uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
"uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
"usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", "volatile",
"while", "writeonly", "texture"
2016-10-27 11:55:47 +00:00
};
// clang-format on
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
2016-08-26 10:58:50 +00:00
if (!is_hidden_variable(var))
{
auto &m = meta[var.self].decoration;
2016-10-27 11:55:47 +00:00
if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords))
m.alias = join("_", m.alias);
}
}
}
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
{
auto &m = meta[var.self].decoration;
uint32_t location = 0;
if (m.decoration_flags & (1ull << DecorationLocation))
location = m.location;
2016-03-02 17:09:16 +00:00
// If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
// do the access chain part of this for us.
auto &type = get<SPIRType>(var.basetype);
if (type.array.empty())
{
// Redirect the write to a specific render target in legacy GLSL.
m.alias = join("gl_FragData[", location, "]");
2016-09-17 13:56:23 +00:00
if (is_legacy_es() && location != 0)
require_extension("GL_EXT_draw_buffers");
}
else if (type.array.size() == 1)
{
// If location is non-zero, we probably have to add an offset.
// This gets really tricky since we'd have to inject an offset in the access chain.
// FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
m.alias = "gl_FragData";
if (location != 0)
SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
"This is unimplemented in SPIRV-Cross.");
2016-09-17 16:46:10 +00:00
2016-09-17 12:33:16 +00:00
if (is_legacy_es())
require_extension("GL_EXT_draw_buffers");
}
else
SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::replace_fragment_outputs()
{
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer &&
var.storage == StorageClassOutput)
replace_fragment_output(var);
}
}
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::remap_swizzle(uint32_t result_type, uint32_t input_components, uint32_t expr)
{
auto &out_type = get<SPIRType>(result_type);
if (out_type.vecsize == input_components)
return to_expression(expr);
else if (input_components == 1)
return join(type_to_glsl(out_type), "(", to_expression(expr), ")");
else
{
auto e = to_enclosed_expression(expr) + ".";
// Just clamp the swizzle index if we have more outputs than inputs.
for (uint32_t c = 0; c < out_type.vecsize; c++)
e += index_to_swizzle(min(c, input_components - 1));
if (backend.swizzle_is_function && out_type.vecsize > 1)
e += "()";
return e;
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_pls()
{
auto &execution = get_entry_point();
if (execution.model != ExecutionModelFragment)
SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
if (!options.es)
SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
if (options.version < 300)
SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
if (!pls_inputs.empty())
{
statement("__pixel_local_inEXT _PLSIn");
begin_scope();
for (auto &input : pls_inputs)
statement(pls_decl(input), ";");
end_scope_decl();
statement("");
}
if (!pls_outputs.empty())
{
statement("__pixel_local_outEXT _PLSOut");
begin_scope();
for (auto &output : pls_outputs)
statement(pls_decl(output), ";");
end_scope_decl();
statement("");
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::fixup_image_load_store_access()
{
for (auto &id : ids)
{
if (id.get_type() != TypeVariable)
continue;
uint32_t var = id.get<SPIRVariable>().self;
auto &vartype = expression_type(var);
if (vartype.basetype == SPIRType::Image)
{
// Older glslangValidator does not emit required qualifiers here.
// Solve this by making the image access as restricted as possible and loosen up if we need to.
// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
auto &flags = meta.at(var).decoration.decoration_flags;
static const uint64_t NoWrite = 1ull << DecorationNonWritable;
static const uint64_t NoRead = 1ull << DecorationNonReadable;
if ((flags & (NoWrite | NoRead)) == 0)
flags |= NoRead | NoWrite;
}
}
}
void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
{
bool emitted_block = false;
for (auto &id : ids)
{
if (id.get_type() != TypeVariable)
continue;
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
bool block = has_decoration(type.self, DecorationBlock);
uint64_t builtins = 0;
if (var.storage == storage && block && is_builtin_variable(var))
{
for (auto &m : meta[type.self].members)
if (m.builtin)
builtins |= 1ull << m.builtin_type;
}
if (!builtins)
continue;
if (emitted_block)
SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
if (storage == StorageClassOutput)
statement("out gl_PerVertex");
else
statement("in gl_PerVertex");
begin_scope();
if (builtins & (1ull << BuiltInPosition))
statement("vec4 gl_Position;");
if (builtins & (1ull << BuiltInPointSize))
statement("float gl_PointSize;");
if (builtins & (1ull << BuiltInClipDistance))
statement("float gl_ClipDistance[];"); // TODO: Do we need a fixed array size here?
if (builtins & (1ull << BuiltInCullDistance))
statement("float gl_CullDistance[];"); // TODO: Do we need a fixed array size here?
bool builtin_array = !type.array.empty();
bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
if (builtin_array)
{
// Make sure the array has a supported name in the code.
if (storage == StorageClassOutput)
set_name(var.self, "gl_out");
else if (storage == StorageClassInput)
set_name(var.self, "gl_in");
if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
end_scope_decl(join(to_name(var.self), "[", get_entry_point().output_vertices, "]"));
else
end_scope_decl(join(to_name(var.self), tessellation ? "[gl_MaxPatchVertices]" : "[]"));
}
else
end_scope_decl();
statement("");
emitted_block = true;
}
}
void CompilerGLSL::declare_undefined_values()
{
bool emitted = false;
for (auto &id : ids)
{
if (id.get_type() != TypeUndef)
continue;
auto &undef = id.get<SPIRUndef>();
statement(variable_decl(get<SPIRType>(undef.basetype), to_name(undef.self), undef.self), ";");
emitted = true;
}
if (emitted)
statement("");
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::emit_resources()
{
auto &execution = get_entry_point();
replace_illegal_names();
// Legacy GL uses gl_FragData[], redeclare all fragment outputs
// with builtins.
if (execution.model == ExecutionModelFragment && is_legacy())
replace_fragment_outputs();
// Emit PLS blocks if we have such variables.
if (!pls_inputs.empty() || !pls_outputs.empty())
emit_pls();
// Emit custom gl_PerVertex for SSO compatibility.
if (options.separate_shader_objects && !options.es)
{
switch (execution.model)
{
case ExecutionModelGeometry:
case ExecutionModelTessellationControl:
case ExecutionModelTessellationEvaluation:
emit_declared_builtin_block(StorageClassInput, execution.model);
emit_declared_builtin_block(StorageClassOutput, execution.model);
break;
case ExecutionModelVertex:
emit_declared_builtin_block(StorageClassOutput, execution.model);
break;
default:
break;
}
}
bool emitted = false;
// If emitted Vulkan GLSL,
// emit specialization constants as actual floats,
// spec op expressions will redirect to the constant name.
//
// TODO: If we have the fringe case that we create a spec constant which depends on a struct type,
// we'll have to deal with that, but there's currently no known way to express that.
if (options.vulkan_semantics)
{
for (auto &id : ids)
{
if (id.get_type() == TypeConstant)
{
auto &c = id.get<SPIRConstant>();
if (!c.specialization)
continue;
emit_specialization_constant(c);
emitted = true;
}
}
}
if (emitted)
statement("");
emitted = false;
// Output all basic struct types which are not Block or BufferBlock as these are declared inplace
// when such variables are instantiated.
for (auto &id : ids)
{
if (id.get_type() == TypeType)
{
auto &type = id.get<SPIRType>();
if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
(meta[type.self].decoration.decoration_flags &
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) == 0)
{
emit_struct(type);
}
}
}
// Output UBOs and SSBOs
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
bool has_block_flags = (meta[type.self].decoration.decoration_flags &
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
has_block_flags)
{
emit_buffer_block(var);
}
}
}
// Output push constant blocks
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
!is_hidden_variable(var))
2016-08-26 10:58:50 +00:00
{
emit_push_constant_block(var);
2016-08-26 10:58:50 +00:00
}
}
}
bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
// Output Uniform Constants (values, samplers, images, etc).
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
// If we're remapping separate samplers and images, only emit the combined samplers.
if (skip_separate_image_sampler)
{
// Sampler buffers are always used without a sampler, and they will also work in regular GL.
bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
bool separate_sampler = type.basetype == SPIRType::Sampler;
if (!sampler_buffer && (separate_image || separate_sampler))
continue;
}
if (var.storage != StorageClassFunction && type.pointer &&
(type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) &&
!is_hidden_variable(var))
{
emit_uniform(var);
emitted = true;
}
}
}
if (emitted)
statement("");
emitted = false;
// Output in/out interfaces.
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
// HLSL output from glslang may emit interface variables which are "empty".
// Just avoid declaring them.
if (type_is_empty(type))
continue;
if (var.storage != StorageClassFunction && type.pointer &&
2016-08-26 10:58:50 +00:00
(var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var))
{
emit_interface_block(var);
emitted = true;
}
else if (is_builtin_variable(var))
{
// For gl_InstanceIndex emulation on GLES, the API user needs to
// supply this uniform.
if (meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics)
{
statement("uniform int SPIRV_Cross_BaseInstance;");
emitted = true;
}
}
}
}
// Global variables.
for (auto global : global_variables)
{
auto &var = get<SPIRVariable>(global);
if (var.storage != StorageClassOutput)
{
add_resource_name(var.self);
statement(variable_decl(var), ";");
emitted = true;
}
}
if (emitted)
statement("");
declare_undefined_values();
2016-03-02 17:09:16 +00:00
}
// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation fo the arg ID.
// Subclasses may override to modify the return value.
string CompilerGLSL::to_func_call_arg(uint32_t id)
{
2016-10-24 13:24:24 +00:00
return to_expression(id);
}
void CompilerGLSL::handle_invalid_expression(uint32_t id)
{
// We tried to read an invalidated expression.
// This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
forced_temporaries.insert(id);
force_recompile = true;
}
// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
// GLSL does not support packed formats, so simply return the expression.
// Subclasses that do will override
string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &)
{
return expr_str;
}
// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all.
void CompilerGLSL::strip_enclosed_expression(string &expr)
{
if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
return;
// Have to make sure that our first and last parens actually enclose everything inside it.
uint32_t paren_count = 0;
for (auto &c : expr)
{
if (c == '(')
paren_count++;
else if (c == ')')
{
paren_count--;
// If we hit 0 and this is not the final char, our first and final parens actually don't
// enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
if (paren_count == 0 && &c != &expr.back())
return;
}
}
expr.erase(expr.size() - 1, 1);
expr.erase(begin(expr));
}
string CompilerGLSL::enclose_expression(const string &expr)
{
bool need_parens = false;
2017-07-24 08:17:19 +00:00
// If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
// unary expressions.
if (!expr.empty())
{
2017-07-24 08:17:19 +00:00
auto c = expr.front();
if (c == '-' || c == '+' || c == '!' || c == '~')
need_parens = true;
2017-07-24 08:17:19 +00:00
}
if (!need_parens)
{
uint32_t paren_count = 0;
for (auto c : expr)
{
if (c == '(')
paren_count++;
else if (c == ')')
{
assert(paren_count);
paren_count--;
}
else if (c == ' ' && paren_count == 0)
{
need_parens = true;
break;
}
}
assert(paren_count == 0);
}
// If this expression contains any spaces which are not enclosed by parentheses,
// we need to enclose it so we can treat the whole string as an expression.
// This happens when two expressions have been part of a binary op earlier.
if (need_parens)
return join('(', expr, ')');
else
return expr;
}
// Just like to_expression except that we enclose the expression inside parentheses if needed.
string CompilerGLSL::to_enclosed_expression(uint32_t id)
{
return enclose_expression(to_expression(id));
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::to_expression(uint32_t id)
{
auto itr = invalid_expressions.find(id);
if (itr != end(invalid_expressions))
handle_invalid_expression(id);
if (ids[id].get_type() == TypeExpression)
{
// We might have a more complex chain of dependencies.
// A possible scenario is that we
//
// %1 = OpLoad
// %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1.
// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
// %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
//
// However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
// and see that we should not forward reads of the original variable.
auto &expr = get<SPIRExpression>(id);
for (uint32_t dep : expr.expression_dependencies)
if (invalid_expressions.find(dep) != end(invalid_expressions))
handle_invalid_expression(dep);
}
track_expression_read(id);
switch (ids[id].get_type())
{
case TypeExpression:
{
auto &e = get<SPIRExpression>(id);
if (e.base_expression)
return to_enclosed_expression(e.base_expression) + e.expression;
else if (e.need_transpose)
return convert_row_major_matrix(e.expression);
else
return e.expression;
}
case TypeConstant:
{
auto &c = get<SPIRConstant>(id);
// WorkGroupSize may be a constant.
auto &dec = meta[c.self].decoration;
if (dec.builtin)
return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
else if (c.specialization && options.vulkan_semantics)
return to_name(id);
else
return constant_expression(c);
}
case TypeConstantOp:
return constant_op_expression(get<SPIRConstantOp>(id));
case TypeVariable:
{
auto &var = get<SPIRVariable>(id);
2016-12-16 11:43:12 +00:00
// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
// the variable has not been declared yet.
if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
return to_expression(var.static_expression);
else if (var.deferred_declaration)
{
var.deferred_declaration = false;
return variable_decl(var);
}
else if (flattened_structs.count(id))
{
return load_flattened_struct(var);
}
else
{
auto &dec = meta[var.self].decoration;
if (dec.builtin)
return builtin_to_glsl(dec.builtin_type, var.storage);
else
return to_name(id);
}
}
case TypeCombinedImageSampler:
// This type should never be taken the expression of directly.
// The intention is that texture sampling functions will extract the image and samplers
// separately and take their expressions as needed.
// GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
// expression ala sampler2D(texture, sampler).
SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
2017-08-10 15:12:48 +00:00
case TypeAccessChain:
// We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
SPIRV_CROSS_THROW("Access chains have no default expression representation.");
default:
return to_name(id);
}
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
2016-03-02 17:09:16 +00:00
{
auto &type = get<SPIRType>(cop.basetype);
bool binary = false;
bool unary = false;
string op;
// TODO: Find a clean way to reuse emit_instruction.
switch (cop.opcode)
{
case OpSConvert:
case OpUConvert:
case OpFConvert:
op = type_to_glsl_constructor(type);
break;
#define BOP(opname, x) \
case Op##opname: \
binary = true; \
op = x; \
break
#define UOP(opname, x) \
case Op##opname: \
unary = true; \
op = x; \
break
UOP(SNegate, "-");
UOP(Not, "~");
BOP(IAdd, "+");
BOP(ISub, "-");
BOP(IMul, "*");
BOP(SDiv, "/");
BOP(UDiv, "/");
BOP(UMod, "%");
BOP(SMod, "%");
BOP(ShiftRightLogical, ">>");
BOP(ShiftRightArithmetic, ">>");
BOP(ShiftLeftLogical, "<<");
BOP(BitwiseOr, "|");
BOP(BitwiseXor, "^");
BOP(BitwiseAnd, "&");
BOP(LogicalOr, "||");
BOP(LogicalAnd, "&&");
UOP(LogicalNot, "!");
BOP(LogicalEqual, "==");
BOP(LogicalNotEqual, "!=");
BOP(IEqual, "==");
BOP(INotEqual, "!=");
BOP(ULessThan, "<");
BOP(SLessThan, "<");
BOP(ULessThanEqual, "<=");
BOP(SLessThanEqual, "<=");
BOP(UGreaterThan, ">");
BOP(SGreaterThan, ">");
BOP(UGreaterThanEqual, ">=");
BOP(SGreaterThanEqual, ">=");
case OpSelect:
{
if (cop.arguments.size() < 3)
SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
// This one is pretty annoying. It's triggered from
// uint(bool), int(bool) from spec constants.
// In order to preserve its compile-time constness in Vulkan GLSL,
// we need to reduce the OpSelect expression back to this simplified model.
// If we cannot, fail.
if (!to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
{
SPIRV_CROSS_THROW(
"Cannot implement specialization constant op OpSelect. "
"Need trivial select implementation which can be resolved to a simple cast from boolean.");
}
break;
}
default:
// Some opcodes are unimplemented here, these are currently not possible to test from glslang.
SPIRV_CROSS_THROW("Unimplemented spec constant op.");
}
SPIRType::BaseType input_type;
bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode);
switch (cop.opcode)
{
case OpIEqual:
case OpINotEqual:
input_type = SPIRType::Int;
break;
default:
input_type = type.basetype;
break;
}
#undef BOP
#undef UOP
if (binary)
{
if (cop.arguments.size() < 2)
SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
string cast_op0;
string cast_op1;
auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
cop.arguments[1], skip_cast_if_equal_type);
if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
{
expected_type.basetype = input_type;
auto expr = bitcast_glsl_op(type, expected_type);
expr += '(';
expr += join(cast_op0, " ", op, " ", cast_op1);
expr += ')';
return expr;
}
else
return join("(", cast_op0, " ", op, " ", cast_op1, ")");
}
else if (unary)
{
if (cop.arguments.size() < 1)
SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
// Auto-bitcast to result type as needed.
// Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
}
else
{
if (cop.arguments.size() < 1)
SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
return join(op, "(", to_expression(cop.arguments[0]), ")");
}
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::constant_expression(const SPIRConstant &c)
{
if (!c.subconstants.empty())
{
// Handles Arrays and structures.
string res;
if (backend.use_initializer_list)
res = "{ ";
else
res = type_to_glsl_constructor(get<SPIRType>(c.constant_type)) + "(";
for (auto &elem : c.subconstants)
{
auto &subc = get<SPIRConstant>(elem);
if (subc.specialization && options.vulkan_semantics)
res += to_name(elem);
else
2017-08-03 12:32:07 +00:00
res += constant_expression(subc);
if (&elem != &c.subconstants.back())
res += ", ";
}
res += backend.use_initializer_list ? " }" : ")";
return res;
}
else if (c.columns() == 1)
{
return constant_expression_vector(c, 0);
}
else
{
string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
for (uint32_t col = 0; col < c.columns(); col++)
{
2017-09-27 14:10:29 +00:00
if (options.vulkan_semantics && c.specialization_constant_id(col) != 0)
res += to_name(c.specialization_constant_id(col));
else
res += constant_expression_vector(c, col);
if (col + 1 < c.columns())
res += ", ";
}
res += ")";
return res;
}
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
{
auto type = get<SPIRType>(c.constant_type);
type.columns = 1;
string res;
if (c.vector_size() > 1)
res += type_to_glsl(type) + "(";
bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
2017-09-27 14:10:29 +00:00
if (splat)
{
// Cannot use constant splatting if we have specialization constants somewhere in the vector.
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.specialization_constant_id(vector, i) != 0)
{
splat = false;
break;
}
}
}
if (splat)
{
if (type.width == 64)
2016-07-27 08:59:00 +00:00
{
uint64_t ident = c.scalar_u64(vector, 0);
for (uint32_t i = 1; i < c.vector_size(); i++)
if (ident != c.scalar_u64(vector, i))
splat = false;
}
else
{
uint32_t ident = c.scalar(vector, 0);
for (uint32_t i = 1; i < c.vector_size(); i++)
if (ident != c.scalar(vector, i))
splat = false;
}
}
switch (type.basetype)
{
case SPIRType::Float:
if (splat)
{
res += convert_to_string(c.scalar_f32(vector, 0));
if (backend.float_literal_suffix)
res += "f";
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
res += convert_to_string(c.scalar_f32(vector, i));
if (backend.float_literal_suffix)
res += "f";
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
if (splat)
{
res += convert_to_string(c.scalar_f64(vector, 0));
if (backend.double_literal_suffix)
res += "lf";
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
{
res += convert_to_string(c.scalar_f64(vector, i));
if (backend.double_literal_suffix)
res += "lf";
}
2016-07-27 08:59:00 +00:00
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
2016-07-27 09:27:00 +00:00
case SPIRType::Int64:
if (splat)
{
res += convert_to_string(c.scalar_i64(vector, 0));
if (backend.long_long_literal_suffix)
res += "ll";
else
res += "l";
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
2016-07-27 09:27:00 +00:00
else
2017-09-27 14:10:29 +00:00
{
res += convert_to_string(c.scalar_i64(vector, i));
if (backend.long_long_literal_suffix)
res += "ll";
else
res += "l";
}
2016-07-27 09:27:00 +00:00
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::UInt64:
if (splat)
{
res += convert_to_string(c.scalar_u64(vector, 0));
if (backend.long_long_literal_suffix)
res += "ull";
else
res += "ul";
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
2016-07-27 09:27:00 +00:00
else
2017-09-27 14:10:29 +00:00
{
res += convert_to_string(c.scalar_u64(vector, i));
if (backend.long_long_literal_suffix)
res += "ull";
else
res += "ul";
}
2016-07-27 09:27:00 +00:00
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::UInt:
if (splat)
{
res += convert_to_string(c.scalar(vector, 0));
if (backend.uint32_t_literal_suffix)
res += "u";
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
{
res += convert_to_string(c.scalar(vector, i));
if (backend.uint32_t_literal_suffix)
res += "u";
}
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::Int:
if (splat)
res += convert_to_string(c.scalar_i32(vector, 0));
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
res += convert_to_string(c.scalar_i32(vector, i));
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::Boolean:
if (splat)
res += c.scalar(vector, 0) ? "true" : "false";
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
res += c.scalar(vector, i) ? "true" : "false";
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
default:
SPIRV_CROSS_THROW("Invalid constant expression basetype.");
}
if (c.vector_size() > 1)
res += ")";
return res;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
{
auto &type = get<SPIRType>(result_type);
auto flags = meta[result_id].decoration.decoration_flags;
// If we're declaring temporaries inside continue blocks,
// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
if (current_continue_block)
{
auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
2016-05-23 11:30:02 +00:00
[result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
return tmp.first == result_type && tmp.second == result_id;
2017-10-10 08:22:40 +00:00
}) == end(header.declare_temporary))
{
header.declare_temporary.emplace_back(result_type, result_id);
force_recompile = true;
}
return join(to_name(result_id), " = ");
}
else
{
// The result_id has not been made into an expression yet, so use flags interface.
return join(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::expression_is_forwarded(uint32_t id)
{
return forwarded_temporaries.find(id) != end(forwarded_temporaries);
2016-03-02 17:09:16 +00:00
}
SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
bool suppress_usage_tracking)
2016-03-02 17:09:16 +00:00
{
if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
{
// Just forward it without temporary.
// If the forward is trivial, we do not force flushing to temporary for this expression.
if (!suppress_usage_tracking)
forwarded_temporaries.insert(result_id);
return set<SPIRExpression>(result_id, rhs, result_type, true);
}
else
{
// If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
statement(declare_temporary(result_type, result_id), rhs, ";");
return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
bool forward = should_forward(op0);
emit_op(result_type, result_id, join(op, to_enclosed_expression(op0)), forward);
inherit_expression_dependencies(result_id, op0);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
{
bool forward = should_forward(op0) && should_forward(op1);
emit_op(result_type, result_id, join(to_enclosed_expression(op0), " ", op, " ", to_enclosed_expression(op1)),
forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
}
2017-03-24 14:00:48 +00:00
void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
{
auto &type = get<SPIRType>(result_type);
auto expr = type_to_glsl_constructor(type);
expr += '(';
for (uint32_t i = 0; i < type.vecsize; i++)
{
// Make sure to call to_expression multiple times to ensure
// that these expressions are properly flushed to temporaries if needed.
expr += op;
expr += to_enclosed_expression(operand);
expr += '.';
expr += index_to_swizzle(i);
if (i + 1 < type.vecsize)
expr += ", ";
}
expr += ')';
emit_op(result_type, result_id, expr, should_forward(operand));
inherit_expression_dependencies(result_id, operand);
}
void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
const char *op)
{
auto &type = get<SPIRType>(result_type);
auto expr = type_to_glsl_constructor(type);
expr += '(';
for (uint32_t i = 0; i < type.vecsize; i++)
{
// Make sure to call to_expression multiple times to ensure
// that these expressions are properly flushed to temporaries if needed.
expr += to_enclosed_expression(op0);
expr += '.';
expr += index_to_swizzle(i);
expr += ' ';
expr += op;
expr += ' ';
expr += to_enclosed_expression(op1);
expr += '.';
expr += index_to_swizzle(i);
if (i + 1 < type.vecsize)
expr += ", ";
}
expr += ')';
emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
2016-03-02 17:09:16 +00:00
}
SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
{
auto &type0 = expression_type(op0);
auto &type1 = expression_type(op1);
// We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
// For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
// since equality test is exactly the same.
bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
// Create a fake type so we can bitcast to it.
// We only deal with regular arithmetic types here like int, uints and so on.
SPIRType expected_type;
expected_type.basetype = input_type;
expected_type.vecsize = type0.vecsize;
expected_type.columns = type0.columns;
expected_type.width = type0.width;
if (cast)
{
cast_op0 = bitcast_glsl(expected_type, op0);
cast_op1 = bitcast_glsl(expected_type, op1);
}
else
{
// If we don't cast, our actual input type is that of the first (or second) argument.
cast_op0 = to_enclosed_expression(op0);
cast_op1 = to_enclosed_expression(op1);
input_type = type0.basetype;
}
return expected_type;
}
void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
{
string cast_op0, cast_op1;
auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
auto &out_type = get<SPIRType>(result_type);
// We might have casted away from the result type, so bitcast again.
// For example, arithmetic right shift with uint inputs.
// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
string expr;
if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
{
expected_type.basetype = input_type;
expr = bitcast_glsl_op(out_type, expected_type);
expr += '(';
expr += join(cast_op0, " ", op, " ", cast_op1);
expr += ')';
}
else
expr += join(cast_op0, " ", op, " ", cast_op1);
emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
bool forward = should_forward(op0);
emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), forward);
inherit_expression_dependencies(result_id, op0);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
const char *op)
2016-03-02 17:09:16 +00:00
{
bool forward = should_forward(op0) && should_forward(op1);
emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ")"), forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
{
string cast_op0, cast_op1;
auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
auto &out_type = get<SPIRType>(result_type);
// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
string expr;
if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
{
expected_type.basetype = input_type;
expr = bitcast_glsl_op(out_type, expected_type);
expr += '(';
expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
expr += ')';
}
else
{
expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
}
emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
}
void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
uint32_t op2, const char *op)
2016-03-02 17:09:16 +00:00
{
bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
emit_op(result_type, result_id,
join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ")"), forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
inherit_expression_dependencies(result_id, op2);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
uint32_t op2, uint32_t op3, const char *op)
2016-03-02 17:09:16 +00:00
{
bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
2017-10-10 08:22:40 +00:00
emit_op(result_type, result_id,
join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ",
to_expression(op3), ")"),
forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
inherit_expression_dependencies(result_id, op2);
inherit_expression_dependencies(result_id, op3);
2016-03-02 17:09:16 +00:00
}
// EXT_shader_texture_lod only concerns fragment shaders so lod tex functions
// are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex
// functions in vertex shaders so we revert those back to plain calls when
// the lod is a constant value of zero.
bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod)
{
auto &execution = get_entry_point();
bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment;
if (!allowed && lod != 0)
{
auto *lod_constant = maybe_get<SPIRConstant>(lod);
if (!lod_constant || lod_constant->scalar_f32() != 0.0f)
{
SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders.");
}
}
return allowed;
}
string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod)
2016-03-02 17:09:16 +00:00
{
const char *type;
switch (imgtype.image.dim)
{
case spv::Dim1D:
type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
break;
case spv::Dim2D:
type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
break;
case spv::Dim3D:
type = "3D";
break;
case spv::DimCube:
type = "Cube";
break;
case spv::DimBuffer:
type = "Buffer";
break;
case spv::DimSubpassData:
type = "2D";
break;
default:
type = "";
break;
}
bool use_explicit_lod = check_explicit_lod_allowed(lod);
if (op == "textureLod" || op == "textureProjLod")
{
if (is_legacy_es())
{
if (use_explicit_lod)
require_extension("GL_EXT_shader_texture_lod");
}
else if (is_legacy())
require_extension("GL_ARB_shader_texture_lod");
}
2016-09-17 13:56:23 +00:00
if (op == "texture")
return join("texture", type);
else if (op == "textureLod")
{
if (use_explicit_lod)
return join("texture", type, is_legacy_es() ? "LodEXT" : "Lod");
else
return join("texture", type);
}
else if (op == "textureProj")
return join("texture", type, "Proj");
else if (op == "textureProjLod")
{
if (use_explicit_lod)
return join("texture", type, is_legacy_es() ? "ProjLodEXT" : "ProjLod");
else
return join("texture", type);
}
else
{
SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
{
auto *cleft = maybe_get<SPIRConstant>(left);
auto *cright = maybe_get<SPIRConstant>(right);
auto &lerptype = expression_type(lerp);
// If our targets aren't constants, we cannot use construction.
if (!cleft || !cright)
return false;
// If our targets are spec constants, we cannot use construction.
if (cleft->specialization || cright->specialization)
return false;
// We can only use trivial construction if we have a scalar
// (should be possible to do it for vectors as well, but that is overkill for now).
if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
return false;
// If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor.
bool ret = false;
switch (type.basetype)
{
case SPIRType::Int:
case SPIRType::UInt:
ret = cleft->scalar() == 0 && cright->scalar() == 1;
break;
case SPIRType::Float:
ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
break;
case SPIRType::Double:
ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
break;
case SPIRType::Int64:
case SPIRType::UInt64:
ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
break;
default:
break;
}
if (ret)
op = type_to_glsl_constructor(type);
return ret;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
2016-03-02 17:09:16 +00:00
{
auto &lerptype = expression_type(lerp);
auto &restype = get<SPIRType>(result_type);
string mix_op;
bool has_boolean_mix = backend.boolean_mix_support &&
((options.es && options.version >= 310) || (!options.es && options.version >= 450));
bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
// Cannot use boolean mix when the lerp argument is just one boolean,
// fall back to regular trinary statements.
if (lerptype.vecsize == 1)
has_boolean_mix = false;
// If we can reduce the mix to a simple cast, do so.
// This helps for cases like int(bool), uint(bool) which is implemented with
// OpSelect bool 1 0.
if (trivial_mix)
{
emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
}
else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
{
// Boolean mix not supported on desktop without extension.
// Was added in OpenGL 4.5 with ES 3.1 compat.
//
// Could use GL_EXT_shader_integer_mix on desktop at least,
// but Apple doesn't support it. :(
// Just implement it as ternary expressions.
string expr;
if (lerptype.vecsize == 1)
expr = join(to_enclosed_expression(lerp), " ? ", to_enclosed_expression(right), " : ",
to_enclosed_expression(left));
else
{
2016-05-23 11:30:02 +00:00
auto swiz = [this](uint32_t expression, uint32_t i) {
return join(to_enclosed_expression(expression), ".", index_to_swizzle(i));
};
expr = type_to_glsl_constructor(restype);
expr += "(";
for (uint32_t i = 0; i < restype.vecsize; i++)
{
expr += swiz(lerp, i);
expr += " ? ";
expr += swiz(right, i);
expr += " : ";
expr += swiz(left, i);
if (i + 1 < restype.vecsize)
expr += ", ";
}
expr += ")";
}
emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
}
else
emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
2016-03-02 17:09:16 +00:00
}
2016-09-11 10:54:08 +00:00
string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id)
{
2016-09-11 10:54:08 +00:00
auto &args = current_function->arguments;
// For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect
// all possible combinations into new sampler2D uniforms.
auto *image = maybe_get_backing_variable(image_id);
auto *samp = maybe_get_backing_variable(samp_id);
if (image)
image_id = image->self;
if (samp)
samp_id = samp->self;
auto image_itr = find_if(begin(args), end(args),
[image_id](const SPIRFunction::Parameter &param) { return param.id == image_id; });
auto sampler_itr = find_if(begin(args), end(args),
[samp_id](const SPIRFunction::Parameter &param) { return param.id == samp_id; });
if (image_itr != end(args) || sampler_itr != end(args))
{
// If any parameter originates from a parameter, we will find it in our argument list.
bool global_image = image_itr == end(args);
2016-09-11 10:54:08 +00:00
bool global_sampler = sampler_itr == end(args);
uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args));
uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args));
2016-09-11 10:54:08 +00:00
auto &combined = current_function->combined_parameters;
auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
p.sampler_id == sid;
2016-09-11 10:54:08 +00:00
});
if (itr != end(combined))
return to_expression(itr->id);
else
{
SPIRV_CROSS_THROW(
2016-09-11 10:54:08 +00:00
"Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used "
"before compile() was called?");
}
}
else
{
2016-09-11 10:54:08 +00:00
// For global sampler2D, look directly at the global remapping table.
auto &mapping = combined_image_samplers;
auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
return combined.image_id == image_id && combined.sampler_id == samp_id;
});
2016-09-11 10:54:08 +00:00
if (itr != end(combined_image_samplers))
return to_expression(itr->combined_id);
else
{
SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
"before compile() was called?");
}
}
}
void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
2016-09-11 10:54:08 +00:00
if (options.vulkan_semantics && combined_image_samplers.empty())
{
emit_binary_func_op(result_type, result_id, image_id, samp_id,
type_to_glsl(get<SPIRType>(result_type)).c_str());
}
else
emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true);
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::emit_texture_op(const Instruction &i)
{
auto ops = stream(i);
auto op = static_cast<Op>(i.op);
uint32_t length = i.length;
if (i.offset + length > spirv.size())
SPIRV_CROSS_THROW("Compiler::parse() opcode out of range.");
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t img = ops[2];
uint32_t coord = ops[3];
uint32_t dref = 0;
uint32_t comp = 0;
bool gather = false;
bool proj = false;
bool fetch = false;
const uint32_t *opt = nullptr;
switch (op)
{
case OpImageSampleDrefImplicitLod:
case OpImageSampleDrefExplicitLod:
dref = ops[4];
opt = &ops[5];
length -= 5;
break;
case OpImageSampleProjDrefImplicitLod:
case OpImageSampleProjDrefExplicitLod:
dref = ops[4];
opt = &ops[5];
length -= 5;
proj = true;
break;
case OpImageDrefGather:
dref = ops[4];
opt = &ops[5];
length -= 5;
gather = true;
break;
case OpImageGather:
comp = ops[4];
opt = &ops[5];
length -= 5;
gather = true;
break;
case OpImageFetch:
case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
opt = &ops[4];
length -= 4;
fetch = true;
break;
case OpImageSampleProjImplicitLod:
case OpImageSampleProjExplicitLod:
opt = &ops[4];
length -= 4;
proj = true;
break;
default:
opt = &ops[4];
length -= 4;
break;
}
// Bypass pointers because we need the real image struct
auto &type = expression_type(img);
auto &imgtype = get<SPIRType>(type.self);
uint32_t coord_components = 0;
switch (imgtype.image.dim)
{
case spv::Dim1D:
coord_components = 1;
break;
case spv::Dim2D:
coord_components = 2;
break;
case spv::Dim3D:
coord_components = 3;
break;
case spv::DimCube:
coord_components = 3;
break;
case spv::DimBuffer:
coord_components = 1;
break;
default:
coord_components = 2;
break;
}
if (proj)
coord_components++;
if (imgtype.image.arrayed)
coord_components++;
uint32_t bias = 0;
uint32_t lod = 0;
uint32_t grad_x = 0;
uint32_t grad_y = 0;
uint32_t coffset = 0;
uint32_t offset = 0;
uint32_t coffsets = 0;
uint32_t sample = 0;
uint32_t flags = 0;
if (length)
{
flags = *opt++;
length--;
}
2016-05-23 11:30:02 +00:00
auto test = [&](uint32_t &v, uint32_t flag) {
if (length && (flags & flag))
{
v = *opt++;
length--;
}
};
test(bias, ImageOperandsBiasMask);
test(lod, ImageOperandsLodMask);
test(grad_x, ImageOperandsGradMask);
test(grad_y, ImageOperandsGradMask);
test(coffset, ImageOperandsConstOffsetMask);
test(offset, ImageOperandsOffsetMask);
test(coffsets, ImageOperandsConstOffsetsMask);
test(sample, ImageOperandsSampleMask);
string expr;
bool forward = false;
2017-01-13 15:32:54 +00:00
expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset),
(!!grad_x || !!grad_y), !!dref, lod);
expr += "(";
expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod,
coffset, offset, bias, comp, sample, &forward);
expr += ")";
emit_op(result_type, id, expr, forward);
}
// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
// For some subclasses, the function is a method on the specified image.
2017-01-08 13:52:57 +00:00
string CompilerGLSL::to_function_name(uint32_t, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
bool has_array_offsets, bool has_offset, bool has_grad, bool, uint32_t lod)
{
string fname;
// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
// To emulate this, we will have to use textureGrad with a constant gradient of 0.
// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
bool workaround_lod_array_shadow_as_grad = false;
if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
imgtype.image.depth && lod)
{
auto *constant_lod = maybe_get<SPIRConstant>(lod);
if (!constant_lod || constant_lod->scalar_f32() != 0.0f)
2017-06-23 07:46:15 +00:00
SPIRV_CROSS_THROW(
2017-06-23 07:50:01 +00:00
"textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL.");
workaround_lod_array_shadow_as_grad = true;
}
if (is_fetch)
fname += "texelFetch";
else
{
fname += "texture";
if (is_gather)
fname += "Gather";
if (has_array_offsets)
fname += "Offsets";
if (is_proj)
fname += "Proj";
if (has_grad || workaround_lod_array_shadow_as_grad)
fname += "Grad";
if (!!lod && !workaround_lod_array_shadow_as_grad)
fname += "Lod";
}
if (has_offset)
fname += "Offset";
return is_legacy() ? legacy_tex_op(fname, imgtype, lod) : fname;
}
// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
2017-07-31 08:05:32 +00:00
string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool, bool, bool is_proj, uint32_t coord,
2017-01-08 13:52:57 +00:00
uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y,
uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp,
uint32_t sample, bool *p_forward)
{
string farg_str = to_expression(img);
bool swizz_func = backend.swizzle_is_function;
2016-05-23 11:30:02 +00:00
auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
if (comps == in_comps)
return "";
switch (comps)
{
case 1:
return ".x";
case 2:
return swizz_func ? ".xy()" : ".xy";
case 3:
return swizz_func ? ".xyz()" : ".xyz";
default:
return "";
}
};
bool forward = should_forward(coord);
// The IR can give us more components than we need, so chop them off as needed.
auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize);
// Only enclose the UV expression if needed.
auto coord_expr = (*swizzle_expr == '\0') ? to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr);
// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
// To emulate this, we will have to use textureGrad with a constant gradient of 0.
// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
2017-06-23 07:50:01 +00:00
bool workaround_lod_array_shadow_as_grad =
((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
imgtype.image.depth && lod;
if (dref)
{
forward = forward && should_forward(dref);
// SPIR-V splits dref and coordinate.
if (coord_components == 4) // GLSL also splits the arguments in two.
{
farg_str += ", ";
farg_str += to_expression(coord);
farg_str += ", ";
farg_str += to_expression(dref);
}
2017-07-31 08:05:32 +00:00
else if (is_proj)
{
// Have to reshuffle so we get vec4(coord, dref, proj), special case.
// Other shading languages splits up the arguments for coord and compare value like SPIR-V.
// The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
farg_str += ", vec4(";
if (imgtype.image.dim == Dim1D)
{
// Could reuse coord_expr, but we will mess up the temporary usage checking.
farg_str += to_enclosed_expression(coord) + ".x";
farg_str += ", ";
farg_str += "0.0, ";
farg_str += to_expression(dref);
farg_str += ", ";
farg_str += to_enclosed_expression(coord) + ".y)";
}
else if (imgtype.image.dim == Dim2D)
{
// Could reuse coord_expr, but we will mess up the temporary usage checking.
farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy");
farg_str += ", ";
farg_str += to_expression(dref);
farg_str += ", ";
farg_str += to_enclosed_expression(coord) + ".z)";
}
else
SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
}
else
{
// Create a composite which merges coord/dref into a single vector.
auto type = expression_type(coord);
type.vecsize = coord_components + 1;
farg_str += ", ";
farg_str += type_to_glsl_constructor(type);
farg_str += "(";
farg_str += coord_expr;
farg_str += ", ";
farg_str += to_expression(dref);
farg_str += ")";
}
}
else
{
farg_str += ", ";
farg_str += coord_expr;
}
if (grad_x || grad_y)
{
forward = forward && should_forward(grad_x);
forward = forward && should_forward(grad_y);
farg_str += ", ";
farg_str += to_expression(grad_x);
farg_str += ", ";
farg_str += to_expression(grad_y);
}
if (lod)
{
if (workaround_lod_array_shadow_as_grad)
{
// Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
// Implementing this as plain texture() is not safe on some implementations.
if (imgtype.image.dim == Dim2D)
farg_str += ", vec2(0.0), vec2(0.0)";
else if (imgtype.image.dim == DimCube)
farg_str += ", vec3(0.0), vec3(0.0)";
}
else
{
if (check_explicit_lod_allowed(lod))
{
forward = forward && should_forward(lod);
farg_str += ", ";
farg_str += to_expression(lod);
}
}
}
if (coffset)
{
forward = forward && should_forward(coffset);
farg_str += ", ";
farg_str += to_expression(coffset);
}
else if (offset)
{
forward = forward && should_forward(offset);
farg_str += ", ";
farg_str += to_expression(offset);
}
if (bias)
{
forward = forward && should_forward(bias);
farg_str += ", ";
farg_str += to_expression(bias);
}
if (comp)
{
forward = forward && should_forward(comp);
farg_str += ", ";
farg_str += to_expression(comp);
}
2016-06-22 10:35:58 +00:00
if (sample)
{
farg_str += ", ";
farg_str += to_expression(sample);
2016-06-22 10:35:58 +00:00
}
*p_forward = forward;
return farg_str;
2016-03-02 17:09:16 +00:00
}
// Some languages may have additional standard library functions whose names conflict
// with a function defined in the body of the shader. Subclasses can override to rename
// the function name defined in the shader to avoid conflict with the language standard
// functions (eg. MSL includes saturate()).
string CompilerGLSL::clean_func_name(string func_name)
{
2017-01-08 13:52:57 +00:00
return func_name;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t)
{
GLSLstd450 op = static_cast<GLSLstd450>(eop);
switch (op)
{
// FP fiddling
case GLSLstd450Round:
emit_unary_func_op(result_type, id, args[0], "round");
break;
2016-07-27 08:59:00 +00:00
case GLSLstd450RoundEven:
if ((options.es && options.version >= 300) || (!options.es && options.version >= 130))
emit_unary_func_op(result_type, id, args[0], "roundEven");
else
SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up.");
2016-07-27 08:59:00 +00:00
break;
case GLSLstd450Trunc:
emit_unary_func_op(result_type, id, args[0], "trunc");
break;
case GLSLstd450SAbs:
case GLSLstd450FAbs:
emit_unary_func_op(result_type, id, args[0], "abs");
break;
case GLSLstd450SSign:
case GLSLstd450FSign:
emit_unary_func_op(result_type, id, args[0], "sign");
break;
case GLSLstd450Floor:
emit_unary_func_op(result_type, id, args[0], "floor");
break;
case GLSLstd450Ceil:
emit_unary_func_op(result_type, id, args[0], "ceil");
break;
case GLSLstd450Fract:
emit_unary_func_op(result_type, id, args[0], "fract");
break;
case GLSLstd450Radians:
emit_unary_func_op(result_type, id, args[0], "radians");
break;
case GLSLstd450Degrees:
emit_unary_func_op(result_type, id, args[0], "degrees");
break;
case GLSLstd450Fma:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
break;
case GLSLstd450Modf:
register_call_out_argument(args[1]);
forced_temporaries.insert(id);
emit_binary_func_op(result_type, id, args[0], args[1], "modf");
break;
2017-09-04 08:24:21 +00:00
case GLSLstd450ModfStruct:
{
forced_temporaries.insert(id);
auto &type = get<SPIRType>(result_type);
auto flags = meta[id].decoration.decoration_flags;
statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
to_expression(id), ".", to_member_name(type, 1), ");");
break;
}
// Minmax
case GLSLstd450FMin:
case GLSLstd450UMin:
case GLSLstd450SMin:
emit_binary_func_op(result_type, id, args[0], args[1], "min");
break;
case GLSLstd450FMax:
case GLSLstd450UMax:
case GLSLstd450SMax:
emit_binary_func_op(result_type, id, args[0], args[1], "max");
break;
case GLSLstd450FClamp:
case GLSLstd450UClamp:
case GLSLstd450SClamp:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
break;
// Trig
case GLSLstd450Sin:
emit_unary_func_op(result_type, id, args[0], "sin");
break;
case GLSLstd450Cos:
emit_unary_func_op(result_type, id, args[0], "cos");
break;
case GLSLstd450Tan:
emit_unary_func_op(result_type, id, args[0], "tan");
break;
case GLSLstd450Asin:
emit_unary_func_op(result_type, id, args[0], "asin");
break;
case GLSLstd450Acos:
emit_unary_func_op(result_type, id, args[0], "acos");
break;
case GLSLstd450Atan:
emit_unary_func_op(result_type, id, args[0], "atan");
break;
case GLSLstd450Sinh:
emit_unary_func_op(result_type, id, args[0], "sinh");
break;
case GLSLstd450Cosh:
emit_unary_func_op(result_type, id, args[0], "cosh");
break;
case GLSLstd450Tanh:
emit_unary_func_op(result_type, id, args[0], "tanh");
break;
case GLSLstd450Asinh:
emit_unary_func_op(result_type, id, args[0], "asinh");
break;
case GLSLstd450Acosh:
emit_unary_func_op(result_type, id, args[0], "acosh");
break;
case GLSLstd450Atanh:
emit_unary_func_op(result_type, id, args[0], "atanh");
break;
case GLSLstd450Atan2:
emit_binary_func_op(result_type, id, args[0], args[1], "atan");
break;
// Exponentials
case GLSLstd450Pow:
emit_binary_func_op(result_type, id, args[0], args[1], "pow");
break;
case GLSLstd450Exp:
emit_unary_func_op(result_type, id, args[0], "exp");
break;
case GLSLstd450Log:
emit_unary_func_op(result_type, id, args[0], "log");
break;
case GLSLstd450Exp2:
emit_unary_func_op(result_type, id, args[0], "exp2");
break;
case GLSLstd450Log2:
emit_unary_func_op(result_type, id, args[0], "log2");
break;
case GLSLstd450Sqrt:
emit_unary_func_op(result_type, id, args[0], "sqrt");
break;
case GLSLstd450InverseSqrt:
emit_unary_func_op(result_type, id, args[0], "inversesqrt");
break;
// Matrix math
case GLSLstd450Determinant:
emit_unary_func_op(result_type, id, args[0], "determinant");
break;
case GLSLstd450MatrixInverse:
emit_unary_func_op(result_type, id, args[0], "inverse");
break;
// Lerping
case GLSLstd450FMix:
case GLSLstd450IMix:
{
emit_mix_op(result_type, id, args[0], args[1], args[2]);
break;
}
case GLSLstd450Step:
emit_binary_func_op(result_type, id, args[0], args[1], "step");
break;
case GLSLstd450SmoothStep:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
break;
// Packing
case GLSLstd450Frexp:
register_call_out_argument(args[1]);
forced_temporaries.insert(id);
emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
break;
2017-09-04 08:24:21 +00:00
case GLSLstd450FrexpStruct:
{
forced_temporaries.insert(id);
auto &type = get<SPIRType>(result_type);
auto flags = meta[id].decoration.decoration_flags;
statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
to_expression(id), ".", to_member_name(type, 1), ");");
break;
}
case GLSLstd450Ldexp:
emit_binary_func_op(result_type, id, args[0], args[1], "ldexp");
break;
case GLSLstd450PackSnorm4x8:
emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
break;
case GLSLstd450PackUnorm4x8:
emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
break;
case GLSLstd450PackSnorm2x16:
emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
break;
case GLSLstd450PackUnorm2x16:
emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
break;
case GLSLstd450PackHalf2x16:
emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
break;
case GLSLstd450UnpackSnorm4x8:
emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
break;
case GLSLstd450UnpackUnorm4x8:
emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
break;
case GLSLstd450UnpackSnorm2x16:
emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
break;
case GLSLstd450UnpackUnorm2x16:
emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
break;
case GLSLstd450UnpackHalf2x16:
emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
break;
2016-07-27 08:59:00 +00:00
case GLSLstd450PackDouble2x32:
emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
break;
case GLSLstd450UnpackDouble2x32:
emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
break;
// Vector math
case GLSLstd450Length:
emit_unary_func_op(result_type, id, args[0], "length");
break;
case GLSLstd450Distance:
emit_binary_func_op(result_type, id, args[0], args[1], "distance");
break;
case GLSLstd450Cross:
emit_binary_func_op(result_type, id, args[0], args[1], "cross");
break;
case GLSLstd450Normalize:
emit_unary_func_op(result_type, id, args[0], "normalize");
break;
case GLSLstd450FaceForward:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
break;
case GLSLstd450Reflect:
emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
break;
case GLSLstd450Refract:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
break;
// Bit-fiddling
case GLSLstd450FindILsb:
emit_unary_func_op(result_type, id, args[0], "findLSB");
break;
case GLSLstd450FindSMsb:
case GLSLstd450FindUMsb:
emit_unary_func_op(result_type, id, args[0], "findMSB");
break;
// Multisampled varying
case GLSLstd450InterpolateAtCentroid:
emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
break;
case GLSLstd450InterpolateAtSample:
emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
break;
case GLSLstd450InterpolateAtOffset:
emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
break;
default:
statement("// unimplemented GLSL op ", eop);
break;
}
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
2016-03-02 17:09:16 +00:00
{
if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
return type_to_glsl(out_type);
2016-07-27 09:27:00 +00:00
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
return "floatBitsToUint";
else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
return type_to_glsl(out_type);
2016-07-27 09:27:00 +00:00
else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
return "floatBitsToInt";
else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
return "uintBitsToFloat";
else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
return "intBitsToFloat";
2016-07-27 09:27:00 +00:00
else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
return "doubleBitsToInt64";
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
return "doubleBitsToUint64";
else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
return "int64BitsToDouble";
else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
return "uint64BitsToDouble";
else
return "";
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
2016-03-02 17:09:16 +00:00
{
auto op = bitcast_glsl_op(result_type, expression_type(argument));
if (op.empty())
return to_enclosed_expression(argument);
else
return join(op, "(", to_expression(argument), ")");
2016-03-02 17:09:16 +00:00
}
std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
{
auto expr = to_expression(arg);
auto &src_type = expression_type(arg);
if (src_type.basetype != target_type)
{
auto target = src_type;
target.basetype = target_type;
expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
}
return expr;
}
std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
const std::string &expr)
{
if (target_type.basetype == expr_type)
return expr;
auto src_type = target_type;
src_type.basetype = expr_type;
return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
}
string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
2016-03-02 17:09:16 +00:00
{
switch (builtin)
{
case BuiltInPosition:
return "gl_Position";
case BuiltInPointSize:
return "gl_PointSize";
case BuiltInClipDistance:
return "gl_ClipDistance";
2017-03-05 11:44:29 +00:00
case BuiltInCullDistance:
return "gl_CullDistance";
case BuiltInVertexId:
if (options.vulkan_semantics)
SPIRV_CROSS_THROW(
"Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics.");
return "gl_VertexID";
case BuiltInInstanceId:
if (options.vulkan_semantics)
SPIRV_CROSS_THROW(
"Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics.");
return "gl_InstanceID";
case BuiltInVertexIndex:
if (options.vulkan_semantics)
return "gl_VertexIndex";
else
return "gl_VertexID"; // gl_VertexID already has the base offset applied.
case BuiltInInstanceIndex:
if (options.vulkan_semantics)
return "gl_InstanceIndex";
else
return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
case BuiltInPrimitiveId:
return "gl_PrimitiveID";
case BuiltInInvocationId:
return "gl_InvocationID";
case BuiltInLayer:
return "gl_Layer";
case BuiltInTessLevelOuter:
return "gl_TessLevelOuter";
case BuiltInTessLevelInner:
return "gl_TessLevelInner";
case BuiltInTessCoord:
return "gl_TessCoord";
case BuiltInFragCoord:
return "gl_FragCoord";
case BuiltInPointCoord:
return "gl_PointCoord";
case BuiltInFrontFacing:
return "gl_FrontFacing";
case BuiltInFragDepth:
return "gl_FragDepth";
case BuiltInNumWorkgroups:
return "gl_NumWorkGroups";
case BuiltInWorkgroupSize:
return "gl_WorkGroupSize";
case BuiltInWorkgroupId:
return "gl_WorkGroupID";
case BuiltInLocalInvocationId:
return "gl_LocalInvocationID";
case BuiltInGlobalInvocationId:
return "gl_GlobalInvocationID";
case BuiltInLocalInvocationIndex:
return "gl_LocalInvocationIndex";
case BuiltInSampleId:
if (options.es && options.version < 320)
require_extension("GL_OES_sample_variables");
if (!options.es && options.version < 400)
SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
return "gl_SampleID";
case BuiltInSampleMask:
if (options.es && options.version < 320)
require_extension("GL_OES_sample_variables");
if (!options.es && options.version < 400)
SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
if (storage == StorageClassInput)
return "gl_SampleMaskIn";
else
return "gl_SampleMask";
case BuiltInSamplePosition:
if (options.es && options.version < 320)
require_extension("GL_OES_sample_variables");
if (!options.es && options.version < 400)
SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
return "gl_SamplePosition";
2017-08-15 11:28:16 +00:00
case BuiltInViewIndex:
if (options.vulkan_semantics)
{
require_extension("GL_EXT_multiview");
return "gl_ViewIndex";
}
else
{
require_extension("GL_OVR_multiview2");
return "gl_ViewID_OVR";
}
default:
return join("gl_BuiltIn_", convert_to_string(builtin));
}
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::index_to_swizzle(uint32_t index)
2016-03-02 17:09:16 +00:00
{
switch (index)
{
case 0:
return "x";
case 1:
return "y";
case 2:
return "z";
case 3:
return "w";
default:
SPIRV_CROSS_THROW("Swizzle index out of range");
}
2016-03-02 17:09:16 +00:00
}
2017-02-23 18:33:14 +00:00
string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
bool index_is_literal, bool chain_only, bool *need_transpose)
2016-03-02 17:09:16 +00:00
{
string expr;
if (!chain_only)
expr = to_enclosed_expression(base);
uint32_t type_id = expression_type_id(base);
const auto *type = &get<SPIRType>(type_id);
// Start traversing type hierarchy at the proper non-pointer types,
// but keep type_id referencing the original pointer for use below.
while (type->pointer)
{
assert(type->parent_type);
type = &get<SPIRType>(type->parent_type);
}
bool access_chain_is_arrayed = false;
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
bool pending_array_enclose = false;
bool dimension_flatten = false;
for (uint32_t i = 0; i < count; i++)
{
uint32_t index = indices[i];
// Arrays
if (!type->array.empty())
{
// If we are flattening multidimensional arrays, only create opening bracket on first
// array index.
if (options.flatten_multidimensional_arrays && !pending_array_enclose)
{
dimension_flatten = type->array.size() > 1;
pending_array_enclose = dimension_flatten;
if (pending_array_enclose)
expr += "[";
}
assert(type->parent_type);
// If we are flattening multidimensional arrays, do manual stride computation.
if (options.flatten_multidimensional_arrays && dimension_flatten)
{
auto &parent_type = get<SPIRType>(type->parent_type);
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_enclosed_expression(index);
for (auto j = uint32_t(parent_type.array.size()); j; j--)
{
expr += " * ";
expr += enclose_expression(to_array_size(parent_type, j - 1));
}
if (parent_type.array.empty())
pending_array_enclose = false;
else
expr += " + ";
}
else
{
expr += "[";
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_expression(index);
}
if (!pending_array_enclose)
expr += "]";
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
access_chain_is_arrayed = true;
}
// For structs, the index refers to a constant, which indexes into the members.
// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
else if (type->basetype == SPIRType::Struct)
{
if (!index_is_literal)
index = get<SPIRConstant>(index).scalar();
if (index >= type->member_types.size())
SPIRV_CROSS_THROW("Member index is out of bounds!");
BuiltIn builtin;
if (is_member_builtin(*type, index, &builtin))
{
// FIXME: We rely here on OpName on gl_in/gl_out to make this work properly.
// To make this properly work by omitting all OpName opcodes,
// we need to infer gl_in or gl_out based on the builtin, and stage.
if (access_chain_is_arrayed)
{
expr += ".";
expr += builtin_to_glsl(builtin, type->storage);
}
else
expr = builtin_to_glsl(builtin, type->storage);
}
else
{
// If the member has a qualified name, use it as the entire chain
string qual_mbr_name = get_member_qualified_name(type_id, index);
if (!qual_mbr_name.empty())
expr = qual_mbr_name;
else
{
expr += ".";
expr += to_member_name(*type, index);
}
}
if (member_is_packed_type(*type, index))
{
auto &membertype = get<SPIRType>(type->member_types[index]);
expr = unpack_expression_type(expr, membertype);
}
row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
type = &get<SPIRType>(type->member_types[index]);
}
// Matrix -> Vector
else if (type->columns > 1)
{
if (row_major_matrix_needs_conversion)
{
expr = convert_row_major_matrix(expr);
row_major_matrix_needs_conversion = false;
}
expr += "[";
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_expression(index);
expr += "]";
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
// Vector -> Scalar
else if (type->vecsize > 1)
{
if (index_is_literal)
{
expr += ".";
expr += index_to_swizzle(index);
}
else if (ids[index].get_type() == TypeConstant)
{
auto &c = get<SPIRConstant>(index);
expr += ".";
expr += index_to_swizzle(c.scalar());
}
else
{
expr += "[";
expr += to_expression(index);
expr += "]";
}
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
else
SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
}
if (pending_array_enclose)
{
SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
"but the access chain was terminated in the middle of a multidimensional array. "
"This is not supported.");
}
if (need_transpose)
*need_transpose = row_major_matrix_needs_conversion;
return expr;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::to_flattened_struct_member(const SPIRType &type, uint32_t index)
{
return sanitize_underscores(join(to_name(type.self), "_", to_member_name(type, index)));
}
2017-01-20 16:33:59 +00:00
string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
bool *out_need_transpose)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
2017-01-16 22:19:49 +00:00
if (flattened_buffer_blocks.count(base))
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
uint32_t matrix_stride;
bool need_transpose;
flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride);
if (out_need_transpose)
*out_need_transpose = target_type.columns > 1 && need_transpose;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
else if (flattened_structs.count(base) && count > 0)
{
auto chain = access_chain_internal(base, indices, count, false, true).substr(1);
auto &type = get<SPIRType>(get<SPIRVariable>(base).basetype);
if (out_need_transpose)
*out_need_transpose = false;
return sanitize_underscores(join(to_name(type.self), "_", chain));
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
else
{
return access_chain_internal(base, indices, count, false, false, out_need_transpose);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
}
string CompilerGLSL::load_flattened_struct(SPIRVariable &var)
{
auto expr = type_to_glsl_constructor(get<SPIRType>(var.basetype));
expr += '(';
auto &type = get<SPIRType>(var.basetype);
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
{
if (i)
expr += ", ";
// Flatten the varyings.
// Apply name transformation for flattened I/O blocks.
expr += to_flattened_struct_member(type, i);
}
expr += ')';
return expr;
}
void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value)
{
// We're trying to store a structure which has been flattened.
// Need to copy members one by one.
auto rhs = to_expression(value);
// Store result locally.
// Since we're declaring a variable potentially multiple times here,
// store the variable in an isolated scope.
begin_scope();
statement(variable_decl_function_local(var), " = ", rhs, ";");
auto &type = get<SPIRType>(var.basetype);
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
{
// Flatten the varyings.
// Apply name transformation for flattened I/O blocks.
auto lhs = sanitize_underscores(join(to_name(type.self), "_", to_member_name(type, i)));
rhs = join(to_name(var.self), ".", to_member_name(type, i));
statement(lhs, " = ", rhs, ";");
}
end_scope();
}
2017-01-20 16:33:59 +00:00
std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
bool need_transpose)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
if (!target_type.array.empty())
SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
else if (target_type.basetype == SPIRType::Struct)
return flattened_access_chain_struct(base, indices, count, target_type, offset);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
else if (target_type.columns > 1)
return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
else
return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
2017-01-20 16:33:59 +00:00
std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
std::string expr;
expr += type_to_glsl_constructor(target_type);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
expr += "(";
for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
if (i != 0)
expr += ", ";
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
uint32_t member_offset = type_struct_member_offset(target_type, i);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
// The access chain terminates at the struct, so we need to find matrix strides and row-major information
// ahead of time.
bool need_transpose = false;
uint32_t matrix_stride = 0;
if (member_type.columns > 1)
{
need_transpose = (combined_decoration_for_member(target_type, i) & (1ull << DecorationRowMajor)) != 0;
matrix_stride = type_struct_member_matrix_stride(target_type, i);
}
auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
need_transpose);
// Cannot forward transpositions, so resolve them here.
if (need_transpose)
expr += convert_row_major_matrix(tmp);
else
expr += tmp;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
expr += ")";
return expr;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
2017-01-20 16:33:59 +00:00
std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset,
uint32_t matrix_stride, bool need_transpose)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
assert(matrix_stride);
SPIRType tmp_type = target_type;
if (need_transpose)
swap(tmp_type.vecsize, tmp_type.columns);
std::string expr;
expr += type_to_glsl_constructor(tmp_type);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
expr += "(";
for (uint32_t i = 0; i < tmp_type.columns; i++)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
if (i != 0)
expr += ", ";
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
/* need_transpose= */ false);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
expr += ")";
return expr;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset,
uint32_t matrix_stride, bool need_transpose)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
auto buffer_name = to_name(expression_type(base).self);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
if (need_transpose)
{
std::string expr;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
if (target_type.vecsize > 1)
{
expr += type_to_glsl_constructor(target_type);
expr += "(";
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
for (uint32_t i = 0; i < target_type.vecsize; ++i)
{
if (i != 0)
expr += ", ";
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
uint32_t component_offset = result.second + i * matrix_stride;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
assert(component_offset % (target_type.width / 8) == 0);
uint32_t index = component_offset / (target_type.width / 8);
expr += buffer_name;
expr += "[";
expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
expr += convert_to_string(index / 4);
expr += "]";
expr += vector_swizzle(1, index % 4);
}
if (target_type.vecsize > 1)
{
expr += ")";
}
return expr;
}
else
{
assert(result.second % (target_type.width / 8) == 0);
uint32_t index = result.second / (target_type.width / 8);
std::string expr;
expr += buffer_name;
expr += "[";
expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
expr += convert_to_string(index / 4);
expr += "]";
expr += vector_swizzle(target_type.vecsize, index % 4);
return expr;
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
2017-08-28 07:01:03 +00:00
std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(const SPIRType &basetype,
const uint32_t *indices, uint32_t count,
uint32_t offset, uint32_t word_stride,
2017-01-21 11:47:26 +00:00
bool *need_transpose,
uint32_t *out_matrix_stride)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
const auto *type = &basetype;
2017-01-22 08:06:15 +00:00
// Start traversing type hierarchy at the proper non-pointer types.
while (type->pointer)
{
assert(type->parent_type);
type = &get<SPIRType>(type->parent_type);
}
2017-01-22 08:06:15 +00:00
// This holds the type of the current pointer which we are traversing through.
// We always start out from a struct type which is the block.
// This is primarily used to reflect the array strides and matrix strides later.
// For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set
// accordingly as members of structs are accessed.
assert(type->basetype == SPIRType::Struct);
uint32_t type_id = 0;
2017-01-21 10:30:33 +00:00
uint32_t matrix_stride = 0;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
std::string expr;
bool row_major_matrix_needs_conversion = false;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
for (uint32_t i = 0; i < count; i++)
{
uint32_t index = indices[i];
// Arrays
if (!type->array.empty())
{
2017-01-22 08:06:15 +00:00
// Here, the type_id will be a type ID for the array type itself.
uint32_t array_stride = get_decoration(type_id, DecorationArrayStride);
2017-01-21 10:30:33 +00:00
if (!array_stride)
SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
auto *constant = maybe_get<SPIRConstant>(index);
if (constant)
{
// Constant array access.
offset += constant->scalar() * array_stride;
}
else
{
// Dynamic array access.
if (array_stride % word_stride)
{
2017-01-21 11:47:26 +00:00
SPIRV_CROSS_THROW(
"Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
2017-01-22 08:06:15 +00:00
"Likely culprit here is a float or vec2 array inside a push constant block which is std430. "
"This cannot be flattened. Try using std140 layout instead.");
}
expr += to_enclosed_expression(index);
expr += " * ";
expr += convert_to_string(array_stride / word_stride);
expr += " + ";
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
2017-01-21 10:30:33 +00:00
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(parent_type);
2017-01-22 08:06:15 +00:00
type_id = parent_type;
// Type ID now refers to the array type with one less dimension.
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
// For structs, the index refers to a constant, which indexes into the members.
// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
else if (type->basetype == SPIRType::Struct)
{
index = get<SPIRConstant>(index).scalar();
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
if (index >= type->member_types.size())
SPIRV_CROSS_THROW("Member index is out of bounds!");
offset += type_struct_member_offset(*type, index);
2017-01-22 08:06:15 +00:00
type_id = type->member_types[index];
2017-01-21 10:30:33 +00:00
auto &struct_type = *type;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
type = &get<SPIRType>(type->member_types[index]);
2017-01-21 10:30:33 +00:00
if (type->columns > 1)
{
2017-01-21 10:30:33 +00:00
matrix_stride = type_struct_member_matrix_stride(struct_type, index);
row_major_matrix_needs_conversion =
(combined_decoration_for_member(struct_type, index) & (1ull << DecorationRowMajor)) != 0;
}
else
row_major_matrix_needs_conversion = false;
}
// Matrix -> Vector
else if (type->columns > 1)
{
if (ids[index].get_type() != TypeConstant)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
SPIRV_CROSS_THROW("Cannot flatten dynamic matrix indexing!");
index = get<SPIRConstant>(index).scalar();
offset += index * (row_major_matrix_needs_conversion ? type->width / 8 : matrix_stride);
2017-01-21 10:30:33 +00:00
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(type->parent_type);
2017-01-22 08:06:15 +00:00
type_id = parent_type;
}
// Vector -> Scalar
else if (type->vecsize > 1)
{
if (ids[index].get_type() != TypeConstant)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
SPIRV_CROSS_THROW("Cannot flatten dynamic vector indexing!");
index = get<SPIRConstant>(index).scalar();
offset += index * (row_major_matrix_needs_conversion ? matrix_stride : type->width / 8);
2017-01-21 10:30:33 +00:00
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(type->parent_type);
2017-01-22 08:06:15 +00:00
type_id = parent_type;
}
else
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
}
if (need_transpose)
*need_transpose = row_major_matrix_needs_conversion;
if (out_matrix_stride)
*out_matrix_stride = matrix_stride;
return std::make_pair(expr, offset);
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::should_forward(uint32_t id)
{
// Immutable expression can always be forwarded.
// If not immutable, we can speculate about it by forwarding potentially mutable variables.
auto *var = maybe_get<SPIRVariable>(id);
bool forward = var ? var->forwardable : false;
return (is_immutable(id) || forward) && !options.force_temporary;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::track_expression_read(uint32_t id)
{
// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
if (expression_is_forwarded(id))
{
auto &v = expression_usage_counts[id];
v++;
if (v >= 2)
{
//if (v == 2)
// fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
forced_temporaries.insert(id);
// Force a recompile after this pass to avoid forwarding this variable.
force_recompile = true;
}
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
{
if (forced_temporaries.find(id) != end(forced_temporaries))
return false;
for (uint32_t i = 0; i < num_args; i++)
if (!should_forward(args[i]))
return false;
// We need to forward globals as well.
if (!pure)
{
for (auto global : global_variables)
if (!should_forward(global))
return false;
for (auto aliased : aliased_variables)
if (!should_forward(aliased))
return false;
}
return true;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::register_impure_function_call()
{
// Impure functions can modify globals and aliased variables, so invalidate them as well.
for (auto global : global_variables)
flush_dependees(get<SPIRVariable>(global));
for (auto aliased : aliased_variables)
flush_dependees(get<SPIRVariable>(aliased));
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::register_call_out_argument(uint32_t id)
{
register_write(id);
2016-03-02 17:09:16 +00:00
auto *var = maybe_get<SPIRVariable>(id);
if (var)
flush_variable_declaration(var->self);
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
{
// These variables are always function local,
// so make sure we emit the variable without storage qualifiers.
// Some backends will inject custom variables locally in a function
// with a storage qualifier which is not function-local.
auto old_storage = var.storage;
var.storage = StorageClassFunction;
auto expr = variable_decl(var);
var.storage = old_storage;
return expr;
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
auto *var = maybe_get<SPIRVariable>(id);
if (var && var->deferred_declaration)
{
statement(variable_decl_function_local(*var), ";");
var->deferred_declaration = false;
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::remove_duplicate_swizzle(string &op)
{
auto pos = op.find_last_of('.');
if (pos == string::npos || pos == 0)
return false;
string final_swiz = op.substr(pos + 1, string::npos);
if (backend.swizzle_is_function)
{
if (final_swiz.size() < 2)
return false;
if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
final_swiz.erase(final_swiz.size() - 2, string::npos);
else
return false;
}
// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
// If so, and previous swizzle is of same length,
// we can drop the final swizzle altogether.
for (uint32_t i = 0; i < final_swiz.size(); i++)
{
static const char expected[] = { 'x', 'y', 'z', 'w' };
if (i >= 4 || final_swiz[i] != expected[i])
return false;
}
auto prevpos = op.find_last_of('.', pos - 1);
if (prevpos == string::npos)
return false;
prevpos++;
// Make sure there are only swizzles here ...
for (auto i = prevpos; i < pos; i++)
{
if (op[i] < 'w' || op[i] > 'z')
{
// If swizzles are foo.xyz() like in C++ backend for example, check for that.
if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
break;
return false;
}
}
// If original swizzle is large enough, just carve out the components we need.
// E.g. foobar.wyx.xy will turn into foobar.wy.
if (pos - prevpos >= final_swiz.size())
{
op.erase(prevpos + final_swiz.size(), string::npos);
// Add back the function call ...
if (backend.swizzle_is_function)
op += "()";
}
return true;
2016-03-02 17:09:16 +00:00
}
// Optimizes away vector swizzles where we have something like
// vec3 foo;
// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeCombine.
bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
{
auto pos = op.find_last_of('.');
if (pos == string::npos || pos == 0)
return false;
string final_swiz = op.substr(pos + 1, string::npos);
if (backend.swizzle_is_function)
{
if (final_swiz.size() < 2)
return false;
if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
final_swiz.erase(final_swiz.size() - 2, string::npos);
else
return false;
}
// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
// If so, and previous swizzle is of same length,
// we can drop the final swizzle altogether.
for (uint32_t i = 0; i < final_swiz.size(); i++)
{
static const char expected[] = { 'x', 'y', 'z', 'w' };
if (i >= 4 || final_swiz[i] != expected[i])
return false;
}
auto &type = expression_type(base);
// Sanity checking ...
assert(type.columns == 1 && type.array.empty());
if (type.vecsize == final_swiz.size())
op.erase(pos, string::npos);
return true;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t length)
{
uint32_t base = 0;
bool swizzle_optimization = false;
string op;
string subop;
for (uint32_t i = 0; i < length; i++)
{
auto *e = maybe_get<SPIRExpression>(elems[i]);
// If we're merging another scalar which belongs to the same base
// object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
if (e && e->base_expression && e->base_expression == base)
{
// Only supposed to be used for vector swizzle -> scalar.
assert(!e->expression.empty() && e->expression.front() == '.');
subop += e->expression.substr(1, string::npos);
swizzle_optimization = true;
}
else
{
// We'll likely end up with duplicated swizzles, e.g.
// foobar.xyz.xyz from patterns like
// OpVectorSwizzle
// OpCompositeExtract x 3
// OpCompositeConstruct 3x + other scalar.
// Just modify op in-place.
if (swizzle_optimization)
{
if (backend.swizzle_is_function)
subop += "()";
// Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
// The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
// We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
// Essentially, we can only remove one set of swizzles, since that's what we have control over ...
// Case 1:
// foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
// foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
// Case 2:
// foo.xyz: Duplicate swizzle won't kick in.
// If foo is vec3, we can remove xyz, giving just foo.
if (!remove_duplicate_swizzle(subop))
remove_unity_swizzle(base, subop);
// Strips away redundant parens if we created them during component extraction.
strip_enclosed_expression(subop);
swizzle_optimization = false;
op += subop;
}
else
op += subop;
if (i)
op += ", ";
subop = to_expression(elems[i]);
}
base = e ? e->base_expression : 0;
}
if (swizzle_optimization)
{
if (backend.swizzle_is_function)
subop += "()";
if (!remove_duplicate_swizzle(subop))
remove_unity_swizzle(base, subop);
// Strips away redundant parens if we created them during component extraction.
strip_enclosed_expression(subop);
}
op += subop;
return op;
2016-03-02 17:09:16 +00:00
}
2016-09-11 11:05:44 +00:00
bool CompilerGLSL::skip_argument(uint32_t id) const
{
if (!combined_image_samplers.empty() || !options.vulkan_semantics)
{
auto &type = expression_type(id);
if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
return true;
}
return false;
}
bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &rhs)
{
// Do this with strings because we have a very clear pattern we can check for and it avoids
// adding lots of special cases to the code emission.
if (rhs.size() < lhs.size() + 3)
return false;
auto index = rhs.find(lhs);
if (index != 0)
return false;
// TODO: Shift operators, but it's not important for now.
auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
if (op != lhs.size() + 1)
return false;
// Check that the op is followed by space. This excludes && and ||.
2017-09-06 07:15:27 +00:00
if (rhs[op + 1] != ' ')
return false;
char bop = rhs[op];
auto expr = rhs.substr(lhs.size() + 3);
// Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
// Find some common patterns which are equivalent.
if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
statement(lhs, bop, bop, ";");
else
statement(lhs, " ", bop, "= ", expr, ";");
return true;
}
void CompilerGLSL::emit_instruction(const Instruction &instruction)
2016-03-02 17:09:16 +00:00
{
auto ops = stream(instruction);
auto opcode = static_cast<Op>(instruction.op);
uint32_t length = instruction.length;
2016-03-02 17:09:16 +00:00
#define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
#define BOP_CAST(op, type) \
emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
2016-03-02 17:09:16 +00:00
#define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
#define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
#define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define BFOP_CAST(op, type) \
emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode))
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
2016-03-02 17:09:16 +00:00
#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
switch (opcode)
{
// Dealing with memory
case OpLoad:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t ptr = ops[2];
flush_variable_declaration(ptr);
// If we're loading from memory that cannot be changed by the shader,
// just forward the expression directly to avoid needless temporaries.
// If an expression is mutable and forwardable, we speculate that it is immutable.
bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
// If loading a non-native row-major matrix, mark the expression as need_transpose.
bool need_transpose = false;
bool old_need_transpose = false;
auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
if (ptr_expression && ptr_expression->need_transpose)
{
old_need_transpose = true;
ptr_expression->need_transpose = false;
need_transpose = true;
}
else if (is_non_native_row_major_matrix(ptr))
need_transpose = true;
auto expr = to_expression(ptr);
if (ptr_expression)
ptr_expression->need_transpose = old_need_transpose;
// Suppress usage tracking since using same expression multiple times does not imply any extra work.
auto &e = emit_op(result_type, id, expr, forward, true);
e.need_transpose = need_transpose;
register_read(id, ptr, forward);
break;
}
case OpInBoundsAccessChain:
case OpAccessChain:
{
auto *var = maybe_get<SPIRVariable>(ops[2]);
if (var)
flush_variable_declaration(var->self);
// If the base is immutable, the access chain pointer must also be.
// If an expression is mutable and forwardable, we speculate that it is immutable.
bool need_transpose;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose);
auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
expr.loaded_from = ops[2];
expr.need_transpose = need_transpose;
break;
}
case OpStore:
{
auto *var = maybe_get<SPIRVariable>(ops[0]);
if (var && var->statically_assigned)
var->static_expression = ops[1];
else if (var && var->loop_variable && !var->loop_variable_enable)
var->static_expression = ops[1];
else if (var && flattened_structs.count(ops[0]))
{
store_flattened_struct(*var, ops[1]);
register_write(ops[0]);
}
else
{
auto rhs = to_expression(ops[1]);
// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
if (!rhs.empty())
{
auto lhs = to_expression(ops[0]);
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
// While this is purely cosmetic, this is important for legacy ESSL where loop
// variable increments must be in either i++ or i += const-expr.
// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
if (!optimize_read_modify_write(lhs, rhs))
statement(lhs, " = ", rhs, ";");
register_write(ops[0]);
}
}
break;
}
case OpArrayLength:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto e = access_chain_internal(ops[2], &ops[3], length - 3, true);
set<SPIRExpression>(id, e + ".length()", result_type, true);
break;
}
// Function calls
case OpFunctionCall:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t func = ops[2];
const auto *arg = &ops[3];
length -= 3;
auto &callee = get<SPIRFunction>(func);
bool pure = function_is_pure(callee);
bool callee_has_out_variables = false;
// Invalidate out variables passed to functions since they can be OpStore'd to.
for (uint32_t i = 0; i < length; i++)
{
if (callee.arguments[i].write_count)
{
register_call_out_argument(arg[i]);
callee_has_out_variables = true;
}
flush_variable_declaration(arg[i]);
}
if (!pure)
register_impure_function_call();
string funexpr;
2016-09-11 11:05:44 +00:00
vector<string> arglist;
funexpr += clean_func_name(to_name(func)) + "(";
for (uint32_t i = 0; i < length; i++)
{
2016-09-11 11:05:44 +00:00
// Do not pass in separate images or samplers if we're remapping
// to combined image samplers.
if (skip_argument(arg[i]))
continue;
2016-10-24 13:24:24 +00:00
arglist.push_back(to_func_call_arg(arg[i]));
}
for (auto &combined : callee.combined_parameters)
{
uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id];
uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id];
auto *image = maybe_get_backing_variable(image_id);
if (image)
image_id = image->self;
auto *samp = maybe_get_backing_variable(sampler_id);
if (samp)
sampler_id = samp->self;
arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
}
2016-10-19 21:09:51 +00:00
2016-10-24 13:24:24 +00:00
append_global_func_args(callee, length, arglist);
2016-10-19 21:09:51 +00:00
2016-09-11 11:05:44 +00:00
funexpr += merge(arglist);
funexpr += ")";
// Check for function call constraints.
check_function_call_constraints(arg, length);
if (get<SPIRType>(result_type).basetype != SPIRType::Void)
{
// If the function actually writes to an out variable,
// take the conservative route and do not forward.
// The problem is that we might not read the function
// result (and emit the function) before an out variable
// is read (common case when return value is ignored!
// In order to avoid start tracking invalid variables,
// just avoid the forwarding problem altogether.
bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
(forced_temporaries.find(id) == end(forced_temporaries));
emit_op(result_type, id, funexpr, forward);
// Function calls are implicit loads from all variables in question.
// Set dependencies for them.
for (uint32_t i = 0; i < length; i++)
register_read(id, arg[i], forward);
// If we're going to forward the temporary result,
// put dependencies on every variable that must not change.
if (forward)
register_global_read_dependencies(callee, id);
}
else
statement(funexpr, ";");
break;
}
// Composite munging
case OpCompositeConstruct:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
const auto *elems = &ops[2];
length -= 2;
bool forward = true;
for (uint32_t i = 0; i < length; i++)
forward = forward && should_forward(elems[i]);
auto &out_type = get<SPIRType>(result_type);
if (!length)
{
if (out_type.basetype == SPIRType::Struct)
{
// It is technically allowed to make a blank struct,
// but we cannot make a meaningful expression out of it in high level languages,
// so make it a blank expression.
emit_op(result_type, id, "", forward);
break;
}
else
SPIRV_CROSS_THROW("Invalid input to OpCompositeConstruct.");
}
auto &in_type = expression_type(elems[0]);
// Only splat if we have vector constructors.
// Arrays and structs must be initialized properly in full.
bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
2016-10-05 17:18:04 +00:00
bool splat = in_type.vecsize == 1 && in_type.columns == 1 && !composite && backend.use_constructor_splatting;
if (splat)
{
uint32_t input = elems[0];
for (uint32_t i = 0; i < length; i++)
if (input != elems[i])
splat = false;
}
string constructor_op;
if (backend.use_initializer_list && composite)
{
// Only use this path if we are building composites.
// This path cannot be used for arithmetic.
constructor_op += "{ ";
if (splat)
constructor_op += to_expression(elems[0]);
else
constructor_op += build_composite_combiner(elems, length);
constructor_op += " }";
}
else
{
constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
if (splat)
constructor_op += to_expression(elems[0]);
else
constructor_op += build_composite_combiner(elems, length);
constructor_op += ")";
}
emit_op(result_type, id, constructor_op, forward);
break;
}
case OpVectorInsertDynamic:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t vec = ops[2];
uint32_t comp = ops[3];
uint32_t index = ops[4];
flush_variable_declaration(vec);
// Make a copy, then use access chain to store the variable.
statement(declare_temporary(result_type, id), to_expression(vec), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
auto chain = access_chain_internal(id, &index, 1, false);
statement(chain, " = ", to_expression(comp), ";");
break;
}
case OpVectorExtractDynamic:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto expr = access_chain_internal(ops[2], &ops[3], 1, false);
emit_op(result_type, id, expr, should_forward(ops[2]));
break;
}
case OpCompositeExtract:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
length -= 3;
auto &type = get<SPIRType>(result_type);
// We can only split the expression here if our expression is forwarded as a temporary.
bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
// Only apply this optimization if result is scalar.
if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
{
// We want to split the access chain from the base.
// This is so we can later combine different CompositeExtract results
// with CompositeConstruct without emitting code like
//
// vec3 temp = texture(...).xyz
// vec4(temp.x, temp.y, temp.z, 1.0).
//
// when we actually wanted to emit this
// vec4(texture(...).xyz, 1.0).
//
// Including the base will prevent this and would trigger multiple reads
// from expression causing it to be forced to an actual temporary in GLSL.
auto expr = access_chain_internal(ops[2], &ops[3], length, true, true);
auto &e = emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
e.base_expression = ops[2];
}
else
{
auto expr = access_chain_internal(ops[2], &ops[3], length, true);
emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
}
break;
}
case OpCompositeInsert:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t obj = ops[2];
uint32_t composite = ops[3];
const auto *elems = &ops[4];
length -= 4;
flush_variable_declaration(composite);
// Make a copy, then use access chain to store the variable.
statement(declare_temporary(result_type, id), to_expression(composite), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
auto chain = access_chain_internal(id, elems, length, true);
statement(chain, " = ", to_expression(obj), ";");
break;
}
case OpCopyMemory:
{
uint32_t lhs = ops[0];
uint32_t rhs = ops[1];
if (lhs != rhs)
{
flush_variable_declaration(lhs);
flush_variable_declaration(rhs);
statement(to_expression(lhs), " = ", to_expression(rhs), ";");
register_write(lhs);
}
break;
}
case OpCopyObject:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t rhs = ops[2];
bool pointer = get<SPIRType>(result_type).pointer;
if (expression_is_lvalue(rhs) && !pointer)
{
// Need a copy.
// For pointer types, we copy the pointer itself.
statement(declare_temporary(result_type, id), to_expression(rhs), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
}
else
{
// RHS expression is immutable, so just forward it.
// Copying these things really make no sense, but
// seems to be allowed anyways.
auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
if (pointer)
{
auto *var = maybe_get_backing_variable(rhs);
e.loaded_from = var ? var->self : 0;
}
}
break;
}
case OpVectorShuffle:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t vec0 = ops[2];
uint32_t vec1 = ops[3];
const auto *elems = &ops[4];
length -= 4;
auto &type0 = expression_type(vec0);
bool shuffle = false;
for (uint32_t i = 0; i < length; i++)
if (elems[i] >= type0.vecsize)
shuffle = true;
string expr;
bool trivial_forward;
if (shuffle)
{
trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
// Constructor style and shuffling from two different vectors.
vector<string> args;
for (uint32_t i = 0; i < length; i++)
{
if (elems[i] >= type0.vecsize)
args.push_back(join(to_enclosed_expression(vec1), ".", index_to_swizzle(elems[i] - type0.vecsize)));
else
args.push_back(join(to_enclosed_expression(vec0), ".", index_to_swizzle(elems[i])));
}
expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
}
else
{
trivial_forward = !expression_is_forwarded(vec0);
// We only source from first vector, so can use swizzle.
expr += to_enclosed_expression(vec0);
expr += ".";
for (uint32_t i = 0; i < length; i++)
expr += index_to_swizzle(elems[i]);
if (backend.swizzle_is_function && length > 1)
expr += "()";
}
// A shuffle is trivial in that it doesn't actually *do* anything.
// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
emit_op(result_type, id, expr, should_forward(vec0) && should_forward(vec1), trivial_forward);
break;
}
// ALU
case OpIsNan:
UFOP(isnan);
break;
case OpIsInf:
UFOP(isinf);
break;
case OpSNegate:
case OpFNegate:
UOP(-);
break;
case OpIAdd:
{
// For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(+, type);
break;
}
case OpFAdd:
BOP(+);
break;
case OpISub:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(-, type);
break;
}
case OpFSub:
BOP(-);
break;
case OpIMul:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(*, type);
break;
}
case OpVectorTimesMatrix:
case OpMatrixTimesVector:
{
// If the matrix needs transpose, just flip the multiply order.
auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
if (e && e->need_transpose)
{
e->need_transpose = false;
emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
e->need_transpose = true;
}
else
BOP(*);
break;
}
case OpFMul:
case OpMatrixTimesScalar:
case OpVectorTimesScalar:
case OpMatrixTimesMatrix:
BOP(*);
break;
case OpOuterProduct:
2016-07-27 08:59:00 +00:00
BFOP(outerProduct);
break;
case OpDot:
BFOP(dot);
break;
case OpTranspose:
UFOP(transpose);
break;
case OpSDiv:
BOP_CAST(/, SPIRType::Int);
break;
case OpUDiv:
BOP_CAST(/, SPIRType::UInt);
break;
case OpFDiv:
2016-05-23 11:30:02 +00:00
BOP(/);
break;
case OpShiftRightLogical:
BOP_CAST(>>, SPIRType::UInt);
break;
case OpShiftRightArithmetic:
BOP_CAST(>>, SPIRType::Int);
break;
case OpShiftLeftLogical:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(<<, type);
break;
}
case OpBitwiseOr:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(|, type);
break;
}
case OpBitwiseXor:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST (^, type);
break;
}
case OpBitwiseAnd:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(&, type);
break;
}
case OpNot:
UOP(~);
break;
case OpUMod:
BOP_CAST(%, SPIRType::UInt);
break;
case OpSMod:
BOP_CAST(%, SPIRType::Int);
break;
case OpFMod:
BFOP(mod);
break;
// Relational
case OpAny:
UFOP(any);
break;
case OpAll:
UFOP(all);
break;
case OpSelect:
emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
break;
case OpLogicalOr:
{
// No vector variant in GLSL for logical OR.
auto result_type = ops[0];
auto id = ops[1];
auto &type = get<SPIRType>(result_type);
if (type.vecsize > 1)
emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||");
else
BOP(||);
break;
}
case OpLogicalAnd:
{
// No vector variant in GLSL for logical AND.
auto result_type = ops[0];
auto id = ops[1];
auto &type = get<SPIRType>(result_type);
if (type.vecsize > 1)
emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&");
else
BOP(&&);
break;
}
case OpLogicalNot:
{
auto &type = get<SPIRType>(ops[0]);
if (type.vecsize > 1)
UFOP(not);
else
UOP(!);
break;
}
case OpIEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(equal, SPIRType::Int);
else
BOP_CAST(==, SPIRType::Int);
break;
}
case OpLogicalEqual:
case OpFOrdEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(equal);
else
2016-05-23 11:30:02 +00:00
BOP(==);
break;
}
case OpINotEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(notEqual, SPIRType::Int);
else
BOP_CAST(!=, SPIRType::Int);
break;
}
case OpLogicalNotEqual:
case OpFOrdNotEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(notEqual);
else
2016-05-23 11:30:02 +00:00
BOP(!=);
break;
}
case OpUGreaterThan:
case OpSGreaterThan:
{
auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(greaterThan, type);
else
BOP_CAST(>, type);
break;
}
case OpFOrdGreaterThan:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(greaterThan);
else
2016-05-23 11:30:02 +00:00
BOP(>);
break;
}
case OpUGreaterThanEqual:
case OpSGreaterThanEqual:
{
auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(greaterThanEqual, type);
else
BOP_CAST(>=, type);
break;
}
case OpFOrdGreaterThanEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(greaterThanEqual);
else
2016-05-23 11:30:02 +00:00
BOP(>=);
break;
}
case OpULessThan:
case OpSLessThan:
{
auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(lessThan, type);
else
BOP_CAST(<, type);
break;
}
case OpFOrdLessThan:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(lessThan);
else
2016-05-23 11:30:02 +00:00
BOP(<);
break;
}
case OpULessThanEqual:
case OpSLessThanEqual:
{
auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(lessThanEqual, type);
else
BOP_CAST(<=, type);
break;
}
case OpFOrdLessThanEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(lessThanEqual);
else
2016-05-23 11:30:02 +00:00
BOP(<=);
break;
}
// Conversion
case OpConvertFToU:
case OpConvertFToS:
case OpConvertSToF:
case OpConvertUToF:
case OpUConvert:
case OpSConvert:
case OpFConvert:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
emit_unary_func_op(result_type, id, ops[2], func.c_str());
break;
}
case OpBitcast:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t arg = ops[2];
auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
emit_unary_func_op(result_type, id, arg, op.c_str());
break;
}
2016-05-31 14:56:15 +00:00
case OpQuantizeToF16:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t arg = ops[2];
string op;
auto &type = get<SPIRType>(result_type);
switch (type.vecsize)
{
case 1:
op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
break;
case 2:
op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
break;
case 3:
{
auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
op = join("vec3(", op0, ", ", op1, ")");
break;
}
case 4:
{
auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
op = join("vec4(", op0, ", ", op1, ")");
break;
}
default:
SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
2016-05-31 14:56:15 +00:00
}
emit_op(result_type, id, op, should_forward(arg));
2016-05-31 14:56:15 +00:00
break;
}
// Derivatives
case OpDPdx:
UFOP(dFdx);
2016-09-17 12:33:16 +00:00
if (is_legacy_es())
require_extension("GL_OES_standard_derivatives");
break;
case OpDPdy:
UFOP(dFdy);
2016-09-17 12:33:16 +00:00
if (is_legacy_es())
require_extension("GL_OES_standard_derivatives");
break;
2017-03-24 12:59:19 +00:00
case OpDPdxFine:
UFOP(dFdxFine);
2017-03-24 14:58:54 +00:00
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
2017-03-24 12:59:19 +00:00
break;
case OpDPdyFine:
UFOP(dFdyFine);
2017-03-24 14:58:54 +00:00
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
2017-03-24 12:59:19 +00:00
break;
case OpDPdxCoarse:
2017-03-24 14:58:54 +00:00
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
2017-03-24 12:59:19 +00:00
UFOP(dFdxCoarse);
2017-03-24 14:58:54 +00:00
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
2017-03-24 12:59:19 +00:00
break;
case OpDPdyCoarse:
UFOP(dFdyCoarse);
2017-03-24 14:58:54 +00:00
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
2017-03-24 12:59:19 +00:00
break;
case OpFwidth:
UFOP(fwidth);
2016-09-17 12:33:16 +00:00
if (is_legacy_es())
require_extension("GL_OES_standard_derivatives");
break;
// Bitfield
case OpBitFieldInsert:
2017-03-25 14:38:20 +00:00
// TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
QFOP(bitfieldInsert);
break;
case OpBitFieldSExtract:
case OpBitFieldUExtract:
2017-03-25 14:38:20 +00:00
// TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
TFOP(bitfieldExtract);
break;
case OpBitReverse:
UFOP(bitfieldReverse);
break;
case OpBitCount:
UFOP(bitCount);
break;
// Atomics
case OpAtomicExchange:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t ptr = ops[2];
// Ignore semantics for now, probably only relevant to CL.
uint32_t val = ops[5];
const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
forced_temporaries.insert(id);
emit_binary_func_op(result_type, id, ptr, val, op);
flush_all_atomic_capable_variables();
break;
}
case OpAtomicCompareExchange:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t ptr = ops[2];
uint32_t val = ops[6];
uint32_t comp = ops[7];
const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
forced_temporaries.insert(id);
emit_trinary_func_op(result_type, id, ptr, comp, val, op);
flush_all_atomic_capable_variables();
break;
}
case OpAtomicLoad:
flush_all_atomic_capable_variables();
// FIXME: Image?
UFOP(atomicCounter);
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
2017-10-10 08:22:40 +00:00
// OpAtomicStore unimplemented. Not sure what would use that.
// OpAtomicLoad seems to only be relevant for atomic counters.
case OpAtomicIIncrement:
forced_temporaries.insert(ops[1]);
// FIXME: Image?
UFOP(atomicCounterIncrement);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
case OpAtomicIDecrement:
forced_temporaries.insert(ops[1]);
// FIXME: Image?
UFOP(atomicCounterDecrement);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
case OpAtomicIAdd:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicISub:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
forced_temporaries.insert(ops[1]);
auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicSMin:
case OpAtomicUMin:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicSMax:
case OpAtomicUMax:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicAnd:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicOr:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicXor:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
// Geometry shaders
case OpEmitVertex:
statement("EmitVertex();");
break;
case OpEndPrimitive:
statement("EndPrimitive();");
break;
case OpEmitStreamVertex:
statement("EmitStreamVertex();");
break;
case OpEndStreamPrimitive:
statement("EndStreamPrimitive();");
break;
// Textures
case OpImageSampleExplicitLod:
case OpImageSampleProjExplicitLod:
case OpImageSampleDrefExplicitLod:
case OpImageSampleProjDrefExplicitLod:
2016-09-17 12:33:16 +00:00
case OpImageSampleImplicitLod:
case OpImageSampleProjImplicitLod:
case OpImageSampleDrefImplicitLod:
case OpImageSampleProjDrefImplicitLod:
case OpImageFetch:
case OpImageGather:
case OpImageDrefGather:
// Gets a bit hairy, so move this to a separate instruction.
emit_texture_op(instruction);
break;
case OpImage:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto &e = emit_op(result_type, id, to_expression(ops[2]), true);
2016-07-11 10:47:46 +00:00
// When using the image, we need to know which variable it is actually loaded from.
auto *var = maybe_get_backing_variable(ops[2]);
e.loaded_from = var ? var->self : 0;
break;
}
2016-07-19 07:22:54 +00:00
case OpImageQueryLod:
{
if (!options.es && options.version < 400)
{
require_extension("GL_ARB_texture_query_lod");
// For some reason, the ARB spec is all-caps.
BFOP(textureQueryLOD);
}
else if (options.es)
SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
2016-07-19 07:22:54 +00:00
else
BFOP(textureQueryLod);
break;
}
2016-07-19 07:28:32 +00:00
case OpImageQueryLevels:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
2016-07-19 07:28:32 +00:00
if (!options.es && options.version < 430)
require_extension("GL_ARB_texture_query_levels");
if (options.es)
SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
auto expr = join("textureQueryLevels(", to_expression(ops[2]), ")");
auto &restype = get<SPIRType>(ops[0]);
expr = bitcast_expression(restype, SPIRType::Int, expr);
emit_op(result_type, id, expr, true);
2016-07-19 07:28:32 +00:00
break;
}
2016-07-11 10:47:46 +00:00
case OpImageQuerySamples:
{
auto &type = expression_type(ops[2]);
uint32_t result_type = ops[0];
uint32_t id = ops[1];
2016-07-11 10:47:46 +00:00
string expr;
if (type.image.sampled == 2)
expr = join("imageSamples(", to_expression(ops[2]), ")");
2016-07-11 10:47:46 +00:00
else
expr = join("textureSamples(", to_expression(ops[2]), ")");
auto &restype = get<SPIRType>(ops[0]);
expr = bitcast_expression(restype, SPIRType::Int, expr);
emit_op(result_type, id, expr, true);
break;
}
case OpSampledImage:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_sampled_image_op(result_type, id, ops[2], ops[3]);
break;
}
case OpImageQuerySizeLod:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
2017-09-19 14:09:19 +00:00
auto expr = join("textureSize(", to_expression(ops[2]), ", ", bitcast_expression(SPIRType::Int, ops[3]), ")");
auto &restype = get<SPIRType>(ops[0]);
expr = bitcast_expression(restype, SPIRType::Int, expr);
emit_op(result_type, id, expr, true);
break;
}
// Image load/store
case OpImageRead:
{
// We added Nonreadable speculatively to the OpImage variable due to glslangValidator
// not adding the proper qualifiers.
// If it turns out we need to read the image after all, remove the qualifier and recompile.
auto *var = maybe_get_backing_variable(ops[2]);
if (var)
{
auto &flags = meta.at(var->self).decoration.decoration_flags;
if (flags & (1ull << DecorationNonReadable))
{
flags &= ~(1ull << DecorationNonReadable);
force_recompile = true;
}
}
uint32_t result_type = ops[0];
uint32_t id = ops[1];
bool pure;
string imgexpr;
auto &type = expression_type(ops[2]);
if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
{
2016-07-11 11:36:11 +00:00
if (type.image.ms)
SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
2016-07-11 11:36:11 +00:00
2016-05-23 11:30:02 +00:00
auto itr =
find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
if (itr == end(pls_inputs))
{
// For non-PLS inputs, we rely on subpass type remapping information to get it right
// since ImageRead always returns 4-component vectors and the backing type is opaque.
if (!var->remapped_components)
SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
imgexpr = remap_swizzle(result_type, var->remapped_components, ops[2]);
}
else
{
// PLS input could have different number of components than what the SPIR expects, swizzle to
// the appropriate vector size.
uint32_t components = pls_format_to_components(itr->format);
imgexpr = remap_swizzle(result_type, components, ops[2]);
}
pure = true;
}
else if (type.image.dim == DimSubpassData)
{
if (options.vulkan_semantics)
{
// With Vulkan semantics, use the proper Vulkan GLSL construct.
2016-07-11 11:36:11 +00:00
if (type.image.ms)
{
uint32_t operands = ops[4];
if (operands != ImageOperandsSampleMask || length != 6)
SPIRV_CROSS_THROW(
2016-07-11 11:36:11 +00:00
"Multisampled image used in OpImageRead, but unexpected operand mask was used.");
uint32_t samples = ops[5];
imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
}
else
imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
}
else
{
2016-07-11 11:36:11 +00:00
if (type.image.ms)
{
uint32_t operands = ops[4];
if (operands != ImageOperandsSampleMask || length != 6)
SPIRV_CROSS_THROW(
2016-07-11 11:36:11 +00:00
"Multisampled image used in OpImageRead, but unexpected operand mask was used.");
uint32_t samples = ops[5];
imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
to_expression(samples), ")");
}
else
{
// Implement subpass loads via texture barrier style sampling.
imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
}
}
pure = true;
}
else
{
// Plain image load/store.
if (type.image.ms)
{
uint32_t operands = ops[4];
if (operands != ImageOperandsSampleMask || length != 6)
SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
uint32_t samples = ops[5];
imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ",
to_expression(samples), ")");
}
else
imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", to_expression(ops[3]), ")");
pure = false;
}
if (var && var->forwardable)
{
auto &e = emit_op(result_type, id, imgexpr, true);
// We only need to track dependencies if we're reading from image load/store.
if (!pure)
{
e.loaded_from = var->self;
var->dependees.push_back(id);
}
}
else
emit_op(result_type, id, imgexpr, false);
break;
}
case OpImageTexelPointer:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true);
2016-07-11 10:47:46 +00:00
// When using the pointer, we need to know which variable it is actually loaded from.
auto *var = maybe_get_backing_variable(ops[2]);
e.loaded_from = var ? var->self : 0;
break;
}
case OpImageWrite:
{
// We added Nonwritable speculatively to the OpImage variable due to glslangValidator
// not adding the proper qualifiers.
// If it turns out we need to write to the image after all, remove the qualifier and recompile.
auto *var = maybe_get_backing_variable(ops[0]);
if (var)
{
auto &flags = meta.at(var->self).decoration.decoration_flags;
if (flags & (1ull << DecorationNonWritable))
{
flags &= ~(1ull << DecorationNonWritable);
force_recompile = true;
}
}
auto &type = expression_type(ops[0]);
if (type.image.ms)
{
uint32_t operands = ops[3];
if (operands != ImageOperandsSampleMask || length != 5)
SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
uint32_t samples = ops[4];
statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(samples),
", ", to_expression(ops[2]), ");");
}
else
statement("imageStore(", to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]),
");");
if (var && variable_storage_is_aliased(*var))
flush_all_aliased_variables();
break;
}
case OpImageQuerySize:
{
auto &type = expression_type(ops[2]);
uint32_t result_type = ops[0];
uint32_t id = ops[1];
if (type.basetype == SPIRType::Image)
{
string expr;
if (type.image.sampled == 2)
{
// The size of an image is always constant.
expr = join("imageSize(", to_expression(ops[2]), ")");
}
else
{
// This path is hit for samplerBuffers and multisampled images which do not have LOD.
expr = join("textureSize(", to_expression(ops[2]), ")");
}
auto &restype = get<SPIRType>(ops[0]);
expr = bitcast_expression(restype, SPIRType::Int, expr);
emit_op(result_type, id, expr, true);
}
else
SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
break;
}
// Compute
case OpControlBarrier:
{
// Ignore execution and memory scope.
if (get_entry_point().model == ExecutionModelGLCompute)
{
uint32_t mem = get<SPIRConstant>(ops[2]).scalar();
if (mem == MemorySemanticsWorkgroupMemoryMask)
statement("memoryBarrierShared();");
else if (mem)
statement("memoryBarrier();");
}
statement("barrier();");
break;
}
case OpMemoryBarrier:
{
uint32_t mem = get<SPIRConstant>(ops[1]).scalar();
// We cannot forward any loads beyond the memory barrier.
if (mem)
flush_all_active_variables();
if (mem == MemorySemanticsWorkgroupMemoryMask)
statement("memoryBarrierShared();");
else if (mem)
statement("memoryBarrier();");
break;
}
case OpExtInst:
{
uint32_t extension_set = ops[2];
if (get<SPIRExtension>(extension_set).ext != SPIRExtension::GLSL)
{
statement("// unimplemented ext op ", instruction.op);
break;
}
emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
break;
}
default:
statement("// unimplemented op ", instruction.op);
break;
}
2016-03-02 17:09:16 +00:00
}
2016-10-19 21:09:51 +00:00
// Appends function arguments, mapped from global variables, beyond the specified arg index.
// This is used when a function call uses fewer arguments than the function defines.
2016-10-19 21:09:51 +00:00
// This situation may occur if the function signature has been dynamically modified to
// extract global variables referenced from within the function, and convert them to
// function arguments. This is necessary for shader languages that do not support global
// access to shader input content from within a function (eg. Metal). Each additional
// function args uses the name of the global variable. Function nesting will modify the
// functions and calls all the way up the nesting chain.
void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, vector<string> &arglist)
{
2016-10-24 13:24:24 +00:00
auto &args = func.arguments;
uint32_t arg_cnt = uint32_t(args.size());
2016-10-24 13:24:24 +00:00
for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
{
assert(args[arg_idx].alias_global_variable);
2016-10-24 13:24:24 +00:00
arglist.push_back(to_func_call_arg(args[arg_idx].id));
}
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
{
auto &memb = meta[type.self].members;
if (index < memb.size() && !memb[index].alias.empty())
return memb[index].alias;
else
return join("_m", index);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
{
auto &memb = meta[type.self].members;
if (index < memb.size() && !memb[index].alias.empty())
{
auto &name = memb[index].alias;
if (name.empty())
return;
// Reserved for temporaries.
if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
{
name.clear();
return;
}
update_name_cache(type.member_name_cache, name);
}
}
// Checks whether the ID is a row_major matrix that requires conversion before use
bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
{
// Natively supported row-major matrices do not need to be converted.
// Legacy targets do not support row major.
if (backend.native_row_major_matrix && !is_legacy())
return false;
// Non-matrix or column-major matrix types do not need to be converted.
if (!(meta[id].decoration.decoration_flags & (1ull << DecorationRowMajor)))
return false;
// Only square row-major matrices can be converted at this time.
// Converting non-square matrices will require defining custom GLSL function that
// swaps matrix elements while retaining the original dimensional form of the matrix.
const auto type = expression_type(id);
if (type.columns != type.vecsize)
2016-12-15 19:46:10 +00:00
SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
return true;
}
// Checks whether the member is a row_major matrix that requires conversion before use
bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
{
// Natively supported row-major matrices do not need to be converted.
if (backend.native_row_major_matrix && !is_legacy())
return false;
// Non-matrix or column-major matrix types do not need to be converted.
if (!(combined_decoration_for_member(type, index) & (1ull << DecorationRowMajor)))
return false;
// Only square row-major matrices can be converted at this time.
// Converting non-square matrices will require defining custom GLSL function that
// swaps matrix elements while retaining the original dimensional form of the matrix.
const auto mbr_type = get<SPIRType>(type.member_types[index]);
if (mbr_type.columns != mbr_type.vecsize)
2016-12-15 19:46:10 +00:00
SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
return true;
}
// Checks whether the member is in packed data type, that might need to be unpacked.
// GLSL does not define packed data types, but certain subclasses do.
bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index)
{
return has_member_decoration(type.self, index, DecorationCPacked);
}
// Wraps the expression string in a function call that converts the
// row_major matrix result of the expression to a column_major matrix.
// Base implementation uses the standard library transpose() function.
// Subclasses may override to use a different function.
string CompilerGLSL::convert_row_major_matrix(string exp_str)
{
strip_enclosed_expression(exp_str);
return join("transpose(", exp_str, ")");
}
string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
{
string type_name = type_to_glsl(type, id);
remap_variable_type_name(type, name, type_name);
2016-09-20 08:17:41 +00:00
return join(type_name, " ", name, type_to_array_glsl(type));
}
// Emit a structure member. Subclasses may override to modify output,
// or to dynamically add a padding member if needed.
void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const string &qualifier)
2016-03-02 17:09:16 +00:00
{
auto &membertype = get<SPIRType>(member_type_id);
uint64_t memberflags = 0;
auto &memb = meta[type.self].members;
if (index < memb.size())
memberflags = memb[index].decoration_flags;
string qualifiers;
bool is_block = (meta[type.self].decoration.decoration_flags &
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))) != 0;
if (is_block)
qualifiers = to_interpolation_qualifiers(memberflags);
statement(layout_for_member(type, index), qualifiers, qualifier,
flags_to_precision_qualifiers_glsl(membertype, memberflags),
variable_decl(membertype, to_member_name(type, index)), ";");
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, uint64_t flags)
2016-03-02 17:09:16 +00:00
{
if (options.es)
{
auto &execution = get_entry_point();
2016-07-27 08:59:00 +00:00
// Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp).
if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
type.basetype != SPIRType::Sampler)
return "";
if (flags & (1ull << DecorationRelaxedPrecision))
{
bool implied_fmediump = type.basetype == SPIRType::Float &&
options.fragment.default_float_precision == Options::Mediump &&
execution.model == ExecutionModelFragment;
bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
options.fragment.default_int_precision == Options::Mediump &&
execution.model == ExecutionModelFragment;
return implied_fmediump || implied_imediump ? "" : "mediump ";
}
else
{
bool implied_fhighp =
type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
execution.model == ExecutionModelFragment) ||
(execution.model != ExecutionModelFragment));
bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
((options.fragment.default_int_precision == Options::Highp &&
execution.model == ExecutionModelFragment) ||
(execution.model != ExecutionModelFragment));
return implied_fhighp || implied_ihighp ? "" : "highp ";
}
}
else
return "";
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
2016-03-02 17:09:16 +00:00
{
return flags_to_precision_qualifiers_glsl(expression_type(id), meta[id].decoration.decoration_flags);
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
{
auto flags = meta[id].decoration.decoration_flags;
string res;
auto *var = maybe_get<SPIRVariable>(id);
if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
res += "shared ";
res += to_interpolation_qualifiers(flags);
if (var)
res += to_storage_qualifiers_glsl(*var);
2017-08-29 13:52:59 +00:00
auto &type = expression_type(id);
if (type.image.dim != DimSubpassData && type.image.sampled == 2)
{
2017-08-29 13:52:59 +00:00
if (flags & (1ull << DecorationCoherent))
res += "coherent ";
2017-08-29 13:54:22 +00:00
if (flags & (1ull << DecorationRestrict))
res += "restrict ";
if (flags & (1ull << DecorationNonWritable))
res += "readonly ";
if (flags & (1ull << DecorationNonReadable))
res += "writeonly ";
}
2017-08-29 13:52:59 +00:00
res += to_precision_qualifiers_glsl(id);
return res;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
{
// glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ...
auto &type = expression_type(arg.id);
const char *direction = "";
if (type.pointer)
{
if (arg.write_count && arg.read_count)
direction = "inout ";
else if (arg.write_count)
direction = "out ";
}
return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::variable_decl(const SPIRVariable &variable)
{
// Ignore the pointer type since GLSL doesn't have pointers.
auto &type = get<SPIRType>(variable.basetype);
2017-06-17 08:56:24 +00:00
auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
if (variable.loop_variable)
res += join(" = ", to_expression(variable.static_expression));
else if (variable.initializer)
res += join(" = ", to_expression(variable.initializer));
return res;
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
2016-03-02 17:09:16 +00:00
{
auto flags = meta[variable.self].decoration.decoration_flags;
if (flags & (1ull << DecorationRelaxedPrecision))
return "mediump ";
else
return "highp ";
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::pls_decl(const PlsRemap &var)
{
auto &variable = get<SPIRVariable>(var.id);
2016-03-02 17:09:16 +00:00
SPIRType type;
type.vecsize = pls_format_to_components(var.format);
type.basetype = pls_format_to_basetype(var.format);
2016-03-02 17:09:16 +00:00
return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
to_name(variable.self));
2016-03-02 17:09:16 +00:00
}
uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
{
assert(type.array.size() == type.array_size_literal.size());
if (!type.array_size_literal[index])
SPIRV_CROSS_THROW("The array size is not a literal, but a specialization constant or spec constant op.");
return type.array[index];
}
string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
{
assert(type.array.size() == type.array_size_literal.size());
auto &size = type.array[index];
if (!type.array_size_literal[index])
return to_expression(size);
else if (size)
return convert_to_string(size);
else if (!backend.flexible_member_array_supported)
{
// For runtime-sized arrays, we can work around
// lack of standard support for this by simply having
// a single element array.
//
// Runtime length arrays must always be the last element
// in an interface block.
return "1";
}
else
return "";
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
{
if (type.array.empty())
return "";
if (options.flatten_multidimensional_arrays)
{
string res;
res += "[";
for (auto i = uint32_t(type.array.size()); i; i--)
{
res += enclose_expression(to_array_size(type, i - 1));
if (i > 1)
res += " * ";
}
res += "]";
return res;
}
else
{
if (type.array.size() > 1)
{
if (!options.es && options.version < 430)
require_extension("GL_ARB_arrays_of_arrays");
else if (options.es && options.version < 310)
SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
"Try using --flatten-multidimensional-arrays or set "
"options.flatten_multidimensional_arrays to true.");
}
string res;
for (auto i = uint32_t(type.array.size()); i; i--)
{
res += "[";
res += to_array_size(type, i - 1);
res += "]";
}
return res;
}
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t /* id */)
2016-03-02 17:09:16 +00:00
{
auto &imagetype = get<SPIRType>(type.image.type);
string res;
switch (imagetype.basetype)
{
case SPIRType::Int:
res = "i";
break;
case SPIRType::UInt:
res = "u";
break;
default:
break;
}
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
2016-07-11 11:36:11 +00:00
return res + "subpassInput" + (type.image.ms ? "MS" : "");
// If we're emulating subpassInput with samplers, force sampler2D
// so we don't have to specify format.
if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
{
// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
if (type.image.dim == DimBuffer && type.image.sampled == 1)
res += "sampler";
else
res += type.image.sampled == 2 ? "image" : "texture";
}
else
res += "sampler";
switch (type.image.dim)
{
case Dim1D:
res += "1D";
break;
case Dim2D:
res += "2D";
break;
case Dim3D:
res += "3D";
break;
case DimCube:
res += "Cube";
break;
case DimBuffer:
if (options.es && options.version < 320)
require_extension("GL_OES_texture_buffer");
else if (!options.es && options.version < 300)
require_extension("GL_EXT_texture_buffer_object");
res += "Buffer";
break;
case DimSubpassData:
res += "2D";
break;
default:
SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported.");
}
2016-07-11 10:47:46 +00:00
if (type.image.ms)
res += "MS";
if (type.image.arrayed)
{
2016-09-17 13:56:23 +00:00
if (is_legacy_desktop())
2016-09-17 12:33:16 +00:00
require_extension("GL_EXT_texture_array");
res += "Array";
}
if (type.image.depth)
res += "Shadow";
return res;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
{
if (type.array.size() > 1)
{
if (options.flatten_multidimensional_arrays)
SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]().");
else if (!options.es && options.version < 430)
require_extension("GL_ARB_arrays_of_arrays");
else if (options.es && options.version < 310)
SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
}
auto e = type_to_glsl(type);
for (uint32_t i = 0; i < type.array.size(); i++)
e += "[]";
return e;
2016-03-02 17:09:16 +00:00
}
// The optional id parameter indicates the object whose type we are trying
// to find the description for. It is optional. Most type descriptions do not
// depend on a specific object's use of that type.
string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
2016-03-02 17:09:16 +00:00
{
// Ignore the pointer type since GLSL doesn't have pointers.
switch (type.basetype)
{
case SPIRType::Struct:
// Need OpName lookup here to get a "sensible" name for a struct.
if (backend.explicit_struct_type)
return join("struct ", to_name(type.self));
else
return to_name(type.self);
case SPIRType::Image:
case SPIRType::SampledImage:
return image_type_glsl(type, id);
case SPIRType::Sampler:
// The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
// this distinction into the type system.
return comparison_samplers.count(id) ? "samplerShadow" : "sampler";
case SPIRType::Void:
return "void";
default:
break;
}
if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
{
switch (type.basetype)
{
case SPIRType::Boolean:
return "bool";
case SPIRType::Int:
return backend.basic_int_type;
case SPIRType::UInt:
return backend.basic_uint_type;
case SPIRType::AtomicCounter:
return "atomic_uint";
case SPIRType::Float:
return "float";
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
return "double";
2016-07-27 09:27:00 +00:00
case SPIRType::Int64:
return "int64_t";
case SPIRType::UInt64:
return "uint64_t";
default:
return "???";
}
}
else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
{
switch (type.basetype)
{
case SPIRType::Boolean:
return join("bvec", type.vecsize);
case SPIRType::Int:
return join("ivec", type.vecsize);
case SPIRType::UInt:
return join("uvec", type.vecsize);
case SPIRType::Float:
return join("vec", type.vecsize);
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
return join("dvec", type.vecsize);
2016-07-27 09:27:00 +00:00
case SPIRType::Int64:
return join("i64vec", type.vecsize);
case SPIRType::UInt64:
return join("u64vec", type.vecsize);
default:
return "???";
}
}
else if (type.vecsize == type.columns) // Simple Matrix builtin
{
switch (type.basetype)
{
case SPIRType::Boolean:
return join("bmat", type.vecsize);
case SPIRType::Int:
return join("imat", type.vecsize);
case SPIRType::UInt:
return join("umat", type.vecsize);
case SPIRType::Float:
return join("mat", type.vecsize);
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
return join("dmat", type.vecsize);
2016-07-27 09:27:00 +00:00
// Matrix types not supported for int64/uint64.
default:
return "???";
}
}
else
{
switch (type.basetype)
{
case SPIRType::Boolean:
return join("bmat", type.columns, "x", type.vecsize);
case SPIRType::Int:
return join("imat", type.columns, "x", type.vecsize);
case SPIRType::UInt:
return join("umat", type.columns, "x", type.vecsize);
case SPIRType::Float:
return join("mat", type.columns, "x", type.vecsize);
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
return join("dmat", type.columns, "x", type.vecsize);
2016-07-27 09:27:00 +00:00
// Matrix types not supported for int64/uint64.
default:
return "???";
}
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::add_variable(unordered_set<string> &variables, uint32_t id)
2016-03-02 17:09:16 +00:00
{
auto &name = meta[id].decoration.alias;
if (name.empty())
return;
// Reserved for temporaries.
if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
{
name.clear();
return;
}
update_name_cache(variables, name);
}
void CompilerGLSL::add_local_variable_name(uint32_t id)
{
add_variable(local_variable_names, id);
}
void CompilerGLSL::add_resource_name(uint32_t id)
{
add_variable(resource_names, id);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::add_header_line(const std::string &line)
{
header_lines.push_back(line);
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::require_extension(const string &ext)
{
if (forced_extensions.find(ext) == end(forced_extensions))
{
forced_extensions.insert(ext);
force_recompile = true;
}
2016-03-02 17:09:16 +00:00
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
void CompilerGLSL::flatten_buffer_block(uint32_t id)
{
auto &var = get<SPIRVariable>(id);
auto &type = get<SPIRType>(var.basetype);
auto name = to_name(type.self, false);
auto flags = meta.at(type.self).decoration.decoration_flags;
if (!type.array.empty())
SPIRV_CROSS_THROW(name + " is an array of UBOs.");
if (type.basetype != SPIRType::Struct)
SPIRV_CROSS_THROW(name + " is not a struct.");
if ((flags & (1ull << DecorationBlock)) == 0)
SPIRV_CROSS_THROW(name + " is not a block.");
if (type.member_types.empty())
SPIRV_CROSS_THROW(name + " is an empty struct.");
flattened_buffer_blocks.insert(id);
}
2016-03-02 17:09:16 +00:00
bool CompilerGLSL::check_atomic_image(uint32_t id)
{
auto &type = expression_type(id);
if (type.storage == StorageClassImage)
{
if (options.es && options.version < 320)
require_extension("GL_OES_shader_image_atomic");
auto *var = maybe_get_backing_variable(id);
if (var)
{
auto &flags = meta.at(var->self).decoration.decoration_flags;
if (flags & ((1ull << DecorationNonWritable) | (1ull << DecorationNonReadable)))
{
flags &= ~(1ull << DecorationNonWritable);
flags &= ~(1ull << DecorationNonReadable);
force_recompile = true;
}
}
return true;
}
else
return false;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_function_prototype(SPIRFunction &func, uint64_t return_flags)
{
// Avoid shadow declarations.
local_variable_names = resource_names;
string decl;
auto &type = get<SPIRType>(func.return_type);
decl += flags_to_precision_qualifiers_glsl(type, return_flags);
decl += type_to_glsl(type);
decl += " ";
if (func.self == entry_point)
{
decl += clean_func_name("main");
processing_entry_point = true;
}
else
decl += clean_func_name(to_name(func.self));
decl += "(";
2016-09-11 11:05:44 +00:00
vector<string> arglist;
for (auto &arg : func.arguments)
{
2016-09-11 11:05:44 +00:00
// Do not pass in separate images or samplers if we're remapping
// to combined image samplers.
if (skip_argument(arg.id))
continue;
// Might change the variable name if it already exists in this function.
// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
// to use same name for variables.
// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
add_local_variable_name(arg.id);
2016-09-11 11:05:44 +00:00
arglist.push_back(argument_decl(arg));
2016-09-11 10:54:08 +00:00
// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
auto *var = maybe_get<SPIRVariable>(arg.id);
if (var)
var->parameter = &arg;
}
for (auto &arg : func.shadow_arguments)
{
// Might change the variable name if it already exists in this function.
// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
// to use same name for variables.
// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
add_local_variable_name(arg.id);
2016-09-11 11:05:44 +00:00
arglist.push_back(argument_decl(arg));
// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
auto *var = maybe_get<SPIRVariable>(arg.id);
if (var)
var->parameter = &arg;
}
2016-09-11 11:05:44 +00:00
decl += merge(arglist);
decl += ")";
statement(decl);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_function(SPIRFunction &func, uint64_t return_flags)
{
// Avoid potential cycles.
if (func.active)
return;
func.active = true;
// If we depend on a function, emit that function before we emit our own function.
for (auto block : func.blocks)
{
auto &b = get<SPIRBlock>(block);
for (auto &i : b.ops)
{
auto ops = stream(i);
auto op = static_cast<Op>(i.op);
if (op == OpFunctionCall)
{
// Recursively emit functions which are called.
uint32_t id = ops[2];
emit_function(get<SPIRFunction>(id), meta[ops[1]].decoration.decoration_flags);
}
}
}
emit_function_prototype(func, return_flags);
begin_scope();
current_function = &func;
auto &entry_block = get<SPIRBlock>(func.entry_block);
if (!func.analyzed_variable_scope)
{
if (options.cfg_analysis)
{
analyze_variable_scope(func);
// Check if we can actually use the loop variables we found in analyze_variable_scope.
// To use multiple initializers, we need the same type and qualifiers.
for (auto block : func.blocks)
{
auto &b = get<SPIRBlock>(block);
if (b.loop_variables.size() < 2)
continue;
uint64_t flags = get_decoration_mask(b.loop_variables.front());
uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype;
bool invalid_initializers = false;
for (auto loop_variable : b.loop_variables)
{
if (flags != get_decoration_mask(loop_variable) ||
type != get<SPIRVariable>(b.loop_variables.front()).basetype)
{
invalid_initializers = true;
break;
}
}
if (invalid_initializers)
{
for (auto loop_variable : b.loop_variables)
get<SPIRVariable>(loop_variable).loop_variable = false;
b.loop_variables.clear();
}
}
}
else
entry_block.dominated_variables = func.local_variables;
func.analyzed_variable_scope = true;
}
for (auto &v : func.local_variables)
{
auto &var = get<SPIRVariable>(v);
if (expression_is_lvalue(v))
{
add_local_variable_name(var.self);
if (var.initializer)
statement(variable_decl_function_local(var), ";");
else
{
// Don't declare variable until first use to declutter the GLSL output quite a lot.
// If we don't touch the variable before first branch,
// declare it then since we need variable declaration to be in top scope.
// Never declare empty structs. They have no meaningful representation.
auto &type = get<SPIRType>(var.basetype);
bool empty_struct = type.basetype == SPIRType::Struct && type.member_types.empty();
var.deferred_declaration = !empty_struct;
}
}
else
{
// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
// This means that when we OpStore to these variables, we just write in the expression ID directly.
// This breaks any kind of branching, since the variable must be statically assigned.
// Branching on samplers and images would be pretty much impossible to fake in GLSL.
var.statically_assigned = true;
}
var.loop_variable_enable = false;
// Loop variables are never declared outside their for-loop, so block any implicit declaration.
if (var.loop_variable)
var.deferred_declaration = false;
}
entry_block.loop_dominator = SPIRBlock::NoDominator;
emit_block_chain(entry_block);
end_scope();
processing_entry_point = false;
statement("");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_fixup()
{
auto &execution = get_entry_point();
if (execution.model == ExecutionModelVertex)
{
if (options.vertex.fixup_clipspace)
{
const char *suffix = backend.float_literal_suffix ? "f" : "";
statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
}
if (options.vertex.flip_vert_y)
statement("gl_Position.y = -gl_Position.y;");
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to)
{
auto &child = get<SPIRBlock>(to);
for (auto &phi : child.phi_variables)
if (phi.parent == from)
return true;
return false;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
{
auto &child = get<SPIRBlock>(to);
2016-03-02 17:09:16 +00:00
for (auto &phi : child.phi_variables)
{
if (phi.parent == from)
{
auto &var = get<SPIRVariable>(phi.function_variable);
// A Phi variable might be a loop variable, so flush to static expression.
if (var.loop_variable && !var.loop_variable_enable)
var.static_expression = phi.local_variable;
else
{
flush_variable_declaration(phi.function_variable);
// This might be called in continue block, so make sure we
2017-09-25 08:16:45 +00:00
// use this to emit ESSL 1.0 compliant increments/decrements.
auto lhs = to_expression(phi.function_variable);
auto rhs = to_expression(phi.local_variable);
if (!optimize_read_modify_write(lhs, rhs))
statement(lhs, " = ", rhs, ";");
}
}
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::branch(uint32_t from, uint32_t to)
{
flush_phi(from, to);
flush_all_active_variables();
// This is only a continue if we branch to our loop dominator.
if (loop_blocks.find(to) != end(loop_blocks) && get<SPIRBlock>(from).loop_dominator == to)
{
// This can happen if we had a complex continue block which was emitted.
// Once the continue block tries to branch to the loop header, just emit continue;
// and end the chain here.
statement("continue;");
}
else if (is_continue(to))
{
auto &to_block = get<SPIRBlock>(to);
if (to_block.complex_continue)
{
// Just emit the whole block chain as is.
auto usage_counts = expression_usage_counts;
auto invalid = invalid_expressions;
emit_block_chain(to_block);
// Expression usage counts and invalid expressions
// are moot after returning from the continue block.
// Since we emit the same block multiple times,
// we don't want to invalidate ourselves.
expression_usage_counts = usage_counts;
invalid_expressions = invalid;
}
else
{
auto &from_block = get<SPIRBlock>(from);
auto &dominator = get<SPIRBlock>(from_block.loop_dominator);
// For non-complex continue blocks, we implicitly branch to the continue block
// by having the continue block be part of the loop header in for (; ; continue-block).
bool outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
// Some simplification for for-loops. We always end up with a useless continue;
// statement since we branch to a loop block.
// Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block,
// we can avoid writing out an explicit continue statement.
// Similar optimization to return statements if we know we're outside flow control.
if (!outside_control_flow)
statement("continue;");
}
}
else if (is_break(to))
statement("break;");
else if (!is_conditional(to))
emit_block_chain(get<SPIRBlock>(to));
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
{
// If we branch directly to a selection merge target, we don't really need a code path.
bool true_sub = !is_conditional(true_block);
bool false_sub = !is_conditional(false_block);
if (true_sub)
{
statement("if (", to_expression(cond), ")");
begin_scope();
branch(from, true_block);
end_scope();
if (false_sub)
{
statement("else");
begin_scope();
branch(from, false_block);
end_scope();
}
else if (flush_phi_required(from, false_block))
{
statement("else");
begin_scope();
flush_phi(from, false_block);
end_scope();
}
}
else if (false_sub && !true_sub)
{
// Only need false path, use negative conditional.
statement("if (!", to_expression(cond), ")");
begin_scope();
branch(from, false_block);
end_scope();
if (flush_phi_required(from, true_block))
{
statement("else");
begin_scope();
flush_phi(from, true_block);
end_scope();
}
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block)
{
// Propagate down the loop dominator block, so that dominated blocks can back trace.
if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator)
{
uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator;
2016-05-23 11:30:02 +00:00
auto set_dominator = [this](uint32_t self, uint32_t new_dominator) {
auto &dominated_block = this->get<SPIRBlock>(self);
// If we already have a loop dominator, we're trying to break out to merge targets
// which should not update the loop dominator.
if (!dominated_block.loop_dominator)
dominated_block.loop_dominator = new_dominator;
};
// After merging a loop, we inherit the loop dominator always.
if (block.merge_block)
set_dominator(block.merge_block, block.loop_dominator);
if (block.true_block)
set_dominator(block.true_block, dominator);
if (block.false_block)
set_dominator(block.false_block, dominator);
if (block.next_block)
set_dominator(block.next_block, dominator);
for (auto &c : block.cases)
set_dominator(c.block, dominator);
// In older glslang output continue_block can be == loop header.
if (block.continue_block && block.continue_block != block.self)
set_dominator(block.continue_block, dominator);
}
2016-03-02 17:09:16 +00:00
}
// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
string CompilerGLSL::emit_continue_block(uint32_t continue_block)
{
auto *block = &get<SPIRBlock>(continue_block);
// While emitting the continue block, declare_temporary will check this
// if we have to emit temporaries.
current_continue_block = block;
vector<string> statements;
// Capture all statements into our list.
auto *old = redirect_statement;
redirect_statement = &statements;
// Stamp out all blocks one after each other.
while (loop_blocks.find(block->self) == end(loop_blocks))
{
propagate_loop_dominators(*block);
// Write out all instructions we have in this block.
for (auto &op : block->ops)
emit_instruction(op);
// For plain branchless for/while continue blocks.
if (block->next_block)
{
flush_phi(continue_block, block->next_block);
block = &get<SPIRBlock>(block->next_block);
}
// For do while blocks. The last block will be a select block.
else if (block->true_block)
{
flush_phi(continue_block, block->true_block);
block = &get<SPIRBlock>(block->true_block);
}
}
// Restore old pointer.
redirect_statement = old;
// Somewhat ugly, strip off the last ';' since we use ',' instead.
// Ideally, we should select this behavior in statement().
for (auto &s : statements)
{
if (!s.empty() && s.back() == ';')
s.erase(s.size() - 1, 1);
}
current_continue_block = nullptr;
return merge(statements);
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
{
if (block.loop_variables.empty())
return "";
if (block.loop_variables.size() == 1)
{
return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
}
else
{
auto &var = get<SPIRVariable>(block.loop_variables.front());
auto &type = get<SPIRType>(var.basetype);
// Don't remap the type here as we have multiple names,
// doesn't make sense to remap types for loop variables anyways.
// It is assumed here that all relevant qualifiers are equal for all loop variables.
string expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
for (auto &loop_var : block.loop_variables)
{
auto &v = get<SPIRVariable>(loop_var);
expr += join(to_name(loop_var), " = ", to_expression(v.static_expression));
if (&loop_var != &block.loop_variables.back())
expr += ", ";
}
return expr;
}
}
2016-03-02 17:09:16 +00:00
bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
{
SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
if (method == SPIRBlock::MergeToSelectForLoop)
{
uint32_t current_count = statement_count;
// If we're trying to create a true for loop,
// we need to make sure that all opcodes before branch statement do not actually emit any code.
// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
for (auto &op : block.ops)
emit_instruction(op);
bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
// This can work! We only did trivial things which could be forwarded in block body!
if (current_count == statement_count && condition_is_temporary)
{
switch (continue_type)
{
case SPIRBlock::ForLoop:
{
// Important that we do this in this order because
// emitting the continue block can invalidate the condition expression.
auto initializer = emit_for_loop_initializers(block);
auto condition = to_expression(block.condition);
auto continue_block = emit_continue_block(block.continue_block);
statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
break;
}
case SPIRBlock::WhileLoop:
statement("while (", to_expression(block.condition), ")");
break;
default:
SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
}
begin_scope();
return true;
}
else
{
block.disable_block_optimization = true;
force_recompile = true;
begin_scope(); // We'll see an end_scope() later.
return false;
}
}
else if (method == SPIRBlock::MergeToDirectForLoop)
{
auto &child = get<SPIRBlock>(block.next_block);
// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
flush_undeclared_variables(child);
uint32_t current_count = statement_count;
// If we're trying to create a true for loop,
// we need to make sure that all opcodes before branch statement do not actually emit any code.
// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
for (auto &op : child.ops)
emit_instruction(op);
bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
if (current_count == statement_count && condition_is_temporary)
{
propagate_loop_dominators(child);
switch (continue_type)
{
case SPIRBlock::ForLoop:
{
// Important that we do this in this order because
// emitting the continue block can invalidate the condition expression.
auto initializer = emit_for_loop_initializers(block);
auto condition = to_expression(child.condition);
auto continue_block = emit_continue_block(block.continue_block);
statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
break;
}
case SPIRBlock::WhileLoop:
statement("while (", to_expression(child.condition), ")");
break;
default:
SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
}
begin_scope();
branch(child.self, child.true_block);
return true;
}
else
{
block.disable_block_optimization = true;
force_recompile = true;
begin_scope(); // We'll see an end_scope() later.
return false;
}
}
else
return false;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
2016-03-02 17:09:16 +00:00
{
for (auto &v : block.dominated_variables)
{
auto &var = get<SPIRVariable>(v);
if (var.deferred_declaration)
statement(variable_decl(var), ";");
var.deferred_declaration = false;
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
propagate_loop_dominators(block);
bool select_branch_to_true_block = false;
bool skip_direct_branch = false;
bool emitted_for_loop_header = false;
// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
for (auto &tmp : block.declare_temporary)
{
auto flags = meta[tmp.second].decoration.decoration_flags;
auto &type = get<SPIRType>(tmp.first);
statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";");
}
SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
if (block.continue_block)
continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
2016-12-16 11:43:12 +00:00
// If we have loop variables, stop masking out access to the variable now.
for (auto var : block.loop_variables)
get<SPIRVariable>(var).loop_variable_enable = true;
// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
{
flush_undeclared_variables(block);
if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
{
// The body of while, is actually just the true block, so always branch there unconditionally.
select_branch_to_true_block = true;
emitted_for_loop_header = true;
}
}
// This is the newer loop behavior in glslang which branches from Loop header directly to
// a new block, which in turn has a OpBranchSelection without a selection merge.
else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
{
flush_undeclared_variables(block);
if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
{
skip_direct_branch = true;
emitted_for_loop_header = true;
}
}
else if (continue_type == SPIRBlock::DoWhileLoop)
{
statement("do");
begin_scope();
for (auto &op : block.ops)
emit_instruction(op);
}
else if (block.merge == SPIRBlock::MergeLoop)
{
flush_undeclared_variables(block);
// We have a generic loop without any distinguishable pattern like for, while or do while.
get<SPIRBlock>(block.continue_block).complex_continue = true;
continue_type = SPIRBlock::ComplexLoop;
statement("for (;;)");
begin_scope();
for (auto &op : block.ops)
emit_instruction(op);
}
else
{
for (auto &op : block.ops)
emit_instruction(op);
}
// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
// as writes to said loop variables might have been masked out, we need a recompile.
if (!emitted_for_loop_header && !block.loop_variables.empty())
{
force_recompile = true;
for (auto var : block.loop_variables)
get<SPIRVariable>(var).loop_variable = false;
block.loop_variables.clear();
}
flush_undeclared_variables(block);
bool emit_next_block = true;
// Handle end of block.
switch (block.terminator)
{
case SPIRBlock::Direct:
// True when emitting complex continue block.
if (block.loop_dominator == block.next_block)
{
branch(block.self, block.next_block);
emit_next_block = false;
}
// True if MergeToDirectForLoop succeeded.
else if (skip_direct_branch)
emit_next_block = false;
else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
{
branch(block.self, block.next_block);
emit_next_block = false;
}
break;
case SPIRBlock::Select:
// True if MergeToSelectForLoop succeeded.
if (select_branch_to_true_block)
branch(block.self, block.true_block);
else
branch(block.self, block.condition, block.true_block, block.false_block);
break;
case SPIRBlock::MultiSelect:
{
auto &type = expression_type(block.condition);
bool uint32_t_case = type.basetype == SPIRType::UInt;
statement("switch (", to_expression(block.condition), ")");
begin_scope();
for (auto &c : block.cases)
{
auto case_value =
uint32_t_case ? convert_to_string(uint32_t(c.value)) : convert_to_string(int32_t(c.value));
statement("case ", case_value, ":");
begin_scope();
branch(block.self, c.block);
end_scope();
}
if (block.default_block != block.next_block)
{
statement("default:");
begin_scope();
if (is_break(block.default_block))
SPIRV_CROSS_THROW("Cannot break; out of a switch statement and out of a loop at the same time ...");
branch(block.self, block.default_block);
end_scope();
}
else if (flush_phi_required(block.self, block.next_block))
{
statement("default:");
begin_scope();
flush_phi(block.self, block.next_block);
statement("break;");
end_scope();
}
end_scope();
break;
}
case SPIRBlock::Return:
if (processing_entry_point)
emit_fixup();
if (block.return_value)
{
// OpReturnValue can return Undef, so don't emit anything for this case.
if (ids.at(block.return_value).get_type() != TypeUndef)
statement("return ", to_expression(block.return_value), ";");
}
// If this block is the very final block and not called from control flow,
// we do not need an explicit return which looks out of place. Just end the function here.
// In the very weird case of for(;;) { return; } executing return is unconditional,
// but we actually need a return here ...
else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
block.loop_dominator != SPIRBlock::NoDominator)
statement("return;");
break;
case SPIRBlock::Kill:
statement(backend.discard_literal, ";");
break;
default:
SPIRV_CROSS_THROW("Unimplemented block terminator.");
}
if (block.next_block && emit_next_block)
{
// If we hit this case, we're dealing with an unconditional branch, which means we will output
// that block after this. If we had selection merge, we already flushed phi variables.
if (block.merge != SPIRBlock::MergeSelection)
flush_phi(block.self, block.next_block);
emit_block_chain(get<SPIRBlock>(block.next_block));
}
if (block.merge == SPIRBlock::MergeLoop)
{
if (continue_type == SPIRBlock::DoWhileLoop)
{
// Make sure that we run the continue block to get the expressions set, but this
// should become an empty string.
// We have no fallbacks if we cannot forward everything to temporaries ...
auto statements = emit_continue_block(block.continue_block);
if (!statements.empty())
{
// The DoWhile block has side effects, force ComplexLoop pattern next pass.
get<SPIRBlock>(block.continue_block).complex_continue = true;
force_recompile = true;
}
end_scope_decl(join("while (", to_expression(get<SPIRBlock>(block.continue_block).condition), ")"));
}
else
end_scope();
flush_phi(block.self, block.merge_block);
emit_block_chain(get<SPIRBlock>(block.merge_block));
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::begin_scope()
{
statement("{");
indent++;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::end_scope()
{
if (!indent)
SPIRV_CROSS_THROW("Popping empty indent stack.");
indent--;
statement("}");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::end_scope_decl()
{
if (!indent)
SPIRV_CROSS_THROW("Popping empty indent stack.");
indent--;
statement("};");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::end_scope_decl(const string &decl)
{
if (!indent)
SPIRV_CROSS_THROW("Popping empty indent stack.");
indent--;
statement("} ", decl, ";");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
{
// If our variable is remapped, and we rely on type-remapping information as
// well, then we cannot pass the variable as a function parameter.
// Fixing this is non-trivial without stamping out variants of the same function,
// so for now warn about this and suggest workarounds instead.
for (uint32_t i = 0; i < length; i++)
{
auto *var = maybe_get<SPIRVariable>(args[i]);
if (!var || !var->remapped_variable)
continue;
auto &type = get<SPIRType>(var->basetype);
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
{
SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
"This will not work correctly because type-remapping information is lost. "
"To workaround, please consider not passing the subpass input as a function parameter, "
"or use in/out variables instead which do not need type remapping information.");
}
}
}