SPIRV-Cross-Vulnerable/spirv_glsl.cpp

9822 lines
289 KiB
C++
Raw Normal View History

2016-03-02 17:09:16 +00:00
/*
* Copyright 2015-2018 ARM Limited
2016-03-02 17:09:16 +00:00
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "spirv_glsl.hpp"
2016-03-02 17:09:16 +00:00
#include "GLSL.std.450.h"
#include "spirv_common.hpp"
2016-03-02 17:09:16 +00:00
#include <algorithm>
#include <assert.h>
#include <cmath>
#include <utility>
2016-03-02 17:09:16 +00:00
using namespace spv;
using namespace spirv_cross;
2016-03-02 17:09:16 +00:00
using namespace std;
static bool is_unsigned_opcode(Op op)
{
// Don't have to be exhaustive, only relevant for legacy target checking ...
switch (op)
{
case OpShiftRightLogical:
case OpUGreaterThan:
case OpUGreaterThanEqual:
case OpULessThan:
case OpULessThanEqual:
case OpUConvert:
case OpUDiv:
case OpUMod:
case OpUMulExtended:
case OpConvertUToF:
case OpConvertFToU:
return true;
default:
return false;
}
}
static bool is_unsigned_glsl_opcode(GLSLstd450 op)
{
// Don't have to be exhaustive, only relevant for legacy target checking ...
switch (op)
{
case GLSLstd450UClamp:
case GLSLstd450UMin:
case GLSLstd450UMax:
case GLSLstd450FindUMsb:
return true;
default:
return false;
}
}
2017-10-10 13:23:07 +00:00
static bool packing_is_vec4_padded(BufferPackingStandard packing)
{
switch (packing)
{
case BufferPackingHLSLCbuffer:
case BufferPackingHLSLCbufferPackOffset:
case BufferPackingStd140:
case BufferPackingStd140EnhancedLayout:
return true;
default:
return false;
}
}
static bool packing_is_hlsl(BufferPackingStandard packing)
{
switch (packing)
{
case BufferPackingHLSLCbuffer:
case BufferPackingHLSLCbufferPackOffset:
return true;
default:
return false;
}
}
static bool packing_has_flexible_offset(BufferPackingStandard packing)
{
switch (packing)
{
case BufferPackingStd140:
case BufferPackingStd430:
case BufferPackingHLSLCbuffer:
return false;
default:
return true;
}
}
static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing)
{
switch (packing)
{
case BufferPackingStd140EnhancedLayout:
return BufferPackingStd140;
case BufferPackingStd430EnhancedLayout:
return BufferPackingStd430;
case BufferPackingHLSLCbufferPackOffset:
return BufferPackingHLSLCbuffer;
default:
return packing;
}
}
// Sanitizes underscores for GLSL where multiple underscores in a row are not allowed.
string CompilerGLSL::sanitize_underscores(const string &str)
{
string res;
res.reserve(str.size());
bool last_underscore = false;
for (auto c : str)
{
if (c == '_')
{
if (last_underscore)
continue;
res += c;
last_underscore = true;
}
else
{
res += c;
last_underscore = false;
}
}
return res;
}
// Returns true if an arithmetic operation does not change behavior depending on signedness.
2018-02-03 22:23:30 +00:00
static bool glsl_opcode_is_sign_invariant(Op opcode)
{
switch (opcode)
{
case OpIEqual:
case OpINotEqual:
case OpISub:
case OpIAdd:
case OpIMul:
case OpShiftLeftLogical:
case OpBitwiseOr:
case OpBitwiseXor:
case OpBitwiseAnd:
return true;
default:
return false;
}
}
static const char *to_pls_layout(PlsFormat format)
2016-03-02 17:09:16 +00:00
{
switch (format)
{
case PlsR11FG11FB10F:
return "layout(r11f_g11f_b10f) ";
case PlsR32F:
return "layout(r32f) ";
case PlsRG16F:
return "layout(rg16f) ";
case PlsRGB10A2:
return "layout(rgb10_a2) ";
case PlsRGBA8:
return "layout(rgba8) ";
case PlsRG16:
return "layout(rg16) ";
case PlsRGBA8I:
return "layout(rgba8i)";
case PlsRG16I:
return "layout(rg16i) ";
case PlsRGB10A2UI:
return "layout(rgb10_a2ui) ";
case PlsRGBA8UI:
return "layout(rgba8ui) ";
case PlsRG16UI:
return "layout(rg16ui) ";
case PlsR32UI:
return "layout(r32ui) ";
default:
return "";
}
2016-03-02 17:09:16 +00:00
}
static SPIRType::BaseType pls_format_to_basetype(PlsFormat format)
{
switch (format)
{
default:
case PlsR11FG11FB10F:
case PlsR32F:
case PlsRG16F:
case PlsRGB10A2:
case PlsRGBA8:
case PlsRG16:
return SPIRType::Float;
case PlsRGBA8I:
case PlsRG16I:
return SPIRType::Int;
case PlsRGB10A2UI:
case PlsRGBA8UI:
case PlsRG16UI:
case PlsR32UI:
return SPIRType::UInt;
}
2016-03-02 17:09:16 +00:00
}
static uint32_t pls_format_to_components(PlsFormat format)
{
switch (format)
{
default:
case PlsR32F:
case PlsR32UI:
return 1;
case PlsRG16F:
case PlsRG16:
case PlsRG16UI:
case PlsRG16I:
return 2;
case PlsR11FG11FB10F:
return 3;
case PlsRGB10A2:
case PlsRGBA8:
case PlsRGBA8I:
case PlsRGB10A2UI:
case PlsRGBA8UI:
return 4;
}
2016-03-02 17:09:16 +00:00
}
2017-01-20 16:33:59 +00:00
static const char *vector_swizzle(int vecsize, int index)
{
2017-01-20 16:33:59 +00:00
static const char *swizzle[4][4] = {
{ ".x", ".y", ".z", ".w" }, { ".xy", ".yz", ".zw" }, { ".xyz", ".yzw" }, { "" }
};
assert(vecsize >= 1 && vecsize <= 4);
assert(index >= 0 && index < 4);
assert(swizzle[vecsize - 1][index]);
return swizzle[vecsize - 1][index];
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::reset()
{
// We do some speculative optimizations which should pretty much always work out,
// but just in case the SPIR-V is rather weird, recompile until it's happy.
// This typically only means one extra pass.
force_recompile = false;
// Clear invalid expression tracking.
invalid_expressions.clear();
current_function = nullptr;
// Clear temporary usage tracking.
expression_usage_counts.clear();
forwarded_temporaries.clear();
resource_names.clear();
function_overloads.clear();
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
// Clear unflushed dependees.
id.get<SPIRVariable>().dependees.clear();
}
else if (id.get_type() == TypeExpression)
{
// And remove all expressions.
id.reset();
}
else if (id.get_type() == TypeFunction)
{
// Reset active state for all functions.
id.get<SPIRFunction>().active = false;
id.get<SPIRFunction>().flush_undeclared = true;
}
}
statement_count = 0;
indent = 0;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::remap_pls_variables()
{
for (auto &input : pls_inputs)
{
auto &var = get<SPIRVariable>(input.id);
bool input_is_target = false;
if (var.storage == StorageClassUniformConstant)
{
auto &type = get<SPIRType>(var.basetype);
input_is_target = type.image.dim == DimSubpassData;
}
if (var.storage != StorageClassInput && !input_is_target)
SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs.");
var.remapped_variable = true;
}
for (auto &output : pls_outputs)
{
auto &var = get<SPIRVariable>(output.id);
if (var.storage != StorageClassOutput)
SPIRV_CROSS_THROW("Can only use out variables for PLS outputs.");
var.remapped_variable = true;
}
2016-03-02 17:09:16 +00:00
}
2016-07-27 08:59:00 +00:00
void CompilerGLSL::find_static_extensions()
{
for (auto &id : ids)
{
if (id.get_type() == TypeType)
{
auto &type = id.get<SPIRType>();
if (type.basetype == SPIRType::Double)
{
if (options.es)
SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
2016-07-27 08:59:00 +00:00
if (!options.es && options.version < 400)
require_extension_internal("GL_ARB_gpu_shader_fp64");
2016-07-27 08:59:00 +00:00
}
2016-07-27 09:27:00 +00:00
if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64)
{
if (options.es)
SPIRV_CROSS_THROW("64-bit integers not supported in ES profile.");
2016-07-27 09:27:00 +00:00
if (!options.es)
require_extension_internal("GL_ARB_gpu_shader_int64");
2016-07-27 09:27:00 +00:00
}
if (type.basetype == SPIRType::Half)
require_extension_internal("GL_AMD_gpu_shader_half_float");
2016-07-27 08:59:00 +00:00
}
}
auto &execution = get_entry_point();
switch (execution.model)
{
case ExecutionModelGLCompute:
if (!options.es && options.version < 430)
require_extension_internal("GL_ARB_compute_shader");
if (options.es && options.version < 310)
SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders.");
break;
case ExecutionModelGeometry:
if (options.es && options.version < 320)
require_extension_internal("GL_EXT_geometry_shader");
2017-06-17 01:06:46 +00:00
if (!options.es && options.version < 150)
require_extension_internal("GL_ARB_geometry_shader4");
if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1)
{
// Instanced GS is part of 400 core or this extension.
if (!options.es && options.version < 400)
require_extension_internal("GL_ARB_gpu_shader5");
}
break;
case ExecutionModelTessellationEvaluation:
case ExecutionModelTessellationControl:
if (options.es && options.version < 320)
require_extension_internal("GL_EXT_tessellation_shader");
if (!options.es && options.version < 400)
require_extension_internal("GL_ARB_tessellation_shader");
break;
default:
break;
}
if (!pls_inputs.empty() || !pls_outputs.empty())
require_extension_internal("GL_EXT_shader_pixel_local_storage");
if (options.separate_shader_objects && !options.es && options.version < 410)
require_extension_internal("GL_ARB_separate_shader_objects");
2016-07-27 08:59:00 +00:00
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::compile()
{
// Force a classic "C" locale, reverts when function returns
ClassicLocale classic_locale;
if (options.vulkan_semantics)
backend.allow_precision_qualifiers = true;
backend.force_gl_in_out_block = true;
backend.supports_extensions = true;
2016-07-27 08:59:00 +00:00
// Scan the SPIR-V to find trivial uses of extensions.
find_static_extensions();
fixup_image_load_store_access();
update_active_builtins();
analyze_image_and_sampler_usage();
2016-07-27 08:59:00 +00:00
uint32_t pass_count = 0;
do
{
if (pass_count >= 3)
SPIRV_CROSS_THROW("Over 3 compilation loops detected. Must be a bug!");
2016-03-02 17:09:16 +00:00
reset();
2016-03-02 17:09:16 +00:00
// Move constructor for this type is broken on GCC 4.9 ...
buffer = unique_ptr<ostringstream>(new ostringstream());
2016-03-02 17:09:16 +00:00
emit_header();
emit_resources();
2016-03-02 17:09:16 +00:00
emit_function(get<SPIRFunction>(entry_point), Bitset());
2016-03-02 17:09:16 +00:00
pass_count++;
} while (force_recompile);
2016-03-02 17:09:16 +00:00
2017-11-13 12:49:11 +00:00
// Entry point in GLSL is always main().
get_entry_point().name = "main";
return buffer->str();
2016-03-02 17:09:16 +00:00
}
std::string CompilerGLSL::get_partial_source()
{
return buffer ? buffer->str() : "No compiled source available yet.";
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::emit_header()
{
auto &execution = get_entry_point();
statement("#version ", options.version, options.es && options.version > 100 ? " es" : "");
if (!options.es && options.version < 420)
{
// Needed for binding = # on UBOs, etc.
if (options.enable_420pack_extension)
{
statement("#ifdef GL_ARB_shading_language_420pack");
statement("#extension GL_ARB_shading_language_420pack : require");
statement("#endif");
}
// Needed for: layout(early_fragment_tests) in;
if (execution.flags.get(ExecutionModeEarlyFragmentTests))
require_extension_internal("GL_ARB_shader_image_load_store");
}
for (auto &ext : forced_extensions)
statement("#extension ", ext, " : require");
for (auto &header : header_lines)
statement(header);
vector<string> inputs;
vector<string> outputs;
switch (execution.model)
{
case ExecutionModelGeometry:
outputs.push_back(join("max_vertices = ", execution.output_vertices));
if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1)
inputs.push_back(join("invocations = ", execution.invocations));
if (execution.flags.get(ExecutionModeInputPoints))
inputs.push_back("points");
if (execution.flags.get(ExecutionModeInputLines))
inputs.push_back("lines");
if (execution.flags.get(ExecutionModeInputLinesAdjacency))
inputs.push_back("lines_adjacency");
if (execution.flags.get(ExecutionModeTriangles))
inputs.push_back("triangles");
if (execution.flags.get(ExecutionModeInputTrianglesAdjacency))
inputs.push_back("triangles_adjacency");
if (execution.flags.get(ExecutionModeOutputTriangleStrip))
outputs.push_back("triangle_strip");
if (execution.flags.get(ExecutionModeOutputPoints))
outputs.push_back("points");
if (execution.flags.get(ExecutionModeOutputLineStrip))
outputs.push_back("line_strip");
break;
case ExecutionModelTessellationControl:
if (execution.flags.get(ExecutionModeOutputVertices))
outputs.push_back(join("vertices = ", execution.output_vertices));
break;
case ExecutionModelTessellationEvaluation:
if (execution.flags.get(ExecutionModeQuads))
inputs.push_back("quads");
if (execution.flags.get(ExecutionModeTriangles))
inputs.push_back("triangles");
if (execution.flags.get(ExecutionModeIsolines))
inputs.push_back("isolines");
if (execution.flags.get(ExecutionModePointMode))
inputs.push_back("point_mode");
if (!execution.flags.get(ExecutionModeIsolines))
{
if (execution.flags.get(ExecutionModeVertexOrderCw))
inputs.push_back("cw");
if (execution.flags.get(ExecutionModeVertexOrderCcw))
inputs.push_back("ccw");
}
if (execution.flags.get(ExecutionModeSpacingFractionalEven))
inputs.push_back("fractional_even_spacing");
if (execution.flags.get(ExecutionModeSpacingFractionalOdd))
inputs.push_back("fractional_odd_spacing");
if (execution.flags.get(ExecutionModeSpacingEqual))
inputs.push_back("equal_spacing");
break;
case ExecutionModelGLCompute:
{
if (execution.workgroup_size.constant != 0)
{
SpecializationConstant wg_x, wg_y, wg_z;
get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
if (wg_x.id)
{
if (options.vulkan_semantics)
inputs.push_back(join("local_size_x_id = ", wg_x.constant_id));
else
inputs.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).scalar()));
}
else
inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
if (wg_y.id)
{
if (options.vulkan_semantics)
inputs.push_back(join("local_size_y_id = ", wg_y.constant_id));
else
inputs.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).scalar()));
}
else
inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
if (wg_z.id)
{
if (options.vulkan_semantics)
inputs.push_back(join("local_size_z_id = ", wg_z.constant_id));
else
inputs.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).scalar()));
}
else
inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
else
{
inputs.push_back(join("local_size_x = ", execution.workgroup_size.x));
inputs.push_back(join("local_size_y = ", execution.workgroup_size.y));
inputs.push_back(join("local_size_z = ", execution.workgroup_size.z));
}
break;
}
case ExecutionModelFragment:
if (options.es)
{
switch (options.fragment.default_float_precision)
{
case Options::Lowp:
statement("precision lowp float;");
break;
case Options::Mediump:
statement("precision mediump float;");
break;
case Options::Highp:
statement("precision highp float;");
break;
default:
break;
}
switch (options.fragment.default_int_precision)
{
case Options::Lowp:
statement("precision lowp int;");
break;
case Options::Mediump:
statement("precision mediump int;");
break;
case Options::Highp:
statement("precision highp int;");
break;
default:
break;
}
}
if (execution.flags.get(ExecutionModeEarlyFragmentTests))
inputs.push_back("early_fragment_tests");
if (execution.flags.get(ExecutionModeDepthGreater))
inputs.push_back("depth_greater");
if (execution.flags.get(ExecutionModeDepthLess))
inputs.push_back("depth_less");
break;
default:
break;
}
if (!inputs.empty())
statement("layout(", merge(inputs), ") in;");
if (!outputs.empty())
statement("layout(", merge(outputs), ") out;");
statement("");
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::type_is_empty(const SPIRType &type)
{
return type.basetype == SPIRType::Struct && type.member_types.empty();
}
void CompilerGLSL::emit_struct(SPIRType &type)
2016-03-02 17:09:16 +00:00
{
// Struct types can be stamped out multiple times
// with just different offsets, matrix layouts, etc ...
// Type-punning with these types is legal, which complicates things
// when we are storing struct and array types in an SSBO for example.
// If the type master is packed however, we can no longer assume that the struct declaration will be redundant.
if (type.type_alias != 0 && !has_decoration(type.type_alias, DecorationCPacked))
return;
add_resource_name(type.self);
auto name = type_to_glsl(type);
statement(!backend.explicit_struct_type ? "struct " : "", name);
begin_scope();
type.member_name_cache.clear();
uint32_t i = 0;
bool emitted = false;
for (auto &member : type.member_types)
{
add_member_name(type, i);
emit_struct_member(type, member, i);
i++;
emitted = true;
}
// Don't declare empty structs in GLSL, this is not allowed.
if (type_is_empty(type) && !backend.supports_empty_struct)
{
statement("int empty_struct_member;");
emitted = true;
}
end_scope_decl();
if (emitted)
statement("");
2016-03-02 17:09:16 +00:00
}
Bitset CompilerGLSL::combined_decoration_for_member(const SPIRType &type, uint32_t index)
2016-03-02 17:09:16 +00:00
{
Bitset flags;
auto &memb = meta[type.self].members;
if (index >= memb.size())
return flags;
auto &dec = memb[index];
// If our type is a struct, traverse all the members as well recursively.
flags.merge_or(dec.decoration_flags);
for (uint32_t i = 0; i < type.member_types.size(); i++)
flags.merge_or(combined_decoration_for_member(get<SPIRType>(type.member_types[i]), i));
return flags;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags)
{
string res;
//if (flags & (1ull << DecorationSmooth))
// res += "smooth ";
if (flags.get(DecorationFlat))
res += "flat ";
if (flags.get(DecorationNoPerspective))
res += "noperspective ";
if (flags.get(DecorationCentroid))
res += "centroid ";
if (flags.get(DecorationPatch))
res += "patch ";
if (flags.get(DecorationSample))
res += "sample ";
if (flags.get(DecorationInvariant))
res += "invariant ";
if (flags.get(DecorationExplicitInterpAMD))
res += "__explicitInterpAMD ";
return res;
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index)
{
if (is_legacy())
return "";
bool is_block = meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
if (!is_block)
return "";
auto &memb = meta[type.self].members;
if (index >= memb.size())
return "";
auto &dec = memb[index];
vector<string> attr;
// We can only apply layouts on members in block interfaces.
// This is a bit problematic because in SPIR-V decorations are applied on the struct types directly.
// This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct
// has a decoration, it was originally caused by a top-level layout() qualifier in GLSL.
//
// We would like to go from (SPIR-V style):
//
// struct Foo { layout(row_major) mat4 matrix; };
// buffer UBO { Foo foo; };
//
// to
//
// struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations.
// buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level.
auto flags = combined_decoration_for_member(type, index);
if (flags.get(DecorationRowMajor))
attr.push_back("row_major");
// We don't emit any global layouts, so column_major is default.
//if (flags & (1ull << DecorationColMajor))
// attr.push_back("column_major");
if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true))
attr.push_back(join("location = ", dec.location));
// DecorationCPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers.
// This is only done selectively in GLSL as needed.
if (has_decoration(type.self, DecorationCPacked) && dec.decoration_flags.get(DecorationOffset))
attr.push_back(join("offset = ", dec.offset));
if (attr.empty())
return "";
string res = "layout(";
res += merge(attr);
res += ") ";
return res;
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format)
2016-03-02 17:09:16 +00:00
{
2016-07-12 07:35:15 +00:00
auto check_desktop = [this] {
if (options.es)
SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile.");
2016-07-12 07:35:15 +00:00
};
switch (format)
{
case ImageFormatRgba32f:
return "rgba32f";
case ImageFormatRgba16f:
return "rgba16f";
case ImageFormatR32f:
return "r32f";
case ImageFormatRgba8:
return "rgba8";
case ImageFormatRgba8Snorm:
return "rgba8_snorm";
case ImageFormatRg32f:
return "rg32f";
case ImageFormatRg16f:
return "rg16f";
case ImageFormatRgba32i:
return "rgba32i";
case ImageFormatRgba16i:
return "rgba16i";
case ImageFormatR32i:
return "r32i";
case ImageFormatRgba8i:
return "rgba8i";
case ImageFormatRg32i:
return "rg32i";
case ImageFormatRg16i:
return "rg16i";
case ImageFormatRgba32ui:
return "rgba32ui";
case ImageFormatRgba16ui:
return "rgba16ui";
case ImageFormatR32ui:
return "r32ui";
case ImageFormatRgba8ui:
return "rgba8ui";
case ImageFormatRg32ui:
return "rg32ui";
case ImageFormatRg16ui:
return "rg16ui";
2016-07-12 07:35:15 +00:00
// Desktop-only formats
case ImageFormatR11fG11fB10f:
check_desktop();
return "r11f_g11f_b10f";
case ImageFormatR16f:
check_desktop();
return "r16f";
case ImageFormatRgb10A2:
check_desktop();
return "rgb10_a2";
case ImageFormatR8:
check_desktop();
return "r8";
case ImageFormatRg8:
check_desktop();
return "rg8";
case ImageFormatR16:
check_desktop();
return "r16";
case ImageFormatRg16:
check_desktop();
return "rg16";
case ImageFormatRgba16:
check_desktop();
return "rgba16";
case ImageFormatR16Snorm:
check_desktop();
return "r16_snorm";
case ImageFormatRg16Snorm:
check_desktop();
return "rg16_snorm";
case ImageFormatRgba16Snorm:
check_desktop();
return "rgba16_snorm";
case ImageFormatR8Snorm:
check_desktop();
return "r8_snorm";
case ImageFormatRg8Snorm:
check_desktop();
return "rg8_snorm";
case ImageFormatR8ui:
check_desktop();
return "r8ui";
case ImageFormatRg8ui:
check_desktop();
return "rg8ui";
case ImageFormatR16ui:
check_desktop();
return "r16ui";
case ImageFormatRgb10a2ui:
check_desktop();
return "rgb10_a2ui";
case ImageFormatR8i:
check_desktop();
return "r8i";
case ImageFormatRg8i:
check_desktop();
return "rg8i";
case ImageFormatR16i:
check_desktop();
return "r16i";
default:
case ImageFormatUnknown:
return nullptr;
}
2016-03-02 17:09:16 +00:00
}
uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard)
2016-07-27 08:59:00 +00:00
{
switch (type.basetype)
{
case SPIRType::Double:
2016-07-27 09:27:00 +00:00
case SPIRType::Int64:
case SPIRType::UInt64:
2016-07-27 08:59:00 +00:00
return 8;
2018-03-06 14:39:12 +00:00
case SPIRType::Float:
case SPIRType::Int:
case SPIRType::UInt:
2016-07-27 08:59:00 +00:00
return 4;
2018-03-06 14:39:12 +00:00
case SPIRType::Half:
return 2;
default:
SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size.");
2016-07-27 08:59:00 +00:00
}
}
2018-03-13 13:05:33 +00:00
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
BufferPackingStandard packing)
2016-03-02 17:09:16 +00:00
{
2017-10-10 13:23:07 +00:00
if (!type.array.empty())
{
uint32_t minimum_alignment = 1;
if (packing_is_vec4_padded(packing))
minimum_alignment = 16;
auto *tmp = &get<SPIRType>(type.parent_type);
while (!tmp->array.empty())
tmp = &get<SPIRType>(tmp->parent_type);
// Get the alignment of the base type, then maybe round up.
return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing));
}
if (type.basetype == SPIRType::Struct)
{
// Rule 9. Structs alignments are maximum alignment of its members.
uint32_t alignment = 0;
for (uint32_t i = 0; i < type.member_types.size(); i++)
{
auto member_flags = meta[type.self].members.at(i).decoration_flags;
2017-10-10 08:22:40 +00:00
alignment =
max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing));
}
// In std140, struct alignment is rounded up to 16.
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
alignment = max(alignment, 16u);
return alignment;
}
else
{
2018-03-06 14:39:12 +00:00
const uint32_t base_alignment = type_to_packed_base_size(type, packing);
2017-10-10 13:23:07 +00:00
// Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle
// a vec4, this is handled outside since that part knows our current offset.
if (type.columns == 1 && packing_is_hlsl(packing))
return base_alignment;
// From 7.6.2.2 in GL 4.5 core spec.
// Rule 1
if (type.vecsize == 1 && type.columns == 1)
return base_alignment;
// Rule 2
if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1)
return type.vecsize * base_alignment;
// Rule 3
if (type.vecsize == 3 && type.columns == 1)
return 4 * base_alignment;
// Rule 4 implied. Alignment does not change in std430.
// Rule 5. Column-major matrices are stored as arrays of
// vectors.
if (flags.get(DecorationColMajor) && type.columns > 1)
{
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
return 4 * base_alignment;
else if (type.vecsize == 3)
return 4 * base_alignment;
else
return type.vecsize * base_alignment;
}
// Rule 6 implied.
// Rule 7.
if (flags.get(DecorationRowMajor) && type.vecsize > 1)
{
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
return 4 * base_alignment;
else if (type.columns == 3)
return 4 * base_alignment;
else
return type.columns * base_alignment;
}
// Rule 8 implied.
}
SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?");
2016-03-02 17:09:16 +00:00
}
2018-03-13 13:05:33 +00:00
uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags,
BufferPackingStandard packing)
2016-03-02 17:09:16 +00:00
{
// Array stride is equal to aligned size of the underlying type.
2017-10-10 13:23:07 +00:00
uint32_t parent = type.parent_type;
assert(parent);
2017-10-10 13:23:07 +00:00
auto &tmp = get<SPIRType>(parent);
2017-10-10 13:23:07 +00:00
uint32_t size = type_to_packed_size(tmp, flags, packing);
if (tmp.array.empty())
{
uint32_t alignment = type_to_packed_alignment(type, flags, packing);
return (size + alignment - 1) & ~(alignment - 1);
}
else
{
// For multidimensional arrays, array stride always matches size of subtype.
// The alignment cannot change because multidimensional arrays are basically N * M array elements.
return size;
}
2016-03-02 17:09:16 +00:00
}
uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
2016-03-02 17:09:16 +00:00
{
if (!type.array.empty())
2017-10-10 13:23:07 +00:00
{
2017-10-10 08:22:40 +00:00
return to_array_size_literal(type, uint32_t(type.array.size()) - 1) *
type_to_packed_array_stride(type, flags, packing);
2017-10-10 13:23:07 +00:00
}
uint32_t size = 0;
if (type.basetype == SPIRType::Struct)
{
uint32_t pad_alignment = 1;
for (uint32_t i = 0; i < type.member_types.size(); i++)
{
auto member_flags = meta[type.self].members.at(i).decoration_flags;
auto &member_type = get<SPIRType>(type.member_types[i]);
uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing);
uint32_t alignment = max(packed_alignment, pad_alignment);
// The next member following a struct member is aligned to the base alignment of the struct that came before.
// GL 4.5 spec, 7.6.2.2.
if (member_type.basetype == SPIRType::Struct)
pad_alignment = packed_alignment;
else
pad_alignment = 1;
size = (size + alignment - 1) & ~(alignment - 1);
size += type_to_packed_size(member_type, member_flags, packing);
}
}
else
{
2018-03-06 14:39:12 +00:00
const uint32_t base_alignment = type_to_packed_base_size(type, packing);
if (type.columns == 1)
size = type.vecsize * base_alignment;
if (flags.get(DecorationColMajor) && type.columns > 1)
{
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
size = type.columns * 4 * base_alignment;
else if (type.vecsize == 3)
size = type.columns * 4 * base_alignment;
else
size = type.columns * type.vecsize * base_alignment;
}
if (flags.get(DecorationRowMajor) && type.vecsize > 1)
{
2017-10-10 13:23:07 +00:00
if (packing_is_vec4_padded(packing))
size = type.vecsize * 4 * base_alignment;
else if (type.columns == 3)
size = type.vecsize * 4 * base_alignment;
else
size = type.vecsize * type.columns * base_alignment;
}
}
return size;
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing,
uint32_t start_offset, uint32_t end_offset)
2016-03-02 17:09:16 +00:00
{
// This is very tricky and error prone, but try to be exhaustive and correct here.
// SPIR-V doesn't directly say if we're using std430 or std140.
// SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters),
// so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information.
// We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing).
//
// It is almost certain that we're using std430, but it gets tricky with arrays in particular.
// We will assume std430, but infer std140 if we can prove the struct is not compliant with std430.
//
// The only two differences between std140 and std430 are related to padding alignment/array stride
// in arrays and structs. In std140 they take minimum vec4 alignment.
// std430 only removes the vec4 requirement.
uint32_t offset = 0;
uint32_t pad_alignment = 1;
for (uint32_t i = 0; i < type.member_types.size(); i++)
{
auto &memb_type = get<SPIRType>(type.member_types[i]);
auto member_flags = meta[type.self].members.at(i).decoration_flags;
// Verify alignment rules.
uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing);
2017-10-10 13:23:07 +00:00
uint32_t packed_size = type_to_packed_size(memb_type, member_flags, packing);
if (packing_is_hlsl(packing))
{
// If a member straddles across a vec4 boundary, alignment is actually vec4.
uint32_t begin_word = offset / 16;
uint32_t end_word = (offset + packed_size - 1) / 16;
if (begin_word != end_word)
packed_alignment = max(packed_alignment, 16u);
}
uint32_t alignment = max(packed_alignment, pad_alignment);
offset = (offset + alignment - 1) & ~(alignment - 1);
// Field is not in the specified range anymore and we can ignore any further fields.
if (offset >= end_offset)
break;
// The next member following a struct member is aligned to the base alignment of the struct that came before.
// GL 4.5 spec, 7.6.2.2.
if (memb_type.basetype == SPIRType::Struct)
pad_alignment = packed_alignment;
else
pad_alignment = 1;
// Only care about packing if we are in the given range
if (offset >= start_offset)
{
// We only care about offsets in std140, std430, etc ...
// For EnhancedLayout variants, we have the flexibility to choose our own offsets.
if (!packing_has_flexible_offset(packing))
{
uint32_t actual_offset = type_struct_member_offset(type, i);
if (actual_offset != offset) // This cannot be the packing we're looking for.
return false;
}
// Verify array stride rules.
if (!memb_type.array.empty() && type_to_packed_array_stride(memb_type, member_flags, packing) !=
type_struct_member_array_stride(type, i))
return false;
// Verify that sub-structs also follow packing rules.
// We cannot use enhanced layouts on substructs, so they better be up to spec.
auto substruct_packing = packing_to_substruct_packing(packing);
if (!memb_type.member_types.empty() && !buffer_is_packing_standard(memb_type, substruct_packing))
return false;
}
// Bump size.
2017-10-10 13:23:07 +00:00
offset += packed_size;
}
return true;
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block)
2017-10-19 12:17:18 +00:00
{
// Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL.
// Be very explicit here about how to solve the issue.
if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) ||
(get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput))
{
uint32_t minimum_desktop_version = block ? 440 : 410;
// ARB_enhanced_layouts vs ARB_separate_shader_objects ...
if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects)
2017-10-19 12:17:18 +00:00
return false;
else if (options.es && options.version < 310)
return false;
}
if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) ||
(get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput))
{
if (options.es && options.version < 300)
return false;
else if (!options.es && options.version < 330)
return false;
}
if (storage == StorageClassUniform || storage == StorageClassUniformConstant)
{
if (options.es && options.version < 310)
return false;
else if (!options.es && options.version < 430)
return false;
}
return true;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::layout_for_variable(const SPIRVariable &var)
{
// FIXME: Come up with a better solution for when to disable layouts.
// Having layouts depend on extensions as well as which types
// of layouts are used. For now, the simple solution is to just disable
// layouts for legacy versions.
if (is_legacy())
return "";
vector<string> attr;
auto &dec = meta[var.self].decoration;
auto &type = get<SPIRType>(var.basetype);
auto flags = dec.decoration_flags;
auto typeflags = meta[type.self].decoration.decoration_flags;
if (options.vulkan_semantics && var.storage == StorageClassPushConstant)
attr.push_back("push_constant");
if (flags.get(DecorationRowMajor))
attr.push_back("row_major");
if (flags.get(DecorationColMajor))
attr.push_back("column_major");
if (options.vulkan_semantics)
{
if (flags.get(DecorationInputAttachmentIndex))
attr.push_back(join("input_attachment_index = ", dec.input_attachment));
}
bool is_block = has_decoration(type.self, DecorationBlock);
if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block))
{
Bitset combined_decoration;
2017-10-19 12:17:18 +00:00
for (uint32_t i = 0; i < meta[type.self].members.size(); i++)
combined_decoration.merge_or(combined_decoration_for_member(type, i));
2017-10-19 12:17:18 +00:00
// If our members have location decorations, we don't need to
// emit location decorations at the top as well (looks weird).
if (!combined_decoration.get(DecorationLocation))
2017-10-19 12:17:18 +00:00
attr.push_back(join("location = ", dec.location));
}
if (flags.get(DecorationIndex))
attr.push_back(join("index = ", dec.index));
// set = 0 is the default. Do not emit set = decoration in regular GLSL output, but
// we should preserve it in Vulkan GLSL mode.
if (var.storage != StorageClassPushConstant)
{
if (flags.get(DecorationDescriptorSet) && (dec.set != 0 || options.vulkan_semantics))
attr.push_back(join("set = ", dec.set));
}
// GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ...
bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140);
bool can_use_binding;
if (options.es)
can_use_binding = options.version >= 310;
else
can_use_binding = options.enable_420pack_extension || (options.version >= 420);
2018-04-03 14:58:05 +00:00
// Make sure we don't emit binding layout for a classic uniform on GLSL 1.30.
if (!can_use_buffer_blocks && var.storage == StorageClassUniform)
can_use_binding = false;
if (can_use_binding && flags.get(DecorationBinding))
attr.push_back(join("binding = ", dec.binding));
if (flags.get(DecorationOffset))
attr.push_back(join("offset = ", dec.offset));
bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant;
bool ssbo_block = var.storage == StorageClassStorageBuffer ||
(var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock));
// Instead of adding explicit offsets for every element here, just assume we're using std140 or std430.
// If SPIR-V does not comply with either layout, we cannot really work around it.
if (can_use_buffer_blocks && var.storage == StorageClassUniform && typeflags.get(DecorationBlock))
{
2017-10-10 13:37:53 +00:00
if (buffer_is_packing_standard(type, BufferPackingStd140))
attr.push_back("std140");
2017-10-10 13:37:53 +00:00
else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
{
2017-10-10 09:30:29 +00:00
attr.push_back("std140");
// Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
if (options.es && !options.vulkan_semantics)
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
"not support GL_ARB_enhanced_layouts.");
if (!options.es && !options.vulkan_semantics && options.version < 440)
require_extension_internal("GL_ARB_enhanced_layouts");
// This is a very last minute to check for this, but use this unused decoration to mark that we should emit
// explicit offsets for this block type.
// layout_for_variable() will be called before the actual buffer emit.
// The alternative is a full pass before codegen where we deduce this decoration,
// but then we are just doing the exact same work twice, and more complexity.
set_decoration(type.self, DecorationCPacked);
}
else
{
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Uniform buffer cannot be expressed as std140, even with enhanced layouts. You can try "
"flattening this block to "
"support a more flexible layout.");
}
}
else if (can_use_buffer_blocks && (push_constant_block || ssbo_block))
{
2017-10-10 13:37:53 +00:00
if (buffer_is_packing_standard(type, BufferPackingStd430))
attr.push_back("std430");
2017-10-10 13:37:53 +00:00
else if (buffer_is_packing_standard(type, BufferPackingStd140))
attr.push_back("std140");
2017-10-10 13:37:53 +00:00
else if (buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout))
{
attr.push_back("std140");
// Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference,
// however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout.
// Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there.
if (options.es && !options.vulkan_semantics)
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
"not support GL_ARB_enhanced_layouts.");
if (!options.es && !options.vulkan_semantics && options.version < 440)
require_extension_internal("GL_ARB_enhanced_layouts");
set_decoration(type.self, DecorationCPacked);
}
2017-10-10 13:37:53 +00:00
else if (buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout))
{
attr.push_back("std430");
if (options.es && !options.vulkan_semantics)
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do "
"not support GL_ARB_enhanced_layouts.");
if (!options.es && !options.vulkan_semantics && options.version < 440)
require_extension_internal("GL_ARB_enhanced_layouts");
set_decoration(type.self, DecorationCPacked);
}
else
{
2017-10-10 09:31:05 +00:00
SPIRV_CROSS_THROW("Buffer block cannot be expressed as neither std430 nor std140, even with enhanced "
"layouts. You can try flattening this block to support a more flexible layout.");
}
}
// For images, the type itself adds a layout qualifer.
// Only emit the format for storage images.
if (type.basetype == SPIRType::Image && type.image.sampled == 2)
{
const char *fmt = format_to_glsl(type.image.format);
if (fmt)
attr.push_back(fmt);
}
if (attr.empty())
return "";
string res = "layout(";
res += merge(attr);
res += ") ";
return res;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var)
{
if (flattened_buffer_blocks.count(var.self))
emit_buffer_block_flattened(var);
else if (options.vulkan_semantics)
emit_push_constant_block_vulkan(var);
else
emit_push_constant_block_glsl(var);
}
void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var)
{
emit_buffer_block(var);
}
void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var)
2016-03-02 17:09:16 +00:00
{
// OpenGL has no concept of push constant blocks, implement it as a uniform struct.
auto &type = get<SPIRType>(var.basetype);
2016-03-02 17:09:16 +00:00
auto &flags = meta[var.self].decoration.decoration_flags;
flags.clear(DecorationBinding);
flags.clear(DecorationDescriptorSet);
2016-03-02 17:09:16 +00:00
#if 0
if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet)))
SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. "
2016-03-02 17:09:16 +00:00
"Remap to location with reflection API first or disable these decorations.");
#endif
// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
auto &block_flags = meta[type.self].decoration.decoration_flags;
bool block_flag = block_flags.get(DecorationBlock);
block_flags.clear(DecorationBlock);
2016-03-02 17:09:16 +00:00
emit_struct(type);
2016-03-02 17:09:16 +00:00
if (block_flag)
block_flags.set(DecorationBlock);
2016-03-02 17:09:16 +00:00
emit_uniform(var);
statement("");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_buffer_block(const SPIRVariable &var)
{
if (flattened_buffer_blocks.count(var.self))
emit_buffer_block_flattened(var);
else if (is_legacy() || (!options.es && options.version == 130))
emit_buffer_block_legacy(var);
else
emit_buffer_block_native(var);
}
void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
bool ssbo = var.storage == StorageClassStorageBuffer ||
meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
if (ssbo)
SPIRV_CROSS_THROW("SSBOs not supported in legacy targets.");
// We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily.
// Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed.
auto &block_flags = meta[type.self].decoration.decoration_flags;
bool block_flag = block_flags.get(DecorationBlock);
block_flags.clear(DecorationBlock);
emit_struct(type);
if (block_flag)
block_flags.set(DecorationBlock);
emit_uniform(var);
statement("");
}
void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var)
2016-03-02 17:09:16 +00:00
{
auto &type = get<SPIRType>(var.basetype);
2017-01-21 09:07:38 +00:00
Bitset flags = get_buffer_block_flags(var);
bool ssbo = var.storage == StorageClassStorageBuffer ||
meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
bool is_restrict = ssbo && flags.get(DecorationRestrict);
bool is_writeonly = ssbo && flags.get(DecorationNonReadable);
bool is_readonly = ssbo && flags.get(DecorationNonWritable);
bool is_coherent = ssbo && flags.get(DecorationCoherent);
// Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ...
auto buffer_name = to_name(type.self, false);
// Shaders never use the block by interface name, so we don't
// have to track this other than updating name caches.
if (meta[type.self].decoration.alias.empty() || resource_names.find(buffer_name) != end(resource_names))
buffer_name = get_block_fallback_name(var.self);
// Make sure we get something unique.
add_variable(resource_names, buffer_name);
// If for some reason buffer_name is an illegal name, make a final fallback to a workaround name.
// This cannot conflict with anything else, so we're safe now.
if (buffer_name.empty())
buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self);
// Save for post-reflection later.
declared_block_names[var.self] = buffer_name;
2017-08-28 07:01:03 +00:00
statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "",
is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ",
buffer_name);
2017-01-21 09:07:38 +00:00
begin_scope();
type.member_name_cache.clear();
uint32_t i = 0;
for (auto &member : type.member_types)
{
add_member_name(type, i);
emit_struct_member(type, member, i);
i++;
}
add_resource_name(var.self);
end_scope_decl(to_name(var.self) + type_to_array_glsl(type));
statement("");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
auto &type = get<SPIRType>(var.basetype);
// Block names should never alias.
auto buffer_name = to_name(type.self, false);
size_t buffer_size = (get_declared_struct_size(type) + 15) / 16;
SPIRType::BaseType basic_type;
if (get_common_basic_type(type, basic_type))
{
SPIRType tmp;
tmp.basetype = basic_type;
2017-01-22 07:53:52 +00:00
tmp.vecsize = 4;
if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt)
SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint.");
auto flags = get_buffer_block_flags(var);
2017-01-22 08:06:15 +00:00
statement("uniform ", flags_to_precision_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[",
buffer_size, "];");
}
else
SPIRV_CROSS_THROW("All basic types in a flattened block must be the same.");
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var)
2016-03-02 17:09:16 +00:00
{
auto &execution = get_entry_point();
if (var.storage == StorageClassInput || var.storage == StorageClassOutput)
{
if (is_legacy() && execution.model == ExecutionModelVertex)
return var.storage == StorageClassInput ? "attribute " : "varying ";
else if (is_legacy() && execution.model == ExecutionModelFragment)
return "varying "; // Fragment outputs are renamed so they never hit this case.
else
return var.storage == StorageClassInput ? "in " : "out ";
}
else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform ||
var.storage == StorageClassPushConstant)
{
return "uniform ";
}
return "";
}
void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual)
{
auto &type = get<SPIRType>(var.basetype);
if (!type.array.empty())
SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings.");
auto old_flags = meta[type.self].decoration.decoration_flags;
// Emit the members as if they are part of a block to get all qualifiers.
meta[type.self].decoration.decoration_flags.set(DecorationBlock);
type.member_name_cache.clear();
uint32_t i = 0;
for (auto &member : type.member_types)
{
add_member_name(type, i);
auto &membertype = get<SPIRType>(member);
if (membertype.basetype == SPIRType::Struct)
SPIRV_CROSS_THROW("Cannot flatten struct inside structs in I/O variables.");
// Pass in the varying qualifier here so it will appear in the correct declaration order.
// Replace member name while emitting it so it encodes both struct name and member name.
// Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row,
// which is not allowed.
auto backup_name = get_member_name(type.self, i);
auto member_name = to_member_name(type, i);
set_member_name(type.self, i, sanitize_underscores(join(to_name(var.self), "_", member_name)));
emit_struct_member(type, member, i, qual);
// Restore member name.
set_member_name(type.self, i, member_name);
i++;
}
meta[type.self].decoration.decoration_flags = old_flags;
// Treat this variable as flattened from now on.
flattened_structs.insert(var.self);
}
void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
// Either make it plain in/out or in/out blocks depending on what shader is doing ...
bool block = meta[type.self].decoration.decoration_flags.get(DecorationBlock);
const char *qual = to_storage_qualifiers_glsl(var);
if (block)
{
// ESSL earlier than 310 and GLSL earlier than 150 did not support
// I/O variables which are struct types.
// To support this, flatten the struct into separate varyings instead.
if ((options.es && options.version < 310) || (!options.es && options.version < 150))
{
// I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320.
// On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150).
emit_flattened_io_block(var, qual);
}
else
{
if (options.es && options.version < 320)
{
// Geometry and tessellation extensions imply this extension.
2017-11-22 18:27:03 +00:00
if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
require_extension_internal("GL_EXT_shader_io_blocks");
}
// Block names should never alias.
auto block_name = to_name(type.self, false);
// Shaders never use the block by interface name, so we don't
// have to track this other than updating name caches.
if (resource_names.find(block_name) != end(resource_names))
block_name = get_fallback_name(type.self);
else
resource_names.insert(block_name);
statement(layout_for_variable(var), qual, block_name);
begin_scope();
type.member_name_cache.clear();
uint32_t i = 0;
for (auto &member : type.member_types)
{
add_member_name(type, i);
emit_struct_member(type, member, i);
i++;
}
add_resource_name(var.self);
end_scope_decl(join(to_name(var.self), type_to_array_glsl(type)));
statement("");
}
}
else
{
// ESSL earlier than 310 and GLSL earlier than 150 did not support
// I/O variables which are struct types.
// To support this, flatten the struct into separate varyings instead.
if (type.basetype == SPIRType::Struct &&
((options.es && options.version < 310) || (!options.es && options.version < 150)))
{
emit_flattened_io_block(var, qual);
}
else
{
add_resource_name(var.self);
statement(layout_for_variable(var), variable_decl(var), ";");
}
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_uniform(const SPIRVariable &var)
{
auto &type = get<SPIRType>(var.basetype);
if (type.basetype == SPIRType::Image && type.image.sampled == 2)
{
if (!options.es && options.version < 420)
require_extension_internal("GL_ARB_shader_image_load_store");
else if (options.es && options.version < 310)
SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store.");
}
add_resource_name(var.self);
statement(layout_for_variable(var), variable_decl(var), ";");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant)
{
auto &type = get<SPIRType>(constant.basetype);
auto name = to_name(constant.self);
statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";");
}
void CompilerGLSL::emit_specialization_constant(const SPIRConstant &constant)
{
auto &type = get<SPIRType>(constant.constant_type);
auto name = to_name(constant.self);
SpecializationConstant wg_x, wg_y, wg_z;
uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z);
if (constant.self == workgroup_size_id || constant.self == wg_x.id || constant.self == wg_y.id ||
constant.self == wg_z.id)
{
// These specialization constants are implicitly declared by emitting layout() in;
return;
}
2017-09-27 14:10:29 +00:00
// Only scalars have constant IDs.
if (has_decoration(constant.self, DecorationSpecId))
{
statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ",
variable_decl(type, name), " = ", constant_expression(constant), ";");
}
else
{
statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";");
}
}
void CompilerGLSL::emit_entry_point_declarations()
{
}
void CompilerGLSL::replace_illegal_names()
{
2016-10-27 11:55:47 +00:00
// clang-format off
static const unordered_set<string> keywords = {
"abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh",
"atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement",
"atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor",
"bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse",
"ceil", "cos", "cosh", "cross", "degrees",
"dFdx", "dFdxCoarse", "dFdxFine",
"dFdy", "dFdyCoarse", "dFdyFine",
"distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2",
"faceforward", "findLSB", "findMSB", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract", "frexp", "fwidth", "fwidthCoarse", "fwidthFine",
"greaterThan", "greaterThanEqual", "groupMemoryBarrier",
"imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor",
"imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample",
"inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2",
"matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared",
"min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual",
"outerProduct", "packDouble2x32", "packHalf2x16", "packSnorm2x16", "packSnorm4x8", "packUnorm2x16", "packUnorm4x8", "pow",
"radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step",
"tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets",
"textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad",
"textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize",
"transpose", "trunc", "uaddCarry", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackSnorm2x16", "unpackSnorm4x8",
"unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow",
"active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer",
"bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard",
"dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4",
"do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float",
"for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray",
"iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube",
"iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect",
"image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant",
"isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
"isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp",
"mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump",
"namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly",
"resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
"sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray",
"sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer",
"samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static",
"struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D",
"uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube",
"uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray",
"usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube",
"usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile",
"while", "writeonly",
2016-10-27 11:55:47 +00:00
};
// clang-format on
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
2016-08-26 10:58:50 +00:00
if (!is_hidden_variable(var))
{
auto &m = meta[var.self].decoration;
2016-10-27 11:55:47 +00:00
if (m.alias.compare(0, 3, "gl_") == 0 || keywords.find(m.alias) != end(keywords))
m.alias = join("_", m.alias);
}
}
}
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::replace_fragment_output(SPIRVariable &var)
{
auto &m = meta[var.self].decoration;
uint32_t location = 0;
if (m.decoration_flags.get(DecorationLocation))
location = m.location;
2016-03-02 17:09:16 +00:00
// If our variable is arrayed, we must not emit the array part of this as the SPIR-V will
// do the access chain part of this for us.
auto &type = get<SPIRType>(var.basetype);
if (type.array.empty())
{
// Redirect the write to a specific render target in legacy GLSL.
m.alias = join("gl_FragData[", location, "]");
2016-09-17 13:56:23 +00:00
if (is_legacy_es() && location != 0)
require_extension_internal("GL_EXT_draw_buffers");
}
else if (type.array.size() == 1)
{
// If location is non-zero, we probably have to add an offset.
// This gets really tricky since we'd have to inject an offset in the access chain.
// FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now.
m.alias = "gl_FragData";
if (location != 0)
SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. "
"This is unimplemented in SPIRV-Cross.");
2016-09-17 16:46:10 +00:00
2016-09-17 12:33:16 +00:00
if (is_legacy_es())
require_extension_internal("GL_EXT_draw_buffers");
}
else
SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL.");
var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is.
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::replace_fragment_outputs()
{
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer &&
var.storage == StorageClassOutput)
replace_fragment_output(var);
}
}
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr)
2016-03-02 17:09:16 +00:00
{
if (out_type.vecsize == input_components)
return expr;
2017-12-12 10:03:46 +00:00
else if (input_components == 1 && !backend.can_swizzle_scalar)
return join(type_to_glsl(out_type), "(", expr, ")");
else
{
auto e = enclose_expression(expr) + ".";
// Just clamp the swizzle index if we have more outputs than inputs.
for (uint32_t c = 0; c < out_type.vecsize; c++)
e += index_to_swizzle(min(c, input_components - 1));
if (backend.swizzle_is_function && out_type.vecsize > 1)
e += "()";
remove_duplicate_swizzle(e);
return e;
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_pls()
{
auto &execution = get_entry_point();
if (execution.model != ExecutionModelFragment)
SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders.");
if (!options.es)
SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES.");
if (options.version < 300)
SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above.");
if (!pls_inputs.empty())
{
statement("__pixel_local_inEXT _PLSIn");
begin_scope();
for (auto &input : pls_inputs)
statement(pls_decl(input), ";");
end_scope_decl();
statement("");
}
if (!pls_outputs.empty())
{
statement("__pixel_local_outEXT _PLSOut");
begin_scope();
for (auto &output : pls_outputs)
statement(pls_decl(output), ";");
end_scope_decl();
statement("");
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::fixup_image_load_store_access()
{
for (auto &id : ids)
{
if (id.get_type() != TypeVariable)
continue;
uint32_t var = id.get<SPIRVariable>().self;
auto &vartype = expression_type(var);
if (vartype.basetype == SPIRType::Image)
{
// Older glslangValidator does not emit required qualifiers here.
// Solve this by making the image access as restricted as possible and loosen up if we need to.
// If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing.
auto &flags = meta.at(var).decoration.decoration_flags;
if (!flags.get(DecorationNonWritable) && !flags.get(DecorationNonReadable))
{
flags.set(DecorationNonWritable);
flags.set(DecorationNonReadable);
}
}
}
}
void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
{
Bitset emitted_builtins;
Bitset global_builtins;
2017-11-17 12:37:28 +00:00
const SPIRVariable *block_var = nullptr;
bool emitted_block = false;
2017-11-17 12:37:28 +00:00
bool builtin_array = false;
2018-03-01 11:30:55 +00:00
// Need to use declared size in the type.
// These variables might have been declared, but not statically used, so we haven't deduced their size yet.
uint32_t cull_distance_size = 0;
uint32_t clip_distance_size = 0;
for (auto &id : ids)
{
if (id.get_type() != TypeVariable)
continue;
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
bool block = has_decoration(type.self, DecorationBlock);
Bitset builtins;
if (var.storage == storage && block && is_builtin_variable(var))
{
2018-03-01 11:30:55 +00:00
uint32_t index = 0;
for (auto &m : meta[type.self].members)
2018-03-01 11:30:55 +00:00
{
if (m.builtin)
2018-03-01 11:30:55 +00:00
{
builtins.set(m.builtin_type);
2018-03-01 11:30:55 +00:00
if (m.builtin_type == BuiltInCullDistance)
cull_distance_size = get<SPIRType>(type.member_types[index]).array.front();
else if (m.builtin_type == BuiltInClipDistance)
clip_distance_size = get<SPIRType>(type.member_types[index]).array.front();
}
index++;
}
}
2017-11-17 12:37:28 +00:00
else if (var.storage == storage && !block && is_builtin_variable(var))
{
// While we're at it, collect all declared global builtins (HLSL mostly ...).
auto &m = meta[var.self].decoration;
if (m.builtin)
2018-03-01 11:30:55 +00:00
{
global_builtins.set(m.builtin_type);
2018-03-01 11:30:55 +00:00
if (m.builtin_type == BuiltInCullDistance)
cull_distance_size = type.array.front();
else if (m.builtin_type == BuiltInClipDistance)
clip_distance_size = type.array.front();
}
2017-11-17 12:37:28 +00:00
}
if (builtins.empty())
continue;
if (emitted_block)
SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
2017-11-17 12:37:28 +00:00
emitted_builtins = builtins;
emitted_block = true;
builtin_array = !type.array.empty();
block_var = &var;
}
2018-03-13 13:05:33 +00:00
global_builtins =
Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) |
(1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance)));
2017-11-17 12:37:28 +00:00
// Try to collect all other declared builtins.
if (!emitted_block)
emitted_builtins = global_builtins;
// Can't declare an empty interface block.
if (emitted_builtins.empty())
2017-11-17 12:37:28 +00:00
return;
if (storage == StorageClassOutput)
statement("out gl_PerVertex");
else
statement("in gl_PerVertex");
begin_scope();
if (emitted_builtins.get(BuiltInPosition))
2017-11-17 12:37:28 +00:00
statement("vec4 gl_Position;");
if (emitted_builtins.get(BuiltInPointSize))
2017-11-17 12:37:28 +00:00
statement("float gl_PointSize;");
if (emitted_builtins.get(BuiltInClipDistance))
2018-03-01 11:30:55 +00:00
statement("float gl_ClipDistance[", clip_distance_size, "];");
if (emitted_builtins.get(BuiltInCullDistance))
2018-03-01 11:30:55 +00:00
statement("float gl_CullDistance[", cull_distance_size, "];");
2017-11-17 12:37:28 +00:00
bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
if (builtin_array)
{
// Make sure the array has a supported name in the code.
if (storage == StorageClassOutput)
2017-11-17 12:37:28 +00:00
set_name(block_var->self, "gl_out");
else if (storage == StorageClassInput)
set_name(block_var->self, "gl_in");
2017-11-17 12:37:28 +00:00
if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
else
2017-11-17 12:37:28 +00:00
end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]"));
}
2017-11-17 12:37:28 +00:00
else
end_scope_decl();
statement("");
}
void CompilerGLSL::declare_undefined_values()
{
bool emitted = false;
for (auto &id : ids)
{
if (id.get_type() != TypeUndef)
continue;
auto &undef = id.get<SPIRUndef>();
statement(variable_decl(get<SPIRType>(undef.basetype), to_name(undef.self), undef.self), ";");
emitted = true;
}
if (emitted)
statement("");
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::emit_resources()
{
auto &execution = get_entry_point();
replace_illegal_names();
// Legacy GL uses gl_FragData[], redeclare all fragment outputs
// with builtins.
if (execution.model == ExecutionModelFragment && is_legacy())
replace_fragment_outputs();
// Emit PLS blocks if we have such variables.
if (!pls_inputs.empty() || !pls_outputs.empty())
emit_pls();
// Emit custom gl_PerVertex for SSO compatibility.
if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment)
{
switch (execution.model)
{
case ExecutionModelGeometry:
case ExecutionModelTessellationControl:
case ExecutionModelTessellationEvaluation:
emit_declared_builtin_block(StorageClassInput, execution.model);
emit_declared_builtin_block(StorageClassOutput, execution.model);
break;
case ExecutionModelVertex:
emit_declared_builtin_block(StorageClassOutput, execution.model);
break;
default:
break;
}
}
else
{
// Need to redeclare clip/cull distance with explicit size to use them.
// SPIR-V mandates these builtins have a size declared.
const char *storage = execution.model == ExecutionModelFragment ? "in" : "out";
if (clip_distance_count != 0)
statement(storage, " float gl_ClipDistance[", clip_distance_count, "];");
if (cull_distance_count != 0)
statement(storage, " float gl_CullDistance[", cull_distance_count, "];");
if (clip_distance_count != 0 || cull_distance_count != 0)
statement("");
}
2018-03-01 11:30:55 +00:00
if (position_invariant)
{
statement("invariant gl_Position;");
statement("");
}
bool emitted = false;
// If emitted Vulkan GLSL,
// emit specialization constants as actual floats,
// spec op expressions will redirect to the constant name.
//
// TODO: If we have the fringe case that we create a spec constant which depends on a struct type,
// we'll have to deal with that, but there's currently no known way to express that.
if (options.vulkan_semantics)
{
for (auto &id : ids)
{
if (id.get_type() == TypeConstant)
{
auto &c = id.get<SPIRConstant>();
if (!c.specialization)
continue;
emit_specialization_constant(c);
emitted = true;
}
else if (id.get_type() == TypeConstantOp)
{
emit_specialization_constant_op(id.get<SPIRConstantOp>());
emitted = true;
}
}
}
if (emitted)
statement("");
emitted = false;
// Output all basic struct types which are not Block or BufferBlock as these are declared inplace
// when such variables are instantiated.
for (auto &id : ids)
{
if (id.get_type() == TypeType)
{
auto &type = id.get<SPIRType>();
if (type.basetype == SPIRType::Struct && type.array.empty() && !type.pointer &&
(!meta[type.self].decoration.decoration_flags.get(DecorationBlock) &&
!meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock)))
{
emit_struct(type);
}
}
}
// Output UBOs and SSBOs
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform;
bool has_block_flags = meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) &&
has_block_flags)
{
emit_buffer_block(var);
}
}
}
// Output push constant blocks
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant &&
!is_hidden_variable(var))
2016-08-26 10:58:50 +00:00
{
emit_push_constant_block(var);
2016-08-26 10:58:50 +00:00
}
}
}
bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics;
// Output Uniform Constants (values, samplers, images, etc).
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
// If we're remapping separate samplers and images, only emit the combined samplers.
if (skip_separate_image_sampler)
{
// Sampler buffers are always used without a sampler, and they will also work in regular GL.
bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer;
bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1;
bool separate_sampler = type.basetype == SPIRType::Sampler;
if (!sampler_buffer && (separate_image || separate_sampler))
continue;
}
if (var.storage != StorageClassFunction && type.pointer &&
(type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter) &&
!is_hidden_variable(var))
{
emit_uniform(var);
emitted = true;
}
}
}
if (emitted)
statement("");
emitted = false;
// Output in/out interfaces.
for (auto &id : ids)
{
if (id.get_type() == TypeVariable)
{
auto &var = id.get<SPIRVariable>();
auto &type = get<SPIRType>(var.basetype);
if (var.storage != StorageClassFunction && type.pointer &&
2016-08-26 10:58:50 +00:00
(var.storage == StorageClassInput || var.storage == StorageClassOutput) &&
interface_variable_exists_in_entry_point(var.self) && !is_hidden_variable(var))
{
emit_interface_block(var);
emitted = true;
}
else if (is_builtin_variable(var))
{
// For gl_InstanceIndex emulation on GLES, the API user needs to
// supply this uniform.
if (meta[var.self].decoration.builtin_type == BuiltInInstanceIndex && !options.vulkan_semantics)
{
statement("uniform int SPIRV_Cross_BaseInstance;");
emitted = true;
}
}
}
}
// Global variables.
for (auto global : global_variables)
{
auto &var = get<SPIRVariable>(global);
if (var.storage != StorageClassOutput)
{
add_resource_name(var.self);
statement(variable_decl(var), ";");
emitted = true;
}
}
if (emitted)
statement("");
declare_undefined_values();
2016-03-02 17:09:16 +00:00
}
// Returns a string representation of the ID, usable as a function arg.
// Default is to simply return the expression representation fo the arg ID.
// Subclasses may override to modify the return value.
string CompilerGLSL::to_func_call_arg(uint32_t id)
{
2016-10-24 13:24:24 +00:00
return to_expression(id);
}
void CompilerGLSL::handle_invalid_expression(uint32_t id)
{
// We tried to read an invalidated expression.
// This means we need another pass at compilation, but next time, force temporary variables so that they cannot be invalidated.
forced_temporaries.insert(id);
force_recompile = true;
}
// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
// GLSL does not support packed formats, so simply return the expression.
// Subclasses that do will override
string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &)
{
return expr_str;
}
// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all.
void CompilerGLSL::strip_enclosed_expression(string &expr)
{
if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')')
return;
// Have to make sure that our first and last parens actually enclose everything inside it.
uint32_t paren_count = 0;
for (auto &c : expr)
{
if (c == '(')
paren_count++;
else if (c == ')')
{
paren_count--;
// If we hit 0 and this is not the final char, our first and final parens actually don't
// enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d).
if (paren_count == 0 && &c != &expr.back())
return;
}
}
expr.erase(expr.size() - 1, 1);
expr.erase(begin(expr));
}
string CompilerGLSL::enclose_expression(const string &expr)
{
bool need_parens = false;
2017-07-24 08:17:19 +00:00
// If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back
// unary expressions.
if (!expr.empty())
{
2017-07-24 08:17:19 +00:00
auto c = expr.front();
if (c == '-' || c == '+' || c == '!' || c == '~')
need_parens = true;
2017-07-24 08:17:19 +00:00
}
if (!need_parens)
{
uint32_t paren_count = 0;
for (auto c : expr)
{
if (c == '(')
paren_count++;
else if (c == ')')
{
assert(paren_count);
paren_count--;
}
else if (c == ' ' && paren_count == 0)
{
need_parens = true;
break;
}
}
assert(paren_count == 0);
}
// If this expression contains any spaces which are not enclosed by parentheses,
// we need to enclose it so we can treat the whole string as an expression.
// This happens when two expressions have been part of a binary op earlier.
if (need_parens)
return join('(', expr, ')');
else
return expr;
}
// Just like to_expression except that we enclose the expression inside parentheses if needed.
string CompilerGLSL::to_enclosed_expression(uint32_t id)
{
return enclose_expression(to_expression(id));
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::to_expression(uint32_t id)
{
auto itr = invalid_expressions.find(id);
if (itr != end(invalid_expressions))
handle_invalid_expression(id);
if (ids[id].get_type() == TypeExpression)
{
// We might have a more complex chain of dependencies.
// A possible scenario is that we
//
// %1 = OpLoad
// %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1.
// %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that.
// OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions.
// %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before.
//
// However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store,
// and see that we should not forward reads of the original variable.
auto &expr = get<SPIRExpression>(id);
for (uint32_t dep : expr.expression_dependencies)
if (invalid_expressions.find(dep) != end(invalid_expressions))
handle_invalid_expression(dep);
}
track_expression_read(id);
switch (ids[id].get_type())
{
case TypeExpression:
{
auto &e = get<SPIRExpression>(id);
if (e.base_expression)
return to_enclosed_expression(e.base_expression) + e.expression;
else if (e.need_transpose)
{
bool is_packed = has_decoration(id, DecorationCPacked);
return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), is_packed);
}
else
{
if (force_recompile)
{
// During first compilation phase, certain expression patterns can trigger exponential growth of memory.
// Avoid this by returning dummy expressions during this phase.
// Do not use empty expressions here, because those are sentinels for other cases.
return "_";
}
else
return e.expression;
}
}
case TypeConstant:
{
auto &c = get<SPIRConstant>(id);
auto &type = get<SPIRType>(c.constant_type);
// WorkGroupSize may be a constant.
auto &dec = meta[c.self].decoration;
if (dec.builtin)
return builtin_to_glsl(dec.builtin_type, StorageClassGeneric);
else if (c.specialization && options.vulkan_semantics)
return to_name(id);
else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
return to_name(id);
else if (!type.array.empty() && !backend.can_declare_arrays_inline)
return to_name(id);
else
return constant_expression(c);
}
case TypeConstantOp:
if (options.vulkan_semantics)
return to_name(id);
else
return constant_op_expression(get<SPIRConstantOp>(id));
case TypeVariable:
{
auto &var = get<SPIRVariable>(id);
2016-12-16 11:43:12 +00:00
// If we try to use a loop variable before the loop header, we have to redirect it to the static expression,
// the variable has not been declared yet.
if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable))
return to_expression(var.static_expression);
else if (var.deferred_declaration)
{
var.deferred_declaration = false;
return variable_decl(var);
}
else if (flattened_structs.count(id))
{
return load_flattened_struct(var);
}
else
{
auto &dec = meta[var.self].decoration;
if (dec.builtin)
return builtin_to_glsl(dec.builtin_type, var.storage);
else
return to_name(id);
}
}
case TypeCombinedImageSampler:
// This type should never be taken the expression of directly.
// The intention is that texture sampling functions will extract the image and samplers
// separately and take their expressions as needed.
// GLSL does not use this type because OpSampledImage immediately creates a combined image sampler
// expression ala sampler2D(texture, sampler).
SPIRV_CROSS_THROW("Combined image samplers have no default expression representation.");
2017-08-10 15:12:48 +00:00
case TypeAccessChain:
// We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad.
SPIRV_CROSS_THROW("Access chains have no default expression representation.");
default:
return to_name(id);
}
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop)
2016-03-02 17:09:16 +00:00
{
auto &type = get<SPIRType>(cop.basetype);
bool binary = false;
bool unary = false;
string op;
if (is_legacy() && is_unsigned_opcode(cop.opcode))
SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
// TODO: Find a clean way to reuse emit_instruction.
switch (cop.opcode)
{
case OpSConvert:
case OpUConvert:
case OpFConvert:
op = type_to_glsl_constructor(type);
break;
#define BOP(opname, x) \
case Op##opname: \
binary = true; \
op = x; \
break
#define UOP(opname, x) \
case Op##opname: \
unary = true; \
op = x; \
break
UOP(SNegate, "-");
UOP(Not, "~");
BOP(IAdd, "+");
BOP(ISub, "-");
BOP(IMul, "*");
BOP(SDiv, "/");
BOP(UDiv, "/");
BOP(UMod, "%");
BOP(SMod, "%");
BOP(ShiftRightLogical, ">>");
BOP(ShiftRightArithmetic, ">>");
BOP(ShiftLeftLogical, "<<");
BOP(BitwiseOr, "|");
BOP(BitwiseXor, "^");
BOP(BitwiseAnd, "&");
BOP(LogicalOr, "||");
BOP(LogicalAnd, "&&");
UOP(LogicalNot, "!");
BOP(LogicalEqual, "==");
BOP(LogicalNotEqual, "!=");
BOP(IEqual, "==");
BOP(INotEqual, "!=");
BOP(ULessThan, "<");
BOP(SLessThan, "<");
BOP(ULessThanEqual, "<=");
BOP(SLessThanEqual, "<=");
BOP(UGreaterThan, ">");
BOP(SGreaterThan, ">");
BOP(UGreaterThanEqual, ">=");
BOP(SGreaterThanEqual, ">=");
case OpSelect:
{
if (cop.arguments.size() < 3)
SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
// This one is pretty annoying. It's triggered from
// uint(bool), int(bool) from spec constants.
// In order to preserve its compile-time constness in Vulkan GLSL,
// we need to reduce the OpSelect expression back to this simplified model.
// If we cannot, fail.
if (!to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0]))
{
SPIRV_CROSS_THROW(
"Cannot implement specialization constant op OpSelect. "
"Need trivial select implementation which can be resolved to a simple cast from boolean.");
}
break;
}
case OpVectorShuffle:
{
string expr = type_to_glsl_constructor(type);
expr += "(";
uint32_t left_components = expression_type(cop.arguments[0]).vecsize;
string left_arg = to_enclosed_expression(cop.arguments[0]);
string right_arg = to_enclosed_expression(cop.arguments[1]);
for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++)
{
uint32_t index = cop.arguments[i];
if (index >= left_components)
expr += right_arg + "." + "xyzw"[index - left_components];
else
expr += left_arg + "." + "xyzw"[index];
if (i + 1 < uint32_t(cop.arguments.size()))
expr += ", ";
}
expr += ")";
return expr;
}
case OpCompositeExtract:
{
2018-05-15 09:16:35 +00:00
auto expr =
access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), true, false);
return expr;
}
case OpCompositeInsert:
SPIRV_CROSS_THROW("OpCompositeInsert spec constant op is not supported.");
default:
// Some opcodes are unimplemented here, these are currently not possible to test from glslang.
SPIRV_CROSS_THROW("Unimplemented spec constant op.");
}
SPIRType::BaseType input_type;
2018-02-03 22:23:30 +00:00
bool skip_cast_if_equal_type = glsl_opcode_is_sign_invariant(cop.opcode);
switch (cop.opcode)
{
case OpIEqual:
case OpINotEqual:
input_type = SPIRType::Int;
break;
default:
input_type = type.basetype;
break;
}
#undef BOP
#undef UOP
if (binary)
{
if (cop.arguments.size() < 2)
SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
string cast_op0;
string cast_op1;
auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0],
cop.arguments[1], skip_cast_if_equal_type);
if (type.basetype != input_type && type.basetype != SPIRType::Boolean)
{
expected_type.basetype = input_type;
auto expr = bitcast_glsl_op(type, expected_type);
expr += '(';
expr += join(cast_op0, " ", op, " ", cast_op1);
expr += ')';
return expr;
}
else
return join("(", cast_op0, " ", op, " ", cast_op1, ")");
}
else if (unary)
{
if (cop.arguments.size() < 1)
SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
// Auto-bitcast to result type as needed.
// Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants.
return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")");
}
else
{
if (cop.arguments.size() < 1)
SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp.");
return join(op, "(", to_expression(cop.arguments[0]), ")");
}
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::constant_expression(const SPIRConstant &c)
{
if (!c.subconstants.empty())
{
// Handles Arrays and structures.
string res;
if (backend.use_initializer_list)
res = "{ ";
else
res = type_to_glsl_constructor(get<SPIRType>(c.constant_type)) + "(";
for (auto &elem : c.subconstants)
{
auto &subc = get<SPIRConstant>(elem);
if (subc.specialization && options.vulkan_semantics)
res += to_name(elem);
else
2017-08-03 12:32:07 +00:00
res += constant_expression(subc);
if (&elem != &c.subconstants.back())
res += ", ";
}
res += backend.use_initializer_list ? " }" : ")";
return res;
}
else if (c.columns() == 1)
{
return constant_expression_vector(c, 0);
}
else
{
string res = type_to_glsl(get<SPIRType>(c.constant_type)) + "(";
for (uint32_t col = 0; col < c.columns(); col++)
{
2017-09-27 14:10:29 +00:00
if (options.vulkan_semantics && c.specialization_constant_id(col) != 0)
res += to_name(c.specialization_constant_id(col));
else
res += constant_expression_vector(c, col);
if (col + 1 < c.columns())
res += ", ";
}
res += ")";
return res;
}
2016-03-02 17:09:16 +00:00
}
#ifdef _MSC_VER
// sprintf warning.
// We cannot rely on snprintf existing because, ..., MSVC.
#pragma warning(push)
#pragma warning(disable : 4996)
#endif
string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
{
string res;
float float_value = c.scalar_f16(col, row);
if (std::isnan(float_value) || std::isinf(float_value))
{
if (backend.half_literal_suffix)
{
// There is no uintBitsToFloat for 16-bit, so have to rely on legacy fallback here.
if (float_value == numeric_limits<float>::infinity())
res = join("(1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
else if (float_value == -numeric_limits<float>::infinity())
res = join("(-1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
else if (std::isnan(float_value))
res = join("(0.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
else
SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
}
else
{
SPIRType type;
type.basetype = SPIRType::Half;
type.vecsize = 1;
type.columns = 1;
if (float_value == numeric_limits<float>::infinity())
res = join(type_to_glsl(type), "(1.0 / 0.0)");
else if (float_value == -numeric_limits<float>::infinity())
res = join(type_to_glsl(type), "(-1.0 / 0.0)");
else if (std::isnan(float_value))
res = join(type_to_glsl(type), "(0.0 / 0.0)");
else
SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
}
}
else
{
if (backend.half_literal_suffix)
res = convert_to_string(float_value) + backend.half_literal_suffix;
else
{
// In HLSL (FXC), it's important to cast the literals to half precision right away.
// There is no literal for it.
SPIRType type;
type.basetype = SPIRType::Half;
type.vecsize = 1;
type.columns = 1;
res = join(type_to_glsl(type), "(", convert_to_string(float_value), ")");
}
}
return res;
}
string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
{
string res;
float float_value = c.scalar_f32(col, row);
2018-02-26 08:15:52 +00:00
if (std::isnan(float_value) || std::isinf(float_value))
{
// Use special representation.
if (!is_legacy())
{
SPIRType out_type;
SPIRType in_type;
out_type.basetype = SPIRType::Float;
in_type.basetype = SPIRType::UInt;
out_type.vecsize = 1;
in_type.vecsize = 1;
out_type.width = 32;
in_type.width = 32;
char print_buffer[32];
sprintf(print_buffer, "0x%xu", c.scalar(col, row));
res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
}
else
{
if (float_value == numeric_limits<float>::infinity())
{
if (backend.float_literal_suffix)
res = "(1.0f / 0.0f)";
else
res = "(1.0 / 0.0)";
}
else if (float_value == -numeric_limits<float>::infinity())
{
if (backend.float_literal_suffix)
res = "(-1.0f / 0.0f)";
else
res = "(-1.0 / 0.0)";
}
2018-02-26 08:15:52 +00:00
else if (std::isnan(float_value))
{
if (backend.float_literal_suffix)
res = "(0.0f / 0.0f)";
else
res = "(0.0 / 0.0)";
}
else
SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
}
}
else
{
res = convert_to_string(float_value);
if (backend.float_literal_suffix)
res += "f";
}
return res;
}
std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
{
string res;
2018-02-26 08:15:52 +00:00
double double_value = c.scalar_f64(col, row);
2018-02-26 08:15:52 +00:00
if (std::isnan(double_value) || std::isinf(double_value))
{
// Use special representation.
if (!is_legacy())
{
SPIRType out_type;
SPIRType in_type;
out_type.basetype = SPIRType::Double;
in_type.basetype = SPIRType::UInt64;
out_type.vecsize = 1;
in_type.vecsize = 1;
out_type.width = 64;
in_type.width = 64;
uint64_t u64_value = c.scalar_u64(col, row);
if (options.es)
SPIRV_CROSS_THROW("64-bit integers/float not supported in ES profile.");
require_extension_internal("GL_ARB_gpu_shader_int64");
char print_buffer[64];
sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value),
backend.long_long_literal_suffix ? "ull" : "ul");
res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, ")");
}
else
{
if (options.es)
SPIRV_CROSS_THROW("FP64 not supported in ES profile.");
if (options.version < 400)
require_extension_internal("GL_ARB_gpu_shader_fp64");
if (double_value == numeric_limits<double>::infinity())
{
if (backend.double_literal_suffix)
res = "(1.0lf / 0.0lf)";
else
res = "(1.0 / 0.0)";
}
else if (double_value == -numeric_limits<double>::infinity())
{
if (backend.double_literal_suffix)
res = "(-1.0lf / 0.0lf)";
else
res = "(-1.0 / 0.0)";
}
2018-02-26 08:15:52 +00:00
else if (std::isnan(double_value))
{
if (backend.double_literal_suffix)
res = "(0.0lf / 0.0lf)";
else
res = "(0.0 / 0.0)";
}
else
SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
}
}
else
{
res = convert_to_string(double_value);
if (backend.double_literal_suffix)
res += "lf";
}
return res;
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
2016-03-02 17:09:16 +00:00
string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector)
{
auto type = get<SPIRType>(c.constant_type);
type.columns = 1;
string res;
bool splat = backend.use_constructor_splatting && c.vector_size() > 1;
bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1;
2017-09-27 14:10:29 +00:00
if (!type_is_floating_point(type))
{
// Cannot swizzle literal integers as a special case.
swizzle_splat = false;
}
if (splat || swizzle_splat)
2017-09-27 14:10:29 +00:00
{
// Cannot use constant splatting if we have specialization constants somewhere in the vector.
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.specialization_constant_id(vector, i) != 0)
{
splat = false;
swizzle_splat = false;
2017-09-27 14:10:29 +00:00
break;
}
}
}
if (splat || swizzle_splat)
{
if (type.width == 64)
2016-07-27 08:59:00 +00:00
{
uint64_t ident = c.scalar_u64(vector, 0);
for (uint32_t i = 1; i < c.vector_size(); i++)
{
2016-07-27 08:59:00 +00:00
if (ident != c.scalar_u64(vector, i))
{
2016-07-27 08:59:00 +00:00
splat = false;
swizzle_splat = false;
break;
}
}
2016-07-27 08:59:00 +00:00
}
else
{
uint32_t ident = c.scalar(vector, 0);
for (uint32_t i = 1; i < c.vector_size(); i++)
{
2016-07-27 08:59:00 +00:00
if (ident != c.scalar(vector, i))
{
2016-07-27 08:59:00 +00:00
splat = false;
swizzle_splat = false;
}
}
2016-07-27 08:59:00 +00:00
}
}
if (c.vector_size() > 1 && !swizzle_splat)
res += type_to_glsl(type) + "(";
switch (type.basetype)
{
case SPIRType::Half:
if (splat || swizzle_splat)
{
res += convert_half_to_string(c, vector, 0);
if (swizzle_splat)
res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
res += to_name(c.specialization_constant_id(vector, i));
else
res += convert_half_to_string(c, vector, i);
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::Float:
if (splat || swizzle_splat)
{
res += convert_float_to_string(c, vector, 0);
if (swizzle_splat)
res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
res += convert_float_to_string(c, vector, i);
2017-09-27 14:10:29 +00:00
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
if (splat || swizzle_splat)
2016-07-27 08:59:00 +00:00
{
res += convert_double_to_string(c, vector, 0);
if (swizzle_splat)
res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
2016-07-27 08:59:00 +00:00
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
res += convert_double_to_string(c, vector, i);
2017-09-27 14:10:29 +00:00
2016-07-27 08:59:00 +00:00
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
2016-07-27 09:27:00 +00:00
case SPIRType::Int64:
if (splat)
{
res += convert_to_string(c.scalar_i64(vector, 0));
if (backend.long_long_literal_suffix)
res += "ll";
else
res += "l";
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
2016-07-27 09:27:00 +00:00
else
2017-09-27 14:10:29 +00:00
{
res += convert_to_string(c.scalar_i64(vector, i));
if (backend.long_long_literal_suffix)
res += "ll";
else
res += "l";
}
2016-07-27 09:27:00 +00:00
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::UInt64:
if (splat)
{
res += convert_to_string(c.scalar_u64(vector, 0));
if (backend.long_long_literal_suffix)
res += "ull";
else
res += "ul";
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
2016-07-27 09:27:00 +00:00
else
2017-09-27 14:10:29 +00:00
{
res += convert_to_string(c.scalar_u64(vector, i));
if (backend.long_long_literal_suffix)
res += "ull";
else
res += "ul";
}
2016-07-27 09:27:00 +00:00
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::UInt:
if (splat)
{
res += convert_to_string(c.scalar(vector, 0));
if (is_legacy())
{
// Fake unsigned constant literals with signed ones if possible.
// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
if (c.scalar_i32(vector, 0) < 0)
SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative.");
}
else if (backend.uint32_t_literal_suffix)
res += "u";
}
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
{
res += convert_to_string(c.scalar(vector, i));
if (is_legacy())
{
// Fake unsigned constant literals with signed ones if possible.
// Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed.
if (c.scalar_i32(vector, i) < 0)
2018-04-17 13:01:31 +00:00
SPIRV_CROSS_THROW(
"Tried to convert uint literal into int, but this made the literal negative.");
}
else if (backend.uint32_t_literal_suffix)
2017-09-27 14:10:29 +00:00
res += "u";
}
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::Int:
if (splat)
res += convert_to_string(c.scalar_i32(vector, 0));
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
res += convert_to_string(c.scalar_i32(vector, i));
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
case SPIRType::Boolean:
if (splat)
res += c.scalar(vector, 0) ? "true" : "false";
else
{
for (uint32_t i = 0; i < c.vector_size(); i++)
{
if (options.vulkan_semantics && c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
2017-09-27 14:10:29 +00:00
res += to_name(c.specialization_constant_id(vector, i));
else
res += c.scalar(vector, i) ? "true" : "false";
if (i + 1 < c.vector_size())
res += ", ";
}
}
break;
default:
SPIRV_CROSS_THROW("Invalid constant expression basetype.");
}
if (c.vector_size() > 1 && !swizzle_splat)
res += ")";
return res;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
{
auto &type = get<SPIRType>(result_type);
auto flags = meta[result_id].decoration.decoration_flags;
// If we're declaring temporaries inside continue blocks,
// we must declare the temporary in the loop header so that the continue block can avoid declaring new variables.
if (current_continue_block && !hoisted_temporaries.count(result_id))
{
auto &header = get<SPIRBlock>(current_continue_block->loop_dominator);
if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
2016-05-23 11:30:02 +00:00
[result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
return tmp.first == result_type && tmp.second == result_id;
2017-11-17 12:38:29 +00:00
}) == end(header.declare_temporary))
{
header.declare_temporary.emplace_back(result_type, result_id);
hoisted_temporaries.insert(result_id);
force_recompile = true;
}
return join(to_name(result_id), " = ");
}
else if (hoisted_temporaries.count(result_id))
{
// The temporary has already been declared earlier, so just "declare" the temporary by writing to it.
return join(to_name(result_id), " = ");
}
else
{
// The result_id has not been made into an expression yet, so use flags interface.
add_local_variable_name(result_id);
return join(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = ");
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::expression_is_forwarded(uint32_t id)
{
return forwarded_temporaries.find(id) != end(forwarded_temporaries);
2016-03-02 17:09:16 +00:00
}
SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding,
bool suppress_usage_tracking)
2016-03-02 17:09:16 +00:00
{
if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries)))
{
// Just forward it without temporary.
// If the forward is trivial, we do not force flushing to temporary for this expression.
if (!suppress_usage_tracking)
forwarded_temporaries.insert(result_id);
return set<SPIRExpression>(result_id, rhs, result_type, true);
}
else
{
// If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are).
statement(declare_temporary(result_type, result_id), rhs, ";");
return set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
bool forward = should_forward(op0);
emit_op(result_type, result_id, join(op, to_enclosed_expression(op0)), forward);
inherit_expression_dependencies(result_id, op0);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
{
bool forward = should_forward(op0) && should_forward(op1);
emit_op(result_type, result_id, join(to_enclosed_expression(op0), " ", op, " ", to_enclosed_expression(op1)),
forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
}
2017-03-24 14:00:48 +00:00
void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op)
{
auto &type = get<SPIRType>(result_type);
auto expr = type_to_glsl_constructor(type);
expr += '(';
for (uint32_t i = 0; i < type.vecsize; i++)
{
// Make sure to call to_expression multiple times to ensure
// that these expressions are properly flushed to temporaries if needed.
expr += op;
expr += to_enclosed_expression(operand);
expr += '.';
expr += index_to_swizzle(i);
if (i + 1 < type.vecsize)
expr += ", ";
}
expr += ')';
emit_op(result_type, result_id, expr, should_forward(operand));
inherit_expression_dependencies(result_id, operand);
}
void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
const char *op)
{
auto &type = get<SPIRType>(result_type);
auto expr = type_to_glsl_constructor(type);
expr += '(';
for (uint32_t i = 0; i < type.vecsize; i++)
{
// Make sure to call to_expression multiple times to ensure
// that these expressions are properly flushed to temporaries if needed.
expr += to_enclosed_expression(op0);
expr += '.';
expr += index_to_swizzle(i);
expr += ' ';
expr += op;
expr += ' ';
expr += to_enclosed_expression(op1);
expr += '.';
expr += index_to_swizzle(i);
if (i + 1 < type.vecsize)
expr += ", ";
}
expr += ')';
emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
2016-03-02 17:09:16 +00:00
}
SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type,
uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type)
{
auto &type0 = expression_type(op0);
auto &type1 = expression_type(op1);
// We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs.
// For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected
// since equality test is exactly the same.
bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type);
// Create a fake type so we can bitcast to it.
// We only deal with regular arithmetic types here like int, uints and so on.
SPIRType expected_type;
expected_type.basetype = input_type;
expected_type.vecsize = type0.vecsize;
expected_type.columns = type0.columns;
expected_type.width = type0.width;
if (cast)
{
cast_op0 = bitcast_glsl(expected_type, op0);
cast_op1 = bitcast_glsl(expected_type, op1);
}
else
{
// If we don't cast, our actual input type is that of the first (or second) argument.
cast_op0 = to_enclosed_expression(op0);
cast_op1 = to_enclosed_expression(op1);
input_type = type0.basetype;
}
return expected_type;
}
void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
{
string cast_op0, cast_op1;
auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
auto &out_type = get<SPIRType>(result_type);
// We might have casted away from the result type, so bitcast again.
// For example, arithmetic right shift with uint inputs.
// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
string expr;
if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
{
expected_type.basetype = input_type;
expr = bitcast_glsl_op(out_type, expected_type);
expr += '(';
expr += join(cast_op0, " ", op, " ", cast_op1);
expr += ')';
}
else
expr += join(cast_op0, " ", op, " ", cast_op1);
emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op)
{
bool forward = should_forward(op0);
emit_op(result_type, result_id, join(op, "(", to_expression(op0), ")"), forward);
inherit_expression_dependencies(result_id, op0);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
const char *op)
2016-03-02 17:09:16 +00:00
{
bool forward = should_forward(op0) && should_forward(op1);
emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ")"), forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type)
{
string cast_op0, cast_op1;
auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type);
auto &out_type = get<SPIRType>(result_type);
// Special case boolean outputs since relational opcodes output booleans instead of int/uint.
string expr;
if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean)
{
expected_type.basetype = input_type;
expr = bitcast_glsl_op(out_type, expected_type);
expr += '(';
expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
expr += ')';
}
else
{
expr += join(op, "(", cast_op0, ", ", cast_op1, ")");
}
emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1));
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
}
void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
uint32_t op2, const char *op)
2016-03-02 17:09:16 +00:00
{
bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2);
emit_op(result_type, result_id,
join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ")"), forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
inherit_expression_dependencies(result_id, op2);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1,
uint32_t op2, uint32_t op3, const char *op)
2016-03-02 17:09:16 +00:00
{
bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
2017-11-17 12:38:29 +00:00
emit_op(result_type, result_id,
join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ",
to_expression(op3), ")"),
forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
inherit_expression_dependencies(result_id, op2);
inherit_expression_dependencies(result_id, op3);
2016-03-02 17:09:16 +00:00
}
// EXT_shader_texture_lod only concerns fragment shaders so lod tex functions
// are not allowed in ES 2 vertex shaders. But SPIR-V only supports lod tex
// functions in vertex shaders so we revert those back to plain calls when
// the lod is a constant value of zero.
bool CompilerGLSL::check_explicit_lod_allowed(uint32_t lod)
{
auto &execution = get_entry_point();
bool allowed = !is_legacy_es() || execution.model == ExecutionModelFragment;
if (!allowed && lod != 0)
{
auto *lod_constant = maybe_get<SPIRConstant>(lod);
if (!lod_constant || lod_constant->scalar_f32() != 0.0f)
{
SPIRV_CROSS_THROW("Explicit lod not allowed in legacy ES non-fragment shaders.");
}
}
return allowed;
}
string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t lod)
2016-03-02 17:09:16 +00:00
{
const char *type;
switch (imgtype.image.dim)
{
case spv::Dim1D:
type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D";
break;
case spv::Dim2D:
type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D";
break;
case spv::Dim3D:
type = "3D";
break;
case spv::DimCube:
type = "Cube";
break;
case spv::DimBuffer:
type = "Buffer";
break;
case spv::DimSubpassData:
type = "2D";
break;
default:
type = "";
break;
}
bool use_explicit_lod = check_explicit_lod_allowed(lod);
if (op == "textureLod" || op == "textureProjLod" || op == "textureGrad")
{
if (is_legacy_es())
{
if (use_explicit_lod)
require_extension_internal("GL_EXT_shader_texture_lod");
}
else if (is_legacy())
require_extension_internal("GL_ARB_shader_texture_lod");
}
2016-09-17 13:56:23 +00:00
if (op == "texture")
return join("texture", type);
else if (op == "textureLod")
{
if (use_explicit_lod)
return join("texture", type, is_legacy_es() ? "LodEXT" : "Lod");
else
return join("texture", type);
}
else if (op == "textureProj")
return join("texture", type, "Proj");
else if (op == "textureGrad")
return join("texture", type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad");
else if (op == "textureProjLod")
{
if (use_explicit_lod)
return join("texture", type, is_legacy_es() ? "ProjLodEXT" : "ProjLod");
else
return join("texture", type);
}
else
{
SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op));
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp)
{
auto *cleft = maybe_get<SPIRConstant>(left);
auto *cright = maybe_get<SPIRConstant>(right);
auto &lerptype = expression_type(lerp);
// If our targets aren't constants, we cannot use construction.
if (!cleft || !cright)
return false;
// If our targets are spec constants, we cannot use construction.
if (cleft->specialization || cright->specialization)
return false;
// We can only use trivial construction if we have a scalar
// (should be possible to do it for vectors as well, but that is overkill for now).
if (lerptype.basetype != SPIRType::Boolean || lerptype.vecsize > 1)
return false;
// If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor.
bool ret = false;
switch (type.basetype)
{
case SPIRType::Int:
case SPIRType::UInt:
ret = cleft->scalar() == 0 && cright->scalar() == 1;
break;
case SPIRType::Half:
ret = cleft->scalar_f16() == 0.0f && cright->scalar_f16() == 1.0f;
break;
case SPIRType::Float:
ret = cleft->scalar_f32() == 0.0f && cright->scalar_f32() == 1.0f;
break;
case SPIRType::Double:
ret = cleft->scalar_f64() == 0.0 && cright->scalar_f64() == 1.0;
break;
case SPIRType::Int64:
case SPIRType::UInt64:
ret = cleft->scalar_u64() == 0 && cright->scalar_u64() == 1;
break;
default:
break;
}
if (ret)
op = type_to_glsl_constructor(type);
return ret;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp)
2016-03-02 17:09:16 +00:00
{
auto &lerptype = expression_type(lerp);
auto &restype = get<SPIRType>(result_type);
string mix_op;
bool has_boolean_mix = backend.boolean_mix_support &&
((options.es && options.version >= 310) || (!options.es && options.version >= 450));
bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp);
// Cannot use boolean mix when the lerp argument is just one boolean,
// fall back to regular trinary statements.
if (lerptype.vecsize == 1)
has_boolean_mix = false;
// If we can reduce the mix to a simple cast, do so.
// This helps for cases like int(bool), uint(bool) which is implemented with
// OpSelect bool 1 0.
if (trivial_mix)
{
emit_unary_func_op(result_type, id, lerp, mix_op.c_str());
}
else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean)
{
// Boolean mix not supported on desktop without extension.
// Was added in OpenGL 4.5 with ES 3.1 compat.
//
// Could use GL_EXT_shader_integer_mix on desktop at least,
// but Apple doesn't support it. :(
// Just implement it as ternary expressions.
string expr;
if (lerptype.vecsize == 1)
expr = join(to_enclosed_expression(lerp), " ? ", to_enclosed_expression(right), " : ",
to_enclosed_expression(left));
else
{
2016-05-23 11:30:02 +00:00
auto swiz = [this](uint32_t expression, uint32_t i) {
return join(to_enclosed_expression(expression), ".", index_to_swizzle(i));
};
expr = type_to_glsl_constructor(restype);
expr += "(";
for (uint32_t i = 0; i < restype.vecsize; i++)
{
expr += swiz(lerp, i);
expr += " ? ";
expr += swiz(right, i);
expr += " : ";
expr += swiz(left, i);
if (i + 1 < restype.vecsize)
expr += ", ";
}
expr += ")";
}
emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp));
inherit_expression_dependencies(id, left);
inherit_expression_dependencies(id, right);
inherit_expression_dependencies(id, lerp);
}
else
emit_trinary_func_op(result_type, id, left, right, lerp, "mix");
2016-03-02 17:09:16 +00:00
}
2016-09-11 10:54:08 +00:00
string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id)
{
// Keep track of the array indices we have used to load the image.
// We'll need to use the same array index into the combined image sampler array.
auto image_expr = to_expression(image_id);
string array_expr;
auto array_index = image_expr.find_first_of('[');
if (array_index != string::npos)
array_expr = image_expr.substr(array_index, string::npos);
2016-09-11 10:54:08 +00:00
auto &args = current_function->arguments;
// For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect
// all possible combinations into new sampler2D uniforms.
auto *image = maybe_get_backing_variable(image_id);
auto *samp = maybe_get_backing_variable(samp_id);
if (image)
image_id = image->self;
if (samp)
samp_id = samp->self;
auto image_itr = find_if(begin(args), end(args),
[image_id](const SPIRFunction::Parameter &param) { return param.id == image_id; });
auto sampler_itr = find_if(begin(args), end(args),
[samp_id](const SPIRFunction::Parameter &param) { return param.id == samp_id; });
if (image_itr != end(args) || sampler_itr != end(args))
{
// If any parameter originates from a parameter, we will find it in our argument list.
bool global_image = image_itr == end(args);
2016-09-11 10:54:08 +00:00
bool global_sampler = sampler_itr == end(args);
uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args));
uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args));
2016-09-11 10:54:08 +00:00
auto &combined = current_function->combined_parameters;
auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) {
return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid &&
p.sampler_id == sid;
2016-09-11 10:54:08 +00:00
});
if (itr != end(combined))
return to_expression(itr->id) + array_expr;
2016-09-11 10:54:08 +00:00
else
{
SPIRV_CROSS_THROW(
2016-09-11 10:54:08 +00:00
"Cannot find mapping for combined sampler parameter, was build_combined_image_samplers() used "
"before compile() was called?");
}
}
else
{
2016-09-11 10:54:08 +00:00
// For global sampler2D, look directly at the global remapping table.
auto &mapping = combined_image_samplers;
auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) {
return combined.image_id == image_id && combined.sampler_id == samp_id;
});
2016-09-11 10:54:08 +00:00
if (itr != end(combined_image_samplers))
return to_expression(itr->combined_id) + array_expr;
else
{
SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used "
"before compile() was called?");
}
}
}
void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id)
{
2016-09-11 10:54:08 +00:00
if (options.vulkan_semantics && combined_image_samplers.empty())
{
emit_binary_func_op(result_type, result_id, image_id, samp_id,
type_to_glsl(get<SPIRType>(result_type)).c_str());
// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
forwarded_temporaries.erase(result_id);
2016-09-11 10:54:08 +00:00
}
else
{
// Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types.
emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true);
}
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::emit_texture_op(const Instruction &i)
{
auto ops = stream(i);
auto op = static_cast<Op>(i.op);
uint32_t length = i.length;
if (i.offset + length > spirv.size())
SPIRV_CROSS_THROW("Compiler::parse() opcode out of range.");
vector<uint32_t> inherited_expressions;
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t img = ops[2];
uint32_t coord = ops[3];
uint32_t dref = 0;
uint32_t comp = 0;
bool gather = false;
bool proj = false;
bool fetch = false;
const uint32_t *opt = nullptr;
inherited_expressions.push_back(coord);
switch (op)
{
case OpImageSampleDrefImplicitLod:
case OpImageSampleDrefExplicitLod:
dref = ops[4];
opt = &ops[5];
length -= 5;
break;
case OpImageSampleProjDrefImplicitLod:
case OpImageSampleProjDrefExplicitLod:
dref = ops[4];
opt = &ops[5];
length -= 5;
proj = true;
break;
case OpImageDrefGather:
dref = ops[4];
opt = &ops[5];
length -= 5;
gather = true;
break;
case OpImageGather:
comp = ops[4];
opt = &ops[5];
length -= 5;
gather = true;
break;
case OpImageFetch:
case OpImageRead: // Reads == fetches in Metal (other langs will not get here)
opt = &ops[4];
length -= 4;
fetch = true;
break;
case OpImageSampleProjImplicitLod:
case OpImageSampleProjExplicitLod:
opt = &ops[4];
length -= 4;
proj = true;
break;
default:
opt = &ops[4];
length -= 4;
break;
}
// Bypass pointers because we need the real image struct
auto &type = expression_type(img);
auto &imgtype = get<SPIRType>(type.self);
uint32_t coord_components = 0;
switch (imgtype.image.dim)
{
case spv::Dim1D:
coord_components = 1;
break;
case spv::Dim2D:
coord_components = 2;
break;
case spv::Dim3D:
coord_components = 3;
break;
case spv::DimCube:
coord_components = 3;
break;
case spv::DimBuffer:
coord_components = 1;
break;
default:
coord_components = 2;
break;
}
if (dref)
inherited_expressions.push_back(dref);
if (proj)
coord_components++;
if (imgtype.image.arrayed)
coord_components++;
uint32_t bias = 0;
uint32_t lod = 0;
uint32_t grad_x = 0;
uint32_t grad_y = 0;
uint32_t coffset = 0;
uint32_t offset = 0;
uint32_t coffsets = 0;
uint32_t sample = 0;
uint32_t flags = 0;
if (length)
{
flags = *opt++;
length--;
}
2016-05-23 11:30:02 +00:00
auto test = [&](uint32_t &v, uint32_t flag) {
if (length && (flags & flag))
{
v = *opt++;
inherited_expressions.push_back(v);
length--;
}
};
test(bias, ImageOperandsBiasMask);
test(lod, ImageOperandsLodMask);
test(grad_x, ImageOperandsGradMask);
test(grad_y, ImageOperandsGradMask);
test(coffset, ImageOperandsConstOffsetMask);
test(offset, ImageOperandsOffsetMask);
test(coffsets, ImageOperandsConstOffsetsMask);
test(sample, ImageOperandsSampleMask);
string expr;
bool forward = false;
2017-01-13 15:32:54 +00:00
expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset),
(!!grad_x || !!grad_y), !!dref, lod);
expr += "(";
expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod,
coffset, offset, bias, comp, sample, &forward);
expr += ")";
emit_op(result_type, id, expr, forward);
for (auto &inherit : inherited_expressions)
inherit_expression_dependencies(id, inherit);
switch (op)
{
case OpImageSampleDrefImplicitLod:
case OpImageSampleImplicitLod:
case OpImageSampleProjImplicitLod:
case OpImageSampleProjDrefImplicitLod:
register_control_dependent_expression(id);
break;
default:
break;
}
}
// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
// For some subclasses, the function is a method on the specified image.
2017-01-08 13:52:57 +00:00
string CompilerGLSL::to_function_name(uint32_t, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj,
bool has_array_offsets, bool has_offset, bool has_grad, bool, uint32_t lod)
{
string fname;
// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
// To emulate this, we will have to use textureGrad with a constant gradient of 0.
// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
bool workaround_lod_array_shadow_as_grad = false;
if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
imgtype.image.depth && lod)
{
auto *constant_lod = maybe_get<SPIRConstant>(lod);
if (!constant_lod || constant_lod->scalar_f32() != 0.0f)
2017-06-23 07:46:15 +00:00
SPIRV_CROSS_THROW(
2017-06-23 07:50:01 +00:00
"textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be expressed in GLSL.");
workaround_lod_array_shadow_as_grad = true;
}
if (is_fetch)
fname += "texelFetch";
else
{
fname += "texture";
if (is_gather)
fname += "Gather";
if (has_array_offsets)
fname += "Offsets";
if (is_proj)
fname += "Proj";
if (has_grad || workaround_lod_array_shadow_as_grad)
fname += "Grad";
if (!!lod && !workaround_lod_array_shadow_as_grad)
fname += "Lod";
}
if (has_offset)
fname += "Offset";
return is_legacy() ? legacy_tex_op(fname, imgtype, lod) : fname;
}
std::string CompilerGLSL::convert_separate_image_to_combined(uint32_t id)
{
auto &imgtype = expression_type(id);
auto *var = maybe_get_backing_variable(id);
// If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler.
if (var)
{
auto &type = get<SPIRType>(var->basetype);
if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer)
{
if (!dummy_sampler_id)
SPIRV_CROSS_THROW(
2018-04-30 10:46:21 +00:00
"Cannot find dummy sampler ID. Was build_dummy_sampler_for_combined_images() called?");
if (options.vulkan_semantics)
{
auto sampled_type = imgtype;
sampled_type.basetype = SPIRType::SampledImage;
2018-04-30 10:46:21 +00:00
return join(type_to_glsl(sampled_type), "(", to_expression(id), ", ", to_expression(dummy_sampler_id),
")");
}
else
return to_combined_image_sampler(id, dummy_sampler_id);
}
}
return to_expression(id);
}
// Returns the function args for a texture sampling function for the specified image and sampling characteristics.
2018-04-30 10:46:21 +00:00
string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather,
bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref,
uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset,
uint32_t bias, uint32_t comp, uint32_t sample, bool *p_forward)
{
string farg_str;
if (is_fetch)
farg_str = convert_separate_image_to_combined(img);
else
farg_str = to_expression(img);
bool swizz_func = backend.swizzle_is_function;
2016-05-23 11:30:02 +00:00
auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * {
if (comps == in_comps)
return "";
switch (comps)
{
case 1:
return ".x";
case 2:
return swizz_func ? ".xy()" : ".xy";
case 3:
return swizz_func ? ".xyz()" : ".xyz";
default:
return "";
}
};
bool forward = should_forward(coord);
// The IR can give us more components than we need, so chop them off as needed.
auto swizzle_expr = swizzle(coord_components, expression_type(coord).vecsize);
// Only enclose the UV expression if needed.
auto coord_expr = (*swizzle_expr == '\0') ? to_expression(coord) : (to_enclosed_expression(coord) + swizzle_expr);
// texelFetch only takes int, not uint.
auto &coord_type = expression_type(coord);
if (coord_type.basetype == SPIRType::UInt)
{
auto expected_type = coord_type;
expected_type.basetype = SPIRType::Int;
coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr);
}
// textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason.
// To emulate this, we will have to use textureGrad with a constant gradient of 0.
// The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code.
// This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube.
2017-06-23 07:50:01 +00:00
bool workaround_lod_array_shadow_as_grad =
((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) &&
imgtype.image.depth && lod;
if (dref)
{
forward = forward && should_forward(dref);
// SPIR-V splits dref and coordinate.
2018-04-30 10:45:23 +00:00
if (is_gather || coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather.
{
farg_str += ", ";
farg_str += to_expression(coord);
farg_str += ", ";
farg_str += to_expression(dref);
}
2017-07-31 08:05:32 +00:00
else if (is_proj)
{
// Have to reshuffle so we get vec4(coord, dref, proj), special case.
// Other shading languages splits up the arguments for coord and compare value like SPIR-V.
// The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow.
farg_str += ", vec4(";
if (imgtype.image.dim == Dim1D)
{
// Could reuse coord_expr, but we will mess up the temporary usage checking.
farg_str += to_enclosed_expression(coord) + ".x";
farg_str += ", ";
farg_str += "0.0, ";
farg_str += to_expression(dref);
farg_str += ", ";
farg_str += to_enclosed_expression(coord) + ".y)";
}
else if (imgtype.image.dim == Dim2D)
{
// Could reuse coord_expr, but we will mess up the temporary usage checking.
farg_str += to_enclosed_expression(coord) + (swizz_func ? ".xy()" : ".xy");
farg_str += ", ";
farg_str += to_expression(dref);
farg_str += ", ";
farg_str += to_enclosed_expression(coord) + ".z)";
}
else
SPIRV_CROSS_THROW("Invalid type for textureProj with shadow.");
}
else
{
// Create a composite which merges coord/dref into a single vector.
auto type = expression_type(coord);
type.vecsize = coord_components + 1;
farg_str += ", ";
farg_str += type_to_glsl_constructor(type);
farg_str += "(";
farg_str += coord_expr;
farg_str += ", ";
farg_str += to_expression(dref);
farg_str += ")";
}
}
else
{
farg_str += ", ";
farg_str += coord_expr;
}
if (grad_x || grad_y)
{
forward = forward && should_forward(grad_x);
forward = forward && should_forward(grad_y);
farg_str += ", ";
farg_str += to_expression(grad_x);
farg_str += ", ";
farg_str += to_expression(grad_y);
}
if (lod)
{
if (workaround_lod_array_shadow_as_grad)
{
// Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0.
// Implementing this as plain texture() is not safe on some implementations.
if (imgtype.image.dim == Dim2D)
farg_str += ", vec2(0.0), vec2(0.0)";
else if (imgtype.image.dim == DimCube)
farg_str += ", vec3(0.0), vec3(0.0)";
}
else
{
if (check_explicit_lod_allowed(lod))
{
forward = forward && should_forward(lod);
farg_str += ", ";
farg_str += to_expression(lod);
}
}
}
else if (is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms)
{
// Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default.
farg_str += ", 0";
}
if (coffset)
{
forward = forward && should_forward(coffset);
farg_str += ", ";
farg_str += to_expression(coffset);
}
else if (offset)
{
forward = forward && should_forward(offset);
farg_str += ", ";
farg_str += to_expression(offset);
}
if (bias)
{
forward = forward && should_forward(bias);
farg_str += ", ";
farg_str += to_expression(bias);
}
if (comp)
{
forward = forward && should_forward(comp);
farg_str += ", ";
farg_str += to_expression(comp);
}
2016-06-22 10:35:58 +00:00
if (sample)
{
farg_str += ", ";
farg_str += to_expression(sample);
2016-06-22 10:35:58 +00:00
}
*p_forward = forward;
return farg_str;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t)
{
auto op = static_cast<GLSLstd450>(eop);
if (is_legacy() && is_unsigned_glsl_opcode(op))
SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets.");
switch (op)
{
// FP fiddling
case GLSLstd450Round:
emit_unary_func_op(result_type, id, args[0], "round");
break;
2016-07-27 08:59:00 +00:00
case GLSLstd450RoundEven:
if ((options.es && options.version >= 300) || (!options.es && options.version >= 130))
emit_unary_func_op(result_type, id, args[0], "roundEven");
else
SPIRV_CROSS_THROW("roundEven supported only in ESSL 300 and GLSL 130 and up.");
2016-07-27 08:59:00 +00:00
break;
case GLSLstd450Trunc:
emit_unary_func_op(result_type, id, args[0], "trunc");
break;
case GLSLstd450SAbs:
case GLSLstd450FAbs:
emit_unary_func_op(result_type, id, args[0], "abs");
break;
case GLSLstd450SSign:
case GLSLstd450FSign:
emit_unary_func_op(result_type, id, args[0], "sign");
break;
case GLSLstd450Floor:
emit_unary_func_op(result_type, id, args[0], "floor");
break;
case GLSLstd450Ceil:
emit_unary_func_op(result_type, id, args[0], "ceil");
break;
case GLSLstd450Fract:
emit_unary_func_op(result_type, id, args[0], "fract");
break;
case GLSLstd450Radians:
emit_unary_func_op(result_type, id, args[0], "radians");
break;
case GLSLstd450Degrees:
emit_unary_func_op(result_type, id, args[0], "degrees");
break;
case GLSLstd450Fma:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma");
break;
case GLSLstd450Modf:
register_call_out_argument(args[1]);
forced_temporaries.insert(id);
emit_binary_func_op(result_type, id, args[0], args[1], "modf");
break;
2017-09-04 08:24:21 +00:00
case GLSLstd450ModfStruct:
{
forced_temporaries.insert(id);
auto &type = get<SPIRType>(result_type);
auto flags = meta[id].decoration.decoration_flags;
statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ",
to_expression(id), ".", to_member_name(type, 1), ");");
break;
}
// Minmax
case GLSLstd450UMin:
case GLSLstd450FMin:
case GLSLstd450SMin:
emit_binary_func_op(result_type, id, args[0], args[1], "min");
break;
case GLSLstd450FMax:
case GLSLstd450UMax:
case GLSLstd450SMax:
emit_binary_func_op(result_type, id, args[0], args[1], "max");
break;
case GLSLstd450FClamp:
case GLSLstd450UClamp:
case GLSLstd450SClamp:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp");
break;
// Trig
case GLSLstd450Sin:
emit_unary_func_op(result_type, id, args[0], "sin");
break;
case GLSLstd450Cos:
emit_unary_func_op(result_type, id, args[0], "cos");
break;
case GLSLstd450Tan:
emit_unary_func_op(result_type, id, args[0], "tan");
break;
case GLSLstd450Asin:
emit_unary_func_op(result_type, id, args[0], "asin");
break;
case GLSLstd450Acos:
emit_unary_func_op(result_type, id, args[0], "acos");
break;
case GLSLstd450Atan:
emit_unary_func_op(result_type, id, args[0], "atan");
break;
case GLSLstd450Sinh:
emit_unary_func_op(result_type, id, args[0], "sinh");
break;
case GLSLstd450Cosh:
emit_unary_func_op(result_type, id, args[0], "cosh");
break;
case GLSLstd450Tanh:
emit_unary_func_op(result_type, id, args[0], "tanh");
break;
case GLSLstd450Asinh:
emit_unary_func_op(result_type, id, args[0], "asinh");
break;
case GLSLstd450Acosh:
emit_unary_func_op(result_type, id, args[0], "acosh");
break;
case GLSLstd450Atanh:
emit_unary_func_op(result_type, id, args[0], "atanh");
break;
case GLSLstd450Atan2:
emit_binary_func_op(result_type, id, args[0], args[1], "atan");
break;
// Exponentials
case GLSLstd450Pow:
emit_binary_func_op(result_type, id, args[0], args[1], "pow");
break;
case GLSLstd450Exp:
emit_unary_func_op(result_type, id, args[0], "exp");
break;
case GLSLstd450Log:
emit_unary_func_op(result_type, id, args[0], "log");
break;
case GLSLstd450Exp2:
emit_unary_func_op(result_type, id, args[0], "exp2");
break;
case GLSLstd450Log2:
emit_unary_func_op(result_type, id, args[0], "log2");
break;
case GLSLstd450Sqrt:
emit_unary_func_op(result_type, id, args[0], "sqrt");
break;
case GLSLstd450InverseSqrt:
emit_unary_func_op(result_type, id, args[0], "inversesqrt");
break;
// Matrix math
case GLSLstd450Determinant:
emit_unary_func_op(result_type, id, args[0], "determinant");
break;
case GLSLstd450MatrixInverse:
emit_unary_func_op(result_type, id, args[0], "inverse");
break;
// Lerping
case GLSLstd450FMix:
case GLSLstd450IMix:
{
emit_mix_op(result_type, id, args[0], args[1], args[2]);
break;
}
case GLSLstd450Step:
emit_binary_func_op(result_type, id, args[0], args[1], "step");
break;
case GLSLstd450SmoothStep:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep");
break;
// Packing
case GLSLstd450Frexp:
register_call_out_argument(args[1]);
forced_temporaries.insert(id);
emit_binary_func_op(result_type, id, args[0], args[1], "frexp");
break;
2017-09-04 08:24:21 +00:00
case GLSLstd450FrexpStruct:
{
forced_temporaries.insert(id);
auto &type = get<SPIRType>(result_type);
auto flags = meta[id].decoration.decoration_flags;
statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(id)), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ",
to_expression(id), ".", to_member_name(type, 1), ");");
break;
}
case GLSLstd450Ldexp:
emit_binary_func_op(result_type, id, args[0], args[1], "ldexp");
break;
case GLSLstd450PackSnorm4x8:
emit_unary_func_op(result_type, id, args[0], "packSnorm4x8");
break;
case GLSLstd450PackUnorm4x8:
emit_unary_func_op(result_type, id, args[0], "packUnorm4x8");
break;
case GLSLstd450PackSnorm2x16:
emit_unary_func_op(result_type, id, args[0], "packSnorm2x16");
break;
case GLSLstd450PackUnorm2x16:
emit_unary_func_op(result_type, id, args[0], "packUnorm2x16");
break;
case GLSLstd450PackHalf2x16:
emit_unary_func_op(result_type, id, args[0], "packHalf2x16");
break;
case GLSLstd450UnpackSnorm4x8:
emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8");
break;
case GLSLstd450UnpackUnorm4x8:
emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8");
break;
case GLSLstd450UnpackSnorm2x16:
emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16");
break;
case GLSLstd450UnpackUnorm2x16:
emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16");
break;
case GLSLstd450UnpackHalf2x16:
emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16");
break;
2016-07-27 08:59:00 +00:00
case GLSLstd450PackDouble2x32:
emit_unary_func_op(result_type, id, args[0], "packDouble2x32");
break;
case GLSLstd450UnpackDouble2x32:
emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32");
break;
// Vector math
case GLSLstd450Length:
emit_unary_func_op(result_type, id, args[0], "length");
break;
case GLSLstd450Distance:
emit_binary_func_op(result_type, id, args[0], args[1], "distance");
break;
case GLSLstd450Cross:
emit_binary_func_op(result_type, id, args[0], args[1], "cross");
break;
case GLSLstd450Normalize:
emit_unary_func_op(result_type, id, args[0], "normalize");
break;
case GLSLstd450FaceForward:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward");
break;
case GLSLstd450Reflect:
emit_binary_func_op(result_type, id, args[0], args[1], "reflect");
break;
case GLSLstd450Refract:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract");
break;
// Bit-fiddling
case GLSLstd450FindILsb:
emit_unary_func_op(result_type, id, args[0], "findLSB");
break;
case GLSLstd450FindSMsb:
case GLSLstd450FindUMsb:
emit_unary_func_op(result_type, id, args[0], "findMSB");
break;
// Multisampled varying
case GLSLstd450InterpolateAtCentroid:
emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid");
break;
case GLSLstd450InterpolateAtSample:
emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample");
break;
case GLSLstd450InterpolateAtOffset:
emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset");
break;
default:
statement("// unimplemented GLSL op ", eop);
break;
}
2016-03-02 17:09:16 +00:00
}
2017-11-22 18:27:03 +00:00
void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
uint32_t)
2017-11-21 13:04:57 +00:00
{
require_extension_internal("GL_AMD_shader_ballot");
2017-11-21 13:04:57 +00:00
2017-11-22 18:27:03 +00:00
enum AMDShaderBallot
{
2017-11-21 13:04:57 +00:00
SwizzleInvocationsAMD = 1,
SwizzleInvocationsMaskedAMD = 2,
WriteInvocationAMD = 3,
MbcntAMD = 4
};
auto op = static_cast<AMDShaderBallot>(eop);
switch (op)
{
case SwizzleInvocationsAMD:
emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
register_control_dependent_expression(id);
2017-11-21 13:04:57 +00:00
break;
case SwizzleInvocationsMaskedAMD:
emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
register_control_dependent_expression(id);
2017-11-21 13:04:57 +00:00
break;
case WriteInvocationAMD:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
register_control_dependent_expression(id);
2017-11-21 13:04:57 +00:00
break;
case MbcntAMD:
emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
register_control_dependent_expression(id);
2017-11-21 13:04:57 +00:00
break;
default:
statement("// unimplemented SPV AMD shader ballot op ", eop);
break;
}
}
2017-11-22 18:27:03 +00:00
void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
const uint32_t *args, uint32_t)
2017-11-21 13:04:57 +00:00
{
require_extension_internal("GL_AMD_shader_explicit_vertex_parameter");
2017-11-21 13:04:57 +00:00
2017-11-22 18:27:03 +00:00
enum AMDShaderExplicitVertexParameter
{
2017-11-21 13:04:57 +00:00
InterpolateAtVertexAMD = 1
};
auto op = static_cast<AMDShaderExplicitVertexParameter>(eop);
switch (op)
{
case InterpolateAtVertexAMD:
emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
break;
default:
statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
break;
}
}
2017-11-22 18:27:03 +00:00
void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
const uint32_t *args, uint32_t)
2017-11-21 13:04:57 +00:00
{
require_extension_internal("GL_AMD_shader_trinary_minmax");
2017-11-21 13:04:57 +00:00
2017-11-22 18:27:03 +00:00
enum AMDShaderTrinaryMinMax
{
2017-11-21 13:04:57 +00:00
FMin3AMD = 1,
UMin3AMD = 2,
SMin3AMD = 3,
FMax3AMD = 4,
UMax3AMD = 5,
SMax3AMD = 6,
FMid3AMD = 7,
UMid3AMD = 8,
SMid3AMD = 9
};
auto op = static_cast<AMDShaderTrinaryMinMax>(eop);
switch (op)
{
case FMin3AMD:
case UMin3AMD:
case SMin3AMD:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
break;
case FMax3AMD:
case UMax3AMD:
case SMax3AMD:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
break;
case FMid3AMD:
case UMid3AMD:
case SMid3AMD:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
break;
default:
statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
break;
}
}
2017-11-22 18:27:03 +00:00
void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
uint32_t)
2017-11-21 13:04:57 +00:00
{
require_extension_internal("GL_AMD_gcn_shader");
2017-11-21 13:04:57 +00:00
2017-11-22 18:27:03 +00:00
enum AMDGCNShader
{
2017-11-21 13:04:57 +00:00
CubeFaceIndexAMD = 1,
CubeFaceCoordAMD = 2,
TimeAMD = 3
};
auto op = static_cast<AMDGCNShader>(eop);
switch (op)
{
case CubeFaceIndexAMD:
emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
break;
case CubeFaceCoordAMD:
emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
break;
case TimeAMD:
{
string expr = "timeAMD()";
emit_op(result_type, id, expr, true);
register_control_dependent_expression(id);
2017-11-21 13:04:57 +00:00
break;
}
default:
statement("// unimplemented SPV AMD gcn shader op ", eop);
break;
}
}
void CompilerGLSL::emit_subgroup_op(const Instruction &i)
{
const uint32_t *ops = stream(i);
auto op = static_cast<Op>(i.op);
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics.");
switch (op)
{
case OpGroupNonUniformElect:
require_extension_internal("GL_KHR_shader_subgroup_basic");
break;
case OpGroupNonUniformBroadcast:
case OpGroupNonUniformBroadcastFirst:
case OpGroupNonUniformBallot:
case OpGroupNonUniformInverseBallot:
case OpGroupNonUniformBallotBitExtract:
case OpGroupNonUniformBallotBitCount:
case OpGroupNonUniformBallotFindLSB:
case OpGroupNonUniformBallotFindMSB:
require_extension_internal("GL_KHR_shader_subgroup_ballot");
break;
case OpGroupNonUniformShuffle:
case OpGroupNonUniformShuffleXor:
require_extension_internal("GL_KHR_shader_subgroup_shuffle");
break;
case OpGroupNonUniformShuffleUp:
case OpGroupNonUniformShuffleDown:
require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
break;
case OpGroupNonUniformAll:
case OpGroupNonUniformAny:
case OpGroupNonUniformAllEqual:
require_extension_internal("GL_KHR_shader_subgroup_vote");
break;
case OpGroupNonUniformFAdd:
case OpGroupNonUniformFMul:
case OpGroupNonUniformFMin:
case OpGroupNonUniformFMax:
2018-04-10 15:16:41 +00:00
case OpGroupNonUniformIAdd:
case OpGroupNonUniformIMul:
case OpGroupNonUniformSMin:
case OpGroupNonUniformSMax:
case OpGroupNonUniformUMin:
case OpGroupNonUniformUMax:
case OpGroupNonUniformBitwiseAnd:
case OpGroupNonUniformBitwiseOr:
case OpGroupNonUniformBitwiseXor:
{
auto operation = static_cast<GroupOperation>(ops[3]);
if (operation == GroupOperationClusteredReduce)
{
require_extension_internal("GL_KHR_shader_subgroup_clustered");
}
2018-04-17 13:01:31 +00:00
else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan ||
operation == GroupOperationReduce)
{
require_extension_internal("GL_KHR_shader_subgroup_arithmetic");
}
else
SPIRV_CROSS_THROW("Invalid group operation.");
break;
}
case OpGroupNonUniformQuadSwap:
case OpGroupNonUniformQuadBroadcast:
require_extension_internal("GL_KHR_shader_subgroup_quad");
break;
default:
SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
}
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto scope = static_cast<Scope>(get<SPIRConstant>(ops[2]).scalar());
if (scope != ScopeSubgroup)
SPIRV_CROSS_THROW("Only subgroup scope is supported.");
switch (op)
{
case OpGroupNonUniformElect:
emit_op(result_type, id, "subgroupElect()", true);
break;
case OpGroupNonUniformBroadcast:
emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast");
break;
case OpGroupNonUniformBroadcastFirst:
emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst");
break;
case OpGroupNonUniformBallot:
emit_unary_func_op(result_type, id, ops[3], "subgroupBallot");
break;
case OpGroupNonUniformInverseBallot:
emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot");
break;
case OpGroupNonUniformBallotBitExtract:
emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract");
break;
case OpGroupNonUniformBallotFindLSB:
2018-04-10 15:16:41 +00:00
emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB");
break;
case OpGroupNonUniformBallotFindMSB:
2018-04-10 15:16:41 +00:00
emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB");
break;
case OpGroupNonUniformBallotBitCount:
{
auto operation = static_cast<GroupOperation>(ops[3]);
if (operation == GroupOperationReduce)
emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount");
else if (operation == GroupOperationInclusiveScan)
emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount");
else if (operation == GroupOperationExclusiveScan)
emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount");
else
SPIRV_CROSS_THROW("Invalid BitCount operation.");
break;
}
case OpGroupNonUniformShuffle:
2018-04-10 15:16:41 +00:00
emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle");
break;
case OpGroupNonUniformShuffleXor:
2018-04-10 15:16:41 +00:00
emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor");
break;
case OpGroupNonUniformShuffleUp:
2018-04-10 15:16:41 +00:00
emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp");
break;
case OpGroupNonUniformShuffleDown:
2018-04-10 15:16:41 +00:00
emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
break;
case OpGroupNonUniformAll:
2018-04-10 15:16:41 +00:00
emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
break;
case OpGroupNonUniformAny:
2018-04-10 15:16:41 +00:00
emit_unary_func_op(result_type, id, ops[3], "subgroupAny");
break;
case OpGroupNonUniformAllEqual:
2018-04-10 15:16:41 +00:00
emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual");
break;
2018-04-17 13:01:31 +00:00
// clang-format off
2018-04-10 15:16:41 +00:00
#define GROUP_OP(op, glsl_op) \
case OpGroupNonUniform##op: \
{ \
auto operation = static_cast<GroupOperation>(ops[3]); \
if (operation == GroupOperationReduce) \
emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \
else if (operation == GroupOperationInclusiveScan) \
emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \
else if (operation == GroupOperationExclusiveScan) \
emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \
else if (operation == GroupOperationClusteredReduce) \
emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \
else \
SPIRV_CROSS_THROW("Invalid group operation."); \
break; \
}
GROUP_OP(FAdd, Add)
GROUP_OP(FMul, Mul)
GROUP_OP(FMin, Min)
GROUP_OP(FMax, Max)
GROUP_OP(IAdd, Add)
GROUP_OP(IMul, Mul)
GROUP_OP(SMin, Min)
GROUP_OP(SMax, Max)
GROUP_OP(UMin, Min)
GROUP_OP(UMax, Max)
GROUP_OP(BitwiseAnd, And)
GROUP_OP(BitwiseOr, Or)
GROUP_OP(BitwiseXor, Xor)
#undef GROUP_OP
2018-04-17 13:01:31 +00:00
// clang-format on
2018-04-10 15:16:41 +00:00
case OpGroupNonUniformQuadSwap:
2018-04-10 15:16:41 +00:00
{
uint32_t direction = get<SPIRConstant>(ops[4]).scalar();
if (direction == 0)
emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal");
else if (direction == 1)
emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical");
else if (direction == 2)
emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal");
else
SPIRV_CROSS_THROW("Invalid quad swap direction.");
break;
}
case OpGroupNonUniformQuadBroadcast:
2018-04-10 15:16:41 +00:00
{
emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast");
break;
}
default:
SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
}
register_control_dependent_expression(id);
}
string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
2016-03-02 17:09:16 +00:00
{
if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
return type_to_glsl(out_type);
2016-07-27 09:27:00 +00:00
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Int64)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float)
return "floatBitsToUint";
else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::UInt)
return type_to_glsl(out_type);
2016-07-27 09:27:00 +00:00
else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::UInt64)
return type_to_glsl(out_type);
else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float)
return "floatBitsToInt";
else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt)
return "uintBitsToFloat";
else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int)
return "intBitsToFloat";
2016-07-27 09:27:00 +00:00
else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double)
return "doubleBitsToInt64";
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double)
return "doubleBitsToUint64";
else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64)
return "int64BitsToDouble";
else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
return "uint64BitsToDouble";
2017-11-21 13:04:57 +00:00
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
return "packUint2x32";
2018-03-06 15:28:42 +00:00
else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
return "unpackFloat2x16";
else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
return "packFloat2x16";
else
return "";
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument)
2016-03-02 17:09:16 +00:00
{
auto op = bitcast_glsl_op(result_type, expression_type(argument));
if (op.empty())
return to_enclosed_expression(argument);
else
return join(op, "(", to_expression(argument), ")");
2016-03-02 17:09:16 +00:00
}
std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg)
{
auto expr = to_expression(arg);
auto &src_type = expression_type(arg);
if (src_type.basetype != target_type)
{
auto target = src_type;
target.basetype = target_type;
expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")");
}
return expr;
}
std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type,
const std::string &expr)
{
if (target_type.basetype == expr_type)
return expr;
auto src_type = target_type;
src_type.basetype = expr_type;
return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")");
}
string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
2016-03-02 17:09:16 +00:00
{
switch (builtin)
{
case BuiltInPosition:
return "gl_Position";
case BuiltInPointSize:
return "gl_PointSize";
case BuiltInClipDistance:
return "gl_ClipDistance";
2017-03-05 11:44:29 +00:00
case BuiltInCullDistance:
return "gl_CullDistance";
case BuiltInVertexId:
if (options.vulkan_semantics)
SPIRV_CROSS_THROW(
"Cannot implement gl_VertexID in Vulkan GLSL. This shader was created with GL semantics.");
return "gl_VertexID";
case BuiltInInstanceId:
if (options.vulkan_semantics)
SPIRV_CROSS_THROW(
"Cannot implement gl_InstanceID in Vulkan GLSL. This shader was created with GL semantics.");
return "gl_InstanceID";
case BuiltInVertexIndex:
if (options.vulkan_semantics)
return "gl_VertexIndex";
else
return "gl_VertexID"; // gl_VertexID already has the base offset applied.
case BuiltInInstanceIndex:
if (options.vulkan_semantics)
return "gl_InstanceIndex";
else
return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID.
case BuiltInPrimitiveId:
return "gl_PrimitiveID";
case BuiltInInvocationId:
return "gl_InvocationID";
case BuiltInLayer:
return "gl_Layer";
2018-01-04 11:41:25 +00:00
case BuiltInViewportIndex:
return "gl_ViewportIndex";
case BuiltInTessLevelOuter:
return "gl_TessLevelOuter";
case BuiltInTessLevelInner:
return "gl_TessLevelInner";
case BuiltInTessCoord:
return "gl_TessCoord";
case BuiltInFragCoord:
return "gl_FragCoord";
case BuiltInPointCoord:
return "gl_PointCoord";
case BuiltInFrontFacing:
return "gl_FrontFacing";
case BuiltInFragDepth:
return "gl_FragDepth";
case BuiltInNumWorkgroups:
return "gl_NumWorkGroups";
case BuiltInWorkgroupSize:
return "gl_WorkGroupSize";
case BuiltInWorkgroupId:
return "gl_WorkGroupID";
case BuiltInLocalInvocationId:
return "gl_LocalInvocationID";
case BuiltInGlobalInvocationId:
return "gl_GlobalInvocationID";
case BuiltInLocalInvocationIndex:
return "gl_LocalInvocationIndex";
case BuiltInSampleId:
if (options.es && options.version < 320)
require_extension_internal("GL_OES_sample_variables");
if (!options.es && options.version < 400)
SPIRV_CROSS_THROW("gl_SampleID not supported before GLSL 400.");
return "gl_SampleID";
case BuiltInSampleMask:
if (options.es && options.version < 320)
require_extension_internal("GL_OES_sample_variables");
if (!options.es && options.version < 400)
SPIRV_CROSS_THROW("gl_SampleMask/gl_SampleMaskIn not supported before GLSL 400.");
if (storage == StorageClassInput)
return "gl_SampleMaskIn";
else
return "gl_SampleMask";
case BuiltInSamplePosition:
if (options.es && options.version < 320)
require_extension_internal("GL_OES_sample_variables");
if (!options.es && options.version < 400)
SPIRV_CROSS_THROW("gl_SamplePosition not supported before GLSL 400.");
return "gl_SamplePosition";
2017-08-15 11:28:16 +00:00
case BuiltInViewIndex:
if (options.vulkan_semantics)
{
require_extension_internal("GL_EXT_multiview");
2017-08-15 11:28:16 +00:00
return "gl_ViewIndex";
}
else
{
require_extension_internal("GL_OVR_multiview2");
2017-08-15 11:28:16 +00:00
return "gl_ViewID_OVR";
}
case BuiltInNumSubgroups:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_basic");
return "gl_NumSubgroups";
case BuiltInSubgroupId:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_basic");
return "gl_SubgroupID";
case BuiltInSubgroupSize:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_basic");
return "gl_SubgroupSize";
case BuiltInSubgroupLocalInvocationId:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_basic");
return "gl_SubgroupInvocationID";
case BuiltInSubgroupEqMask:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_ballot");
return "gl_SubgroupEqMask";
case BuiltInSubgroupGeMask:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_ballot");
return "gl_SubgroupGeMask";
case BuiltInSubgroupGtMask:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_ballot");
return "gl_SubgroupGtMask";
case BuiltInSubgroupLeMask:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_ballot");
return "gl_SubgroupLeMask";
case BuiltInSubgroupLtMask:
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Need Vulkan semantics for subgroup.");
require_extension_internal("GL_KHR_shader_subgroup_ballot");
return "gl_SubgroupLtMask";
default:
return join("gl_BuiltIn_", convert_to_string(builtin));
}
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::index_to_swizzle(uint32_t index)
2016-03-02 17:09:16 +00:00
{
switch (index)
{
case 0:
return "x";
case 1:
return "y";
case 2:
return "z";
case 3:
return "w";
default:
SPIRV_CROSS_THROW("Swizzle index out of range");
}
2016-03-02 17:09:16 +00:00
}
2017-02-23 18:33:14 +00:00
string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
bool index_is_literal, bool chain_only, bool *need_transpose,
bool *result_is_packed)
2016-03-02 17:09:16 +00:00
{
string expr;
if (!chain_only)
expr = to_enclosed_expression(base);
uint32_t type_id = expression_type_id(base);
const auto *type = &get<SPIRType>(type_id);
// Start traversing type hierarchy at the proper non-pointer types,
// but keep type_id referencing the original pointer for use below.
while (type->pointer)
{
assert(type->parent_type);
type = &get<SPIRType>(type->parent_type);
}
bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos;
bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base);
bool is_packed = has_decoration(base, DecorationCPacked);
bool pending_array_enclose = false;
bool dimension_flatten = false;
for (uint32_t i = 0; i < count; i++)
{
uint32_t index = indices[i];
// Arrays
if (!type->array.empty())
{
// If we are flattening multidimensional arrays, only create opening bracket on first
// array index.
if (options.flatten_multidimensional_arrays && !pending_array_enclose)
{
dimension_flatten = type->array.size() > 1;
pending_array_enclose = dimension_flatten;
if (pending_array_enclose)
expr += "[";
}
assert(type->parent_type);
const auto append_index = [&]() {
expr += "[";
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_expression(index);
expr += "]";
};
auto *var = maybe_get<SPIRVariable>(base);
if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) &&
!has_decoration(type->self, DecorationBlock))
{
// This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared.
// Normally, these variables live in blocks when compiled from GLSL,
// but HLSL seems to just emit straight arrays here.
// We must pretend this access goes through gl_in/gl_out arrays
// to be able to access certain builtins as arrays.
auto builtin = meta[base].decoration.builtin_type;
switch (builtin)
{
2018-01-09 11:51:21 +00:00
// case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom.
// case BuiltInClipDistance:
case BuiltInPosition:
case BuiltInPointSize:
if (var->storage == StorageClassInput)
expr = join("gl_in[", to_expression(index), "].", expr);
else if (var->storage == StorageClassOutput)
expr = join("gl_out[", to_expression(index), "].", expr);
else
append_index();
break;
default:
append_index();
break;
}
}
else if (options.flatten_multidimensional_arrays && dimension_flatten)
{
// If we are flattening multidimensional arrays, do manual stride computation.
auto &parent_type = get<SPIRType>(type->parent_type);
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_enclosed_expression(index);
for (auto j = uint32_t(parent_type.array.size()); j; j--)
{
expr += " * ";
expr += enclose_expression(to_array_size(parent_type, j - 1));
}
if (parent_type.array.empty())
pending_array_enclose = false;
else
expr += " + ";
if (!pending_array_enclose)
expr += "]";
}
else
{
append_index();
}
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
access_chain_is_arrayed = true;
}
// For structs, the index refers to a constant, which indexes into the members.
// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
else if (type->basetype == SPIRType::Struct)
{
if (!index_is_literal)
index = get<SPIRConstant>(index).scalar();
if (index >= type->member_types.size())
SPIRV_CROSS_THROW("Member index is out of bounds!");
BuiltIn builtin;
if (is_member_builtin(*type, index, &builtin))
{
// FIXME: We rely here on OpName on gl_in/gl_out to make this work properly.
// To make this properly work by omitting all OpName opcodes,
// we need to infer gl_in or gl_out based on the builtin, and stage.
if (access_chain_is_arrayed)
{
expr += ".";
expr += builtin_to_glsl(builtin, type->storage);
}
else
expr = builtin_to_glsl(builtin, type->storage);
}
else
{
// If the member has a qualified name, use it as the entire chain
string qual_mbr_name = get_member_qualified_name(type_id, index);
if (!qual_mbr_name.empty())
expr = qual_mbr_name;
else
{
expr += ".";
expr += to_member_name(*type, index);
}
}
is_packed = member_is_packed_type(*type, index);
row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index);
type = &get<SPIRType>(type->member_types[index]);
}
// Matrix -> Vector
else if (type->columns > 1)
{
if (row_major_matrix_needs_conversion)
{
expr = convert_row_major_matrix(expr, *type, is_packed);
row_major_matrix_needs_conversion = false;
is_packed = false;
}
expr += "[";
if (index_is_literal)
expr += convert_to_string(index);
else
expr += to_expression(index);
expr += "]";
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
// Vector -> Scalar
else if (type->vecsize > 1)
{
if (index_is_literal && !is_packed)
{
expr += ".";
expr += index_to_swizzle(index);
}
else if (ids[index].get_type() == TypeConstant && !is_packed)
{
auto &c = get<SPIRConstant>(index);
expr += ".";
expr += index_to_swizzle(c.scalar());
}
else if (index_is_literal)
{
// For packed vectors, we can only access them as an array, not by swizzle.
expr += join("[", index, "]");
}
else
{
expr += "[";
expr += to_expression(index);
expr += "]";
}
is_packed = false;
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
else if (!backend.allow_truncated_access_chain)
SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
}
if (pending_array_enclose)
{
SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, "
"but the access chain was terminated in the middle of a multidimensional array. "
"This is not supported.");
}
if (need_transpose)
*need_transpose = row_major_matrix_needs_conversion;
if (result_is_packed)
*result_is_packed = is_packed;
return expr;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::to_flattened_struct_member(const SPIRVariable &var, uint32_t index)
{
auto &type = get<SPIRType>(var.basetype);
return sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, index)));
}
2017-01-20 16:33:59 +00:00
string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
bool *out_need_transpose, bool *result_is_packed)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
2017-01-16 22:19:49 +00:00
if (flattened_buffer_blocks.count(base))
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
uint32_t matrix_stride = 0;
bool need_transpose = false;
flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride);
if (out_need_transpose)
*out_need_transpose = target_type.columns > 1 && need_transpose;
if (result_is_packed)
*result_is_packed = false;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
else if (flattened_structs.count(base) && count > 0)
{
auto chain = access_chain_internal(base, indices, count, false, true).substr(1);
if (out_need_transpose)
*out_need_transpose = false;
if (result_is_packed)
*result_is_packed = false;
return sanitize_underscores(join(to_name(base), "_", chain));
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
else
{
return access_chain_internal(base, indices, count, false, false, out_need_transpose, result_is_packed);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
}
string CompilerGLSL::load_flattened_struct(SPIRVariable &var)
{
auto expr = type_to_glsl_constructor(get<SPIRType>(var.basetype));
expr += '(';
auto &type = get<SPIRType>(var.basetype);
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
{
if (i)
expr += ", ";
// Flatten the varyings.
// Apply name transformation for flattened I/O blocks.
expr += to_flattened_struct_member(var, i);
}
expr += ')';
return expr;
}
void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value)
{
// We're trying to store a structure which has been flattened.
// Need to copy members one by one.
auto rhs = to_expression(value);
// Store result locally.
// Since we're declaring a variable potentially multiple times here,
// store the variable in an isolated scope.
begin_scope();
statement(variable_decl_function_local(var), " = ", rhs, ";");
auto &type = get<SPIRType>(var.basetype);
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
{
// Flatten the varyings.
// Apply name transformation for flattened I/O blocks.
auto lhs = sanitize_underscores(join(to_name(var.self), "_", to_member_name(type, i)));
rhs = join(to_name(var.self), ".", to_member_name(type, i));
statement(lhs, " = ", rhs, ";");
}
end_scope();
}
2017-01-20 16:33:59 +00:00
std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
bool need_transpose)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
if (!target_type.array.empty())
SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
else if (target_type.basetype == SPIRType::Struct)
return flattened_access_chain_struct(base, indices, count, target_type, offset);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
else if (target_type.columns > 1)
return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
else
return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
2017-01-20 16:33:59 +00:00
std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
std::string expr;
expr += type_to_glsl_constructor(target_type);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
expr += "(";
for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
if (i != 0)
expr += ", ";
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]);
uint32_t member_offset = type_struct_member_offset(target_type, i);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
// The access chain terminates at the struct, so we need to find matrix strides and row-major information
// ahead of time.
bool need_transpose = false;
uint32_t matrix_stride = 0;
if (member_type.columns > 1)
{
need_transpose = combined_decoration_for_member(target_type, i).get(DecorationRowMajor);
matrix_stride = type_struct_member_matrix_stride(target_type, i);
}
auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
need_transpose);
// Cannot forward transpositions, so resolve them here.
if (need_transpose)
expr += convert_row_major_matrix(tmp, member_type, false);
else
expr += tmp;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
expr += ")";
return expr;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
2017-01-20 16:33:59 +00:00
std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset,
uint32_t matrix_stride, bool need_transpose)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
assert(matrix_stride);
SPIRType tmp_type = target_type;
if (need_transpose)
swap(tmp_type.vecsize, tmp_type.columns);
std::string expr;
expr += type_to_glsl_constructor(tmp_type);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
expr += "(";
for (uint32_t i = 0; i < tmp_type.columns; i++)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
if (i != 0)
expr += ", ";
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride,
/* need_transpose= */ false);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
expr += ")";
return expr;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count,
const SPIRType &target_type, uint32_t offset,
uint32_t matrix_stride, bool need_transpose)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
auto buffer_name = to_name(expression_type(base).self);
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
if (need_transpose)
{
std::string expr;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
if (target_type.vecsize > 1)
{
expr += type_to_glsl_constructor(target_type);
expr += "(";
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
for (uint32_t i = 0; i < target_type.vecsize; ++i)
{
if (i != 0)
expr += ", ";
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
uint32_t component_offset = result.second + i * matrix_stride;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
assert(component_offset % (target_type.width / 8) == 0);
uint32_t index = component_offset / (target_type.width / 8);
expr += buffer_name;
expr += "[";
expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
expr += convert_to_string(index / 4);
expr += "]";
expr += vector_swizzle(1, index % 4);
}
if (target_type.vecsize > 1)
{
expr += ")";
}
return expr;
}
else
{
assert(result.second % (target_type.width / 8) == 0);
uint32_t index = result.second / (target_type.width / 8);
std::string expr;
expr += buffer_name;
expr += "[";
expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a +
expr += convert_to_string(index / 4);
expr += "]";
expr += vector_swizzle(target_type.vecsize, index % 4);
return expr;
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
2017-08-28 07:01:03 +00:00
std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(const SPIRType &basetype,
const uint32_t *indices, uint32_t count,
uint32_t offset, uint32_t word_stride,
2017-01-21 11:47:26 +00:00
bool *need_transpose,
uint32_t *out_matrix_stride)
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
{
const auto *type = &basetype;
2017-01-22 08:06:15 +00:00
// Start traversing type hierarchy at the proper non-pointer types.
while (type->pointer)
{
assert(type->parent_type);
type = &get<SPIRType>(type->parent_type);
}
2017-01-22 08:06:15 +00:00
// This holds the type of the current pointer which we are traversing through.
// We always start out from a struct type which is the block.
// This is primarily used to reflect the array strides and matrix strides later.
// For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set
// accordingly as members of structs are accessed.
assert(type->basetype == SPIRType::Struct);
uint32_t type_id = 0;
std::string expr;
// Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
for (uint32_t i = 0; i < count; i++)
{
uint32_t index = indices[i];
// Arrays
if (!type->array.empty())
{
2017-01-22 08:06:15 +00:00
// Here, the type_id will be a type ID for the array type itself.
uint32_t array_stride = get_decoration(type_id, DecorationArrayStride);
2017-01-21 10:30:33 +00:00
if (!array_stride)
SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
auto *constant = maybe_get<SPIRConstant>(index);
if (constant)
{
// Constant array access.
offset += constant->scalar() * array_stride;
}
else
{
// Dynamic array access.
if (array_stride % word_stride)
{
2017-01-21 11:47:26 +00:00
SPIRV_CROSS_THROW(
"Array stride for dynamic indexing must be divisible by the size of a 4-component vector. "
2017-01-22 08:06:15 +00:00
"Likely culprit here is a float or vec2 array inside a push constant block which is std430. "
"This cannot be flattened. Try using std140 layout instead.");
}
expr += to_enclosed_expression(index);
expr += " * ";
expr += convert_to_string(array_stride / word_stride);
expr += " + ";
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
2017-01-21 10:30:33 +00:00
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(parent_type);
2017-01-22 08:06:15 +00:00
type_id = parent_type;
// Type ID now refers to the array type with one less dimension.
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
}
// For structs, the index refers to a constant, which indexes into the members.
// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
else if (type->basetype == SPIRType::Struct)
{
index = get<SPIRConstant>(index).scalar();
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
if (index >= type->member_types.size())
SPIRV_CROSS_THROW("Member index is out of bounds!");
offset += type_struct_member_offset(*type, index);
2017-01-22 08:06:15 +00:00
type_id = type->member_types[index];
2017-01-21 10:30:33 +00:00
auto &struct_type = *type;
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
type = &get<SPIRType>(type->member_types[index]);
2017-01-21 10:30:33 +00:00
if (type->columns > 1)
{
2017-01-21 10:30:33 +00:00
matrix_stride = type_struct_member_matrix_stride(struct_type, index);
row_major_matrix_needs_conversion =
combined_decoration_for_member(struct_type, index).get(DecorationRowMajor);
}
else
row_major_matrix_needs_conversion = false;
}
// Matrix -> Vector
else if (type->columns > 1)
{
auto *constant = maybe_get<SPIRConstant>(index);
if (constant)
{
index = get<SPIRConstant>(index).scalar();
offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride);
}
else
{
uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride;
// Dynamic array access.
if (indexing_stride % word_stride)
{
SPIRV_CROSS_THROW(
"Matrix stride for dynamic indexing must be divisible by the size of a 4-component vector. "
"Likely culprit here is a row-major matrix being accessed dynamically. "
"This cannot be flattened. Try using std140 layout instead.");
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
expr += to_enclosed_expression(index);
expr += " * ";
expr += convert_to_string(indexing_stride / word_stride);
expr += " + ";
}
2017-01-21 10:30:33 +00:00
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(type->parent_type);
2017-01-22 08:06:15 +00:00
type_id = parent_type;
}
// Vector -> Scalar
else if (type->vecsize > 1)
{
auto *constant = maybe_get<SPIRConstant>(index);
if (constant)
{
index = get<SPIRConstant>(index).scalar();
offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8));
}
else
{
uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8);
// Dynamic array access.
if (indexing_stride % word_stride)
{
SPIRV_CROSS_THROW(
"Stride for dynamic vector indexing must be divisible by the size of a 4-component vector. "
"This cannot be flattened in legacy targets.");
}
expr += to_enclosed_expression(index);
expr += " * ";
expr += convert_to_string(indexing_stride / word_stride);
expr += " + ";
}
2017-01-21 10:30:33 +00:00
uint32_t parent_type = type->parent_type;
type = &get<SPIRType>(type->parent_type);
2017-01-22 08:06:15 +00:00
type_id = parent_type;
}
else
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
}
if (need_transpose)
*need_transpose = row_major_matrix_needs_conversion;
if (out_matrix_stride)
*out_matrix_stride = matrix_stride;
return std::make_pair(expr, offset);
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::should_forward(uint32_t id)
{
// Immutable expression can always be forwarded.
// If not immutable, we can speculate about it by forwarding potentially mutable variables.
auto *var = maybe_get<SPIRVariable>(id);
bool forward = var ? var->forwardable : false;
return (is_immutable(id) || forward) && !options.force_temporary;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::track_expression_read(uint32_t id)
{
// If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice.
// In this case, it's better to just bind the complex expression to the temporary and read that temporary twice.
if (expression_is_forwarded(id))
{
auto &v = expression_usage_counts[id];
v++;
if (v >= 2)
{
//if (v == 2)
// fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id);
forced_temporaries.insert(id);
// Force a recompile after this pass to avoid forwarding this variable.
force_recompile = true;
}
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure)
{
if (forced_temporaries.find(id) != end(forced_temporaries))
return false;
for (uint32_t i = 0; i < num_args; i++)
if (!should_forward(args[i]))
return false;
// We need to forward globals as well.
if (!pure)
{
for (auto global : global_variables)
if (!should_forward(global))
return false;
for (auto aliased : aliased_variables)
if (!should_forward(aliased))
return false;
}
return true;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::register_impure_function_call()
{
// Impure functions can modify globals and aliased variables, so invalidate them as well.
for (auto global : global_variables)
flush_dependees(get<SPIRVariable>(global));
for (auto aliased : aliased_variables)
flush_dependees(get<SPIRVariable>(aliased));
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::register_call_out_argument(uint32_t id)
{
register_write(id);
2016-03-02 17:09:16 +00:00
auto *var = maybe_get<SPIRVariable>(id);
if (var)
flush_variable_declaration(var->self);
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::variable_decl_function_local(SPIRVariable &var)
{
// These variables are always function local,
// so make sure we emit the variable without storage qualifiers.
// Some backends will inject custom variables locally in a function
// with a storage qualifier which is not function-local.
auto old_storage = var.storage;
var.storage = StorageClassFunction;
auto expr = variable_decl(var);
var.storage = old_storage;
return expr;
}
2016-03-02 17:09:16 +00:00
void CompilerGLSL::flush_variable_declaration(uint32_t id)
{
auto *var = maybe_get<SPIRVariable>(id);
if (var && var->deferred_declaration)
{
statement(variable_decl_function_local(*var), ";");
var->deferred_declaration = false;
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::remove_duplicate_swizzle(string &op)
{
auto pos = op.find_last_of('.');
if (pos == string::npos || pos == 0)
return false;
string final_swiz = op.substr(pos + 1, string::npos);
if (backend.swizzle_is_function)
{
if (final_swiz.size() < 2)
return false;
if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
final_swiz.erase(final_swiz.size() - 2, string::npos);
else
return false;
}
// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
// If so, and previous swizzle is of same length,
// we can drop the final swizzle altogether.
for (uint32_t i = 0; i < final_swiz.size(); i++)
{
static const char expected[] = { 'x', 'y', 'z', 'w' };
if (i >= 4 || final_swiz[i] != expected[i])
return false;
}
auto prevpos = op.find_last_of('.', pos - 1);
if (prevpos == string::npos)
return false;
prevpos++;
// Make sure there are only swizzles here ...
for (auto i = prevpos; i < pos; i++)
{
if (op[i] < 'w' || op[i] > 'z')
{
// If swizzles are foo.xyz() like in C++ backend for example, check for that.
if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')')
break;
return false;
}
}
// If original swizzle is large enough, just carve out the components we need.
// E.g. foobar.wyx.xy will turn into foobar.wy.
if (pos - prevpos >= final_swiz.size())
{
op.erase(prevpos + final_swiz.size(), string::npos);
// Add back the function call ...
if (backend.swizzle_is_function)
op += "()";
}
return true;
2016-03-02 17:09:16 +00:00
}
// Optimizes away vector swizzles where we have something like
// vec3 foo;
// foo.xyz <-- swizzle expression does nothing.
// This is a very common pattern after OpCompositeCombine.
bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op)
{
auto pos = op.find_last_of('.');
if (pos == string::npos || pos == 0)
return false;
string final_swiz = op.substr(pos + 1, string::npos);
if (backend.swizzle_is_function)
{
if (final_swiz.size() < 2)
return false;
if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()")
final_swiz.erase(final_swiz.size() - 2, string::npos);
else
return false;
}
// Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar.
// If so, and previous swizzle is of same length,
// we can drop the final swizzle altogether.
for (uint32_t i = 0; i < final_swiz.size(); i++)
{
static const char expected[] = { 'x', 'y', 'z', 'w' };
if (i >= 4 || final_swiz[i] != expected[i])
return false;
}
auto &type = expression_type(base);
// Sanity checking ...
assert(type.columns == 1 && type.array.empty());
if (type.vecsize == final_swiz.size())
op.erase(pos, string::npos);
return true;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length)
2016-03-02 17:09:16 +00:00
{
uint32_t base = 0;
string op;
string subop;
// Can only merge swizzles for vectors.
auto &type = get<SPIRType>(return_type);
bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
bool swizzle_optimization = false;
for (uint32_t i = 0; i < length; i++)
{
auto *e = maybe_get<SPIRExpression>(elems[i]);
// If we're merging another scalar which belongs to the same base
// object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible!
if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base)
{
// Only supposed to be used for vector swizzle -> scalar.
assert(!e->expression.empty() && e->expression.front() == '.');
subop += e->expression.substr(1, string::npos);
swizzle_optimization = true;
}
else
{
// We'll likely end up with duplicated swizzles, e.g.
// foobar.xyz.xyz from patterns like
// OpVectorShuffle
// OpCompositeExtract x 3
// OpCompositeConstruct 3x + other scalar.
// Just modify op in-place.
if (swizzle_optimization)
{
if (backend.swizzle_is_function)
subop += "()";
// Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles.
// The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on.
// We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize.
// Essentially, we can only remove one set of swizzles, since that's what we have control over ...
// Case 1:
// foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done.
// foo.yxz was the result of OpVectorShuffle and we don't know the type of foo.
// Case 2:
// foo.xyz: Duplicate swizzle won't kick in.
// If foo is vec3, we can remove xyz, giving just foo.
if (!remove_duplicate_swizzle(subop))
remove_unity_swizzle(base, subop);
// Strips away redundant parens if we created them during component extraction.
strip_enclosed_expression(subop);
swizzle_optimization = false;
op += subop;
}
else
op += subop;
if (i)
op += ", ";
subop = to_expression(elems[i]);
}
base = e ? e->base_expression : 0;
}
if (swizzle_optimization)
{
if (backend.swizzle_is_function)
subop += "()";
if (!remove_duplicate_swizzle(subop))
remove_unity_swizzle(base, subop);
// Strips away redundant parens if we created them during component extraction.
strip_enclosed_expression(subop);
}
op += subop;
return op;
2016-03-02 17:09:16 +00:00
}
2016-09-11 11:05:44 +00:00
bool CompilerGLSL::skip_argument(uint32_t id) const
{
if (!combined_image_samplers.empty() || !options.vulkan_semantics)
{
auto &type = expression_type(id);
if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1))
return true;
}
return false;
}
bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs)
{
// Do this with strings because we have a very clear pattern we can check for and it avoids
// adding lots of special cases to the code emission.
if (rhs.size() < lhs.size() + 3)
return false;
// Do not optimize matrices. They are a bit awkward to reason about in general
// (in which order does operation happen?), and it does not work on MSL anyways.
if (type.vecsize > 1 && type.columns > 1)
return false;
auto index = rhs.find(lhs);
if (index != 0)
return false;
// TODO: Shift operators, but it's not important for now.
auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1);
if (op != lhs.size() + 1)
return false;
// Check that the op is followed by space. This excludes && and ||.
2017-09-06 07:15:27 +00:00
if (rhs[op + 1] != ' ')
return false;
char bop = rhs[op];
auto expr = rhs.substr(lhs.size() + 3);
// Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code.
// Find some common patterns which are equivalent.
if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)"))
statement(lhs, bop, bop, ";");
else
statement(lhs, " ", bop, "= ", expr, ";");
return true;
}
void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
{
if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
return;
assert(current_emitting_block);
current_emitting_block->invalidate_expressions.push_back(expr);
}
void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
2017-10-20 14:18:02 +00:00
{
current_emitting_block = &block;
for (auto &op : block.ops)
emit_instruction(op);
current_emitting_block = nullptr;
}
void CompilerGLSL::emit_instruction(const Instruction &instruction)
2016-03-02 17:09:16 +00:00
{
auto ops = stream(instruction);
auto opcode = static_cast<Op>(instruction.op);
uint32_t length = instruction.length;
2016-03-02 17:09:16 +00:00
#define BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op)
#define BOP_CAST(op, type) \
2018-02-03 22:23:30 +00:00
emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, glsl_opcode_is_sign_invariant(opcode))
2016-03-02 17:09:16 +00:00
#define UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op)
#define QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op)
#define TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op)
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
#define BFOP_CAST(op, type) \
2018-02-03 22:23:30 +00:00
emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, glsl_opcode_is_sign_invariant(opcode))
#define BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op)
2016-03-02 17:09:16 +00:00
#define UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op)
switch (opcode)
{
// Dealing with memory
case OpLoad:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t ptr = ops[2];
flush_variable_declaration(ptr);
// If we're loading from memory that cannot be changed by the shader,
// just forward the expression directly to avoid needless temporaries.
// If an expression is mutable and forwardable, we speculate that it is immutable.
bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
// If loading a non-native row-major matrix, mark the expression as need_transpose.
bool need_transpose = false;
bool old_need_transpose = false;
auto *ptr_expression = maybe_get<SPIRExpression>(ptr);
if (ptr_expression && ptr_expression->need_transpose)
{
old_need_transpose = true;
ptr_expression->need_transpose = false;
need_transpose = true;
}
else if (is_non_native_row_major_matrix(ptr))
need_transpose = true;
auto expr = to_expression(ptr);
if (ptr_expression)
ptr_expression->need_transpose = old_need_transpose;
// Suppress usage tracking since using same expression multiple times does not imply any extra work.
auto &e = emit_op(result_type, id, expr, forward, true);
e.need_transpose = need_transpose;
register_read(id, ptr, forward);
// Pass through whether the result is of a packed type.
if (has_decoration(ptr, DecorationCPacked))
set_decoration(id, DecorationCPacked);
break;
}
case OpInBoundsAccessChain:
case OpAccessChain:
{
auto *var = maybe_get<SPIRVariable>(ops[2]);
if (var)
flush_variable_declaration(var->self);
// If the base is immutable, the access chain pointer must also be.
// If an expression is mutable and forwardable, we speculate that it is immutable.
bool need_transpose, result_is_packed;
auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose, &result_is_packed);
auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
auto *backing_variable = maybe_get_backing_variable(ops[2]);
expr.loaded_from = backing_variable ? backing_variable->self : ops[2];
expr.need_transpose = need_transpose;
// Mark the result as being packed. Some platforms handled packed vectors differently than non-packed.
if (result_is_packed)
set_decoration(ops[1], DecorationCPacked);
else
unset_decoration(ops[1], DecorationCPacked);
break;
}
case OpStore:
{
auto *var = maybe_get<SPIRVariable>(ops[0]);
if (var && var->statically_assigned)
var->static_expression = ops[1];
else if (var && var->loop_variable && !var->loop_variable_enable)
var->static_expression = ops[1];
else if (var && flattened_structs.count(ops[0]))
{
store_flattened_struct(*var, ops[1]);
register_write(ops[0]);
}
else
{
auto rhs = to_expression(ops[1]);
// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
if (!rhs.empty())
{
auto lhs = to_expression(ops[0]);
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
// While this is purely cosmetic, this is important for legacy ESSL where loop
// variable increments must be in either i++ or i += const-expr.
// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
if (!optimize_read_modify_write(expression_type(ops[1]), lhs, rhs))
statement(lhs, " = ", rhs, ";");
register_write(ops[0]);
}
}
break;
}
case OpArrayLength:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto e = access_chain_internal(ops[2], &ops[3], length - 3, true);
set<SPIRExpression>(id, e + ".length()", result_type, true);
break;
}
// Function calls
case OpFunctionCall:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t func = ops[2];
const auto *arg = &ops[3];
length -= 3;
auto &callee = get<SPIRFunction>(func);
auto &return_type = get<SPIRType>(callee.return_type);
bool pure = function_is_pure(callee);
bool callee_has_out_variables = false;
bool emit_return_value_as_argument = false;
// Invalidate out variables passed to functions since they can be OpStore'd to.
for (uint32_t i = 0; i < length; i++)
{
if (callee.arguments[i].write_count)
{
register_call_out_argument(arg[i]);
callee_has_out_variables = true;
}
flush_variable_declaration(arg[i]);
}
if (!return_type.array.empty() && !backend.can_return_array)
{
callee_has_out_variables = true;
emit_return_value_as_argument = true;
}
if (!pure)
register_impure_function_call();
string funexpr;
2016-09-11 11:05:44 +00:00
vector<string> arglist;
funexpr += to_name(func) + "(";
if (emit_return_value_as_argument)
{
statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type), ";");
arglist.push_back(to_name(id));
}
for (uint32_t i = 0; i < length; i++)
{
2016-09-11 11:05:44 +00:00
// Do not pass in separate images or samplers if we're remapping
// to combined image samplers.
if (skip_argument(arg[i]))
continue;
2016-10-24 13:24:24 +00:00
arglist.push_back(to_func_call_arg(arg[i]));
}
for (auto &combined : callee.combined_parameters)
{
uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id];
uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id];
arglist.push_back(to_combined_image_sampler(image_id, sampler_id));
}
2016-10-19 21:09:51 +00:00
2016-10-24 13:24:24 +00:00
append_global_func_args(callee, length, arglist);
2016-10-19 21:09:51 +00:00
2016-09-11 11:05:44 +00:00
funexpr += merge(arglist);
funexpr += ")";
// Check for function call constraints.
check_function_call_constraints(arg, length);
if (return_type.basetype != SPIRType::Void)
{
// If the function actually writes to an out variable,
// take the conservative route and do not forward.
// The problem is that we might not read the function
// result (and emit the function) before an out variable
// is read (common case when return value is ignored!
// In order to avoid start tracking invalid variables,
// just avoid the forwarding problem altogether.
bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure &&
(forced_temporaries.find(id) == end(forced_temporaries));
if (emit_return_value_as_argument)
{
statement(funexpr, ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
}
else
emit_op(result_type, id, funexpr, forward);
// Function calls are implicit loads from all variables in question.
// Set dependencies for them.
for (uint32_t i = 0; i < length; i++)
register_read(id, arg[i], forward);
// If we're going to forward the temporary result,
// put dependencies on every variable that must not change.
if (forward)
register_global_read_dependencies(callee, id);
}
else
statement(funexpr, ";");
break;
}
// Composite munging
case OpCompositeConstruct:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
2018-03-09 14:26:36 +00:00
const auto *const elems = &ops[2];
length -= 2;
bool forward = true;
for (uint32_t i = 0; i < length; i++)
forward = forward && should_forward(elems[i]);
auto &out_type = get<SPIRType>(result_type);
auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr;
// Only splat if we have vector constructors.
// Arrays and structs must be initialized properly in full.
bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct;
bool splat = false;
bool swizzle_splat = false;
if (in_type)
{
splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting;
swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar;
if (ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type))
{
// Cannot swizzle literal integers as a special case.
swizzle_splat = false;
}
}
if (splat || swizzle_splat)
{
uint32_t input = elems[0];
for (uint32_t i = 0; i < length; i++)
{
if (input != elems[i])
{
splat = false;
swizzle_splat = false;
}
}
}
if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline)
forward = false;
if (!out_type.array.empty() && !backend.can_declare_arrays_inline)
forward = false;
if (type_is_empty(out_type) && !backend.supports_empty_struct)
forward = false;
string constructor_op;
if (backend.use_initializer_list && composite)
{
// Only use this path if we are building composites.
// This path cannot be used for arithmetic.
if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct)
constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type));
constructor_op += "{ ";
if (type_is_empty(out_type) && !backend.supports_empty_struct)
constructor_op += "0";
else if (splat)
constructor_op += to_expression(elems[0]);
else
constructor_op += build_composite_combiner(result_type, elems, length);
constructor_op += " }";
}
else if (swizzle_splat && !composite)
2017-12-12 10:03:46 +00:00
{
constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_expression(elems[0]));
}
else
{
constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "(";
if (type_is_empty(out_type) && !backend.supports_empty_struct)
constructor_op += "0";
else if (splat)
constructor_op += to_expression(elems[0]);
else
constructor_op += build_composite_combiner(result_type, elems, length);
constructor_op += ")";
}
emit_op(result_type, id, constructor_op, forward);
for (uint32_t i = 0; i < length; i++)
inherit_expression_dependencies(id, elems[i]);
break;
}
case OpVectorInsertDynamic:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t vec = ops[2];
uint32_t comp = ops[3];
uint32_t index = ops[4];
flush_variable_declaration(vec);
// Make a copy, then use access chain to store the variable.
statement(declare_temporary(result_type, id), to_expression(vec), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
auto chain = access_chain_internal(id, &index, 1, false);
statement(chain, " = ", to_expression(comp), ";");
break;
}
case OpVectorExtractDynamic:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto expr = access_chain_internal(ops[2], &ops[3], 1, false);
emit_op(result_type, id, expr, should_forward(ops[2]));
inherit_expression_dependencies(id, ops[2]);
inherit_expression_dependencies(id, ops[3]);
break;
}
case OpCompositeExtract:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
length -= 3;
auto &type = get<SPIRType>(result_type);
// We can only split the expression here if our expression is forwarded as a temporary.
bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries);
// Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case.
auto &composite_type = expression_type(ops[2]);
if (composite_type.basetype == SPIRType::Struct || !composite_type.array.empty())
allow_base_expression = false;
// Packed expressions cannot be split up.
if (has_decoration(ops[2], DecorationCPacked))
allow_base_expression = false;
// Only apply this optimization if result is scalar.
if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1)
{
// We want to split the access chain from the base.
// This is so we can later combine different CompositeExtract results
// with CompositeConstruct without emitting code like
//
// vec3 temp = texture(...).xyz
// vec4(temp.x, temp.y, temp.z, 1.0).
//
// when we actually wanted to emit this
// vec4(texture(...).xyz, 1.0).
//
// Including the base will prevent this and would trigger multiple reads
// from expression causing it to be forced to an actual temporary in GLSL.
auto expr = access_chain_internal(ops[2], &ops[3], length, true, true);
auto &e = emit_op(result_type, id, expr, true, !expression_is_forwarded(ops[2]));
inherit_expression_dependencies(id, ops[2]);
e.base_expression = ops[2];
}
else
{
auto expr = access_chain_internal(ops[2], &ops[3], length, true);
emit_op(result_type, id, expr, should_forward(ops[2]), !expression_is_forwarded(ops[2]));
inherit_expression_dependencies(id, ops[2]);
}
break;
}
case OpCompositeInsert:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t obj = ops[2];
uint32_t composite = ops[3];
const auto *elems = &ops[4];
length -= 4;
flush_variable_declaration(composite);
// Make a copy, then use access chain to store the variable.
statement(declare_temporary(result_type, id), to_expression(composite), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
auto chain = access_chain_internal(id, elems, length, true);
statement(chain, " = ", to_expression(obj), ";");
break;
}
case OpCopyMemory:
{
uint32_t lhs = ops[0];
uint32_t rhs = ops[1];
if (lhs != rhs)
{
flush_variable_declaration(lhs);
flush_variable_declaration(rhs);
statement(to_expression(lhs), " = ", to_expression(rhs), ";");
register_write(lhs);
}
break;
}
case OpCopyObject:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t rhs = ops[2];
bool pointer = get<SPIRType>(result_type).pointer;
if (expression_is_lvalue(rhs) && !pointer)
{
// Need a copy.
// For pointer types, we copy the pointer itself.
statement(declare_temporary(result_type, id), to_expression(rhs), ";");
set<SPIRExpression>(id, to_name(id), result_type, true);
inherit_expression_dependencies(id, rhs);
}
else
{
// RHS expression is immutable, so just forward it.
// Copying these things really make no sense, but
// seems to be allowed anyways.
auto &e = set<SPIRExpression>(id, to_expression(rhs), result_type, true);
if (pointer)
{
auto *var = maybe_get_backing_variable(rhs);
e.loaded_from = var ? var->self : 0;
}
}
break;
}
case OpVectorShuffle:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t vec0 = ops[2];
uint32_t vec1 = ops[3];
const auto *elems = &ops[4];
length -= 4;
auto &type0 = expression_type(vec0);
bool shuffle = false;
for (uint32_t i = 0; i < length; i++)
if (elems[i] >= type0.vecsize)
shuffle = true;
string expr;
bool should_fwd, trivial_forward;
if (shuffle)
{
should_fwd = should_forward(vec0) && should_forward(vec1);
trivial_forward = !expression_is_forwarded(vec0) && !expression_is_forwarded(vec1);
// Constructor style and shuffling from two different vectors.
vector<string> args;
for (uint32_t i = 0; i < length; i++)
{
if (elems[i] >= type0.vecsize)
args.push_back(join(to_enclosed_expression(vec1), ".", index_to_swizzle(elems[i] - type0.vecsize)));
else
args.push_back(join(to_enclosed_expression(vec0), ".", index_to_swizzle(elems[i])));
}
expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")");
}
else
{
should_fwd = should_forward(vec0);
trivial_forward = !expression_is_forwarded(vec0);
// We only source from first vector, so can use swizzle.
// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
expr += to_enclosed_expression(vec0);
if (has_decoration(vec0, DecorationCPacked))
expr = unpack_expression_type(expr, expression_type(vec0));
expr += ".";
for (uint32_t i = 0; i < length; i++)
expr += index_to_swizzle(elems[i]);
if (backend.swizzle_is_function && length > 1)
expr += "()";
}
// A shuffle is trivial in that it doesn't actually *do* anything.
// We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed.
emit_op(result_type, id, expr, should_fwd, trivial_forward);
inherit_expression_dependencies(id, vec0);
inherit_expression_dependencies(id, vec1);
break;
}
// ALU
case OpIsNan:
UFOP(isnan);
break;
case OpIsInf:
UFOP(isinf);
break;
case OpSNegate:
case OpFNegate:
UOP(-);
break;
case OpIAdd:
{
// For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts.
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(+, type);
break;
}
case OpFAdd:
BOP(+);
break;
case OpISub:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(-, type);
break;
}
case OpFSub:
BOP(-);
break;
case OpIMul:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(*, type);
break;
}
case OpVectorTimesMatrix:
case OpMatrixTimesVector:
{
// If the matrix needs transpose, just flip the multiply order.
auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]);
if (e && e->need_transpose)
{
e->need_transpose = false;
emit_binary_op(ops[0], ops[1], ops[3], ops[2], "*");
e->need_transpose = true;
}
else
BOP(*);
break;
}
case OpFMul:
case OpMatrixTimesScalar:
case OpVectorTimesScalar:
case OpMatrixTimesMatrix:
BOP(*);
break;
case OpOuterProduct:
2016-07-27 08:59:00 +00:00
BFOP(outerProduct);
break;
case OpDot:
BFOP(dot);
break;
case OpTranspose:
UFOP(transpose);
break;
case OpSDiv:
BOP_CAST(/, SPIRType::Int);
break;
case OpUDiv:
BOP_CAST(/, SPIRType::UInt);
break;
case OpFDiv:
2016-05-23 11:30:02 +00:00
BOP(/);
break;
case OpShiftRightLogical:
BOP_CAST(>>, SPIRType::UInt);
break;
case OpShiftRightArithmetic:
BOP_CAST(>>, SPIRType::Int);
break;
case OpShiftLeftLogical:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(<<, type);
break;
}
case OpBitwiseOr:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(|, type);
break;
}
case OpBitwiseXor:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST (^, type);
break;
}
case OpBitwiseAnd:
{
auto type = get<SPIRType>(ops[0]).basetype;
BOP_CAST(&, type);
break;
}
case OpNot:
UOP(~);
break;
case OpUMod:
BOP_CAST(%, SPIRType::UInt);
break;
case OpSMod:
BOP_CAST(%, SPIRType::Int);
break;
case OpFMod:
BFOP(mod);
break;
2018-02-15 12:31:29 +00:00
case OpFRem:
{
if (is_legacy())
2018-02-15 12:32:49 +00:00
SPIRV_CROSS_THROW("OpFRem requires trunc() and is only supported on non-legacy targets. A workaround is "
"needed for legacy.");
2018-02-15 12:31:29 +00:00
uint32_t result_type = ops[0];
uint32_t result_id = ops[1];
uint32_t op0 = ops[2];
uint32_t op1 = ops[3];
// Needs special handling.
bool forward = should_forward(op0) && should_forward(op1);
2018-02-15 12:32:49 +00:00
auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(",
to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")");
2018-02-15 12:31:29 +00:00
emit_op(result_type, result_id, expr, forward);
inherit_expression_dependencies(result_id, op0);
inherit_expression_dependencies(result_id, op1);
break;
}
// Relational
case OpAny:
UFOP(any);
break;
case OpAll:
UFOP(all);
break;
case OpSelect:
emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]);
break;
case OpLogicalOr:
{
// No vector variant in GLSL for logical OR.
auto result_type = ops[0];
auto id = ops[1];
auto &type = get<SPIRType>(result_type);
if (type.vecsize > 1)
emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||");
else
BOP(||);
break;
}
case OpLogicalAnd:
{
// No vector variant in GLSL for logical AND.
auto result_type = ops[0];
auto id = ops[1];
auto &type = get<SPIRType>(result_type);
if (type.vecsize > 1)
emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&");
else
BOP(&&);
break;
}
case OpLogicalNot:
{
auto &type = get<SPIRType>(ops[0]);
if (type.vecsize > 1)
UFOP(not);
else
UOP(!);
break;
}
case OpIEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(equal, SPIRType::Int);
else
BOP_CAST(==, SPIRType::Int);
break;
}
case OpLogicalEqual:
case OpFOrdEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(equal);
else
2016-05-23 11:30:02 +00:00
BOP(==);
break;
}
case OpINotEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(notEqual, SPIRType::Int);
else
BOP_CAST(!=, SPIRType::Int);
break;
}
case OpLogicalNotEqual:
case OpFOrdNotEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(notEqual);
else
2016-05-23 11:30:02 +00:00
BOP(!=);
break;
}
case OpUGreaterThan:
case OpSGreaterThan:
{
auto type = opcode == OpUGreaterThan ? SPIRType::UInt : SPIRType::Int;
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(greaterThan, type);
else
BOP_CAST(>, type);
break;
}
case OpFOrdGreaterThan:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(greaterThan);
else
2016-05-23 11:30:02 +00:00
BOP(>);
break;
}
case OpUGreaterThanEqual:
case OpSGreaterThanEqual:
{
auto type = opcode == OpUGreaterThanEqual ? SPIRType::UInt : SPIRType::Int;
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(greaterThanEqual, type);
else
BOP_CAST(>=, type);
break;
}
case OpFOrdGreaterThanEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(greaterThanEqual);
else
2016-05-23 11:30:02 +00:00
BOP(>=);
break;
}
case OpULessThan:
case OpSLessThan:
{
auto type = opcode == OpULessThan ? SPIRType::UInt : SPIRType::Int;
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(lessThan, type);
else
BOP_CAST(<, type);
break;
}
case OpFOrdLessThan:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(lessThan);
else
2016-05-23 11:30:02 +00:00
BOP(<);
break;
}
case OpULessThanEqual:
case OpSLessThanEqual:
{
auto type = opcode == OpULessThanEqual ? SPIRType::UInt : SPIRType::Int;
if (expression_type(ops[2]).vecsize > 1)
BFOP_CAST(lessThanEqual, type);
else
BOP_CAST(<=, type);
break;
}
case OpFOrdLessThanEqual:
{
if (expression_type(ops[2]).vecsize > 1)
BFOP(lessThanEqual);
else
2016-05-23 11:30:02 +00:00
BOP(<=);
break;
}
// Conversion
case OpConvertFToU:
case OpConvertFToS:
case OpConvertSToF:
case OpConvertUToF:
case OpUConvert:
case OpSConvert:
case OpFConvert:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
emit_unary_func_op(result_type, id, ops[2], func.c_str());
break;
}
case OpBitcast:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t arg = ops[2];
auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg));
emit_unary_func_op(result_type, id, arg, op.c_str());
break;
}
2016-05-31 14:56:15 +00:00
case OpQuantizeToF16:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t arg = ops[2];
string op;
auto &type = get<SPIRType>(result_type);
switch (type.vecsize)
{
case 1:
op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x");
break;
case 2:
op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))");
break;
case 3:
{
auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x");
op = join("vec3(", op0, ", ", op1, ")");
break;
}
case 4:
{
auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))");
auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))");
op = join("vec4(", op0, ", ", op1, ")");
break;
}
default:
SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16.");
2016-05-31 14:56:15 +00:00
}
emit_op(result_type, id, op, should_forward(arg));
inherit_expression_dependencies(id, arg);
2016-05-31 14:56:15 +00:00
break;
}
// Derivatives
case OpDPdx:
UFOP(dFdx);
2016-09-17 12:33:16 +00:00
if (is_legacy_es())
require_extension_internal("GL_OES_standard_derivatives");
register_control_dependent_expression(ops[1]);
break;
case OpDPdy:
UFOP(dFdy);
2016-09-17 12:33:16 +00:00
if (is_legacy_es())
require_extension_internal("GL_OES_standard_derivatives");
register_control_dependent_expression(ops[1]);
break;
2017-03-24 12:59:19 +00:00
case OpDPdxFine:
UFOP(dFdxFine);
2017-03-24 14:58:54 +00:00
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
if (options.version < 450)
require_extension_internal("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
2017-03-24 12:59:19 +00:00
break;
case OpDPdyFine:
UFOP(dFdyFine);
2017-03-24 14:58:54 +00:00
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
if (options.version < 450)
require_extension_internal("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
2017-03-24 12:59:19 +00:00
break;
case OpDPdxCoarse:
2017-03-24 14:58:54 +00:00
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
2017-03-24 12:59:19 +00:00
UFOP(dFdxCoarse);
2017-03-24 14:58:54 +00:00
if (options.version < 450)
require_extension_internal("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
2017-03-24 12:59:19 +00:00
break;
case OpDPdyCoarse:
UFOP(dFdyCoarse);
2017-03-24 14:58:54 +00:00
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
if (options.version < 450)
require_extension_internal("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
2017-03-24 12:59:19 +00:00
break;
case OpFwidth:
UFOP(fwidth);
2016-09-17 12:33:16 +00:00
if (is_legacy_es())
require_extension_internal("GL_OES_standard_derivatives");
register_control_dependent_expression(ops[1]);
break;
2018-03-06 15:28:42 +00:00
case OpFwidthCoarse:
UFOP(fwidthCoarse);
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
if (options.version < 450)
require_extension_internal("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
2018-03-06 15:28:42 +00:00
break;
case OpFwidthFine:
UFOP(fwidthFine);
if (options.es)
{
SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES.");
}
if (options.version < 450)
require_extension_internal("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
2018-03-06 15:28:42 +00:00
break;
// Bitfield
case OpBitFieldInsert:
2017-03-25 14:38:20 +00:00
// TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
QFOP(bitfieldInsert);
break;
case OpBitFieldSExtract:
case OpBitFieldUExtract:
2017-03-25 14:38:20 +00:00
// TODO: The signedness of inputs is strict in GLSL, but not in SPIR-V, bitcast if necessary.
TFOP(bitfieldExtract);
break;
case OpBitReverse:
UFOP(bitfieldReverse);
break;
case OpBitCount:
UFOP(bitCount);
break;
// Atomics
case OpAtomicExchange:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t ptr = ops[2];
// Ignore semantics for now, probably only relevant to CL.
uint32_t val = ops[5];
const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
forced_temporaries.insert(id);
emit_binary_func_op(result_type, id, ptr, val, op);
flush_all_atomic_capable_variables();
break;
}
case OpAtomicCompareExchange:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t ptr = ops[2];
uint32_t val = ops[6];
uint32_t comp = ops[7];
const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap";
forced_temporaries.insert(id);
emit_trinary_func_op(result_type, id, ptr, comp, val, op);
flush_all_atomic_capable_variables();
break;
}
case OpAtomicLoad:
flush_all_atomic_capable_variables();
// FIXME: Image?
2018-01-09 11:51:21 +00:00
// OpAtomicLoad seems to only be relevant for atomic counters.
UFOP(atomicCounter);
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
2018-01-09 11:51:21 +00:00
case OpAtomicStore:
SPIRV_CROSS_THROW("Unsupported opcode OpAtomicStore.");
case OpAtomicIIncrement:
forced_temporaries.insert(ops[1]);
// FIXME: Image?
UFOP(atomicCounterIncrement);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
case OpAtomicIDecrement:
forced_temporaries.insert(ops[1]);
// FIXME: Image?
UFOP(atomicCounterDecrement);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
case OpAtomicIAdd:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicISub:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
forced_temporaries.insert(ops[1]);
auto expr = join(op, "(", to_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicSMin:
case OpAtomicUMin:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicSMax:
case OpAtomicUMax:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicAnd:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicOr:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
case OpAtomicXor:
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor";
forced_temporaries.insert(ops[1]);
emit_binary_func_op(ops[0], ops[1], ops[2], ops[5], op);
flush_all_atomic_capable_variables();
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
}
// Geometry shaders
case OpEmitVertex:
statement("EmitVertex();");
break;
case OpEndPrimitive:
statement("EndPrimitive();");
break;
case OpEmitStreamVertex:
statement("EmitStreamVertex();");
break;
case OpEndStreamPrimitive:
statement("EndStreamPrimitive();");
break;
// Textures
case OpImageSampleExplicitLod:
case OpImageSampleProjExplicitLod:
case OpImageSampleDrefExplicitLod:
case OpImageSampleProjDrefExplicitLod:
2016-09-17 12:33:16 +00:00
case OpImageSampleImplicitLod:
case OpImageSampleProjImplicitLod:
case OpImageSampleDrefImplicitLod:
case OpImageSampleProjDrefImplicitLod:
case OpImageFetch:
case OpImageGather:
case OpImageDrefGather:
// Gets a bit hairy, so move this to a separate instruction.
emit_texture_op(instruction);
break;
case OpImage:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
2018-04-27 09:10:10 +00:00
// Suppress usage tracking.
auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true);
2016-07-11 10:47:46 +00:00
// When using the image, we need to know which variable it is actually loaded from.
auto *var = maybe_get_backing_variable(ops[2]);
e.loaded_from = var ? var->self : 0;
break;
}
2016-07-19 07:22:54 +00:00
case OpImageQueryLod:
{
if (!options.es && options.version < 400)
{
require_extension_internal("GL_ARB_texture_query_lod");
2016-07-19 07:22:54 +00:00
// For some reason, the ARB spec is all-caps.
BFOP(textureQueryLOD);
}
else if (options.es)
SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
2016-07-19 07:22:54 +00:00
else
BFOP(textureQueryLod);
register_control_dependent_expression(ops[1]);
2016-07-19 07:22:54 +00:00
break;
}
2016-07-19 07:28:32 +00:00
case OpImageQueryLevels:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
2016-07-19 07:28:32 +00:00
if (!options.es && options.version < 430)
require_extension_internal("GL_ARB_texture_query_levels");
2016-07-19 07:28:32 +00:00
if (options.es)
SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile.");
auto expr = join("textureQueryLevels(", convert_separate_image_to_combined(ops[2]), ")");
auto &restype = get<SPIRType>(ops[0]);
expr = bitcast_expression(restype, SPIRType::Int, expr);
emit_op(result_type, id, expr, true);
2016-07-19 07:28:32 +00:00
break;
}
2016-07-11 10:47:46 +00:00
case OpImageQuerySamples:
{
auto &type = expression_type(ops[2]);
uint32_t result_type = ops[0];
uint32_t id = ops[1];
2016-07-11 10:47:46 +00:00
string expr;
if (type.image.sampled == 2)
expr = join("imageSamples(", to_expression(ops[2]), ")");
2016-07-11 10:47:46 +00:00
else
expr = join("textureSamples(", convert_separate_image_to_combined(ops[2]), ")");
auto &restype = get<SPIRType>(ops[0]);
expr = bitcast_expression(restype, SPIRType::Int, expr);
emit_op(result_type, id, expr, true);
break;
}
case OpSampledImage:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_sampled_image_op(result_type, id, ops[2], ops[3]);
break;
}
case OpImageQuerySizeLod:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
2018-04-30 10:46:21 +00:00
auto expr = join("textureSize(", convert_separate_image_to_combined(ops[2]), ", ",
bitcast_expression(SPIRType::Int, ops[3]), ")");
auto &restype = get<SPIRType>(ops[0]);
expr = bitcast_expression(restype, SPIRType::Int, expr);
emit_op(result_type, id, expr, true);
break;
}
// Image load/store
case OpImageRead:
{
// We added Nonreadable speculatively to the OpImage variable due to glslangValidator
// not adding the proper qualifiers.
// If it turns out we need to read the image after all, remove the qualifier and recompile.
auto *var = maybe_get_backing_variable(ops[2]);
if (var)
{
auto &flags = meta.at(var->self).decoration.decoration_flags;
if (flags.get(DecorationNonReadable))
{
flags.clear(DecorationNonReadable);
force_recompile = true;
}
}
uint32_t result_type = ops[0];
uint32_t id = ops[1];
bool pure;
string imgexpr;
auto &type = expression_type(ops[2]);
if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code
{
2016-07-11 11:36:11 +00:00
if (type.image.ms)
SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible.");
2016-07-11 11:36:11 +00:00
2016-05-23 11:30:02 +00:00
auto itr =
find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; });
if (itr == end(pls_inputs))
{
// For non-PLS inputs, we rely on subpass type remapping information to get it right
// since ImageRead always returns 4-component vectors and the backing type is opaque.
if (!var->remapped_components)
SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly.");
imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2]));
}
else
{
// PLS input could have different number of components than what the SPIR expects, swizzle to
// the appropriate vector size.
uint32_t components = pls_format_to_components(itr->format);
imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2]));
}
pure = true;
}
else if (type.image.dim == DimSubpassData)
{
if (options.vulkan_semantics)
{
// With Vulkan semantics, use the proper Vulkan GLSL construct.
2016-07-11 11:36:11 +00:00
if (type.image.ms)
{
uint32_t operands = ops[4];
if (operands != ImageOperandsSampleMask || length != 6)
SPIRV_CROSS_THROW(
2016-07-11 11:36:11 +00:00
"Multisampled image used in OpImageRead, but unexpected operand mask was used.");
uint32_t samples = ops[5];
imgexpr = join("subpassLoad(", to_expression(ops[2]), ", ", to_expression(samples), ")");
}
else
imgexpr = join("subpassLoad(", to_expression(ops[2]), ")");
}
else
{
2016-07-11 11:36:11 +00:00
if (type.image.ms)
{
uint32_t operands = ops[4];
if (operands != ImageOperandsSampleMask || length != 6)
SPIRV_CROSS_THROW(
2016-07-11 11:36:11 +00:00
"Multisampled image used in OpImageRead, but unexpected operand mask was used.");
uint32_t samples = ops[5];
imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ",
to_expression(samples), ")");
}
else
{
// Implement subpass loads via texture barrier style sampling.
imgexpr = join("texelFetch(", to_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)");
}
}
imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
pure = true;
}
else
{
// imageLoad only accepts int coords, not uint.
auto coord_expr = to_expression(ops[3]);
auto target_coord_type = expression_type(ops[3]);
target_coord_type.basetype = SPIRType::Int;
coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr);
// Plain image load/store.
if (type.image.ms)
{
uint32_t operands = ops[4];
if (operands != ImageOperandsSampleMask || length != 6)
SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected operand mask was used.");
uint32_t samples = ops[5];
imgexpr =
join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")");
}
else
imgexpr = join("imageLoad(", to_expression(ops[2]), ", ", coord_expr, ")");
imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr);
pure = false;
}
if (var && var->forwardable)
{
2017-11-22 10:28:58 +00:00
bool forward = forced_temporaries.find(id) == end(forced_temporaries);
auto &e = emit_op(result_type, id, imgexpr, forward);
// We only need to track dependencies if we're reading from image load/store.
if (!pure)
{
e.loaded_from = var->self;
2017-11-22 10:28:58 +00:00
if (forward)
var->dependees.push_back(id);
}
}
else
emit_op(result_type, id, imgexpr, false);
inherit_expression_dependencies(id, ops[2]);
if (type.image.ms)
inherit_expression_dependencies(id, ops[5]);
break;
}
case OpImageTexelPointer:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
auto &e = set<SPIRExpression>(id, join(to_expression(ops[2]), ", ", to_expression(ops[3])), result_type, true);
2016-07-11 10:47:46 +00:00
// When using the pointer, we need to know which variable it is actually loaded from.
auto *var = maybe_get_backing_variable(ops[2]);
e.loaded_from = var ? var->self : 0;
break;
}
case OpImageWrite:
{
// We added Nonwritable speculatively to the OpImage variable due to glslangValidator
// not adding the proper qualifiers.
// If it turns out we need to write to the image after all, remove the qualifier and recompile.
auto *var = maybe_get_backing_variable(ops[0]);
if (var)
{
auto &flags = meta.at(var->self).decoration.decoration_flags;
if (flags.get(DecorationNonWritable))
{
flags.clear(DecorationNonWritable);
force_recompile = true;
}
}
auto &type = expression_type(ops[0]);
auto &value_type = expression_type(ops[2]);
auto store_type = value_type;
store_type.vecsize = 4;
// imageStore only accepts int coords, not uint.
auto coord_expr = to_expression(ops[1]);
auto target_coord_type = expression_type(ops[1]);
target_coord_type.basetype = SPIRType::Int;
coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr);
if (type.image.ms)
{
uint32_t operands = ops[3];
if (operands != ImageOperandsSampleMask || length != 5)
SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used.");
uint32_t samples = ops[4];
statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ",
remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
}
else
statement("imageStore(", to_expression(ops[0]), ", ", coord_expr, ", ",
remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");");
if (var && variable_storage_is_aliased(*var))
flush_all_aliased_variables();
break;
}
case OpImageQuerySize:
{
auto &type = expression_type(ops[2]);
uint32_t result_type = ops[0];
uint32_t id = ops[1];
if (type.basetype == SPIRType::Image)
{
string expr;
if (type.image.sampled == 2)
{
// The size of an image is always constant.
expr = join("imageSize(", to_expression(ops[2]), ")");
}
else
{
// This path is hit for samplerBuffers and multisampled images which do not have LOD.
expr = join("textureSize(", convert_separate_image_to_combined(ops[2]), ")");
}
auto &restype = get<SPIRType>(ops[0]);
expr = bitcast_expression(restype, SPIRType::Int, expr);
emit_op(result_type, id, expr, true);
}
else
SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize.");
break;
}
// Compute
case OpControlBarrier:
2018-01-09 11:07:07 +00:00
case OpMemoryBarrier:
{
uint32_t execution_scope = 0;
2018-01-09 11:07:07 +00:00
uint32_t memory;
uint32_t semantics;
2017-10-20 14:18:02 +00:00
2018-01-09 11:07:07 +00:00
if (opcode == OpMemoryBarrier)
{
memory = get<SPIRConstant>(ops[0]).scalar();
semantics = get<SPIRConstant>(ops[1]).scalar();
}
else
{
execution_scope = get<SPIRConstant>(ops[0]).scalar();
2018-01-09 11:07:07 +00:00
memory = get<SPIRConstant>(ops[1]).scalar();
semantics = get<SPIRConstant>(ops[2]).scalar();
}
if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup)
{
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("Can only use subgroup operations in Vulkan semantics.");
require_extension_internal("GL_KHR_shader_subgroup_basic");
}
if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl)
{
// Control shaders only have barriers, and it implies memory barriers.
if (opcode == OpControlBarrier)
statement("barrier();");
break;
}
2018-01-09 11:07:07 +00:00
// We only care about these flags, acquire/release and friends are not relevant to GLSL.
semantics = mask_relevant_memory_semantics(semantics);
if (opcode == OpMemoryBarrier)
{
// If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier
// does what we need, so we avoid redundant barriers.
const Instruction *next = get_next_instruction_in_block(instruction);
if (next && next->op == OpControlBarrier)
{
auto *next_ops = stream(*next);
uint32_t next_memory = get<SPIRConstant>(next_ops[1]).scalar();
uint32_t next_semantics = get<SPIRConstant>(next_ops[2]).scalar();
next_semantics = mask_relevant_memory_semantics(next_semantics);
2018-01-09 11:17:38 +00:00
bool memory_scope_covered = false;
if (next_memory == memory)
memory_scope_covered = true;
else if (next_semantics == MemorySemanticsWorkgroupMemoryMask)
{
// If we only care about workgroup memory, either Device or Workgroup scope is fine,
// scope does not have to match.
if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) &&
(memory == ScopeDevice || memory == ScopeWorkgroup))
{
memory_scope_covered = true;
}
}
else if (memory == ScopeWorkgroup && next_memory == ScopeDevice)
{
// The control barrier has device scope, but the memory barrier just has workgroup scope.
memory_scope_covered = true;
}
2018-01-09 11:07:07 +00:00
// If we have the same memory scope, and all memory types are covered, we're good.
2018-01-09 11:17:38 +00:00
if (memory_scope_covered && (semantics & next_semantics) == semantics)
2018-01-09 11:07:07 +00:00
break;
}
}
2018-01-09 11:07:07 +00:00
// We are synchronizing some memory or syncing execution,
// so we cannot forward any loads beyond the memory barrier.
if (semantics || opcode == OpControlBarrier)
{
assert(current_emitting_block);
flush_control_dependent_expressions(current_emitting_block->self);
flush_all_active_variables();
}
2018-01-09 11:07:07 +00:00
if (memory == ScopeWorkgroup) // Only need to consider memory within a group
{
if (semantics == MemorySemanticsWorkgroupMemoryMask)
statement("memoryBarrierShared();");
else if (semantics != 0)
statement("groupMemoryBarrier();");
}
else if (memory == ScopeSubgroup)
{
2018-04-17 13:01:31 +00:00
const uint32_t all_barriers =
MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask;
if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
{
// These are not relevant for GLSL, but assume it means memoryBarrier().
// memoryBarrier() does everything, so no need to test anything else.
statement("subgroupMemoryBarrier();");
}
else if ((semantics & all_barriers) == all_barriers)
{
// Short-hand instead of emitting 3 barriers.
statement("subgroupMemoryBarrier();");
}
else
{
// Pick out individual barriers.
if (semantics & MemorySemanticsWorkgroupMemoryMask)
statement("subgroupMemoryBarrierShared();");
if (semantics & MemorySemanticsUniformMemoryMask)
statement("subgroupMemoryBarrierBuffer();");
if (semantics & MemorySemanticsImageMemoryMask)
statement("subgroupMemoryBarrierImage();");
}
}
2018-01-09 11:07:07 +00:00
else
{
2018-01-09 11:51:21 +00:00
const uint32_t all_barriers = MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
MemorySemanticsImageMemoryMask | MemorySemanticsAtomicCounterMemoryMask;
2018-01-09 11:07:07 +00:00
if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask))
{
// These are not relevant for GLSL, but assume it means memoryBarrier().
// memoryBarrier() does everything, so no need to test anything else.
statement("memoryBarrier();");
}
else if ((semantics & all_barriers) == all_barriers)
{
// Short-hand instead of emitting 4 barriers.
statement("memoryBarrier();");
}
else
{
// Pick out individual barriers.
if (semantics & MemorySemanticsWorkgroupMemoryMask)
statement("memoryBarrierShared();");
if (semantics & MemorySemanticsUniformMemoryMask)
statement("memoryBarrierBuffer();");
if (semantics & MemorySemanticsImageMemoryMask)
statement("memoryBarrierImage();");
if (semantics & MemorySemanticsAtomicCounterMemoryMask)
statement("memoryBarrierAtomicCounter();");
}
}
if (opcode == OpControlBarrier)
{
if (execution_scope == ScopeSubgroup)
statement("subgroupBarrier();");
else
statement("barrier();");
}
break;
}
case OpExtInst:
{
uint32_t extension_set = ops[2];
2017-11-21 13:04:57 +00:00
if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
{
emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
{
emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
{
emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
{
emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
{
2017-11-21 13:04:57 +00:00
emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
2017-11-22 18:27:03 +00:00
else
{
statement("// unimplemented ext op ", instruction.op);
break;
}
2017-11-21 13:04:57 +00:00
break;
}
// Legacy sub-group stuff ...
2017-11-21 13:04:57 +00:00
case OpSubgroupBallotKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
string expr;
expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
emit_op(result_type, id, expr, should_forward(ops[2]));
2017-11-21 13:04:57 +00:00
require_extension_internal("GL_ARB_shader_ballot");
inherit_expression_dependencies(id, ops[2]);
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpSubgroupFirstInvocationKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
require_extension_internal("GL_ARB_shader_ballot");
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpSubgroupReadInvocationKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
require_extension_internal("GL_ARB_shader_ballot");
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpSubgroupAllKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
require_extension_internal("GL_ARB_shader_group_vote");
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpSubgroupAnyKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
require_extension_internal("GL_ARB_shader_group_vote");
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpSubgroupAllEqualKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
require_extension_internal("GL_ARB_shader_group_vote");
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpGroupIAddNonUniformAMD:
case OpGroupFAddNonUniformAMD:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
require_extension_internal("GL_AMD_shader_ballot");
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpGroupFMinNonUniformAMD:
case OpGroupUMinNonUniformAMD:
case OpGroupSMinNonUniformAMD:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
require_extension_internal("GL_AMD_shader_ballot");
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpGroupFMaxNonUniformAMD:
case OpGroupUMaxNonUniformAMD:
case OpGroupSMaxNonUniformAMD:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
require_extension_internal("GL_AMD_shader_ballot");
register_control_dependent_expression(ops[1]);
2017-11-21 13:04:57 +00:00
break;
}
case OpFragmentMaskFetchAMD:
{
auto &type = expression_type(ops[2]);
uint32_t result_type = ops[0];
uint32_t id = ops[1];
if (type.image.dim == spv::DimSubpassData)
{
emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
}
2017-11-22 18:27:03 +00:00
else
2017-11-21 13:04:57 +00:00
{
emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
}
require_extension_internal("GL_AMD_shader_fragment_mask");
2017-11-21 13:04:57 +00:00
break;
}
case OpFragmentFetchAMD:
{
auto &type = expression_type(ops[2]);
uint32_t result_type = ops[0];
uint32_t id = ops[1];
if (type.image.dim == spv::DimSubpassData)
{
emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
}
2017-11-22 18:27:03 +00:00
else
2017-11-21 13:04:57 +00:00
{
emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
}
require_extension_internal("GL_AMD_shader_fragment_mask");
break;
}
// Vulkan 1.1 sub-group stuff ...
case OpGroupNonUniformElect:
case OpGroupNonUniformBroadcast:
case OpGroupNonUniformBroadcastFirst:
case OpGroupNonUniformBallot:
case OpGroupNonUniformInverseBallot:
case OpGroupNonUniformBallotBitExtract:
case OpGroupNonUniformBallotBitCount:
case OpGroupNonUniformBallotFindLSB:
case OpGroupNonUniformBallotFindMSB:
case OpGroupNonUniformShuffle:
case OpGroupNonUniformShuffleXor:
case OpGroupNonUniformShuffleUp:
case OpGroupNonUniformShuffleDown:
case OpGroupNonUniformAll:
case OpGroupNonUniformAny:
case OpGroupNonUniformAllEqual:
case OpGroupNonUniformFAdd:
2018-04-10 15:16:41 +00:00
case OpGroupNonUniformIAdd:
case OpGroupNonUniformFMul:
2018-04-10 15:16:41 +00:00
case OpGroupNonUniformIMul:
case OpGroupNonUniformFMin:
case OpGroupNonUniformFMax:
2018-04-10 15:16:41 +00:00
case OpGroupNonUniformSMin:
case OpGroupNonUniformSMax:
case OpGroupNonUniformUMin:
case OpGroupNonUniformUMax:
case OpGroupNonUniformBitwiseAnd:
case OpGroupNonUniformBitwiseOr:
case OpGroupNonUniformBitwiseXor:
case OpGroupNonUniformQuadSwap:
case OpGroupNonUniformQuadBroadcast:
emit_subgroup_op(instruction);
break;
default:
statement("// unimplemented op ", instruction.op);
break;
}
2016-03-02 17:09:16 +00:00
}
2016-10-19 21:09:51 +00:00
// Appends function arguments, mapped from global variables, beyond the specified arg index.
// This is used when a function call uses fewer arguments than the function defines.
2016-10-19 21:09:51 +00:00
// This situation may occur if the function signature has been dynamically modified to
// extract global variables referenced from within the function, and convert them to
// function arguments. This is necessary for shader languages that do not support global
// access to shader input content from within a function (eg. Metal). Each additional
// function args uses the name of the global variable. Function nesting will modify the
// functions and function calls all the way up the nesting chain.
2016-10-19 21:09:51 +00:00
void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, vector<string> &arglist)
{
2016-10-24 13:24:24 +00:00
auto &args = func.arguments;
uint32_t arg_cnt = uint32_t(args.size());
2016-10-24 13:24:24 +00:00
for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++)
{
auto &arg = args[arg_idx];
assert(arg.alias_global_variable);
arglist.push_back(to_func_call_arg(arg.id));
// If the underlying variable needs to be declared
// (ie. a local variable with deferred declaration), do so now.
uint32_t var_id = get<SPIRVariable>(arg.id).basevariable;
if (var_id)
flush_variable_declaration(var_id);
}
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index)
{
auto &memb = meta[type.self].members;
if (index < memb.size() && !memb[index].alias.empty())
return memb[index].alias;
else
return join("_m", index);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index)
{
auto &memb = meta[type.self].members;
if (index < memb.size() && !memb[index].alias.empty())
{
auto &name = memb[index].alias;
if (name.empty())
return;
// Reserved for temporaries.
if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
{
name.clear();
return;
}
update_name_cache(type.member_name_cache, name);
}
}
// Checks whether the ID is a row_major matrix that requires conversion before use
bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id)
{
// Natively supported row-major matrices do not need to be converted.
// Legacy targets do not support row major.
if (backend.native_row_major_matrix && !is_legacy())
return false;
// Non-matrix or column-major matrix types do not need to be converted.
if (!meta[id].decoration.decoration_flags.get(DecorationRowMajor))
return false;
// Only square row-major matrices can be converted at this time.
// Converting non-square matrices will require defining custom GLSL function that
// swaps matrix elements while retaining the original dimensional form of the matrix.
const auto type = expression_type(id);
if (type.columns != type.vecsize)
2016-12-15 19:46:10 +00:00
SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
return true;
}
// Checks whether the member is a row_major matrix that requires conversion before use
bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index)
{
// Natively supported row-major matrices do not need to be converted.
if (backend.native_row_major_matrix && !is_legacy())
return false;
// Non-matrix or column-major matrix types do not need to be converted.
if (!combined_decoration_for_member(type, index).get(DecorationRowMajor))
return false;
// Only square row-major matrices can be converted at this time.
// Converting non-square matrices will require defining custom GLSL function that
// swaps matrix elements while retaining the original dimensional form of the matrix.
const auto mbr_type = get<SPIRType>(type.member_types[index]);
if (mbr_type.columns != mbr_type.vecsize)
2016-12-15 19:46:10 +00:00
SPIRV_CROSS_THROW("Row-major matrices must be square on this platform.");
return true;
}
// Checks whether the member is in packed data type, that might need to be unpacked.
// GLSL does not define packed data types, but certain subclasses do.
bool CompilerGLSL::member_is_packed_type(const SPIRType &type, uint32_t index) const
{
return has_member_decoration(type.self, index, DecorationCPacked);
}
// Wraps the expression string in a function call that converts the
// row_major matrix result of the expression to a column_major matrix.
// Base implementation uses the standard library transpose() function.
// Subclasses may override to use a different function.
string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType & /*exp_type*/, bool /*is_packed*/)
{
strip_enclosed_expression(exp_str);
return join("transpose(", exp_str, ")");
}
string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id)
{
string type_name = type_to_glsl(type, id);
remap_variable_type_name(type, name, type_name);
2016-09-20 08:17:41 +00:00
return join(type_name, " ", name, type_to_array_glsl(type));
}
// Emit a structure member. Subclasses may override to modify output,
// or to dynamically add a padding member if needed.
void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index,
const string &qualifier, uint32_t)
2016-03-02 17:09:16 +00:00
{
auto &membertype = get<SPIRType>(member_type_id);
Bitset memberflags;
auto &memb = meta[type.self].members;
if (index < memb.size())
memberflags = memb[index].decoration_flags;
string qualifiers;
bool is_block = meta[type.self].decoration.decoration_flags.get(DecorationBlock) ||
meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
if (is_block)
qualifiers = to_interpolation_qualifiers(memberflags);
statement(layout_for_member(type, index), qualifiers, qualifier,
flags_to_precision_qualifiers_glsl(membertype, memberflags),
variable_decl(membertype, to_member_name(type, index)), ";");
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::flags_to_precision_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
2016-03-02 17:09:16 +00:00
{
// Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp).
if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt &&
type.basetype != SPIRType::Image && type.basetype != SPIRType::SampledImage &&
type.basetype != SPIRType::Sampler)
return "";
if (options.es)
{
auto &execution = get_entry_point();
if (flags.get(DecorationRelaxedPrecision))
{
bool implied_fmediump = type.basetype == SPIRType::Float &&
options.fragment.default_float_precision == Options::Mediump &&
execution.model == ExecutionModelFragment;
bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
options.fragment.default_int_precision == Options::Mediump &&
execution.model == ExecutionModelFragment;
return implied_fmediump || implied_imediump ? "" : "mediump ";
}
else
{
bool implied_fhighp =
type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp &&
execution.model == ExecutionModelFragment) ||
(execution.model != ExecutionModelFragment));
bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) &&
((options.fragment.default_int_precision == Options::Highp &&
execution.model == ExecutionModelFragment) ||
(execution.model != ExecutionModelFragment));
return implied_fhighp || implied_ihighp ? "" : "highp ";
}
}
else if (backend.allow_precision_qualifiers)
{
// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
// The default is highp however, so only emit mediump in the rare case that a shader has these.
if (flags.get(DecorationRelaxedPrecision))
return "mediump ";
else
return "";
}
else
return "";
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
2016-03-02 17:09:16 +00:00
{
return flags_to_precision_qualifiers_glsl(expression_type(id), meta[id].decoration.decoration_flags);
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::to_qualifiers_glsl(uint32_t id)
{
auto flags = meta[id].decoration.decoration_flags;
string res;
auto *var = maybe_get<SPIRVariable>(id);
if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied)
res += "shared ";
res += to_interpolation_qualifiers(flags);
if (var)
res += to_storage_qualifiers_glsl(*var);
2017-08-29 13:52:59 +00:00
auto &type = expression_type(id);
if (type.image.dim != DimSubpassData && type.image.sampled == 2)
{
if (flags.get(DecorationCoherent))
2017-08-29 13:52:59 +00:00
res += "coherent ";
if (flags.get(DecorationRestrict))
2017-08-29 13:54:22 +00:00
res += "restrict ";
if (flags.get(DecorationNonWritable))
res += "readonly ";
if (flags.get(DecorationNonReadable))
res += "writeonly ";
}
2017-08-29 13:52:59 +00:00
res += to_precision_qualifiers_glsl(id);
return res;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
{
// glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ...
auto &type = expression_type(arg.id);
const char *direction = "";
if (type.pointer)
{
if (arg.write_count && arg.read_count)
direction = "inout ";
else if (arg.write_count)
direction = "out ";
}
return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id));
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::variable_decl(const SPIRVariable &variable)
{
// Ignore the pointer type since GLSL doesn't have pointers.
auto &type = get<SPIRType>(variable.basetype);
2017-06-17 08:56:24 +00:00
auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self));
if (variable.loop_variable && variable.static_expression)
{
uint32_t expr = variable.static_expression;
if (ids[expr].get_type() != TypeUndef)
res += join(" = ", to_expression(variable.static_expression));
}
else if (variable.initializer)
{
uint32_t expr = variable.initializer;
if (ids[expr].get_type() != TypeUndef)
res += join(" = ", to_expression(variable.initializer));
}
return res;
2016-03-02 17:09:16 +00:00
}
const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable)
2016-03-02 17:09:16 +00:00
{
auto flags = meta[variable.self].decoration.decoration_flags;
if (flags.get(DecorationRelaxedPrecision))
return "mediump ";
else
return "highp ";
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::pls_decl(const PlsRemap &var)
{
auto &variable = get<SPIRVariable>(var.id);
2016-03-02 17:09:16 +00:00
SPIRType type;
type.vecsize = pls_format_to_components(var.format);
type.basetype = pls_format_to_basetype(var.format);
2016-03-02 17:09:16 +00:00
return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ",
to_name(variable.self));
2016-03-02 17:09:16 +00:00
}
uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const
{
assert(type.array.size() == type.array_size_literal.size());
if (type.array_size_literal[index])
{
return type.array[index];
}
else
{
// Use the default spec constant value.
// This is the best we can do.
uint32_t array_size_id = type.array[index];
uint32_t array_size = get<SPIRConstant>(array_size_id).scalar();
return array_size;
}
}
string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
{
assert(type.array.size() == type.array_size_literal.size());
// Tessellation control shaders must have either gl_MaxPatchVertices or unsized arrays for input arrays.
// Opt for unsized as it's the more "correct" variant to use.
if (type.storage == StorageClassInput && get_entry_point().model == ExecutionModelTessellationControl)
return "";
auto &size = type.array[index];
if (!type.array_size_literal[index])
return to_expression(size);
else if (size)
return convert_to_string(size);
else if (!backend.flexible_member_array_supported)
{
// For runtime-sized arrays, we can work around
// lack of standard support for this by simply having
// a single element array.
//
// Runtime length arrays must always be the last element
// in an interface block.
return "1";
}
else
return "";
}
2016-03-02 17:09:16 +00:00
string CompilerGLSL::type_to_array_glsl(const SPIRType &type)
{
if (type.array.empty())
return "";
if (options.flatten_multidimensional_arrays)
{
string res;
res += "[";
for (auto i = uint32_t(type.array.size()); i; i--)
{
res += enclose_expression(to_array_size(type, i - 1));
if (i > 1)
res += " * ";
}
res += "]";
return res;
}
else
{
if (type.array.size() > 1)
{
if (!options.es && options.version < 430)
require_extension_internal("GL_ARB_arrays_of_arrays");
else if (options.es && options.version < 310)
SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. "
"Try using --flatten-multidimensional-arrays or set "
"options.flatten_multidimensional_arrays to true.");
}
string res;
for (auto i = uint32_t(type.array.size()); i; i--)
{
res += "[";
res += to_array_size(type, i - 1);
res += "]";
}
return res;
}
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t /* id */)
2016-03-02 17:09:16 +00:00
{
auto &imagetype = get<SPIRType>(type.image.type);
string res;
switch (imagetype.basetype)
{
case SPIRType::Int:
res = "i";
break;
case SPIRType::UInt:
res = "u";
break;
default:
break;
}
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics)
2016-07-11 11:36:11 +00:00
return res + "subpassInput" + (type.image.ms ? "MS" : "");
// If we're emulating subpassInput with samplers, force sampler2D
// so we don't have to specify format.
if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData)
{
// Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V.
if (type.image.dim == DimBuffer && type.image.sampled == 1)
res += "sampler";
else
res += type.image.sampled == 2 ? "image" : "texture";
}
else
res += "sampler";
switch (type.image.dim)
{
case Dim1D:
res += "1D";
break;
case Dim2D:
res += "2D";
break;
case Dim3D:
res += "3D";
break;
case DimCube:
res += "Cube";
break;
case DimBuffer:
if (options.es && options.version < 320)
require_extension_internal("GL_OES_texture_buffer");
else if (!options.es && options.version < 300)
require_extension_internal("GL_EXT_texture_buffer_object");
res += "Buffer";
break;
case DimSubpassData:
res += "2D";
break;
default:
SPIRV_CROSS_THROW("Only 1D, 2D, 3D, Buffer, InputTarget and Cube textures supported.");
}
2016-07-11 10:47:46 +00:00
if (type.image.ms)
res += "MS";
if (type.image.arrayed)
{
2016-09-17 13:56:23 +00:00
if (is_legacy_desktop())
require_extension_internal("GL_EXT_texture_array");
res += "Array";
}
// "Shadow" state in GLSL only exists for samplers and combined image samplers.
if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && type.image.depth)
res += "Shadow";
return res;
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type)
{
if (type.array.size() > 1)
{
if (options.flatten_multidimensional_arrays)
SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, e.g. float[][]().");
else if (!options.es && options.version < 430)
require_extension_internal("GL_ARB_arrays_of_arrays");
else if (options.es && options.version < 310)
SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310.");
}
auto e = type_to_glsl(type);
for (uint32_t i = 0; i < type.array.size(); i++)
e += "[]";
return e;
2016-03-02 17:09:16 +00:00
}
// The optional id parameter indicates the object whose type we are trying
// to find the description for. It is optional. Most type descriptions do not
// depend on a specific object's use of that type.
string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
2016-03-02 17:09:16 +00:00
{
// Ignore the pointer type since GLSL doesn't have pointers.
switch (type.basetype)
{
case SPIRType::Struct:
// Need OpName lookup here to get a "sensible" name for a struct.
if (backend.explicit_struct_type)
return join("struct ", to_name(type.self));
else
return to_name(type.self);
case SPIRType::Image:
case SPIRType::SampledImage:
return image_type_glsl(type, id);
case SPIRType::Sampler:
// The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing
// this distinction into the type system.
return comparison_samplers.count(id) ? "samplerShadow" : "sampler";
case SPIRType::Void:
return "void";
default:
break;
}
if (type.basetype == SPIRType::UInt && is_legacy())
SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets.");
if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
{
switch (type.basetype)
{
case SPIRType::Boolean:
return "bool";
case SPIRType::Int:
return backend.basic_int_type;
case SPIRType::UInt:
return backend.basic_uint_type;
case SPIRType::AtomicCounter:
return "atomic_uint";
case SPIRType::Half:
return "float16_t";
case SPIRType::Float:
return "float";
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
return "double";
2016-07-27 09:27:00 +00:00
case SPIRType::Int64:
return "int64_t";
case SPIRType::UInt64:
return "uint64_t";
default:
return "???";
}
}
else if (type.vecsize > 1 && type.columns == 1) // Vector builtin
{
switch (type.basetype)
{
case SPIRType::Boolean:
return join("bvec", type.vecsize);
case SPIRType::Int:
return join("ivec", type.vecsize);
case SPIRType::UInt:
return join("uvec", type.vecsize);
case SPIRType::Half:
return join("f16vec", type.vecsize);
case SPIRType::Float:
return join("vec", type.vecsize);
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
return join("dvec", type.vecsize);
2016-07-27 09:27:00 +00:00
case SPIRType::Int64:
return join("i64vec", type.vecsize);
case SPIRType::UInt64:
return join("u64vec", type.vecsize);
default:
return "???";
}
}
else if (type.vecsize == type.columns) // Simple Matrix builtin
{
switch (type.basetype)
{
case SPIRType::Boolean:
return join("bmat", type.vecsize);
case SPIRType::Int:
return join("imat", type.vecsize);
case SPIRType::UInt:
return join("umat", type.vecsize);
case SPIRType::Half:
return join("f16mat", type.vecsize);
case SPIRType::Float:
return join("mat", type.vecsize);
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
return join("dmat", type.vecsize);
2016-07-27 09:27:00 +00:00
// Matrix types not supported for int64/uint64.
default:
return "???";
}
}
else
{
switch (type.basetype)
{
case SPIRType::Boolean:
return join("bmat", type.columns, "x", type.vecsize);
case SPIRType::Int:
return join("imat", type.columns, "x", type.vecsize);
case SPIRType::UInt:
return join("umat", type.columns, "x", type.vecsize);
case SPIRType::Half:
return join("f16mat", type.columns, "x", type.vecsize);
case SPIRType::Float:
return join("mat", type.columns, "x", type.vecsize);
2016-07-27 08:59:00 +00:00
case SPIRType::Double:
return join("dmat", type.columns, "x", type.vecsize);
2016-07-27 09:27:00 +00:00
// Matrix types not supported for int64/uint64.
default:
return "???";
}
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::add_variable(unordered_set<string> &variables, string &name)
2016-03-02 17:09:16 +00:00
{
if (name.empty())
return;
// Reserved for temporaries.
if (name[0] == '_' && name.size() >= 2 && isdigit(name[1]))
{
name.clear();
return;
}
update_name_cache(variables, name);
}
void CompilerGLSL::add_variable(unordered_set<string> &variables, uint32_t id)
{
auto &name = meta[id].decoration.alias;
add_variable(variables, name);
}
void CompilerGLSL::add_local_variable_name(uint32_t id)
{
add_variable(local_variable_names, id);
}
void CompilerGLSL::add_resource_name(uint32_t id)
{
add_variable(resource_names, id);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::add_header_line(const std::string &line)
{
header_lines.push_back(line);
}
2017-11-22 18:27:03 +00:00
bool CompilerGLSL::has_extension(const std::string &ext) const
{
auto itr = find(begin(forced_extensions), end(forced_extensions), ext);
return itr != end(forced_extensions);
}
void CompilerGLSL::require_extension(const std::string &ext)
{
if (!has_extension(ext))
forced_extensions.push_back(ext);
}
void CompilerGLSL::require_extension_internal(const string &ext)
2016-03-02 17:09:16 +00:00
{
if (backend.supports_extensions && !has_extension(ext))
{
2017-11-22 18:27:03 +00:00
forced_extensions.push_back(ext);
force_recompile = true;
}
2016-03-02 17:09:16 +00:00
}
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
void CompilerGLSL::flatten_buffer_block(uint32_t id)
{
auto &var = get<SPIRVariable>(id);
auto &type = get<SPIRType>(var.basetype);
auto name = to_name(type.self, false);
auto flags = meta.at(type.self).decoration.decoration_flags;
if (!type.array.empty())
SPIRV_CROSS_THROW(name + " is an array of UBOs.");
if (type.basetype != SPIRType::Struct)
SPIRV_CROSS_THROW(name + " is not a struct.");
if (!flags.get(DecorationBlock))
Implement buffer block flattening Legacy GLSL targets do not support uniform buffers, and as such require some sort of emulation. There are two alternatives - one is to represent a uniform buffer as a uniform struct, and another one is to flatten it into an array of primitive vector types (vec4). Uniform struct have two disadvantages that make using them prohibitive in some applications: - The location assignment for struct members is arbitrary which means the application has to set each struct member one by one - Some Android drivers fail to link shader programs if both vertex and fragment shader use the same uniform struct Because of this, we need to support flattening uniform buffers into an array. This is not just important for legacy GLSL but also is sometimes useful for ESSL 3.0 where some Android drivers do not have stable UBO support. The way flattening works is the entire buffer is represented as a vec4 array; each access chain is rewritten into a combination of array accesses, swizzles and data type constructors. Specifically: - Extracting a vector or a scalar requires indexing into the array with an optional swizzle, for example CB0[13].yz for reading vec2 - Extracting a matrix or a struct requires extracting each individual vector or struct member and then combining them into the resulting object - Extracting arrays is not supported, mostly because the resulting construct is very inefficient and ESSL 1.0 does not support array constructors. Additionally, while we try to constant-fold each individual indexing operation, there are cases where we have to use dynamic index computation (specifically for indexing arrays with non-constants); so the general form of the primitive array extraction expression is: buffer[stride0*index0+...+strideN*indexN+offset] Where stride/offset are integer literals and index represents variables.
2016-12-07 05:02:15 +00:00
SPIRV_CROSS_THROW(name + " is not a block.");
if (type.member_types.empty())
SPIRV_CROSS_THROW(name + " is an empty struct.");
flattened_buffer_blocks.insert(id);
}
2016-03-02 17:09:16 +00:00
bool CompilerGLSL::check_atomic_image(uint32_t id)
{
auto &type = expression_type(id);
if (type.storage == StorageClassImage)
{
if (options.es && options.version < 320)
require_extension_internal("GL_OES_shader_image_atomic");
auto *var = maybe_get_backing_variable(id);
if (var)
{
auto &flags = meta.at(var->self).decoration.decoration_flags;
if (flags.get(DecorationNonWritable) || flags.get(DecorationNonReadable))
{
flags.clear(DecorationNonWritable);
flags.clear(DecorationNonReadable);
force_recompile = true;
}
}
return true;
}
else
return false;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::add_function_overload(const SPIRFunction &func)
{
Hasher hasher;
for (auto &arg : func.arguments)
{
// Parameters can vary with pointer type or not,
// but that will not change the signature in GLSL/HLSL,
// so strip the pointer type before hashing.
uint32_t type_id = arg.type;
auto *type = &get<SPIRType>(type_id);
while (type->pointer)
{
type_id = type->parent_type;
type = &get<SPIRType>(type_id);
}
if (!combined_image_samplers.empty())
{
// If we have combined image samplers, we cannot really trust the image and sampler arguments
// we pass down to callees, because they may be shuffled around.
// Ignore these arguments, to make sure that functions need to differ in some other way
// to be considered different overloads.
if (type->basetype == SPIRType::SampledImage ||
2018-05-15 09:16:35 +00:00
(type->basetype == SPIRType::Image && type->image.sampled == 1) || type->basetype == SPIRType::Sampler)
{
continue;
}
}
hasher.u32(type_id);
}
uint64_t types_hash = hasher.get();
auto function_name = to_name(func.self);
auto itr = function_overloads.find(function_name);
if (itr != end(function_overloads))
{
// There exists a function with this name already.
auto &overloads = itr->second;
if (overloads.count(types_hash) != 0)
{
// Overload conflict, assign a new name.
add_resource_name(func.self);
function_overloads[to_name(func.self)].insert(types_hash);
}
else
{
// Can reuse the name.
overloads.insert(types_hash);
}
}
else
{
// First time we see this function name.
add_resource_name(func.self);
function_overloads[to_name(func.self)].insert(types_hash);
}
}
void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags)
2016-03-02 17:09:16 +00:00
{
if (func.self != entry_point)
add_function_overload(func);
// Avoid shadow declarations.
local_variable_names = resource_names;
string decl;
auto &type = get<SPIRType>(func.return_type);
decl += flags_to_precision_qualifiers_glsl(type, return_flags);
decl += type_to_glsl(type);
decl += type_to_array_glsl(type);
decl += " ";
if (func.self == entry_point)
{
decl += "main";
processing_entry_point = true;
}
else
decl += to_name(func.self);
decl += "(";
2016-09-11 11:05:44 +00:00
vector<string> arglist;
for (auto &arg : func.arguments)
{
2016-09-11 11:05:44 +00:00
// Do not pass in separate images or samplers if we're remapping
// to combined image samplers.
if (skip_argument(arg.id))
continue;
// Might change the variable name if it already exists in this function.
// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
// to use same name for variables.
// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
add_local_variable_name(arg.id);
2016-09-11 11:05:44 +00:00
arglist.push_back(argument_decl(arg));
2016-09-11 10:54:08 +00:00
// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
auto *var = maybe_get<SPIRVariable>(arg.id);
if (var)
var->parameter = &arg;
}
for (auto &arg : func.shadow_arguments)
{
// Might change the variable name if it already exists in this function.
// SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation
// to use same name for variables.
// Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates.
add_local_variable_name(arg.id);
2016-09-11 11:05:44 +00:00
arglist.push_back(argument_decl(arg));
// Hold a pointer to the parameter so we can invalidate the readonly field if needed.
auto *var = maybe_get<SPIRVariable>(arg.id);
if (var)
var->parameter = &arg;
}
2016-09-11 11:05:44 +00:00
decl += merge(arglist);
decl += ")";
statement(decl);
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
2016-03-02 17:09:16 +00:00
{
// Avoid potential cycles.
if (func.active)
return;
func.active = true;
// If we depend on a function, emit that function before we emit our own function.
for (auto block : func.blocks)
{
auto &b = get<SPIRBlock>(block);
for (auto &i : b.ops)
{
auto ops = stream(i);
auto op = static_cast<Op>(i.op);
if (op == OpFunctionCall)
{
// Recursively emit functions which are called.
uint32_t id = ops[2];
emit_function(get<SPIRFunction>(id), meta[ops[1]].decoration.decoration_flags);
}
}
}
emit_function_prototype(func, return_flags);
begin_scope();
if (func.self == entry_point)
emit_entry_point_declarations();
current_function = &func;
auto &entry_block = get<SPIRBlock>(func.entry_block);
if (!func.analyzed_variable_scope)
{
analyze_variable_scope(func);
// Check if we can actually use the loop variables we found in analyze_variable_scope.
// To use multiple initializers, we need the same type and qualifiers.
for (auto block : func.blocks)
{
auto &b = get<SPIRBlock>(block);
if (b.loop_variables.size() < 2)
continue;
auto &flags = get_decoration_bitset(b.loop_variables.front());
uint32_t type = get<SPIRVariable>(b.loop_variables.front()).basetype;
bool invalid_initializers = false;
for (auto loop_variable : b.loop_variables)
{
if (flags != get_decoration_bitset(loop_variable) ||
type != get<SPIRVariable>(b.loop_variables.front()).basetype)
{
invalid_initializers = true;
break;
}
}
if (invalid_initializers)
{
for (auto loop_variable : b.loop_variables)
get<SPIRVariable>(loop_variable).loop_variable = false;
b.loop_variables.clear();
}
}
func.analyzed_variable_scope = true;
}
for (auto &v : func.local_variables)
{
auto &var = get<SPIRVariable>(v);
if (var.storage == StorageClassWorkgroup)
{
// Special variable type which cannot have initializer,
// need to be declared as standalone variables.
// Comes from MSL which can push global variables as local variables in main function.
add_local_variable_name(var.self);
statement(variable_decl(var), ";");
var.deferred_declaration = false;
}
else if (var.storage == StorageClassPrivate)
{
// These variables will not have had their CFG usage analyzed, so move it to the entry block.
// Comes from MSL which can push global variables as local variables in main function.
// We could just declare them right now, but we would miss out on an important initialization case which is
// LUT declaration in MSL.
// If we don't declare the variable when it is assigned we're forced to go through a helper function
// which copies elements one by one.
add_local_variable_name(var.self);
auto &dominated = entry_block.dominated_variables;
if (find(begin(dominated), end(dominated), var.self) == end(dominated))
entry_block.dominated_variables.push_back(var.self);
var.deferred_declaration = true;
}
else if (expression_is_lvalue(v))
{
add_local_variable_name(var.self);
if (var.initializer)
statement(variable_decl_function_local(var), ";");
else
{
// Don't declare variable until first use to declutter the GLSL output quite a lot.
// If we don't touch the variable before first branch,
// declare it then since we need variable declaration to be in top scope.
var.deferred_declaration = true;
}
}
else
{
// HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this.
// For these types (non-lvalue), we enforce forwarding through a shadowed variable.
// This means that when we OpStore to these variables, we just write in the expression ID directly.
// This breaks any kind of branching, since the variable must be statically assigned.
// Branching on samplers and images would be pretty much impossible to fake in GLSL.
var.statically_assigned = true;
}
var.loop_variable_enable = false;
// Loop variables are never declared outside their for-loop, so block any implicit declaration.
if (var.loop_variable)
var.deferred_declaration = false;
}
entry_block.loop_dominator = SPIRBlock::NoDominator;
emit_block_chain(entry_block);
end_scope();
processing_entry_point = false;
statement("");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::emit_fixup()
{
auto &execution = get_entry_point();
if (execution.model == ExecutionModelVertex)
{
if (options.vertex.fixup_clipspace)
{
const char *suffix = backend.float_literal_suffix ? "f" : "";
statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;");
}
if (options.vertex.flip_vert_y)
statement("gl_Position.y = -gl_Position.y;");
}
2016-03-02 17:09:16 +00:00
}
bool CompilerGLSL::flush_phi_required(uint32_t from, uint32_t to)
{
auto &child = get<SPIRBlock>(to);
for (auto &phi : child.phi_variables)
if (phi.parent == from)
return true;
return false;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::flush_phi(uint32_t from, uint32_t to)
{
auto &child = get<SPIRBlock>(to);
2016-03-02 17:09:16 +00:00
for (auto &phi : child.phi_variables)
{
if (phi.parent == from)
{
auto &var = get<SPIRVariable>(phi.function_variable);
// A Phi variable might be a loop variable, so flush to static expression.
if (var.loop_variable && !var.loop_variable_enable)
var.static_expression = phi.local_variable;
else
{
flush_variable_declaration(phi.function_variable);
// This might be called in continue block, so make sure we
2017-09-25 08:16:45 +00:00
// use this to emit ESSL 1.0 compliant increments/decrements.
auto lhs = to_expression(phi.function_variable);
auto rhs = to_expression(phi.local_variable);
if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs))
statement(lhs, " = ", rhs, ";");
}
register_write(phi.function_variable);
}
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
2016-03-02 17:09:16 +00:00
{
auto &to_block = get<SPIRBlock>(to);
if (from == to)
return;
assert(is_continue(to));
if (to_block.complex_continue)
{
// Just emit the whole block chain as is.
auto usage_counts = expression_usage_counts;
auto invalid = invalid_expressions;
emit_block_chain(to_block);
// Expression usage counts and invalid expressions
// are moot after returning from the continue block.
// Since we emit the same block multiple times,
// we don't want to invalidate ourselves.
expression_usage_counts = usage_counts;
invalid_expressions = invalid;
}
else
{
auto &from_block = get<SPIRBlock>(from);
bool outside_control_flow = false;
uint32_t loop_dominator = 0;
// FIXME: Refactor this to not use the old loop_dominator tracking.
if (from_block.merge_block)
{
// If we are a loop header, we don't set the loop dominator,
// so just use "self" here.
loop_dominator = from;
}
else if (from_block.loop_dominator != SPIRBlock::NoDominator)
{
loop_dominator = from_block.loop_dominator;
}
if (loop_dominator != 0)
{
auto &dominator = get<SPIRBlock>(loop_dominator);
// For non-complex continue blocks, we implicitly branch to the continue block
// by having the continue block be part of the loop header in for (; ; continue-block).
outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block);
}
// Some simplification for for-loops. We always end up with a useless continue;
// statement since we branch to a loop block.
// Walk the CFG, if we uncoditionally execute the block calling continue assuming we're in the loop block,
// we can avoid writing out an explicit continue statement.
// Similar optimization to return statements if we know we're outside flow control.
if (!outside_control_flow)
statement("continue;");
}
}
void CompilerGLSL::branch(uint32_t from, uint32_t to)
{
flush_phi(from, to);
flush_control_dependent_expressions(from);
flush_all_active_variables();
// This is only a continue if we branch to our loop dominator.
if (loop_blocks.find(to) != end(loop_blocks) && get<SPIRBlock>(from).loop_dominator == to)
{
// This can happen if we had a complex continue block which was emitted.
// Once the continue block tries to branch to the loop header, just emit continue;
// and end the chain here.
statement("continue;");
}
else if (is_break(to))
statement("break;");
else if (is_continue(to) || (from == to))
{
// For from == to case can happen for a do-while loop which branches into itself.
// We don't mark these cases as continue blocks, but the only possible way to branch into
// ourselves is through means of continue blocks.
branch_to_continue(from, to);
}
else if (!is_conditional(to))
emit_block_chain(get<SPIRBlock>(to));
// It is important that we check for break before continue.
// A block might serve two purposes, a break block for the inner scope, and
// a continue block in the outer scope.
// Inner scope always takes precedence.
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block)
{
// If we branch directly to a selection merge target, we don't really need a code path.
bool true_sub = !is_conditional(true_block);
bool false_sub = !is_conditional(false_block);
if (true_sub)
{
statement("if (", to_expression(cond), ")");
begin_scope();
branch(from, true_block);
end_scope();
if (false_sub || is_continue(false_block) || is_break(false_block))
{
statement("else");
begin_scope();
branch(from, false_block);
end_scope();
}
else if (flush_phi_required(from, false_block))
{
statement("else");
begin_scope();
flush_phi(from, false_block);
end_scope();
}
}
else if (false_sub && !true_sub)
{
// Only need false path, use negative conditional.
statement("if (!", to_enclosed_expression(cond), ")");
begin_scope();
branch(from, false_block);
end_scope();
if (is_continue(true_block) || is_break(true_block))
{
statement("else");
begin_scope();
branch(from, true_block);
end_scope();
}
else if (flush_phi_required(from, true_block))
{
statement("else");
begin_scope();
flush_phi(from, true_block);
end_scope();
}
}
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::propagate_loop_dominators(const SPIRBlock &block)
{
// Propagate down the loop dominator block, so that dominated blocks can back trace.
if (block.merge == SPIRBlock::MergeLoop || block.loop_dominator)
{
uint32_t dominator = block.merge == SPIRBlock::MergeLoop ? block.self : block.loop_dominator;
2016-05-23 11:30:02 +00:00
auto set_dominator = [this](uint32_t self, uint32_t new_dominator) {
auto &dominated_block = this->get<SPIRBlock>(self);
// If we already have a loop dominator, we're trying to break out to merge targets
// which should not update the loop dominator.
if (!dominated_block.loop_dominator)
dominated_block.loop_dominator = new_dominator;
};
// After merging a loop, we inherit the loop dominator always.
if (block.merge_block)
set_dominator(block.merge_block, block.loop_dominator);
if (block.true_block)
set_dominator(block.true_block, dominator);
if (block.false_block)
set_dominator(block.false_block, dominator);
if (block.next_block)
set_dominator(block.next_block, dominator);
for (auto &c : block.cases)
set_dominator(c.block, dominator);
// In older glslang output continue_block can be == loop header.
if (block.continue_block && block.continue_block != block.self)
set_dominator(block.continue_block, dominator);
}
2016-03-02 17:09:16 +00:00
}
// FIXME: This currently cannot handle complex continue blocks
// as in do-while.
// This should be seen as a "trivial" continue block.
string CompilerGLSL::emit_continue_block(uint32_t continue_block)
{
auto *block = &get<SPIRBlock>(continue_block);
// While emitting the continue block, declare_temporary will check this
// if we have to emit temporaries.
current_continue_block = block;
vector<string> statements;
// Capture all statements into our list.
auto *old = redirect_statement;
redirect_statement = &statements;
// Stamp out all blocks one after each other.
while (loop_blocks.find(block->self) == end(loop_blocks))
{
propagate_loop_dominators(*block);
// Write out all instructions we have in this block.
2017-10-20 14:18:02 +00:00
emit_block_instructions(*block);
// For plain branchless for/while continue blocks.
if (block->next_block)
{
flush_phi(continue_block, block->next_block);
block = &get<SPIRBlock>(block->next_block);
}
// For do while blocks. The last block will be a select block.
else if (block->true_block)
{
flush_phi(continue_block, block->true_block);
block = &get<SPIRBlock>(block->true_block);
}
}
// Restore old pointer.
redirect_statement = old;
// Somewhat ugly, strip off the last ';' since we use ',' instead.
// Ideally, we should select this behavior in statement().
for (auto &s : statements)
{
if (!s.empty() && s.back() == ';')
s.erase(s.size() - 1, 1);
}
current_continue_block = nullptr;
return merge(statements);
2016-03-02 17:09:16 +00:00
}
string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block)
{
if (block.loop_variables.empty())
return "";
bool same_types = for_loop_initializers_are_same_type(block);
// We can only declare for loop initializers if all variables are of same type.
// If we cannot do this, declare individual variables before the loop header.
// We might have a loop variable candidate which was not assigned to for some reason.
uint32_t missing_initializers = 0;
for (auto &variable : block.loop_variables)
{
uint32_t expr = get<SPIRVariable>(variable).static_expression;
// Sometimes loop variables are initialized with OpUndef, but we can just declare
// a plain variable without initializer in this case.
if (expr == 0 || ids[expr].get_type() == TypeUndef)
missing_initializers++;
}
if (block.loop_variables.size() == 1 && missing_initializers == 0)
{
return variable_decl(get<SPIRVariable>(block.loop_variables.front()));
}
else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size()))
{
for (auto &loop_var : block.loop_variables)
statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
return "";
}
else
{
// We have a mix of loop variables, either ones with a clear initializer, or ones without.
// Separate the two streams.
string expr;
for (auto &loop_var : block.loop_variables)
{
uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression;
if (static_expr == 0 || ids[static_expr].get_type() == TypeUndef)
{
statement(variable_decl(get<SPIRVariable>(loop_var)), ";");
}
else
{
if (expr.empty())
{
// For loop initializers are of the form <type id = value, id = value, id = value, etc ...
auto &var = get<SPIRVariable>(loop_var);
auto &type = get<SPIRType>(var.basetype);
expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " ");
}
else
expr += ", ";
auto &v = get<SPIRVariable>(loop_var);
expr += join(to_name(loop_var), " = ", to_expression(v.static_expression));
}
}
return expr;
}
}
bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block)
{
if (block.loop_variables.size() <= 1)
return true;
uint32_t expected = 0;
Bitset expected_flags;
for (auto &var : block.loop_variables)
{
// Don't care about uninitialized variables as they will not be part of the initializers.
uint32_t expr = get<SPIRVariable>(var).static_expression;
if (expr == 0 || ids[expr].get_type() == TypeUndef)
continue;
if (expected == 0)
{
expected = get<SPIRVariable>(var).basetype;
expected_flags = get_decoration_bitset(var);
}
else if (expected != get<SPIRVariable>(var).basetype)
return false;
// Precision flags and things like that must also match.
if (expected_flags != get_decoration_bitset(var))
return false;
}
return true;
}
2016-03-02 17:09:16 +00:00
bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method)
{
SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop)
{
uint32_t current_count = statement_count;
// If we're trying to create a true for loop,
// we need to make sure that all opcodes before branch statement do not actually emit any code.
// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
2017-10-20 14:18:02 +00:00
emit_block_instructions(block);
bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries);
// This can work! We only did trivial things which could be forwarded in block body!
if (current_count == statement_count && condition_is_temporary)
{
switch (continue_type)
{
case SPIRBlock::ForLoop:
{
// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
flush_undeclared_variables(block);
// Important that we do this in this order because
// emitting the continue block can invalidate the condition expression.
auto initializer = emit_for_loop_initializers(block);
auto condition = to_expression(block.condition);
if (method != SPIRBlock::MergeToSelectContinueForLoop)
{
auto continue_block = emit_continue_block(block.continue_block);
statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
}
else
statement("for (", initializer, "; ", condition, "; )");
break;
}
case SPIRBlock::WhileLoop:
// This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header.
flush_undeclared_variables(block);
statement("while (", to_expression(block.condition), ")");
break;
default:
SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
}
begin_scope();
return true;
}
else
{
block.disable_block_optimization = true;
force_recompile = true;
begin_scope(); // We'll see an end_scope() later.
return false;
}
}
else if (method == SPIRBlock::MergeToDirectForLoop)
{
auto &child = get<SPIRBlock>(block.next_block);
// This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header.
flush_undeclared_variables(child);
uint32_t current_count = statement_count;
// If we're trying to create a true for loop,
// we need to make sure that all opcodes before branch statement do not actually emit any code.
// We can then take the condition expression and create a for (; cond ; ) { body; } structure instead.
2017-10-20 14:18:02 +00:00
emit_block_instructions(child);
bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries);
if (current_count == statement_count && condition_is_temporary)
{
propagate_loop_dominators(child);
switch (continue_type)
{
case SPIRBlock::ForLoop:
{
// Important that we do this in this order because
// emitting the continue block can invalidate the condition expression.
auto initializer = emit_for_loop_initializers(block);
auto condition = to_expression(child.condition);
auto continue_block = emit_continue_block(block.continue_block);
statement("for (", initializer, "; ", condition, "; ", continue_block, ")");
break;
}
case SPIRBlock::WhileLoop:
statement("while (", to_expression(child.condition), ")");
break;
default:
SPIRV_CROSS_THROW("For/while loop detected, but need while/for loop semantics.");
}
begin_scope();
branch(child.self, child.true_block);
return true;
}
else
{
block.disable_block_optimization = true;
force_recompile = true;
begin_scope(); // We'll see an end_scope() later.
return false;
}
}
else
return false;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block)
2016-03-02 17:09:16 +00:00
{
// Enforce declaration order for regression testing purposes.
sort(begin(block.dominated_variables), end(block.dominated_variables));
for (auto &v : block.dominated_variables)
{
auto &var = get<SPIRVariable>(v);
if (var.deferred_declaration)
statement(variable_decl(var), ";");
var.deferred_declaration = false;
}
2016-03-02 17:09:16 +00:00
}
2018-03-24 03:16:18 +00:00
void CompilerGLSL::emit_hoisted_temporaries(vector<pair<uint32_t, uint32_t>> &temporaries)
2016-03-02 17:09:16 +00:00
{
// If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header.
// Need to sort these to ensure that reference output is stable.
2018-03-24 03:16:18 +00:00
sort(begin(temporaries), end(temporaries),
[](const pair<uint32_t, uint32_t> &a, const pair<uint32_t, uint32_t> &b) { return a.second < b.second; });
2018-03-24 03:16:18 +00:00
for (auto &tmp : temporaries)
{
add_local_variable_name(tmp.second);
auto flags = meta[tmp.second].decoration.decoration_flags;
auto &type = get<SPIRType>(tmp.first);
statement(flags_to_precision_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), ";");
2018-03-24 03:16:18 +00:00
hoisted_temporaries.insert(tmp.second);
forced_temporaries.insert(tmp.second);
// The temporary might be read from before it's assigned, set up the expression now.
set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true);
}
2018-03-24 03:16:18 +00:00
}
void CompilerGLSL::emit_block_chain(SPIRBlock &block)
{
propagate_loop_dominators(block);
bool select_branch_to_true_block = false;
bool skip_direct_branch = false;
bool emitted_for_loop_header = false;
bool force_complex_continue_block = false;
emit_hoisted_temporaries(block.declare_temporary);
SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone;
if (block.continue_block)
continue_type = continue_block_type(get<SPIRBlock>(block.continue_block));
2016-12-16 11:43:12 +00:00
// If we have loop variables, stop masking out access to the variable now.
for (auto var : block.loop_variables)
get<SPIRVariable>(var).loop_variable_enable = true;
// This is the method often used by spirv-opt to implement loops.
// The loop header goes straight into the continue block.
// However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block,
// it *MUST* be used in the continue block. This loop method will not work.
if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop))
{
flush_undeclared_variables(block);
if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop))
{
select_branch_to_true_block = true;
emitted_for_loop_header = true;
force_complex_continue_block = true;
}
}
// This is the older loop behavior in glslang which branches to loop body directly from the loop header.
else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop))
{
flush_undeclared_variables(block);
if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop))
{
// The body of while, is actually just the true block, so always branch there unconditionally.
select_branch_to_true_block = true;
emitted_for_loop_header = true;
}
}
// This is the newer loop behavior in glslang which branches from Loop header directly to
// a new block, which in turn has a OpBranchSelection without a selection merge.
else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop))
{
flush_undeclared_variables(block);
if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop))
{
skip_direct_branch = true;
emitted_for_loop_header = true;
}
}
else if (continue_type == SPIRBlock::DoWhileLoop)
{
flush_undeclared_variables(block);
2018-03-24 03:16:18 +00:00
// We have some temporaries where the loop header is the dominator.
// We risk a case where we have code like:
// for (;;) { create-temporary; break; } consume-temporary;
// so force-declare temporaries here.
emit_hoisted_temporaries(block.potential_declare_temporary);
statement("do");
begin_scope();
2017-10-20 14:18:02 +00:00
emit_block_instructions(block);
}
else if (block.merge == SPIRBlock::MergeLoop)
{
flush_undeclared_variables(block);
// We have a generic loop without any distinguishable pattern like for, while or do while.
get<SPIRBlock>(block.continue_block).complex_continue = true;
continue_type = SPIRBlock::ComplexLoop;
2018-03-24 03:16:18 +00:00
// We have some temporaries where the loop header is the dominator.
// We risk a case where we have code like:
// for (;;) { create-temporary; break; } consume-temporary;
// so force-declare temporaries here.
emit_hoisted_temporaries(block.potential_declare_temporary);
statement("for (;;)");
begin_scope();
2017-10-20 14:18:02 +00:00
emit_block_instructions(block);
}
else
{
2017-10-20 14:18:02 +00:00
emit_block_instructions(block);
}
// If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem
// as writes to said loop variables might have been masked out, we need a recompile.
if (!emitted_for_loop_header && !block.loop_variables.empty())
{
force_recompile = true;
for (auto var : block.loop_variables)
get<SPIRVariable>(var).loop_variable = false;
block.loop_variables.clear();
}
flush_undeclared_variables(block);
bool emit_next_block = true;
// Handle end of block.
switch (block.terminator)
{
case SPIRBlock::Direct:
// True when emitting complex continue block.
if (block.loop_dominator == block.next_block)
{
branch(block.self, block.next_block);
emit_next_block = false;
}
// True if MergeToDirectForLoop succeeded.
else if (skip_direct_branch)
emit_next_block = false;
else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block))
{
branch(block.self, block.next_block);
emit_next_block = false;
}
break;
case SPIRBlock::Select:
// True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded.
if (select_branch_to_true_block)
{
if (force_complex_continue_block)
{
assert(block.true_block == block.continue_block);
// We're going to emit a continue block directly here, so make sure it's marked as complex.
auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue;
bool old_complex = complex_continue;
complex_continue = true;
branch(block.self, block.true_block);
complex_continue = old_complex;
}
else
branch(block.self, block.true_block);
}
else
branch(block.self, block.condition, block.true_block, block.false_block);
break;
case SPIRBlock::MultiSelect:
{
auto &type = expression_type(block.condition);
bool uint32_t_case = type.basetype == SPIRType::UInt;
statement("switch (", to_expression(block.condition), ")");
begin_scope();
for (auto &c : block.cases)
{
auto case_value =
uint32_t_case ? convert_to_string(uint32_t(c.value)) : convert_to_string(int32_t(c.value));
statement("case ", case_value, ":");
begin_scope();
branch(block.self, c.block);
end_scope();
}
if (block.default_block != block.next_block)
{
statement("default:");
begin_scope();
if (is_break(block.default_block))
SPIRV_CROSS_THROW("Cannot break; out of a switch statement and out of a loop at the same time ...");
branch(block.self, block.default_block);
end_scope();
}
else if (flush_phi_required(block.self, block.next_block))
{
statement("default:");
begin_scope();
flush_phi(block.self, block.next_block);
statement("break;");
end_scope();
}
end_scope();
break;
}
case SPIRBlock::Return:
2018-03-13 13:03:35 +00:00
for (auto &line : current_function->fixup_statements)
statement(line);
if (processing_entry_point)
emit_fixup();
if (block.return_value)
{
auto &type = expression_type(block.return_value);
if (!type.array.empty() && !backend.can_return_array)
{
// If we cannot return arrays, we will have a special out argument we can write to instead.
// The backend is responsible for setting this up, and redirection the return values as appropriate.
if (ids.at(block.return_value).get_type() != TypeUndef)
emit_array_copy("SPIRV_Cross_return_value", block.return_value);
if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
block.loop_dominator != SPIRBlock::NoDominator)
{
statement("return;");
}
}
else
{
// OpReturnValue can return Undef, so don't emit anything for this case.
if (ids.at(block.return_value).get_type() != TypeUndef)
statement("return ", to_expression(block.return_value), ";");
}
}
// If this block is the very final block and not called from control flow,
// we do not need an explicit return which looks out of place. Just end the function here.
// In the very weird case of for(;;) { return; } executing return is unconditional,
// but we actually need a return here ...
else if (!block_is_outside_flow_control_from_block(get<SPIRBlock>(current_function->entry_block), block) ||
block.loop_dominator != SPIRBlock::NoDominator)
{
statement("return;");
}
break;
case SPIRBlock::Kill:
statement(backend.discard_literal, ";");
break;
case SPIRBlock::Unreachable:
emit_next_block = false;
break;
default:
SPIRV_CROSS_THROW("Unimplemented block terminator.");
}
if (block.next_block && emit_next_block)
{
// If we hit this case, we're dealing with an unconditional branch, which means we will output
// that block after this. If we had selection merge, we already flushed phi variables.
if (block.merge != SPIRBlock::MergeSelection)
flush_phi(block.self, block.next_block);
// For merge selects we might have ignored the fact that a merge target
// could have been a break; or continue;
// We will need to deal with it here.
if (is_loop_break(block.next_block))
{
// Cannot check for just break, because switch statements will also use break.
assert(block.merge == SPIRBlock::MergeSelection);
statement("break;");
}
else if (is_continue(block.next_block))
{
assert(block.merge == SPIRBlock::MergeSelection);
branch_to_continue(block.self, block.next_block);
}
else
emit_block_chain(get<SPIRBlock>(block.next_block));
}
if (block.merge == SPIRBlock::MergeLoop)
{
if (continue_type == SPIRBlock::DoWhileLoop)
{
// Make sure that we run the continue block to get the expressions set, but this
// should become an empty string.
// We have no fallbacks if we cannot forward everything to temporaries ...
auto statements = emit_continue_block(block.continue_block);
if (!statements.empty())
{
// The DoWhile block has side effects, force ComplexLoop pattern next pass.
get<SPIRBlock>(block.continue_block).complex_continue = true;
force_recompile = true;
}
end_scope_decl(join("while (", to_expression(get<SPIRBlock>(block.continue_block).condition), ")"));
}
else
end_scope();
// We cannot break out of two loops at once, so don't check for break; here.
// Using block.self as the "from" block isn't quite right, but it has the same scope
// and dominance structure, so it's fine.
if (is_continue(block.merge_block))
branch_to_continue(block.self, block.merge_block);
else
emit_block_chain(get<SPIRBlock>(block.merge_block));
}
// Forget about control dependent expressions now.
block.invalidate_expressions.clear();
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::begin_scope()
{
statement("{");
indent++;
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::end_scope()
{
if (!indent)
SPIRV_CROSS_THROW("Popping empty indent stack.");
indent--;
statement("}");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::end_scope_decl()
{
if (!indent)
SPIRV_CROSS_THROW("Popping empty indent stack.");
indent--;
statement("};");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::end_scope_decl(const string &decl)
{
if (!indent)
SPIRV_CROSS_THROW("Popping empty indent stack.");
indent--;
statement("} ", decl, ";");
2016-03-02 17:09:16 +00:00
}
void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length)
{
// If our variable is remapped, and we rely on type-remapping information as
// well, then we cannot pass the variable as a function parameter.
// Fixing this is non-trivial without stamping out variants of the same function,
// so for now warn about this and suggest workarounds instead.
for (uint32_t i = 0; i < length; i++)
{
auto *var = maybe_get<SPIRVariable>(args[i]);
if (!var || !var->remapped_variable)
continue;
auto &type = get<SPIRType>(var->basetype);
if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData)
{
SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. "
"This will not work correctly because type-remapping information is lost. "
"To workaround, please consider not passing the subpass input as a function parameter, "
"or use in/out variables instead which do not need type remapping information.");
}
}
}
2018-01-09 11:07:07 +00:00
const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr)
{
// FIXME: This is kind of hacky. There should be a cleaner way.
auto offset = uint32_t(&instr - current_emitting_block->ops.data());
if ((offset + 1) < current_emitting_block->ops.size())
return &current_emitting_block->ops[offset + 1];
else
return nullptr;
}
2018-01-09 11:41:13 +00:00
uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics)
{
2018-01-09 11:51:21 +00:00
return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask |
MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask |
MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask);
2018-01-09 11:41:13 +00:00
}
void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id)
{
statement(lhs, " = ", to_expression(rhs_id), ";");
}