Honor NoContraction qualifier.
For GLSL and HLSL we force a temporary and mark it as precise. MSL is a little weird here, but we can piggyback on the invariant float math option to force fma() operations everywhere.
parent 0eeaffe048
commit e47a30e807
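For orientation before the diff: a minimal, hypothetical plain-C++ sketch (not part of the commit; std::fma stands in for Metal's fma()) of the helper identities the MSL backend emits as spvFMul/spvFAdd/spvFSub. Routing each operation through fma() leaves no separate multiply/add pair for the downstream compiler to contract, which is the property NoContraction asks for.

// Plain C++ sketch (assumption: std::fma behaves like Metal's fma for this purpose).
// Mirrors the spvFMul/spvFAdd/spvFSub helpers in the MSL reference output below.
#include <cmath>
#include <cstdio>

template <typename T> T spvFMul(T l, T r) { return std::fma(l, r, T(0)); }  // l * r, rounded once
template <typename T> T spvFAdd(T l, T r) { return std::fma(T(1), l, r); }  // l + r, rounded once
template <typename T> T spvFSub(T l, T r) { return std::fma(T(-1), r, l); } // l - r, rounded once

int main()
{
    float a = 1.5f, b = 2.25f, c = -0.5f;
    // "precise mad = a * b + c" expressed as two separately rounded steps,
    // so no mul/add pair remains for the compiler to fuse behind our back.
    float mad = spvFAdd(spvFMul(a, b), c);
    std::printf("%f\n", mad); // prints 2.875000
    return 0;
}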
reference/opt/shaders-hlsl/vert/no-contraction.vert (new file)
@@ -0,0 +1,39 @@
static float4 gl_Position;
static float4 vA;
static float4 vB;
static float4 vC;

struct SPIRV_Cross_Input
{
    float4 vA : TEXCOORD0;
    float4 vB : TEXCOORD1;
    float4 vC : TEXCOORD2;
};

struct SPIRV_Cross_Output
{
    float4 gl_Position : SV_Position;
};

void vert_main()
{
    precise float4 _15 = vA * vB;
    precise float4 _19 = vA + vB;
    precise float4 _23 = vA - vB;
    precise float4 _30 = _15 + vC;
    precise float4 _34 = _15 + _19;
    precise float4 _36 = _34 + _23;
    precise float4 _38 = _36 + _30;
    gl_Position = _38;
}

SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
    vA = stage_input.vA;
    vB = stage_input.vB;
    vC = stage_input.vC;
    vert_main();
    SPIRV_Cross_Output stage_output;
    stage_output.gl_Position = gl_Position;
    return stage_output;
}

reference/opt/shaders-msl/vert/no-contraction.vert (new file)
@@ -0,0 +1,88 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct main0_out
{
    float4 gl_Position [[position]];
};

struct main0_in
{
    float4 vA [[attribute(0)]];
    float4 vB [[attribute(1)]];
    float4 vC [[attribute(2)]];
};

template<typename T>
T spvFMul(T l, T r)
{
    return fma(l, r, T(0));
}

template<typename T, int Cols, int Rows>
vec<T, Cols> spvFMulVectorMatrix(vec<T, Rows> v, matrix<T, Cols, Rows> m)
{
    vec<T, Cols> res = vec<T, Cols>(0);
    for (uint i = Rows; i > 0; --i)
    {
        vec<T, Cols> tmp(0);
        for (uint j = 0; j < Cols; ++j)
        {
            tmp[j] = m[j][i - 1];
        }
        res = fma(tmp, vec<T, Cols>(v[i - 1]), res);
    }
    return res;
}

template<typename T, int Cols, int Rows>
vec<T, Rows> spvFMulMatrixVector(matrix<T, Cols, Rows> m, vec<T, Cols> v)
{
    vec<T, Rows> res = vec<T, Rows>(0);
    for (uint i = Cols; i > 0; --i)
    {
        res = fma(m[i - 1], vec<T, Rows>(v[i - 1]), res);
    }
    return res;
}

template<typename T, int LCols, int LRows, int RCols, int RRows>
matrix<T, RCols, LRows> spvFMulMatrixMatrix(matrix<T, LCols, LRows> l, matrix<T, RCols, RRows> r)
{
    matrix<T, RCols, LRows> res;
    for (uint i = 0; i < RCols; i++)
    {
        vec<T, RCols> tmp(0);
        for (uint j = 0; j < LCols; j++)
        {
            tmp = fma(vec<T, RCols>(r[i][j]), l[j], tmp);
        }
        res[i] = tmp;
    }
    return res;
}

template<typename T>
T spvFAdd(T l, T r)
{
    return fma(T(1), l, r);
}

template<typename T>
T spvFSub(T l, T r)
{
    return fma(T(-1), r, l);
}

vertex main0_out main0(main0_in in [[stage_in]])
{
    main0_out out = {};
    float4 _15 = spvFMul(in.vA, in.vB);
    out.gl_Position = spvFAdd(spvFAdd(spvFAdd(_15, spvFAdd(in.vA, in.vB)), spvFSub(in.vA, in.vB)), spvFAdd(_15, in.vC));
    return out;
}

reference/opt/shaders/vert/no-contraction.vert (new file)
@@ -0,0 +1,18 @@
#version 450

layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;

void main()
{
    precise vec4 _15 = vA * vB;
    precise vec4 _19 = vA + vB;
    precise vec4 _23 = vA - vB;
    precise vec4 _30 = _15 + vC;
    precise vec4 _34 = _15 + _19;
    precise vec4 _36 = _34 + _23;
    precise vec4 _38 = _36 + _30;
    gl_Position = _38;
}

reference/shaders-hlsl/vert/no-contraction.vert (new file)
@@ -0,0 +1,45 @@
static float4 gl_Position;
static float4 vA;
static float4 vB;
static float4 vC;

struct SPIRV_Cross_Input
{
    float4 vA : TEXCOORD0;
    float4 vB : TEXCOORD1;
    float4 vC : TEXCOORD2;
};

struct SPIRV_Cross_Output
{
    float4 gl_Position : SV_Position;
};

void vert_main()
{
    precise float4 _15 = vA * vB;
    float4 mul = _15;
    precise float4 _19 = vA + vB;
    float4 add = _19;
    precise float4 _23 = vA - vB;
    float4 sub = _23;
    precise float4 _27 = vA * vB;
    precise float4 _30 = _27 + vC;
    float4 mad = _30;
    precise float4 _34 = mul + add;
    precise float4 _36 = _34 + sub;
    precise float4 _38 = _36 + mad;
    float4 summed = _38;
    gl_Position = summed;
}

SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
    vA = stage_input.vA;
    vB = stage_input.vB;
    vC = stage_input.vC;
    vert_main();
    SPIRV_Cross_Output stage_output;
    stage_output.gl_Position = gl_Position;
    return stage_output;
}

reference/shaders-msl/vert/no-contraction.vert (new file)
@@ -0,0 +1,92 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct main0_out
{
    float4 gl_Position [[position]];
};

struct main0_in
{
    float4 vA [[attribute(0)]];
    float4 vB [[attribute(1)]];
    float4 vC [[attribute(2)]];
};

template<typename T>
T spvFMul(T l, T r)
{
    return fma(l, r, T(0));
}

template<typename T, int Cols, int Rows>
vec<T, Cols> spvFMulVectorMatrix(vec<T, Rows> v, matrix<T, Cols, Rows> m)
{
    vec<T, Cols> res = vec<T, Cols>(0);
    for (uint i = Rows; i > 0; --i)
    {
        vec<T, Cols> tmp(0);
        for (uint j = 0; j < Cols; ++j)
        {
            tmp[j] = m[j][i - 1];
        }
        res = fma(tmp, vec<T, Cols>(v[i - 1]), res);
    }
    return res;
}

template<typename T, int Cols, int Rows>
vec<T, Rows> spvFMulMatrixVector(matrix<T, Cols, Rows> m, vec<T, Cols> v)
{
    vec<T, Rows> res = vec<T, Rows>(0);
    for (uint i = Cols; i > 0; --i)
    {
        res = fma(m[i - 1], vec<T, Rows>(v[i - 1]), res);
    }
    return res;
}

template<typename T, int LCols, int LRows, int RCols, int RRows>
matrix<T, RCols, LRows> spvFMulMatrixMatrix(matrix<T, LCols, LRows> l, matrix<T, RCols, RRows> r)
{
    matrix<T, RCols, LRows> res;
    for (uint i = 0; i < RCols; i++)
    {
        vec<T, RCols> tmp(0);
        for (uint j = 0; j < LCols; j++)
        {
            tmp = fma(vec<T, RCols>(r[i][j]), l[j], tmp);
        }
        res[i] = tmp;
    }
    return res;
}

template<typename T>
T spvFAdd(T l, T r)
{
    return fma(T(1), l, r);
}

template<typename T>
T spvFSub(T l, T r)
{
    return fma(T(-1), r, l);
}

vertex main0_out main0(main0_in in [[stage_in]])
{
    main0_out out = {};
    float4 mul = spvFMul(in.vA, in.vB);
    float4 add = spvFAdd(in.vA, in.vB);
    float4 sub = spvFSub(in.vA, in.vB);
    float4 mad = spvFAdd(spvFMul(in.vA, in.vB), in.vC);
    float4 summed = spvFAdd(spvFAdd(spvFAdd(mul, add), sub), mad);
    out.gl_Position = summed;
    return out;
}

reference/shaders/vert/no-contraction.vert (new file)
@@ -0,0 +1,24 @@
#version 450

layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;

void main()
{
    precise vec4 _15 = vA * vB;
    vec4 mul = _15;
    precise vec4 _19 = vA + vB;
    vec4 add = _19;
    precise vec4 _23 = vA - vB;
    vec4 sub = _23;
    precise vec4 _27 = vA * vB;
    precise vec4 _30 = _27 + vC;
    vec4 mad = _30;
    precise vec4 _34 = mul + add;
    precise vec4 _36 = _34 + sub;
    precise vec4 _38 = _36 + mad;
    vec4 summed = _38;
    gl_Position = summed;
}

shaders-hlsl/vert/no-contraction.vert (new file)
@@ -0,0 +1,15 @@
#version 450

layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;

void main()
{
    precise vec4 mul = vA * vB;
    precise vec4 add = vA + vB;
    precise vec4 sub = vA - vB;
    precise vec4 mad = vA * vB + vC;
    precise vec4 summed = mul + add + sub + mad;
    gl_Position = summed;
}

shaders-msl/vert/no-contraction.vert (new file)
@@ -0,0 +1,15 @@
#version 450

layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;

void main()
{
    precise vec4 mul = vA * vB;
    precise vec4 add = vA + vB;
    precise vec4 sub = vA - vB;
    precise vec4 mad = vA * vB + vC;
    precise vec4 summed = mul + add + sub + mad;
    gl_Position = summed;
}

shaders/vert/no-contraction.vert (new file)
@@ -0,0 +1,15 @@
#version 450

layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;

void main()
{
    precise vec4 mul = vA * vB;
    precise vec4 add = vA + vB;
    precise vec4 sub = vA - vB;
    precise vec4 mad = vA * vB + vC;
    precise vec4 summed = mul + add + sub + mad;
    gl_Position = summed;
}

@@ -559,18 +559,19 @@ string CompilerGLSL::compile()
 {
     ir.fixup_reserved_names();
 
-    if (options.vulkan_semantics)
-        backend.allow_precision_qualifiers = true;
-    else
+    if (!options.vulkan_semantics)
     {
         // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
         backend.nonuniform_qualifier = "";
         backend.needs_row_major_load_workaround = true;
     }
+    backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
     backend.force_gl_in_out_block = true;
     backend.supports_extensions = true;
     backend.use_array_constructor = true;
 
+    backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320);
+
     if (is_legacy_es())
         backend.support_case_fallthrough = false;
 
@@ -5545,7 +5546,12 @@ void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint3
 
 void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
 {
-    bool forward = should_forward(op0) && should_forward(op1);
+    // Various FP arithmetic opcodes such as add, sub, mul will hit this.
+    bool force_temporary_precise = backend.support_precise_qualifier &&
+                                   has_decoration(result_id, DecorationNoContraction) &&
+                                   type_is_floating_point(get<SPIRType>(result_type));
+    bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
 
     emit_op(result_type, result_id,
             join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
 
@@ -12728,7 +12734,7 @@ void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
 {
 }
 
-const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
+string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
 {
     // GL_EXT_buffer_reference variables can be marked as restrict.
     if (flags.get(DecorationRestrictPointerEXT))
@@ -12740,6 +12746,11 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
         type.basetype != SPIRType::Sampler)
         return "";
 
+    string qual;
+
+    if (flags.get(DecorationNoContraction) && backend.support_precise_qualifier)
+        qual = "precise ";
+
     if (options.es)
     {
         auto &execution = get_entry_point();
@@ -12754,7 +12765,7 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
                 options.fragment.default_int_precision == Options::Mediump &&
                 execution.model == ExecutionModelFragment;
 
-            return implied_fmediump || implied_imediump ? "" : "mediump ";
+            qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
         }
         else
         {
@@ -12768,7 +12779,7 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
                 execution.model == ExecutionModelFragment) ||
                 (execution.model != ExecutionModelFragment));
 
-            return implied_fhighp || implied_ihighp ? "" : "highp ";
+            qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
         }
     }
     else if (backend.allow_precision_qualifiers)
@@ -12776,18 +12787,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
         // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
         // The default is highp however, so only emit mediump in the rare case that a shader has these.
         if (flags.get(DecorationRelaxedPrecision))
-            return "mediump ";
-        else
-            return "";
+            qual += "mediump ";
     }
-    else
-        return "";
+
+    return qual;
 }
 
-const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
+string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
 {
     auto &type = expression_type(id);
-    bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
+    bool use_precision_qualifiers = backend.allow_precision_qualifiers;
     if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
     {
         // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.
@@ -581,6 +581,7 @@ protected:
         bool use_array_constructor = false;
         bool needs_row_major_load_workaround = false;
         bool support_pointer_to_pointer = false;
+        bool support_precise_qualifier = false;
     } backend;
 
     void emit_struct(SPIRType &type);
@@ -734,9 +735,9 @@ protected:
     virtual std::string to_qualifiers_glsl(uint32_t id);
     void fixup_io_block_patch_qualifiers(const SPIRVariable &var);
     void emit_output_variable_initializer(const SPIRVariable &var);
-    const char *to_precision_qualifiers_glsl(uint32_t id);
+    std::string to_precision_qualifiers_glsl(uint32_t id);
     virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
-    const char *flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
+    std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
     const char *format_to_glsl(spv::ImageFormat format);
     virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
     virtual std::string to_interpolation_qualifiers(const Bitset &flags);
@@ -5731,6 +5731,9 @@ string CompilerHLSL::compile()
     backend.nonuniform_qualifier = "NonUniformResourceIndex";
     backend.support_case_fallthrough = false;
 
+    // SM 4.1 does not support precise for some reason.
+    backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40;
+
     fixup_type_alias();
     reorder_type_alias();
     build_function_control_flow_graphs_and_analyze();
@@ -4843,6 +4843,16 @@ void CompilerMSL::emit_custom_functions()
         statement("");
         break;
 
+    // "fsub" intrinsic support
+    case SPVFuncImplFSub:
+        statement("template<typename T>");
+        statement("T spvFSub(T l, T r)");
+        begin_scope();
+        statement("return fma(T(-1), r, l);");
+        end_scope();
+        statement("");
+        break;
+
     // "fmul' intrinsic support
     case SPVFuncImplFMul:
         statement("template<typename T>");
@@ -7579,19 +7589,26 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
         break;
 
     case OpFMul:
-        if (msl_options.invariant_float_math)
+        if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
             MSL_BFOP(spvFMul);
         else
             MSL_BOP(*);
         break;
 
     case OpFAdd:
-        if (msl_options.invariant_float_math)
+        if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
             MSL_BFOP(spvFAdd);
         else
             MSL_BOP(+);
         break;
 
+    case OpFSub:
+        if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
+            MSL_BFOP(spvFSub);
+        else
+            MSL_BOP(-);
+        break;
+
     // Atomics
     case OpAtomicExchange:
     {
|
||||
case OpVectorTimesMatrix:
|
||||
case OpMatrixTimesVector:
|
||||
{
|
||||
if (!msl_options.invariant_float_math)
|
||||
if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
|
||||
{
|
||||
CompilerGLSL::emit_instruction(instruction);
|
||||
break;
|
||||
@@ -8075,7 +8092,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 
     case OpMatrixTimesMatrix:
     {
-        if (!msl_options.invariant_float_math)
+        if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
         {
             CompilerGLSL::emit_instruction(instruction);
             break;
@@ -14856,9 +14873,11 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
         return SPVFuncImplMod;
 
     case OpFAdd:
-        if (compiler.msl_options.invariant_float_math)
+    case OpFSub:
+        if (compiler.msl_options.invariant_float_math ||
+            compiler.has_decoration(args[1], DecorationNoContraction))
         {
-            return SPVFuncImplFAdd;
+            return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub;
         }
         break;
 
@@ -14867,7 +14886,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
     case OpMatrixTimesVector:
     case OpVectorTimesMatrix:
     case OpMatrixTimesMatrix:
-        if (compiler.msl_options.invariant_float_math)
+        if (compiler.msl_options.invariant_float_math ||
+            compiler.has_decoration(args[1], DecorationNoContraction))
         {
             return SPVFuncImplFMul;
         }
@@ -655,6 +655,7 @@ protected:
         SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
         SPVFuncImplFMul,
         SPVFuncImplFAdd,
+        SPVFuncImplFSub,
         SPVFuncImplCubemapTo2DArrayFace,
         SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type
         SPVFuncImplInverse4x4,