Merge pull request #1668 from KhronosGroup/fix-1665

Implement NoContraction decoration
This commit is contained in:
Hans-Kristian Arntzen 2021-05-07 13:51:24 +02:00 committed by GitHub
commit 418542eaef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 504 additions and 26 deletions

View File

@ -0,0 +1,40 @@
// File-scope staging variables. main() copies the stage inputs into the
// vInput* globals before calling vert_main(), and reads gl_Position / vColor
// back out of them afterwards.
static float4 gl_Position;
static float4 vInput0;
static float4 vInput1;
static float4 vInput2;
static float4 vColor;
// Vertex inputs received by the entry point, bound to TEXCOORD0..TEXCOORD2.
struct SPIRV_Cross_Input
{
float4 vInput0 : TEXCOORD0;
float4 vInput1 : TEXCOORD1;
float4 vInput2 : TEXCOORD2;
};
// Values returned by the entry point. Both members are qualified `precise`.
struct SPIRV_Cross_Output
{
precise float4 vColor : TEXCOORD0;
precise float4 gl_Position : SV_Position;
};
// Shader body. Reads the vInput* globals and writes the two output globals,
// keeping every arithmetic step in its own temporary:
//   gl_Position = vInput0 + (vInput1 * vInput2)
//   vColor      = (vInput0 - vInput1) * vInput2
void vert_main()
{
float4 _20 = vInput1 * vInput2;
float4 _21 = vInput0 + _20;
gl_Position = _21;
float4 _27 = vInput0 - vInput1;
float4 _29 = _27 * vInput2;
vColor = _29;
}
// Entry point: unpacks stage_input into the file-scope globals, runs
// vert_main(), then packs the globals it wrote into the returned struct.
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vInput0 = stage_input.vInput0;
vInput1 = stage_input.vInput1;
vInput2 = stage_input.vInput2;
vert_main();
SPIRV_Cross_Output stage_output;
stage_output.gl_Position = gl_Position;
stage_output.vColor = vColor;
return stage_output;
}

View File

@ -0,0 +1,39 @@
// File-scope staging variables. main() fills vA/vB/vC from the stage input
// before calling vert_main(), and reads gl_Position back afterwards.
static float4 gl_Position;
static float4 vA;
static float4 vB;
static float4 vC;
// Vertex inputs received by the entry point, bound to TEXCOORD0..TEXCOORD2.
struct SPIRV_Cross_Input
{
float4 vA : TEXCOORD0;
float4 vB : TEXCOORD1;
float4 vC : TEXCOORD2;
};
// Value returned by the entry point: the position only.
struct SPIRV_Cross_Output
{
float4 gl_Position : SV_Position;
};
// Shader body. Every arithmetic result is held in its own `precise`
// temporary; the product vA * vB (_15) is computed once and used twice.
//   gl_Position = ((vA * vB + (vA + vB)) + (vA - vB)) + (vA * vB + vC)
void vert_main()
{
precise float4 _15 = vA * vB;
precise float4 _19 = vA + vB;
precise float4 _23 = vA - vB;
precise float4 _30 = _15 + vC;
precise float4 _34 = _15 + _19;
precise float4 _36 = _34 + _23;
precise float4 _38 = _36 + _30;
gl_Position = _38;
}
// Entry point: unpacks stage_input into the file-scope globals, runs
// vert_main(), then returns the gl_Position it wrote.
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vA = stage_input.vA;
vB = stage_input.vB;
vC = stage_input.vC;
vert_main();
SPIRV_Cross_Output stage_output;
stage_output.gl_Position = gl_Position;
return stage_output;
}

View File

@ -0,0 +1,88 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Return type of main0: the vertex position only.
struct main0_out
{
float4 gl_Position [[position]];
};
// Stage-in argument of main0: three float4 attributes at indices 0..2.
struct main0_in
{
float4 vA [[attribute(0)]];
float4 vB [[attribute(1)]];
float4 vC [[attribute(2)]];
};
// Product of l and r, written as fma(l, r, 0) instead of operator*.
template<typename T>
T spvFMul(T l, T r)
{
return fma(l, r, T(0));
}
// Vector * matrix product accumulated with fma:
//   res[j] = sum over i of v[i] * m[j][i]
// The outer loop walks i from Rows - 1 down to 0 (the counter is unsigned, so
// it runs Rows..1 and indexes with i - 1).
template<typename T, int Cols, int Rows>
vec<T, Cols> spvFMulVectorMatrix(vec<T, Rows> v, matrix<T, Cols, Rows> m)
{
vec<T, Cols> res = vec<T, Cols>(0);
for (uint i = Rows; i > 0; --i)
{
// Gather element i - 1 of every m[j] into one vector.
vec<T, Cols> tmp(0);
for (uint j = 0; j < Cols; ++j)
{
tmp[j] = m[j][i - 1];
}
res = fma(tmp, vec<T, Cols>(v[i - 1]), res);
}
return res;
}
// Matrix * vector product accumulated with fma:
//   res = sum over i of m[i] * v[i]
// The loop walks i from Cols - 1 down to 0 (the counter is unsigned, so it
// runs Cols..1 and indexes with i - 1).
template<typename T, int Cols, int Rows>
vec<T, Rows> spvFMulMatrixVector(matrix<T, Cols, Rows> m, vec<T, Cols> v)
{
vec<T, Rows> res = vec<T, Rows>(0);
for (uint i = Cols; i > 0; --i)
{
res = fma(m[i - 1], vec<T, Rows>(v[i - 1]), res);
}
return res;
}
// Matrix * matrix product built one result column at a time with fma:
//   res[i] = sum over j of l[j] * r[i][j]
// i ranges over RCols and j over LCols; the result has RCols columns of LRows
// components each.
// NOTE(review): if this file is generated output, the emitter that prints this
// helper needs the same accumulator-type change -- confirm.
template<typename T, int LCols, int LRows, int RCols, int RRows>
matrix<T, RCols, LRows> spvFMulMatrixMatrix(matrix<T, LCols, LRows> l, matrix<T, RCols, RRows> r)
{
    matrix<T, RCols, LRows> res;
    for (uint i = 0; i < RCols; i++)
    {
        // The accumulator is one column of the result, so it must have LRows
        // components like l[j]. Sizing it by RCols only type-checks when
        // RCols == LRows.
        vec<T, LRows> tmp(0);
        for (uint j = 0; j < LCols; j++)
        {
            tmp = fma(vec<T, LRows>(r[i][j]), l[j], tmp);
        }
        res[i] = tmp;
    }
    return res;
}
// Sum l + r, written as fma(1, l, r) instead of operator+.
template<typename T>
T spvFAdd(T l, T r)
{
return fma(T(1), l, r);
}
// Difference l - r, written as fma(-1, r, l) instead of operator-.
template<typename T>
T spvFSub(T l, T r)
{
return fma(T(-1), r, l);
}
// Vertex entry point. All arithmetic goes through the spvF* helpers:
//   gl_Position = ((vA * vB + (vA + vB)) + (vA - vB)) + (vA * vB + vC)
// The product vA * vB (_15) is computed once and used twice.
vertex main0_out main0(main0_in in [[stage_in]])
{
main0_out out = {};
float4 _15 = spvFMul(in.vA, in.vB);
out.gl_Position = spvFAdd(spvFAdd(spvFAdd(_15, spvFAdd(in.vA, in.vB)), spvFSub(in.vA, in.vB)), spvFAdd(_15, in.vC));
return out;
}

View File

@ -0,0 +1,18 @@
#version 450
// Vertex inputs at locations 0..2.
layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;
// Every arithmetic result is held in its own `precise` temporary; the product
// vA * vB (_15) is computed once and used twice.
//   gl_Position = ((vA * vB + (vA + vB)) + (vA - vB)) + (vA * vB + vC)
void main()
{
precise vec4 _15 = vA * vB;
precise vec4 _19 = vA + vB;
precise vec4 _23 = vA - vB;
precise vec4 _30 = _15 + vC;
precise vec4 _34 = _15 + _19;
precise vec4 _36 = _34 + _23;
precise vec4 _38 = _36 + _30;
gl_Position = _38;
}

View File

@ -0,0 +1,40 @@
// File-scope staging variables. main() copies the stage inputs into the
// vInput* globals before calling vert_main(), and reads gl_Position / vColor
// back out of them afterwards.
static float4 gl_Position;
static float4 vInput0;
static float4 vInput1;
static float4 vInput2;
static float4 vColor;
// Vertex inputs received by the entry point, bound to TEXCOORD0..TEXCOORD2.
struct SPIRV_Cross_Input
{
float4 vInput0 : TEXCOORD0;
float4 vInput1 : TEXCOORD1;
float4 vInput2 : TEXCOORD2;
};
// Values returned by the entry point. Both members are qualified `precise`.
struct SPIRV_Cross_Output
{
precise float4 vColor : TEXCOORD0;
precise float4 gl_Position : SV_Position;
};
// Shader body. Reads the vInput* globals and writes the two output globals,
// keeping every arithmetic step in its own temporary:
//   gl_Position = vInput0 + (vInput1 * vInput2)
//   vColor      = (vInput0 - vInput1) * vInput2
void vert_main()
{
float4 _20 = vInput1 * vInput2;
float4 _21 = vInput0 + _20;
gl_Position = _21;
float4 _27 = vInput0 - vInput1;
float4 _29 = _27 * vInput2;
vColor = _29;
}
// Entry point: unpacks stage_input into the file-scope globals, runs
// vert_main(), then packs the globals it wrote into the returned struct.
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vInput0 = stage_input.vInput0;
vInput1 = stage_input.vInput1;
vInput2 = stage_input.vInput2;
vert_main();
SPIRV_Cross_Output stage_output;
stage_output.gl_Position = gl_Position;
stage_output.vColor = vColor;
return stage_output;
}

View File

@ -0,0 +1,45 @@
// File-scope staging variables. main() fills vA/vB/vC from the stage input
// before calling vert_main(), and reads gl_Position back afterwards.
static float4 gl_Position;
static float4 vA;
static float4 vB;
static float4 vC;
// Vertex inputs received by the entry point, bound to TEXCOORD0..TEXCOORD2.
struct SPIRV_Cross_Input
{
float4 vA : TEXCOORD0;
float4 vB : TEXCOORD1;
float4 vC : TEXCOORD2;
};
// Value returned by the entry point: the position only.
struct SPIRV_Cross_Output
{
float4 gl_Position : SV_Position;
};
// Shader body. Each arithmetic result is first stored in a `precise`
// temporary and then copied into a plain named local (mul, add, sub, mad,
// summed). vA * vB is evaluated twice here: once for `mul` (_15) and once
// for `mad` (_27).
//   summed = ((mul + add) + sub) + mad, where mad = vA * vB + vC
void vert_main()
{
precise float4 _15 = vA * vB;
float4 mul = _15;
precise float4 _19 = vA + vB;
float4 add = _19;
precise float4 _23 = vA - vB;
float4 sub = _23;
precise float4 _27 = vA * vB;
precise float4 _30 = _27 + vC;
float4 mad = _30;
precise float4 _34 = mul + add;
precise float4 _36 = _34 + sub;
precise float4 _38 = _36 + mad;
float4 summed = _38;
gl_Position = summed;
}
// Entry point: unpacks stage_input into the file-scope globals, runs
// vert_main(), then returns the gl_Position it wrote.
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vA = stage_input.vA;
vB = stage_input.vB;
vC = stage_input.vC;
vert_main();
SPIRV_Cross_Output stage_output;
stage_output.gl_Position = gl_Position;
return stage_output;
}

View File

@ -0,0 +1,92 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Return type of main0: the vertex position only.
struct main0_out
{
float4 gl_Position [[position]];
};
// Stage-in argument of main0: three float4 attributes at indices 0..2.
struct main0_in
{
float4 vA [[attribute(0)]];
float4 vB [[attribute(1)]];
float4 vC [[attribute(2)]];
};
// Product of l and r, written as fma(l, r, 0) instead of operator*.
template<typename T>
T spvFMul(T l, T r)
{
return fma(l, r, T(0));
}
// Vector * matrix product accumulated with fma:
//   res[j] = sum over i of v[i] * m[j][i]
// The outer loop walks i from Rows - 1 down to 0 (the counter is unsigned, so
// it runs Rows..1 and indexes with i - 1).
template<typename T, int Cols, int Rows>
vec<T, Cols> spvFMulVectorMatrix(vec<T, Rows> v, matrix<T, Cols, Rows> m)
{
vec<T, Cols> res = vec<T, Cols>(0);
for (uint i = Rows; i > 0; --i)
{
// Gather element i - 1 of every m[j] into one vector.
vec<T, Cols> tmp(0);
for (uint j = 0; j < Cols; ++j)
{
tmp[j] = m[j][i - 1];
}
res = fma(tmp, vec<T, Cols>(v[i - 1]), res);
}
return res;
}
// Matrix * vector product accumulated with fma:
//   res = sum over i of m[i] * v[i]
// The loop walks i from Cols - 1 down to 0 (the counter is unsigned, so it
// runs Cols..1 and indexes with i - 1).
template<typename T, int Cols, int Rows>
vec<T, Rows> spvFMulMatrixVector(matrix<T, Cols, Rows> m, vec<T, Cols> v)
{
vec<T, Rows> res = vec<T, Rows>(0);
for (uint i = Cols; i > 0; --i)
{
res = fma(m[i - 1], vec<T, Rows>(v[i - 1]), res);
}
return res;
}
// Matrix * matrix product built one result column at a time with fma:
//   res[i] = sum over j of l[j] * r[i][j]
// i ranges over RCols and j over LCols; the result has RCols columns of LRows
// components each.
// NOTE(review): if this file is generated output, the emitter that prints this
// helper needs the same accumulator-type change -- confirm.
template<typename T, int LCols, int LRows, int RCols, int RRows>
matrix<T, RCols, LRows> spvFMulMatrixMatrix(matrix<T, LCols, LRows> l, matrix<T, RCols, RRows> r)
{
    matrix<T, RCols, LRows> res;
    for (uint i = 0; i < RCols; i++)
    {
        // The accumulator is one column of the result, so it must have LRows
        // components like l[j]. Sizing it by RCols only type-checks when
        // RCols == LRows.
        vec<T, LRows> tmp(0);
        for (uint j = 0; j < LCols; j++)
        {
            tmp = fma(vec<T, LRows>(r[i][j]), l[j], tmp);
        }
        res[i] = tmp;
    }
    return res;
}
// Sum l + r, written as fma(1, l, r) instead of operator+.
template<typename T>
T spvFAdd(T l, T r)
{
return fma(T(1), l, r);
}
// Difference l - r, written as fma(-1, r, l) instead of operator-.
template<typename T>
T spvFSub(T l, T r)
{
return fma(T(-1), r, l);
}
// Vertex entry point. All arithmetic goes through the spvF* helpers and each
// step is kept in a named local. vA * vB is evaluated twice: once for `mul`
// and once inside `mad`.
//   summed = ((mul + add) + sub) + mad, where mad = vA * vB + vC
vertex main0_out main0(main0_in in [[stage_in]])
{
main0_out out = {};
float4 mul = spvFMul(in.vA, in.vB);
float4 add = spvFAdd(in.vA, in.vB);
float4 sub = spvFSub(in.vA, in.vB);
float4 mad = spvFAdd(spvFMul(in.vA, in.vB), in.vC);
float4 summed = spvFAdd(spvFAdd(spvFAdd(mul, add), sub), mad);
out.gl_Position = summed;
return out;
}

View File

@ -0,0 +1,24 @@
#version 450
// Vertex inputs at locations 0..2.
layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;
// Each arithmetic result is first stored in a `precise` temporary and then
// copied into a plain named local (mul, add, sub, mad, summed). vA * vB is
// evaluated twice: once for `mul` (_15) and once for `mad` (_27).
//   summed = ((mul + add) + sub) + mad, where mad = vA * vB + vC
void main()
{
precise vec4 _15 = vA * vB;
vec4 mul = _15;
precise vec4 _19 = vA + vB;
vec4 add = _19;
precise vec4 _23 = vA - vB;
vec4 sub = _23;
precise vec4 _27 = vA * vB;
precise vec4 _30 = _27 + vC;
vec4 mad = _30;
precise vec4 _34 = mul + add;
precise vec4 _36 = _34 + sub;
precise vec4 _38 = _36 + mad;
vec4 summed = _38;
gl_Position = summed;
}

View File

@ -0,0 +1,13 @@
#version 310 es
// Both outputs (the built-in gl_Position and vColor at location 0) are
// declared invariant.
invariant gl_Position;
layout(location = 0) invariant out vec4 vColor;
// Vertex inputs at locations 0..2.
layout(location = 0) in vec4 vInput0;
layout(location = 1) in vec4 vInput1;
layout(location = 2) in vec4 vInput2;
// Writes one multiply-then-add expression and one subtract-then-multiply
// expression to the two invariant outputs.
void main()
{
gl_Position = vInput0 + vInput1 * vInput2;
vColor = (vInput0 - vInput1) * vInput2;
}

View File

@ -0,0 +1,15 @@
#version 450
// Vertex inputs at locations 0..2.
layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;
// Every local is declared `precise`. `mad` is a multiply and an add written
// as a single expression; `summed` chains three additions left to right.
void main()
{
precise vec4 mul = vA * vB;
precise vec4 add = vA + vB;
precise vec4 sub = vA - vB;
precise vec4 mad = vA * vB + vC;
precise vec4 summed = mul + add + sub + mad;
gl_Position = summed;
}

View File

@ -0,0 +1,15 @@
#version 450
// Vertex inputs at locations 0..2.
layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;
// Every local is declared `precise`. `mad` is a multiply and an add written
// as a single expression; `summed` chains three additions left to right.
void main()
{
precise vec4 mul = vA * vB;
precise vec4 add = vA + vB;
precise vec4 sub = vA - vB;
precise vec4 mad = vA * vB + vC;
precise vec4 summed = mul + add + sub + mad;
gl_Position = summed;
}

View File

@ -0,0 +1,15 @@
#version 450
// Vertex inputs at locations 0..2.
layout(location = 0) in vec4 vA;
layout(location = 1) in vec4 vB;
layout(location = 2) in vec4 vC;
// Every local is declared `precise`. `mad` is a multiply and an add written
// as a single expression; `summed` chains three additions left to right.
void main()
{
precise vec4 mul = vA * vB;
precise vec4 add = vA + vB;
precise vec4 sub = vA - vB;
precise vec4 mad = vA * vB + vC;
precise vec4 summed = mul + add + sub + mad;
gl_Position = summed;
}

View File

@ -559,18 +559,19 @@ string CompilerGLSL::compile()
{
ir.fixup_reserved_names();
if (options.vulkan_semantics)
backend.allow_precision_qualifiers = true;
else
if (!options.vulkan_semantics)
{
// only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers
backend.nonuniform_qualifier = "";
backend.needs_row_major_load_workaround = true;
}
backend.allow_precision_qualifiers = options.vulkan_semantics || options.es;
backend.force_gl_in_out_block = true;
backend.supports_extensions = true;
backend.use_array_constructor = true;
backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320);
if (is_legacy_es())
backend.support_case_fallthrough = false;
@ -5545,7 +5546,12 @@ void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint3
void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op)
{
bool forward = should_forward(op0) && should_forward(op1);
// Various FP arithmetic opcodes such as add, sub, mul will hit this.
bool force_temporary_precise = backend.support_precise_qualifier &&
has_decoration(result_id, DecorationNoContraction) &&
type_is_floating_point(get<SPIRType>(result_type));
bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise;
emit_op(result_type, result_id,
join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward);
@ -12728,7 +12734,7 @@ void CompilerGLSL::emit_struct_padding_target(const SPIRType &)
{
}
const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags)
{
// GL_EXT_buffer_reference variables can be marked as restrict.
if (flags.get(DecorationRestrictPointerEXT))
@ -12740,6 +12746,11 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
type.basetype != SPIRType::Sampler)
return "";
string qual;
if (flags.get(DecorationNoContraction) && backend.support_precise_qualifier)
qual = "precise ";
if (options.es)
{
auto &execution = get_entry_point();
@ -12754,7 +12765,7 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
options.fragment.default_int_precision == Options::Mediump &&
execution.model == ExecutionModelFragment;
return implied_fmediump || implied_imediump ? "" : "mediump ";
qual += (implied_fmediump || implied_imediump) ? "" : "mediump ";
}
else
{
@ -12768,7 +12779,7 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
execution.model == ExecutionModelFragment) ||
(execution.model != ExecutionModelFragment));
return implied_fhighp || implied_ihighp ? "" : "highp ";
qual += (implied_fhighp || implied_ihighp) ? "" : "highp ";
}
}
else if (backend.allow_precision_qualifiers)
@ -12776,18 +12787,16 @@ const char *CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const B
// Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient.
// The default is highp however, so only emit mediump in the rare case that a shader has these.
if (flags.get(DecorationRelaxedPrecision))
return "mediump ";
else
return "";
qual += "mediump ";
}
else
return "";
return qual;
}
const char *CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id)
{
auto &type = expression_type(id);
bool use_precision_qualifiers = backend.allow_precision_qualifiers || options.es;
bool use_precision_qualifiers = backend.allow_precision_qualifiers;
if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage))
{
// Force mediump for the sampler type. We cannot declare 16-bit or smaller image types.

View File

@ -581,6 +581,7 @@ protected:
bool use_array_constructor = false;
bool needs_row_major_load_workaround = false;
bool support_pointer_to_pointer = false;
bool support_precise_qualifier = false;
} backend;
void emit_struct(SPIRType &type);
@ -734,9 +735,9 @@ protected:
virtual std::string to_qualifiers_glsl(uint32_t id);
void fixup_io_block_patch_qualifiers(const SPIRVariable &var);
void emit_output_variable_initializer(const SPIRVariable &var);
const char *to_precision_qualifiers_glsl(uint32_t id);
std::string to_precision_qualifiers_glsl(uint32_t id);
virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var);
const char *flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
std::string flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags);
const char *format_to_glsl(spv::ImageFormat format);
virtual std::string layout_for_member(const SPIRType &type, uint32_t index);
virtual std::string to_interpolation_qualifiers(const Bitset &flags);

View File

@ -570,7 +570,7 @@ void CompilerHLSL::emit_builtin_outputs_in_struct()
switch (builtin)
{
case BuiltInPosition:
type = "float4";
type = is_position_invariant() && backend.support_precise_qualifier ? "precise float4" : "float4";
semantic = legacy ? "POSITION" : "SV_Position";
break;
@ -818,8 +818,8 @@ string CompilerHLSL::to_interpolation_qualifiers(const Bitset &flags)
res += "patch "; // Seems to be different in actual HLSL.
if (flags.get(DecorationSample))
res += "sample ";
if (flags.get(DecorationInvariant))
res += "invariant "; // Not supported?
if (flags.get(DecorationInvariant) && backend.support_precise_qualifier)
res += "precise "; // Not supported?
return res;
}
@ -5731,6 +5731,9 @@ string CompilerHLSL::compile()
backend.nonuniform_qualifier = "NonUniformResourceIndex";
backend.support_case_fallthrough = false;
// SM 4.1 does not support precise for some reason.
backend.support_precise_qualifier = hlsl_options.shader_model >= 50 || hlsl_options.shader_model == 40;
fixup_type_alias();
reorder_type_alias();
build_function_control_flow_graphs_and_analyze();

View File

@ -4843,6 +4843,16 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
// "fsub" intrinsic support
case SPVFuncImplFSub:
statement("template<typename T>");
statement("T spvFSub(T l, T r)");
begin_scope();
statement("return fma(T(-1), r, l);");
end_scope();
statement("");
break;
// "fmul" intrinsic support
case SPVFuncImplFMul:
statement("template<typename T>");
@ -7579,19 +7589,26 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
break;
case OpFMul:
if (msl_options.invariant_float_math)
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
MSL_BFOP(spvFMul);
else
MSL_BOP(*);
break;
case OpFAdd:
if (msl_options.invariant_float_math)
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
MSL_BFOP(spvFAdd);
else
MSL_BOP(+);
break;
case OpFSub:
if (msl_options.invariant_float_math || has_decoration(ops[1], DecorationNoContraction))
MSL_BFOP(spvFSub);
else
MSL_BOP(-);
break;
// Atomics
case OpAtomicExchange:
{
@ -8033,7 +8050,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
case OpVectorTimesMatrix:
case OpMatrixTimesVector:
{
if (!msl_options.invariant_float_math)
if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
{
CompilerGLSL::emit_instruction(instruction);
break;
@ -8075,7 +8092,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
case OpMatrixTimesMatrix:
{
if (!msl_options.invariant_float_math)
if (!msl_options.invariant_float_math && !has_decoration(ops[1], DecorationNoContraction))
{
CompilerGLSL::emit_instruction(instruction);
break;
@ -14856,9 +14873,11 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
return SPVFuncImplMod;
case OpFAdd:
if (compiler.msl_options.invariant_float_math)
case OpFSub:
if (compiler.msl_options.invariant_float_math ||
compiler.has_decoration(args[1], DecorationNoContraction))
{
return SPVFuncImplFAdd;
return opcode == OpFAdd ? SPVFuncImplFAdd : SPVFuncImplFSub;
}
break;
@ -14867,7 +14886,8 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o
case OpMatrixTimesVector:
case OpVectorTimesMatrix:
case OpMatrixTimesMatrix:
if (compiler.msl_options.invariant_float_math)
if (compiler.msl_options.invariant_float_math ||
compiler.has_decoration(args[1], DecorationNoContraction))
{
return SPVFuncImplFMul;
}

View File

@ -655,6 +655,7 @@ protected:
SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
SPVFuncImplFMul,
SPVFuncImplFAdd,
SPVFuncImplFSub,
SPVFuncImplCubemapTo2DArrayFace,
SPVFuncImplUnsafeArray, // Allow Metal to use the array<T> template to make arrays a value type
SPVFuncImplInverse4x4,