MSL: Reintroduce workaround for constant arrays being passed by value.

This commit is contained in:
Hans-Kristian Arntzen 2020-02-24 13:22:52 +01:00
parent 20b28f72fa
commit 30343f3e95
5 changed files with 193 additions and 5 deletions

View File

@ -0,0 +1,103 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Lookup table of four float4 values (0.0 through 3.0) in the constant address space.
// main0 declares a thread-local array, _68_array_copy, with an identical initializer
// and passes that copy to the helper functions instead of this array.
constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) };
// Return type of the main0 vertex function.
struct main0_out
{
// The only output; tagged with the [[position]] attribute.
float4 gl_Position [[position]];
};
// Stage-in input of the main0 vertex function.
// Both members are later used as dynamic indices into 4-element arrays.
struct main0_in
{
// Vertex attribute 0; indexes the first array argument.
int Index1 [[attribute(0)]];
// Vertex attribute 1; indexes the second array argument.
int Index2 [[attribute(1)]];
};
// Element-wise copy of an A-element array from the constant address space
// (src) into an equally sized array in the thread address space (dst).
template<typename T, uint A>
inline void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        idx++;
    }
}
// Element-wise copy of an A-element array from the constant address space
// (src) into an equally sized array in the threadgroup address space (dst).
template<typename T, uint A>
inline void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        idx++;
    }
}
// Element-wise copy between two A-element arrays that both live in the
// thread address space; src is read-only.
template<typename T, uint A>
inline void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        idx++;
    }
}
// Element-wise copy of an A-element array from the thread address space
// (src, read-only) into an equally sized threadgroup array (dst).
template<typename T, uint A>
inline void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        idx++;
    }
}
// Element-wise copy of an A-element array from the threadgroup address space
// (src, read-only) into an equally sized thread array (dst).
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        idx++;
    }
}
// Element-wise copy between two A-element arrays that both live in the
// threadgroup address space; src is read-only.
template<typename T, uint A>
inline void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A])
{
    uint idx = 0;
    while (idx < A)
    {
        dst[idx] = src[idx];
        idx++;
    }
}
// Copies both incoming array references into local arrays, then returns the
// sum of the element selected by Index1 from the first copy and the element
// selected by Index2 from the second copy.
static inline __attribute__((always_inline))
float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
{
    float4 first_copy[4];
    float4 second_copy[4];
    spvArrayCopyFromStackToStack1(first_copy, positions);
    spvArrayCopyFromStackToStack1(second_copy, positions2);
    float4 lhs = first_copy[Index1];
    float4 rhs = second_copy[Index2];
    return lhs + rhs;
}
// Forwards both array references and both indices unchanged to
// consume_constant_arrays2 and returns its result.
static inline __attribute__((always_inline))
float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
{
    float4 result = consume_constant_arrays2(positions, positions2, Index1, Index2);
    return result;
}
// Vertex entry point. Builds two thread-local 4-element tables and writes the
// sum of the entries selected by in.Index1 and in.Index2 to gl_Position.
vertex main0_out main0(main0_in in [[stage_in]])
{
    // Thread-local array with the same contents as the constant array _68;
    // the callee takes its array arguments as thread const references.
    float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) };
    // Second table, holding 10.0 through 13.0.
    float4 LUT2[4] = { float4(10.0), float4(11.0), float4(12.0), float4(13.0) };
    main0_out out = {};
    out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2);
    return out;
}

View File

@ -0,0 +1,26 @@
#version 310 es
// Integer vertex inputs; consume_constant_arrays2 uses them as dynamic array indices.
layout(location = 0) in int Index1;
layout(location = 1) in int Index2;
// Returns positions[Index1] + positions2[Index2].
// Both arrays are received by value and indexed with the global inputs Index1/Index2.
vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4])
{
return positions[Index1] + positions2[Index2];
}
// Passes both by-value array parameters straight through to
// consume_constant_arrays2 and returns its result.
vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4])
{
return consume_constant_arrays2(positions, positions2);
}
// Compile-time constant table of four vec4 values (0.0 through 3.0);
// main() passes it directly as a function argument.
const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0));
// Entry point: passes the constant table LUT1 and a locally built table LUT2
// to consume_constant_arrays and writes the result to gl_Position.
void main()
{
// LUT2 is a non-const local filled by per-element assignment (10.0 through 13.0).
vec4 LUT2[4];
LUT2[0] = vec4(10.0);
LUT2[1] = vec4(11.0);
LUT2[2] = vec4(12.0);
LUT2[3] = vec4(13.0);
// The first argument is a constant array, the second a local one.
gl_Position = consume_constant_arrays(LUT1, LUT2);
}

View File

@ -939,6 +939,11 @@ struct SPIRFunction : IVariant
// Intentionally not a small vector, this one is rare, and std::function can be large.
Vector<std::function<void()>> fixup_hooks_in;
// On function entry, make sure to copy a constant array into thread addr space to work around
// the case where we are passing a constant array by value to a function on backends which do not
// consider arrays value types.
SmallVector<ID> constant_arrays_needed_on_stack;
bool active = false;
bool flush_undeclared = true;
bool do_combined_parameters = true;

View File

@ -11751,6 +11751,14 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
current_function = &func;
auto &entry_block = get<SPIRBlock>(func.entry_block);
sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
for (auto &array : func.constant_arrays_needed_on_stack)
{
auto &c = get<SPIRConstant>(array);
auto &type = get<SPIRType>(c.constant_type);
statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
}
for (auto &v : func.local_variables)
{
auto &var = get<SPIRVariable>(v);

View File

@ -8205,7 +8205,29 @@ string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_
if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler)
arg_str = join("spvDynamicImageSampler<", type_to_glsl(get<SPIRType>(type.image.type)), ">(");
arg_str += CompilerGLSL::to_func_call_arg(arg, id);
auto *c = maybe_get<SPIRConstant>(id);
if (msl_options.force_native_arrays && c && !get<SPIRType>(c->constant_type).array.empty())
{
// If we are passing a constant array directly to a function for some reason,
// the callee will expect an argument in thread const address space
// (since we can only bind to arrays with references in MSL).
// To resolve this, we must emit a copy in this address space.
// This kind of code gen should be rare enough that performance is not a real concern.
// Inline the SPIR-V to avoid this kind of suboptimal codegen.
//
// We risk calling this inside a continue block (invalid code),
// so just create a thread local copy in the current function.
arg_str = join("_", id, "_array_copy");
auto &constants = current_function->constant_arrays_needed_on_stack;
auto itr = find(begin(constants), end(constants), ID(id));
if (itr == end(constants))
{
force_recompile();
constants.push_back(id);
}
}
else
arg_str += CompilerGLSL::to_func_call_arg(arg, id);
// Need to check the base variable in case we need to apply a qualified alias.
uint32_t var_id = 0;
@ -10029,10 +10051,34 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
(storage == StorageClassFunction || storage == StorageClassGeneric))
{
// If the argument is a pure value and not an opaque type, we will pass by value.
if (!address_space.empty())
decl = join(address_space, " ", decl);
decl += " ";
decl += to_expression(name_id);
if (msl_options.force_native_arrays && is_array(type))
{
// We are receiving an array by value. This is problematic.
// We cannot be sure of the target address space since we are supposed to receive a copy,
// but this is not possible with MSL without some extra work.
// We will have to assume we're getting a reference in thread address space.
// If we happen to get a reference in constant address space, the caller must emit a copy and pass that.
// Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from
// non-constant arrays, but we can create thread const from constant.
decl = string("thread const ") + decl;
decl += " (&";
const char *restrict_kw = to_restrict(name_id);
if (*restrict_kw)
{
decl += " ";
decl += restrict_kw;
}
decl += to_expression(name_id);
decl += ")";
decl += type_to_array_glsl(type);
}
else
{
if (!address_space.empty())
decl = join(address_space, " ", decl);
decl += " ";
decl += to_expression(name_id);
}
}
else if (is_array(type) && !type_is_image)
{