Merge pull request #823 from KhronosGroup/fix-818

MSL: Fix case where we pass arrays to functions by value.
This commit is contained in:
Hans-Kristian Arntzen 2019-01-14 14:24:24 +01:00 committed by GitHub
commit b4faf13c86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 271 additions and 3 deletions

View File

@ -0,0 +1,48 @@
static const float4 _68[4] = { 0.0f.xxxx, 1.0f.xxxx, 2.0f.xxxx, 3.0f.xxxx };
static float4 gl_Position;
static int Index1;
static int Index2;
struct SPIRV_Cross_Input
{
int Index1 : TEXCOORD0;
int Index2 : TEXCOORD1;
};
struct SPIRV_Cross_Output
{
float4 gl_Position : SV_Position;
};
float4 consume_constant_arrays2(float4 positions[4], float4 positions2[4])
{
float4 indexable[4] = positions;
float4 indexable_1[4] = positions2;
return indexable[Index1] + indexable_1[Index2];
}
float4 consume_constant_arrays(float4 positions[4], float4 positions2[4])
{
return consume_constant_arrays2(positions, positions2);
}
void vert_main()
{
float4 LUT2[4];
LUT2[0] = 10.0f.xxxx;
LUT2[1] = 11.0f.xxxx;
LUT2[2] = 12.0f.xxxx;
LUT2[3] = 13.0f.xxxx;
gl_Position = consume_constant_arrays(_68, LUT2);
}
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
Index1 = stage_input.Index1;
Index2 = stage_input.Index2;
vert_main();
SPIRV_Cross_Output stage_output;
stage_output.gl_Position = gl_Position;
return stage_output;
}

View File

@ -0,0 +1,60 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
constant float4 _68[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) };
struct main0_out
{
float4 gl_Position [[position]];
};
struct main0_in
{
int Index1 [[attribute(0)]];
int Index2 [[attribute(1)]];
};
// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment.
template<typename T, uint N>
void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])
{
for (uint i = 0; i < N; dst[i] = src[i], i++);
}
template<typename T, uint N>
void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])
{
for (uint i = 0; i < N; dst[i] = src[i], i++);
}
float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
{
float4 indexable[4];
spvArrayCopyFromStack1(indexable, positions);
float4 indexable_1[4];
spvArrayCopyFromStack1(indexable_1, positions2);
return indexable[Index1] + indexable_1[Index2];
}
float4 consume_constant_arrays(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2)
{
return consume_constant_arrays2(positions, positions2, Index1, Index2);
}
vertex main0_out main0(main0_in in [[stage_in]])
{
float4 _68_array_copy[4] = { float4(0.0), float4(1.0), float4(2.0), float4(3.0) };
main0_out out = {};
float4 LUT2[4];
LUT2[0] = float4(10.0);
LUT2[1] = float4(11.0);
LUT2[2] = float4(12.0);
LUT2[3] = float4(13.0);
out.gl_Position = consume_constant_arrays(_68_array_copy, LUT2, in.Index1, in.Index2);
return out;
}

View File

@ -0,0 +1,27 @@
#version 310 es
layout(location = 0) in int Index1;
layout(location = 1) in int Index2;
vec4 consume_constant_arrays2(vec4 positions[4], vec4 positions2[4])
{
vec4 indexable[4] = positions;
vec4 indexable_1[4] = positions2;
return indexable[Index1] + indexable_1[Index2];
}
vec4 consume_constant_arrays(vec4 positions[4], vec4 positions2[4])
{
return consume_constant_arrays2(positions, positions2);
}
void main()
{
vec4 LUT2[4];
LUT2[0] = vec4(10.0);
LUT2[1] = vec4(11.0);
LUT2[2] = vec4(12.0);
LUT2[3] = vec4(13.0);
gl_Position = consume_constant_arrays(vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0)), LUT2);
}

View File

@ -0,0 +1,26 @@
#version 310 es
layout(location = 0) in int Index1;
layout(location = 1) in int Index2;
vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4])
{
return positions[Index1] + positions2[Index2];
}
vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4])
{
return consume_constant_arrays2(positions, positions2);
}
const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0));
void main()
{
vec4 LUT2[4];
LUT2[0] = vec4(10.0);
LUT2[1] = vec4(11.0);
LUT2[2] = vec4(12.0);
LUT2[3] = vec4(13.0);
gl_Position = consume_constant_arrays(LUT1, LUT2);
}

View File

@ -0,0 +1,26 @@
#version 310 es
layout(location = 0) in int Index1;
layout(location = 1) in int Index2;
vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4])
{
return positions[Index1] + positions2[Index2];
}
vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4])
{
return consume_constant_arrays2(positions, positions2);
}
const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0));
void main()
{
vec4 LUT2[4];
LUT2[0] = vec4(10.0);
LUT2[1] = vec4(11.0);
LUT2[2] = vec4(12.0);
LUT2[3] = vec4(13.0);
gl_Position = consume_constant_arrays(LUT1, LUT2);
}

View File

@ -0,0 +1,26 @@
#version 310 es
layout(location = 0) in int Index1;
layout(location = 1) in int Index2;
vec4 consume_constant_arrays2(const vec4 positions[4], const vec4 positions2[4])
{
return positions[Index1] + positions2[Index2];
}
vec4 consume_constant_arrays(const vec4 positions[4], const vec4 positions2[4])
{
return consume_constant_arrays2(positions, positions2);
}
const vec4 LUT1[] = vec4[](vec4(0.0), vec4(1.0), vec4(2.0), vec4(3.0));
void main()
{
vec4 LUT2[4];
LUT2[0] = vec4(10.0);
LUT2[1] = vec4(11.0);
LUT2[2] = vec4(12.0);
LUT2[3] = vec4(13.0);
gl_Position = consume_constant_arrays(LUT1, LUT2);
}

View File

@ -812,6 +812,11 @@ struct SPIRFunction : IVariant
// Need to defer this, because they might rely on things which change during compilation. // Need to defer this, because they might rely on things which change during compilation.
std::vector<std::function<void()>> fixup_hooks_in; std::vector<std::function<void()>> fixup_hooks_in;
// On function entry, make sure to copy a constant array into thread addr space to work around
// the case where we are passing a constant array by value to a function on backends which do not
// consider arrays value types.
std::vector<uint32_t> constant_arrays_needed_on_stack;
bool active = false; bool active = false;
bool flush_undeclared = true; bool flush_undeclared = true;
bool do_combined_parameters = true; bool do_combined_parameters = true;

View File

@ -9605,6 +9605,14 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
current_function = &func; current_function = &func;
auto &entry_block = get<SPIRBlock>(func.entry_block); auto &entry_block = get<SPIRBlock>(func.entry_block);
sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack));
for (auto &array : func.constant_arrays_needed_on_stack)
{
auto &c = get<SPIRConstant>(array);
auto &type = get<SPIRType>(c.constant_type);
statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";");
}
for (auto &v : func.local_variables) for (auto &v : func.local_variables)
{ {
auto &var = get<SPIRVariable>(v); auto &var = get<SPIRVariable>(v);

View File

@ -3695,7 +3695,31 @@ void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id
// Manufacture automatic sampler arg for SampledImage texture. // Manufacture automatic sampler arg for SampledImage texture.
string CompilerMSL::to_func_call_arg(uint32_t id) string CompilerMSL::to_func_call_arg(uint32_t id)
{ {
string arg_str = CompilerGLSL::to_func_call_arg(id); string arg_str;
auto *c = maybe_get<SPIRConstant>(id);
if (c && !get<SPIRType>(c->constant_type).array.empty())
{
// If we are passing a constant array directly to a function for some reason,
// the callee will expect an argument in thread const address space
// (since we can only bind to arrays with references in MSL).
// To resolve this, we must emit a copy in this address space.
// This kind of code gen should be rare enough that performance is not a real concern.
// Inline the SPIR-V to avoid this kind of suboptimal codegen.
//
// We risk calling this inside a continue block (invalid code),
// so just create a thread local copy in the current function.
arg_str = join("_", id, "_array_copy");
auto &constants = current_function->constant_arrays_needed_on_stack;
auto itr = find(begin(constants), end(constants), id);
if (itr == end(constants))
{
force_recompile = true;
constants.push_back(id);
}
}
else
arg_str = CompilerGLSL::to_func_call_arg(id);
// Manufacture automatic sampler arg if the arg is a SampledImage texture. // Manufacture automatic sampler arg if the arg is a SampledImage texture.
auto &type = expression_type(id); auto &type = expression_type(id);
@ -4496,8 +4520,26 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
(storage == StorageClassFunction || storage == StorageClassGeneric)) (storage == StorageClassFunction || storage == StorageClassGeneric))
{ {
// If the argument is a pure value and not an opaque type, we will pass by value. // If the argument is a pure value and not an opaque type, we will pass by value.
decl += " "; if (is_array(type))
decl += to_expression(name_id); {
// We are receiving an array by value. This is problematic.
// We cannot be sure of the target address space since we are supposed to receive a copy,
// but this is not possible with MSL without some extra work.
// We will have to assume we're getting a reference in thread address space.
// If we happen to get a reference in constant address space, the caller must emit a copy and pass that.
// Thread const therefore becomes the only logical choice, since we cannot "create" a constant array from
// non-constant arrays, but we can create thread const from constant.
decl = string("thread const ") + decl;
decl += " (&";
decl += to_expression(name_id);
decl += ")";
decl += type_to_array_glsl(type);
}
else
{
decl += " ";
decl += to_expression(name_id);
}
} }
else if (is_array(type) && !type_is_image) else if (is_array(type) && !type_is_image)
{ {