Deal with packing/unpacking on store.

Still a bit buggy, since we cannot deduce between float2[] and
packed_float2. Need a deeper refactor to plumb this through ...
This commit is contained in:
Hans-Kristian Arntzen 2019-01-17 10:06:23 +01:00
parent 64ca1ec677
commit 15b52bee48
12 changed files with 91 additions and 58 deletions

View File

@ -9,7 +9,7 @@ struct foo
packed_float3 baz;
uchar quux;
packed_uchar4 blah;
packed_half2 wibble;
half4 wibble;
};
kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]])
@ -17,6 +17,6 @@ kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thr
_8.bar = gl_LocalInvocationID.x;
_8.baz = float3(gl_GlobalInvocationID);
_8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u));
_8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy));
_8.wibble.xy = half2(float2(_8.wibble.xy) * float2(gl_NumWorkGroups.xy));
}

View File

@ -69,7 +69,7 @@ struct SSBO1
struct S0_1
{
float2 a[1];
float4 a[1];
float b;
};
@ -115,21 +115,21 @@ struct SSBO0
Content_1 content;
Content_1 content1[2];
Content_1 content2;
float array[1];
float4 array[1];
};
kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [[buffer(1)]])
{
Content_1 _60 = ssbo_140.content;
ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0];
ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy;
ssbo_430.content.m0s[0].b = _60.m0s[0].b;
ssbo_430.content.m1s[0].a = _60.m1s[0].a;
ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a);
ssbo_430.content.m1s[0].b = _60.m1s[0].b;
ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0];
ssbo_430.content.m2s[0].b = _60.m2s[0].b;
ssbo_430.content.m0.a[0] = _60.m0.a[0];
ssbo_430.content.m0.a[0] = _60.m0.a[0].xy;
ssbo_430.content.m0.b = _60.m0.b;
ssbo_430.content.m1.a = _60.m1.a;
ssbo_430.content.m1.a = float3(_60.m1.a);
ssbo_430.content.m1.b = _60.m1.b;
ssbo_430.content.m2.a[0] = _60.m2.a[0];
ssbo_430.content.m2.b = _60.m2.b;

View File

@ -9,7 +9,7 @@ struct foo
packed_float3 baz;
uchar quux;
packed_uchar4 blah;
packed_half2 wibble;
half4 wibble;
};
kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]])
@ -17,6 +17,6 @@ kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thr
_8.bar = gl_LocalInvocationID.x;
_8.baz = float3(gl_GlobalInvocationID);
_8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u));
_8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy));
_8.wibble.xy = half2(float2(_8.wibble.xy) * float2(gl_NumWorkGroups.xy));
}

View File

@ -32,7 +32,7 @@ float4 _main(thread const float4& pos, constant buf& v_11)
{
int _46 = int(pos.x) % 16;
Foo_1 foo;
foo.a = v_11.results[_46].a;
foo.a = float3(v_11.results[_46].a);
foo.b = v_11.results[_46].b;
return float4(dot(foo.a, v_11.bar.xyz), foo.b, 0.0, 0.0);
}

View File

@ -30,7 +30,7 @@ constant uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
kernel void main0(device Buffer0& _15 [[buffer(1)]], device Buffer1& _34 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
T1_1 v;
v.a = _15.buf0[0].a;
v.a = float3(_15.buf0[0].a);
v.b = _15.buf0[0].b;
float x = v.b;
_34.buf1[gl_GlobalInvocationID.x] = x;

View File

@ -69,7 +69,7 @@ struct SSBO1
struct S0_1
{
float2 a[1];
float4 a[1];
float b;
};
@ -115,21 +115,21 @@ struct SSBO0
Content_1 content;
Content_1 content1[2];
Content_1 content2;
float array[1];
float4 array[1];
};
kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [[buffer(1)]])
{
Content_1 _60 = ssbo_140.content;
ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0];
ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy;
ssbo_430.content.m0s[0].b = _60.m0s[0].b;
ssbo_430.content.m1s[0].a = _60.m1s[0].a;
ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a);
ssbo_430.content.m1s[0].b = _60.m1s[0].b;
ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0];
ssbo_430.content.m2s[0].b = _60.m2s[0].b;
ssbo_430.content.m0.a[0] = _60.m0.a[0];
ssbo_430.content.m0.a[0] = _60.m0.a[0].xy;
ssbo_430.content.m0.b = _60.m0.b;
ssbo_430.content.m1.a = _60.m1.a;
ssbo_430.content.m1.a = float3(_60.m1.a);
ssbo_430.content.m1.b = _60.m1.b;
ssbo_430.content.m2.a[0] = _60.m2.a[0];
ssbo_430.content.m2.b = _60.m2.b;

View File

@ -35,7 +35,7 @@ struct main0_out
float4 _main(thread const VertexOutput& IN, constant CB0& v_26)
{
TestStruct_1 st;
st.position = v_26.CB0[1].position;
st.position = float3(v_26.CB0[1].position);
st.radius = v_26.CB0[1].radius;
float4 col = float4(st.position, st.radius);
return col;

View File

@ -43,7 +43,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
for (int i = 0; i < 4; i++)
{
Light_1 light;
light.Position = _21.lights[i].Position;
light.Position = float3(_21.lights[i].Position);
light.Radius = _21.lights[i].Radius;
light.Color = _21.lights[i].Color;
float3 L = in.aVertex.xyz - light.Position;

View File

@ -2486,26 +2486,26 @@ string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expressio
return enclose_expression(to_expression(id, register_expression_read));
}
string CompilerGLSL::to_unpacked_expression(uint32_t id)
string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
{
// If we need to transpose, it will also take care of unpacking rules.
auto *e = maybe_get<SPIRExpression>(id);
bool need_transpose = e && e->need_transpose;
if (!need_transpose && has_decoration(id, DecorationCPacked))
return unpack_expression_type(to_expression(id), expression_type(id));
return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id));
else
return to_expression(id);
return to_expression(id, register_expression_read);
}
string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id)
string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
{
// If we need to transpose, it will also take care of unpacking rules.
auto *e = maybe_get<SPIRExpression>(id);
bool need_transpose = e && e->need_transpose;
if (!need_transpose && has_decoration(id, DecorationCPacked))
return unpack_expression_type(to_expression(id), expression_type(id));
return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id));
else
return to_enclosed_expression(id);
return to_enclosed_expression(id, register_expression_read);
}
string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
@ -2517,22 +2517,22 @@ string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expre
return to_expression(id, register_expression_read);
}
string CompilerGLSL::to_pointer_expression(uint32_t id)
string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
{
auto &type = expression_type(id);
if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
return address_of_expression(to_enclosed_expression(id));
return address_of_expression(to_enclosed_expression(id, register_expression_read));
else
return to_expression(id);
return to_unpacked_expression(id, register_expression_read);
}
string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id)
string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
{
auto &type = expression_type(id);
if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
return address_of_expression(to_enclosed_expression(id));
return address_of_expression(to_enclosed_expression(id, register_expression_read));
else
return to_enclosed_expression(id);
return to_enclosed_expression(id, register_expression_read);
}
string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
@ -6603,6 +6603,30 @@ void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_
disallow_forwarding_in_expression_chain(*expr);
}
void CompilerGLSL::emit_store(uint32_t lhs_expression, uint32_t rhs_expression)
{
auto rhs = to_pointer_expression(rhs_expression);
// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
if (!rhs.empty())
{
handle_store_to_invariant_variable(lhs_expression, rhs_expression);
auto lhs = to_dereferenced_expression(lhs_expression);
// We might need to bitcast in order to store to a builtin.
bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
// While this is purely cosmetic, this is important for legacy ESSL where loop
// variable increments must be in either i++ or i += const-expr.
// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
statement(lhs, " = ", rhs, ";");
register_write(lhs_expression);
}
}
void CompilerGLSL::emit_instruction(const Instruction &instruction)
{
auto ops = stream(instruction);
@ -6743,27 +6767,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
else
{
auto rhs = to_pointer_expression(ops[1]);
// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
if (!rhs.empty())
{
handle_store_to_invariant_variable(ops[0], ops[1]);
auto lhs = to_dereferenced_expression(ops[0]);
// We might need to bitcast in order to store to a builtin.
bitcast_to_builtin_store(ops[0], rhs, expression_type(ops[1]));
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
// While this is purely cosmetic, this is important for legacy ESSL where loop
// variable increments must be in either i++ or i += const-expr.
// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
if (!optimize_read_modify_write(expression_type(ops[1]), lhs, rhs))
statement(lhs, " = ", rhs, ";");
register_write(ops[0]);
}
emit_store(ops[0], ops[1]);
}
// Storing a pointer results in a variable pointer, so we must conservatively assume
// we can write through it.
if (expression_type(ops[1]).pointer)

View File

@ -476,11 +476,11 @@ protected:
void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector<std::string> &arglist);
std::string to_expression(uint32_t id, bool register_expression_read = true);
std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true);
std::string to_unpacked_expression(uint32_t id);
std::string to_enclosed_unpacked_expression(uint32_t id);
std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true);
std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true);
std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true);
std::string to_pointer_expression(uint32_t id);
std::string to_enclosed_pointer_expression(uint32_t id);
std::string to_pointer_expression(uint32_t id, bool register_expression_read = true);
std::string to_enclosed_pointer_expression(uint32_t id, bool register_expression_read = true);
std::string to_extract_component_expression(uint32_t id, uint32_t index);
std::string enclose_expression(const std::string &expr);
std::string dereference_expression(const std::string &expr);
@ -624,6 +624,7 @@ protected:
void disallow_forwarding_in_expression_chain(const SPIRExpression &expr);
bool expression_is_constant_null(uint32_t id) const;
virtual void emit_store(uint32_t lhs_expression, uint32_t rhs_expression);
private:
void init()

View File

@ -1593,7 +1593,7 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
// Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead,
// but we will use the same mechanism as before.
// but we will use the same mechanism.
if (is_array(mbr_type) &&
(is_scalar(mbr_type) || is_vector(mbr_type)) &&
mbr_type.vecsize <= 2 &&
@ -1655,6 +1655,31 @@ MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t
return k;
}
void CompilerMSL::emit_store(uint32_t lhs_expression, uint32_t rhs_expression)
{
if (!has_decoration(lhs_expression, DecorationCPacked))
CompilerGLSL::emit_store(lhs_expression, rhs_expression);
else
{
// Special handling when storing to a float[] or float2[] in std140 layout.
auto &type = expression_type(lhs_expression);
string lhs = to_dereferenced_expression(lhs_expression);
string rhs = to_pointer_expression(rhs_expression);
// Unpack the expression so we can store to it with a float or float2.
// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
if (is_scalar(type))
lhs = enclose_expression(lhs) + ".x";
else if (is_vector(type) && type.vecsize == 2)
lhs = enclose_expression(lhs) + ".xy";
if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
statement(lhs, " = ", rhs, ";");
register_write(lhs_expression);
}
}
// Converts the format of the current expression from packed to unpacked,
// by wrapping the expression in a constructor of the appropriate type.
string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type)
@ -4003,7 +4028,7 @@ void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_
td_line += ";";
add_typedef_line(td_line);
}
else if (membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
{
// A "packed" float array, but we pad here instead to 4-vector.
override_type = membertype;

View File

@ -430,6 +430,7 @@ protected:
void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
void emit_store(uint32_t lhs_expression, uint32_t rhs_expression) override;
void analyze_sampled_image_usage();