Deal with packing/unpacking on store.
Still a bit buggy, since we cannot deduce between float2[] and packed_float2. Need a deeper refactor to plumb this through ...
This commit is contained in:
parent
64ca1ec677
commit
15b52bee48
@ -9,7 +9,7 @@ struct foo
|
||||
packed_float3 baz;
|
||||
uchar quux;
|
||||
packed_uchar4 blah;
|
||||
packed_half2 wibble;
|
||||
half4 wibble;
|
||||
};
|
||||
|
||||
kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]])
|
||||
@ -17,6 +17,6 @@ kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thr
|
||||
_8.bar = gl_LocalInvocationID.x;
|
||||
_8.baz = float3(gl_GlobalInvocationID);
|
||||
_8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u));
|
||||
_8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy));
|
||||
_8.wibble.xy = half2(float2(_8.wibble.xy) * float2(gl_NumWorkGroups.xy));
|
||||
}
|
||||
|
||||
|
@ -69,7 +69,7 @@ struct SSBO1
|
||||
|
||||
struct S0_1
|
||||
{
|
||||
float2 a[1];
|
||||
float4 a[1];
|
||||
float b;
|
||||
};
|
||||
|
||||
@ -115,21 +115,21 @@ struct SSBO0
|
||||
Content_1 content;
|
||||
Content_1 content1[2];
|
||||
Content_1 content2;
|
||||
float array[1];
|
||||
float4 array[1];
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [[buffer(1)]])
|
||||
{
|
||||
Content_1 _60 = ssbo_140.content;
|
||||
ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0];
|
||||
ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy;
|
||||
ssbo_430.content.m0s[0].b = _60.m0s[0].b;
|
||||
ssbo_430.content.m1s[0].a = _60.m1s[0].a;
|
||||
ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a);
|
||||
ssbo_430.content.m1s[0].b = _60.m1s[0].b;
|
||||
ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0];
|
||||
ssbo_430.content.m2s[0].b = _60.m2s[0].b;
|
||||
ssbo_430.content.m0.a[0] = _60.m0.a[0];
|
||||
ssbo_430.content.m0.a[0] = _60.m0.a[0].xy;
|
||||
ssbo_430.content.m0.b = _60.m0.b;
|
||||
ssbo_430.content.m1.a = _60.m1.a;
|
||||
ssbo_430.content.m1.a = float3(_60.m1.a);
|
||||
ssbo_430.content.m1.b = _60.m1.b;
|
||||
ssbo_430.content.m2.a[0] = _60.m2.a[0];
|
||||
ssbo_430.content.m2.b = _60.m2.b;
|
||||
|
@ -9,7 +9,7 @@ struct foo
|
||||
packed_float3 baz;
|
||||
uchar quux;
|
||||
packed_uchar4 blah;
|
||||
packed_half2 wibble;
|
||||
half4 wibble;
|
||||
};
|
||||
|
||||
kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_NumWorkGroups [[threadgroups_per_grid]])
|
||||
@ -17,6 +17,6 @@ kernel void main0(device foo& _8 [[buffer(0)]], uint3 gl_LocalInvocationID [[thr
|
||||
_8.bar = gl_LocalInvocationID.x;
|
||||
_8.baz = float3(gl_GlobalInvocationID);
|
||||
_8.blah = uchar4(uint4(uint4(uchar4(_8.blah)).xyz + gl_WorkGroupID, 0u));
|
||||
_8.wibble = half2(float2(half2(_8.wibble)) * float2(gl_NumWorkGroups.xy));
|
||||
_8.wibble.xy = half2(float2(_8.wibble.xy) * float2(gl_NumWorkGroups.xy));
|
||||
}
|
||||
|
||||
|
@ -32,7 +32,7 @@ float4 _main(thread const float4& pos, constant buf& v_11)
|
||||
{
|
||||
int _46 = int(pos.x) % 16;
|
||||
Foo_1 foo;
|
||||
foo.a = v_11.results[_46].a;
|
||||
foo.a = float3(v_11.results[_46].a);
|
||||
foo.b = v_11.results[_46].b;
|
||||
return float4(dot(foo.a, v_11.bar.xyz), foo.b, 0.0, 0.0);
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ constant uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
|
||||
kernel void main0(device Buffer0& _15 [[buffer(1)]], device Buffer1& _34 [[buffer(2)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
T1_1 v;
|
||||
v.a = _15.buf0[0].a;
|
||||
v.a = float3(_15.buf0[0].a);
|
||||
v.b = _15.buf0[0].b;
|
||||
float x = v.b;
|
||||
_34.buf1[gl_GlobalInvocationID.x] = x;
|
||||
|
@ -69,7 +69,7 @@ struct SSBO1
|
||||
|
||||
struct S0_1
|
||||
{
|
||||
float2 a[1];
|
||||
float4 a[1];
|
||||
float b;
|
||||
};
|
||||
|
||||
@ -115,21 +115,21 @@ struct SSBO0
|
||||
Content_1 content;
|
||||
Content_1 content1[2];
|
||||
Content_1 content2;
|
||||
float array[1];
|
||||
float4 array[1];
|
||||
};
|
||||
|
||||
kernel void main0(device SSBO0& ssbo_140 [[buffer(0)]], device SSBO1& ssbo_430 [[buffer(1)]])
|
||||
{
|
||||
Content_1 _60 = ssbo_140.content;
|
||||
ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0];
|
||||
ssbo_430.content.m0s[0].a[0] = _60.m0s[0].a[0].xy;
|
||||
ssbo_430.content.m0s[0].b = _60.m0s[0].b;
|
||||
ssbo_430.content.m1s[0].a = _60.m1s[0].a;
|
||||
ssbo_430.content.m1s[0].a = float3(_60.m1s[0].a);
|
||||
ssbo_430.content.m1s[0].b = _60.m1s[0].b;
|
||||
ssbo_430.content.m2s[0].a[0] = _60.m2s[0].a[0];
|
||||
ssbo_430.content.m2s[0].b = _60.m2s[0].b;
|
||||
ssbo_430.content.m0.a[0] = _60.m0.a[0];
|
||||
ssbo_430.content.m0.a[0] = _60.m0.a[0].xy;
|
||||
ssbo_430.content.m0.b = _60.m0.b;
|
||||
ssbo_430.content.m1.a = _60.m1.a;
|
||||
ssbo_430.content.m1.a = float3(_60.m1.a);
|
||||
ssbo_430.content.m1.b = _60.m1.b;
|
||||
ssbo_430.content.m2.a[0] = _60.m2.a[0];
|
||||
ssbo_430.content.m2.b = _60.m2.b;
|
||||
|
@ -35,7 +35,7 @@ struct main0_out
|
||||
float4 _main(thread const VertexOutput& IN, constant CB0& v_26)
|
||||
{
|
||||
TestStruct_1 st;
|
||||
st.position = v_26.CB0[1].position;
|
||||
st.position = float3(v_26.CB0[1].position);
|
||||
st.radius = v_26.CB0[1].radius;
|
||||
float4 col = float4(st.position, st.radius);
|
||||
return col;
|
||||
|
@ -43,7 +43,7 @@ vertex main0_out main0(main0_in in [[stage_in]], constant UBO& _21 [[buffer(0)]]
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
Light_1 light;
|
||||
light.Position = _21.lights[i].Position;
|
||||
light.Position = float3(_21.lights[i].Position);
|
||||
light.Radius = _21.lights[i].Radius;
|
||||
light.Color = _21.lights[i].Color;
|
||||
float3 L = in.aVertex.xyz - light.Position;
|
||||
|
@ -2486,26 +2486,26 @@ string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expressio
|
||||
return enclose_expression(to_expression(id, register_expression_read));
|
||||
}
|
||||
|
||||
string CompilerGLSL::to_unpacked_expression(uint32_t id)
|
||||
string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read)
|
||||
{
|
||||
// If we need to transpose, it will also take care of unpacking rules.
|
||||
auto *e = maybe_get<SPIRExpression>(id);
|
||||
bool need_transpose = e && e->need_transpose;
|
||||
if (!need_transpose && has_decoration(id, DecorationCPacked))
|
||||
return unpack_expression_type(to_expression(id), expression_type(id));
|
||||
return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id));
|
||||
else
|
||||
return to_expression(id);
|
||||
return to_expression(id, register_expression_read);
|
||||
}
|
||||
|
||||
string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id)
|
||||
string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read)
|
||||
{
|
||||
// If we need to transpose, it will also take care of unpacking rules.
|
||||
auto *e = maybe_get<SPIRExpression>(id);
|
||||
bool need_transpose = e && e->need_transpose;
|
||||
if (!need_transpose && has_decoration(id, DecorationCPacked))
|
||||
return unpack_expression_type(to_expression(id), expression_type(id));
|
||||
return unpack_expression_type(to_expression(id, register_expression_read), expression_type(id));
|
||||
else
|
||||
return to_enclosed_expression(id);
|
||||
return to_enclosed_expression(id, register_expression_read);
|
||||
}
|
||||
|
||||
string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read)
|
||||
@ -2517,22 +2517,22 @@ string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expre
|
||||
return to_expression(id, register_expression_read);
|
||||
}
|
||||
|
||||
string CompilerGLSL::to_pointer_expression(uint32_t id)
|
||||
string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read)
|
||||
{
|
||||
auto &type = expression_type(id);
|
||||
if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
|
||||
return address_of_expression(to_enclosed_expression(id));
|
||||
return address_of_expression(to_enclosed_expression(id, register_expression_read));
|
||||
else
|
||||
return to_expression(id);
|
||||
return to_unpacked_expression(id, register_expression_read);
|
||||
}
|
||||
|
||||
string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id)
|
||||
string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read)
|
||||
{
|
||||
auto &type = expression_type(id);
|
||||
if (type.pointer && expression_is_lvalue(id) && !should_dereference(id))
|
||||
return address_of_expression(to_enclosed_expression(id));
|
||||
return address_of_expression(to_enclosed_expression(id, register_expression_read));
|
||||
else
|
||||
return to_enclosed_expression(id);
|
||||
return to_enclosed_expression(id, register_expression_read);
|
||||
}
|
||||
|
||||
string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index)
|
||||
@ -6603,6 +6603,30 @@ void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_
|
||||
disallow_forwarding_in_expression_chain(*expr);
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_store(uint32_t lhs_expression, uint32_t rhs_expression)
|
||||
{
|
||||
auto rhs = to_pointer_expression(rhs_expression);
|
||||
|
||||
// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
|
||||
if (!rhs.empty())
|
||||
{
|
||||
handle_store_to_invariant_variable(lhs_expression, rhs_expression);
|
||||
|
||||
auto lhs = to_dereferenced_expression(lhs_expression);
|
||||
|
||||
// We might need to bitcast in order to store to a builtin.
|
||||
bitcast_to_builtin_store(lhs_expression, rhs, expression_type(rhs_expression));
|
||||
|
||||
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
|
||||
// While this is purely cosmetic, this is important for legacy ESSL where loop
|
||||
// variable increments must be in either i++ or i += const-expr.
|
||||
// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
|
||||
if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
|
||||
statement(lhs, " = ", rhs, ";");
|
||||
register_write(lhs_expression);
|
||||
}
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
{
|
||||
auto ops = stream(instruction);
|
||||
@ -6743,27 +6767,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
||||
}
|
||||
else
|
||||
{
|
||||
auto rhs = to_pointer_expression(ops[1]);
|
||||
|
||||
// Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null.
|
||||
if (!rhs.empty())
|
||||
{
|
||||
handle_store_to_invariant_variable(ops[0], ops[1]);
|
||||
|
||||
auto lhs = to_dereferenced_expression(ops[0]);
|
||||
|
||||
// We might need to bitcast in order to store to a builtin.
|
||||
bitcast_to_builtin_store(ops[0], rhs, expression_type(ops[1]));
|
||||
|
||||
// Tries to optimize assignments like "<lhs> = <lhs> op expr".
|
||||
// While this is purely cosmetic, this is important for legacy ESSL where loop
|
||||
// variable increments must be in either i++ or i += const-expr.
|
||||
// Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0.
|
||||
if (!optimize_read_modify_write(expression_type(ops[1]), lhs, rhs))
|
||||
statement(lhs, " = ", rhs, ";");
|
||||
register_write(ops[0]);
|
||||
}
|
||||
emit_store(ops[0], ops[1]);
|
||||
}
|
||||
|
||||
// Storing a pointer results in a variable pointer, so we must conservatively assume
|
||||
// we can write through it.
|
||||
if (expression_type(ops[1]).pointer)
|
||||
|
@ -476,11 +476,11 @@ protected:
|
||||
void append_global_func_args(const SPIRFunction &func, uint32_t index, std::vector<std::string> &arglist);
|
||||
std::string to_expression(uint32_t id, bool register_expression_read = true);
|
||||
std::string to_enclosed_expression(uint32_t id, bool register_expression_read = true);
|
||||
std::string to_unpacked_expression(uint32_t id);
|
||||
std::string to_enclosed_unpacked_expression(uint32_t id);
|
||||
std::string to_unpacked_expression(uint32_t id, bool register_expression_read = true);
|
||||
std::string to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read = true);
|
||||
std::string to_dereferenced_expression(uint32_t id, bool register_expression_read = true);
|
||||
std::string to_pointer_expression(uint32_t id);
|
||||
std::string to_enclosed_pointer_expression(uint32_t id);
|
||||
std::string to_pointer_expression(uint32_t id, bool register_expression_read = true);
|
||||
std::string to_enclosed_pointer_expression(uint32_t id, bool register_expression_read = true);
|
||||
std::string to_extract_component_expression(uint32_t id, uint32_t index);
|
||||
std::string enclose_expression(const std::string &expr);
|
||||
std::string dereference_expression(const std::string &expr);
|
||||
@ -624,6 +624,7 @@ protected:
|
||||
void disallow_forwarding_in_expression_chain(const SPIRExpression &expr);
|
||||
|
||||
bool expression_is_constant_null(uint32_t id) const;
|
||||
virtual void emit_store(uint32_t lhs_expression, uint32_t rhs_expression);
|
||||
|
||||
private:
|
||||
void init()
|
||||
|
@ -1593,7 +1593,7 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
|
||||
unpacked_mbr_size = component_size * mbr_type.vecsize * mbr_type.columns;
|
||||
|
||||
// Special case for packing. Check for float[] or vec2[] in std140 layout. Here we actually need to pad out instead,
|
||||
// but we will use the same mechanism as before.
|
||||
// but we will use the same mechanism.
|
||||
if (is_array(mbr_type) &&
|
||||
(is_scalar(mbr_type) || is_vector(mbr_type)) &&
|
||||
mbr_type.vecsize <= 2 &&
|
||||
@ -1655,6 +1655,31 @@ MSLStructMemberKey CompilerMSL::get_struct_member_key(uint32_t type_id, uint32_t
|
||||
return k;
|
||||
}
|
||||
|
||||
void CompilerMSL::emit_store(uint32_t lhs_expression, uint32_t rhs_expression)
|
||||
{
|
||||
if (!has_decoration(lhs_expression, DecorationCPacked))
|
||||
CompilerGLSL::emit_store(lhs_expression, rhs_expression);
|
||||
else
|
||||
{
|
||||
// Special handling when storing to a float[] or float2[] in std140 layout.
|
||||
|
||||
auto &type = expression_type(lhs_expression);
|
||||
string lhs = to_dereferenced_expression(lhs_expression);
|
||||
string rhs = to_pointer_expression(rhs_expression);
|
||||
|
||||
// Unpack the expression so we can store to it with a float or float2.
|
||||
// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
|
||||
if (is_scalar(type))
|
||||
lhs = enclose_expression(lhs) + ".x";
|
||||
else if (is_vector(type) && type.vecsize == 2)
|
||||
lhs = enclose_expression(lhs) + ".xy";
|
||||
|
||||
if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
|
||||
statement(lhs, " = ", rhs, ";");
|
||||
register_write(lhs_expression);
|
||||
}
|
||||
}
|
||||
|
||||
// Converts the format of the current expression from packed to unpacked,
|
||||
// by wrapping the expression in a constructor of the appropriate type.
|
||||
string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type)
|
||||
@ -4003,7 +4028,7 @@ void CompilerMSL::emit_struct_member(const SPIRType &type, uint32_t member_type_
|
||||
td_line += ";";
|
||||
add_typedef_line(td_line);
|
||||
}
|
||||
else if (membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
|
||||
else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
|
||||
{
|
||||
// A "packed" float array, but we pad here instead to 4-vector.
|
||||
override_type = membertype;
|
||||
|
@ -430,6 +430,7 @@ protected:
|
||||
|
||||
void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override;
|
||||
void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override;
|
||||
void emit_store(uint32_t lhs_expression, uint32_t rhs_expression) override;
|
||||
|
||||
void analyze_sampled_image_usage();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user