Handle control-dependent temporaries.

Derivatives, subgroup, and implicit-LOD instructions must all be emitted
in the block in which they were created.
This commit is contained in:
Hans-Kristian Arntzen 2018-03-12 17:34:54 +01:00
parent 012377f811
commit 938c7debed
19 changed files with 486 additions and 8 deletions

View File

@ -14,8 +14,8 @@ env:
before_script:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew upgrade python3; fi
- ./checkout_glslang_spirv_tools.sh
script:
- ./checkout_glslang_spirv_tools.sh
- make -j2
- ./test_shaders.sh

View File

@ -0,0 +1,54 @@
Texture2D<float4> uSampler : register(t0);
SamplerState _uSampler_sampler : register(s0);
static float4 FragColor;
static float4 vInput;
// Stage input structure: carries vInput, bound to the TEXCOORD0 semantic.
// The entry point copies this field into the file-scope static vInput.
struct SPIRV_Cross_Input
{
float4 vInput : TEXCOORD0;
};
// Stage output structure: carries FragColor, bound to the SV_Target0 semantic.
// The entry point fills this field from the file-scope static FragColor.
struct SPIRV_Cross_Output
{
float4 FragColor : SV_Target0;
};
// Fragment body. The texture sample, all derivative results and the level-of-detail
// query are evaluated into temporaries before the branch; the conditional below only
// accumulates those already-computed values into FragColor.
void frag_main()
{
FragColor = vInput;
float4 _23 = uSampler.Sample(_uSampler_sampler, vInput.xy);
float4 _26 = ddx(vInput);
float4 _29 = ddy(vInput);
float4 _32 = fwidth(vInput);
float4 _35 = ddx_coarse(vInput);
float4 _38 = ddy_coarse(vInput);
// _41 and _50 are emitted as plain fwidth(), exactly like _32.
float4 _41 = fwidth(vInput);
float4 _44 = ddx_fine(vInput);
float4 _47 = ddy_fine(vInput);
float4 _50 = fwidth(vInput);
// The LOD query yields a scalar here; it is replicated into a float2 at its use below.
float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw);
if (vInput.y > 10.0f)
{
FragColor += _23;
FragColor += _26;
FragColor += _29;
FragColor += _32;
FragColor += _35;
FragColor += _38;
FragColor += _41;
FragColor += _44;
FragColor += _47;
FragColor += _50;
FragColor += float2(_56_tmp, _56_tmp).xyxy;
}
}
// Entry point wrapper: copies the stage input into the static vInput, runs frag_main(),
// then returns the static FragColor packed into the stage output structure.
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vInput = stage_input.vInput;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@ -0,0 +1,45 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Fragment stage input: vInput, tagged with the user(locn0) attribute.
struct main0_in
{
float4 vInput [[user(locn0)]];
};
// Fragment stage output: FragColor, tagged with the color(0) attribute.
struct main0_out
{
float4 FragColor [[color(0)]];
};
// Fragment entry point. The texture sample and every derivative result are evaluated
// into temporaries before the branch; the conditional only accumulates them into
// out.FragColor.
fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
{
main0_out out = {};
out.FragColor = in.vInput;
float4 _23 = uSampler.sample(uSamplerSmplr, in.vInput.xy);
// Only dfdx / dfdy / fwidth appear in this output; each is emitted three times.
float4 _26 = dfdx(in.vInput);
float4 _29 = dfdy(in.vInput);
float4 _32 = fwidth(in.vInput);
float4 _35 = dfdx(in.vInput);
float4 _38 = dfdy(in.vInput);
float4 _41 = fwidth(in.vInput);
float4 _44 = dfdx(in.vInput);
float4 _47 = dfdy(in.vInput);
float4 _50 = fwidth(in.vInput);
if (in.vInput.y > 10.0)
{
out.FragColor += _23;
out.FragColor += _26;
out.FragColor += _29;
out.FragColor += _32;
out.FragColor += _35;
out.FragColor += _38;
out.FragColor += _41;
out.FragColor += _44;
out.FragColor += _47;
out.FragColor += _50;
}
return out;
}

View File

@ -18,9 +18,11 @@ void main()
{
float _25 = _12.inputDataArray[gl_LocalInvocationID.x];
bool _31 = _25 > 0.0;
uvec4 _37 = uvec4(unpackUint2x32(ballotARB(_31)), 0u, 0u);
uint _44 = mbcntAMD(packUint2x32(uvec2(_37.xy)));
if (_31)
{
_74.inputDataArray[mbcntAMD(packUint2x32(uvec2(unpackUint2x32(ballotARB(_31)).xy)))] = _25;
_74.inputDataArray[_44] = _25;
}
}

View File

@ -0,0 +1,37 @@
#version 450
layout(binding = 0) uniform sampler2D uSampler;
layout(location = 0) out vec4 FragColor;
layout(location = 0) in vec4 vInput;
// Fragment entry point. The texture sample, the derivative results (default, coarse and
// fine variants) and the textureQueryLod result are evaluated into temporaries before
// the branch; the conditional only accumulates them into FragColor.
void main()
{
FragColor = vInput;
vec4 _23 = texture(uSampler, vInput.xy);
vec4 _26 = dFdx(vInput);
vec4 _29 = dFdy(vInput);
vec4 _32 = fwidth(vInput);
vec4 _35 = dFdxCoarse(vInput);
vec4 _38 = dFdyCoarse(vInput);
vec4 _41 = fwidthCoarse(vInput);
vec4 _44 = dFdxFine(vInput);
vec4 _47 = dFdyFine(vInput);
vec4 _50 = fwidthFine(vInput);
vec2 _56 = textureQueryLod(uSampler, vInput.zw);
if (vInput.y > 10.0)
{
FragColor += _23;
FragColor += _26;
FragColor += _29;
FragColor += _32;
FragColor += _35;
FragColor += _38;
FragColor += _41;
FragColor += _44;
FragColor += _47;
FragColor += _50;
// The vec2 LOD query result is widened to a vec4 via the .xyxy swizzle.
FragColor += _56.xyxy;
}
}

View File

@ -0,0 +1,55 @@
Texture2D<float4> uSampler : register(t0);
SamplerState _uSampler_sampler : register(s0);
static float4 FragColor;
static float4 vInput;
// Stage input structure: carries vInput, bound to the TEXCOORD0 semantic.
struct SPIRV_Cross_Input
{
float4 vInput : TEXCOORD0;
};
// Stage output structure: carries FragColor, bound to the SV_Target0 semantic.
struct SPIRV_Cross_Output
{
float4 FragColor : SV_Target0;
};
// Fragment body. The texture sample (t), the derivative results (d0..d8) and the
// level-of-detail query (lod) are all computed before the branch; the conditional only
// accumulates them into FragColor.
void frag_main()
{
FragColor = vInput;
float4 t = uSampler.Sample(_uSampler_sampler, vInput.xy);
float4 d0 = ddx(vInput);
float4 d1 = ddy(vInput);
float4 d2 = fwidth(vInput);
float4 d3 = ddx_coarse(vInput);
float4 d4 = ddy_coarse(vInput);
// d5 and d8 are emitted as plain fwidth(), exactly like d2.
float4 d5 = fwidth(vInput);
float4 d6 = ddx_fine(vInput);
float4 d7 = ddy_fine(vInput);
float4 d8 = fwidth(vInput);
// The LOD query yields a scalar, which is replicated into both components of lod.
float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw);
float2 lod = float2(_56_tmp, _56_tmp);
if (vInput.y > 10.0f)
{
FragColor += t;
FragColor += d0;
FragColor += d1;
FragColor += d2;
FragColor += d3;
FragColor += d4;
FragColor += d5;
FragColor += d6;
FragColor += d7;
FragColor += d8;
FragColor += lod.xyxy;
}
}
// Entry point wrapper: copies the stage input into the static vInput, runs frag_main(),
// then returns the static FragColor packed into the stage output structure.
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
{
vInput = stage_input.vInput;
frag_main();
SPIRV_Cross_Output stage_output;
stage_output.FragColor = FragColor;
return stage_output;
}

View File

@ -0,0 +1,45 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Fragment stage input: vInput, tagged with the user(locn0) attribute.
struct main0_in
{
float4 vInput [[user(locn0)]];
};
// Fragment stage output: FragColor, tagged with the color(0) attribute.
struct main0_out
{
float4 FragColor [[color(0)]];
};
// Fragment entry point. The texture sample (t) and the derivative results (d0..d8) are
// computed before the branch; the conditional only accumulates them into out.FragColor.
fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
{
main0_out out = {};
out.FragColor = in.vInput;
float4 t = uSampler.sample(uSamplerSmplr, in.vInput.xy);
// Only dfdx / dfdy / fwidth appear in this output; each is emitted three times.
float4 d0 = dfdx(in.vInput);
float4 d1 = dfdy(in.vInput);
float4 d2 = fwidth(in.vInput);
float4 d3 = dfdx(in.vInput);
float4 d4 = dfdy(in.vInput);
float4 d5 = fwidth(in.vInput);
float4 d6 = dfdx(in.vInput);
float4 d7 = dfdy(in.vInput);
float4 d8 = fwidth(in.vInput);
if (in.vInput.y > 10.0)
{
out.FragColor += t;
out.FragColor += d0;
out.FragColor += d1;
out.FragColor += d2;
out.FragColor += d3;
out.FragColor += d4;
out.FragColor += d5;
out.FragColor += d6;
out.FragColor += d7;
out.FragColor += d8;
}
return out;
}

View File

@ -18,7 +18,7 @@ void main()
{
float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x];
bool laneActive = thisLaneData > 0.0;
uint thisLaneOutputSlot = mbcntAMD(packUint2x32(uvec2(unpackUint2x32(ballotARB(laneActive)).xy)));
uint thisLaneOutputSlot = mbcntAMD(packUint2x32(uvec2(uvec4(unpackUint2x32(ballotARB(laneActive)), 0u, 0u).xy)));
int firstInvocation = readFirstInvocationARB(1);
int invocation = readInvocationARB(1, 0u);
vec3 swizzleInvocations = swizzleInvocationsAMD(vec3(0.0, 2.0, 1.0), uvec4(3u));

View File

@ -0,0 +1,37 @@
#version 450
layout(binding = 0) uniform sampler2D uSampler;
layout(location = 0) out vec4 FragColor;
layout(location = 0) in vec4 vInput;
// Fragment entry point. The texture sample (t), the derivative results (d0..d8, covering
// default, coarse and fine variants) and the textureQueryLod result (lod) are computed
// before the branch; the conditional only accumulates them into FragColor.
void main()
{
FragColor = vInput;
vec4 t = texture(uSampler, vInput.xy);
vec4 d0 = dFdx(vInput);
vec4 d1 = dFdy(vInput);
vec4 d2 = fwidth(vInput);
vec4 d3 = dFdxCoarse(vInput);
vec4 d4 = dFdyCoarse(vInput);
vec4 d5 = fwidthCoarse(vInput);
vec4 d6 = dFdxFine(vInput);
vec4 d7 = dFdyFine(vInput);
vec4 d8 = fwidthFine(vInput);
vec2 lod = textureQueryLod(uSampler, vInput.zw);
if (vInput.y > 10.0)
{
FragColor += t;
FragColor += d0;
FragColor += d1;
FragColor += d2;
FragColor += d3;
FragColor += d4;
FragColor += d5;
FragColor += d6;
FragColor += d7;
FragColor += d8;
// The vec2 LOD query result is widened to a vec4 via the .xyxy swizzle.
FragColor += lod.xyxy;
}
}

View File

@ -0,0 +1,36 @@
#version 450
layout(location = 0) out vec4 FragColor;
layout(binding = 0) uniform sampler2D uSampler;
layout(location = 0) in vec4 vInput;
// Fragment entry point. The texture sample, the derivative results and the
// textureQueryLod result are all computed into locals before the branch, and are only
// consumed (accumulated into FragColor) inside the conditional.
void main()
{
FragColor = vInput;
vec4 t = texture(uSampler, vInput.xy);
vec4 d0 = dFdx(vInput);
vec4 d1 = dFdy(vInput);
vec4 d2 = fwidth(vInput);
vec4 d3 = dFdxCoarse(vInput);
vec4 d4 = dFdyCoarse(vInput);
vec4 d5 = fwidthCoarse(vInput);
vec4 d6 = dFdxFine(vInput);
vec4 d7 = dFdyFine(vInput);
vec4 d8 = fwidthFine(vInput);
vec2 lod = textureQueryLod(uSampler, vInput.zw);
if (vInput.y > 10.0)
{
FragColor += t;
FragColor += d0;
FragColor += d1;
FragColor += d2;
FragColor += d3;
FragColor += d4;
FragColor += d5;
FragColor += d6;
FragColor += d7;
FragColor += d8;
// The vec2 LOD query result is widened to a vec4 via the .xyxy swizzle.
FragColor += lod.xyxy;
}
}

View File

@ -0,0 +1,34 @@
#version 450
layout(location = 0) out vec4 FragColor;
layout(binding = 0) uniform sampler2D uSampler;
layout(location = 0) in vec4 vInput;
// Fragment entry point. The texture sample and the derivative results are computed into
// locals before the branch, and are only consumed (accumulated into FragColor) inside
// the conditional. This variant contains no textureQueryLod call.
void main()
{
FragColor = vInput;
vec4 t = texture(uSampler, vInput.xy);
vec4 d0 = dFdx(vInput);
vec4 d1 = dFdy(vInput);
vec4 d2 = fwidth(vInput);
vec4 d3 = dFdxCoarse(vInput);
vec4 d4 = dFdyCoarse(vInput);
vec4 d5 = fwidthCoarse(vInput);
vec4 d6 = dFdxFine(vInput);
vec4 d7 = dFdyFine(vInput);
vec4 d8 = fwidthFine(vInput);
if (vInput.y > 10.0)
{
FragColor += t;
FragColor += d0;
FragColor += d1;
FragColor += d2;
FragColor += d3;
FragColor += d4;
FragColor += d5;
FragColor += d6;
FragColor += d7;
FragColor += d8;
}
}

View File

@ -0,0 +1,36 @@
#version 450
layout(location = 0) out vec4 FragColor;
layout(binding = 0) uniform sampler2D uSampler;
layout(location = 0) in vec4 vInput;
// Fragment entry point. The texture sample, the derivative results and the
// textureQueryLod result are all computed into locals before the branch, and are only
// consumed (accumulated into FragColor) inside the conditional.
void main()
{
FragColor = vInput;
vec4 t = texture(uSampler, vInput.xy);
vec4 d0 = dFdx(vInput);
vec4 d1 = dFdy(vInput);
vec4 d2 = fwidth(vInput);
vec4 d3 = dFdxCoarse(vInput);
vec4 d4 = dFdyCoarse(vInput);
vec4 d5 = fwidthCoarse(vInput);
vec4 d6 = dFdxFine(vInput);
vec4 d7 = dFdyFine(vInput);
vec4 d8 = fwidthFine(vInput);
vec2 lod = textureQueryLod(uSampler, vInput.zw);
if (vInput.y > 10.0)
{
FragColor += t;
FragColor += d0;
FragColor += d1;
FragColor += d2;
FragColor += d3;
FragColor += d4;
FragColor += d5;
FragColor += d6;
FragColor += d7;
FragColor += d8;
// The vec2 LOD query result is widened to a vec4 via the .xyxy swizzle.
FragColor += lod.xyxy;
}
}

View File

@ -662,6 +662,11 @@ struct SPIRBlock : IVariant
// fail to use a classic for-loop,
// we remove these variables, and fall back to regular variables outside the loop.
std::vector<uint32_t> loop_variables;
// Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or
// sub-group-like operations.
// Make sure that we only use these expressions in the original block.
std::vector<uint32_t> invalidate_expressions;
};
struct SPIRFunction : IVariant

View File

@ -351,6 +351,14 @@ void Compiler::flush_all_atomic_capable_variables()
flush_all_aliased_variables();
}
void Compiler::flush_control_dependent_expressions(uint32_t block_id)
{
auto &block = get<SPIRBlock>(block_id);
for (auto &expr : block.invalidate_expressions)
invalid_expressions.insert(expr);
block.invalidate_expressions.clear();
}
void Compiler::flush_all_active_variables()
{
// Invalidate all temporaries we read from variables in this block since they were forwarded.

View File

@ -612,6 +612,7 @@ protected:
// Dependency tracking for temporaries read from variables.
void flush_dependees(SPIRVariable &var);
void flush_all_active_variables();
void flush_control_dependent_expressions(uint32_t block);
void flush_all_atomic_capable_variables();
void flush_all_aliased_variables();
void register_global_read_dependencies(const SPIRBlock &func, uint32_t id);

View File

@ -3753,6 +3753,19 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
emit_op(result_type, id, expr, forward);
for (auto &inherit : inherited_expressions)
inherit_expression_dependencies(id, inherit);
switch (op)
{
case OpImageSampleDrefImplicitLod:
case OpImageSampleImplicitLod:
case OpImageSampleProjImplicitLod:
case OpImageSampleProjDrefImplicitLod:
register_control_dependent_expression(id);
break;
default:
break;
}
}
// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
@ -4307,18 +4320,22 @@ void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t
{
case SwizzleInvocationsAMD:
emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
register_control_dependent_expression(id);
break;
case SwizzleInvocationsMaskedAMD:
emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
register_control_dependent_expression(id);
break;
case WriteInvocationAMD:
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
register_control_dependent_expression(id);
break;
case MbcntAMD:
emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
register_control_dependent_expression(id);
break;
default:
@ -4423,6 +4440,7 @@ void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id,
{
string expr = "timeAMD()";
emit_op(result_type, id, expr, true);
register_control_dependent_expression(id);
break;
}
@ -5570,7 +5588,16 @@ bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &r
return true;
}
void CompilerGLSL::emit_block_instructions(const SPIRBlock &block)
// Records a control-dependent expression against the block currently being emitted,
// so it can later be invalidated via that block's invalidate_expressions list.
// Expressions that are not present in forwarded_temporaries are ignored.
//
// expr: ID of the expression to register.
void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
{
    const bool is_forwarded = forwarded_temporaries.find(expr) != forwarded_temporaries.end();
    if (is_forwarded)
    {
        // A forwarded expression can only be registered while a block is being emitted.
        assert(current_emitting_block);
        current_emitting_block->invalidate_expressions.push_back(expr);
    }
}
void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
{
current_emitting_block = &block;
for (auto &op : block.ops)
@ -6529,12 +6556,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
UFOP(dFdx);
if (is_legacy_es())
require_extension("GL_OES_standard_derivatives");
register_control_dependent_expression(ops[1]);
break;
case OpDPdy:
UFOP(dFdy);
if (is_legacy_es())
require_extension("GL_OES_standard_derivatives");
register_control_dependent_expression(ops[1]);
break;
case OpDPdxFine:
@ -6545,6 +6574,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
break;
case OpDPdyFine:
@ -6555,6 +6585,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
break;
case OpDPdxCoarse:
@ -6565,6 +6596,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
UFOP(dFdxCoarse);
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
break;
case OpDPdyCoarse:
@ -6575,12 +6607,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
break;
case OpFwidth:
UFOP(fwidth);
if (is_legacy_es())
require_extension("GL_OES_standard_derivatives");
register_control_dependent_expression(ops[1]);
break;
case OpFwidthCoarse:
@ -6591,6 +6625,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
break;
case OpFwidthFine:
@ -6601,6 +6636,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
}
if (options.version < 450)
require_extension("GL_ARB_derivative_control");
register_control_dependent_expression(ops[1]);
break;
// Bitfield
@ -6810,6 +6846,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
else
BFOP(textureQueryLod);
register_control_dependent_expression(ops[1]);
break;
}
@ -7160,7 +7197,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// We are synchronizing some memory or syncing execution,
// so we cannot forward any loads beyond the memory barrier.
if (semantics || opcode == OpControlBarrier)
{
assert(current_emitting_block);
flush_control_dependent_expressions(current_emitting_block->self);
flush_all_active_variables();
}
if (memory == ScopeWorkgroup) // Only need to consider memory within a group
{
@ -7242,10 +7283,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
uint32_t result_type = ops[0];
uint32_t id = ops[1];
string expr;
expr = join("unpackUint2x32(ballotARB(" + to_expression(ops[2]) + "))");
expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
emit_op(result_type, id, expr, true);
require_extension("GL_ARB_shader_ballot");
register_control_dependent_expression(ops[1]);
break;
}
@ -7256,6 +7298,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
require_extension("GL_ARB_shader_ballot");
register_control_dependent_expression(ops[1]);
break;
}
@ -7266,6 +7309,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
require_extension("GL_ARB_shader_ballot");
register_control_dependent_expression(ops[1]);
break;
}
@ -7276,6 +7320,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
require_extension("GL_ARB_shader_group_vote");
register_control_dependent_expression(ops[1]);
break;
}
@ -7286,6 +7331,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
require_extension("GL_ARB_shader_group_vote");
register_control_dependent_expression(ops[1]);
break;
}
@ -7296,6 +7342,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
require_extension("GL_ARB_shader_group_vote");
register_control_dependent_expression(ops[1]);
break;
}
@ -7307,6 +7354,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
require_extension("GL_AMD_shader_ballot");
register_control_dependent_expression(ops[1]);
break;
}
@ -7319,6 +7367,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
require_extension("GL_AMD_shader_ballot");
register_control_dependent_expression(ops[1]);
break;
}
@ -7331,6 +7380,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
require_extension("GL_AMD_shader_ballot");
register_control_dependent_expression(ops[1]);
break;
}
@ -8423,6 +8473,7 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
void CompilerGLSL::branch(uint32_t from, uint32_t to)
{
flush_phi(from, to);
flush_control_dependent_expressions(from);
flush_all_active_variables();
// This is only a continue if we branch to our loop dominator.

View File

@ -199,11 +199,10 @@ protected:
// Virtualize methods which need to be overridden by subclass targets like C++ and such.
virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags);
// Kinda ugly way to let opcodes peek at their neighbor instructions for trivial peephole scenarios.
const SPIRBlock *current_emitting_block = nullptr;
SPIRBlock *current_emitting_block = nullptr;
virtual void emit_instruction(const Instruction &instr);
void emit_block_instructions(const SPIRBlock &block);
void emit_block_instructions(SPIRBlock &block);
virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
uint32_t count);
virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op,
@ -517,6 +516,7 @@ protected:
bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure);
void register_call_out_argument(uint32_t id);
void register_impure_function_call();
void register_control_dependent_expression(uint32_t expr);
// GL_EXT_shader_pixel_local_storage support.
std::vector<PlsRemap> pls_inputs;

View File

@ -2692,6 +2692,20 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
for (auto &inherit : inherited_expressions)
inherit_expression_dependencies(id, inherit);
switch (op)
{
case OpImageSampleDrefImplicitLod:
case OpImageSampleImplicitLod:
case OpImageSampleProjImplicitLod:
case OpImageSampleProjDrefImplicitLod:
case OpImageQueryLod:
register_control_dependent_expression(id);
break;
default:
break;
}
}
string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
@ -3599,32 +3613,39 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
case OpDPdx:
UFOP(ddx);
register_control_dependent_expression(ops[1]);
break;
case OpDPdy:
UFOP(ddy);
register_control_dependent_expression(ops[1]);
break;
case OpDPdxFine:
UFOP(ddx_fine);
register_control_dependent_expression(ops[1]);
break;
case OpDPdyFine:
UFOP(ddy_fine);
register_control_dependent_expression(ops[1]);
break;
case OpDPdxCoarse:
UFOP(ddx_coarse);
register_control_dependent_expression(ops[1]);
break;
case OpDPdyCoarse:
UFOP(ddy_coarse);
register_control_dependent_expression(ops[1]);
break;
case OpFwidth:
case OpFwidthCoarse:
case OpFwidthFine:
UFOP(fwidth);
register_control_dependent_expression(ops[1]);
break;
case OpLogicalNot:
@ -4031,7 +4052,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
// We are synchronizing some memory or syncing execution,
// so we cannot forward any loads beyond the memory barrier.
if (semantics || opcode == OpControlBarrier)
{
assert(current_emitting_block);
flush_control_dependent_expressions(current_emitting_block->self);
flush_all_active_variables();
}
if (opcode == OpControlBarrier)
{

View File

@ -1464,18 +1464,21 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
case OpDPdxFine:
case OpDPdxCoarse:
UFOP(dfdx);
register_control_dependent_expression(ops[1]);
break;
case OpDPdy:
case OpDPdyFine:
case OpDPdyCoarse:
UFOP(dfdy);
register_control_dependent_expression(ops[1]);
break;
case OpFwidth:
case OpFwidthCoarse:
case OpFwidthFine:
UFOP(fwidth);
register_control_dependent_expression(ops[1]);
break;
// Bitfield
@ -1874,6 +1877,10 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
bar_stmt += ");";
statement(bar_stmt);
assert(current_emitting_block);
flush_control_dependent_expressions(current_emitting_block->self);
flush_all_active_variables();
}
// Since MSL does not allow structs to be nested within the stage_in struct, the original input