Merge pull request #2233 from cdavis5e/agx-cube-grad-fixup

MSL: Work around broken cube texture gradients on Apple Silicon.
This commit is contained in:
Hans-Kristian Arntzen 2023-11-28 12:05:55 +01:00 committed by GitHub
commit 50e90dd74e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 382 additions and 15 deletions

View File

@ -243,7 +243,7 @@ set(spirv-cross-util-sources
${CMAKE_CURRENT_SOURCE_DIR}/spirv_cross_util.hpp)
set(spirv-cross-abi-major 0)
set(spirv-cross-abi-minor 57)
set(spirv-cross-abi-minor 58)
set(spirv-cross-abi-patch 0)
set(SPIRV_CROSS_VERSION ${spirv-cross-abi-major}.${spirv-cross-abi-minor}.${spirv-cross-abi-patch})

View File

@ -678,6 +678,8 @@ struct CLIArguments
bool msl_sample_dref_lod_array_as_grad = false;
bool msl_runtime_array_rich_descriptor = false;
bool msl_replace_recursive_inputs = false;
bool msl_readwrite_texture_fences = true;
bool msl_agx_manual_cube_grad_fixup = false;
const char *msl_combined_sampler_suffix = nullptr;
bool glsl_emit_push_constant_as_ubo = false;
bool glsl_emit_ubo_as_plain_uniforms = false;
@ -958,6 +960,14 @@ static void print_help_msl()
"\t\tSome Metal devices have a bug where the level() argument to\n"
"\t\tdepth2d_array<T>::sample_compare() in a fragment shader is biased by some\n"
"\t\tunknown amount. This prevents the bias from being added.\n"
"\t[--msl-no-readwrite-texture-fences]:\n\t\tDo not insert fences before each read of a\n"
"\t\tread_write texture. MSL does not guarantee coherence between writes and later reads\n"
"\t\tof read_write textures. If you don't rely on this, you can disable this for a\n"
"\t\tpossible performance improvement.\n"
"\t[--msl-agx-manual-cube-grad-fixup]:\n\t\tManually transform cube texture gradients.\n"
"\t\tAll released Apple Silicon GPUs to date ignore one of the three partial derivatives\n"
"\t\tbased on the selected major axis, and expect the remaining derivatives to be\n"
"\t\tpartially transformed. This fixup gives correct results on Apple Silicon.\n"
"\t[--msl-combined-sampler-suffix <suffix>]:\n\t\tUses a custom suffix for combined samplers.\n");
// clang-format on
}
@ -1236,6 +1246,8 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
msl_opts.ios_support_base_vertex_instance = true;
msl_opts.runtime_array_rich_descriptor = args.msl_runtime_array_rich_descriptor;
msl_opts.replace_recursive_inputs = args.msl_replace_recursive_inputs;
msl_opts.readwrite_texture_fences = args.msl_readwrite_texture_fences;
msl_opts.agx_manual_cube_grad_fixup = args.msl_agx_manual_cube_grad_fixup;
msl_comp->set_msl_options(msl_opts);
for (auto &v : args.msl_discrete_descriptor_sets)
msl_comp->add_discrete_descriptor_set(v);
@ -1790,6 +1802,8 @@ static int main_inner(int argc, char *argv[])
cbs.add("--msl-check-discarded-frag-stores", [&args](CLIParser &) { args.msl_check_discarded_frag_stores = true; });
cbs.add("--msl-sample-dref-lod-array-as-grad",
[&args](CLIParser &) { args.msl_sample_dref_lod_array_as_grad = true; });
cbs.add("--msl-no-readwrite-texture-fences", [&args](CLIParser &) { args.msl_readwrite_texture_fences = false; });
cbs.add("--msl-agx-manual-cube-grad-fixup", [&args](CLIParser &) { args.msl_agx_manual_cube_grad_fixup = true; });
cbs.add("--msl-combined-sampler-suffix", [&args](CLIParser &parser) {
args.msl_combined_sampler_suffix = parser.next_string();
});

View File

@ -0,0 +1,45 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Workaround helper: builds the gradientcube argument for a cube texture sample
// from the sample direction P and its explicit derivatives dPdx / dPdy.
// Rather than forwarding the derivatives unchanged, it projects them relative to
// the major axis of P and packs the result into selected components.
static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)
{
// Major axis selection
float3 absP = abs(P);
// X wins ties against Y and Z. yMajor is only consulted when xMajor is false,
// so it only has to compare Y against Z (Y wins ties).
bool xMajor = absP.x >= max(absP.y, absP.z);
bool yMajor = absP.y >= absP.z;
// Rotate the components so that the major axis always ends up in .z and the
// two minor axes end up in .xy.
float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);
float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);
float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);
// Skip a couple of operations compared to usual projection
// d.xy is derived from dPdx and d.zw from dPdy: minor-axis derivative minus
// (minor / major) * major-axis derivative.
// NOTE(review): for finite P, Q.z is only zero when P is the zero vector; there is no guard for that case.
float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);
// Final swizzle to put the intermediate values into non-ignored components
// X major: X and Z
// Y major: X and Y
// Z major: Y and Z
// Y-major and Z-major share one swizzle (d.xyx / d.zwz); the duplicated component
// presumably lands in the slot that is ignored for that axis.
return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);
}
// Fragment stage outputs; o_color is written to color attachment 0.
struct main0_out
{
float4 o_color [[color(0)]];
};
// Fragment stage inputs.
struct main0_in
{
// xyz is used as the cube sample direction, w as the (unrounded) array layer.
float4 v_texCoord [[user(locn0)]];
// x is used as the depth-compare reference, y as the level of detail.
float2 v_drefLodBias [[user(locn1)]];
};
// Fragment entry point: depth-compare sample of a cube array texture where the
// level of detail is expressed as explicit gradients.
// - coordinate: in.v_texCoord.xyz, array layer: in.v_texCoord.w rounded with rint()
// - compare value: in.v_drefLodBias.x
// - both gradients are exp2(lod - 0.5) / width with lod = in.v_drefLodBias.y,
//   routed through spvGradientCube() together with the sample direction.
// The comparison result goes to the red channel; green/blue are 0 and alpha is 1.
fragment main0_out main0(main0_in in [[stage_in]], depthcube_array<float> u_sampler [[texture(0)]], sampler u_samplerSmplr [[sampler(0)]])
{
main0_out out = {};
out.o_color = float4(u_sampler.sample_compare(u_samplerSmplr, in.v_texCoord.xyz, uint(rint(in.v_texCoord.w)), in.v_drefLodBias.x, spvGradientCube(in.v_texCoord.xyz, exp2(in.v_drefLodBias.y - 0.5) / float3(u_sampler.get_width()), exp2(in.v_drefLodBias.y - 0.5) / float3(u_sampler.get_width()))), 0.0, 0.0, 1.0);
return out;
}

View File

@ -0,0 +1,44 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Workaround helper: builds the gradientcube argument for a cube texture sample
// from the sample direction P and its explicit derivatives dPdx / dPdy.
// Rather than forwarding the derivatives unchanged, it projects them relative to
// the major axis of P and packs the result into selected components.
static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)
{
// Major axis selection
float3 absP = abs(P);
// X wins ties against Y and Z. yMajor is only consulted when xMajor is false,
// so it only has to compare Y against Z (Y wins ties).
bool xMajor = absP.x >= max(absP.y, absP.z);
bool yMajor = absP.y >= absP.z;
// Rotate the components so that the major axis always ends up in .z and the
// two minor axes end up in .xy.
float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);
float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);
float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);
// Skip a couple of operations compared to usual projection
// d.xy is derived from dPdx and d.zw from dPdy: minor-axis derivative minus
// (minor / major) * major-axis derivative.
// NOTE(review): for finite P, Q.z is only zero when P is the zero vector; there is no guard for that case.
float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);
// Final swizzle to put the intermediate values into non-ignored components
// X major: X and Z
// Y major: X and Y
// Z major: Y and Z
// Y-major and Z-major share one swizzle (d.xyx / d.zwz); the duplicated component
// presumably lands in the slot that is ignored for that axis.
return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);
}
// Fragment stage outputs; FragColor is written to color attachment 0.
struct main0_out
{
float4 FragColor [[color(0)]];
};
// Fragment stage inputs.
struct main0_in
{
// Cube sample direction; declared flat, i.e. not interpolated.
float3 vTex [[user(locn0), flat]];
};
// Fragment entry point: samples a cube texture with explicit gradients.
// The constant gradients float3(5.0) and float3(8.0) are passed through
// spvGradientCube() together with the sample direction instead of being handed
// to gradientcube() directly.
fragment main0_out main0(main0_in in [[stage_in]], texturecube<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
{
// out is zero-initialized, so the += below accumulates onto zero.
main0_out out = {};
out.FragColor += uSampler.sample(uSamplerSmplr, in.vTex, spvGradientCube(in.vTex, float3(5.0), float3(8.0)));
return out;
}

View File

@ -0,0 +1,55 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Workaround helper: builds the gradientcube argument for a cube texture sample
// from the sample direction P and its explicit derivatives dPdx / dPdy.
// Rather than forwarding the derivatives unchanged, it projects them relative to
// the major axis of P and packs the result into selected components.
static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)
{
// Major axis selection
float3 absP = abs(P);
// X wins ties against Y and Z. yMajor is only consulted when xMajor is false,
// so it only has to compare Y against Z (Y wins ties).
bool xMajor = absP.x >= max(absP.y, absP.z);
bool yMajor = absP.y >= absP.z;
// Rotate the components so that the major axis always ends up in .z and the
// two minor axes end up in .xy.
float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);
float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);
float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);
// Skip a couple of operations compared to usual projection
// d.xy is derived from dPdx and d.zw from dPdy: minor-axis derivative minus
// (minor / major) * major-axis derivative.
// NOTE(review): for finite P, Q.z is only zero when P is the zero vector; there is no guard for that case.
float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);
// Final swizzle to put the intermediate values into non-ignored components
// X major: X and Z
// Y major: X and Y
// Z major: Y and Z
// Y-major and Z-major share one swizzle (d.xyx / d.zwz); the duplicated component
// presumably lands in the slot that is ignored for that axis.
return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);
}
// Layout of a uniform block holding a single float4 member, u_scale.
// Only the type is declared here; main0 in this shader takes no argument of this type.
struct buf0
{
float4 u_scale;
};
// Layout of a uniform block holding a single float4 member, u_bias.
// Only the type is declared here; main0 in this shader takes no argument of this type.
struct buf1
{
float4 u_bias;
};
// Fragment stage outputs; o_color is written to color attachment 0.
struct main0_out
{
float4 o_color [[color(0)]];
};
// Fragment stage inputs.
struct main0_in
{
// xyz is used as the cube sample direction, w as the (unrounded) array layer.
float4 v_texCoord [[user(locn0)]];
// x is used as the depth-compare reference, y as the level of detail.
float2 v_drefLodBias [[user(locn1)]];
};
// Fragment entry point: depth-compare sample of a cube array texture where the
// level of detail is expressed as explicit gradients.
// - coordinate: in.v_texCoord.xyz, array layer: in.v_texCoord.w rounded with rint()
// - compare value: in.v_drefLodBias.x
// - both gradients are exp2(lod - 0.5) / width with lod = in.v_drefLodBias.y,
//   routed through spvGradientCube() together with the sample direction.
// The comparison result goes to the red channel; green/blue are 0 and alpha is 1.
fragment main0_out main0(main0_in in [[stage_in]], depthcube_array<float> u_sampler [[texture(0)]], sampler u_samplerSmplr [[sampler(0)]])
{
main0_out out = {};
out.o_color = float4(u_sampler.sample_compare(u_samplerSmplr, in.v_texCoord.xyz, uint(rint(in.v_texCoord.w)), in.v_drefLodBias.x, spvGradientCube(in.v_texCoord.xyz, exp2(in.v_drefLodBias.y - 0.5) / float3(u_sampler.get_width()), exp2(in.v_drefLodBias.y - 0.5) / float3(u_sampler.get_width()))), 0.0, 0.0, 1.0);
return out;
}

View File

@ -0,0 +1,44 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Workaround helper: builds the gradientcube argument for a cube texture sample
// from the sample direction P and its explicit derivatives dPdx / dPdy.
// Rather than forwarding the derivatives unchanged, it projects them relative to
// the major axis of P and packs the result into selected components.
static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)
{
// Major axis selection
float3 absP = abs(P);
// X wins ties against Y and Z. yMajor is only consulted when xMajor is false,
// so it only has to compare Y against Z (Y wins ties).
bool xMajor = absP.x >= max(absP.y, absP.z);
bool yMajor = absP.y >= absP.z;
// Rotate the components so that the major axis always ends up in .z and the
// two minor axes end up in .xy.
float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);
float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);
float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);
// Skip a couple of operations compared to usual projection
// d.xy is derived from dPdx and d.zw from dPdy: minor-axis derivative minus
// (minor / major) * major-axis derivative.
// NOTE(review): for finite P, Q.z is only zero when P is the zero vector; there is no guard for that case.
float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);
// Final swizzle to put the intermediate values into non-ignored components
// X major: X and Z
// Y major: X and Y
// Z major: Y and Z
// Y-major and Z-major share one swizzle (d.xyx / d.zwz); the duplicated component
// presumably lands in the slot that is ignored for that axis.
return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);
}
// Fragment stage outputs; FragColor is written to color attachment 0.
struct main0_out
{
float4 FragColor [[color(0)]];
};
// Fragment stage inputs.
struct main0_in
{
// Cube sample direction; declared flat, i.e. not interpolated.
float3 vTex [[user(locn0), flat]];
};
// Fragment entry point: samples a cube texture with explicit gradients.
// The constant gradients float3(5.0) and float3(8.0) are passed through
// spvGradientCube() together with the sample direction instead of being handed
// to gradientcube() directly.
fragment main0_out main0(main0_in in [[stage_in]], texturecube<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
{
// out is zero-initialized, so the += below accumulates onto zero.
main0_out out = {};
out.FragColor += uSampler.sample(uSamplerSmplr, in.vTex, spvGradientCube(in.vTex, float3(5.0), float3(8.0)));
return out;
}

View File

@ -0,0 +1,86 @@
; SPIR-V
; Version: 1.3
; Generator: Khronos Glslang Reference Front End; 11
; Bound: 45
; Schema: 0
; Fragment shader that performs a single depth-compare sample with an explicit
; LOD on a cube array sampler and writes the result to o_color.
OpCapability Shader
OpCapability SampledCubeArray
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %o_color %v_texCoord %v_drefLodBias
OpExecutionMode %main OriginUpperLeft
; Debug Information
OpSource GLSL 450
OpName %main "main" ; id %4
OpName %o_color "o_color" ; id %9
OpName %u_sampler "u_sampler" ; id %13
OpName %v_texCoord "v_texCoord" ; id %16
OpName %v_drefLodBias "v_drefLodBias" ; id %21
OpName %buf0 "buf0" ; id %39
OpMemberName %buf0 0 "u_scale"
OpName %_ "" ; id %41
OpName %buf1 "buf1" ; id %42
OpMemberName %buf1 0 "u_bias"
OpName %__0 "" ; id %44
; Annotations
OpDecorate %o_color RelaxedPrecision
OpDecorate %o_color Location 0
OpDecorate %u_sampler DescriptorSet 0
OpDecorate %u_sampler Binding 0
OpDecorate %v_texCoord Location 0
OpDecorate %v_drefLodBias Location 1
OpMemberDecorate %buf0 0 Offset 0
OpDecorate %buf0 Block
OpDecorate %_ DescriptorSet 0
OpDecorate %_ Binding 1
OpMemberDecorate %buf1 0 Offset 0
OpDecorate %buf1 Block
OpDecorate %__0 DescriptorSet 0
OpDecorate %__0 Binding 2
; Types, variables and constants
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
%o_color = OpVariable %_ptr_Output_v4float Output
; Image operands after the sampled type: Dim=Cube, Depth=1, Arrayed=1, MS=0, Sampled=1,
; i.e. a depth cube array image used with a sampler.
%10 = OpTypeImage %float Cube 1 1 0 1 Unknown
%11 = OpTypeSampledImage %10
%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11
%u_sampler = OpVariable %_ptr_UniformConstant_11 UniformConstant
%_ptr_Input_v4float = OpTypePointer Input %v4float
%v_texCoord = OpVariable %_ptr_Input_v4float Input
%v2float = OpTypeVector %float 2
%_ptr_Input_v2float = OpTypePointer Input %v2float
%v_drefLodBias = OpVariable %_ptr_Input_v2float Input
%uint = OpTypeInt 32 0
%uint_0 = OpConstant %uint 0
%_ptr_Input_float = OpTypePointer Input %float
%v3float = OpTypeVector %float 3
%uint_1 = OpConstant %uint 1
%float_0 = OpConstant %float 0
%float_1 = OpConstant %float 1
; buf0 / buf1 and their variables are declared but not referenced by %main below.
%buf0 = OpTypeStruct %v4float
%_ptr_Uniform_buf0 = OpTypePointer Uniform %buf0
%_ = OpVariable %_ptr_Uniform_buf0 Uniform
%buf1 = OpTypeStruct %v4float
%_ptr_Uniform_buf1 = OpTypePointer Uniform %buf1
%__0 = OpVariable %_ptr_Uniform_buf1 Uniform
; Function main
%main = OpFunction %void None %3
%5 = OpLabel
%14 = OpLoad %11 %u_sampler
%18 = OpLoad %v4float %v_texCoord
; %26 = v_drefLodBias.x, used as the depth-compare reference value.
%25 = OpAccessChain %_ptr_Input_float %v_drefLodBias %uint_0
%26 = OpLoad %float %25
; %33 = v_drefLodBias.y, used as the explicit level of detail.
%32 = OpAccessChain %_ptr_Input_float %v_drefLodBias %uint_1
%33 = OpLoad %float %32
; Depth-compare sample with the full 4-component coordinate %18 and explicit Lod %33.
%35 = OpImageSampleDrefExplicitLod %float %14 %18 %26 Lod %33
; Result goes to the first component; remaining components are 0, 0, 1.
%38 = OpCompositeConstruct %v4float %35 %float_0 %float_0 %float_1
OpStore %o_color %38
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,10 @@
#version 450
layout(location = 0) out vec4 FragColor;
layout(location = 0) flat in vec3 vTex;
layout(binding = 0) uniform samplerCube uSampler;
// Samples the cube map along vTex with constant explicit gradients
// (dPdx = vec3(5.0), dPdy = vec3(8.0)) and adds the result to FragColor.
// NOTE(review): FragColor is read via += without a prior write in this shader.
void main()
{
FragColor += textureGrad(uSampler, vTex, vec3(5.0), vec3(8.0));
}

View File

@ -742,6 +742,18 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c
case SPVC_COMPILER_OPTION_MSL_SAMPLE_DREF_LOD_ARRAY_AS_GRAD:
options->msl.sample_dref_lod_array_as_grad = value != 0;
break;
case SPVC_COMPILER_OPTION_MSL_READWRITE_TEXTURE_FENCES:
options->msl.readwrite_texture_fences = value != 0;
break;
case SPVC_COMPILER_OPTION_MSL_REPLACE_RECURSIVE_INPUTS:
options->msl.replace_recursive_inputs = value != 0;
break;
case SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP:
options->msl.agx_manual_cube_grad_fixup = value != 0;
break;
#endif
default:

View File

@ -40,7 +40,7 @@ extern "C" {
/* Bumped if ABI or API breaks backwards compatibility. */
#define SPVC_C_API_VERSION_MAJOR 0
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
#define SPVC_C_API_VERSION_MINOR 57
#define SPVC_C_API_VERSION_MINOR 58
/* Bumped if internal implementation details change. */
#define SPVC_C_API_VERSION_PATCH 0
@ -725,6 +725,9 @@ typedef enum spvc_compiler_option
SPVC_COMPILER_OPTION_MSL_ARGUMENT_BUFFERS_TIER = 84 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_SAMPLE_DREF_LOD_ARRAY_AS_GRAD = 85 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_READWRITE_TEXTURE_FENCES = 86 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_REPLACE_RECURSIVE_INPUTS = 87 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_MSL_AGX_MANUAL_CUBE_GRAD_FIXUP = 88 | SPVC_COMPILER_OPTION_MSL_BIT,
SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff
} spvc_compiler_option;

View File

@ -5725,6 +5725,31 @@ void CompilerMSL::emit_custom_functions()
break;
}
// Fix up gradient vectors when sampling a cube texture for Apple Silicon.
// h/t Alexey Knyazev (https://github.com/KhronosGroup/MoltenVK/issues/2068#issuecomment-1817799067) for the code.
case SPVFuncImplGradientCube:
statement("static inline gradientcube spvGradientCube(float3 P, float3 dPdx, float3 dPdy)");
begin_scope();
statement("// Major axis selection");
statement("float3 absP = abs(P);");
statement("bool xMajor = absP.x >= max(absP.y, absP.z);");
statement("bool yMajor = absP.y >= absP.z;");
statement("float3 Q = xMajor ? P.yzx : (yMajor ? P.xzy : P);");
statement("float3 dQdx = xMajor ? dPdx.yzx : (yMajor ? dPdx.xzy : dPdx);");
statement("float3 dQdy = xMajor ? dPdy.yzx : (yMajor ? dPdy.xzy : dPdy);");
statement_no_indent("");
statement("// Skip a couple of operations compared to usual projection");
statement("float4 d = float4(dQdx.xy, dQdy.xy) - (Q.xy / Q.z).xyxy * float4(dQdx.zz, dQdy.zz);");
statement_no_indent("");
statement("// Final swizzle to put the intermediate values into non-ignored components");
statement("// X major: X and Z");
statement("// Y major: X and Y");
statement("// Z major: Y and Z");
statement("return gradientcube(xMajor ? d.xxy : d.xyx, xMajor ? d.zzw : d.zwz);");
end_scope();
statement("");
break;
// "fadd" intrinsic support
case SPVFuncImplFAdd:
statement("template<typename T>");
@ -11123,29 +11148,38 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
// rhoX = dP/dx * extent; rhoY = dP/dy * extent
// Therefore, dP/dx = dP/dy = exp2(lod)/extent.
// (Subtracting 0.5 before exponentiation gives better results.)
string grad_opt, extent;
string grad_opt, extent, grad_coord;
VariableID base_img = img;
if (auto *combined = maybe_get<SPIRCombinedImageSampler>(img))
base_img = combined->image;
switch (imgtype.image.dim)
{
case Dim1D:
grad_opt = "2d";
grad_opt = "gradient2d";
extent = join("float2(", to_expression(base_img), ".get_width(), 1.0)");
break;
case Dim2D:
grad_opt = "2d";
grad_opt = "gradient2d";
extent = join("float2(", to_expression(base_img), ".get_width(), ", to_expression(base_img), ".get_height())");
break;
case DimCube:
if (imgtype.image.arrayed && msl_options.emulate_cube_array)
{
grad_opt = "2d";
grad_opt = "gradient2d";
extent = join("float2(", to_expression(base_img), ".get_width())");
}
else
{
grad_opt = "cube";
if (msl_options.agx_manual_cube_grad_fixup)
{
add_spv_func_and_recompile(SPVFuncImplGradientCube);
grad_opt = "spvGradientCube";
grad_coord = tex_coords + ", ";
}
else
{
grad_opt = "gradientcube";
}
extent = join("float3(", to_expression(base_img), ".get_width())");
}
break;
@ -11154,8 +11188,8 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
extent = "float3(1.0)";
break;
}
farg_str += join(", gradient", grad_opt, "(exp2(", to_expression(lod), " - 0.5) / ", extent, ", exp2(",
to_expression(lod), " - 0.5) / ", extent, ")");
farg_str += join(", ", grad_opt, "(", grad_coord, "exp2(", to_expression(lod), " - 0.5) / ", extent,
", exp2(", to_expression(lod), " - 0.5) / ", extent, ")");
}
else
{
@ -11175,27 +11209,37 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
{
forward = forward && should_forward(grad_x);
forward = forward && should_forward(grad_y);
string grad_opt;
string grad_opt, grad_coord;
switch (imgtype.image.dim)
{
case Dim1D:
case Dim2D:
grad_opt = "2d";
grad_opt = "gradient2d";
break;
case Dim3D:
grad_opt = "3d";
grad_opt = "gradient3d";
break;
case DimCube:
if (imgtype.image.arrayed && msl_options.emulate_cube_array)
grad_opt = "2d";
{
grad_opt = "gradient2d";
}
else if (msl_options.agx_manual_cube_grad_fixup)
{
add_spv_func_and_recompile(SPVFuncImplGradientCube);
grad_opt = "spvGradientCube";
grad_coord = tex_coords + ", ";
}
else
grad_opt = "cube";
{
grad_opt = "gradientcube";
}
break;
default:
grad_opt = "unsupported_gradient_dimension";
break;
}
farg_str += ", gradient" + grad_opt + "(" + to_expression(grad_x) + ", " + to_expression(grad_y) + ")";
farg_str += join(", ", grad_opt, "(", grad_coord, to_expression(grad_x), ", ", to_expression(grad_y), ")");
}
if (args.min_lod)

View File

@ -512,6 +512,13 @@ public:
// The bug has been reported to Apple, and will hopefully be fixed in future releases.
bool replace_recursive_inputs = false;
// If set, manual fixups of gradient vectors for cube texture lookups will be performed.
// All released Apple Silicon GPUs to date behave incorrectly when sampling a cube texture
// with explicit gradients. They will ignore one of the three partial derivatives based
// on the selected major axis, and expect the remaining derivatives to be partially
// transformed.
bool agx_manual_cube_grad_fixup = false;
bool is_ios() const
{
return platform == iOS;
@ -756,6 +763,7 @@ protected:
SPVFuncImplArrayOfArrayCopy6Dim = SPVFuncImplArrayCopyMultidimBase + 6,
SPVFuncImplTexelBufferCoords,
SPVFuncImplImage2DAtomicCoords, // Emulate texture2D atomic operations
SPVFuncImplGradientCube,
SPVFuncImplFMul,
SPVFuncImplFAdd,
SPVFuncImplFSub,

View File

@ -355,6 +355,8 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
msl_args.append('--msl-check-discarded-frag-stores')
if '.lod-as-grad.' in shader:
msl_args.append('--msl-sample-dref-lod-array-as-grad')
if '.agx-cube-grad.' in shader:
msl_args.append('--msl-agx-manual-cube-grad-fixup')
if '.decoration-binding.' in shader:
msl_args.append('--msl-decoration-binding')
if '.rich-descriptor.' in shader: