MSL: Support ConstOffsets on image gather.

- Add TextureFunctionArguments::has_array_offsets.
- Add support functions spvGatherConstOffsets() &
  spvGatherCompareConstOffsets().
- Add SPVFuncImpl::SPVFuncImplGatherConstOffsets &
  SPVFuncImplGatherCompareConstOffsets.
- Add unit tests.
This commit is contained in:
Bill Hollings 2024-05-20 12:06:58 -04:00
parent 1ddd8b629c
commit ab1f9f4d7f
10 changed files with 489 additions and 9 deletions

View File

@ -0,0 +1,90 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Fixed-size array wrapper exposing a subscript operator for each address space.
// The backing array is sized to at least one element, so it never has zero length
// even when Num is 0. Subscripts index the raw array directly with no bounds check.
template<typename T, size_t Num>
struct spvUnsafeArray
{
    T elements[(Num == 0) ? 1 : Num];

    // Mutable element access (thread, device, threadgroup).
    thread T& operator [] (size_t index) thread
    {
        return elements[index];
    }
    device T& operator [] (size_t index) device
    {
        return elements[index];
    }
    threadgroup T& operator [] (size_t index) threadgroup
    {
        return elements[index];
    }

    // Read-only element access (thread, device, constant, threadgroup).
    constexpr const thread T& operator [] (size_t index) const thread
    {
        return elements[index];
    }
    constexpr const device T& operator [] (size_t index) const device
    {
        return elements[index];
    }
    constexpr const constant T& operator [] (size_t index) const constant
    {
        return elements[index];
    }
    constexpr const threadgroup T& operator [] (size_t index) const threadgroup
    {
        return elements[index];
    }
};
// Type trait: yields T with any thread-address-space reference stripped.
// Primary template: T is not a reference, so it is returned unchanged.
template<typename T>
struct spvRemoveReference
{
    using type = T;
};

// Lvalue reference in the thread address space.
template<typename T>
struct spvRemoveReference<thread T&>
{
    using type = T;
};

// Rvalue reference in the thread address space.
template<typename T>
struct spvRemoveReference<thread T&&>
{
    using type = T;
};
// Forwarding helper for thread-address-space values: casts its argument to `thread T&&`.
// This overload accepts an lvalue.
template<typename T>
inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type& value)
{
    return static_cast<thread T&&>(value);
}

// This overload accepts an rvalue.
template<typename T>
inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type&& value)
{
    return static_cast<thread T&&>(value);
}
// Wrapper function that performs a comparison gather with an array of four constant offsets.
// One gather_compare() is issued per entry of `coffsets`; the .w lane (the i0j0 texel of
// that footprint) of each result is collected into the returned vector, in offset order.
//   t        - texture to gather from
//   s        - sampler used for every gather
//   coffsets - indexable collection holding the four offsets
//   params   - remaining gather_compare() arguments, forwarded unchanged to each call
template<typename T, template<typename, access = access::sample, typename = void> class Tex, typename Toff, typename... Tp>
inline vec<T, 4> spvGatherCompareConstOffsets(const thread Tex<T>& t, sampler s, Toff coffsets, Tp... params)
{
    vec<T, 4> gathered;
    for (uint texel = 0; texel < 4; ++texel)
    {
        gathered[texel] = t.gather_compare(s, spvForward<Tp>(params)..., coffsets[texel]).w;
    }
    return gathered;
}
// The four per-texel offsets that main0 passes as `coffsets` to spvGatherCompareConstOffsets.
constant spvUnsafeArray<int2, 4> _38 = spvUnsafeArray<int2, 4>({ int2(-8, 3), int2(-4, 7), int2(0, 3), int2(3, 0) });
// Fragment-stage output: a single float4 written to color(0).
struct main0_out
{
float4 FragColor [[color(0)]];
};
// Fragment-stage inputs, bound to user locations 0 and 1.
struct main0_in
{
// Texture coordinate used for the gather.
float2 coord [[user(locn0)]];
// Only the .x component is read by main0, as the comparison value.
float2 compare_value [[user(locn1)]];
};
// Fragment entry point: runs a comparison gather on `tex` at in.coord using the four
// offsets in _38 and in.compare_value.x as the comparison value, then writes the
// gathered vector to FragColor.
fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> tex [[texture(0)]], sampler texSmplr [[sampler(0)]])
{
    const float4 gathered = spvGatherCompareConstOffsets(tex, texSmplr, _38, in.coord, in.compare_value.x);
    main0_out result = {};
    result.FragColor = gathered;
    return result;
}

View File

@ -0,0 +1,103 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Fixed-size array wrapper exposing a subscript operator for each address space.
// The backing array is sized to at least one element, so it never has zero length
// even when Num is 0. Subscripts index the raw array directly with no bounds check.
template<typename T, size_t Num>
struct spvUnsafeArray
{
    T elements[(Num == 0) ? 1 : Num];

    // Mutable element access (thread, device, threadgroup).
    thread T& operator [] (size_t index) thread
    {
        return elements[index];
    }
    device T& operator [] (size_t index) device
    {
        return elements[index];
    }
    threadgroup T& operator [] (size_t index) threadgroup
    {
        return elements[index];
    }

    // Read-only element access (thread, device, constant, threadgroup).
    constexpr const thread T& operator [] (size_t index) const thread
    {
        return elements[index];
    }
    constexpr const device T& operator [] (size_t index) const device
    {
        return elements[index];
    }
    constexpr const constant T& operator [] (size_t index) const constant
    {
        return elements[index];
    }
    constexpr const threadgroup T& operator [] (size_t index) const threadgroup
    {
        return elements[index];
    }
};
// Type trait: yields T with any thread-address-space reference stripped.
// Primary template: T is not a reference, so it is returned unchanged.
template<typename T>
struct spvRemoveReference
{
    using type = T;
};

// Lvalue reference in the thread address space.
template<typename T>
struct spvRemoveReference<thread T&>
{
    using type = T;
};

// Rvalue reference in the thread address space.
template<typename T>
struct spvRemoveReference<thread T&&>
{
    using type = T;
};
// Forwarding helper for thread-address-space values: casts its argument to `thread T&&`.
// This overload accepts an lvalue.
template<typename T>
inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type& value)
{
    return static_cast<thread T&&>(value);
}

// This overload accepts an rvalue.
template<typename T>
inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type&& value)
{
    return static_cast<thread T&&>(value);
}
// Wrapper function that performs a texture gather with an array of four constant offsets.
// One gather() is issued per entry of `coffsets`; the .w lane (the i0j0 texel of that
// footprint) of each result is collected into the returned vector, in offset order.
//   t        - texture to gather from
//   s        - sampler used for every gather
//   coffsets - indexable collection holding the four offsets
//   c        - component to gather; declared METAL_CONST_ARG
//   params   - remaining gather() arguments, forwarded unchanged to each call
template<typename T, template<typename, access = access::sample, typename = void> class Tex, typename Toff, typename... Tp>
inline vec<T, 4> spvGatherConstOffsets(const thread Tex<T>& t, sampler s, Toff coffsets, component c, Tp... params) METAL_CONST_ARG(c)
{
    vec<T, 4> gathered;
    for (uint texel = 0; texel < 4; ++texel)
    {
        vec<T, 4> footprint;
        // Each case passes a literal component so that gather() receives a constant expression.
        switch (c)
        {
            case component::x:
                footprint = t.gather(s, spvForward<Tp>(params)..., coffsets[texel], component::x);
                break;
            case component::y:
                footprint = t.gather(s, spvForward<Tp>(params)..., coffsets[texel], component::y);
                break;
            case component::z:
                footprint = t.gather(s, spvForward<Tp>(params)..., coffsets[texel], component::z);
                break;
            case component::w:
                footprint = t.gather(s, spvForward<Tp>(params)..., coffsets[texel], component::w);
                break;
        }
        gathered[texel] = footprint.w;
    }
    return gathered;
}
// The four per-texel offsets that main0 passes as `coffsets` to spvGatherConstOffsets.
// The single-argument forms int2(-8) and int2(7) set both components to the same value.
constant spvUnsafeArray<int2, 4> _30 = spvUnsafeArray<int2, 4>({ int2(-8), int2(-8, 7), int2(7, -8), int2(7) });
// Fragment-stage output: a single float4 written to color(0).
struct main0_out
{
float4 FragColor [[color(0)]];
};
// Fragment-stage input, bound to user location 0.
struct main0_in
{
// main0 reads .xy as the gather coordinate and rounds .z to an unsigned integer.
float3 coord [[user(locn0)]];
};
// Fragment entry point: gathers the y component from `tex` at in.coord.xy using the four
// offsets in _30, and writes the gathered vector to FragColor. The z coordinate is rounded
// and converted to uint before being forwarded as the last gather argument
// (presumably the array slice of the texture2d_array; confirm against the Metal texture API).
fragment main0_out main0(main0_in in [[stage_in]], texture2d_array<float> tex [[texture(0)]], sampler texSmplr [[sampler(0)]])
{
    const float4 gathered = spvGatherConstOffsets(tex, texSmplr, _30, component::y, in.coord.xy, uint(rint(in.coord.z)));
    main0_out result = {};
    result.FragColor = gathered;
    return result;
}

View File

@ -0,0 +1,90 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Fixed-size array wrapper exposing a subscript operator for each address space.
// The backing array is sized to at least one element, so it never has zero length
// even when Num is 0. Subscripts index the raw array directly with no bounds check.
template<typename T, size_t Num>
struct spvUnsafeArray
{
    T elements[(Num == 0) ? 1 : Num];

    // Mutable element access (thread, device, threadgroup).
    thread T& operator [] (size_t index) thread
    {
        return elements[index];
    }
    device T& operator [] (size_t index) device
    {
        return elements[index];
    }
    threadgroup T& operator [] (size_t index) threadgroup
    {
        return elements[index];
    }

    // Read-only element access (thread, device, constant, threadgroup).
    constexpr const thread T& operator [] (size_t index) const thread
    {
        return elements[index];
    }
    constexpr const device T& operator [] (size_t index) const device
    {
        return elements[index];
    }
    constexpr const constant T& operator [] (size_t index) const constant
    {
        return elements[index];
    }
    constexpr const threadgroup T& operator [] (size_t index) const threadgroup
    {
        return elements[index];
    }
};
// Type trait: yields T with any thread-address-space reference stripped.
// Primary template: T is not a reference, so it is returned unchanged.
template<typename T>
struct spvRemoveReference
{
    using type = T;
};

// Lvalue reference in the thread address space.
template<typename T>
struct spvRemoveReference<thread T&>
{
    using type = T;
};

// Rvalue reference in the thread address space.
template<typename T>
struct spvRemoveReference<thread T&&>
{
    using type = T;
};
// Forwarding helper for thread-address-space values: casts its argument to `thread T&&`.
// This overload accepts an lvalue.
template<typename T>
inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type& value)
{
    return static_cast<thread T&&>(value);
}

// This overload accepts an rvalue.
template<typename T>
inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type&& value)
{
    return static_cast<thread T&&>(value);
}
// Wrapper function that performs a comparison gather with an array of four constant offsets.
// One gather_compare() is issued per entry of `coffsets`; the .w lane (the i0j0 texel of
// that footprint) of each result is collected into the returned vector, in offset order.
//   t        - texture to gather from
//   s        - sampler used for every gather
//   coffsets - indexable collection holding the four offsets
//   params   - remaining gather_compare() arguments, forwarded unchanged to each call
template<typename T, template<typename, access = access::sample, typename = void> class Tex, typename Toff, typename... Tp>
inline vec<T, 4> spvGatherCompareConstOffsets(const thread Tex<T>& t, sampler s, Toff coffsets, Tp... params)
{
    vec<T, 4> gathered;
    for (uint texel = 0; texel < 4; ++texel)
    {
        gathered[texel] = t.gather_compare(s, spvForward<Tp>(params)..., coffsets[texel]).w;
    }
    return gathered;
}
// The four per-texel offsets that main0 passes as `coffsets` to spvGatherCompareConstOffsets.
constant spvUnsafeArray<int2, 4> _38 = spvUnsafeArray<int2, 4>({ int2(-8, 3), int2(-4, 7), int2(0, 3), int2(3, 0) });
// Fragment-stage output: a single float4 written to color(0).
struct main0_out
{
float4 FragColor [[color(0)]];
};
// Fragment-stage inputs, bound to user locations 0 and 1.
struct main0_in
{
// Texture coordinate used for the gather.
float2 coord [[user(locn0)]];
// Only the .x component is read by main0, as the comparison value.
float2 compare_value [[user(locn1)]];
};
// Fragment entry point: runs a comparison gather on `tex` at in.coord using the four
// offsets in _38 and in.compare_value.x as the comparison value, then writes the
// gathered vector to FragColor.
fragment main0_out main0(main0_in in [[stage_in]], depth2d<float> tex [[texture(0)]], sampler texSmplr [[sampler(0)]])
{
    const float4 gathered = spvGatherCompareConstOffsets(tex, texSmplr, _38, in.coord, in.compare_value.x);
    main0_out result = {};
    result.FragColor = gathered;
    return result;
}

View File

@ -0,0 +1,103 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
// Fixed-size array wrapper exposing a subscript operator for each address space.
// The backing array is sized to at least one element, so it never has zero length
// even when Num is 0. Subscripts index the raw array directly with no bounds check.
template<typename T, size_t Num>
struct spvUnsafeArray
{
    T elements[(Num == 0) ? 1 : Num];

    // Mutable element access (thread, device, threadgroup).
    thread T& operator [] (size_t index) thread
    {
        return elements[index];
    }
    device T& operator [] (size_t index) device
    {
        return elements[index];
    }
    threadgroup T& operator [] (size_t index) threadgroup
    {
        return elements[index];
    }

    // Read-only element access (thread, device, constant, threadgroup).
    constexpr const thread T& operator [] (size_t index) const thread
    {
        return elements[index];
    }
    constexpr const device T& operator [] (size_t index) const device
    {
        return elements[index];
    }
    constexpr const constant T& operator [] (size_t index) const constant
    {
        return elements[index];
    }
    constexpr const threadgroup T& operator [] (size_t index) const threadgroup
    {
        return elements[index];
    }
};
// Type trait: yields T with any thread-address-space reference stripped.
// Primary template: T is not a reference, so it is returned unchanged.
template<typename T>
struct spvRemoveReference
{
    using type = T;
};

// Lvalue reference in the thread address space.
template<typename T>
struct spvRemoveReference<thread T&>
{
    using type = T;
};

// Rvalue reference in the thread address space.
template<typename T>
struct spvRemoveReference<thread T&&>
{
    using type = T;
};
// Forwarding helper for thread-address-space values: casts its argument to `thread T&&`.
// This overload accepts an lvalue.
template<typename T>
inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type& value)
{
    return static_cast<thread T&&>(value);
}

// This overload accepts an rvalue.
template<typename T>
inline constexpr thread T&& spvForward(thread typename spvRemoveReference<T>::type&& value)
{
    return static_cast<thread T&&>(value);
}
// Wrapper function that performs a texture gather with an array of four constant offsets.
// One gather() is issued per entry of `coffsets`; the .w lane (the i0j0 texel of that
// footprint) of each result is collected into the returned vector, in offset order.
//   t        - texture to gather from
//   s        - sampler used for every gather
//   coffsets - indexable collection holding the four offsets
//   c        - component to gather; declared METAL_CONST_ARG
//   params   - remaining gather() arguments, forwarded unchanged to each call
template<typename T, template<typename, access = access::sample, typename = void> class Tex, typename Toff, typename... Tp>
inline vec<T, 4> spvGatherConstOffsets(const thread Tex<T>& t, sampler s, Toff coffsets, component c, Tp... params) METAL_CONST_ARG(c)
{
    vec<T, 4> gathered;
    for (uint texel = 0; texel < 4; ++texel)
    {
        vec<T, 4> footprint;
        // Each case passes a literal component so that gather() receives a constant expression.
        switch (c)
        {
            case component::x:
                footprint = t.gather(s, spvForward<Tp>(params)..., coffsets[texel], component::x);
                break;
            case component::y:
                footprint = t.gather(s, spvForward<Tp>(params)..., coffsets[texel], component::y);
                break;
            case component::z:
                footprint = t.gather(s, spvForward<Tp>(params)..., coffsets[texel], component::z);
                break;
            case component::w:
                footprint = t.gather(s, spvForward<Tp>(params)..., coffsets[texel], component::w);
                break;
        }
        gathered[texel] = footprint.w;
    }
    return gathered;
}
// The four per-texel offsets that main0 passes as `coffsets` to spvGatherConstOffsets.
// The single-argument forms int2(-8) and int2(7) set both components to the same value.
constant spvUnsafeArray<int2, 4> _30 = spvUnsafeArray<int2, 4>({ int2(-8), int2(-8, 7), int2(7, -8), int2(7) });
// Fragment-stage output: a single float4 written to color(0).
struct main0_out
{
float4 FragColor [[color(0)]];
};
// Fragment-stage input, bound to user location 0.
struct main0_in
{
// main0 reads .xy as the gather coordinate and rounds .z to an unsigned integer.
float3 coord [[user(locn0)]];
};
// Fragment entry point: gathers the y component from `tex` at in.coord.xy using the four
// offsets in _30, and writes the gathered vector to FragColor. The z coordinate is rounded
// and converted to uint before being forwarded as the last gather argument
// (presumably the array slice of the texture2d_array; confirm against the Metal texture API).
fragment main0_out main0(main0_in in [[stage_in]], texture2d_array<float> tex [[texture(0)]], sampler texSmplr [[sampler(0)]])
{
    const float4 gathered = spvGatherConstOffsets(tex, texSmplr, _30, component::y, in.coord.xy, uint(rint(in.coord.z)));
    main0_out result = {};
    result.FragColor = gathered;
    return result;
}

View File

@ -0,0 +1,11 @@
#version 450
// Shadow (depth-comparison) 2D sampler at set 0, binding 0.
layout(set = 0, binding = 0) uniform sampler2DShadow tex;
// Single color output at location 0.
layout(location = 0) out mediump vec4 FragColor;
// Texture coordinate for the gather.
layout(location = 0) in vec2 coord;
// Only the .x component is read by main(), as the comparison reference value.
layout(location = 1) in vec2 compare_value;
// Exercises textureGatherOffsets on a shadow sampler: the array constructor supplies
// four constant ivec2 offsets, and compare_value.x is the comparison reference value.
void main()
{
FragColor = textureGatherOffsets(tex, coord, compare_value.x, ivec2[](ivec2(-8, 3), ivec2(-4, 7), ivec2(0, 3), ivec2(3, 0)));
}

View File

@ -0,0 +1,10 @@
#version 450
// 2D array sampler at set 0, binding 0.
layout(set = 0, binding = 0) uniform sampler2DArray tex;
// Single color output at location 0.
layout(location = 0) out mediump vec4 FragColor;
// Three-component coordinate passed whole to the gather in main().
layout(location = 0) in vec3 coord;
// Exercises textureGatherOffsets on an array sampler: the array constructor supplies
// four constant ivec2 offsets, and the trailing 1 selects the component to gather.
void main()
{
FragColor = textureGatherOffsets(tex, coord, ivec2[](ivec2(-8), ivec2(-8, 7), ivec2(7, -8), ivec2(7)), 1);
}

View File

@ -7847,6 +7847,7 @@ std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool
args.grad_x = grad_x;
args.grad_y = grad_y;
args.lod = lod;
args.has_array_offsets = coffsets != 0;
if (coffsets)
args.offset = coffsets;

View File

@ -477,7 +477,7 @@ protected:
uint32_t coord = 0, coord_components = 0, dref = 0;
uint32_t grad_x = 0, grad_y = 0, lod = 0, offset = 0;
uint32_t bias = 0, component = 0, sample = 0, sparse_texel = 0, min_lod = 0;
bool nonuniform_expression = false;
bool nonuniform_expression = false, has_array_offsets = false;
};
virtual std::string to_function_args(const TextureFunctionArguments &args, bool *p_forward);

View File

@ -6227,6 +6227,57 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
case SPVFuncImplGatherConstOffsets:
statement("// Wrapper function that processes a texture gather with a constant offset array.");
statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
"typename Toff, typename... Tp>");
statement("inline vec<T, 4> spvGatherConstOffsets(const thread Tex<T>& t, sampler s, "
"Toff coffsets, component c, Tp... params) METAL_CONST_ARG(c)");
begin_scope();
statement("vec<T, 4> rslts[4];");
statement("for (uint i=0; i < 4; i++)");
begin_scope();
statement("switch (c)");
begin_scope();
// Work around texture::gather() requiring its component parameter to be a constant expression
statement("case component::x:");
statement(" rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::x);");
statement(" break;");
statement("case component::y:");
statement(" rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::y);");
statement(" break;");
statement("case component::z:");
statement(" rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::z);");
statement(" break;");
statement("case component::w:");
statement(" rslts[i] = t.gather(s, spvForward<Tp>(params)..., coffsets[i], component::w);");
statement(" break;");
end_scope();
end_scope();
// Pull all values from the i0j0 component of each gather footprint
statement("return vec<T, 4>(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
end_scope();
statement("");
break;
case SPVFuncImplGatherCompareConstOffsets:
statement("// Wrapper function that processes a texture gather with a constant offset array.");
statement("template<typename T, template<typename, access = access::sample, typename = void> class Tex, "
"typename Toff, typename... Tp>");
statement("inline vec<T, 4> spvGatherCompareConstOffsets(const thread Tex<T>& t, sampler s, "
"Toff coffsets, Tp... params)");
begin_scope();
statement("vec<T, 4> rslts[4];");
statement("for (uint i=0; i < 4; i++)");
begin_scope();
statement(" rslts[i] = t.gather_compare(s, spvForward<Tp>(params)..., coffsets[i]);");
end_scope();
// Pull all values from the i0j0 component of each gather footprint
statement("return vec<T, 4>(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
end_scope();
statement("");
break;
case SPVFuncImplSubgroupBroadcast:
// Metal doesn't allow broadcasting boolean values directly, but we can work around that by broadcasting
// them as integers.
@ -10972,8 +11023,7 @@ string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args)
is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler);
}
// Special-case gather. We have to alter the component being looked up
// in the swizzle case.
// Special-case gather. We have to alter the component being looked up in the swizzle case.
if (msl_options.swizzle_texture_samples && args.base.is_gather && !is_dynamic_img_sampler &&
(!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
{
@ -10982,6 +11032,16 @@ string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args)
return is_compare ? "spvGatherCompareSwizzle" : "spvGatherSwizzle";
}
// Special-case gather with an array of offsets. We have to lower into 4 separate gathers.
if (args.has_array_offsets && !is_dynamic_img_sampler &&
(!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable))
{
bool is_compare = comparison_ids.count(img);
add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareConstOffsets : SPVFuncImplGatherConstOffsets);
add_spv_func_and_recompile(SPVFuncImplForwardArgs);
return is_compare ? "spvGatherCompareConstOffsets" : "spvGatherConstOffsets";
}
auto *combined = maybe_get<SPIRCombinedImageSampler>(img);
// Texture reference
@ -11162,6 +11222,10 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
farg_str += to_expression(combined ? combined->image : img);
}
// Gathers with constant offsets call a special function, so include the texture.
if (args.has_array_offsets)
farg_str += to_expression(img);
// Sampler reference
if (!args.base.is_fetch)
{
@ -11178,11 +11242,17 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
used_swizzle_buffer = true;
}
// Swizzled gather puts the component before the other args, to allow template
// deduction to work.
if (args.component && msl_options.swizzle_texture_samples)
// Const offsets gather puts the const offsets before the other args.
if (args.has_array_offsets)
{
forward = should_forward(args.component);
forward = forward && should_forward(args.offset);
farg_str += ", " + to_expression(args.offset);
}
// Const offsets gather or swizzled gather puts the component before the other args.
if (args.component && (args.has_array_offsets || msl_options.swizzle_texture_samples))
{
forward = forward && should_forward(args.component);
farg_str += ", " + to_component_argument(args.component);
}
}
@ -11593,7 +11663,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
// Add offsets
string offset_expr;
const SPIRType *offset_type = nullptr;
if (args.offset && !args.base.is_fetch)
if (args.offset && !args.base.is_fetch && !args.has_array_offsets)
{
forward = forward && should_forward(args.offset);
offset_expr = to_expression(args.offset);
@ -11632,7 +11702,7 @@ string CompilerMSL::to_function_args(const TextureFunctionArguments &args, bool
}
}
if (args.component)
if (args.component && !args.has_array_offsets)
{
// If 2D has gather component, ensure it also has an offset arg
if (imgtype.image.dim == Dim2D && offset_expr.empty())

View File

@ -772,6 +772,8 @@ protected:
SPVFuncImplTextureSwizzle,
SPVFuncImplGatherSwizzle,
SPVFuncImplGatherCompareSwizzle,
SPVFuncImplGatherConstOffsets,
SPVFuncImplGatherCompareConstOffsets,
SPVFuncImplSubgroupBroadcast,
SPVFuncImplSubgroupBroadcastFirst,
SPVFuncImplSubgroupBallot,