CompilerMSL enhancements to nested function use of globals.
Allow function calls to include globals as arguments.
Allow function calls to include built-ins as arguments.
Include all meta info when creating function args from globals.
Do not manufacture a sampler for Buffer-type sampled images.
Add a commented-out option in test_shaders.py to preserve the temporary SPIR-V file for interactive debugging.
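The new reference output below shows the effect in full; as a minimal distilled sketch (illustrative names only, not code from this commit), a GLSL helper that implicitly reads a uniform member and the gl_VertexIndex built-in is now emitted as an MSL function that takes those globals as explicit arguments, with every call site forwarding them:

#include <metal_stdlib>
using namespace metal;

struct VertexBuffer
{
    uint vertex_base_index;
};

struct main0_out
{
    float4 gl_Position [[position]];
};

// Nested helper: the uniform buffer and the gl_VertexIndex built-in arrive as
// explicit parameters instead of being referenced as globals.
int vid(constant VertexBuffer& v, thread uint& gl_VertexIndex)
{
    return int(gl_VertexIndex) - int(v.vertex_base_index);
}

vertex main0_out main0(constant VertexBuffer& v [[buffer(0)]], uint gl_VertexIndex [[vertex_id]])
{
    main0_out out = {};
    // The entry point forwards the globals down the call chain.
    out.gl_Position = float4(float(vid(v, gl_VertexIndex)));
    return out;
}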
commit a68b32733a (parent 3fcdce08ab)
reference/shaders-msl/vert/functions_nested.vert (new file, 189 lines)
@@ -0,0 +1,189 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct attr_desc
{
    int type;
    int attribute_size;
    int starting_offset;
    int stride;
    int swap_bytes;
    int is_volatile;
};

struct VertexBuffer
{
    float4x4 scale_offset_mat;
    uint vertex_base_index;
    int4 input_attributes[16];
};

struct VertexConstantsBuffer
{
    float4 vc[16];
};

constant float4 _295 = {};

struct main0_out
{
    float4 tc0 [[user(locn0)]];
    float4 back_color [[user(locn10)]];
    float4 gl_Position [[position]];
};

attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227)
{
    int attribute_flags = v_227.input_attributes[location].w;
    attr_desc result;
    result.type = v_227.input_attributes[location].x;
    result.attribute_size = v_227.input_attributes[location].y;
    result.starting_offset = v_227.input_attributes[location].z;
    result.stride = attribute_flags & 255;
    result.swap_bytes = (attribute_flags >> 8) & 1;
    result.is_volatile = (attribute_flags >> 9) & 1;
    return result;
}

uint get_bits(thread const uint4& v, thread const int& swap)
{
    if (swap != 0)
    {
        return ((v.w | (v.z << uint(8))) | (v.y << uint(16))) | (v.x << uint(24));
    }
    return ((v.x | (v.y << uint(8))) | (v.z << uint(16))) | (v.w << uint(24));
}

float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d<uint> input_stream)
{
    float4 result = float4(0.0, 0.0, 0.0, 1.0);
    bool reverse_order = false;
    int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
    for (int n = 0; n < 4; n++)
    {
        if (n == desc.attribute_size)
        {
            break;
        }
        uint4 tmp;
        switch (desc.type)
        {
            case 0:
            {
                int _131 = first_byte;
                first_byte = _131 + 1;
                tmp.x = input_stream.read(uint2(_131, 0)).x;
                int _138 = first_byte;
                first_byte = _138 + 1;
                tmp.y = input_stream.read(uint2(_138, 0)).x;
                uint4 param = tmp;
                int param_1 = desc.swap_bytes;
                result[n] = float(get_bits(param, param_1));
                break;
            }
            case 1:
            {
                int _156 = first_byte;
                first_byte = _156 + 1;
                tmp.x = input_stream.read(uint2(_156, 0)).x;
                int _163 = first_byte;
                first_byte = _163 + 1;
                tmp.y = input_stream.read(uint2(_163, 0)).x;
                int _170 = first_byte;
                first_byte = _170 + 1;
                tmp.z = input_stream.read(uint2(_170, 0)).x;
                int _177 = first_byte;
                first_byte = _177 + 1;
                tmp.w = input_stream.read(uint2(_177, 0)).x;
                uint4 param_2 = tmp;
                int param_3 = desc.swap_bytes;
                result[n] = as_type<float>(get_bits(param_2, param_3));
                break;
            }
            case 2:
            {
                int _195 = first_byte;
                first_byte = _195 + 1;
                result[n] = float(input_stream.read(uint2(_195, 0)).x);
                reverse_order = desc.swap_bytes != 0;
                break;
            }
        }
    }
    float4 _209;
    if (reverse_order)
    {
        _209 = result.wzyx;
    }
    else
    {
        _209 = result;
    }
    return _209;
}

float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1)
{
    int param = location;
    attr_desc desc = fetch_desc(param, v_227);
    int vertex_id = gl_VertexIndex - int(v_227.vertex_base_index);
    if (desc.is_volatile != 0)
    {
        attr_desc param_1 = desc;
        int param_2 = vertex_id;
        return fetch_attr(param_1, param_2, buff_in_2);
    }
    else
    {
        attr_desc param_3 = desc;
        int param_4 = vertex_id;
        return fetch_attr(param_3, param_4, buff_in_1);
    }
}

void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1, constant VertexConstantsBuffer& v_309)
{
    int param = 3;
    float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
    int param_1 = 0;
    float4 in_pos = read_location(param_1, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
    int param_2 = 8;
    float4 in_tc0 = read_location(param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
    dst_reg1 = in_diff_color * v_309.vc[13];
    float4 tmp0;
    tmp0.x = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[4])).x;
    tmp0.y = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[5])).y;
    tmp0.z = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[6])).z;
    float4 tmp1;
    tmp1 = float4(in_tc0.xy.x, in_tc0.xy.y, tmp1.z, tmp1.w);
    tmp1.z = v_309.vc[15].x;
    dst_reg7.y = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[8])).y;
    dst_reg7.x = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[7])).x;
    dst_reg0.y = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[1])).y;
    dst_reg0.x = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[0])).x;
}

vertex main0_out main0(constant VertexBuffer& v_227 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]], texture2d<uint> buff_in_2 [[texture(0)]], texture2d<uint> buff_in_1 [[texture(1)]], constant VertexConstantsBuffer& v_309 [[buffer(1)]])
{
    main0_out out = {};
    float4 dst_reg0 = float4(0.0, 0.0, 0.0, 1.0);
    float4 dst_reg1 = float4(0.0);
    float4 dst_reg7 = float4(0.0);
    float4 param = dst_reg0;
    float4 param_1 = dst_reg1;
    float4 param_2 = dst_reg7;
    vs_adjust(param, param_1, param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1, v_309);
    dst_reg0 = param;
    dst_reg1 = param_1;
    dst_reg7 = param_2;
    out.gl_Position = dst_reg0;
    out.back_color = dst_reg1;
    out.tc0 = dst_reg7;
    out.gl_Position *= v_227.scale_offset_mat;
    return out;
}
shaders-msl/vert/functions_nested.vert (new file, 132 lines)
@@ -0,0 +1,132 @@
#version 450
#extension GL_ARB_separate_shader_objects : enable

layout(std140, set = 0, binding = 0) uniform VertexBuffer
{
    mat4 scale_offset_mat;
    uint vertex_base_index;
    ivec4 input_attributes[16];
};
layout(set=0, binding=3) uniform usamplerBuffer buff_in_1;
layout(set=0, binding=4) uniform usamplerBuffer buff_in_2;

layout(location=10) out vec4 back_color;
layout(location=0) out vec4 tc0;

layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer
{
    vec4 vc[16];
};

struct attr_desc
{
    int type;
    int attribute_size;
    int starting_offset;
    int stride;
    int swap_bytes;
    int is_volatile;
};

uint get_bits(uvec4 v, int swap)
{
    if (swap != 0) return (v.w | v.z << 8 | v.y << 16 | v.x << 24);
    return (v.x | v.y << 8 | v.z << 16 | v.w << 24);
}

vec4 fetch_attr(attr_desc desc, int vertex_id, usamplerBuffer input_stream)
{
    vec4 result = vec4(0.0f, 0.0f, 0.0f, 1.0f);
    uvec4 tmp;
    uint bits;
    bool reverse_order = false;

    int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
    for (int n = 0; n < 4; n++)
    {
        if (n == desc.attribute_size) break;

        switch (desc.type)
        {
        case 0:
            //signed normalized 16-bit
            tmp.x = texelFetch(input_stream, first_byte++).x;
            tmp.y = texelFetch(input_stream, first_byte++).x;
            result[n] = get_bits(tmp, desc.swap_bytes);
            break;
        case 1:
            //float
            tmp.x = texelFetch(input_stream, first_byte++).x;
            tmp.y = texelFetch(input_stream, first_byte++).x;
            tmp.z = texelFetch(input_stream, first_byte++).x;
            tmp.w = texelFetch(input_stream, first_byte++).x;
            result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));
            break;
        case 2:
            //unsigned byte
            result[n] = texelFetch(input_stream, first_byte++).x;
            reverse_order = (desc.swap_bytes != 0);
            break;
        }
    }

    return (reverse_order)? result.wzyx: result;
}

attr_desc fetch_desc(int location)
{
    attr_desc result;
    int attribute_flags = input_attributes[location].w;
    result.type = input_attributes[location].x;
    result.attribute_size = input_attributes[location].y;
    result.starting_offset = input_attributes[location].z;
    result.stride = attribute_flags & 0xFF;
    result.swap_bytes = (attribute_flags >> 8) & 0x1;
    result.is_volatile = (attribute_flags >> 9) & 0x1;
    return result;
}

vec4 read_location(int location)
{
    attr_desc desc = fetch_desc(location);

    int vertex_id = gl_VertexIndex - int(vertex_base_index);
    if (desc.is_volatile != 0)
        return fetch_attr(desc, vertex_id, buff_in_2);
    else
        return fetch_attr(desc, vertex_id, buff_in_1);
}

void vs_adjust(inout vec4 dst_reg0, inout vec4 dst_reg1, inout vec4 dst_reg7)
{
    vec4 tmp0;
    vec4 tmp1;
    vec4 in_diff_color = read_location(3);
    vec4 in_pos = read_location(0);
    vec4 in_tc0 = read_location(8);
    dst_reg1 = (in_diff_color * vc[13]);
    tmp0.x = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[4])).x;
    tmp0.y = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[5])).y;
    tmp0.z = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[6])).z;
    tmp1.xy = in_tc0.xyxx.xy;
    tmp1.z = vc[15].xxxx.z;
    dst_reg7.y = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[8])).y;
    dst_reg7.x = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[7])).x;
    dst_reg0.y = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[1])).y;
    dst_reg0.x = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[0])).x;
}

void main ()
{
    vec4 dst_reg0 = vec4(0.0f, 0.0f, 0.0f, 1.0f);
    vec4 dst_reg1 = vec4(0.0, 0.0, 0.0, 0.0);
    vec4 dst_reg7 = vec4(0.0, 0.0, 0.0, 0.0);

    vs_adjust(dst_reg0, dst_reg1, dst_reg7);

    gl_Position = dst_reg0;
    back_color = dst_reg1;
    tc0 = dst_reg7;
    gl_Position = gl_Position * scale_offset_mat;
}
@@ -279,6 +279,15 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 		}
 		case OpFunctionCall:
 		{
+			// First see if any of the function call args are globals
+			for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++)
+			{
+				uint32_t arg_id = ops[arg_idx];
+				if (global_var_ids.find(arg_id) != global_var_ids.end())
+					added_arg_ids.insert(arg_id);
+			}
+
+			// Then recurse into the function itself to extract globals used internally in the function
 			uint32_t inner_func_id = ops[2];
 			std::set<uint32_t> inner_func_args;
 			extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids,
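In outline, the extraction works like this (a standalone toy model with assumed data structures, not the actual SPIRV-Cross types): globals referenced directly in a function, or needed by any function it calls, are accumulated into added_arg_ids so the caller can both declare them as parameters and forward them at each call site.

#include <cstdint>
#include <map>
#include <set>
#include <vector>

// Toy model of the recursion: each function lists the ids it references and
// the functions it calls. Globals referenced anywhere in the call tree are
// accumulated into added_arg_ids for the caller, so they can be appended to
// its parameter list and forwarded downward.
struct ToyFunction
{
    std::vector<uint32_t> referenced_ids;
    std::vector<uint32_t> callees;
};

void extract_globals(uint32_t func_id, std::set<uint32_t>& added_arg_ids,
                     const std::set<uint32_t>& global_var_ids,
                     const std::map<uint32_t, ToyFunction>& functions)
{
    const ToyFunction& func = functions.at(func_id);

    // Direct references to globals (including globals passed as call arguments).
    for (uint32_t id : func.referenced_ids)
        if (global_var_ids.count(id))
            added_arg_ids.insert(id);

    // Recurse into callees; whatever globals they need, the caller must also
    // receive so it can pass them along the call chain.
    for (uint32_t callee : func.callees)
    {
        std::set<uint32_t> inner_args;
        extract_globals(callee, inner_args, global_var_ids, functions);
        added_arg_ids.insert(inner_args.begin(), inner_args.end());
    }
}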
@@ -306,12 +315,10 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
 			func.add_parameter(type_id, next_id, true);
 			set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
 
-			// Ensure both the existing and new variables have the same name, and the name is valid
-			string vld_name = ensure_valid_name(to_name(arg_id), "v");
-			set_name(arg_id, vld_name);
-			set_name(next_id, vld_name);
+			// Ensure the existing variable has a valid name and the new variable has all the same meta info
+			set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
+			meta[next_id] = meta[arg_id];
 
-			meta[next_id].decoration.qualified_alias = meta[arg_id].decoration.qualified_alias;
 			next_id++;
 		}
 	}
@@ -1903,7 +1910,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
 
 		// Manufacture automatic sampler arg for SampledImage texture
 		auto &arg_type = get<SPIRType>(arg.type);
-		if (arg_type.basetype == SPIRType::SampledImage)
+		if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
 			decl += ", thread const sampler& " + to_sampler_expression(arg.id);
 
 		if (&arg != &func.arguments.back())
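The DimBuffer exclusion matters because a buffer texture has no sampler to pass. As a hedged sketch (illustrative helpers, not code from the diff): for an ordinary sampled image the generated helper receives the texture plus a manufactured companion sampler, while a buffer-type sampled image (GLSL usamplerBuffer, emitted above as texture2d<uint>) is only ever read directly, so no sampler argument is manufactured for it.

#include <metal_stdlib>
using namespace metal;

// Ordinary sampled 2D image: texture argument plus a manufactured sampler argument.
float4 sample_helper(thread const texture2d<float> tex, thread const sampler& texSmplr)
{
    return tex.sample(texSmplr, float2(0.5));
}

// Buffer-type sampled image: only direct reads are possible, so after this
// change no companion sampler parameter is generated.
uint4 fetch_helper(thread const texture2d<uint> buff_in, uint index)
{
    return buff_in.read(uint2(index, 0));
}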
@@ -2197,7 +2204,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
 	{
 		auto &var = id_v.get<SPIRVariable>();
 		auto &type = get<SPIRType>(var.basetype);
-		if (type.basetype == SPIRType::SampledImage)
+		if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
 			arg_str += ", " + to_sampler_expression(id);
 	}
 
@@ -2583,7 +2590,7 @@ string CompilerMSL::entry_point_args(bool append_comma)
 		{
 			if (!ep_args.empty())
 				ep_args += ", ";
-			BuiltIn bi_type = meta[var_id].decoration.builtin_type;
+			BuiltIn bi_type = (BuiltIn)get_decoration(var_id, DecorationBuiltIn);
 			ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id);
 			ep_args += " [[" + builtin_qualifier(bi_type) + "]]";
 		}
@@ -2666,7 +2673,10 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 	if (constref)
 		decl += "const ";
 
-	decl += type_to_glsl(type, arg.id);
+	if (is_builtin_variable(var))
+		decl += builtin_type_decl((BuiltIn)get_decoration(arg.id, DecorationBuiltIn));
+	else
+		decl += type_to_glsl(type, arg.id);
 
 	if (is_array(type))
 		decl += "*";
 
@@ -358,11 +358,20 @@ def test_shader_msl(stats, shader, update, keep, opt):
     noopt = shader_is_noopt(shader[1])
     spirv, msl = cross_compile_msl(joined_path, is_spirv, opt and (not noopt))
     regression_check(shader, msl, update, keep, opt)
-    os.remove(spirv)
 
+    # Uncomment the following line to print the temp SPIR-V file path.
+    # This temp SPIR-V file is not deleted until after the Metal validation step below.
+    # If Metal validation fails, the temp SPIR-V file can be copied out and
+    # used as input to an invocation of spirv-cross to debug from Xcode directly.
+    # To do so, build spirv-cross using `make DEBUG=1`, then run the spirv-cross
+    # executable from Xcode using args: `--msl --entry main --output msl_path spirv_path`.
+    # print('SPIR-V shader: ' + spirv)
 
     if not force_no_external_validation:
         validate_shader_msl(shader, opt)
 
+    os.remove(spirv)
 
 def test_shader_hlsl(stats, shader, update, keep, opt):
     joined_path = os.path.join(shader[0], shader[1])
     print('Testing HLSL shader:', joined_path)