CompilerMSL enhancements to nested function use of globals.

Allow function calls to include globals as arguments.
Allow function calls to include built-ins as arguments.
Include all meta info when creating function args from globals.
Do not manufacture a sampler for Buffer-type sampled images.
Add option to test_shaders.py to preserve SPIR-V code for interactive debugging.
Bill Hollings 2017-12-26 16:32:45 -05:00
parent 3fcdce08ab
commit a68b32733a
4 changed files with 350 additions and 10 deletions
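
Taken together, the changes summarized above mean that a shader whose helper functions read globals and built-ins (like the GLSL test case added in this commit) now cross-compiles to MSL with those globals hoisted into explicit function arguments, as seen in the generated main0 -> vs_adjust -> read_location -> fetch_attr chain below. A minimal sketch of how a client might drive the compiler over such a module; the helper name and the `spirv_words` parameter are illustrative placeholders, not part of this commit:

#include <spirv_msl.hpp>

#include <string>
#include <utility>
#include <vector>

// Cross-compile a SPIR-V module to MSL using SPIRV-Cross. After this commit,
// globals and built-ins (e.g. gl_VertexIndex) referenced inside nested
// functions are passed down as explicit arguments in the emitted MSL.
std::string cross_compile_to_msl(std::vector<uint32_t> spirv_words)
{
    spirv_cross::CompilerMSL compiler(std::move(spirv_words));
    return compiler.compile();
}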

View File

@@ -0,0 +1,189 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

struct attr_desc
{
    int type;
    int attribute_size;
    int starting_offset;
    int stride;
    int swap_bytes;
    int is_volatile;
};

struct VertexBuffer
{
    float4x4 scale_offset_mat;
    uint vertex_base_index;
    int4 input_attributes[16];
};

struct VertexConstantsBuffer
{
    float4 vc[16];
};

constant float4 _295 = {};

struct main0_out
{
    float4 tc0 [[user(locn0)]];
    float4 back_color [[user(locn10)]];
    float4 gl_Position [[position]];
};

attr_desc fetch_desc(thread const int& location, constant VertexBuffer& v_227)
{
    int attribute_flags = v_227.input_attributes[location].w;
    attr_desc result;
    result.type = v_227.input_attributes[location].x;
    result.attribute_size = v_227.input_attributes[location].y;
    result.starting_offset = v_227.input_attributes[location].z;
    result.stride = attribute_flags & 255;
    result.swap_bytes = (attribute_flags >> 8) & 1;
    result.is_volatile = (attribute_flags >> 9) & 1;
    return result;
}

uint get_bits(thread const uint4& v, thread const int& swap)
{
    if (swap != 0)
    {
        return ((v.w | (v.z << uint(8))) | (v.y << uint(16))) | (v.x << uint(24));
    }
    return ((v.x | (v.y << uint(8))) | (v.z << uint(16))) | (v.w << uint(24));
}

float4 fetch_attr(thread const attr_desc& desc, thread const int& vertex_id, thread const texture2d<uint> input_stream)
{
    float4 result = float4(0.0, 0.0, 0.0, 1.0);
    bool reverse_order = false;
    int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
    for (int n = 0; n < 4; n++)
    {
        if (n == desc.attribute_size)
        {
            break;
        }
        uint4 tmp;
        switch (desc.type)
        {
            case 0:
            {
                int _131 = first_byte;
                first_byte = _131 + 1;
                tmp.x = input_stream.read(uint2(_131, 0)).x;
                int _138 = first_byte;
                first_byte = _138 + 1;
                tmp.y = input_stream.read(uint2(_138, 0)).x;
                uint4 param = tmp;
                int param_1 = desc.swap_bytes;
                result[n] = float(get_bits(param, param_1));
                break;
            }
            case 1:
            {
                int _156 = first_byte;
                first_byte = _156 + 1;
                tmp.x = input_stream.read(uint2(_156, 0)).x;
                int _163 = first_byte;
                first_byte = _163 + 1;
                tmp.y = input_stream.read(uint2(_163, 0)).x;
                int _170 = first_byte;
                first_byte = _170 + 1;
                tmp.z = input_stream.read(uint2(_170, 0)).x;
                int _177 = first_byte;
                first_byte = _177 + 1;
                tmp.w = input_stream.read(uint2(_177, 0)).x;
                uint4 param_2 = tmp;
                int param_3 = desc.swap_bytes;
                result[n] = as_type<float>(get_bits(param_2, param_3));
                break;
            }
            case 2:
            {
                int _195 = first_byte;
                first_byte = _195 + 1;
                result[n] = float(input_stream.read(uint2(_195, 0)).x);
                reverse_order = desc.swap_bytes != 0;
                break;
            }
        }
    }
    float4 _209;
    if (reverse_order)
    {
        _209 = result.wzyx;
    }
    else
    {
        _209 = result;
    }
    return _209;
}

float4 read_location(thread const int& location, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1)
{
    int param = location;
    attr_desc desc = fetch_desc(param, v_227);
    int vertex_id = gl_VertexIndex - int(v_227.vertex_base_index);
    if (desc.is_volatile != 0)
    {
        attr_desc param_1 = desc;
        int param_2 = vertex_id;
        return fetch_attr(param_1, param_2, buff_in_2);
    }
    else
    {
        attr_desc param_3 = desc;
        int param_4 = vertex_id;
        return fetch_attr(param_3, param_4, buff_in_1);
    }
}

void vs_adjust(thread float4& dst_reg0, thread float4& dst_reg1, thread float4& dst_reg7, constant VertexBuffer& v_227, thread uint& gl_VertexIndex, thread texture2d<uint> buff_in_2, thread texture2d<uint> buff_in_1, constant VertexConstantsBuffer& v_309)
{
    int param = 3;
    float4 in_diff_color = read_location(param, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
    int param_1 = 0;
    float4 in_pos = read_location(param_1, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
    int param_2 = 8;
    float4 in_tc0 = read_location(param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1);
    dst_reg1 = in_diff_color * v_309.vc[13];
    float4 tmp0;
    tmp0.x = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[4])).x;
    tmp0.y = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[5])).y;
    tmp0.z = float4(dot(float4(in_pos.xyz, 1.0), v_309.vc[6])).z;
    float4 tmp1;
    tmp1 = float4(in_tc0.xy.x, in_tc0.xy.y, tmp1.z, tmp1.w);
    tmp1.z = v_309.vc[15].x;
    dst_reg7.y = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[8])).y;
    dst_reg7.x = float4(dot(float4(tmp1.xyz, 1.0), v_309.vc[7])).x;
    dst_reg0.y = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[1])).y;
    dst_reg0.x = float4(dot(float4(tmp0.xyz, 1.0), v_309.vc[0])).x;
}

vertex main0_out main0(constant VertexBuffer& v_227 [[buffer(0)]], uint gl_VertexIndex [[vertex_id]], texture2d<uint> buff_in_2 [[texture(0)]], texture2d<uint> buff_in_1 [[texture(1)]], constant VertexConstantsBuffer& v_309 [[buffer(1)]])
{
    main0_out out = {};
    float4 dst_reg0 = float4(0.0, 0.0, 0.0, 1.0);
    float4 dst_reg1 = float4(0.0);
    float4 dst_reg7 = float4(0.0);
    float4 param = dst_reg0;
    float4 param_1 = dst_reg1;
    float4 param_2 = dst_reg7;
    vs_adjust(param, param_1, param_2, v_227, gl_VertexIndex, buff_in_2, buff_in_1, v_309);
    dst_reg0 = param;
    dst_reg1 = param_1;
    dst_reg7 = param_2;
    out.gl_Position = dst_reg0;
    out.back_color = dst_reg1;
    out.tc0 = dst_reg7;
    out.gl_Position *= v_227.scale_offset_mat;
    return out;
}

View File

@@ -0,0 +1,132 @@
#version 450
#extension GL_ARB_separate_shader_objects : enable

layout(std140, set = 0, binding = 0) uniform VertexBuffer
{
    mat4 scale_offset_mat;
    uint vertex_base_index;
    ivec4 input_attributes[16];
};

layout(set=0, binding=3) uniform usamplerBuffer buff_in_1;
layout(set=0, binding=4) uniform usamplerBuffer buff_in_2;

layout(location=10) out vec4 back_color;
layout(location=0) out vec4 tc0;

layout(std140, set=0, binding = 1) uniform VertexConstantsBuffer
{
    vec4 vc[16];
};

struct attr_desc
{
    int type;
    int attribute_size;
    int starting_offset;
    int stride;
    int swap_bytes;
    int is_volatile;
};

uint get_bits(uvec4 v, int swap)
{
    if (swap != 0) return (v.w | v.z << 8 | v.y << 16 | v.x << 24);
    return (v.x | v.y << 8 | v.z << 16 | v.w << 24);
}

vec4 fetch_attr(attr_desc desc, int vertex_id, usamplerBuffer input_stream)
{
    vec4 result = vec4(0.0f, 0.0f, 0.0f, 1.0f);
    uvec4 tmp;
    uint bits;
    bool reverse_order = false;

    int first_byte = (vertex_id * desc.stride) + desc.starting_offset;
    for (int n = 0; n < 4; n++)
    {
        if (n == desc.attribute_size) break;

        switch (desc.type)
        {
        case 0:
            //signed normalized 16-bit
            tmp.x = texelFetch(input_stream, first_byte++).x;
            tmp.y = texelFetch(input_stream, first_byte++).x;
            result[n] = get_bits(tmp, desc.swap_bytes);
            break;
        case 1:
            //float
            tmp.x = texelFetch(input_stream, first_byte++).x;
            tmp.y = texelFetch(input_stream, first_byte++).x;
            tmp.z = texelFetch(input_stream, first_byte++).x;
            tmp.w = texelFetch(input_stream, first_byte++).x;
            result[n] = uintBitsToFloat(get_bits(tmp, desc.swap_bytes));
            break;
        case 2:
            //unsigned byte
            result[n] = texelFetch(input_stream, first_byte++).x;
            reverse_order = (desc.swap_bytes != 0);
            break;
        }
    }

    return (reverse_order)? result.wzyx: result;
}

attr_desc fetch_desc(int location)
{
    attr_desc result;
    int attribute_flags = input_attributes[location].w;
    result.type = input_attributes[location].x;
    result.attribute_size = input_attributes[location].y;
    result.starting_offset = input_attributes[location].z;
    result.stride = attribute_flags & 0xFF;
    result.swap_bytes = (attribute_flags >> 8) & 0x1;
    result.is_volatile = (attribute_flags >> 9) & 0x1;
    return result;
}

vec4 read_location(int location)
{
    attr_desc desc = fetch_desc(location);

    int vertex_id = gl_VertexIndex - int(vertex_base_index);
    if (desc.is_volatile != 0)
        return fetch_attr(desc, vertex_id, buff_in_2);
    else
        return fetch_attr(desc, vertex_id, buff_in_1);
}

void vs_adjust(inout vec4 dst_reg0, inout vec4 dst_reg1, inout vec4 dst_reg7)
{
    vec4 tmp0;
    vec4 tmp1;
    vec4 in_diff_color= read_location(3);
    vec4 in_pos= read_location(0);
    vec4 in_tc0= read_location(8);
    dst_reg1 = (in_diff_color * vc[13]);
    tmp0.x = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[4])).x;
    tmp0.y = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[5])).y;
    tmp0.z = vec4(dot(vec4(in_pos.xyzx.xyz, 1.0), vc[6])).z;
    tmp1.xy = in_tc0.xyxx.xy;
    tmp1.z = vc[15].xxxx.z;
    dst_reg7.y = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[8])).y;
    dst_reg7.x = vec4(dot(vec4(tmp1.xyzx.xyz, 1.0), vc[7])).x;
    dst_reg0.y = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[1])).y;
    dst_reg0.x = vec4(dot(vec4(tmp0.xyzx.xyz, 1.0), vc[0])).x;
}

void main ()
{
    vec4 dst_reg0= vec4(0.0f, 0.0f, 0.0f, 1.0f);
    vec4 dst_reg1= vec4(0.0, 0.0, 0.0, 0.0);
    vec4 dst_reg7= vec4(0.0, 0.0, 0.0, 0.0);

    vs_adjust(dst_reg0, dst_reg1, dst_reg7);

    gl_Position = dst_reg0;
    back_color = dst_reg1;
    tc0 = dst_reg7;
    gl_Position = gl_Position * scale_offset_mat;
}

View File

@@ -279,6 +279,15 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
            }
            case OpFunctionCall:
            {
                // First see if any of the function call args are globals
                for (uint32_t arg_idx = 3; arg_idx < i.length; arg_idx++)
                {
                    uint32_t arg_id = ops[arg_idx];
                    if (global_var_ids.find(arg_id) != global_var_ids.end())
                        added_arg_ids.insert(arg_id);
                }
                // Then recurse into the function itself to extract globals used internally in the function
                uint32_t inner_func_id = ops[2];
                std::set<uint32_t> inner_func_args;
                extract_global_variables_from_function(inner_func_id, inner_func_args, global_var_ids,
@@ -306,12 +315,10 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
            func.add_parameter(type_id, next_id, true);
            set<SPIRVariable>(next_id, type_id, StorageClassFunction, 0, arg_id);
            // Ensure both the existing and new variables have the same name, and the name is valid
            string vld_name = ensure_valid_name(to_name(arg_id), "v");
            set_name(arg_id, vld_name);
            set_name(next_id, vld_name);
            // Ensure the existing variable has a valid name and the new variable has all the same meta info
            set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
            meta[next_id] = meta[arg_id];
            meta[next_id].decoration.qualified_alias = meta[arg_id].decoration.qualified_alias;
            next_id++;
        }
    }
@@ -1903,7 +1910,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, uint64_t)
        // Manufacture automatic sampler arg for SampledImage texture
        auto &arg_type = get<SPIRType>(arg.type);
        if (arg_type.basetype == SPIRType::SampledImage)
        if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer)
            decl += ", thread const sampler& " + to_sampler_expression(arg.id);
        if (&arg != &func.arguments.back())
@@ -2197,7 +2204,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id)
    {
        auto &var = id_v.get<SPIRVariable>();
        auto &type = get<SPIRType>(var.basetype);
        if (type.basetype == SPIRType::SampledImage)
        if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer)
            arg_str += ", " + to_sampler_expression(id);
    }
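
The two guards above cover both the function prototype and the call site: sampled images of dimension Buffer (GLSL samplerBuffer/usamplerBuffer) are emitted in MSL as plain textures fetched with read(), as in fetch_attr in the reference output above, so no companion sampler argument is manufactured or passed. A hedged illustration of spotting such resources from the client side via the reflection API; the helper name and the printf reporting are assumptions for this sketch, not part of this commit:

#include <spirv_msl.hpp>

#include <cstdio>

// List image resources whose dimension is Buffer (texel buffers). These are
// the resources for which CompilerMSL no longer manufactures a sampler
// argument, since the generated MSL only reads them and never samples them.
static void report_texel_buffers(spirv_cross::CompilerMSL &compiler)
{
    spirv_cross::ShaderResources resources = compiler.get_shader_resources();

    auto check = [&](const spirv_cross::Resource &res) {
        const spirv_cross::SPIRType &type = compiler.get_type(res.type_id);
        if (type.image.dim == spv::DimBuffer)
            std::printf("%s is a texel buffer; no sampler is emitted for it.\n", res.name.c_str());
    };

    // Depending on the front end, samplerBuffer/usamplerBuffer may be reflected
    // as separate images or as combined sampled images, so check both lists.
    for (const spirv_cross::Resource &res : resources.separate_images)
        check(res);
    for (const spirv_cross::Resource &res : resources.sampled_images)
        check(res);
}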
@@ -2583,7 +2590,7 @@ string CompilerMSL::entry_point_args(bool append_comma)
        {
            if (!ep_args.empty())
                ep_args += ", ";
            BuiltIn bi_type = meta[var_id].decoration.builtin_type;
            BuiltIn bi_type = (BuiltIn)get_decoration(var_id, DecorationBuiltIn);
            ep_args += builtin_type_decl(bi_type) + " " + to_expression(var_id);
            ep_args += " [[" + builtin_qualifier(bi_type) + "]]";
        }
@@ -2666,7 +2673,10 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
    if (constref)
        decl += "const ";
    decl += type_to_glsl(type, arg.id);
    if (is_builtin_variable(var))
        decl += builtin_type_decl((BuiltIn)get_decoration(arg.id, DecorationBuiltIn));
    else
        decl += type_to_glsl(type, arg.id);
    if (is_array(type))
        decl += "*";

View File

@@ -358,11 +358,20 @@ def test_shader_msl(stats, shader, update, keep, opt):
    noopt = shader_is_noopt(shader[1])
    spirv, msl = cross_compile_msl(joined_path, is_spirv, opt and (not noopt))
    regression_check(shader, msl, update, keep, opt)
    os.remove(spirv)
    # Uncomment the following line to print the temp SPIR-V file path.
    # This temp SPIR-V file is not deleted until after the Metal validation step below.
    # If Metal validation fails, the temp SPIR-V file can be copied out and
    # used as input to an invocation of spirv-cross to debug from Xcode directly.
    # To do so, build spirv-cross using `make DEBUG=1`, then run the spirv-cross
    # executable from Xcode using args: `--msl --entry main --output msl_path spirv_path`.
    # print('SPIR-V shader: ' + spirv)
    if not force_no_external_validation:
        validate_shader_msl(shader, opt)
    os.remove(spirv)

def test_shader_hlsl(stats, shader, update, keep, opt):
    joined_path = os.path.join(shader[0], shader[1])
    print('Testing HLSL shader:', joined_path)