Merge pull request #1250 from KhronosGroup/fix-1247

Implement SPIR-V 1.4 OpCopyLogical
This commit is contained in:
Hans-Kristian Arntzen 2020-01-06 16:03:07 +01:00 committed by GitHub
commit 961b9014af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 479 additions and 5 deletions

View File

@ -0,0 +1,60 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct _11
{
float2x2 _m0;
};
struct _12
{
float2x4 _m0;
};
struct B2
{
float4 elem2;
};
struct C
{
float4 c;
B2 b2;
B2 b2_array[4];
_12 _m3;
};
struct B1
{
float4 elem1;
};
struct A
{
float4 a;
B1 b1;
B1 b1_array[4];
_11 _m3;
};
struct _8
{
A a_block;
C c_block;
};
kernel void main0(device _8& _3 [[buffer(0)]])
{
A _31;
_31.a = _3.c_block.c;
_31.b1.elem1 = _3.c_block.b2.elem2;
_31.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2;
_31.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2;
_31.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2;
_31.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2;
_31._m3._m0 = transpose(float2x2(_3.c_block._m3._m0[0].xy, _3.c_block._m3._m0[1].xy));
_3.a_block = _31;
}

View File

@ -0,0 +1,47 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct B2
{
float4 elem2;
};
struct C
{
float4 c;
B2 b2;
B2 b2_array[4];
};
struct B1
{
float4 elem1;
};
struct A
{
float4 a;
B1 b1;
B1 b1_array[4];
};
struct _8
{
A a_block;
C c_block;
};
kernel void main0(device _8& _3 [[buffer(0)]])
{
A _27;
_27.a = _3.c_block.c;
_27.b1.elem1 = _3.c_block.b2.elem2;
_27.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2;
_27.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2;
_27.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2;
_27.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2;
_3.a_block = _27;
}

View File

@ -0,0 +1,45 @@
#version 450
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
struct B2
{
vec4 elem2;
};
struct C
{
vec4 c;
B2 b2;
B2 b2_array[4];
};
struct B1
{
vec4 elem1;
};
struct A
{
vec4 a;
B1 b1;
B1 b1_array[4];
};
layout(binding = 0, std430) buffer _8_3
{
A a_block;
C c_block;
} _3;
void main()
{
A _27;
_27.a = _3.c_block.c;
_27.b1.elem1 = _3.c_block.b2.elem2;
_27.b1_array[0].elem1 = _3.c_block.b2_array[0].elem2;
_27.b1_array[1].elem1 = _3.c_block.b2_array[1].elem2;
_27.b1_array[2].elem1 = _3.c_block.b2_array[2].elem2;
_27.b1_array[3].elem1 = _3.c_block.b2_array[3].elem2;
_3.a_block = _27;
}

View File

@ -0,0 +1,81 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 8
; Bound: 48
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main" %ssbo
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %B1 "B1"
OpName %A "A"
OpName %C "C"
OpName %B2 "B2"
OpMemberName %A 0 "a"
OpMemberName %A 1 "b1"
OpMemberName %A 2 "b1_array"
OpMemberName %C 0 "c"
OpMemberName %C 1 "b2"
OpMemberName %C 2 "b2_array"
OpMemberName %B1 0 "elem1"
OpMemberName %B2 0 "elem2"
OpMemberName %SSBO 0 "a_block"
OpMemberName %SSBO 1 "c_block"
OpDecorate %B1Array ArrayStride 16
OpDecorate %B2Array ArrayStride 16
OpMemberDecorate %B1 0 Offset 0
OpMemberDecorate %A 0 Offset 0
OpMemberDecorate %A 1 Offset 16
OpMemberDecorate %A 2 Offset 32
OpMemberDecorate %A 3 Offset 96
OpMemberDecorate %B2 0 Offset 0
OpMemberDecorate %C 0 Offset 0
OpMemberDecorate %C 1 Offset 16
OpMemberDecorate %C 2 Offset 32
OpMemberDecorate %C 3 Offset 96
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %SSBO 1 Offset 112
OpMemberDecorate %A0 0 Offset 0
OpMemberDecorate %C0 0 Offset 0
OpMemberDecorate %A0 0 RowMajor
OpMemberDecorate %A0 0 MatrixStride 8
OpMemberDecorate %C0 0 ColMajor
OpMemberDecorate %C0 0 MatrixStride 16
OpDecorate %SSBO Block
OpDecorate %ssbo DescriptorSet 0
OpDecorate %ssbo Binding 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%uint = OpTypeInt 32 0
%uint_4 = OpConstant %uint 4
%v4float = OpTypeVector %float 4
%v2float = OpTypeVector %float 2
%m2float = OpTypeMatrix %v2float 2
%A0 = OpTypeStruct %m2float
%C0 = OpTypeStruct %m2float
%B2 = OpTypeStruct %v4float
%B2Array = OpTypeArray %B2 %uint_4
%C = OpTypeStruct %v4float %B2 %B2Array %C0
%B1 = OpTypeStruct %v4float
%B1Array = OpTypeArray %B1 %uint_4
%A = OpTypeStruct %v4float %B1 %B1Array %A0
%SSBO = OpTypeStruct %A %C
%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO
%ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%_ptr_Uniform_C = OpTypePointer StorageBuffer %C
%int_0 = OpConstant %int 0
%_ptr_Uniform_A = OpTypePointer StorageBuffer %A
%main = OpFunction %void None %3
%5 = OpLabel
%22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1
%39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0
%23 = OpLoad %C %22
%24 = OpCopyLogical %A %23
OpStore %39 %24
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,69 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 8
; Bound: 48
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main" %ssbo
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %B1 "B1"
OpName %A "A"
OpName %C "C"
OpName %B2 "B2"
OpMemberName %A 0 "a"
OpMemberName %A 1 "b1"
OpMemberName %A 2 "b1_array"
OpMemberName %C 0 "c"
OpMemberName %C 1 "b2"
OpMemberName %C 2 "b2_array"
OpMemberName %B1 0 "elem1"
OpMemberName %B2 0 "elem2"
OpMemberName %SSBO 0 "a_block"
OpMemberName %SSBO 1 "c_block"
OpDecorate %B1Array ArrayStride 16
OpDecorate %B2Array ArrayStride 16
OpMemberDecorate %B1 0 Offset 0
OpMemberDecorate %A 0 Offset 0
OpMemberDecorate %A 1 Offset 16
OpMemberDecorate %A 2 Offset 32
OpMemberDecorate %B2 0 Offset 0
OpMemberDecorate %C 0 Offset 0
OpMemberDecorate %C 1 Offset 16
OpMemberDecorate %C 2 Offset 32
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %SSBO 1 Offset 96
OpDecorate %SSBO Block
OpDecorate %ssbo DescriptorSet 0
OpDecorate %ssbo Binding 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%uint = OpTypeInt 32 0
%uint_4 = OpConstant %uint 4
%v4float = OpTypeVector %float 4
%B2 = OpTypeStruct %v4float
%B2Array = OpTypeArray %B2 %uint_4
%C = OpTypeStruct %v4float %B2 %B2Array
%B1 = OpTypeStruct %v4float
%B1Array = OpTypeArray %B1 %uint_4
%A = OpTypeStruct %v4float %B1 %B1Array
%SSBO = OpTypeStruct %A %C
%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO
%ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%_ptr_Uniform_C = OpTypePointer StorageBuffer %C
%int_0 = OpConstant %int 0
%_ptr_Uniform_A = OpTypePointer StorageBuffer %A
%main = OpFunction %void None %3
%5 = OpLabel
%22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1
%39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0
%23 = OpLoad %C %22
%24 = OpCopyLogical %A %23
OpStore %39 %24
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,69 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 8
; Bound: 48
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %main "main" %ssbo
OpExecutionMode %main LocalSize 1 1 1
OpSource GLSL 450
OpName %B1 "B1"
OpName %A "A"
OpName %C "C"
OpName %B2 "B2"
OpMemberName %A 0 "a"
OpMemberName %A 1 "b1"
OpMemberName %A 2 "b1_array"
OpMemberName %C 0 "c"
OpMemberName %C 1 "b2"
OpMemberName %C 2 "b2_array"
OpMemberName %B1 0 "elem1"
OpMemberName %B2 0 "elem2"
OpMemberName %SSBO 0 "a_block"
OpMemberName %SSBO 1 "c_block"
OpDecorate %B1Array ArrayStride 16
OpDecorate %B2Array ArrayStride 16
OpMemberDecorate %B1 0 Offset 0
OpMemberDecorate %A 0 Offset 0
OpMemberDecorate %A 1 Offset 16
OpMemberDecorate %A 2 Offset 32
OpMemberDecorate %B2 0 Offset 0
OpMemberDecorate %C 0 Offset 0
OpMemberDecorate %C 1 Offset 16
OpMemberDecorate %C 2 Offset 32
OpMemberDecorate %SSBO 0 Offset 0
OpMemberDecorate %SSBO 1 Offset 96
OpDecorate %SSBO Block
OpDecorate %ssbo DescriptorSet 0
OpDecorate %ssbo Binding 0
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%uint = OpTypeInt 32 0
%uint_4 = OpConstant %uint 4
%v4float = OpTypeVector %float 4
%B2 = OpTypeStruct %v4float
%B2Array = OpTypeArray %B2 %uint_4
%C = OpTypeStruct %v4float %B2 %B2Array
%B1 = OpTypeStruct %v4float
%B1Array = OpTypeArray %B1 %uint_4
%A = OpTypeStruct %v4float %B1 %B1Array
%SSBO = OpTypeStruct %A %C
%_ptr_Uniform_SSBO = OpTypePointer StorageBuffer %SSBO
%ssbo = OpVariable %_ptr_Uniform_SSBO StorageBuffer
%int = OpTypeInt 32 1
%int_1 = OpConstant %int 1
%_ptr_Uniform_C = OpTypePointer StorageBuffer %C
%int_0 = OpConstant %int 0
%_ptr_Uniform_A = OpTypePointer StorageBuffer %A
%main = OpFunction %void None %3
%5 = OpLabel
%22 = OpAccessChain %_ptr_Uniform_C %ssbo %int_1
%39 = OpAccessChain %_ptr_Uniform_A %ssbo %int_0
%23 = OpLoad %C %22
%24 = OpCopyLogical %A %23
OpStore %39 %24
OpReturn
OpFunctionEnd

View File

@ -317,6 +317,8 @@ void Compiler::register_write(uint32_t chain)
var = maybe_get<SPIRVariable>(access_chain->loaded_from);
}
auto &chain_type = expression_type(chain);
if (var)
{
bool check_argument_storage_qualifier = true;
@ -359,7 +361,7 @@ void Compiler::register_write(uint32_t chain)
force_recompile();
}
}
else
else if (chain_type.pointer)
{
// If we stored through a variable pointer, then we don't know which
// variable we stored to. So *all* expressions after this point need to
@ -368,6 +370,9 @@ void Compiler::register_write(uint32_t chain)
// only certain variables, we can invalidate only those.
flush_all_active_variables();
}
// If chain_type.pointer is false, we're not writing to memory backed variables, but temporaries instead.
// This can happen in copy_logical_type where we unroll complex reads and writes to temporaries.
}
void Compiler::flush_dependees(SPIRVariable &var)

View File

@ -8555,6 +8555,19 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
break;
}
case OpCopyLogical:
{
// This is used for copying object of different types, arrays and structs.
// We need to unroll the copy, element-by-element.
uint32_t result_type = ops[0];
uint32_t id = ops[1];
uint32_t rhs = ops[2];
emit_uninitialized_temporary_expression(result_type, id);
emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {});
break;
}
case OpCopyObject:
{
uint32_t result_type = ops[0];
@ -13098,3 +13111,80 @@ void CompilerGLSL::propagate_nonuniform_qualifier(uint32_t id)
propagate_nonuniform_qualifier(expr);
}
}
void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id,
uint32_t rhs_id, uint32_t rhs_type_id,
SmallVector<uint32_t> chain)
{
// Fully unroll all member/array indices one by one.
auto &lhs_type = get<SPIRType>(lhs_type_id);
auto &rhs_type = get<SPIRType>(rhs_type_id);
if (!lhs_type.array.empty())
{
// Could use a loop here to support specialization constants, but it gets rather complicated with nested array types,
// and this is a rather obscure opcode anyways, keep it simple unless we are forced to.
uint32_t array_size = to_array_size_literal(lhs_type);
chain.push_back(0);
for (uint32_t i = 0; i < array_size; i++)
{
chain.back() = i;
emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain);
}
}
else if (lhs_type.basetype == SPIRType::Struct)
{
chain.push_back(0);
uint32_t member_count = uint32_t(lhs_type.member_types.size());
for (uint32_t i = 0; i < member_count; i++)
{
chain.back() = i;
emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain);
}
}
else
{
// Need to handle unpack/packing fixups since this can differ wildly between the logical types,
// particularly in MSL.
// To deal with this, we emit access chains and go through emit_store_statement
// to deal with all the special cases we can encounter.
AccessChainMeta lhs_meta, rhs_meta;
auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()), ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta);
auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()), ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta);
uint32_t id = ir.increase_bound_by(2);
lhs_id = id;
rhs_id = id + 1;
{
auto &lhs_expr = set<SPIRExpression>(lhs_id, move(lhs), lhs_type_id, true);
lhs_expr.need_transpose = lhs_meta.need_transpose;
if (lhs_meta.storage_is_packed)
set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked);
if (lhs_meta.storage_physical_type != 0)
set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type);
forwarded_temporaries.insert(lhs_id);
suppressed_usage_tracking.insert(lhs_id);
}
{
auto &rhs_expr = set<SPIRExpression>(rhs_id, move(rhs), rhs_type_id, true);
rhs_expr.need_transpose = rhs_meta.need_transpose;
if (rhs_meta.storage_is_packed)
set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked);
if (rhs_meta.storage_physical_type != 0)
set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type);
forwarded_temporaries.insert(rhs_id);
suppressed_usage_tracking.insert(rhs_id);
}
emit_store_statement(lhs_id, rhs_id);
}
}

View File

@ -275,6 +275,10 @@ protected:
virtual bool builtin_translates_to_nonarray(spv::BuiltIn builtin) const;
void emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id,
uint32_t rhs_id, uint32_t rhs_type_id,
SmallVector<uint32_t> chain);
StringStream<> buffer;
template <typename T>

View File

@ -178,7 +178,9 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
spirv_path = create_temporary()
msl_path = create_temporary(os.path.basename(shader))
spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader]
spirv_env = 'vulkan1.1spv1.4' if ('.spv14.' in shader) else 'vulkan1.1'
spirv_cmd = [paths.spirv_as, '--target-env', spirv_env, '-o', spirv_path, shader]
if '.preserve.' in shader:
spirv_cmd.append('--preserve-numeric-ids')
@ -249,7 +251,7 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
subprocess.check_call(msl_args)
if not shader_is_invalid_spirv(msl_path):
subprocess.check_call([paths.spirv_val, '--scalar-block-layout', '--target-env', 'vulkan1.1', spirv_path])
subprocess.check_call([paths.spirv_val, '--scalar-block-layout', '--target-env', spirv_env, spirv_path])
return (spirv_path, msl_path)
@ -388,10 +390,12 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl
spirv_path = create_temporary()
glsl_path = create_temporary(os.path.basename(shader))
spirv_env = 'vulkan1.1spv1.4' if ('.spv14.' in shader) else 'vulkan1.1'
if vulkan or spirv:
vulkan_glsl_path = create_temporary('vk' + os.path.basename(shader))
spirv_cmd = [paths.spirv_as, '--target-env', 'vulkan1.1', '-o', spirv_path, shader]
spirv_cmd = [paths.spirv_as, '--target-env', spirv_env, '-o', spirv_path, shader]
if '.preserve.' in shader:
spirv_cmd.append('--preserve-numeric-ids')
@ -404,7 +408,7 @@ def cross_compile(shader, vulkan, spirv, invalid_spirv, eliminate, is_legacy, fl
subprocess.check_call([paths.spirv_opt, '--skip-validation', '-O', '-o', spirv_path, spirv_path])
if not invalid_spirv:
subprocess.check_call([paths.spirv_val, '--scalar-block-layout', '--target-env', 'vulkan1.1', spirv_path])
subprocess.check_call([paths.spirv_val, '--scalar-block-layout', '--target-env', spirv_env, spirv_path])
extra_args = ['--iterations', str(iterations)]
if eliminate: