Merge pull request #327 from KhronosGroup/sso-hlsl-vertex-fix

Fix SSO for HLSL vertex shaders
This commit is contained in:
Hans-Kristian Arntzen 2017-11-17 13:54:06 +01:00 committed by GitHub
commit c05d0571cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 192 additions and 57 deletions

View File

@ -0,0 +1,35 @@
#version 450
// Explicit redeclaration of the built-in gl_PerVertex output block.
// gl_Position is the only member declared here.
out gl_PerVertex
{
vec4 gl_Position;
};
// Return type of _main(): a scalar payload plus a position.
struct VSOut
{
float a; // copied to _entryPointOutput.a by main()
vec4 pos; // copied to gl_Position by main()
};
// Type of the _entryPointOutput interface variable: it carries only the
// `a` member and has no `pos` member, unlike VSOut.
struct VSOut_1
{
float a;
};
// User-defined vertex output at location 0; main() writes its `a` member.
layout(location = 0) out VSOut_1 _entryPointOutput;
// Builds a VSOut with a = 40.0 and pos = vec4(1.0) (all four components 1.0)
// and returns it by value. Called once, from main().
VSOut _main()
{
VSOut vout;
vout.a = 40.0;
vout.pos = vec4(1.0);
return vout;
}
// Shader entry point: calls _main() and splits the returned struct across
// two outputs - `a` goes to the location-0 output, `pos` goes to the
// gl_Position builtin.
void main()
{
VSOut flattenTemp = _main();
_entryPointOutput.a = flattenTemp.a;
gl_Position = flattenTemp.pos;
}

View File

@ -0,0 +1,68 @@
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 1
; Bound: 40
; Schema: 0
; Module preamble: a Vertex entry point "main" whose interface consists of
; two Output variables, %_entryPointOutput and %_entryPointOutput_pos.
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %main "main" %_entryPointOutput %_entryPointOutput_pos
OpSource HLSL 500
OpName %main "main"
OpName %VSOut "VSOut"
OpMemberName %VSOut 0 "a"
OpMemberName %VSOut 1 "pos"
OpName %_main_ "@main("
OpName %vout "vout"
OpName %flattenTemp "flattenTemp"
; A second struct type that is also named "VSOut" but has only member "a".
OpName %VSOut_0 "VSOut"
OpMemberName %VSOut_0 0 "a"
OpName %_entryPointOutput "@entryPointOutput"
OpName %_entryPointOutput_pos "@entryPointOutput_pos"
; The struct output is assigned Location 0; the position output is a
; stand-alone variable decorated as the Position builtin (it is not a
; member of a block).
OpDecorate %_entryPointOutput Location 0
OpDecorate %_entryPointOutput_pos BuiltIn Position
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
; Function-local struct type returned by %_main_: { float, vec4 }.
%VSOut = OpTypeStruct %float %v4float
%9 = OpTypeFunction %VSOut
%_ptr_Function_VSOut = OpTypePointer Function %VSOut
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%float_40 = OpConstant %float 40
%_ptr_Function_float = OpTypePointer Function %float
%int_1 = OpConstant %int 1
%float_1 = OpConstant %float 1
%21 = OpConstantComposite %v4float %float_1 %float_1 %float_1 %float_1
%_ptr_Function_v4float = OpTypePointer Function %v4float
; Output-side struct type: { float } only.
%VSOut_0 = OpTypeStruct %float
%_ptr_Output_VSOut_0 = OpTypePointer Output %VSOut_0
%_entryPointOutput = OpVariable %_ptr_Output_VSOut_0 Output
%_ptr_Output_float = OpTypePointer Output %float
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_entryPointOutput_pos = OpVariable %_ptr_Output_v4float Output
; Entry point: calls %_main_, stores the returned struct in %flattenTemp,
; then copies member 0 into member 0 of %_entryPointOutput and member 1
; into %_entryPointOutput_pos.
%main = OpFunction %void None %3
%5 = OpLabel
%flattenTemp = OpVariable %_ptr_Function_VSOut Function
%28 = OpFunctionCall %VSOut %_main_
OpStore %flattenTemp %28
; flattenTemp member 0 -> %_entryPointOutput member 0
%32 = OpAccessChain %_ptr_Function_float %flattenTemp %int_0
%33 = OpLoad %float %32
%35 = OpAccessChain %_ptr_Output_float %_entryPointOutput %int_0
OpStore %35 %33
; flattenTemp member 1 -> %_entryPointOutput_pos
%38 = OpAccessChain %_ptr_Function_v4float %flattenTemp %int_1
%39 = OpLoad %v4float %38
OpStore %_entryPointOutput_pos %39
OpReturn
OpFunctionEnd
; Helper named "@main(": fills the local %vout (member 0 = 40, member 1 =
; the constant vec4 %21 whose components are all 1) and returns its value.
%_main_ = OpFunction %VSOut None %9
%11 = OpLabel
%vout = OpVariable %_ptr_Function_VSOut Function
%18 = OpAccessChain %_ptr_Function_float %vout %int_0
OpStore %18 %float_40
%23 = OpAccessChain %_ptr_Function_v4float %vout %int_1
OpStore %23 %21
%24 = OpLoad %VSOut %vout
OpReturnValue %24
OpFunctionEnd

View File

@ -171,8 +171,9 @@ void CompilerCPP::emit_resources()
auto &type = get<SPIRType>(var.basetype);
if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassUniform &&
!is_hidden_variable(var) && (meta[type.self].decoration.decoration_flags &
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
!is_hidden_variable(var) &&
(meta[type.self].decoration.decoration_flags &
((1ull << DecorationBlock) | (1ull << DecorationBufferBlock))))
{
emit_buffer_block(var);
}

View File

@ -168,7 +168,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
case OpMemoryBarrier:
return false;
// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
// OpExtInst is potentially impure depending on extension, but GLSL builtins are at least pure.
default:
break;
@ -457,7 +457,7 @@ bool Compiler::is_hidden_variable(const SPIRVariable &var, bool include_builtins
// Combined image samplers are always considered active as they are "magic" variables.
if (find_if(begin(combined_image_samplers), end(combined_image_samplers), [&var](const CombinedImageSampler &samp) {
return samp.combined_id == var.self;
}) != end(combined_image_samplers))
}) != end(combined_image_samplers))
{
return false;
}
@ -2548,7 +2548,7 @@ SPIREntryPoint &Compiler::get_entry_point(const std::string &name)
auto itr =
find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
return entry.second.orig_name == name;
});
});
if (itr == end(entry_points))
SPIRV_CROSS_THROW("Entry point does not exist.");
@ -2561,7 +2561,7 @@ const SPIREntryPoint &Compiler::get_entry_point(const std::string &name) const
auto itr =
find_if(begin(entry_points), end(entry_points), [&](const std::pair<uint32_t, SPIREntryPoint> &entry) -> bool {
return entry.second.orig_name == name;
});
});
if (itr == end(entry_points))
SPIRV_CROSS_THROW("Entry point does not exist.");
@ -2730,7 +2730,7 @@ void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIR
[&param](const SPIRFunction::CombinedImageSamplerParameter &p) {
return param.image_id == p.image_id && param.sampler_id == p.sampler_id &&
param.global_image == p.global_image && param.global_sampler == p.global_sampler;
});
});
if (itr == end(caller.combined_parameters))
{
@ -2867,7 +2867,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar
auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers),
[image_id, sampler_id](const CombinedImageSampler &combined) {
return combined.image_id == image_id && combined.sampler_id == sampler_id;
});
});
if (itr == end(compiler.combined_image_samplers))
{
@ -3209,8 +3209,8 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry)
break;
}
// Atomics shouldn't be able to access function-local variables.
// Some GLSL builtins access a pointer.
// Atomics shouldn't be able to access function-local variables.
// Some GLSL builtins access a pointer.
default:
break;

View File

@ -1784,7 +1784,12 @@ void CompilerGLSL::fixup_image_load_store_access()
void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model)
{
uint64_t emitted_builtins = 0;
uint64_t global_builtins = 0;
const SPIRVariable *block_var = nullptr;
bool emitted_block = false;
bool builtin_array = false;
for (auto &id : ids)
{
if (id.get_type() != TypeVariable)
@ -1801,6 +1806,13 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
if (m.builtin)
builtins |= 1ull << m.builtin_type;
}
else if (var.storage == storage && !block && is_builtin_variable(var))
{
// While we're at it, collect all declared global builtins (HLSL mostly ...).
auto &m = meta[var.self].decoration;
if (m.builtin)
global_builtins |= 1ull << m.builtin_type;
}
if (!builtins)
continue;
@ -1808,42 +1820,55 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo
if (emitted_block)
SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block.");
if (storage == StorageClassOutput)
statement("out gl_PerVertex");
else
statement("in gl_PerVertex");
begin_scope();
if (builtins & (1ull << BuiltInPosition))
statement("vec4 gl_Position;");
if (builtins & (1ull << BuiltInPointSize))
statement("float gl_PointSize;");
if (builtins & (1ull << BuiltInClipDistance))
statement("float gl_ClipDistance[];"); // TODO: Do we need a fixed array size here?
if (builtins & (1ull << BuiltInCullDistance))
statement("float gl_CullDistance[];"); // TODO: Do we need a fixed array size here?
bool builtin_array = !type.array.empty();
bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
if (builtin_array)
{
// Make sure the array has a supported name in the code.
if (storage == StorageClassOutput)
set_name(var.self, "gl_out");
else if (storage == StorageClassInput)
set_name(var.self, "gl_in");
if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
end_scope_decl(join(to_name(var.self), "[", get_entry_point().output_vertices, "]"));
else
end_scope_decl(join(to_name(var.self), tessellation ? "[gl_MaxPatchVertices]" : "[]"));
}
else
end_scope_decl();
statement("");
emitted_builtins = builtins;
emitted_block = true;
builtin_array = !type.array.empty();
block_var = &var;
}
global_builtins &= (1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | (1ull << BuiltInClipDistance) |
(1ull << BuiltInCullDistance);
// Try to collect all other declared builtins.
if (!emitted_block)
emitted_builtins = global_builtins;
// Can't declare an empty interface block.
if (!emitted_builtins)
return;
if (storage == StorageClassOutput)
statement("out gl_PerVertex");
else
statement("in gl_PerVertex");
begin_scope();
if (emitted_builtins & (1ull << BuiltInPosition))
statement("vec4 gl_Position;");
if (emitted_builtins & (1ull << BuiltInPointSize))
statement("float gl_PointSize;");
if (emitted_builtins & (1ull << BuiltInClipDistance))
statement("float gl_ClipDistance[];"); // TODO: Do we need a fixed array size here?
if (emitted_builtins & (1ull << BuiltInCullDistance))
statement("float gl_CullDistance[];"); // TODO: Do we need a fixed array size here?
bool tessellation = model == ExecutionModelTessellationEvaluation || model == ExecutionModelTessellationControl;
if (builtin_array)
{
// Make sure the array has a supported name in the code.
if (storage == StorageClassOutput)
set_name(block_var->self, "gl_out");
else if (storage == StorageClassInput)
set_name(block_var->self, "gl_in");
if (model == ExecutionModelTessellationControl && storage == StorageClassOutput)
end_scope_decl(join(to_name(block_var->self), "[", get_entry_point().output_vertices, "]"));
else
end_scope_decl(join(to_name(block_var->self), tessellation ? "[gl_MaxPatchVertices]" : "[]"));
}
else
end_scope_decl();
statement("");
}
void CompilerGLSL::declare_undefined_values()
@ -2689,7 +2714,7 @@ string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id)
if (find_if(begin(header.declare_temporary), end(header.declare_temporary),
[result_type, result_id](const pair<uint32_t, uint32_t> &tmp) {
return tmp.first == result_type && tmp.second == result_id;
}) == end(header.declare_temporary))
}) == end(header.declare_temporary))
{
header.declare_temporary.emplace_back(result_type, result_id);
force_recompile = true;
@ -2916,8 +2941,9 @@ void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result
uint32_t op2, uint32_t op3, const char *op)
{
bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3);
emit_op(result_type, result_id, join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ",
to_expression(op2), ", ", to_expression(op3), ")"),
emit_op(result_type, result_id,
join(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(op2), ", ",
to_expression(op3), ")"),
forward);
inherit_expression_dependencies(result_id, op0);
@ -5885,8 +5911,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
register_read(ops[1], ops[2], should_forward(ops[2]));
break;
// OpAtomicStore unimplemented. Not sure what would use that.
// OpAtomicLoad seems to only be relevant for atomic counters.
// OpAtomicStore unimplemented. Not sure what would use that.
// OpAtomicLoad seems to only be relevant for atomic counters.
case OpAtomicIIncrement:
forced_temporaries.insert(ops[1]);

View File

@ -1064,7 +1064,10 @@ void CompilerHLSL::emit_resources()
if (requires_op_fmod)
{
static const char *types[] = {
"float", "float2", "float3", "float4",
"float",
"float2",
"float3",
"float4",
};
for (auto &type : types)

View File

@ -1546,7 +1546,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
emit_barrier(ops[0], ops[1], ops[2]);
break;
// OpOuterProduct
// OpOuterProduct
default:
CompilerGLSL::emit_instruction(instruction);
@ -1851,10 +1851,10 @@ void CompilerMSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
break;
}
// TODO:
// GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier)
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
// GLSLstd450InterpolateAtOffset
// TODO:
// GLSLstd450InterpolateAtCentroid (centroid_no_perspective qualifier)
// GLSLstd450InterpolateAtSample (sample_no_perspective qualifier)
// GLSLstd450InterpolateAtOffset
default:
CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count);
@ -2739,11 +2739,13 @@ string CompilerMSL::ensure_valid_name(string name, string pfx)
void CompilerMSL::replace_illegal_names()
{
static const unordered_set<string> keywords = {
"kernel", "bias",
"kernel",
"bias",
};
static const unordered_set<string> illegal_func_names = {
"main", "saturate",
"main",
"saturate",
};
for (auto &id : ids)