Merge pull request #339 from KhronosGroup/fixup-pr-338

Fixups for PR #338
This commit is contained in:
Hans-Kristian Arntzen 2017-11-23 00:08:44 +01:00 committed by GitHub
commit 7f438371f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 607 additions and 9 deletions

View File

@ -10,7 +10,7 @@ dist: trusty
# We check out glslang and SPIRV-Tools at specific revisions to avoid test output mismatches
env:
- GLSLANG_REV=f0bc598dd7871689f25514b22a82f7455d762bef SPIRV_TOOLS_REV=40e9c60ffea56f45f388835e6945b01d4d8b022d
- GLSLANG_REV=698bf7547a96b6feb7291e8ddc0d5d16475dbae2 SPIRV_TOOLS_REV=40e9c60ffea56f45f388835e6945b01d4d8b022d
before_script:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; brew install python3; fi

View File

@ -0,0 +1,11 @@
#version 450
#extension GL_AMD_shader_fragment_mask : require
layout(binding = 0) uniform sampler2DMS t;
void main()
{
vec4 test2 = fragmentFetchAMD(t, 4u);
uint testi2 = fragmentMaskFetchAMD(t);
}

View File

@ -0,0 +1,11 @@
#version 450
#extension GL_AMD_shader_fragment_mask : require
layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInputMS t;
void main()
{
vec4 test2 = fragmentFetchAMD(t, 4u);
uint testi2 = fragmentMaskFetchAMD(t);
}

View File

@ -0,0 +1,15 @@
#version 450
#extension GL_AMD_shader_fragment_mask : require
#extension GL_AMD_shader_explicit_vertex_parameter : require
uniform sampler2DMS texture1;
layout(location = 0) in vec4 vary;
void main()
{
uint testi1 = fragmentMaskFetchAMD(texture1, ivec2(0));
vec4 test1 = fragmentFetchAMD(texture1, ivec2(1), 2u);
vec4 pos = interpolateAtVertexAMD(vary, 0u);
}

View File

@ -0,0 +1,12 @@
#version 450
#extension GL_ARB_gpu_shader_int64 : require
#extension GL_AMD_gcn_shader : require
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
void main()
{
float cubeFace = cubeFaceIndexAMD(vec3(0.0));
vec2 cubeFaceCoord = cubeFaceCoordAMD(vec3(1.0));
uint64_t time = timeAMD();
}

View File

@ -0,0 +1,32 @@
#version 450
#extension GL_ARB_gpu_shader_int64 : require
#extension GL_ARB_shader_ballot : require
#extension GL_AMD_shader_ballot : require
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer inputData
{
float inputDataArray[];
} _12;
layout(binding = 1, std430) buffer outputData
{
float outputDataArray[];
} _74;
void main()
{
float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x];
bool laneActive = thisLaneData > 0.0;
uint thisLaneOutputSlot = mbcntAMD(packUint2x32(uvec2(unpackUint2x32(ballotARB(laneActive)).xy)));
int firstInvocation = readFirstInvocationARB(1);
int invocation = readInvocationARB(1, 0u);
vec3 swizzleInvocations = swizzleInvocationsAMD(vec3(0.0, 2.0, 1.0), uvec4(3u));
vec3 swizzelInvocationsMasked = swizzleInvocationsMaskedAMD(vec3(0.0, 2.0, 1.0), uvec3(2u));
vec3 writeInvocation = writeInvocationAMD(swizzleInvocations, swizzelInvocationsMasked, 0u);
if (laneActive)
{
_74.outputDataArray[thisLaneOutputSlot] = thisLaneData;
}
}

View File

@ -0,0 +1,11 @@
#version 450
#extension GL_AMD_shader_ballot : require
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
void main()
{
float addInvocations = addInvocationsNonUniformAMD(0.0);
int minInvocations = minInvocationsNonUniformAMD(1);
uint maxInvocations = uint(maxInvocationsNonUniformAMD(4));
}

View File

@ -0,0 +1,18 @@
#version 450
#extension GL_ARB_shader_group_vote : require
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
layout(binding = 0, std430) buffer inputData
{
float inputDataArray[];
} _12;
void main()
{
float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x];
bool laneActive = thisLaneData > 0.0;
bool allInvocations = allInvocationsARB(laneActive);
bool anyInvocations = anyInvocationARB(laneActive);
bool allInvocationsEqual = allInvocationsEqualARB(laneActive);
}

View File

@ -0,0 +1,11 @@
#version 450
#extension GL_AMD_shader_trinary_minmax : require
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
void main()
{
int t11 = min3(0, 3, 2);
int t12 = max3(0, 3, 2);
int t13 = mid3(0, 3, 2);
}

View File

@ -0,0 +1,10 @@
#version 450
// Test input: calls both GL_AMD_shader_fragment_mask builtins on a multisampled
// subpass input. With a subpass input neither call is given a coordinate argument.
#extension GL_AMD_shader_fragment_mask : require
layout(input_attachment_index = 0, binding = 0) uniform subpassInputMS t;
void main ()
{
// The results are stored in locals that are never read afterwards.
vec4 test2 = fragmentFetchAMD(t, 4);
uint testi2 = fragmentMaskFetchAMD(t);
}

View File

@ -0,0 +1,14 @@
#version 450
// Test input: calls the GL_AMD_shader_fragment_mask builtins on a sampler2DMS
// (with an explicit ivec2 coordinate) and interpolateAtVertexAMD on an input varying.
#extension GL_AMD_shader_fragment_mask : require
#extension GL_AMD_shader_explicit_vertex_parameter : require
uniform sampler2DMS texture1;
layout(location = 0) in vec4 vary;
void main()
{
// The results are stored in locals that are never read afterwards.
uint testi1 = fragmentMaskFetchAMD(texture1, ivec2(0));
vec4 test1 = fragmentFetchAMD(texture1, ivec2(1), 2);
vec4 pos = interpolateAtVertexAMD(vary, 0u);
}

View File

@ -0,0 +1,13 @@
#version 450
// Test input: calls the three GL_AMD_gcn_shader builtins used here
// (cubeFaceIndexAMD, cubeFaceCoordAMD, timeAMD) from a compute shader.
#extension GL_AMD_gcn_shader : require
// Required for the uint64_t local that receives timeAMD().
#extension GL_ARB_gpu_shader_int64 : require
layout (local_size_x = 64) in;
void main ()
{
// The results are stored in locals that are never read afterwards.
float cubeFace = cubeFaceIndexAMD(vec3(0.0));
vec2 cubeFaceCoord = cubeFaceCoordAMD(vec3(1.0));
uint64_t time = timeAMD();
}

View File

@ -0,0 +1,33 @@
#version 450
// Test input: combines GL_ARB_shader_ballot and GL_AMD_shader_ballot builtins in a
// compute shader that copies positive input values into an output buffer.
#extension GL_AMD_shader_ballot : require
#extension GL_ARB_shader_ballot : require
layout (local_size_x = 64) in;
layout (std430, binding = 0) buffer inputData
{
float inputDataArray[];
};
layout (std430, binding = 1) buffer outputData
{
float outputDataArray[];
};
void main ()
{
float thisLaneData = inputDataArray [gl_LocalInvocationID.x];
// An invocation counts as active when its input value is positive.
bool laneActive = (thisLaneData > 0);
// The output index is mbcntAMD applied to the ballot of the active flags.
uint thisLaneOutputSlot = mbcntAMD (ballotARB (laneActive));
// The calls below do not feed the buffer write; writeInvocation itself is never read.
int firstInvocation = readFirstInvocationARB(1);
int invocation = readInvocationARB(1, 0);
vec3 swizzleInvocations = swizzleInvocationsAMD(vec3(0.0, 2.0, 1.0), uvec4(3));
vec3 swizzelInvocationsMasked = swizzleInvocationsMaskedAMD(vec3(0.0, 2.0, 1.0), uvec3(2));
vec3 writeInvocation = writeInvocationAMD(swizzleInvocations, swizzelInvocationsMasked, 0);
// Only active invocations write their value to the computed slot.
if (laneActive) {
outputDataArray[thisLaneOutputSlot] = thisLaneData;
}
}

View File

@ -0,0 +1,9 @@
#version 450
// Test input: calls the add/min/max *InvocationsNonUniformAMD builtins from
// GL_AMD_shader_ballot with constant arguments.
#extension GL_AMD_shader_ballot : require
void main ()
{
// The results are stored in locals that are never read afterwards.
float addInvocations = addInvocationsNonUniformAMD(0.0);
int minInvocations = minInvocationsNonUniformAMD(1);
// The argument is a signed literal while the receiving local is uint.
uint maxInvocations = maxInvocationsNonUniformAMD(4);
}

View File

@ -0,0 +1,18 @@
#version 450
// Test input: calls the three GL_ARB_shader_group_vote builtins on a per-invocation
// boolean derived from an input buffer.
#extension GL_ARB_shader_group_vote : require
layout (local_size_x = 64) in;
layout (std430, binding = 0) buffer inputData
{
float inputDataArray[];
};
void main ()
{
float thisLaneData = inputDataArray [gl_LocalInvocationID.x];
// An invocation counts as active when its input value is positive.
bool laneActive = (thisLaneData > 0);
// The vote results are stored in locals that are never read afterwards.
bool allInvocations = allInvocationsARB(laneActive);
bool anyInvocations = anyInvocationARB(laneActive);
bool allInvocationsEqual = allInvocationsEqualARB(laneActive);
}

View File

@ -0,0 +1,11 @@
#version 450
// Test input: calls min3, max3 and mid3 from GL_AMD_shader_trinary_minmax with
// the same three integer literals.
#extension GL_AMD_shader_trinary_minmax : require
layout (local_size_x = 64) in;
void main ()
{
// The results are stored in locals that are never read afterwards.
int t11 = min3(0, 3, 2);
int t12 = max3(0, 3, 2);
int t13 = mid3(0, 3, 2);
}

View File

@ -938,6 +938,16 @@ enum Op {
OpSubgroupAnyKHR = 4429,
OpSubgroupAllEqualKHR = 4430,
OpSubgroupReadInvocationKHR = 4432,
OpGroupIAddNonUniformAMD = 5000,
OpGroupFAddNonUniformAMD = 5001,
OpGroupFMinNonUniformAMD = 5002,
OpGroupUMinNonUniformAMD = 5003,
OpGroupSMinNonUniformAMD = 5004,
OpGroupFMaxNonUniformAMD = 5005,
OpGroupUMaxNonUniformAMD = 5006,
OpGroupSMaxNonUniformAMD = 5007,
OpFragmentMaskFetchAMD = 5011,
OpFragmentFetchAMD = 5012,
OpMax = 0x7fffffff,
};

View File

@ -326,7 +326,11 @@ struct SPIRExtension : IVariant
enum Extension
{
Unsupported,
GLSL
GLSL,
SPV_AMD_shader_ballot,
SPV_AMD_shader_explicit_vertex_parameter,
SPV_AMD_shader_trinary_minmax,
SPV_AMD_gcn_shader
};
SPIRExtension(Extension ext_)

View File

@ -573,6 +573,37 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
break;
}
case OpExtInst:
{
if (length < 5)
return false;
uint32_t extension_set = args[2];
if (compiler.get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
{
enum AMDShaderExplicitVertexParameter
{
InterpolateAtVertexAMD = 1
};
auto op = static_cast<AMDShaderExplicitVertexParameter>(args[3]);
switch (op)
{
case InterpolateAtVertexAMD:
{
auto *var = compiler.maybe_get<SPIRVariable>(args[4]);
if (var && storage_class_is_interface(var->storage))
variables.insert(args[4]);
break;
}
default:
break;
}
}
break;
}
case OpAccessChain:
case OpInBoundsAccessChain:
case OpLoad:
@ -1324,6 +1355,14 @@ void Compiler::parse(const Instruction &instruction)
auto ext = extract_string(spirv, instruction.offset + 1);
if (ext == "GLSL.std.450")
set<SPIRExtension>(id, SPIRExtension::GLSL);
else if (ext == "SPV_AMD_shader_ballot")
set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_ballot);
else if (ext == "SPV_AMD_shader_explicit_vertex_parameter")
set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter);
else if (ext == "SPV_AMD_shader_trinary_minmax")
set<SPIRExtension>(id, SPIRExtension::SPV_AMD_shader_trinary_minmax);
else if (ext == "SPV_AMD_gcn_shader")
set<SPIRExtension>(id, SPIRExtension::SPV_AMD_gcn_shader);
else
set<SPIRExtension>(id, SPIRExtension::Unsupported);

View File

@ -1515,8 +1515,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var)
if (options.es && options.version < 320)
{
// Geometry and tessellation extensions imply this extension.
if (!forced_extensions.count("GL_EXT_geometry_shader") &&
!forced_extensions.count("GL_EXT_tessellation_shader"))
if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader"))
require_extension("GL_EXT_shader_io_blocks");
}
@ -3868,6 +3867,150 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
}
}
// Emits GLSL for one instruction of the SPV_AMD_shader_ballot extended instruction set.
//
// result_type, id: forwarded unchanged to the emit_*_func_op helpers.
// eop:             extended opcode of the instruction.
// args:            operand ids. args[0..1] are read for the two swizzle ops,
//                  args[0..2] for WriteInvocationAMD and args[0] for MbcntAMD.
// The trailing operand count is accepted but not consulted.
//
// GL_AMD_shader_ballot is always required, even when the opcode is not recognised;
// in that case only a comment is written to the output.
void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                                 uint32_t)
{
    require_extension("GL_AMD_shader_ballot");

    // The enum has a fixed underlying type and the switch is on eop itself, so an
    // opcode outside the enumerator range is never converted into the enum. Doing
    // that with an enum lacking a fixed underlying type is undefined behaviour, and
    // the default branch exists precisely for such values.
    enum AMDShaderBallot : uint32_t
    {
        SwizzleInvocationsAMD = 1,
        SwizzleInvocationsMaskedAMD = 2,
        WriteInvocationAMD = 3,
        MbcntAMD = 4
    };

    switch (eop)
    {
    case SwizzleInvocationsAMD:
        emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
        break;

    case SwizzleInvocationsMaskedAMD:
        emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
        break;

    case WriteInvocationAMD:
        emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
        break;

    case MbcntAMD:
        emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
        break;

    default:
        statement("// unimplemented SPV AMD shader ballot op ", eop);
        break;
    }
}
// Emits GLSL for one instruction of the SPV_AMD_shader_explicit_vertex_parameter
// extended instruction set.
//
// result_type, id: forwarded unchanged to emit_binary_func_op.
// eop:             extended opcode of the instruction.
// args:            operand ids. args[0] and args[1] are read for InterpolateAtVertexAMD.
// The trailing operand count is accepted but not consulted.
//
// GL_AMD_shader_explicit_vertex_parameter is always required, even when the opcode
// is not recognised; in that case only a comment is written to the output.
void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                                    const uint32_t *args, uint32_t)
{
    require_extension("GL_AMD_shader_explicit_vertex_parameter");

    // With a single enumerator of value 1, an enum without a fixed underlying type
    // could only represent 0..1, and converting any other opcode into it would be
    // undefined behaviour. Switching on eop itself keeps the default branch well defined.
    enum AMDShaderExplicitVertexParameter : uint32_t
    {
        InterpolateAtVertexAMD = 1
    };

    switch (eop)
    {
    case InterpolateAtVertexAMD:
        emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD");
        break;

    default:
        statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop);
        break;
    }
}
// Emits GLSL for one instruction of the SPV_AMD_shader_trinary_minmax extended
// instruction set.
//
// result_type, id: forwarded unchanged to emit_trinary_func_op.
// eop:             extended opcode of the instruction.
// args:            operand ids. args[0..2] are read for every recognised opcode.
// The trailing operand count is accepted but not consulted.
//
// The F/U/S variants of each operation map to the same GLSL function name
// (min3, max3 or mid3). GL_AMD_shader_trinary_minmax is always required, even
// when the opcode is not recognised; in that case only a comment is written.
void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop,
                                                         const uint32_t *args, uint32_t)
{
    require_extension("GL_AMD_shader_trinary_minmax");

    // The enum has a fixed underlying type and the switch is on eop itself, so an
    // opcode outside the enumerator range is never converted into the enum (which
    // is undefined behaviour for an enum without a fixed underlying type).
    enum AMDShaderTrinaryMinMax : uint32_t
    {
        FMin3AMD = 1,
        UMin3AMD = 2,
        SMin3AMD = 3,
        FMax3AMD = 4,
        UMax3AMD = 5,
        SMax3AMD = 6,
        FMid3AMD = 7,
        UMid3AMD = 8,
        SMid3AMD = 9
    };

    switch (eop)
    {
    case FMin3AMD:
    case UMin3AMD:
    case SMin3AMD:
        emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3");
        break;

    case FMax3AMD:
    case UMax3AMD:
    case SMax3AMD:
        emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3");
        break;

    case FMid3AMD:
    case UMid3AMD:
    case SMid3AMD:
        emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3");
        break;

    default:
        statement("// unimplemented SPV AMD shader trinary minmax op ", eop);
        break;
    }
}
// Emits GLSL for one instruction of the SPV_AMD_gcn_shader extended instruction set.
//
// result_type, id: forwarded unchanged to emit_unary_func_op / emit_op.
// eop:             extended opcode of the instruction.
// args:            operand ids. args[0] is read for the two cube-face ops;
//                  TimeAMD reads no operands.
// The trailing operand count is accepted but not consulted.
//
// GL_AMD_gcn_shader is always required, even when the opcode is not recognised;
// in that case only a comment is written to the output.
void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                              uint32_t)
{
    require_extension("GL_AMD_gcn_shader");

    // The enum has a fixed underlying type and the switch is on eop itself, so an
    // opcode outside the enumerator range is never converted into the enum (which
    // is undefined behaviour for an enum without a fixed underlying type).
    enum AMDGCNShader : uint32_t
    {
        CubeFaceIndexAMD = 1,
        CubeFaceCoordAMD = 2,
        TimeAMD = 3
    };

    switch (eop)
    {
    case CubeFaceIndexAMD:
        emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD");
        break;

    case CubeFaceCoordAMD:
        emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD");
        break;

    case TimeAMD:
    {
        // No operands: the expression is the bare call.
        string expr = "timeAMD()";
        emit_op(result_type, id, expr, true);
        break;
    }

    default:
        statement("// unimplemented SPV AMD gcn shader op ", eop);
        break;
    }
}
string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type)
{
if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Int)
@ -3894,6 +4037,8 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
return "int64BitsToDouble";
else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
return "uint64BitsToDouble";
else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2)
return "packUint2x32";
else
return "";
}
@ -6372,13 +6517,168 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
case OpExtInst:
{
uint32_t extension_set = ops[2];
if (get<SPIRExtension>(extension_set).ext != SPIRExtension::GLSL)
if (get<SPIRExtension>(extension_set).ext == SPIRExtension::GLSL)
{
emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_ballot)
{
emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter)
{
emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_shader_trinary_minmax)
{
emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else if (get<SPIRExtension>(extension_set).ext == SPIRExtension::SPV_AMD_gcn_shader)
{
emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
}
else
{
statement("// unimplemented ext op ", instruction.op);
break;
}
emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4);
break;
}
case OpSubgroupBallotKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
string expr;
expr = join("unpackUint2x32(ballotARB(" + to_expression(ops[2]) + "))");
emit_op(result_type, id, expr, true);
require_extension("GL_ARB_shader_ballot");
break;
}
case OpSubgroupFirstInvocationKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
require_extension("GL_ARB_shader_ballot");
break;
}
case OpSubgroupReadInvocationKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
require_extension("GL_ARB_shader_ballot");
break;
}
case OpSubgroupAllKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
require_extension("GL_ARB_shader_group_vote");
break;
}
case OpSubgroupAnyKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
require_extension("GL_ARB_shader_group_vote");
break;
}
case OpSubgroupAllEqualKHR:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
require_extension("GL_ARB_shader_group_vote");
break;
}
case OpGroupIAddNonUniformAMD:
case OpGroupFAddNonUniformAMD:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
require_extension("GL_AMD_shader_ballot");
break;
}
case OpGroupFMinNonUniformAMD:
case OpGroupUMinNonUniformAMD:
case OpGroupSMinNonUniformAMD:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
require_extension("GL_AMD_shader_ballot");
break;
}
case OpGroupFMaxNonUniformAMD:
case OpGroupUMaxNonUniformAMD:
case OpGroupSMaxNonUniformAMD:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
require_extension("GL_AMD_shader_ballot");
break;
}
case OpFragmentMaskFetchAMD:
{
auto &type = expression_type(ops[2]);
uint32_t result_type = ops[0];
uint32_t id = ops[1];
if (type.image.dim == spv::DimSubpassData)
{
emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD");
}
else
{
emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD");
}
require_extension("GL_AMD_shader_fragment_mask");
break;
}
case OpFragmentFetchAMD:
{
auto &type = expression_type(ops[2]);
uint32_t result_type = ops[0];
uint32_t id = ops[1];
if (type.image.dim == spv::DimSubpassData)
{
emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD");
}
else
{
emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD");
}
require_extension("GL_AMD_shader_fragment_mask");
break;
}
@ -6976,11 +7276,17 @@ void CompilerGLSL::add_header_line(const std::string &line)
header_lines.push_back(line);
}
// Returns true when `ext` is already present in forced_extensions, false otherwise.
bool CompilerGLSL::has_extension(const std::string &ext) const
{
    for (auto &candidate : forced_extensions)
    {
        if (candidate == ext)
            return true;
    }
    return false;
}
void CompilerGLSL::require_extension(const string &ext)
{
if (forced_extensions.find(ext) == end(forced_extensions))
if (!has_extension(ext))
{
forced_extensions.insert(ext);
forced_extensions.push_back(ext);
force_recompile = true;
}
}

View File

@ -179,6 +179,8 @@ protected:
void reset();
void emit_function(SPIRFunction &func, uint64_t return_flags);
bool has_extension(const std::string &ext) const;
// Virtualize methods which need to be overridden by subclass targets like C++ and such.
virtual void emit_function_prototype(SPIRFunction &func, uint64_t return_flags);
@ -189,6 +191,14 @@ protected:
void emit_block_instructions(const SPIRBlock &block);
virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
uint32_t count);
virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op,
const uint32_t *args, uint32_t count);
virtual void emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t result_id, uint32_t op,
const uint32_t *args, uint32_t count);
virtual void emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t result_id, uint32_t op,
const uint32_t *args, uint32_t count);
virtual void emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
uint32_t count);
virtual void emit_header();
virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id);
virtual void emit_texture_op(const Instruction &i);
@ -445,7 +455,7 @@ protected:
std::unordered_map<uint32_t, uint32_t> expression_usage_counts;
void track_expression_read(uint32_t id);
std::unordered_set<std::string> forced_extensions;
std::vector<std::string> forced_extensions;
std::vector<std::string> header_lines;
uint32_t statement_count;