If a shader's only use of float16 is for FConvert, add the Float16 capability.

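When visiting convert instructions (OpFConvert, OpSConvert, OpUConvert), check whether any 16-bit storage capability has been declared; if none has, add the Float16 capability. The same is done for Int8/Int16 with the corresponding 8/16-bit storage capabilities.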
2024-11-08 11:30:06 +00:00 · 2019-05-01 11:45:36 -05:00 · 2019-05-01 11:45:36 -05:00 · faac86e5d6
commit faac86e5d6
parent 86c72c9486
6 changed files with 157 additions and 18 deletions
--- a/SPIRV/SpvPostProcess.cpp
+++ b/SPIRV/SpvPostProcess.cpp
@@ -118,9 +118,46 @@ void Builder::postProcessType(const Instruction& inst, Id typeId)
    case OpAccessChain:
    case OpPtrAccessChain:
    case OpCopyObject:
+        break;
    case OpFConvert:
    case OpSConvert:
    case OpUConvert:
+        // Look for any 8/16-bit storage capabilities. If there are none, assume that
+        // the convert instruction requires the Float16/Int8/16 capability.
+        if (containsType(typeId, OpTypeFloat, 16) || containsType(typeId, OpTypeInt, 16)) {
+            bool foundStorage = false;
+            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
+                spv::Capability cap = *it;
+                if (cap == spv::CapabilityStorageInputOutput16 ||
+                    cap == spv::CapabilityStoragePushConstant16 ||
+                    cap == spv::CapabilityStorageUniformBufferBlock16 ||
+                    cap == spv::CapabilityStorageUniform16) {
+                    foundStorage = true;
+                    break;
+                }
+            }
+            if (!foundStorage) {
+                if (containsType(typeId, OpTypeFloat, 16))
+                    addCapability(CapabilityFloat16);
+                if (containsType(typeId, OpTypeInt, 16))
+                    addCapability(CapabilityInt16);
+            }
+        }
+        if (containsType(typeId, OpTypeInt, 8)) {
+            bool foundStorage = false;
+            for (auto it = capabilities.begin(); it != capabilities.end(); ++it) {
+                spv::Capability cap = *it;
+                if (cap == spv::CapabilityStoragePushConstant8 ||
+                    cap == spv::CapabilityUniformAndStorageBuffer8BitAccess ||
+                    cap == spv::CapabilityStorageBuffer8BitAccess) {
+                    foundStorage = true;
+                    break;
+                }
+            }
+            if (!foundStorage) {
+                addCapability(CapabilityInt8);
+            }
+        }
        break;
    case OpExtInst:
 #if AMD_EXTENSIONS
@@ -327,6 +364,24 @@ void Builder::postProcess()

    // Add per-instruction capabilities, extensions, etc.,

+    // Look for any 8/16 bit type in physical storage buffer class, and set the
+    // appropriate capability. This happens in createSpvVariable for other storage
+    // classes, but there isn't always a variable for physical storage buffer.
+    for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {
+        Instruction* type = groupedTypes[OpTypePointer][t];
+        if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
+            if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
+                addExtension(spv::E_SPV_KHR_8bit_storage);
+                addCapability(spv::CapabilityStorageBuffer8BitAccess);
+            }
+            if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
+                containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
+                addExtension(spv::E_SPV_KHR_16bit_storage);
+                addCapability(spv::CapabilityStorageBuffer16BitAccess);
+            }
+        }
+    }
+
    // process all reachable instructions...
    for (auto bi = reachableBlocks.cbegin(); bi != reachableBlocks.cend(); ++bi) {
        const Block* block = *bi;
@@ -366,24 +421,6 @@ void Builder::postProcess()
            }
        }
    }
-
-    // Look for any 8/16 bit type in physical storage buffer class, and set the
-    // appropriate capability. This happens in createSpvVariable for other storage
-    // classes, but there isn't always a variable for physical storage buffer.
-    for (int t = 0; t < (int)groupedTypes[OpTypePointer].size(); ++t) {
-        Instruction* type = groupedTypes[OpTypePointer][t];
-        if (type->getImmediateOperand(0) == (unsigned)StorageClassPhysicalStorageBufferEXT) {
-            if (containsType(type->getIdOperand(1), OpTypeInt, 8)) {
-                addExtension(spv::E_SPV_KHR_8bit_storage);
-                addCapability(spv::CapabilityStorageBuffer8BitAccess);
-            }
-            if (containsType(type->getIdOperand(1), OpTypeInt, 16) ||
-                containsType(type->getIdOperand(1), OpTypeFloat, 16)) {
-                addExtension(spv::E_SPV_KHR_16bit_storage);
-                addCapability(spv::CapabilityStorageBuffer16BitAccess);
-            }
-        }
-    }
 }

 }; // end spv namespace
--- a/Test/baseResults/spv.float16convertonlyarith.comp.out
+++ b/Test/baseResults/spv.float16convertonlyarith.comp.out
@@ -0,0 +1,39 @@
+spv.float16convertonlyarith.comp
+// Module Version 10000
+// Generated by (magic number): 80007
+// Id's are bound by 22
+
+                              Capability Shader
+                              Capability Float16
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main"
+                              ExecutionMode 4 LocalSize 16 16 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_explicit_arithmetic_types_float16"
+                              Name 4  "main"
+                              Name 9  "v"
+                              Decorate 21 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 4
+               8:             TypePointer Function 7(fvec4)
+              10:    6(float) Constant 0
+              11:    7(fvec4) ConstantComposite 10 10 10 10
+              13:             TypeFloat 16
+              14:             TypeVector 13(float16_t) 4
+              17:             TypeInt 32 0
+              18:             TypeVector 17(int) 3
+              19:     17(int) Constant 16
+              20:     17(int) Constant 1
+              21:   18(ivec3) ConstantComposite 19 19 20
+         4(main):           2 Function None 3
+               5:             Label
+            9(v):      8(ptr) Variable Function
+                              Store 9(v) 11
+              12:    7(fvec4) Load 9(v)
+              15: 14(f16vec4) FConvert 12
+              16:    7(fvec4) FConvert 15
+                              Return
+                              FunctionEnd
--- a/Test/baseResults/spv.float16convertonlystorage.comp.out
+++ b/Test/baseResults/spv.float16convertonlystorage.comp.out
@@ -0,0 +1,39 @@
+spv.float16convertonlystorage.comp
+// Module Version 10000
+// Generated by (magic number): 80007
+// Id's are bound by 22
+
+                              Capability Shader
+                              Capability Float16
+               1:             ExtInstImport  "GLSL.std.450"
+                              MemoryModel Logical GLSL450
+                              EntryPoint GLCompute 4  "main"
+                              ExecutionMode 4 LocalSize 16 16 1
+                              Source GLSL 450
+                              SourceExtension  "GL_EXT_shader_16bit_storage"
+                              Name 4  "main"
+                              Name 9  "v"
+                              Decorate 21 BuiltIn WorkgroupSize
+               2:             TypeVoid
+               3:             TypeFunction 2
+               6:             TypeFloat 32
+               7:             TypeVector 6(float) 4
+               8:             TypePointer Function 7(fvec4)
+              10:    6(float) Constant 0
+              11:    7(fvec4) ConstantComposite 10 10 10 10
+              13:             TypeFloat 16
+              14:             TypeVector 13(float16_t) 4
+              17:             TypeInt 32 0
+              18:             TypeVector 17(int) 3
+              19:     17(int) Constant 16
+              20:     17(int) Constant 1
+              21:   18(ivec3) ConstantComposite 19 19 20
+         4(main):           2 Function None 3
+               5:             Label
+            9(v):      8(ptr) Variable Function
+                              Store 9(v) 11
+              12:    7(fvec4) Load 9(v)
+              15: 14(f16vec4) FConvert 12
+              16:    7(fvec4) FConvert 15
+                              Return
+                              FunctionEnd
--- a/Test/spv.float16convertonlyarith.comp
+++ b/Test/spv.float16convertonlyarith.comp
@@ -0,0 +1,11 @@
+#version 450 core
+
+#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
+
+layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
+
+void main()
+{
+    vec4 v = vec4(0.0);
+    vec4(f16vec4(v));
+}
--- a/Test/spv.float16convertonlystorage.comp
+++ b/Test/spv.float16convertonlystorage.comp
@@ -0,0 +1,11 @@
+#version 450 core
+
+#extension GL_EXT_shader_16bit_storage : require
+
+layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
+
+void main()
+{
+    vec4 v = vec4(0.0);
+    vec4(f16vec4(v));
+}
--- a/gtests/Spv.FromFile.cpp
+++ b/gtests/Spv.FromFile.cpp
@@ -300,6 +300,8 @@ INSTANTIATE_TEST_CASE_P(
        "spv.earlyReturnDiscard.frag",
        "spv.extPostDepthCoverage.frag",
        "spv.extPostDepthCoverage_Error.frag",
+        "spv.float16convertonlyarith.comp",
+        "spv.float16convertonlystorage.comp",
        "spv.flowControl.frag",
        "spv.forLoop.frag",
        "spv.forwardFun.frag",