diff --git a/SPIRV/GLSL.ext.EXT.h b/SPIRV/GLSL.ext.EXT.h index caab27938..07f3c3026 100644 --- a/SPIRV/GLSL.ext.EXT.h +++ b/SPIRV/GLSL.ext.EXT.h @@ -41,5 +41,6 @@ static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64"; static const char* const E_SPV_EXT_shader_tile_image = "SPV_EXT_shader_tile_image"; static const char* const E_SPV_EXT_mesh_shader = "SPV_EXT_mesh_shader"; +static const char* const E_SPV_ARM_cooperative_matrix_layouts = "SPV_ARM_cooperative_matrix_layouts"; #endif // #ifndef GLSLextEXT_H diff --git a/SPIRV/GlslangToSpv.cpp b/SPIRV/GlslangToSpv.cpp index 16e224663..0fd79b1a4 100755 --- a/SPIRV/GlslangToSpv.cpp +++ b/SPIRV/GlslangToSpv.cpp @@ -3705,6 +3705,12 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt idImmOps.push_back(spv::IdImmediate(true, operands[1])); // buf if (node->getOp() == glslang::EOpCooperativeMatrixLoad) { idImmOps.push_back(spv::IdImmediate(true, operands[3])); // matrixLayout + auto layout = builder.getConstantScalar(operands[3]); + if (layout == spv::CooperativeMatrixLayoutRowBlockedInterleavedARM || + layout == spv::CooperativeMatrixLayoutColumnBlockedInterleavedARM) { + builder.addExtension(spv::E_SPV_ARM_cooperative_matrix_layouts); + builder.addCapability(spv::CapabilityCooperativeMatrixLayoutsARM); + } idImmOps.push_back(spv::IdImmediate(true, operands[2])); // stride } else { idImmOps.push_back(spv::IdImmediate(true, operands[2])); // stride @@ -3729,6 +3735,12 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt idImmOps.push_back(spv::IdImmediate(true, operands[0])); // object if (node->getOp() == glslang::EOpCooperativeMatrixStore) { idImmOps.push_back(spv::IdImmediate(true, operands[3])); // matrixLayout + auto layout = builder.getConstantScalar(operands[3]); + if (layout == spv::CooperativeMatrixLayoutRowBlockedInterleavedARM || + layout == spv::CooperativeMatrixLayoutColumnBlockedInterleavedARM) { + builder.addExtension(spv::E_SPV_ARM_cooperative_matrix_layouts); + builder.addCapability(spv::CapabilityCooperativeMatrixLayoutsARM); + } idImmOps.push_back(spv::IdImmediate(true, operands[2])); // stride } else { idImmOps.push_back(spv::IdImmediate(true, operands[2])); // stride diff --git a/SPIRV/doc.cpp b/SPIRV/doc.cpp index de465e1dd..0105caa2e 100755 --- a/SPIRV/doc.cpp +++ b/SPIRV/doc.cpp @@ -1035,6 +1035,8 @@ const char* CapabilityString(int info) case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT"; case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT"; + case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM"; + case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR"; case CapabilityDemoteToHelperInvocationEXT: return "DemoteToHelperInvocationEXT"; diff --git a/SPIRV/spirv.hpp b/SPIRV/spirv.hpp index bb3d715e8..afa89a585 100644 --- a/SPIRV/spirv.hpp +++ b/SPIRV/spirv.hpp @@ -1002,6 +1002,7 @@ enum Capability { CapabilityTileImageColorReadAccessEXT = 4166, CapabilityTileImageDepthReadAccessEXT = 4167, CapabilityTileImageStencilReadAccessEXT = 4168, + CapabilityCooperativeMatrixLayoutsARM = 4201, CapabilityFragmentShadingRateKHR = 4422, CapabilitySubgroupBallotKHR = 4423, CapabilityDrawParameters = 4427, @@ -1302,6 +1303,8 @@ enum CooperativeMatrixOperandsMask { enum CooperativeMatrixLayout { CooperativeMatrixLayoutRowMajorKHR = 0, CooperativeMatrixLayoutColumnMajorKHR = 1, + CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202, + CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203, CooperativeMatrixLayoutMax = 0x7fffffff, }; diff --git a/Test/baseResults/spv.coopmat_armlayout.comp.out b/Test/baseResults/spv.coopmat_armlayout.comp.out new file mode 100644 index 000000000..1af49f292 --- /dev/null +++ b/Test/baseResults/spv.coopmat_armlayout.comp.out @@ -0,0 +1,406 @@ +spv.coopmat_armlayout.comp +Validation failed +// Module Version 10000 +// Generated by (magic number): 8000b +// Id's are bound by 251 + + Capability Shader + Capability Float16 + Capability Int16 + Capability Int8 + Capability CooperativeMatrixLayoutsARM + Capability StorageUniformBufferBlock16 + Capability VulkanMemoryModelKHR + Capability PhysicalStorageBufferAddressesEXT + Capability CooperativeMatrixKHR + Extension "SPV_ARM_cooperative_matrix_layouts" + Extension "SPV_KHR_16bit_storage" + Extension "SPV_KHR_cooperative_matrix" + Extension "SPV_KHR_physical_storage_buffer" + Extension "SPV_KHR_storage_buffer_storage_class" + Extension "SPV_KHR_vulkan_memory_model" + 1: ExtInstImport "GLSL.std.450" + MemoryModel PhysicalStorageBuffer64EXT VulkanKHR + EntryPoint GLCompute 4 "main" + ExecutionMode 4 LocalSize 64 1 1 + Source GLSL 450 + SourceExtension "GL_EXT_buffer_reference" + SourceExtension "GL_EXT_shader_explicit_arithmetic_types" + SourceExtension "GL_KHR_cooperative_matrix" + SourceExtension "GL_KHR_memory_scope_semantics" + Name 4 "main" + Name 15 "f16(f161;" + Name 14 "m" + Name 22 "f32(f1;" + Name 21 "m" + Name 35 "m" + Name 53 "m2" + Name 57 "x" + Name 65 "tempArg" + Name 69 "Block" + MemberName 69(Block) 0 "y" + MemberName 69(Block) 1 "x" + Name 71 "block" + Name 81 "tempArg" + Name 86 "Block16" + MemberName 86(Block16) 0 "y" + MemberName 86(Block16) 1 "x" + MemberName 86(Block16) 2 "b" + Name 89 "Block" + MemberName 89(Block) 0 "y" + MemberName 89(Block) 1 "x" + Name 91 "block16" + Name 98 "tempArg" + Name 111 "D" + Name 115 "A" + Name 119 "B" + Name 121 "C" + Name 125 "l" + Name 129 "Y" + Name 130 "Z" + Name 133 "F" + Name 138 "a" + Name 142 "md1" + Name 153 "mC2" + Name 158 "tempArg" + Name 164 "tempArg" + Name 170 "p1" + Name 171 "param" + Name 174 "p2" + Name 175 "param" + Name 189 "tempArg" + Name 194 "shmatrix" + Name 198 "ms" + Name 205 "ms8A" + Name 209 "ms8B" + Name 213 "ms8C" + Name 228 "m16" + Name 234 "mC" + Name 235 "F" + Name 240 "S" + MemberName 240(S) 0 "a" + MemberName 240(S) 1 "b" + MemberName 240(S) 2 "c" + Name 245 "SC" + Name 250 "scm" + Decorate 67 ArrayStride 4 + Decorate 68 ArrayStride 4 + MemberDecorate 69(Block) 0 Offset 0 + MemberDecorate 69(Block) 1 Offset 4194304 + Decorate 69(Block) Block + Decorate 71(block) DescriptorSet 0 + Decorate 71(block) Binding 0 + Decorate 82 ArrayStride 2 + Decorate 84 ArrayStride 2 + MemberDecorate 86(Block16) 0 Offset 0 + MemberDecorate 86(Block16) 1 Offset 2097152 + MemberDecorate 86(Block16) 2 Offset 2097160 + Decorate 86(Block16) Block + Decorate 87 ArrayStride 4 + Decorate 88 ArrayStride 4 + MemberDecorate 89(Block) 0 Offset 0 + MemberDecorate 89(Block) 1 Offset 4194304 + Decorate 89(Block) Block + Decorate 91(block16) DescriptorSet 0 + Decorate 91(block16) Binding 0 + Decorate 129(Y) SpecId 0 + Decorate 233 BuiltIn WorkgroupSize + Decorate 235(F) SpecId 1 + Decorate 245(SC) SpecId 2 + 2: TypeVoid + 3: TypeFunction 2 + 6: TypeFloat 16 + 7: TypeInt 32 0 + 8: 7(int) Constant 3 + 9: 7(int) Constant 8 + 10: 7(int) Constant 2 + 11: TypeCooperativeMatrixKHR 6(float16_t) 8 9 9 10 + 12: TypePointer Function 11 + 13: TypeFunction 11 12(ptr) + 17: TypeFloat 32 + 18: TypeCooperativeMatrixKHR 17(float) 8 9 9 10 + 19: TypePointer Function 18 + 20: TypeFunction 18 19(ptr) + 32: 7(int) Constant 16 + 33: TypeCooperativeMatrixKHR 17(float) 8 32 9 10 + 34: TypePointer Function 33 + 36: 17(float) Constant 0 + 37: 33 ConstantComposite 36 + 46: 17(float) Constant 1073741824 + 51: TypeCooperativeMatrixKHR 6(float16_t) 8 32 9 10 + 52: TypePointer Function 51 + 56: TypePointer Function 17(float) + 58: TypeInt 32 1 + 59: 58(int) Constant 1 + 62: 58(int) Constant 0 + 66: 7(int) Constant 1048576 + 67: TypeArray 17(float) 66 + 68: TypeRuntimeArray 17(float) + 69(Block): TypeStruct 67 68 + 70: TypePointer StorageBuffer 69(Block) + 71(block): 70(ptr) Variable StorageBuffer + 72: 7(int) Constant 5 + 73: TypePointer StorageBuffer 17(float) + 75: 7(int) Constant 128 + 76: 58(int) Constant 4202 + 82: TypeArray 6(float16_t) 66 + 83: 7(int) Constant 1 + 84: TypeArray 6(float16_t) 83 + TypeForwardPointer 85 PhysicalStorageBufferEXT + 86(Block16): TypeStruct 82 84 85 + 87: TypeArray 17(float) 66 + 88: TypeRuntimeArray 17(float) + 89(Block): TypeStruct 87 88 + 85: TypePointer PhysicalStorageBufferEXT 89(Block) + 90: TypePointer StorageBuffer 86(Block16) + 91(block16): 90(ptr) Variable StorageBuffer + 92: TypePointer StorageBuffer 6(float16_t) + 99: 58(int) Constant 2 + 100: TypePointer StorageBuffer 85(ptr) + 103: TypePointer PhysicalStorageBufferEXT 17(float) + 112: 7(int) Constant 0 + 113: TypeCooperativeMatrixKHR 6(float16_t) 8 32 9 112 + 114: TypePointer Function 113 + 117: TypeCooperativeMatrixKHR 6(float16_t) 8 9 9 83 + 118: TypePointer Function 117 + 124: TypePointer Function 58(int) + 128: 58(int) Constant 8 + 129(Y): 58(int) SpecConstant 2 + 130(Z): 58(int) SpecConstantOp 132 128 129(Y) + 131: TypeCooperativeMatrixKHR 6(float16_t) 8 130(Z) 130(Z) 10 + 132: TypePointer Function 131 + 134:6(float16_t) Constant 0 + 135: 131 ConstantComposite 134 + 136: TypeArray 33 72 + 137: TypePointer Function 136 + 139: 58(int) Constant 3 + 140: 17(float) Constant 1065353216 + 146: 58(int) Constant 1234 + 150: TypeCooperativeMatrixKHR 6(float16_t) 8 130(Z) 9 10 + 151: TypeArray 150 8 + 152: TypePointer Private 151 + 153(mC2): 152(ptr) Variable Private + 154: TypePointer Private 150 + 178: 11 ConstantComposite 134 + 179: 18 ConstantComposite 36 + 183:6(float16_t) Constant 16384 + 186: 17(float) Constant 1082130432 + 190: TypeVector 7(int) 4 + 191: 7(int) Constant 32 + 192: TypeArray 190(ivec4) 191 + 193: TypePointer Workgroup 192 + 194(shmatrix): 193(ptr) Variable Workgroup + 195: TypePointer Workgroup 190(ivec4) + 202: TypeInt 8 1 + 203: TypeCooperativeMatrixKHR 202(int8_t) 8 9 9 112 + 204: TypePointer Function 203 + 207: TypeCooperativeMatrixKHR 202(int8_t) 8 9 9 83 + 208: TypePointer Function 207 + 211: TypeCooperativeMatrixKHR 202(int8_t) 8 9 9 10 + 212: TypePointer Function 211 + 223: 58(int) Constant 16 + 225: TypeInt 16 1 + 226: TypeCooperativeMatrixKHR 225(int16_t) 8 9 9 112 + 227: TypePointer Function 226 + 231: TypeVector 7(int) 3 + 232: 7(int) Constant 64 + 233: 231(ivec3) ConstantComposite 232 83 83 + 234(mC): 154(ptr) Variable Private + 235(F): 17(float) SpecConstant 1077936128 + 236: TypeCooperativeMatrixKHR 17(float) 8 130(Z) 9 10 + 237: 236 ConstantComposite 36 + 238:6(float16_t) Constant 15360 + 239: 11 ConstantComposite 238 + 240(S): TypeStruct 58(int) 58(int) 58(int) + 241: 58(int) Constant 12 + 242: 58(int) Constant 23 + 243: 58(int) Constant 34 + 244: 240(S) ConstantComposite 241 242 243 + 245(SC): 58(int) SpecConstant 1 + 246: TypeCooperativeMatrixKHR 6(float16_t) 8 245(SC) 245(SC) 10 + 247: TypeArray 246 245(SC) + 248: TypeArray 247 245(SC) + 249: TypePointer Private 248 + 250(scm): 249(ptr) Variable Private + 4(main): 2 Function None 3 + 5: Label + 35(m): 34(ptr) Variable Function + 53(m2): 52(ptr) Variable Function + 57(x): 56(ptr) Variable Function + 65(tempArg): 34(ptr) Variable Function + 81(tempArg): 52(ptr) Variable Function + 98(tempArg): 34(ptr) Variable Function + 111(D): 34(ptr) Variable Function + 115(A): 114(ptr) Variable Function + 119(B): 118(ptr) Variable Function + 121(C): 34(ptr) Variable Function + 125(l): 124(ptr) Variable Function + 133(F): 132(ptr) Variable Function + 138(a): 137(ptr) Variable Function + 142(md1): 56(ptr) Variable Function + 158(tempArg): 34(ptr) Variable Function + 164(tempArg): 52(ptr) Variable Function + 170(p1): 12(ptr) Variable Function + 171(param): 12(ptr) Variable Function + 174(p2): 19(ptr) Variable Function + 175(param): 19(ptr) Variable Function + 189(tempArg): 52(ptr) Variable Function + 198(ms): 52(ptr) Variable Function + 205(ms8A): 204(ptr) Variable Function + 209(ms8B): 208(ptr) Variable Function + 213(ms8C): 212(ptr) Variable Function + 228(m16): 227(ptr) Variable Function + Store 35(m) 37 + 38: 33 Load 35(m) + 39: 33 Load 35(m) + 40: 33 FAdd 38 39 + Store 35(m) 40 + 41: 33 Load 35(m) + 42: 33 Load 35(m) + 43: 33 FSub 41 42 + Store 35(m) 43 + 44: 33 Load 35(m) + 45: 33 FNegate 44 + Store 35(m) 45 + 47: 33 Load 35(m) + 48: 33 MatrixTimesScalar 47 46 + Store 35(m) 48 + 49: 33 Load 35(m) + 50: 33 MatrixTimesScalar 49 46 + Store 35(m) 50 + 54: 33 Load 35(m) + 55: 51 FConvert 54 + Store 53(m2) 55 + 60: 56(ptr) AccessChain 35(m) 59 + 61: 17(float) Load 60 + Store 57(x) 61 + 63: 17(float) Load 57(x) + 64: 56(ptr) AccessChain 35(m) 62 + Store 64 63 + 74: 73(ptr) AccessChain 71(block) 59 32 + 77: 33 CooperativeMatrixLoadKHR 74 76 75 MakePointerVisibleKHR NonPrivatePointerKHR 72 + Store 65(tempArg) 77 + 78: 33 Load 65(tempArg) + Store 35(m) 78 + 79: 33 Load 35(m) + 80: 73(ptr) AccessChain 71(block) 59 32 + CooperativeMatrixStoreKHR 80 79 76 75 MakePointerAvailableKHR NonPrivatePointerKHR 72 + 93: 92(ptr) AccessChain 91(block16) 59 32 + 94: 51 CooperativeMatrixLoadKHR 93 76 75 MakePointerVisibleKHR NonPrivatePointerKHR 72 + Store 81(tempArg) 94 + 95: 51 Load 81(tempArg) + Store 53(m2) 95 + 96: 51 Load 53(m2) + 97: 92(ptr) AccessChain 91(block16) 59 32 + CooperativeMatrixStoreKHR 97 96 76 75 MakePointerAvailableKHR NonPrivatePointerKHR 72 + 101: 100(ptr) AccessChain 91(block16) 99 + 102: 85(ptr) Load 101 MakePointerVisibleKHR NonPrivatePointerKHR 72 + 104: 103(ptr) AccessChain 102 59 32 + 105: 33 CooperativeMatrixLoadKHR 104 76 75 Aligned MakePointerVisibleKHR NonPrivatePointerKHR 16 72 + Store 98(tempArg) 105 + 106: 33 Load 98(tempArg) + Store 35(m) 106 + 107: 33 Load 35(m) + 108: 100(ptr) AccessChain 91(block16) 99 + 109: 85(ptr) Load 108 MakePointerVisibleKHR NonPrivatePointerKHR 72 + 110: 103(ptr) AccessChain 109 59 32 + CooperativeMatrixStoreKHR 110 107 76 75 Aligned MakePointerAvailableKHR NonPrivatePointerKHR 16 72 + 116: 113 Load 115(A) + 120: 117 Load 119(B) + 122: 33 Load 121(C) + 123: 33 CooperativeMatrixMulAddKHR 116 120 122 + Store 111(D) 123 + 126: 7(int) CooperativeMatrixLengthKHR 33 + 127: 58(int) Bitcast 126 + Store 125(l) 127 + Store 133(F) 135 + 141: 56(ptr) AccessChain 138(a) 139 62 + Store 141 140 + Store 142(md1) 36 + 143: 33 Load 35(m) + 144: 33 Load 35(m) + 145: 33 FAdd 144 143 + Store 35(m) 145 + 147: 17(float) CompositeExtract 145 1234 + 148: 17(float) Load 142(md1) + 149: 17(float) FAdd 148 147 + Store 142(md1) 149 + 155: 154(ptr) AccessChain 153(mC2) 99 + 156: 150 Load 155 + 157: 154(ptr) AccessChain 153(mC2) 59 + Store 157 156 + 159: 73(ptr) AccessChain 71(block) 62 32 + 160: 33 CooperativeMatrixLoadKHR 159 76 75 MakePointerVisibleKHR NonPrivatePointerKHR 72 + Store 158(tempArg) 160 + 161: 33 Load 158(tempArg) + Store 35(m) 161 + 162: 33 Load 35(m) + 163: 73(ptr) AccessChain 71(block) 62 32 + CooperativeMatrixStoreKHR 163 162 76 75 MakePointerAvailableKHR NonPrivatePointerKHR 72 + 165: 92(ptr) AccessChain 91(block16) 62 32 + 166: 51 CooperativeMatrixLoadKHR 165 76 75 MakePointerVisibleKHR NonPrivatePointerKHR 72 + Store 164(tempArg) 166 + 167: 51 Load 164(tempArg) + Store 53(m2) 167 + 168: 51 Load 53(m2) + 169: 92(ptr) AccessChain 91(block16) 62 32 + CooperativeMatrixStoreKHR 169 168 76 75 MakePointerAvailableKHR NonPrivatePointerKHR 72 + 172: 11 Load 170(p1) + Store 171(param) 172 + 173: 11 FunctionCall 15(f16(f161;) 171(param) + Store 170(p1) 173 + 176: 18 Load 174(p2) + Store 175(param) 176 + 177: 18 FunctionCall 22(f32(f1;) 175(param) + Store 174(p2) 177 + Store 170(p1) 178 + Store 174(p2) 179 + 180: 11 Load 170(p1) + 181: 11 Load 170(p1) + 182: 11 FDiv 181 180 + Store 170(p1) 182 + 184: 11 Load 170(p1) + 185: 11 MatrixTimesScalar 184 183 + Store 170(p1) 185 + 187: 18 Load 174(p2) + 188: 18 MatrixTimesScalar 187 186 + Store 174(p2) 188 + 196: 195(ptr) AccessChain 194(shmatrix) 83 + 197: 51 CooperativeMatrixLoadKHR 196 76 10 MakePointerVisibleKHR NonPrivatePointerKHR 10 + Store 189(tempArg) 197 + 199: 51 Load 189(tempArg) + Store 198(ms) 199 + 200: 51 Load 198(ms) + 201: 195(ptr) AccessChain 194(shmatrix) 83 + CooperativeMatrixStoreKHR 201 200 76 10 MakePointerAvailableKHR NonPrivatePointerKHR 10 + 206: 203 Load 205(ms8A) + 210: 207 Load 209(ms8B) + 214: 211 Load 213(ms8C) + 215: 211 CooperativeMatrixMulAddKHR 206 210 214 ASignedComponentsKHR BSignedComponentsKHR CSignedComponentsKHR ResultSignedComponentsKHR + 216: 203 Load 205(ms8A) + 217: 207 Load 209(ms8B) + 218: 211 Load 213(ms8C) + 219: 211 CooperativeMatrixMulAddKHR 216 217 218 ASignedComponentsKHR BSignedComponentsKHR CSignedComponentsKHR ResultSignedComponentsKHR + 220: 203 Load 205(ms8A) + 221: 207 Load 209(ms8B) + 222: 211 Load 213(ms8C) + 224: 211 CooperativeMatrixMulAddKHR 220 221 222 ASignedComponentsKHR BSignedComponentsKHR CSignedComponentsKHR ResultSignedComponentsKHR SaturatingAccumulationKHR + 229: 226 Load 228(m16) + 230: 195(ptr) AccessChain 194(shmatrix) 83 + CooperativeMatrixStoreKHR 230 229 76 10 MakePointerAvailableKHR NonPrivatePointerKHR 10 + Return + FunctionEnd + 15(f16(f161;): 11 Function None 13 + 14(m): 12(ptr) FunctionParameter + 16: Label + 24: 11 Load 14(m) + 25: 11 FNegate 24 + ReturnValue 25 + FunctionEnd + 22(f32(f1;): 18 Function None 20 + 21(m): 19(ptr) FunctionParameter + 23: Label + 28: 18 Load 21(m) + 29: 18 FNegate 28 + ReturnValue 29 + FunctionEnd diff --git a/Test/spv.coopmat_armlayout.comp b/Test/spv.coopmat_armlayout.comp new file mode 100644 index 000000000..b63ea5e7c --- /dev/null +++ b/Test/spv.coopmat_armlayout.comp @@ -0,0 +1,121 @@ +#version 450 core +#extension GL_KHR_memory_scope_semantics : enable +#extension GL_KHR_cooperative_matrix : enable +#extension GL_EXT_shader_explicit_arithmetic_types : enable +#extension GL_EXT_buffer_reference : enable + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +const int X = 8; +layout(constant_id = 0) const int Y = 2; +const int Z = X*Y; + +coopmat mC; +coopmat mC2[3]; + +layout(constant_id = 1) const float F = 3.0; + +const coopmat mD = coopmat(0.0); +const coopmat mD2 = coopmat(1); + +struct S { int a; int b; int c; }; + +const S s = S(12, 23, 34); + +layout(set = 0, binding = 0, buffer_reference) coherent buffer Block { + float y[1024*1024]; + float x[]; +} block; + +layout(set = 0, binding = 0) coherent buffer Block16 { + float16_t y[1024*1024]; + float16_t x[]; + + Block b; +} block16; + +coopmat f16(coopmat m) { return -m; } +coopmat f32(coopmat m) { return -m; } + +layout(constant_id = 2) const int SC = 1; +coopmat scm[SC][SC]; + +// sized for coopmat +shared uvec4 shmatrix[16*16*2/16]; + +void main() +{ + coopmat1?8:4), gl_MatrixUseAccumulator> m = coopmat1?8:4), gl_MatrixUseAccumulator>(0.0); + + m = m + m; + m = m - m; + m = -m; + m = 2.0*m; + m = m*2.0; + + coopmat m2 = coopmat(m); + + float x = m[1]; + m[0] = x; + + coopMatLoad(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatStore(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatLoad(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatStore(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatLoad(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatStore(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + + coopmat A; + coopmat B; + coopmat C; + coopmat D; + D = coopMatMulAdd(A, B, C); + + int l = D.length(); + + coopmat E; + + coopmat F = coopmat(0.0); + + coopmat1?8:4), gl_MatrixUseAccumulator> a[5]; + a[3][0] = 1.0; + + float md1 = mD[1]; + + md1 += (m += m)[1234]; + + mC2[1] = mC2[2]; + + coopMatLoad(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatStore(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatLoad(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatStore(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + + coopmat p1; + coopmat p2; + + p1 = f16(p1); + p2 = f32(p2); + + p1 = coopmat(0.0); + p2 = coopmat(0.0); + + p1 /= p1; + + p1 *= float16_t(2.0); + p2 *= 4.0; + + coopmat ms; + coopMatLoad(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + coopMatStore(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); + + coopmat ms8A; + coopmat ms8B; + coopmat ms8C; + coopMatMulAdd(ms8A, ms8B, ms8C); + coopMatMulAdd(ms8A, ms8B, ms8C, 0); + coopMatMulAdd(ms8A, ms8B, ms8C, gl_MatrixOperandsSaturatingAccumulation); + + coopmat m16; + coopMatStore(m16, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM); +} diff --git a/glslang/MachineIndependent/Initialize.cpp b/glslang/MachineIndependent/Initialize.cpp index a6ec68979..d8a969d77 100755 --- a/glslang/MachineIndependent/Initialize.cpp +++ b/glslang/MachineIndependent/Initialize.cpp @@ -4558,6 +4558,8 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV "const int gl_MatrixOperandsSaturatingAccumulation = 0x10;\n" "const int gl_CooperativeMatrixLayoutRowMajor = 0;\n" "const int gl_CooperativeMatrixLayoutColumnMajor = 1;\n" + "const int gl_CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202;\n" + "const int gl_CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203;\n" "\n" ); } diff --git a/gtests/Spv.FromFile.cpp b/gtests/Spv.FromFile.cpp index b12799320..4ea2485ce 100644 --- a/gtests/Spv.FromFile.cpp +++ b/gtests/Spv.FromFile.cpp @@ -367,6 +367,7 @@ INSTANTIATE_TEST_SUITE_P( "spv.coopmat.comp", "spv.coopmat_Error.comp", "spv.coopmatKHR.comp", + "spv.coopmat_armlayout.comp", "spv.coopmatKHR_arithmetic.comp", "spv.coopmatKHR_arithmeticError.comp", "spv.coopmatKHR_Error.comp",