Add support for the ARM extended matrix layout

This commit is contained in:
Pedro Olsen Ferreira 2024-06-07 12:48:05 +01:00 committed by arcady-lunarg
parent 68821c4da8
commit 02263efcd6
8 changed files with 548 additions and 0 deletions

View File

@ -41,5 +41,6 @@ static const char* const E_SPV_EXT_shader_atomic_float_min_max = "SPV_EXT_shader
static const char* const E_SPV_EXT_shader_image_int64 = "SPV_EXT_shader_image_int64";
static const char* const E_SPV_EXT_shader_tile_image = "SPV_EXT_shader_tile_image";
static const char* const E_SPV_EXT_mesh_shader = "SPV_EXT_mesh_shader";
static const char* const E_SPV_ARM_cooperative_matrix_layouts = "SPV_ARM_cooperative_matrix_layouts";
#endif // #ifndef GLSLextEXT_H

View File

@ -3705,6 +3705,12 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
idImmOps.push_back(spv::IdImmediate(true, operands[1])); // buf
if (node->getOp() == glslang::EOpCooperativeMatrixLoad) {
idImmOps.push_back(spv::IdImmediate(true, operands[3])); // matrixLayout
auto layout = builder.getConstantScalar(operands[3]);
if (layout == spv::CooperativeMatrixLayoutRowBlockedInterleavedARM ||
layout == spv::CooperativeMatrixLayoutColumnBlockedInterleavedARM) {
builder.addExtension(spv::E_SPV_ARM_cooperative_matrix_layouts);
builder.addCapability(spv::CapabilityCooperativeMatrixLayoutsARM);
}
idImmOps.push_back(spv::IdImmediate(true, operands[2])); // stride
} else {
idImmOps.push_back(spv::IdImmediate(true, operands[2])); // stride
@ -3729,6 +3735,12 @@ bool TGlslangToSpvTraverser::visitAggregate(glslang::TVisit visit, glslang::TInt
idImmOps.push_back(spv::IdImmediate(true, operands[0])); // object
if (node->getOp() == glslang::EOpCooperativeMatrixStore) {
idImmOps.push_back(spv::IdImmediate(true, operands[3])); // matrixLayout
auto layout = builder.getConstantScalar(operands[3]);
if (layout == spv::CooperativeMatrixLayoutRowBlockedInterleavedARM ||
layout == spv::CooperativeMatrixLayoutColumnBlockedInterleavedARM) {
builder.addExtension(spv::E_SPV_ARM_cooperative_matrix_layouts);
builder.addCapability(spv::CapabilityCooperativeMatrixLayoutsARM);
}
idImmOps.push_back(spv::IdImmediate(true, operands[2])); // stride
} else {
idImmOps.push_back(spv::IdImmediate(true, operands[2])); // stride

View File

@ -1035,6 +1035,8 @@ const char* CapabilityString(int info)
case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
case CapabilityDemoteToHelperInvocationEXT: return "DemoteToHelperInvocationEXT";

View File

@ -1002,6 +1002,7 @@ enum Capability {
CapabilityTileImageColorReadAccessEXT = 4166,
CapabilityTileImageDepthReadAccessEXT = 4167,
CapabilityTileImageStencilReadAccessEXT = 4168,
CapabilityCooperativeMatrixLayoutsARM = 4201,
CapabilityFragmentShadingRateKHR = 4422,
CapabilitySubgroupBallotKHR = 4423,
CapabilityDrawParameters = 4427,
@ -1302,6 +1303,8 @@ enum CooperativeMatrixOperandsMask {
enum CooperativeMatrixLayout {
CooperativeMatrixLayoutRowMajorKHR = 0,
CooperativeMatrixLayoutColumnMajorKHR = 1,
CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202,
CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203,
CooperativeMatrixLayoutMax = 0x7fffffff,
};

View File

@ -0,0 +1,406 @@
spv.coopmat_armlayout.comp
Validation failed
// Module Version 10000
// Generated by (magic number): 8000b
// Id's are bound by 251
Capability Shader
Capability Float16
Capability Int16
Capability Int8
Capability CooperativeMatrixLayoutsARM
Capability StorageUniformBufferBlock16
Capability VulkanMemoryModelKHR
Capability PhysicalStorageBufferAddressesEXT
Capability CooperativeMatrixKHR
Extension "SPV_ARM_cooperative_matrix_layouts"
Extension "SPV_KHR_16bit_storage"
Extension "SPV_KHR_cooperative_matrix"
Extension "SPV_KHR_physical_storage_buffer"
Extension "SPV_KHR_storage_buffer_storage_class"
Extension "SPV_KHR_vulkan_memory_model"
1: ExtInstImport "GLSL.std.450"
MemoryModel PhysicalStorageBuffer64EXT VulkanKHR
EntryPoint GLCompute 4 "main"
ExecutionMode 4 LocalSize 64 1 1
Source GLSL 450
SourceExtension "GL_EXT_buffer_reference"
SourceExtension "GL_EXT_shader_explicit_arithmetic_types"
SourceExtension "GL_KHR_cooperative_matrix"
SourceExtension "GL_KHR_memory_scope_semantics"
Name 4 "main"
Name 15 "f16(f161;"
Name 14 "m"
Name 22 "f32(f1;"
Name 21 "m"
Name 35 "m"
Name 53 "m2"
Name 57 "x"
Name 65 "tempArg"
Name 69 "Block"
MemberName 69(Block) 0 "y"
MemberName 69(Block) 1 "x"
Name 71 "block"
Name 81 "tempArg"
Name 86 "Block16"
MemberName 86(Block16) 0 "y"
MemberName 86(Block16) 1 "x"
MemberName 86(Block16) 2 "b"
Name 89 "Block"
MemberName 89(Block) 0 "y"
MemberName 89(Block) 1 "x"
Name 91 "block16"
Name 98 "tempArg"
Name 111 "D"
Name 115 "A"
Name 119 "B"
Name 121 "C"
Name 125 "l"
Name 129 "Y"
Name 130 "Z"
Name 133 "F"
Name 138 "a"
Name 142 "md1"
Name 153 "mC2"
Name 158 "tempArg"
Name 164 "tempArg"
Name 170 "p1"
Name 171 "param"
Name 174 "p2"
Name 175 "param"
Name 189 "tempArg"
Name 194 "shmatrix"
Name 198 "ms"
Name 205 "ms8A"
Name 209 "ms8B"
Name 213 "ms8C"
Name 228 "m16"
Name 234 "mC"
Name 235 "F"
Name 240 "S"
MemberName 240(S) 0 "a"
MemberName 240(S) 1 "b"
MemberName 240(S) 2 "c"
Name 245 "SC"
Name 250 "scm"
Decorate 67 ArrayStride 4
Decorate 68 ArrayStride 4
MemberDecorate 69(Block) 0 Offset 0
MemberDecorate 69(Block) 1 Offset 4194304
Decorate 69(Block) Block
Decorate 71(block) DescriptorSet 0
Decorate 71(block) Binding 0
Decorate 82 ArrayStride 2
Decorate 84 ArrayStride 2
MemberDecorate 86(Block16) 0 Offset 0
MemberDecorate 86(Block16) 1 Offset 2097152
MemberDecorate 86(Block16) 2 Offset 2097160
Decorate 86(Block16) Block
Decorate 87 ArrayStride 4
Decorate 88 ArrayStride 4
MemberDecorate 89(Block) 0 Offset 0
MemberDecorate 89(Block) 1 Offset 4194304
Decorate 89(Block) Block
Decorate 91(block16) DescriptorSet 0
Decorate 91(block16) Binding 0
Decorate 129(Y) SpecId 0
Decorate 233 BuiltIn WorkgroupSize
Decorate 235(F) SpecId 1
Decorate 245(SC) SpecId 2
2: TypeVoid
3: TypeFunction 2
6: TypeFloat 16
7: TypeInt 32 0
8: 7(int) Constant 3
9: 7(int) Constant 8
10: 7(int) Constant 2
11: TypeCooperativeMatrixKHR 6(float16_t) 8 9 9 10
12: TypePointer Function 11
13: TypeFunction 11 12(ptr)
17: TypeFloat 32
18: TypeCooperativeMatrixKHR 17(float) 8 9 9 10
19: TypePointer Function 18
20: TypeFunction 18 19(ptr)
32: 7(int) Constant 16
33: TypeCooperativeMatrixKHR 17(float) 8 32 9 10
34: TypePointer Function 33
36: 17(float) Constant 0
37: 33 ConstantComposite 36
46: 17(float) Constant 1073741824
51: TypeCooperativeMatrixKHR 6(float16_t) 8 32 9 10
52: TypePointer Function 51
56: TypePointer Function 17(float)
58: TypeInt 32 1
59: 58(int) Constant 1
62: 58(int) Constant 0
66: 7(int) Constant 1048576
67: TypeArray 17(float) 66
68: TypeRuntimeArray 17(float)
69(Block): TypeStruct 67 68
70: TypePointer StorageBuffer 69(Block)
71(block): 70(ptr) Variable StorageBuffer
72: 7(int) Constant 5
73: TypePointer StorageBuffer 17(float)
75: 7(int) Constant 128
76: 58(int) Constant 4202
82: TypeArray 6(float16_t) 66
83: 7(int) Constant 1
84: TypeArray 6(float16_t) 83
TypeForwardPointer 85 PhysicalStorageBufferEXT
86(Block16): TypeStruct 82 84 85
87: TypeArray 17(float) 66
88: TypeRuntimeArray 17(float)
89(Block): TypeStruct 87 88
85: TypePointer PhysicalStorageBufferEXT 89(Block)
90: TypePointer StorageBuffer 86(Block16)
91(block16): 90(ptr) Variable StorageBuffer
92: TypePointer StorageBuffer 6(float16_t)
99: 58(int) Constant 2
100: TypePointer StorageBuffer 85(ptr)
103: TypePointer PhysicalStorageBufferEXT 17(float)
112: 7(int) Constant 0
113: TypeCooperativeMatrixKHR 6(float16_t) 8 32 9 112
114: TypePointer Function 113
117: TypeCooperativeMatrixKHR 6(float16_t) 8 9 9 83
118: TypePointer Function 117
124: TypePointer Function 58(int)
128: 58(int) Constant 8
129(Y): 58(int) SpecConstant 2
130(Z): 58(int) SpecConstantOp 132 128 129(Y)
131: TypeCooperativeMatrixKHR 6(float16_t) 8 130(Z) 130(Z) 10
132: TypePointer Function 131
134:6(float16_t) Constant 0
135: 131 ConstantComposite 134
136: TypeArray 33 72
137: TypePointer Function 136
139: 58(int) Constant 3
140: 17(float) Constant 1065353216
146: 58(int) Constant 1234
150: TypeCooperativeMatrixKHR 6(float16_t) 8 130(Z) 9 10
151: TypeArray 150 8
152: TypePointer Private 151
153(mC2): 152(ptr) Variable Private
154: TypePointer Private 150
178: 11 ConstantComposite 134
179: 18 ConstantComposite 36
183:6(float16_t) Constant 16384
186: 17(float) Constant 1082130432
190: TypeVector 7(int) 4
191: 7(int) Constant 32
192: TypeArray 190(ivec4) 191
193: TypePointer Workgroup 192
194(shmatrix): 193(ptr) Variable Workgroup
195: TypePointer Workgroup 190(ivec4)
202: TypeInt 8 1
203: TypeCooperativeMatrixKHR 202(int8_t) 8 9 9 112
204: TypePointer Function 203
207: TypeCooperativeMatrixKHR 202(int8_t) 8 9 9 83
208: TypePointer Function 207
211: TypeCooperativeMatrixKHR 202(int8_t) 8 9 9 10
212: TypePointer Function 211
223: 58(int) Constant 16
225: TypeInt 16 1
226: TypeCooperativeMatrixKHR 225(int16_t) 8 9 9 112
227: TypePointer Function 226
231: TypeVector 7(int) 3
232: 7(int) Constant 64
233: 231(ivec3) ConstantComposite 232 83 83
234(mC): 154(ptr) Variable Private
235(F): 17(float) SpecConstant 1077936128
236: TypeCooperativeMatrixKHR 17(float) 8 130(Z) 9 10
237: 236 ConstantComposite 36
238:6(float16_t) Constant 15360
239: 11 ConstantComposite 238
240(S): TypeStruct 58(int) 58(int) 58(int)
241: 58(int) Constant 12
242: 58(int) Constant 23
243: 58(int) Constant 34
244: 240(S) ConstantComposite 241 242 243
245(SC): 58(int) SpecConstant 1
246: TypeCooperativeMatrixKHR 6(float16_t) 8 245(SC) 245(SC) 10
247: TypeArray 246 245(SC)
248: TypeArray 247 245(SC)
249: TypePointer Private 248
250(scm): 249(ptr) Variable Private
4(main): 2 Function None 3
5: Label
35(m): 34(ptr) Variable Function
53(m2): 52(ptr) Variable Function
57(x): 56(ptr) Variable Function
65(tempArg): 34(ptr) Variable Function
81(tempArg): 52(ptr) Variable Function
98(tempArg): 34(ptr) Variable Function
111(D): 34(ptr) Variable Function
115(A): 114(ptr) Variable Function
119(B): 118(ptr) Variable Function
121(C): 34(ptr) Variable Function
125(l): 124(ptr) Variable Function
133(F): 132(ptr) Variable Function
138(a): 137(ptr) Variable Function
142(md1): 56(ptr) Variable Function
158(tempArg): 34(ptr) Variable Function
164(tempArg): 52(ptr) Variable Function
170(p1): 12(ptr) Variable Function
171(param): 12(ptr) Variable Function
174(p2): 19(ptr) Variable Function
175(param): 19(ptr) Variable Function
189(tempArg): 52(ptr) Variable Function
198(ms): 52(ptr) Variable Function
205(ms8A): 204(ptr) Variable Function
209(ms8B): 208(ptr) Variable Function
213(ms8C): 212(ptr) Variable Function
228(m16): 227(ptr) Variable Function
Store 35(m) 37
38: 33 Load 35(m)
39: 33 Load 35(m)
40: 33 FAdd 38 39
Store 35(m) 40
41: 33 Load 35(m)
42: 33 Load 35(m)
43: 33 FSub 41 42
Store 35(m) 43
44: 33 Load 35(m)
45: 33 FNegate 44
Store 35(m) 45
47: 33 Load 35(m)
48: 33 MatrixTimesScalar 47 46
Store 35(m) 48
49: 33 Load 35(m)
50: 33 MatrixTimesScalar 49 46
Store 35(m) 50
54: 33 Load 35(m)
55: 51 FConvert 54
Store 53(m2) 55
60: 56(ptr) AccessChain 35(m) 59
61: 17(float) Load 60
Store 57(x) 61
63: 17(float) Load 57(x)
64: 56(ptr) AccessChain 35(m) 62
Store 64 63
74: 73(ptr) AccessChain 71(block) 59 32
77: 33 CooperativeMatrixLoadKHR 74 76 75 MakePointerVisibleKHR NonPrivatePointerKHR 72
Store 65(tempArg) 77
78: 33 Load 65(tempArg)
Store 35(m) 78
79: 33 Load 35(m)
80: 73(ptr) AccessChain 71(block) 59 32
CooperativeMatrixStoreKHR 80 79 76 75 MakePointerAvailableKHR NonPrivatePointerKHR 72
93: 92(ptr) AccessChain 91(block16) 59 32
94: 51 CooperativeMatrixLoadKHR 93 76 75 MakePointerVisibleKHR NonPrivatePointerKHR 72
Store 81(tempArg) 94
95: 51 Load 81(tempArg)
Store 53(m2) 95
96: 51 Load 53(m2)
97: 92(ptr) AccessChain 91(block16) 59 32
CooperativeMatrixStoreKHR 97 96 76 75 MakePointerAvailableKHR NonPrivatePointerKHR 72
101: 100(ptr) AccessChain 91(block16) 99
102: 85(ptr) Load 101 MakePointerVisibleKHR NonPrivatePointerKHR 72
104: 103(ptr) AccessChain 102 59 32
105: 33 CooperativeMatrixLoadKHR 104 76 75 Aligned MakePointerVisibleKHR NonPrivatePointerKHR 16 72
Store 98(tempArg) 105
106: 33 Load 98(tempArg)
Store 35(m) 106
107: 33 Load 35(m)
108: 100(ptr) AccessChain 91(block16) 99
109: 85(ptr) Load 108 MakePointerVisibleKHR NonPrivatePointerKHR 72
110: 103(ptr) AccessChain 109 59 32
CooperativeMatrixStoreKHR 110 107 76 75 Aligned MakePointerAvailableKHR NonPrivatePointerKHR 16 72
116: 113 Load 115(A)
120: 117 Load 119(B)
122: 33 Load 121(C)
123: 33 CooperativeMatrixMulAddKHR 116 120 122
Store 111(D) 123
126: 7(int) CooperativeMatrixLengthKHR 33
127: 58(int) Bitcast 126
Store 125(l) 127
Store 133(F) 135
141: 56(ptr) AccessChain 138(a) 139 62
Store 141 140
Store 142(md1) 36
143: 33 Load 35(m)
144: 33 Load 35(m)
145: 33 FAdd 144 143
Store 35(m) 145
147: 17(float) CompositeExtract 145 1234
148: 17(float) Load 142(md1)
149: 17(float) FAdd 148 147
Store 142(md1) 149
155: 154(ptr) AccessChain 153(mC2) 99
156: 150 Load 155
157: 154(ptr) AccessChain 153(mC2) 59
Store 157 156
159: 73(ptr) AccessChain 71(block) 62 32
160: 33 CooperativeMatrixLoadKHR 159 76 75 MakePointerVisibleKHR NonPrivatePointerKHR 72
Store 158(tempArg) 160
161: 33 Load 158(tempArg)
Store 35(m) 161
162: 33 Load 35(m)
163: 73(ptr) AccessChain 71(block) 62 32
CooperativeMatrixStoreKHR 163 162 76 75 MakePointerAvailableKHR NonPrivatePointerKHR 72
165: 92(ptr) AccessChain 91(block16) 62 32
166: 51 CooperativeMatrixLoadKHR 165 76 75 MakePointerVisibleKHR NonPrivatePointerKHR 72
Store 164(tempArg) 166
167: 51 Load 164(tempArg)
Store 53(m2) 167
168: 51 Load 53(m2)
169: 92(ptr) AccessChain 91(block16) 62 32
CooperativeMatrixStoreKHR 169 168 76 75 MakePointerAvailableKHR NonPrivatePointerKHR 72
172: 11 Load 170(p1)
Store 171(param) 172
173: 11 FunctionCall 15(f16(f161;) 171(param)
Store 170(p1) 173
176: 18 Load 174(p2)
Store 175(param) 176
177: 18 FunctionCall 22(f32(f1;) 175(param)
Store 174(p2) 177
Store 170(p1) 178
Store 174(p2) 179
180: 11 Load 170(p1)
181: 11 Load 170(p1)
182: 11 FDiv 181 180
Store 170(p1) 182
184: 11 Load 170(p1)
185: 11 MatrixTimesScalar 184 183
Store 170(p1) 185
187: 18 Load 174(p2)
188: 18 MatrixTimesScalar 187 186
Store 174(p2) 188
196: 195(ptr) AccessChain 194(shmatrix) 83
197: 51 CooperativeMatrixLoadKHR 196 76 10 MakePointerVisibleKHR NonPrivatePointerKHR 10
Store 189(tempArg) 197
199: 51 Load 189(tempArg)
Store 198(ms) 199
200: 51 Load 198(ms)
201: 195(ptr) AccessChain 194(shmatrix) 83
CooperativeMatrixStoreKHR 201 200 76 10 MakePointerAvailableKHR NonPrivatePointerKHR 10
206: 203 Load 205(ms8A)
210: 207 Load 209(ms8B)
214: 211 Load 213(ms8C)
215: 211 CooperativeMatrixMulAddKHR 206 210 214 ASignedComponentsKHR BSignedComponentsKHR CSignedComponentsKHR ResultSignedComponentsKHR
216: 203 Load 205(ms8A)
217: 207 Load 209(ms8B)
218: 211 Load 213(ms8C)
219: 211 CooperativeMatrixMulAddKHR 216 217 218 ASignedComponentsKHR BSignedComponentsKHR CSignedComponentsKHR ResultSignedComponentsKHR
220: 203 Load 205(ms8A)
221: 207 Load 209(ms8B)
222: 211 Load 213(ms8C)
224: 211 CooperativeMatrixMulAddKHR 220 221 222 ASignedComponentsKHR BSignedComponentsKHR CSignedComponentsKHR ResultSignedComponentsKHR SaturatingAccumulationKHR
229: 226 Load 228(m16)
230: 195(ptr) AccessChain 194(shmatrix) 83
CooperativeMatrixStoreKHR 230 229 76 10 MakePointerAvailableKHR NonPrivatePointerKHR 10
Return
FunctionEnd
15(f16(f161;): 11 Function None 13
14(m): 12(ptr) FunctionParameter
16: Label
24: 11 Load 14(m)
25: 11 FNegate 24
ReturnValue 25
FunctionEnd
22(f32(f1;): 18 Function None 20
21(m): 19(ptr) FunctionParameter
23: Label
28: 18 Load 21(m)
29: 18 FNegate 28
ReturnValue 29
FunctionEnd

View File

@ -0,0 +1,121 @@
#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;
coopmat<float16_t, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mC;
coopmat<float16_t, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mC2[3];
layout(constant_id = 1) const float F = 3.0;
const coopmat<float, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mD = coopmat<float, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator>(0.0);
const coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> mD2 = coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(1);
struct S { int a; int b; int c; };
const S s = S(12, 23, 34);
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
float y[1024*1024];
float x[];
} block;
layout(set = 0, binding = 0) coherent buffer Block16 {
float16_t y[1024*1024];
float16_t x[];
Block b;
} block16;
coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> f16(coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> m) { return -m; }
coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> f32(coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> m) { return -m; }
layout(constant_id = 2) const int SC = 1;
coopmat<float16_t, gl_ScopeSubgroup, SC, SC, gl_MatrixUseAccumulator> scm[SC][SC];
// sized for coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseAccumulator>
shared uvec4 shmatrix[16*16*2/16];
void main()
{
coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator> m = coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator>(0.0);
m = m + m;
m = m - m;
m = -m;
m = 2.0*m;
m = m*2.0;
coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> m2 = coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator>(m);
float x = m[1];
m[0] = x;
coopMatLoad(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatStore(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatLoad(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatStore(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatLoad(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatStore(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseA> A;
coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseB> B;
coopmat<float, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> C;
coopmat<float, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> D;
D = coopMatMulAdd(A, B, C);
int l = D.length();
coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> E;
coopmat<float16_t, gl_ScopeSubgroup, Z, Z, gl_MatrixUseAccumulator> F = coopmat<float16_t, gl_ScopeSubgroup, Z, Z, gl_MatrixUseAccumulator>(0.0);
coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator> a[5];
a[3][0] = 1.0;
float md1 = mD[1];
md1 += (m += m)[1234];
mC2[1] = mC2[2];
coopMatLoad(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatStore(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatLoad(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatStore(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> p1;
coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> p2;
p1 = f16(p1);
p2 = f32(p2);
p1 = coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(0.0);
p2 = coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(0.0);
p1 /= p1;
p1 *= float16_t(2.0);
p2 *= 4.0;
coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> ms;
coopMatLoad(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopMatStore(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseA> ms8A;
coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseB> ms8B;
coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> ms8C;
coopMatMulAdd(ms8A, ms8B, ms8C);
coopMatMulAdd(ms8A, ms8B, ms8C, 0);
coopMatMulAdd(ms8A, ms8B, ms8C, gl_MatrixOperandsSaturatingAccumulation);
coopmat<int16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseA> m16;
coopMatStore(m16, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowBlockedInterleavedARM);
}

View File

@ -4558,6 +4558,8 @@ void TBuiltIns::initialize(int version, EProfile profile, const SpvVersion& spvV
"const int gl_MatrixOperandsSaturatingAccumulation = 0x10;\n"
"const int gl_CooperativeMatrixLayoutRowMajor = 0;\n"
"const int gl_CooperativeMatrixLayoutColumnMajor = 1;\n"
"const int gl_CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202;\n"
"const int gl_CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203;\n"
"\n"
);
}

View File

@ -367,6 +367,7 @@ INSTANTIATE_TEST_SUITE_P(
"spv.coopmat.comp",
"spv.coopmat_Error.comp",
"spv.coopmatKHR.comp",
"spv.coopmat_armlayout.comp",
"spv.coopmatKHR_arithmetic.comp",
"spv.coopmatKHR_arithmeticError.comp",
"spv.coopmatKHR_Error.comp",