Merge pull request #1255 from KhronosGroup/fix-1254

HLSL: Support reading and writing complex composites from/to ByteAddressBuffers
2020-01-08 15:59:44 +01:00 · 2020-01-08 15:59:44 +01:00 · 34ba8ea4f2
commit 34ba8ea4f2
parent b522b409ae c256525c7b
10 changed files with 784 additions and 86 deletions
--- a/reference/opt/shaders-hlsl/comp/access-chain-load-composite.comp
+++ b/reference/opt/shaders-hlsl/comp/access-chain-load-composite.comp
@ -0,0 +1,108 @@
+struct Baz
+{
+    float c;
+};
+
+struct Bar
+{
+    float d[2][4];
+    Baz baz[2];
+};
+
+struct Foo
+{
+    column_major float2x2 a;
+    float2 b;
+    Bar c[5];
+};
+
+static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
+
+RWByteAddressBuffer _31 : register(u0);
+
+void comp_main()
+{
+    Foo _36;
+    _36.a = asfloat(uint2x2(_31.Load(0), _31.Load(8), _31.Load(4), _31.Load(12)));
+    _36.b = asfloat(_31.Load2(16));
+    [unroll]
+    for (int _4ident = 0; _4ident < 5; _4ident++)
+    {
+        [unroll]
+        for (int _5ident = 0; _5ident < 2; _5ident++)
+        {
+            [unroll]
+            for (int _6ident = 0; _6ident < 4; _6ident++)
+            {
+                _36.c[_4ident].d[_5ident][_6ident] = asfloat(_31.Load(_6ident * 4 + _5ident * 16 + _4ident * 40 + 24));
+            }
+        }
+        [unroll]
+        for (int _7ident = 0; _7ident < 2; _7ident++)
+        {
+            _36.c[_4ident].baz[_7ident].c = asfloat(_31.Load(_7ident * 4 + _4ident * 40 + 56));
+        }
+    }
+    float2x2 _234 = float2x2(_36.a[0] + 1.0f.xx, _36.a[1] + 1.0f.xx);
+    _31.Store(224, asuint(_234[0].x));
+    _31.Store(228, asuint(_234[1].x));
+    _31.Store(232, asuint(_234[0].y));
+    _31.Store(236, asuint(_234[1].y));
+    _31.Store2(240, asuint(_36.b + 2.0f.xx));
+    _31.Store(248, asuint(_36.c[0].d[0][0]));
+    _31.Store(252, asuint(_36.c[0].d[0][1]));
+    _31.Store(256, asuint(_36.c[0].d[0][2]));
+    _31.Store(260, asuint(_36.c[0].d[0][3]));
+    _31.Store(264, asuint(_36.c[0].d[1][0]));
+    _31.Store(268, asuint(_36.c[0].d[1][1]));
+    _31.Store(272, asuint(_36.c[0].d[1][2]));
+    _31.Store(276, asuint(_36.c[0].d[1][3]));
+    _31.Store(280, asuint(_36.c[0].baz[0].c));
+    _31.Store(284, asuint(_36.c[0].baz[1].c));
+    _31.Store(288, asuint(_36.c[1].d[0][0]));
+    _31.Store(292, asuint(_36.c[1].d[0][1]));
+    _31.Store(296, asuint(_36.c[1].d[0][2]));
+    _31.Store(300, asuint(_36.c[1].d[0][3]));
+    _31.Store(304, asuint(_36.c[1].d[1][0]));
+    _31.Store(308, asuint(_36.c[1].d[1][1]));
+    _31.Store(312, asuint(_36.c[1].d[1][2]));
+    _31.Store(316, asuint(_36.c[1].d[1][3]));
+    _31.Store(320, asuint(_36.c[1].baz[0].c));
+    _31.Store(324, asuint(_36.c[1].baz[1].c));
+    _31.Store(328, asuint(_36.c[2].d[0][0]));
+    _31.Store(332, asuint(_36.c[2].d[0][1]));
+    _31.Store(336, asuint(_36.c[2].d[0][2]));
+    _31.Store(340, asuint(_36.c[2].d[0][3]));
+    _31.Store(344, asuint(_36.c[2].d[1][0]));
+    _31.Store(348, asuint(_36.c[2].d[1][1]));
+    _31.Store(352, asuint(_36.c[2].d[1][2]));
+    _31.Store(356, asuint(_36.c[2].d[1][3]));
+    _31.Store(360, asuint(_36.c[2].baz[0].c));
+    _31.Store(364, asuint(_36.c[2].baz[1].c));
+    _31.Store(368, asuint(_36.c[3].d[0][0]));
+    _31.Store(372, asuint(_36.c[3].d[0][1]));
+    _31.Store(376, asuint(_36.c[3].d[0][2]));
+    _31.Store(380, asuint(_36.c[3].d[0][3]));
+    _31.Store(384, asuint(_36.c[3].d[1][0]));
+    _31.Store(388, asuint(_36.c[3].d[1][1] + 5.0f));
+    _31.Store(392, asuint(_36.c[3].d[1][2]));
+    _31.Store(396, asuint(_36.c[3].d[1][3]));
+    _31.Store(400, asuint(_36.c[3].baz[0].c));
+    _31.Store(404, asuint(_36.c[3].baz[1].c));
+    _31.Store(408, asuint(_36.c[4].d[0][0]));
+    _31.Store(412, asuint(_36.c[4].d[0][1]));
+    _31.Store(416, asuint(_36.c[4].d[0][2]));
+    _31.Store(420, asuint(_36.c[4].d[0][3]));
+    _31.Store(424, asuint(_36.c[4].d[1][0]));
+    _31.Store(428, asuint(_36.c[4].d[1][1]));
+    _31.Store(432, asuint(_36.c[4].d[1][2]));
+    _31.Store(436, asuint(_36.c[4].d[1][3]));
+    _31.Store(440, asuint(_36.c[4].baz[0].c));
+    _31.Store(444, asuint(_36.c[4].baz[1].c));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
--- a/reference/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp
+++ b/reference/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp
@ -0,0 +1,75 @@
+struct Baz
+{
+    float c;
+};
+
+struct Bar
+{
+    float d[2][4];
+    Baz baz[2];
+};
+
+struct Foo
+{
+    column_major float2x2 a;
+    float2 b;
+    Bar c[5];
+};
+
+static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
+
+RWByteAddressBuffer _10 : register(u0);
+
+void comp_main()
+{
+    Foo _64;
+    _64.a = asfloat(uint2x2(_10.Load(0), _10.Load(8), _10.Load(4), _10.Load(12)));
+    _64.b = asfloat(_10.Load2(16));
+    [unroll]
+    for (int _0ident = 0; _0ident < 5; _0ident++)
+    {
+        [unroll]
+        for (int _1ident = 0; _1ident < 2; _1ident++)
+        {
+            [unroll]
+            for (int _2ident = 0; _2ident < 4; _2ident++)
+            {
+                _64.c[_0ident].d[_1ident][_2ident] = asfloat(_10.Load(_2ident * 4 + _1ident * 16 + _0ident * 40 + 24));
+            }
+        }
+        [unroll]
+        for (int _3ident = 0; _3ident < 2; _3ident++)
+        {
+            _64.c[_0ident].baz[_3ident].c = asfloat(_10.Load(_3ident * 4 + _0ident * 40 + 56));
+        }
+    }
+    _10.Store(224, asuint(_64.a[0].x));
+    _10.Store(228, asuint(_64.a[1].x));
+    _10.Store(232, asuint(_64.a[0].y));
+    _10.Store(236, asuint(_64.a[1].y));
+    _10.Store2(240, asuint(_64.b));
+    [unroll]
+    for (int _4ident = 0; _4ident < 5; _4ident++)
+    {
+        [unroll]
+        for (int _5ident = 0; _5ident < 2; _5ident++)
+        {
+            [unroll]
+            for (int _6ident = 0; _6ident < 4; _6ident++)
+            {
+                _10.Store(_6ident * 4 + _5ident * 16 + _4ident * 40 + 248, asuint(_64.c[_4ident].d[_5ident][_6ident]));
+            }
+        }
+        [unroll]
+        for (int _7ident = 0; _7ident < 2; _7ident++)
+        {
+            _10.Store(_7ident * 4 + _4ident * 40 + 280, asuint(_64.c[_4ident].baz[_7ident].c));
+        }
+    }
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
--- a/reference/shaders-hlsl/comp/access-chain-load-composite.comp
+++ b/reference/shaders-hlsl/comp/access-chain-load-composite.comp
@ -0,0 +1,164 @@
+struct Baz
+{
+    float c;
+};
+
+struct Bar
+{
+    float d[2][4];
+    Baz baz[2];
+};
+
+struct Foo
+{
+    column_major float2x2 a;
+    float2 b;
+    Bar c[5];
+};
+
+static const uint3 gl_WorkGroupSize = uint3(1u, 1u, 1u);
+
+RWByteAddressBuffer _31 : register(u0);
+
+void comp_main()
+{
+    Foo _36;
+    _36.a = asfloat(uint2x2(_31.Load(0), _31.Load(8), _31.Load(4), _31.Load(12)));
+    _36.b = asfloat(_31.Load2(16));
+    [unroll]
+    for (int _4ident = 0; _4ident < 5; _4ident++)
+    {
+        [unroll]
+        for (int _5ident = 0; _5ident < 2; _5ident++)
+        {
+            [unroll]
+            for (int _6ident = 0; _6ident < 4; _6ident++)
+            {
+                _36.c[_4ident].d[_5ident][_6ident] = asfloat(_31.Load(_6ident * 4 + _5ident * 16 + _4ident * 40 + 24));
+            }
+        }
+        [unroll]
+        for (int _7ident = 0; _7ident < 2; _7ident++)
+        {
+            _36.c[_4ident].baz[_7ident].c = asfloat(_31.Load(_7ident * 4 + _4ident * 40 + 56));
+        }
+    }
+    Foo f;
+    f.a = _36.a;
+    f.b = _36.b;
+    f.c[0].d[0][0] = _36.c[0].d[0][0];
+    f.c[0].d[0][1] = _36.c[0].d[0][1];
+    f.c[0].d[0][2] = _36.c[0].d[0][2];
+    f.c[0].d[0][3] = _36.c[0].d[0][3];
+    f.c[0].d[1][0] = _36.c[0].d[1][0];
+    f.c[0].d[1][1] = _36.c[0].d[1][1];
+    f.c[0].d[1][2] = _36.c[0].d[1][2];
+    f.c[0].d[1][3] = _36.c[0].d[1][3];
+    f.c[0].baz[0].c = _36.c[0].baz[0].c;
+    f.c[0].baz[1].c = _36.c[0].baz[1].c;
+    f.c[1].d[0][0] = _36.c[1].d[0][0];
+    f.c[1].d[0][1] = _36.c[1].d[0][1];
+    f.c[1].d[0][2] = _36.c[1].d[0][2];
+    f.c[1].d[0][3] = _36.c[1].d[0][3];
+    f.c[1].d[1][0] = _36.c[1].d[1][0];
+    f.c[1].d[1][1] = _36.c[1].d[1][1];
+    f.c[1].d[1][2] = _36.c[1].d[1][2];
+    f.c[1].d[1][3] = _36.c[1].d[1][3];
+    f.c[1].baz[0].c = _36.c[1].baz[0].c;
+    f.c[1].baz[1].c = _36.c[1].baz[1].c;
+    f.c[2].d[0][0] = _36.c[2].d[0][0];
+    f.c[2].d[0][1] = _36.c[2].d[0][1];
+    f.c[2].d[0][2] = _36.c[2].d[0][2];
+    f.c[2].d[0][3] = _36.c[2].d[0][3];
+    f.c[2].d[1][0] = _36.c[2].d[1][0];
+    f.c[2].d[1][1] = _36.c[2].d[1][1];
+    f.c[2].d[1][2] = _36.c[2].d[1][2];
+    f.c[2].d[1][3] = _36.c[2].d[1][3];
+    f.c[2].baz[0].c = _36.c[2].baz[0].c;
+    f.c[2].baz[1].c = _36.c[2].baz[1].c;
+    f.c[3].d[0][0] = _36.c[3].d[0][0];
+    f.c[3].d[0][1] = _36.c[3].d[0][1];
+    f.c[3].d[0][2] = _36.c[3].d[0][2];
+    f.c[3].d[0][3] = _36.c[3].d[0][3];
+    f.c[3].d[1][0] = _36.c[3].d[1][0];
+    f.c[3].d[1][1] = _36.c[3].d[1][1];
+    f.c[3].d[1][2] = _36.c[3].d[1][2];
+    f.c[3].d[1][3] = _36.c[3].d[1][3];
+    f.c[3].baz[0].c = _36.c[3].baz[0].c;
+    f.c[3].baz[1].c = _36.c[3].baz[1].c;
+    f.c[4].d[0][0] = _36.c[4].d[0][0];
+    f.c[4].d[0][1] = _36.c[4].d[0][1];
+    f.c[4].d[0][2] = _36.c[4].d[0][2];
+    f.c[4].d[0][3] = _36.c[4].d[0][3];
+    f.c[4].d[1][0] = _36.c[4].d[1][0];
+    f.c[4].d[1][1] = _36.c[4].d[1][1];
+    f.c[4].d[1][2] = _36.c[4].d[1][2];
+    f.c[4].d[1][3] = _36.c[4].d[1][3];
+    f.c[4].baz[0].c = _36.c[4].baz[0].c;
+    f.c[4].baz[1].c = _36.c[4].baz[1].c;
+    float2 _229 = 1.0f.xx;
+    f.a = float2x2(f.a[0] + _229, f.a[1] + _229);
+    f.b += 2.0f.xx;
+    f.c[3].d[1][1] += 5.0f;
+    _31.Store(224, asuint(f.a[0].x));
+    _31.Store(228, asuint(f.a[1].x));
+    _31.Store(232, asuint(f.a[0].y));
+    _31.Store(236, asuint(f.a[1].y));
+    _31.Store2(240, asuint(f.b));
+    _31.Store(248, asuint(f.c[0].d[0][0]));
+    _31.Store(252, asuint(f.c[0].d[0][1]));
+    _31.Store(256, asuint(f.c[0].d[0][2]));
+    _31.Store(260, asuint(f.c[0].d[0][3]));
+    _31.Store(264, asuint(f.c[0].d[1][0]));
+    _31.Store(268, asuint(f.c[0].d[1][1]));
+    _31.Store(272, asuint(f.c[0].d[1][2]));
+    _31.Store(276, asuint(f.c[0].d[1][3]));
+    _31.Store(280, asuint(f.c[0].baz[0].c));
+    _31.Store(284, asuint(f.c[0].baz[1].c));
+    _31.Store(288, asuint(f.c[1].d[0][0]));
+    _31.Store(292, asuint(f.c[1].d[0][1]));
+    _31.Store(296, asuint(f.c[1].d[0][2]));
+    _31.Store(300, asuint(f.c[1].d[0][3]));
+    _31.Store(304, asuint(f.c[1].d[1][0]));
+    _31.Store(308, asuint(f.c[1].d[1][1]));
+    _31.Store(312, asuint(f.c[1].d[1][2]));
+    _31.Store(316, asuint(f.c[1].d[1][3]));
+    _31.Store(320, asuint(f.c[1].baz[0].c));
+    _31.Store(324, asuint(f.c[1].baz[1].c));
+    _31.Store(328, asuint(f.c[2].d[0][0]));
+    _31.Store(332, asuint(f.c[2].d[0][1]));
+    _31.Store(336, asuint(f.c[2].d[0][2]));
+    _31.Store(340, asuint(f.c[2].d[0][3]));
+    _31.Store(344, asuint(f.c[2].d[1][0]));
+    _31.Store(348, asuint(f.c[2].d[1][1]));
+    _31.Store(352, asuint(f.c[2].d[1][2]));
+    _31.Store(356, asuint(f.c[2].d[1][3]));
+    _31.Store(360, asuint(f.c[2].baz[0].c));
+    _31.Store(364, asuint(f.c[2].baz[1].c));
+    _31.Store(368, asuint(f.c[3].d[0][0]));
+    _31.Store(372, asuint(f.c[3].d[0][1]));
+    _31.Store(376, asuint(f.c[3].d[0][2]));
+    _31.Store(380, asuint(f.c[3].d[0][3]));
+    _31.Store(384, asuint(f.c[3].d[1][0]));
+    _31.Store(388, asuint(f.c[3].d[1][1]));
+    _31.Store(392, asuint(f.c[3].d[1][2]));
+    _31.Store(396, asuint(f.c[3].d[1][3]));
+    _31.Store(400, asuint(f.c[3].baz[0].c));
+    _31.Store(404, asuint(f.c[3].baz[1].c));
+    _31.Store(408, asuint(f.c[4].d[0][0]));
+    _31.Store(412, asuint(f.c[4].d[0][1]));
+    _31.Store(416, asuint(f.c[4].d[0][2]));
+    _31.Store(420, asuint(f.c[4].d[0][3]));
+    _31.Store(424, asuint(f.c[4].d[1][0]));
+    _31.Store(428, asuint(f.c[4].d[1][1]));
+    _31.Store(432, asuint(f.c[4].d[1][2]));
+    _31.Store(436, asuint(f.c[4].d[1][3]));
+    _31.Store(440, asuint(f.c[4].baz[0].c));
+    _31.Store(444, asuint(f.c[4].baz[1].c));
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
--- a/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp
+++ b/shaders-hlsl-no-opt/asm/comp/access-chain-load-store-composite.asm.comp
@ -0,0 +1,118 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos Glslang Reference Front End; 8
+; Bound: 437
+; Schema: 0
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint GLCompute %main "main"
+               OpExecutionMode %main LocalSize 1 1 1
+               OpSource GLSL 450
+               OpName %main "main"
+               OpName %Baz "Baz"
+               OpMemberName %Baz 0 "c"
+               OpName %Bar "Bar"
+               OpMemberName %Bar 0 "d"
+               OpMemberName %Bar 1 "baz"
+               OpName %Foo "Foo"
+               OpMemberName %Foo 0 "a"
+               OpMemberName %Foo 1 "b"
+               OpMemberName %Foo 2 "c"
+               OpName %Baz_0 "Baz"
+               OpMemberName %Baz_0 0 "c"
+               OpName %Bar_0 "Bar"
+               OpMemberName %Bar_0 0 "d"
+               OpMemberName %Bar_0 1 "baz"
+               OpName %Foo_0 "Foo"
+               OpMemberName %Foo_0 0 "a"
+               OpMemberName %Foo_0 1 "b"
+               OpMemberName %Foo_0 2 "c"
+               OpName %SSBO "SSBO"
+               OpMemberName %SSBO 0 "foo"
+               OpMemberName %SSBO 1 "foo2"
+               OpName %_ ""
+               OpDecorate %_arr_float_uint_4_0 ArrayStride 4
+               OpDecorate %_arr__arr_float_uint_4_0_uint_2 ArrayStride 16
+               OpMemberDecorate %Baz_0 0 Offset 0
+               OpDecorate %_arr_Baz_0_uint_2 ArrayStride 4
+               OpMemberDecorate %Bar_0 0 Offset 0
+               OpMemberDecorate %Bar_0 1 Offset 32
+               OpDecorate %_arr_Bar_0_uint_5 ArrayStride 40
+               OpMemberDecorate %Foo_0 0 RowMajor
+               OpMemberDecorate %Foo_0 0 Offset 0
+               OpMemberDecorate %Foo_0 0 MatrixStride 8
+               OpMemberDecorate %Foo_0 1 Offset 16
+               OpMemberDecorate %Foo_0 2 Offset 24
+               OpMemberDecorate %SSBO 0 Offset 0
+               OpMemberDecorate %SSBO 1 Offset 224
+               OpDecorate %SSBO BufferBlock
+               OpDecorate %_ DescriptorSet 0
+               OpDecorate %_ Binding 0
+               OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+%mat2v2float = OpTypeMatrix %v2float 2
+       %uint = OpTypeInt 32 0
+     %uint_4 = OpConstant %uint 4
+%_arr_float_uint_4 = OpTypeArray %float %uint_4
+     %uint_2 = OpConstant %uint 2
+%_arr__arr_float_uint_4_uint_2 = OpTypeArray %_arr_float_uint_4 %uint_2
+        %Baz = OpTypeStruct %float
+%_arr_Baz_uint_2 = OpTypeArray %Baz %uint_2
+        %Bar = OpTypeStruct %_arr__arr_float_uint_4_uint_2 %_arr_Baz_uint_2
+     %uint_5 = OpConstant %uint 5
+%_arr_Bar_uint_5 = OpTypeArray %Bar %uint_5
+        %Foo = OpTypeStruct %mat2v2float %v2float %_arr_Bar_uint_5
+%_ptr_Function_Foo = OpTypePointer Function %Foo
+%_arr_float_uint_4_0 = OpTypeArray %float %uint_4
+%_arr__arr_float_uint_4_0_uint_2 = OpTypeArray %_arr_float_uint_4_0 %uint_2
+      %Baz_0 = OpTypeStruct %float
+%_arr_Baz_0_uint_2 = OpTypeArray %Baz_0 %uint_2
+      %Bar_0 = OpTypeStruct %_arr__arr_float_uint_4_0_uint_2 %_arr_Baz_0_uint_2
+%_arr_Bar_0_uint_5 = OpTypeArray %Bar_0 %uint_5
+      %Foo_0 = OpTypeStruct %mat2v2float %v2float %_arr_Bar_0_uint_5
+       %SSBO = OpTypeStruct %Foo_0 %Foo_0
+%_ptr_Uniform_SSBO = OpTypePointer Uniform %SSBO
+          %_ = OpVariable %_ptr_Uniform_SSBO Uniform
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+%_ptr_Uniform_Foo_0 = OpTypePointer Uniform %Foo_0
+%_ptr_Function_mat2v2float = OpTypePointer Function %mat2v2float
+      %int_1 = OpConstant %int 1
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+      %int_2 = OpConstant %int 2
+%_ptr_Function__arr_Bar_uint_5 = OpTypePointer Function %_arr_Bar_uint_5
+%_ptr_Function_Bar = OpTypePointer Function %Bar
+%_ptr_Function__arr__arr_float_uint_4_uint_2 = OpTypePointer Function %_arr__arr_float_uint_4_uint_2
+%_ptr_Function__arr_float_uint_4 = OpTypePointer Function %_arr_float_uint_4
+%_ptr_Function_float = OpTypePointer Function %float
+      %int_3 = OpConstant %int 3
+%_ptr_Function__arr_Baz_uint_2 = OpTypePointer Function %_arr_Baz_uint_2
+%_ptr_Function_Baz = OpTypePointer Function %Baz
+      %int_4 = OpConstant %int 4
+    %float_1 = OpConstant %float 1
+    %float_2 = OpConstant %float 2
+    %float_5 = OpConstant %float 5
+%_ptr_Uniform_mat2v2float = OpTypePointer Uniform %mat2v2float
+%_ptr_Uniform_v2float = OpTypePointer Uniform %v2float
+%_ptr_Uniform__arr_Bar_0_uint_5 = OpTypePointer Uniform %_arr_Bar_0_uint_5
+%_ptr_Uniform_Bar_0 = OpTypePointer Uniform %Bar_0
+%_ptr_Uniform__arr__arr_float_uint_4_0_uint_2 = OpTypePointer Uniform %_arr__arr_float_uint_4_0_uint_2
+%_ptr_Uniform__arr_float_uint_4_0 = OpTypePointer Uniform %_arr_float_uint_4_0
+%_ptr_Uniform_float = OpTypePointer Uniform %float
+%_ptr_Uniform__arr_Baz_0_uint_2 = OpTypePointer Uniform %_arr_Baz_0_uint_2
+%_ptr_Uniform_Baz_0 = OpTypePointer Uniform %Baz_0
+     %v3uint = OpTypeVector %uint 3
+     %uint_1 = OpConstant %uint 1
+%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_1 %uint_1 %uint_1
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %ptr_load = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_0
+         %ptr_store = OpAccessChain %_ptr_Uniform_Foo_0 %_ %int_1
+         %loaded = OpLoad %Foo_0 %ptr_load
+		 OpStore %ptr_store %loaded
+               OpReturn
+               OpFunctionEnd
--- a/shaders-hlsl/comp/access-chain-load-composite.comp
+++ b/shaders-hlsl/comp/access-chain-load-composite.comp
@ -0,0 +1,35 @@
+#version 450
+layout(local_size_x = 1) in;
+
+struct Baz
+{
+	float c;
+};
+
+struct Bar
+{
+	float d[2][4];
+	Baz baz[2];
+};
+
+struct Foo
+{
+	mat2 a;
+	vec2 b;
+	Bar c[5];
+};
+
+layout(row_major, std430, set = 0, binding = 0) buffer SSBO
+{
+	Foo foo;
+	Foo foo2;
+};
+
+void main() // Test input: copy foo into a local, modify three members, write the result to foo2.
+{
+	Foo f = foo; // Whole-struct load from the SSBO (nested arrays, structs and a matrix).
+	f.a += 1.0; // Matrix member.
+	f.b += 2.0; // Vector member.
+	f.c[3].d[1][1] += 5.0; // One scalar inside the nested array-of-struct member.
+	foo2 = f; // Whole-struct store back to the SSBO.
+}
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@ -975,6 +975,7 @@ struct SPIRAccessChain : IVariant

 	VariableID loaded_from = 0;
 	uint32_t matrix_stride = 0;
+	uint32_t array_stride = 0;
 	bool row_major_matrix = false;
 	bool immutable = false;

--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -6684,6 +6684,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 	string expr;

 	bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0;
+	bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0;
 	bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0;
 	bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0;
 	bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0;
@ -6728,14 +6729,24 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 	bool pending_array_enclose = false;
 	bool dimension_flatten = false;

-	const auto append_index = [&](uint32_t index) {
-		access_chain_internal_append_index(expr, base, type, flags, access_chain_is_arrayed, index);
+	const auto append_index = [&](uint32_t index, bool is_literal) {
+		AccessChainFlags mod_flags = flags; // Per-index copy; the chain-wide flags stay unmodified.
+		if (!is_literal)
+			mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; // Forward this one index as a non-literal.
+		access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
 	};

 	for (uint32_t i = 0; i < count; i++)
 	{
 		uint32_t index = indices[i];

+		bool is_literal = index_is_literal;
+		if (is_literal && msb_is_id && (index >> 31u) != 0u)
+		{
+			is_literal = false;
+			index &= 0x7fffffffu;
+		}
+
 		// Pointer chains
 		if (ptr_chain && i == 0)
 		{
@ -6752,7 +6763,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 			if (options.flatten_multidimensional_arrays && dimension_flatten)
 			{
 				// If we are flattening multidimensional arrays, do manual stride computation.
-				if (index_is_literal)
+				if (is_literal)
 					expr += convert_to_string(index);
 				else
 					expr += to_enclosed_expression(index, register_expression_read);
@ -6773,7 +6784,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 			}
 			else
 			{
-				append_index(index);
+				append_index(index, is_literal);
 			}

 			if (type->basetype == SPIRType::ControlPointArray)
@ -6820,11 +6831,11 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 					else if (var->storage == StorageClassOutput)
 						expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr);
 					else
-						append_index(index);
+						append_index(index, is_literal);
 					break;

 				default:
-					append_index(index);
+					append_index(index, is_literal);
 					break;
 				}
 			}
@ -6833,7 +6844,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				// If we are flattening multidimensional arrays, do manual stride computation.
 				auto &parent_type = get<SPIRType>(type->parent_type);

-				if (index_is_literal)
+				if (is_literal)
 					expr += convert_to_string(index);
 				else
 					expr += to_enclosed_expression(index, register_expression_read);
@ -6856,7 +6867,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 			// By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask.
 			else if (!builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn))))
 			{
-				append_index(index);
+				append_index(index, is_literal);
 			}

 			type_id = type->parent_type;
@ -6868,7 +6879,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
 		else if (type->basetype == SPIRType::Struct)
 		{
-			if (!index_is_literal)
+			if (!is_literal)
 				index = get<SPIRConstant>(index).scalar();

 			if (index >= type->member_types.size())
@ -6915,7 +6926,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 			// by flipping indexing order of the matrix.

 			expr += "[";
-			if (index_is_literal)
+			if (is_literal)
 				expr += convert_to_string(index);
 			else
 				expr += to_expression(index, register_expression_read);
@ -6939,7 +6950,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 				}
 			}

-			if (index_is_literal && !is_packed && !row_major_matrix_needs_conversion)
+			if (is_literal && !is_packed && !row_major_matrix_needs_conversion)
 			{
 				expr += ".";
 				expr += index_to_swizzle(index);
@ -6958,7 +6969,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 					expr += index_to_swizzle(c.scalar());
 				}
 			}
-			else if (index_is_literal)
+			else if (is_literal)
 			{
 				// For packed vectors, we can only access them as an array, not by swizzle.
 				expr += join("[", index, "]");
@ -7012,9 +7023,10 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
 	if (flattened_buffer_blocks.count(base))
 	{
 		uint32_t matrix_stride = 0;
+		uint32_t array_stride = 0;
 		bool need_transpose = false;
 		flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride,
-		                              ptr_chain);
+		                              &array_stride, ptr_chain);

 		if (meta)
 		{
@ -7022,7 +7034,8 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
 			meta->storage_is_packed = false;
 		}

-		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
+		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride,
+		                              need_transpose);
 	}
 	else if (flattened_structs.count(base) && count > 0)
 	{
@ -7093,7 +7106,7 @@ void CompilerGLSL::store_flattened_struct(SPIRVariable &var, uint32_t value)

 std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
                                                 const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
-                                                 bool need_transpose)
+                                                 uint32_t /* array_stride */, bool need_transpose)
 {
 	if (!target_type.array.empty())
 		SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened");
@ -7132,7 +7145,7 @@ std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uin
 		}

 		auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride,
-		                                  need_transpose);
+		                                  0 /* array_stride */, need_transpose);

 		// Cannot forward transpositions, so resolve them here.
 		if (need_transpose)
@ -7239,24 +7252,17 @@ std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uin

 std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
    const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride,
-    bool *need_transpose, uint32_t *out_matrix_stride, bool ptr_chain)
+    bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain)
 {
 	// Start traversing type hierarchy at the proper non-pointer types.
 	const auto *type = &get_pointee_type(basetype);

-	// This holds the type of the current pointer which we are traversing through.
-	// We always start out from a struct type which is the block.
-	// This is primarily used to reflect the array strides and matrix strides later.
-	// For the first access chain index, type_id won't be needed, so just keep it as 0, it will be set
-	// accordingly as members of structs are accessed.
-	assert(type->basetype == SPIRType::Struct);
-	uint32_t type_id = 0;
-
 	std::string expr;

 	// Inherit matrix information in case we are access chaining a vector which might have come from a row major layout.
 	bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false;
 	uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0;
+	uint32_t array_stride = out_array_stride ? *out_array_stride : 0;

 	for (uint32_t i = 0; i < count; i++)
 	{
@ -7266,7 +7272,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
 		if (ptr_chain && i == 0)
 		{
 			// Here, the pointer type will be decorated with an array stride.
-			uint32_t array_stride = get_decoration(basetype.self, DecorationArrayStride);
+			array_stride = get_decoration(basetype.self, DecorationArrayStride);
 			if (!array_stride)
 				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");

@ -7292,16 +7298,10 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
 				expr += convert_to_string(array_stride / word_stride);
 				expr += " + ";
 			}
-			// Type ID is unchanged.
 		}
 		// Arrays
 		else if (!type->array.empty())
 		{
-			// Here, the type_id will be a type ID for the array type itself.
-			uint32_t array_stride = get_decoration(type_id, DecorationArrayStride);
-			if (!array_stride)
-				SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block.");
-
 			auto *constant = maybe_get<SPIRConstant>(index);
 			if (constant)
 			{
@ -7327,9 +7327,9 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(

 			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(parent_type);
-			type_id = parent_type;

-			// Type ID now refers to the array type with one less dimension.
+			if (!type->array.empty())
+				array_stride = get_decoration(parent_type, DecorationArrayStride);
 		}
 		// For structs, the index refers to a constant, which indexes into the members.
 		// We also check if this member is a builtin, since we then replace the entire expression with the builtin one.
@ -7341,7 +7341,6 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
 				SPIRV_CROSS_THROW("Member index is out of bounds!");

 			offset += type_struct_member_offset(*type, index);
-			type_id = type->member_types[index];

 			auto &struct_type = *type;
 			type = &get<SPIRType>(type->member_types[index]);
@ -7354,6 +7353,9 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
 			}
 			else
 				row_major_matrix_needs_conversion = false;
+
+			if (!type->array.empty())
+				array_stride = type_struct_member_array_stride(struct_type, index);
 		}
 		// Matrix -> Vector
 		else if (type->columns > 1)
@ -7382,9 +7384,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
 				expr += " + ";
 			}

-			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(type->parent_type);
-			type_id = parent_type;
 		}
 		// Vector -> Scalar
 		else if (type->vecsize > 1)
@ -7413,9 +7413,7 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
 				expr += " + ";
 			}

-			uint32_t parent_type = type->parent_type;
 			type = &get<SPIRType>(type->parent_type);
-			type_id = parent_type;
 		}
 		else
 			SPIRV_CROSS_THROW("Cannot subdivide a scalar value!");
@ -7425,6 +7423,8 @@ std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset(
 		*need_transpose = row_major_matrix_needs_conversion;
 	if (out_matrix_stride)
 		*out_matrix_stride = matrix_stride;
+	if (out_array_stride)
+		*out_array_stride = array_stride;

 	return std::make_pair(expr, offset);
 }
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -56,7 +56,8 @@ enum AccessChainFlagBits
 	ACCESS_CHAIN_INDEX_IS_LITERAL_BIT = 1 << 0,
 	ACCESS_CHAIN_CHAIN_ONLY_BIT = 1 << 1,
 	ACCESS_CHAIN_PTR_CHAIN_BIT = 1 << 2,
-	ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3
+	ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT = 1 << 3,
+	ACCESS_CHAIN_LITERAL_MSB_FORCE_ID = 1 << 4
 };
 typedef uint32_t AccessChainFlags;

@ -506,7 +507,7 @@ protected:

 	std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
 	                                   const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,
-	                                   bool need_transpose);
+	                                   uint32_t array_stride, bool need_transpose);
 	std::string flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count,
 	                                          const SPIRType &target_type, uint32_t offset);
 	std::string flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count,
@ -519,6 +520,7 @@ protected:
 	                                                               uint32_t count, uint32_t offset,
 	                                                               uint32_t word_stride, bool *need_transpose = nullptr,
 	                                                               uint32_t *matrix_stride = nullptr,
+	                                                               uint32_t *array_stride = nullptr,
 	                                                               bool ptr_chain = false);

 	const char *index_to_swizzle(uint32_t index);
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@ -3401,7 +3401,57 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	}
 }

-string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain)
+void CompilerHLSL::read_access_chain_array(const string &lhs, const SPIRAccessChain &chain)
+{ // Emits an [unroll] for-loop, bounded by the size of the last dimension recorded in type.array, that loads each element through read_access_chain into lhs[<loop variable>].
+	auto &type = get<SPIRType>(chain.basetype);
+
+	// Use a generated, unique loop variable name so that it cannot shadow an identifier in the access chain input or in other loops.
+	auto ident = get_unique_identifier();
+
+	statement("[unroll]");
+	statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ",
+	          ident, "++)");
+	begin_scope();
+	auto subchain = chain; // Copy of the chain that is re-targeted below at a single element.
+	subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); // Prepends "<ident> * <array_stride> + " to the existing dynamic index expression.
+	subchain.basetype = type.parent_type;
+	if (!get<SPIRType>(subchain.basetype).array.empty()) // Element type is itself an array: take its stride from the type's ArrayStride decoration.
+		subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride);
+	read_access_chain(nullptr, join(lhs, "[", ident, "]"), subchain); // A non-empty lhs is passed, so the load is emitted as an assignment statement.
+	end_scope();
+}
+
+void CompilerHLSL::read_access_chain_struct(const string &lhs, const SPIRAccessChain &chain)
+{ // Loads a struct member by member: each member is read through read_access_chain into "<lhs>.<member name>".
+	auto &type = get<SPIRType>(chain.basetype);
+	auto subchain = chain; // Working copy; the per-member fields are overwritten on every iteration.
+	uint32_t member_count = uint32_t(type.member_types.size());
+
+	for (uint32_t i = 0; i < member_count; i++)
+	{
+		uint32_t offset = type_struct_member_offset(type, i);
+		subchain.static_index = chain.static_index + offset; // Member offset is added to the parent's static index, not accumulated across members.
+		subchain.basetype = type.member_types[i];
+
+		subchain.matrix_stride = 0; // Reset layout info so values from the parent or a previous member do not carry over.
+		subchain.array_stride = 0;
+		subchain.row_major_matrix = false;
+
+		auto &member_type = get<SPIRType>(subchain.basetype);
+		if (member_type.columns > 1) // Matrix member: pick up its stride and row-major decoration from the struct.
+		{
+			subchain.matrix_stride = type_struct_member_matrix_stride(type, i);
+			subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor);
+		}
+
+		if (!member_type.array.empty()) // Array member: pick up its array stride from the struct.
+			subchain.array_stride = type_struct_member_array_stride(type, i);
+
+		read_access_chain(nullptr, join(lhs, ".", to_member_name(type, i)), subchain);
+	}
+}
+
+void CompilerHLSL::read_access_chain(string *expr, const string &lhs, const SPIRAccessChain &chain)
 {
 	auto &type = get<SPIRType>(chain.basetype);

@ -3410,14 +3460,18 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain)
 	target_type.vecsize = type.vecsize;
 	target_type.columns = type.columns;

-	if (type.basetype == SPIRType::Struct)
-		SPIRV_CROSS_THROW("Reading structs from ByteAddressBuffer not yet supported.");
-
-	if (type.width != 32)
-		SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported.");
-
 	if (!type.array.empty())
-		SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported.");
+	{
+		read_access_chain_array(lhs, chain);
+		return;
+	}
+	else if (type.basetype == SPIRType::Struct)
+	{
+		read_access_chain_struct(lhs, chain);
+		return;
+	}
+	else if (type.width != 32)
+		SPIRV_CROSS_THROW("Reading types other than 32-bit from ByteAddressBuffer not yet supported.");

 	string load_expr;

@ -3525,7 +3579,13 @@ string CompilerHLSL::read_access_chain(const SPIRAccessChain &chain)
 	if (!bitcast_op.empty())
 		load_expr = join(bitcast_op, "(", load_expr, ")");

-	return load_expr;
+	if (lhs.empty())
+	{
+		assert(expr);
+		*expr = move(load_expr);
+	}
+	else
+		statement(lhs, " = ", load_expr, ";");
 }

 void CompilerHLSL::emit_load(const Instruction &instruction)
@ -3542,33 +3602,138 @@ void CompilerHLSL::emit_load(const Instruction &instruction)
 		if (has_decoration(ptr, DecorationNonUniformEXT))
 			propagate_nonuniform_qualifier(ptr);

-		auto load_expr = read_access_chain(*chain);
-
-		bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
-
-		// If we are forwarding this load,
-		// don't register the read to access chain here, defer that to when we actually use the expression,
-		// using the add_implied_read_expression mechanism.
-		if (!forward)
-			track_expression_read(chain->self);
-
-		// Do not forward complex load sequences like matrices, structs and arrays.
 		auto &type = get<SPIRType>(result_type);
-		if (type.columns > 1 || !type.array.empty() || type.basetype == SPIRType::Struct)
-			forward = false;
+		bool composite_load = !type.array.empty() || type.basetype == SPIRType::Struct;

-		auto &e = emit_op(result_type, id, load_expr, forward, true);
-		e.need_transpose = false;
-		register_read(id, ptr, forward);
-		inherit_expression_dependencies(id, ptr);
-		if (forward)
-			add_implied_read_expression(e, chain->self);
+		if (composite_load)
+		{
+			// We cannot make this work in one single expression as we might have nested structures and arrays,
+			// so unroll the load to an uninitialized temporary.
+			emit_uninitialized_temporary_expression(result_type, id);
+			read_access_chain(nullptr, to_expression(id), *chain);
+			track_expression_read(chain->self);
+		}
+		else
+		{
+			string load_expr;
+			read_access_chain(&load_expr, "", *chain);
+
+			bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries);
+
+			// If we are forwarding this load,
+			// don't register the read to access chain here, defer that to when we actually use the expression,
+			// using the add_implied_read_expression mechanism.
+			if (!forward)
+				track_expression_read(chain->self);
+
+			// Do not forward complex load sequences like matrices, structs and arrays.
+			if (type.columns > 1)
+				forward = false;
+
+			auto &e = emit_op(result_type, id, load_expr, forward, true);
+			e.need_transpose = false;
+			register_read(id, ptr, forward);
+			inherit_expression_dependencies(id, ptr);
+			if (forward)
+				add_implied_read_expression(e, chain->self);
+		}
 	}
 	else
 		CompilerGLSL::emit_instruction(instruction);
 }

-void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value)
+void CompilerHLSL::write_access_chain_array(const SPIRAccessChain &chain, uint32_t value,
+                                            const SmallVector<uint32_t> &composite_chain)
+{ // Emits an [unroll] for-loop that stores an array element by element: each iteration writes the element of `value` selected by composite_chain plus the loop variable.
+	auto &type = get<SPIRType>(chain.basetype);
+
+	// Use a generated, unique loop variable name so that it cannot shadow an identifier in the access chain input or in other loops.
+	auto ident = get_unique_identifier();
+
+	uint32_t id = ir.increase_bound_by(2); // Two fresh IDs: `id` for the loop variable expression, `id + 1` for its type.
+	uint32_t int_type_id = id + 1;
+	SPIRType int_type;
+	int_type.basetype = SPIRType::Int;
+	int_type.width = 32;
+	set<SPIRType>(int_type_id, int_type);
+	set<SPIRExpression>(id, ident, int_type_id, true); // Expression whose text is the loop variable name, typed as the 32-bit int declared above.
+	set_name(id, ident);
+	suppressed_usage_tracking.insert(id);
+
+	statement("[unroll]");
+	statement("for (int ", ident, " = 0; ", ident, " < ", to_array_size(type, uint32_t(type.array.size() - 1)), "; ",
+	          ident, "++)");
+	begin_scope();
+	auto subchain = chain; // Copy of the chain that is re-targeted below at a single element.
+	subchain.dynamic_index = join(ident, " * ", chain.array_stride, " + ", chain.dynamic_index); // Prepends "<ident> * <array_stride> + " to the existing dynamic index expression.
+	subchain.basetype = type.parent_type;
+
+	// Append the loop variable's ID with the MSB (0x80000000) set; write_access_chain_value expands this chain with ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, which presumably makes the entry be treated as an ID rather than a literal index.
+	auto subcomposite_chain = composite_chain;
+	subcomposite_chain.push_back(0x80000000u | id);
+
+	if (!get<SPIRType>(subchain.basetype).array.empty()) // Element type is itself an array: take its stride from the type's ArrayStride decoration.
+		subchain.array_stride = get_decoration(subchain.basetype, DecorationArrayStride);
+
+	write_access_chain(subchain, value, subcomposite_chain);
+	end_scope();
+}
+
+void CompilerHLSL::write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value,
+                                             const SmallVector<uint32_t> &composite_chain)
+{ // Stores a struct member by member: each member is written through write_access_chain with the member index appended to composite_chain.
+	auto &type = get<SPIRType>(chain.basetype);
+	uint32_t member_count = uint32_t(type.member_types.size());
+	auto subchain = chain; // Working copy; the per-member fields are overwritten on every iteration.
+
+	auto subcomposite_chain = composite_chain;
+	subcomposite_chain.push_back(0); // Placeholder slot; replaced with the member index inside the loop.
+
+	for (uint32_t i = 0; i < member_count; i++)
+	{
+		uint32_t offset = type_struct_member_offset(type, i);
+		subchain.static_index = chain.static_index + offset; // Member offset is added to the parent's static index, not accumulated across members.
+		subchain.basetype = type.member_types[i];
+
+		subchain.matrix_stride = 0; // Reset layout info so values from the parent or a previous member do not carry over.
+		subchain.array_stride = 0;
+		subchain.row_major_matrix = false;
+
+		auto &member_type = get<SPIRType>(subchain.basetype);
+		if (member_type.columns > 1) // Matrix member: pick up its stride and row-major decoration from the struct.
+		{
+			subchain.matrix_stride = type_struct_member_matrix_stride(type, i);
+			subchain.row_major_matrix = has_member_decoration(type.self, i, DecorationRowMajor);
+		}
+
+		if (!member_type.array.empty()) // Array member: pick up its array stride from the struct.
+			subchain.array_stride = type_struct_member_array_stride(type, i);
+
+		subcomposite_chain.back() = i; // Select member i of the value being stored.
+		write_access_chain(subchain, value, subcomposite_chain);
+	}
+}
+
+string CompilerHLSL::write_access_chain_value(uint32_t value, const SmallVector<uint32_t> &composite_chain,
+                                              bool enclose)
+{ // Returns the expression string for the part of `value` to store: the whole value when composite_chain is empty, otherwise `value` indexed by composite_chain; enclosed when `enclose` is true.
+	string ret;
+	if (composite_chain.empty()) // No sub-indexing: the whole value is stored.
+		ret = to_expression(value);
+	else
+	{
+		AccessChainMeta meta; // Passed to access_chain_internal; not read afterwards in this function.
+		ret = access_chain_internal(value, composite_chain.data(), uint32_t(composite_chain.size()),
+		                            ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_LITERAL_MSB_FORCE_ID, &meta);
+	}
+
+	if (enclose) // write_access_chain passes true where it appends "[c]" or a swizzle to the result.
+		ret = enclose_expression(ret);
+	return ret;
+}
+
+void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t value,
+                                      const SmallVector<uint32_t> &composite_chain)
 {
 	auto &type = get<SPIRType>(chain.basetype);

@ -3583,12 +3748,20 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 	target_type.vecsize = type.vecsize;
 	target_type.columns = type.columns;

-	if (type.basetype == SPIRType::Struct)
-		SPIRV_CROSS_THROW("Writing structs to RWByteAddressBuffer not yet supported.");
-	if (type.width != 32)
-		SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported.");
 	if (!type.array.empty())
-		SPIRV_CROSS_THROW("Reading arrays from ByteAddressBuffer not yet supported.");
+	{
+		write_access_chain_array(chain, value, composite_chain);
+		register_write(chain.self);
+		return;
+	}
+	else if (type.basetype == SPIRType::Struct)
+	{
+		write_access_chain_struct(chain, value, composite_chain);
+		register_write(chain.self);
+		return;
+	}
+	else if (type.width != 32)
+		SPIRV_CROSS_THROW("Writing types other than 32-bit to RWByteAddressBuffer not yet supported.");

 	if (type.columns == 1 && !chain.row_major_matrix)
 	{
@ -3611,7 +3784,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 			SPIRV_CROSS_THROW("Unknown vector size.");
 		}

-		auto store_expr = to_expression(value);
+		auto store_expr = write_access_chain_value(value, composite_chain, false);
 		auto bitcast_op = bitcast_glsl_op(target_type, type);
 		if (!bitcast_op.empty())
 			store_expr = join(bitcast_op, "(", store_expr, ")");
@ -3622,7 +3795,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 		// Strided store.
 		for (uint32_t r = 0; r < type.vecsize; r++)
 		{
-			auto store_expr = to_enclosed_expression(value);
+			auto store_expr = write_access_chain_value(value, composite_chain, true);
 			if (type.vecsize > 1)
 			{
 				store_expr += ".";
@ -3660,7 +3833,7 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val

 		for (uint32_t c = 0; c < type.columns; c++)
 		{
-			auto store_expr = join(to_enclosed_expression(value), "[", c, "]");
+			auto store_expr = join(write_access_chain_value(value, composite_chain, true), "[", c, "]");
 			auto bitcast_op = bitcast_glsl_op(target_type, type);
 			if (!bitcast_op.empty())
 				store_expr = join(bitcast_op, "(", store_expr, ")");
@ -3674,7 +3847,8 @@ void CompilerHLSL::write_access_chain(const SPIRAccessChain &chain, uint32_t val
 		{
 			for (uint32_t c = 0; c < type.columns; c++)
 			{
-				auto store_expr = join(to_enclosed_expression(value), "[", c, "].", index_to_swizzle(r));
+				auto store_expr =
+				    join(write_access_chain_value(value, composite_chain, true), "[", c, "].", index_to_swizzle(r));
 				remove_duplicate_swizzle(store_expr);
 				auto bitcast_op = bitcast_glsl_op(target_type, type);
 				if (!bitcast_op.empty())
@ -3693,7 +3867,7 @@ void CompilerHLSL::emit_store(const Instruction &instruction)
 	auto ops = stream(instruction);
 	auto *chain = maybe_get<SPIRAccessChain>(ops[0]);
 	if (chain)
-		write_access_chain(*chain, ops[1]);
+		write_access_chain(*chain, ops[1], {});
 	else
 		CompilerGLSL::emit_instruction(instruction);
 }
@ -3723,7 +3897,10 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)

 	if (need_byte_access_chain)
 	{
-		uint32_t to_plain_buffer_length = static_cast<uint32_t>(type.array.size());
+		// If we have a chain variable, we are already inside the SSBO, and any array type will refer to arrays within a block,
+		// and not array of SSBO.
+		uint32_t to_plain_buffer_length = chain ? 0u : static_cast<uint32_t>(type.array.size());
+
 		auto *backing_variable = maybe_get_backing_variable(ops[2]);

 		string base;
@ -3745,6 +3922,7 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)
 		}

 		uint32_t matrix_stride = 0;
+		uint32_t array_stride = 0;
 		bool row_major_matrix = false;

 		// Inherit matrix information.
@ -3752,15 +3930,17 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)
 		{
 			matrix_stride = chain->matrix_stride;
 			row_major_matrix = chain->row_major_matrix;
+			array_stride = chain->array_stride;
 		}

-		auto offsets =
-		    flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length],
-		                                  length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix, &matrix_stride);
+		auto offsets = flattened_access_chain_offset(*basetype, &ops[3 + to_plain_buffer_length],
+		                                             length - 3 - to_plain_buffer_length, 0, 1, &row_major_matrix,
+		                                             &matrix_stride, &array_stride);

 		auto &e = set<SPIRAccessChain>(ops[1], ops[0], type.storage, base, offsets.first, offsets.second);
 		e.row_major_matrix = row_major_matrix;
 		e.matrix_stride = matrix_stride;
+		e.array_stride = array_stride;
 		e.immutable = should_forward(ops[2]);
 		e.loaded_from = backing_variable ? backing_variable->self : ID(0);

@ -5030,3 +5210,8 @@ void CompilerHLSL::emit_block_hints(const SPIRBlock &block)
 		break;
 	}
 }
+
+string CompilerHLSL::get_unique_identifier()
+{ // Returns a name of the form "_<N>ident", where N is the current value of unique_identifier_count.
+	return join("_", unique_identifier_count++, "ident"); // Post-increment, so each call yields a different N.
+}
--- a/spirv_hlsl.hpp
+++ b/spirv_hlsl.hpp
@ -182,8 +182,15 @@ private:
 	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
 	void emit_access_chain(const Instruction &instruction);
 	void emit_load(const Instruction &instruction);
-	std::string read_access_chain(const SPIRAccessChain &chain);
-	void write_access_chain(const SPIRAccessChain &chain, uint32_t value);
+	void read_access_chain(std::string *expr, const std::string &lhs, const SPIRAccessChain &chain);
+	void read_access_chain_struct(const std::string &lhs, const SPIRAccessChain &chain);
+	void read_access_chain_array(const std::string &lhs, const SPIRAccessChain &chain);
+	void write_access_chain(const SPIRAccessChain &chain, uint32_t value, const SmallVector<uint32_t> &composite_chain);
+	void write_access_chain_struct(const SPIRAccessChain &chain, uint32_t value,
+	                               const SmallVector<uint32_t> &composite_chain);
+	void write_access_chain_array(const SPIRAccessChain &chain, uint32_t value,
+	                              const SmallVector<uint32_t> &composite_chain);
+	std::string write_access_chain_value(uint32_t value, const SmallVector<uint32_t> &composite_chain, bool enclose);
 	void emit_store(const Instruction &instruction);
 	void emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op);
 	void emit_subgroup_op(const Instruction &i) override;
@ -257,6 +264,9 @@ private:
 	std::vector<RootConstants> root_constants_layout;

 	void validate_shader_model();
+
+	std::string get_unique_identifier();
+	uint32_t unique_identifier_count = 0;
 };
 } // namespace SPIRV_CROSS_NAMESPACE