// Copyright 2016-2021 The Khronos Group Inc.
// SPDX-License-Identifier: Apache-2.0

#version 310 es
layout(local_size_x = 64) in;

// Compute shader: each work group sums a contiguous run of
// 2 * gl_WorkGroupSize.x floats from `inputs` and writes the single result to
// `outputs[gl_WorkGroupID.x]`.
//
// NOTE(review): only gl_WorkGroupID.x is used for addressing, so this looks
// like it expects a 1D dispatch -- confirm against the host code. No bounds
// checks are performed on either buffer.

// Source values; work group g reads elements [g * 2 * N, (g + 1) * 2 * N),
// where N is gl_WorkGroupSize.x.
layout(set = 0, binding = 0, std430) readonly buffer SSBO0
{
    float inputs[];
};

// One partial sum per work group, indexed by gl_WorkGroupID.x.
layout(set = 0, binding = 1, std430) writeonly buffer SSBO1
{
    float outputs[];
};

// Per-work-group scratch space, one slot per invocation.
shared float tmp[gl_WorkGroupSize.x];

void main()
{
    uint local = gl_LocalInvocationIndex;
    uint work_group = gl_WorkGroupID.x;

    // First input element owned by this work group.
    uint base = work_group * gl_WorkGroupSize.x * 2u;

    // Does a trivial parallel reduction through shared memory.
    // Each invocation first folds two inputs that are one work-group width
    // apart, so the shared array starts out holding gl_WorkGroupSize.x sums.
    tmp[local] = inputs[base + local] + inputs[base + local + gl_WorkGroupSize.x];
    memoryBarrierShared();
    barrier();

    // Halve the number of live partial sums on every pass. The starting stride
    // is derived from the work-group size rather than hard-coded so that it
    // cannot drift out of sync with local_size_x. This scheme requires
    // gl_WorkGroupSize.x to be a power of two and at least 2.
    for (uint limit = gl_WorkGroupSize.x >> 1u; limit > 1u; limit >>= 1u)
    {
        if (local < limit)
            tmp[local] = tmp[local] + tmp[local + limit];

        // Every invocation reaches the barrier (it is outside the `if`), which
        // keeps the barrier in uniform control flow.
        memoryBarrierShared();
        barrier();
    }

    // The loop stops with two partial sums left; invocation 0 combines them and
    // publishes the work group's total.
    if (local == 0u)
        outputs[work_group] = tmp[0] + tmp[1];
}