40 lines
897 B
Plaintext
40 lines
897 B
Plaintext
// Copyright 2016-2021 The Khronos Group Inc.
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
#version 310 es
|
|
// Each workgroup runs 64 invocations along X. The shared array tmp[] is sized
// from this value, and main() reads twice this many input elements per workgroup.
layout(local_size_x = 64) in;
|
|
|
|
// Read-only storage buffer (descriptor set 0, binding 0) holding the floats
// to be summed. main() reads 2 * gl_WorkGroupSize.x consecutive elements per
// workgroup.
layout(std430, set = 0, binding = 0) readonly buffer SSBO0
{
    float inputs[];
};
|
|
|
|
// Write-only storage buffer (descriptor set 0, binding 1) receiving the
// results. main() writes one float per workgroup, indexed by gl_WorkGroupID.x.
layout(std430, set = 0, binding = 1) writeonly buffer SSBO1
{
    float outputs[];
};
|
|
|
|
// Workgroup-shared scratch array with one float slot per invocation; main()
// stores and combines its partial sums here.
shared float tmp[gl_WorkGroupSize.x];
|
|
|
|
// Sums 2 * gl_WorkGroupSize.x consecutive floats from inputs[] per workgroup
// and writes the total to outputs[gl_WorkGroupID.x].
//
// The tree reduction below requires gl_WorkGroupSize.x to be a power of two
// and at least 2; otherwise some elements of tmp[] would be skipped or read
// out of range.
void main()
{
    uint local = gl_LocalInvocationIndex;
    uint work_group = gl_WorkGroupID.x;

    // Index of the first input element consumed by this workgroup.
    uint base = work_group * gl_WorkGroupSize.x * 2u;

    // Does a trivial parallel reduction through shared memory.
    // Each invocation first folds two inputs that lie one workgroup-width
    // apart, so tmp[] starts out holding gl_WorkGroupSize.x partial sums.
    tmp[local] = inputs[base + local] + inputs[base + local + gl_WorkGroupSize.x];
    memoryBarrierShared();
    barrier();

    // Halve the number of live partial sums on every pass. The starting
    // stride is derived from the workgroup size instead of being hard-coded,
    // so the loop stays correct if local_size_x is changed.
    for (uint limit = gl_WorkGroupSize.x >> 1u; limit > 1u; limit >>= 1u)
    {
        if (local < limit)
            tmp[local] = tmp[local] + tmp[local + limit];

        // Kept outside the branch so that every invocation in the workgroup
        // reaches the barrier.
        memoryBarrierShared();
        barrier();
    }

    // The loop stops while two partial sums remain; invocation 0 adds them
    // and publishes the workgroup's result.
    if (local == 0u)
        outputs[work_group] = tmp[0] + tmp[1];
}
|