Simplify uniform padding in Metal (Ganesh).

Previously, we would round the Metal uniform block up to the next multiple
of 16 bytes if it contained a float3, float4, or matrix type. This does not
appear to be necessary (all tests pass without this level of padding).

Since Metal is C++ based, it does have *some* struct padding, determined by
the basic type in the structure with the highest bit-width. Rather than
track this amount, we simply assume that it is 8 bytes and round Metal
uniform blocks up to the next multiple of 8 bytes. This will almost never
be larger than our previous padding, since a typical Skia shader nearly
always includes a float2 uniform or larger (e.g. RTFlip is a float2), and
it will usually be tighter than before, since most shaders include a color
(float3/float4) or matrix uniform.

Change-Id: Ic8dd49f33cb81a24a6415e9ba6e91c9f6faeb1b1
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/556216
Commit-Queue: John Stiles <johnstiles@google.com>
Reviewed-by: Robert Phillips <robertphillips@google.com>
This commit is contained in:
John Stiles 2022-07-06 14:20:30 -04:00 committed by SkCQ
parent 6d57262159
commit eaecd17d7a
3 changed files with 11 additions and 18 deletions

View File

@ -381,12 +381,15 @@ static MTLRenderPipelineColorAttachmentDescriptor* create_color_attachment(
return mtlColorAttachment;
}
static uint32_t buffer_size(uint32_t offset, uint32_t maxAlignment) {
// Metal expects the buffer to be padded at the end according to the alignment
// of the largest element in the buffer.
uint32_t offsetDiff = offset & maxAlignment;
static uint32_t buffer_size(uint32_t offset) {
// Metal uses C++ padding rules, so we round up the buffer size if it's not evenly divisible by
// eight. The padding is dictated by the highest-bit-width basic type in the struct.
// In practice, this will probably be four (float), but on the off chance we ever use a double
// or a pointer, we pad to eight.
constexpr uint32_t kMaxAlignment = 7;
uint32_t offsetDiff = offset & kMaxAlignment;
if (offsetDiff != 0) {
offsetDiff = maxAlignment - offsetDiff + 1;
offsetDiff = kMaxAlignment - offsetDiff + 1;
}
return offset + offsetDiff;
}
@ -713,14 +716,12 @@ GrMtlPipelineState* GrMtlPipelineStateBuilder::finalize(
sk_sp<GrMtlRenderPipeline> renderPipeline = GrMtlRenderPipeline::Make(pipelineState);
uint32_t bufferSize = buffer_size(fUniformHandler.fCurrentUBOOffset,
fUniformHandler.fCurrentUBOMaxAlignment);
return new GrMtlPipelineState(fGpu,
std::move(renderPipeline),
pipelineDescriptor.colorAttachments[0].pixelFormat,
fUniformHandles,
fUniformHandler.fUniforms,
bufferSize,
buffer_size(fUniformHandler.fCurrentUBOOffset),
(uint32_t)fUniformHandler.numSamplers(),
std::move(fGPImpl),
std::move(fXPImpl),

View File

@ -58,9 +58,7 @@ private:
: INHERITED(program)
, fUniforms(kUniformsPerBlock)
, fSamplers(kUniformsPerBlock)
, fCurrentUBOOffset(0)
, fCurrentUBOMaxAlignment(0x0) {
}
, fCurrentUBOOffset(0) {}
UniformHandle internalAddUniformArray(const GrProcessor* owner,
uint32_t visibility,
@ -98,7 +96,6 @@ private:
SkTArray<skgpu::Swizzle> fSamplerSwizzles;
uint32_t fCurrentUBOOffset;
uint32_t fCurrentUBOMaxAlignment;
friend class GrMtlPipelineStateBuilder;

View File

@ -156,13 +156,9 @@ static inline uint32_t sksltype_to_mtl_size(SkSLType type) {
// taking into consideration all alignment requirements. The uniformOffset is set to the offset for
// the new uniform, and currentOffset is updated to be the offset to the end of the new uniform.
static uint32_t get_ubo_aligned_offset(uint32_t* currentOffset,
uint32_t* maxAlignment,
SkSLType type,
int arrayCount) {
uint32_t alignmentMask = sksltype_to_alignment_mask(type);
if (alignmentMask > *maxAlignment) {
*maxAlignment = alignmentMask;
}
uint32_t offsetDiff = *currentOffset & alignmentMask;
if (offsetDiff != 0) {
offsetDiff = alignmentMask - offsetDiff + 1;
@ -200,8 +196,7 @@ GrGLSLUniformHandler::UniformHandle GrMtlUniformHandler::internalAddUniformArray
}
SkString resolvedName = fProgramBuilder->nameVariable(prefix, name, mangleName);
uint32_t offset = get_ubo_aligned_offset(&fCurrentUBOOffset, &fCurrentUBOMaxAlignment,
type, arrayCount);
uint32_t offset = get_ubo_aligned_offset(&fCurrentUBOOffset, type, arrayCount);
SkString layoutQualifier;
layoutQualifier.appendf("offset=%d", offset);