Metal: Hold refs for input buffers from bindBuffer calls.

Mirrors what we have in Vulkan and Direct3D.
Also adds command buffer tracking, again like Vulkan and Direct3D.

Change-Id: I2280d92274d81830aec7950afc64a0147e38c317
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/305396
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Jim Van Verth <jvanverth@google.com>
This commit is contained in:
Jim Van Verth 2020-07-23 11:39:35 -04:00 committed by Skia Commit-Bot
parent 90787fefef
commit a75dc33a67
7 changed files with 106 additions and 52 deletions

View File

@ -25,7 +25,6 @@ public:
id<MTLBuffer> mtlBuffer() const { return fMtlBuffer; }
size_t offset() const { return fOffset; }
void bind(); // for initial binding of XferGpuToCpu buffers
protected:
GrMtlBuffer(GrMtlGpu*, size_t size, GrGpuBufferType intendedType, GrAccessPattern);

View File

@ -35,21 +35,26 @@ GrMtlBuffer::GrMtlBuffer(GrMtlGpu* gpu, size_t size, GrGpuBufferType intendedTyp
: INHERITED(gpu, size, intendedType, accessPattern)
, fIsDynamic(accessPattern != kStatic_GrAccessPattern)
, fOffset(0) {
// In most cases, we'll allocate dynamic buffers when we map them, below.
if (!fIsDynamic) {
NSUInteger options = 0;
if (@available(macOS 10.11, iOS 9.0, *)) {
NSUInteger options = 0;
if (@available(macOS 10.11, iOS 9.0, *)) {
if (fIsDynamic) {
#ifdef SK_BUILD_FOR_MAC
options |= MTLResourceStorageModeManaged;
#else
options |= MTLResourceStorageModeShared;
#endif
} else {
options |= MTLResourceStorageModePrivate;
}
#ifdef SK_BUILD_FOR_MAC
// Mac requires 4-byte alignment for copies so we need
// to ensure we have space for the extra data
size = SkAlign4(size);
#endif
fMtlBuffer = size == 0 ? nil :
[gpu->device() newBufferWithLength: size
options: options];
}
#ifdef SK_BUILD_FOR_MAC
// Mac requires 4-byte alignment for copies so we need
// to ensure we have space for the extra data
size = SkAlign4(size);
#endif
fMtlBuffer = size == 0 ? nil :
[gpu->device() newBufferWithLength: size
options: options];
this->registerWithCache(SkBudgeted::kYes);
VALIDATE();
}
@ -60,11 +65,6 @@ GrMtlBuffer::~GrMtlBuffer() {
SkASSERT(fMapPtr == nullptr);
}
// Performs the initial binding of an XferGpuToCpu buffer to backing storage.
// Only valid for dynamic buffers whose intended type is kXferGpuToCpu (asserted below).
void GrMtlBuffer::bind() {
    SkASSERT(fIsDynamic && GrGpuBufferType::kXferGpuToCpu == this->intendedType());
    // Request dynamic storage sized for the whole buffer. fOffset is passed as an out-param;
    // presumably it receives our offset within the returned MTLBuffer -- confirm against
    // GrMtlResourceProvider::getDynamicBuffer.
    auto&& provider = this->mtlGpu()->resourceProvider();
    fMtlBuffer = provider.getDynamicBuffer(this->size(), &fOffset);
}
bool GrMtlBuffer::onUpdateData(const void* src, size_t srcInBytes) {
if (!fIsDynamic) {
if (fMtlBuffer == nil) {
@ -122,9 +122,6 @@ void GrMtlBuffer::internalMap(size_t sizeInBytes) {
VALIDATE();
SkASSERT(!this->isMapped());
if (fIsDynamic) {
if (GrGpuBufferType::kXferGpuToCpu != this->intendedType()) {
fMtlBuffer = this->mtlGpu()->resourceProvider().getDynamicBuffer(sizeInBytes, &fOffset);
}
fMappedBuffer = fMtlBuffer;
fMapPtr = static_cast<char*>(fMtlBuffer.contents) + fOffset;
} else {

View File

@ -11,15 +11,16 @@
#import <Metal/Metal.h>
#include "include/core/SkRefCnt.h"
#include "src/gpu/GrBuffer.h"
#include "src/gpu/mtl/GrMtlUtil.h"
class GrMtlGpu;
class GrMtlPipelineState;
class GrMtlOpsRenderPass;
class GrMtlCommandBuffer {
class GrMtlCommandBuffer : public SkRefCnt {
public:
static GrMtlCommandBuffer* Create(id<MTLCommandQueue> queue);
static sk_sp<GrMtlCommandBuffer> Make(id<MTLCommandQueue> queue);
~GrMtlCommandBuffer();
void commit(bool waitUntilCompleted);
@ -33,10 +34,16 @@ public:
[fCmdBuffer addCompletedHandler:block];
}
// Takes a reference to `buffer` and stores it in fTrackedGrBuffers, so the buffer stays
// referenced for as long as this command buffer keeps the entry (the list is reset in the
// destructor). The sk_sp is taken by value and moved into the list, so callers may pass
// either a copy or an rvalue.
void addGrBuffer(sk_sp<const GrBuffer> buffer) {
fTrackedGrBuffers.push_back(std::move(buffer));
}
void encodeSignalEvent(id<MTLEvent>, uint64_t value) SK_API_AVAILABLE(macos(10.14), ios(12.0));
void encodeWaitForEvent(id<MTLEvent>, uint64_t value) SK_API_AVAILABLE(macos(10.14), ios(12.0));
private:
static const int kInitialTrackedResourcesCount = 32;
// Wraps an already-created MTLCommandBuffer. Starts with no previous render pass descriptor
// recorded. Declared in the private section; instances are created through the static
// factory above.
GrMtlCommandBuffer(id<MTLCommandBuffer> cmdBuffer)
: fCmdBuffer(cmdBuffer)
, fPreviousRenderPassDescriptor(nil) {}
@ -47,6 +54,8 @@ private:
id<MTLBlitCommandEncoder> fActiveBlitCommandEncoder;
id<MTLRenderCommandEncoder> fActiveRenderCommandEncoder;
MTLRenderPassDescriptor* fPreviousRenderPassDescriptor;
SkSTArray<kInitialTrackedResourcesCount, sk_sp<const GrBuffer>> fTrackedGrBuffers;
};
#endif

View File

@ -15,7 +15,7 @@
#error This file must be compiled with Arc. Use -fobjc-arc flag
#endif
GrMtlCommandBuffer* GrMtlCommandBuffer::Create(id<MTLCommandQueue> queue) {
sk_sp<GrMtlCommandBuffer> GrMtlCommandBuffer::Make(id<MTLCommandQueue> queue) {
id<MTLCommandBuffer> mtlCommandBuffer;
mtlCommandBuffer = [queue commandBuffer];
if (nil == mtlCommandBuffer) {
@ -24,11 +24,12 @@ GrMtlCommandBuffer* GrMtlCommandBuffer::Create(id<MTLCommandQueue> queue) {
mtlCommandBuffer.label = @"GrMtlCommandBuffer::Create";
return new GrMtlCommandBuffer(mtlCommandBuffer);
return sk_sp<GrMtlCommandBuffer>(new GrMtlCommandBuffer(mtlCommandBuffer));
}
// Tears down the wrapper: finishes encoding, drops tracked buffer refs, then lets go of the
// underlying MTLCommandBuffer.
GrMtlCommandBuffer::~GrMtlCommandBuffer() {
// NOTE(review): presumably ends any blit/render encoder that is still active -- confirm
// against endAllEncoding().
this->endAllEncoding();
// Release the references that were taken via addGrBuffer().
fTrackedGrBuffers.reset();
// This file is compiled with ARC, so clearing the strong reference is how we give up the
// MTLCommandBuffer.
fCmdBuffer = nil;
}

View File

@ -8,6 +8,7 @@
#ifndef GrMtlGpu_DEFINED
#define GrMtlGpu_DEFINED
#include "include/private/SkDeque.h"
#include "src/gpu/GrFinishCallbacks.h"
#include "src/gpu/GrGpu.h"
#include "src/gpu/GrRenderTarget.h"
@ -15,6 +16,7 @@
#include "src/gpu/GrTexture.h"
#include "src/gpu/mtl/GrMtlCaps.h"
#include "src/gpu/mtl/GrMtlCommandBuffer.h"
#include "src/gpu/mtl/GrMtlResourceProvider.h"
#include "src/gpu/mtl/GrMtlStencilAttachment.h"
#include "src/gpu/mtl/GrMtlUtil.h"
@ -52,11 +54,6 @@ public:
kSkip_SyncQueue
};
// Commits the current command buffer to the queue and then creates a new command buffer. If
// sync is set to kForce_SyncQueue, the function will wait for all work in the committed
// command buffer to finish before returning.
void submitCommandBuffer(SyncQueue sync);
void deleteBackendTexture(const GrBackendTexture&) override;
bool compile(const GrProgramDesc&, const GrProgramInfo&) override;
@ -214,6 +211,13 @@ private:
bool onSubmitToGpu(bool syncCpu) override;
// Commits the current command buffer to the queue and then creates a new command buffer. If
// sync is set to kForce_SyncQueue, the function will wait for all work in the committed
// command buffer to finish before returning.
void submitCommandBuffer(SyncQueue sync);
void checkForFinishedCommandBuffers();
// Function that uploads data onto textures with private storage mode (GPU access only).
bool uploadToTexture(GrMtlTexture* tex, int left, int top, int width, int height,
GrColorType dataColorType, const GrMipLevel texels[], int mipLevels);
@ -247,7 +251,16 @@ private:
id<MTLDevice> fDevice;
id<MTLCommandQueue> fQueue;
GrMtlCommandBuffer* fCmdBuffer;
sk_sp<GrMtlCommandBuffer> fCurrentCmdBuffer;
// Bookkeeping entry for a command buffer that has been handed to the queue: a reference to
// the command buffer together with the fence associated with it. Entries live in
// fOutstandingCommandBuffers.
struct OutstandingCommandBuffer {
    sk_sp<GrMtlCommandBuffer> fCommandBuffer;
    GrFence fFence;

    // Takes over the caller's reference to `commandBuffer` and records `fence` alongside it.
    OutstandingCommandBuffer(sk_sp<GrMtlCommandBuffer> commandBuffer, GrFence fence)
            : fCommandBuffer(std::move(commandBuffer)), fFence(fence) {}
};
SkDeque fOutstandingCommandBuffers;
std::unique_ptr<SkSL::Compiler> fCompiler;

View File

@ -112,12 +112,18 @@ sk_sp<GrGpu> GrMtlGpu::Make(GrDirectContext* direct, const GrContextOptions& opt
return sk_sp<GrGpu>(new GrMtlGpu(direct, options, device, queue, featureSet));
}
// This constant determines how many OutstandingCommandBuffers are allocated together as a block in
// the deque. As such it needs to balance allocating too much memory vs. incurring
// allocation/deallocation thrashing. It should roughly correspond to the max number of outstanding
// command buffers we expect to see.
static const int kDefaultOutstandingAllocCnt = 8;
GrMtlGpu::GrMtlGpu(GrDirectContext* direct, const GrContextOptions& options,
id<MTLDevice> device, id<MTLCommandQueue> queue, MTLFeatureSet featureSet)
: INHERITED(direct)
, fDevice(device)
, fQueue(queue)
, fCmdBuffer(nullptr)
, fOutstandingCommandBuffers(sizeof(OutstandingCommandBuffer), kDefaultOutstandingAllocCnt)
, fCompiler(new SkSL::Compiler())
, fResourceProvider(this)
, fDisconnected(false)
@ -135,24 +141,25 @@ GrMtlGpu::~GrMtlGpu() {
void GrMtlGpu::disconnect(DisconnectType type) {
INHERITED::disconnect(type);
if (DisconnectType::kCleanup == type) {
if (!fDisconnected) {
this->destroyResources();
} else {
delete fCmdBuffer;
fCmdBuffer = nullptr;
fResourceProvider.destroyResources();
fQueue = nil;
fDevice = nil;
fDisconnected = true;
}
}
void GrMtlGpu::destroyResources() {
// Will implicitly delete the command buffer
this->submitCommandBuffer(SyncQueue::kForce_SyncQueue);
// We used a placement new for each object in fOutstandingCommandBuffers, so we're responsible
// for calling the destructor on each of them as well.
while (!fOutstandingCommandBuffers.empty()) {
OutstandingCommandBuffer* buffer =
(OutstandingCommandBuffer*)fOutstandingCommandBuffers.front();
this->deleteFence(buffer->fFence);
buffer->~OutstandingCommandBuffer();
fOutstandingCommandBuffers.pop_front();
}
fResourceProvider.destroyResources();
fQueue = nil;
@ -175,18 +182,44 @@ void GrMtlGpu::submit(GrOpsRenderPass* renderPass) {
}
GrMtlCommandBuffer* GrMtlGpu::commandBuffer() {
if (!fCmdBuffer) {
fCmdBuffer = GrMtlCommandBuffer::Create(fQueue);
if (!fCurrentCmdBuffer) {
fCurrentCmdBuffer = GrMtlCommandBuffer::Make(fQueue);
// This should be done after we have a new command buffer in case the freeing of any
// resources held by a finished command buffer causes us to send a new command to the gpu
// (like changing the resource state).
this->checkForFinishedCommandBuffers();
}
return fCmdBuffer;
return fCurrentCmdBuffer.get();
}
void GrMtlGpu::submitCommandBuffer(SyncQueue sync) {
if (fCmdBuffer) {
fResourceProvider.addBufferCompletionHandler(fCmdBuffer);
fCmdBuffer->commit(SyncQueue::kForce_SyncQueue == sync);
delete fCmdBuffer;
fCmdBuffer = nullptr;
// TODO: handle sync with empty command buffer
if (fCurrentCmdBuffer) {
fResourceProvider.addBufferCompletionHandler(fCurrentCmdBuffer.get());
GrFence fence = this->insertFence();
new (fOutstandingCommandBuffers.push_back()) OutstandingCommandBuffer(
fCurrentCmdBuffer, fence);
fCurrentCmdBuffer->commit(SyncQueue::kForce_SyncQueue == sync);
fCurrentCmdBuffer.reset();
}
}
// Retires outstanding command buffers whose fences have signaled.
//
// fOutstandingCommandBuffers is ordered oldest to newest, so we only ever look at the front
// entry: if its fence has signaled we destroy the entry and look at the next one; the first
// entry that has not finished ends the scan, since every later command buffer is guaranteed
// to be unfinished as well.
void GrMtlGpu::checkForFinishedCommandBuffers() {
    for (;;) {
        auto* oldest =
                static_cast<OutstandingCommandBuffer*>(fOutstandingCommandBuffers.front());
        // Stop when nothing is outstanding or the oldest entry is still in flight.
        if (!oldest || !this->waitFence(oldest->fFence)) {
            return;
        }
        this->deleteFence(oldest->fFence);
        // Entries are constructed with placement new, so the destructor must be invoked by
        // hand before the deque reclaims the slot.
        oldest->~OutstandingCommandBuffer();
        fOutstandingCommandBuffers.pop_front();
    }
}
@ -1254,7 +1287,6 @@ bool GrMtlGpu::onTransferPixelsFrom(GrSurface* surface, int left, int top, int w
}
GrMtlBuffer* grMtlBuffer = static_cast<GrMtlBuffer*>(transferBuffer);
grMtlBuffer->bind();
size_t transBufferRowBytes = bpp * width;
size_t transBufferImageBytes = transBufferRowBytes * height;

View File

@ -264,6 +264,7 @@ void GrMtlOpsRenderPass::onBindBuffers(sk_sp<const GrBuffer> indexBuffer,
SkASSERT(!vertexBuffer->isCpuBuffer());
SkASSERT(!static_cast<const GrGpuBuffer*>(vertexBuffer.get())->isMapped());
fActiveVertexBuffer = std::move(vertexBuffer);
fGpu->commandBuffer()->addGrBuffer(fActiveVertexBuffer);
++inputBufferIndex;
}
if (instanceBuffer) {
@ -271,11 +272,13 @@ void GrMtlOpsRenderPass::onBindBuffers(sk_sp<const GrBuffer> indexBuffer,
SkASSERT(!static_cast<const GrGpuBuffer*>(instanceBuffer.get())->isMapped());
this->setVertexBuffer(fActiveRenderCmdEncoder, instanceBuffer.get(), 0, inputBufferIndex++);
fActiveInstanceBuffer = std::move(instanceBuffer);
fGpu->commandBuffer()->addGrBuffer(fActiveInstanceBuffer);
}
if (indexBuffer) {
SkASSERT(!indexBuffer->isCpuBuffer());
SkASSERT(!static_cast<const GrGpuBuffer*>(indexBuffer.get())->isMapped());
fActiveIndexBuffer = std::move(indexBuffer);
fGpu->commandBuffer()->addGrBuffer(fActiveIndexBuffer);
}
}