Add FenceSync support to Metal

Bug: skia:8243
Change-Id: Iadd2445efe86e796b23ea20edbe49d684f626d9a
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/212270
Reviewed-by: Brian Salomon <bsalomon@google.com>
Commit-Queue: Jim Van Verth <jvanverth@google.com>
2019-05-08 11:40:55 -04:00 · 2019-05-08 11:40:55 -04:00 · f5ca01a69a
commit f5ca01a69a
parent 8cdf5b6fa4
2 changed files with 82 additions and 64 deletions
--- a/src/gpu/mtl/GrMtlUtil.h
+++ b/src/gpu/mtl/GrMtlUtil.h
@@ -16,6 +16,20 @@
 class GrMtlGpu;
 class GrSurface;

+#if defined(SK_BUILD_FOR_MAC)
+#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 101400
+#define GR_METAL_SDK_VERSION 200
+#else
+#define GR_METAL_SDK_VERSION 100
+#endif
+#else
+#if __IPHONE_OS_VERSION_MAX_ALLOWED >= 120000 || __TV_OS_VERSION_MAX_ALLOWED >= 120000
+#define GR_METAL_SDK_VERSION 200
+#else
+#define GR_METAL_SDK_VERSION 100
+#endif
+#endif
+
 /**
 * Returns the Metal texture format for the given GrPixelConfig
 */
--- a/tools/gpu/mtl/MtlTestContext.mm
+++ b/tools/gpu/mtl/MtlTestContext.mm
@@ -10,6 +10,8 @@
 #include "include/gpu/GrContext.h"
 #include "include/gpu/GrContextOptions.h"

+#include "src/gpu/mtl/GrMtlUtil.h"
+
 #ifdef SK_METAL

 #import <Metal/Metal.h>
@@ -19,97 +21,82 @@
 #define SK_END_AUTORELEASE_BLOCK }

 namespace {
+#if GR_METAL_SDK_VERSION >= 200
 /**
 * Implements sk_gpu_test::FenceSync for Metal.
+ *
+ * Uses a single MTLSharedEvent, and inserts a GPU command to increment the value
+ * each time we call insertFence(). On the CPU side we use a MTLSharedEventListener to
+ * wait for the new value to be signaled on the GPU. Since the event listener is handled
+ * on a separate thread, we communicate completion to the main thread via a semaphore.
 */
-
-// TODO
-#if 0
 class MtlFenceSync : public sk_gpu_test::FenceSync {
 public:
-    MtlFenceSync(sk_sp<const GrVkInterface> vk, VkDevice device, VkQueue queue,
-                uint32_t queueFamilyIndex)
-            : fVk(std::move(vk))
-            , fDevice(device)
-            , fQueue(queue) {
+    MtlFenceSync(id<MTLDevice> device, id<MTLCommandQueue> queue)
+            : fDevice(device)
+            , fQueue(queue)
+            , fLatestEvent(0) {
        SkDEBUGCODE(fUnfinishedSyncs = 0;)
-        VkCommandPoolCreateInfo createInfo;
-        createInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
-        createInfo.pNext = nullptr;
-        createInfo.flags = 0;
-        createInfo.queueFamilyIndex = queueFamilyIndex;
-        GR_VK_CALL_ERRCHECK(fVk, CreateCommandPool(fDevice, &createInfo, nullptr, &fCommandPool));
-
-        VkCommandBufferAllocateInfo allocateInfo;
-        allocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
-        allocateInfo.pNext = nullptr;
-        allocateInfo.commandBufferCount = 1;
-        allocateInfo.commandPool = fCommandPool;
-        allocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
-        GR_VK_CALL_ERRCHECK(fVk, AllocateCommandBuffers(fDevice, &allocateInfo, &fCommandBuffer));
-
-        VkCommandBufferBeginInfo beginInfo;
-        beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-        beginInfo.pNext = nullptr;
-        beginInfo.flags = VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
-        beginInfo.pInheritanceInfo = nullptr;
-        GR_VK_CALL_ERRCHECK(fVk, BeginCommandBuffer(fCommandBuffer, &beginInfo));
-        GR_VK_CALL_ERRCHECK(fVk, EndCommandBuffer(fCommandBuffer));
+        SK_BEGIN_AUTORELEASE_BLOCK
+        fSharedEvent = [fDevice newSharedEvent];
+        dispatch_queue_t queue = dispatch_queue_create("MTLFenceSync", NULL);
+        fSharedEventListener = [[MTLSharedEventListener alloc] initWithDispatchQueue:queue];
+        SK_END_AUTORELEASE_BLOCK
    }

-    ~VkFenceSync() override {
+    ~MtlFenceSync() override {
        SkASSERT(!fUnfinishedSyncs);
        // If the above assertion is true then the command buffer should not be in flight.
-        GR_VK_CALL(fVk, FreeCommandBuffers(fDevice, fCommandPool, 1, &fCommandBuffer));
-        GR_VK_CALL(fVk, DestroyCommandPool(fDevice, fCommandPool, nullptr));
+        // ARC should take care of these:
+        fSharedEventListener = nil;
+        fSharedEvent = nil;
    }

    sk_gpu_test::PlatformFence SK_WARN_UNUSED_RESULT insertFence() const override {
-        VkFence fence;
-        VkFenceCreateInfo info;
-        info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-        info.pNext = nullptr;
-        info.flags = 0;
-        GR_VK_CALL_ERRCHECK(fVk, CreateFence(fDevice, &info, nullptr, &fence));
-        VkSubmitInfo submitInfo;
-        submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-        submitInfo.pNext = nullptr;
-        submitInfo.waitSemaphoreCount = 0;
-        submitInfo.pWaitSemaphores = nullptr;
-        submitInfo.pWaitDstStageMask = nullptr;
-        submitInfo.commandBufferCount = 1;
-        submitInfo.pCommandBuffers = &fCommandBuffer;
-        submitInfo.signalSemaphoreCount = 0;
-        submitInfo.pSignalSemaphores = nullptr;
-        GR_VK_CALL_ERRCHECK(fVk, QueueSubmit(fQueue, 1, &submitInfo, fence));
+        SK_BEGIN_AUTORELEASE_BLOCK
+        id<MTLCommandBuffer> cmdBuffer = [fQueue commandBuffer];
+        ++fLatestEvent;
+        [cmdBuffer encodeSignalEvent:fSharedEvent value:fLatestEvent];
+        [cmdBuffer commit];
+        SK_END_AUTORELEASE_BLOCK
+
        SkDEBUGCODE(++fUnfinishedSyncs;)
-        return (sk_gpu_test::PlatformFence)fence;
+        return (sk_gpu_test::PlatformFence)fLatestEvent;
    }

    bool waitFence(sk_gpu_test::PlatformFence opaqueFence) const override {
-        VkFence fence = (VkFence)opaqueFence;
-        static constexpr uint64_t kForever = ~((uint64_t)0);
-        auto result = GR_VK_CALL(fVk, WaitForFences(fDevice, 1, &fence, true, kForever));
-        return result != VK_TIMEOUT;
+        uint64_t value = (uint64_t)opaqueFence;
+        dispatch_semaphore_t semaphore = dispatch_semaphore_create(0);
+
+        // Add listener for this particular value or greater
+        __block dispatch_semaphore_t block_sema = semaphore;
+        [fSharedEvent notifyListener: fSharedEventListener
+                             atValue: value
+                               block: ^(id<MTLSharedEvent> sharedEvent, uint64_t value) {
+                                   dispatch_semaphore_signal(block_sema);
+                               }];
+
+        long result = dispatch_semaphore_wait(semaphore, DISPATCH_TIME_FOREVER);
+
+        return !result;
    }

    void deleteFence(sk_gpu_test::PlatformFence opaqueFence) const override {
-        VkFence fence = (VkFence)opaqueFence;
-        GR_VK_CALL(fVk, DestroyFence(fDevice, fence, nullptr));
+        // Nothing to delete
        SkDEBUGCODE(--fUnfinishedSyncs;)
    }

 private:
-    sk_sp<const GrVkInterface>  fVk;
-    VkDevice                    fDevice;
-    VkQueue                     fQueue;
-    VkCommandPool               fCommandPool;
-    VkCommandBuffer             fCommandBuffer;
+    id<MTLDevice>               fDevice;
+    id<MTLCommandQueue>         fQueue;
+    id<MTLSharedEvent>          fSharedEvent;
+    MTLSharedEventListener*     fSharedEventListener;
+    mutable uint64_t            fLatestEvent;
    SkDEBUGCODE(mutable int     fUnfinishedSyncs;)
    typedef sk_gpu_test::FenceSync INHERITED;
 };

-GR_STATIC_ASSERT(sizeof(VkFence) <= sizeof(sk_gpu_test::PlatformFence));
+GR_STATIC_ASSERT(sizeof(uint64_t) <= sizeof(sk_gpu_test::PlatformFence));
 #endif

 class MtlTestContextImpl : public sk_gpu_test::MtlTestContext {
@@ -152,8 +139,25 @@ public:
 private:
    MtlTestContextImpl(id<MTLDevice> device, id<MTLCommandQueue> queue)
            : INHERITED(), fDevice(device), fQueue(queue) {
+#if GR_METAL_SDK_VERSION >= 200
+        // TODO: I believe we can just check whether creating a MTLSharedEvent returns nil,
+        // but this needs to be tested on an old OS.
+        NSOperatingSystemVersion osVersion = [[NSProcessInfo processInfo] operatingSystemVersion];
+#ifdef SK_BUILD_FOR_MAC
+        bool supportsFenceSync = (osVersion.majorVersion > 10 ||
+                                  (osVersion.majorVersion == 10 && osVersion.minorVersion >= 14));
+#else
+        bool supportsFenceSync = (osVersion.majorVersion >= 12);
+#endif
+        if (supportsFenceSync ) {
+            fFenceSync.reset(new MtlFenceSync(device, queue));
+        } else {
+            fFenceSync.reset(nullptr);
+        }
+#else
        fFenceSync.reset(nullptr);
-    }
+#endif
+     }

    void onPlatformMakeCurrent() const override {}
    std::function<void()> onPlatformGetAutoContextRestore() const override { return nullptr; }