diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp
index 8495f9b0a3..2bf332083e 100644
--- a/src/gpu/vk/GrVkBuffer.cpp
+++ b/src/gpu/vk/GrVkBuffer.cpp
@@ -184,6 +184,33 @@ void GrVkBuffer::internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer)
     VALIDATE();
 }
 
+void GrVkBuffer::copyCpuDataToGpuBuffer(GrVkGpu* gpu, const void* src, size_t size) {
+    SkASSERT(src);
+    // The Vulkan API restricts the use of vkCmdUpdateBuffer to updates that are less than or
+    // equal to 65536 bytes and a size that is 4 byte aligned.
+    if ((size <= 65536) && (0 == (size & 0x3)) && !gpu->vkCaps().avoidUpdateBuffers()) {
+        gpu->updateBuffer(this, src, this->offset(), size);
+    } else {
+        sk_sp<GrVkTransferBuffer> transferBuffer =
+                GrVkTransferBuffer::Make(gpu, size, GrVkBuffer::kCopyRead_Type);
+        if (!transferBuffer) {
+            return;
+        }
+
+        char* buffer = (char*) transferBuffer->map();
+        memcpy(buffer, src, size);
+        transferBuffer->unmap();
+
+        gpu->copyBuffer(transferBuffer.get(), this, 0, this->offset(), size);
+    }
+    this->addMemoryBarrier(gpu,
+                           VK_ACCESS_TRANSFER_WRITE_BIT,
+                           buffer_type_to_access_flags(fDesc.fType),
+                           VK_PIPELINE_STAGE_TRANSFER_BIT,
+                           VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
+                           false);
+}
+
 void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) {
     VALIDATE();
     SkASSERT(this->vkIsMapped());
@@ -199,29 +226,8 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) {
         GrVkMemory::UnmapAlloc(gpu, alloc);
         fMapPtr = nullptr;
     } else {
-        // vkCmdUpdateBuffer requires size < 64k and 4-byte alignment.
-        // https://bugs.chromium.org/p/skia/issues/detail?id=7488
-        if (size <= 65536 && 0 == (size & 0x3)) {
-            gpu->updateBuffer(this, fMapPtr, this->offset(), size);
-        } else {
-            sk_sp<GrVkTransferBuffer> transferBuffer =
-                    GrVkTransferBuffer::Make(gpu, size, GrVkBuffer::kCopyRead_Type);
-            if (!transferBuffer) {
-                return;
-            }
-
-            char* buffer = (char*) transferBuffer->map();
-            memcpy (buffer, fMapPtr, size);
-            transferBuffer->unmap();
-
-            gpu->copyBuffer(transferBuffer.get(), this, 0, this->offset(), size);
-        }
-        this->addMemoryBarrier(gpu,
-                               VK_ACCESS_TRANSFER_WRITE_BIT,
-                               buffer_type_to_access_flags(fDesc.fType),
-                               VK_PIPELINE_STAGE_TRANSFER_BIT,
-                               VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                               false);
+        SkASSERT(fMapPtr);
+        this->copyCpuDataToGpuBuffer(gpu, fMapPtr, size);
     }
 }
 
@@ -236,14 +242,18 @@ bool GrVkBuffer::vkUpdateData(GrVkGpu* gpu, const void* src, size_t srcSizeInByt
         return false;
     }
 
-    this->internalMap(gpu, srcSizeInBytes, createdNewBuffer);
-    if (!fMapPtr) {
-        return false;
+    if (fDesc.fDynamic) {
+        this->internalMap(gpu, srcSizeInBytes, createdNewBuffer);
+        if (!fMapPtr) {
+            return false;
+        }
+
+        memcpy(fMapPtr, src, srcSizeInBytes);
+        this->internalUnmap(gpu, srcSizeInBytes);
+    } else {
+        this->copyCpuDataToGpuBuffer(gpu, src, srcSizeInBytes);
     }
 
-    memcpy(fMapPtr, src, srcSizeInBytes);
-
-    this->internalUnmap(gpu, srcSizeInBytes);
     return true;
 }
 
diff --git a/src/gpu/vk/GrVkBuffer.h b/src/gpu/vk/GrVkBuffer.h
index f096921955..d65f318338 100644
--- a/src/gpu/vk/GrVkBuffer.h
+++ b/src/gpu/vk/GrVkBuffer.h
@@ -106,6 +106,7 @@ private:
 
     void internalMap(GrVkGpu* gpu, size_t size, bool* createdNewBuffer = nullptr);
     void internalUnmap(GrVkGpu* gpu, size_t size);
+    void copyCpuDataToGpuBuffer(GrVkGpu* gpu, const void* srcData, size_t size);
 
     void validate() const;
     bool vkIsMapped() const;
diff --git a/src/gpu/vk/GrVkCaps.cpp b/src/gpu/vk/GrVkCaps.cpp
index 165ea68a06..0d6f1b61ff 100644
--- a/src/gpu/vk/GrVkCaps.cpp
+++ b/src/gpu/vk/GrVkCaps.cpp
@@ -232,7 +232,6 @@ template <typename T> T* get_extension_feature_struct(const VkPhysicalDeviceFeatu
 void GrVkCaps::init(const GrContextOptions& contextOptions, const GrVkInterface* vkInterface,
                     VkPhysicalDevice physDev, const VkPhysicalDeviceFeatures2& features,
                     uint32_t physicalDeviceVersion, const GrVkExtensions& extensions) {
-
     VkPhysicalDeviceProperties properties;
     GR_VK_CALL(vkInterface, GetPhysicalDeviceProperties(physDev, &properties));
@@ -340,6 +339,20 @@ void GrVkCaps::init(const GrContextOptions& contextOptions, const GrVkInterface*
         fPreferFullscreenClears = true;
     }
 
+    if (kQualcomm_VkVendor == properties.vendorID) {
+        // On Qualcomm, mapping a gpu buffer and doing both reads and writes to it is slow. Thus
+        // for index and vertex buffers we will force the use of a cpu-side buffer and then copy
+        // the whole buffer up to the gpu.
+        fBufferMapThreshold = SK_MaxS32;
+    }
+
+    if (kQualcomm_VkVendor == properties.vendorID) {
+        // On Qualcomm it looks like using vkCmdUpdateBuffer is slower than using a transfer buffer
+        // even for small sizes.
+        fAvoidUpdateBuffers = true;
+    }
+
+
     this->initConfigTable(vkInterface, physDev, properties);
     this->initStencilFormat(vkInterface, physDev);
diff --git a/src/gpu/vk/GrVkCaps.h b/src/gpu/vk/GrVkCaps.h
index 587102d989..337c8ae7f1 100644
--- a/src/gpu/vk/GrVkCaps.h
+++ b/src/gpu/vk/GrVkCaps.h
@@ -91,6 +91,11 @@ public:
         return fShouldAlwaysUseDedicatedImageMemory;
     }
 
+    // Always use a transfer buffer instead of vkCmdUpdateBuffer to upload data to a VkBuffer.
+    bool avoidUpdateBuffers() const {
+        return fAvoidUpdateBuffers;
+    }
+
     /**
      * Returns both a supported and most preferred stencil format to use in draws.
      */
@@ -225,6 +230,8 @@ private:
     bool fNewCBOnPipelineChange = false;
     bool fShouldAlwaysUseDedicatedImageMemory = false;
 
+    bool fAvoidUpdateBuffers = false;
+
    bool fSupportsSwapchain = false;
 
     bool fSupportsPhysicalDeviceProperties2 = false;
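
Note (not part of the patch): copyCpuDataToGpuBuffer() consolidates the two upload paths that internalUnmap() previously inlined, and vkUpdateData() now routes non-dynamic buffers through it directly instead of going through a map/unmap pair. For readers less familiar with the Skia wrappers (gpu->updateBuffer(), gpu->copyBuffer(), addMemoryBarrier()), the sketch below restates the same logic against the raw Vulkan API. The function name, the pre-created staging buffer, and its persistently mapped pointer are illustrative assumptions, not Skia code:

// Sketch only: raw-Vulkan equivalent of the two upload paths in the patch,
// assuming a command buffer in the recording state and a host-visible staging
// buffer (with its mapped pointer) supplied by the caller.
#include <vulkan/vulkan.h>
#include <cstring>

void uploadToGpuBuffer(VkCommandBuffer cmd, VkBuffer dst, VkDeviceSize dstOffset,
                       const void* src, VkDeviceSize size,
                       VkBuffer staging, void* stagingMapped,
                       bool avoidUpdateBuffers) {
    // vkCmdUpdateBuffer is only valid for payloads <= 65536 bytes whose size
    // is a multiple of 4; the new caps bit forces the staging path regardless.
    if (size <= 65536 && (size & 0x3) == 0 && !avoidUpdateBuffers) {
        vkCmdUpdateBuffer(cmd, dst, dstOffset, size, src);
    } else {
        // Write into the host-visible staging buffer, then copy on the GPU.
        memcpy(stagingMapped, src, static_cast<size_t>(size));
        VkBufferCopy region = {/*srcOffset=*/0, dstOffset, size};
        vkCmdCopyBuffer(cmd, staging, dst, 1, &region);
    }

    // Both paths count as transfer writes, so both need the same barrier
    // before the buffer is consumed as vertex/index input.
    VkBufferMemoryBarrier barrier = {};
    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.buffer = dst;
    barrier.offset = dstOffset;
    barrier.size = size;
    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0,
                         0, nullptr, 1, &barrier, 0, nullptr);
}

Because both branches end in a transfer write, the patch keeps a single addMemoryBarrier() call after the if/else rather than duplicating it per branch. The Qualcomm workaround in GrVkCaps.cpp is complementary: with fBufferMapThreshold set to SK_MaxS32, the map-the-GPU-buffer path is never chosen for index/vertex updates, so writes on that vendor always take the cpu-side copy shown above.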