diff --git a/include/gpu/vk/GrVkTypes.h b/include/gpu/vk/GrVkTypes.h
index 782cb90b21..aa1334adca 100644
--- a/include/gpu/vk/GrVkTypes.h
+++ b/include/gpu/vk/GrVkTypes.h
@@ -34,6 +34,11 @@ struct GrVkAlloc {
     VkDeviceMemory fMemory;  // can be VK_NULL_HANDLE iff Tex is an RT and uses borrow semantics
     VkDeviceSize   fOffset;
     VkDeviceSize   fSize;    // this can be indeterminate iff Tex uses borrow semantics
+    uint32_t       fFlags;
+
+    enum Flag {
+        kNoncoherent_Flag = 0x1,   // memory must be flushed to device after mapping
+    };
 };
 
 struct GrVkImageInfo {
diff --git a/src/gpu/vk/GrVkBuffer.cpp b/src/gpu/vk/GrVkBuffer.cpp
index b8efb39177..82674b4cfb 100644
--- a/src/gpu/vk/GrVkBuffer.cpp
+++ b/src/gpu/vk/GrVkBuffer.cpp
@@ -180,6 +180,7 @@ void GrVkBuffer::internalUnmap(GrVkGpu* gpu, size_t size) {
     SkASSERT(this->vkIsMapped());
 
     if (fDesc.fDynamic) {
+        GrVkMemory::FlushMappedAlloc(gpu, this->alloc());
         VK_CALL(gpu, UnmapMemory(gpu->device(), this->alloc().fMemory));
         fMapPtr = nullptr;
     } else {
diff --git a/src/gpu/vk/GrVkGpu.cpp b/src/gpu/vk/GrVkGpu.cpp
index 4d410a7b3a..a892e1b71c 100644
--- a/src/gpu/vk/GrVkGpu.cpp
+++ b/src/gpu/vk/GrVkGpu.cpp
@@ -494,6 +494,7 @@ bool GrVkGpu::uploadTexDataLinear(GrVkTexture* tex,
         }
     }
 
+    GrVkMemory::FlushMappedAlloc(this, alloc);
     GR_VK_CALL(interface, UnmapMemory(fDevice, alloc.fMemory));
 
     return true;
@@ -606,6 +607,7 @@ bool GrVkGpu::uploadTexDataOptimal(GrVkTexture* tex,
         currentHeight = SkTMax(1, currentHeight/2);
     }
 
+    // no need to flush non-coherent memory, unmap will do that for us
     transferBuffer->unmap();
 
     // Change layout of our target so it can be copied to
@@ -963,12 +965,12 @@ GrStencilAttachment* GrVkGpu::createStencilAttachmentForRenderTarget(const GrRen
 
 ////////////////////////////////////////////////////////////////////////////////
 
-bool copy_testing_data(GrVkGpu* gpu, void* srcData, GrVkAlloc* alloc,
+bool copy_testing_data(GrVkGpu* gpu, void* srcData, const GrVkAlloc& alloc,
                        size_t srcRowBytes, size_t dstRowBytes, int h) {
     void* mapPtr;
     VkResult err = GR_VK_CALL(gpu->vkInterface(), MapMemory(gpu->device(),
-                                                            alloc->fMemory,
-                                                            alloc->fOffset,
+                                                            alloc.fMemory,
+                                                            alloc.fOffset,
                                                             dstRowBytes * h,
                                                             0,
                                                             &mapPtr));
@@ -984,7 +986,8 @@ bool copy_testing_data(GrVkGpu* gpu, void* srcData, GrVkAlloc* alloc,
         SkRectMemcpy(mapPtr, static_cast<size_t>(dstRowBytes), srcData, srcRowBytes,
                      srcRowBytes, h);
     }
-    GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc->fMemory));
+    GrVkMemory::FlushMappedAlloc(gpu, alloc);
+    GR_VK_CALL(gpu->vkInterface(), UnmapMemory(gpu->device(), alloc.fMemory));
     return true;
 }
 
@@ -1019,7 +1022,7 @@ GrBackendObject GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, i
     }
 
     VkImage image = VK_NULL_HANDLE;
-    GrVkAlloc alloc = { VK_NULL_HANDLE, 0, 0 };
+    GrVkAlloc alloc = { VK_NULL_HANDLE, 0, 0, 0 };
 
     VkImageTiling imageTiling = linearTiling ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;
     VkImageLayout initialLayout = (VK_IMAGE_TILING_LINEAR == imageTiling)
@@ -1070,7 +1073,7 @@ GrBackendObject GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, i
 
         VK_CALL(GetImageSubresourceLayout(fDevice, image, &subres, &layout));
 
-        if (!copy_testing_data(this, srcData, &alloc, rowCopyBytes,
+        if (!copy_testing_data(this, srcData, alloc, rowCopyBytes,
                                static_cast<size_t>(layout.rowPitch), h)) {
             GrVkMemory::FreeImageMemory(this, linearTiling, alloc);
             VK_CALL(DestroyImage(fDevice, image, nullptr));
@@ -1098,7 +1101,7 @@ GrBackendObject GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, i
             return 0;
         }
 
-        GrVkAlloc bufferAlloc = { VK_NULL_HANDLE, 0, 0 };
+        GrVkAlloc bufferAlloc = { VK_NULL_HANDLE, 0, 0, 0 };
         if (!GrVkMemory::AllocAndBindBufferMemory(this, buffer, GrVkBuffer::kCopyRead_Type, true,
                                                   &bufferAlloc)) {
             GrVkMemory::FreeImageMemory(this, linearTiling, alloc);
@@ -1107,7 +1110,7 @@ GrBackendObject GrVkGpu::createTestingOnlyBackendTexture(void* srcData, int w, i
             return 0;
         }
 
-        if (!copy_testing_data(this, srcData, &bufferAlloc, rowCopyBytes, rowCopyBytes, h)) {
+        if (!copy_testing_data(this, srcData, bufferAlloc, rowCopyBytes, rowCopyBytes, h)) {
             GrVkMemory::FreeImageMemory(this, linearTiling, alloc);
             VK_CALL(DestroyImage(fDevice, image, nullptr));
             GrVkMemory::FreeBufferMemory(this, GrVkBuffer::kCopyRead_Type, bufferAlloc);
@@ -1756,7 +1759,7 @@ bool GrVkGpu::onReadPixels(GrSurface* surface,
     // We need to submit the current command buffer to the Queue and make sure it finishes before
     // we can copy the data out of the buffer.
     this->submitCommandBuffer(kForce_SyncQueue);
-
+    GrVkMemory::InvalidateMappedAlloc(this, transferBuffer->alloc());
     void* mappedMemory = transferBuffer->map();
 
     if (copyFromOrigin) {
diff --git a/src/gpu/vk/GrVkMemory.cpp b/src/gpu/vk/GrVkMemory.cpp
index 48bea9c67e..98b2f89e24 100644
--- a/src/gpu/vk/GrVkMemory.cpp
+++ b/src/gpu/vk/GrVkMemory.cpp
@@ -10,13 +10,12 @@
 #include "GrVkGpu.h"
 #include "GrVkUtil.h"
 
-static bool get_valid_memory_type_index(VkPhysicalDeviceMemoryProperties physDevMemProps,
+static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps,
                                         uint32_t typeBits,
                                         VkMemoryPropertyFlags requestedMemFlags,
                                         uint32_t* typeIndex) {
-    uint32_t checkBit = 1;
-    for (uint32_t i = 0; i < 32; ++i) {
-        if (typeBits & checkBit) {
+    for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) {
+        if (typeBits & (1 << i)) {
             uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags &
                                       requestedMemFlags;
             if (supportedFlags == requestedMemFlags) {
@@ -24,7 +23,6 @@ static bool get_valid_memory_type_index(VkPhysicalDeviceMemoryProperties physDev
                 return true;
             }
         }
-        checkBit <<= 1;
     }
     return false;
 }
@@ -57,21 +55,32 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
     VkMemoryRequirements memReqs;
     GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));
 
-    VkMemoryPropertyFlags desiredMemProps = dynamic ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                                      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
-                                                      VK_MEMORY_PROPERTY_HOST_CACHED_BIT
-                                                    : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
     uint32_t typeIndex = 0;
-    if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
-                                     memReqs.memoryTypeBits,
-                                     desiredMemProps,
-                                     &typeIndex)) {
-        // this memory type should always be available
-        SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+    const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
+    if (dynamic) {
+        // try to get cached and ideally non-coherent memory first
+        if (!get_valid_memory_type_index(phDevMemProps,
+                                         memReqs.memoryTypeBits,
+                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+                                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+                                         &typeIndex)) {
+            // some sort of host-visible memory type should always be available for dynamic buffers
+            SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
+                                                         memReqs.memoryTypeBits,
+                                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+                                                         &typeIndex));
+        }
+
+        VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
+        alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
+                                                                   : GrVkAlloc::kNoncoherent_Flag;
+    } else {
+        // device-local memory should always be available for static buffers
+        SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                      memReqs.memoryTypeBits,
-                                                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                                     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                                                      &typeIndex));
+        alloc->fFlags = 0x0;
     }
 
     GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
@@ -81,7 +90,7 @@ bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
         return false;
     }
 
-    // Bind Memory to device
+    // Bind buffer
     VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer,
                                                       alloc->fMemory, alloc->fOffset));
     if (err) {
@@ -122,25 +131,27 @@ bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
 
     uint32_t typeIndex = 0;
     GrVkHeap* heap;
+    const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
     if (linearTiling) {
         VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                                VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
-        if (!get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+        if (!get_valid_memory_type_index(phDevMemProps,
                                          memReqs.memoryTypeBits,
                                          desiredMemProps,
                                          &typeIndex)) {
-            // this memory type should always be available
-            SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+            // some sort of host-visible memory type should always be available
+            SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                          memReqs.memoryTypeBits,
-                                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                                         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+                                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                                                          &typeIndex));
         }
         heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
+        VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
+        alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
+                                                                   : GrVkAlloc::kNoncoherent_Flag;
     } else {
         // this memory type should always be available
-        SkASSERT_RELEASE(get_valid_memory_type_index(gpu->physicalDeviceMemoryProperties(),
+        SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                      memReqs.memoryTypeBits,
                                                      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                                                      &typeIndex));
@@ -149,6 +160,7 @@ bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
         } else {
             heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
         }
+        alloc->fFlags = 0x0;
     }
 
     if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, alloc)) {
@@ -156,7 +168,7 @@ bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
         return false;
     }
 
-    // Bind Memory to device
+    // Bind image
    VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image,
                                                     alloc->fMemory, alloc->fOffset));
     if (err) {
@@ -244,6 +256,32 @@ VkAccessFlags GrVkMemory::LayoutToSrcAccessMask(const VkImageLayout layout) {
     return flags;
 }
 
+void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
+    if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
+        VkMappedMemoryRange mappedMemoryRange;
+        memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
+        mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+        mappedMemoryRange.memory = alloc.fMemory;
+        mappedMemoryRange.offset = alloc.fOffset;
+        mappedMemoryRange.size = alloc.fSize;
+        GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(),
+                                                               1, &mappedMemoryRange));
+    }
+}
+
+void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc) {
+    if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
+        VkMappedMemoryRange mappedMemoryRange;
+        memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
+        mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
+        mappedMemoryRange.memory = alloc.fMemory;
+        mappedMemoryRange.offset = alloc.fOffset;
+        mappedMemoryRange.size = alloc.fSize;
+        GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(),
+                                                                    1, &mappedMemoryRange));
+    }
+}
+
 bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize,
                               VkDeviceSize* allocOffset, VkDeviceSize* allocSize) {
     VkDeviceSize alignedSize = align_size(requestedSize, fAlignment);
diff --git a/src/gpu/vk/GrVkMemory.h b/src/gpu/vk/GrVkMemory.h
index 644d7d4198..f8d5fdf91c 100644
--- a/src/gpu/vk/GrVkMemory.h
+++ b/src/gpu/vk/GrVkMemory.h
@@ -37,6 +37,9 @@ namespace GrVkMemory {
 
     VkPipelineStageFlags LayoutToPipelineStageFlags(const VkImageLayout layout);
     VkAccessFlags LayoutToSrcAccessMask(const VkImageLayout layout);
+
+    void FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc);
+    void InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc);
 }
 
 class GrVkFreeListAlloc {
diff --git a/tools/viewer/sk_app/VulkanWindowContext.cpp b/tools/viewer/sk_app/VulkanWindowContext.cpp
index 589a7b8c1c..8d7dceeb8c 100644
--- a/tools/viewer/sk_app/VulkanWindowContext.cpp
+++ b/tools/viewer/sk_app/VulkanWindowContext.cpp
@@ -262,7 +262,7 @@ void VulkanWindowContext::createBuffers(VkFormat format) {
         GrBackendRenderTargetDesc desc;
         GrVkImageInfo info;
        info.fImage = fImages[i];
-        info.fAlloc = { VK_NULL_HANDLE, 0, 0 };
+        info.fAlloc = { VK_NULL_HANDLE, 0, 0, 0 };
         info.fImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
         info.fImageTiling = VK_IMAGE_TILING_OPTIMAL;
         info.fFormat = format;
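
Reviewer context, not part of the patch: GrVkMemory::FlushMappedAlloc and GrVkMemory::InvalidateMappedAlloc above implement the usual flush-after-CPU-write / invalidate-before-CPU-read discipline for memory that is HOST_VISIBLE but not HOST_COHERENT. Below is a minimal standalone sketch of that pattern written directly against the Vulkan 1.0 API; the helper names and parameters are illustrative and are not Skia code.

    // Standalone sketch (not Skia code), assuming a mapped allocation in memory that is
    // HOST_VISIBLE but not HOST_COHERENT.
    #include <vulkan/vulkan.h>
    #include <cstring>

    // Flush CPU writes in a mapped range so the device can see them; call before
    // unmapping or before submitting work that reads the memory.
    static void flush_mapped_range(VkDevice device, VkDeviceMemory memory,
                                   VkDeviceSize offset, VkDeviceSize size) {
        VkMappedMemoryRange range;
        memset(&range, 0, sizeof(VkMappedMemoryRange));
        range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
        range.memory = memory;
        // The spec requires offset/size to respect VkPhysicalDeviceLimits::nonCoherentAtomSize.
        range.offset = offset;
        range.size = size;
        vkFlushMappedMemoryRanges(device, 1, &range);
    }

    // Make device writes visible to the CPU before reading back through the mapped pointer.
    static void invalidate_mapped_range(VkDevice device, VkDeviceMemory memory,
                                        VkDeviceSize offset, VkDeviceSize size) {
        VkMappedMemoryRange range;
        memset(&range, 0, sizeof(VkMappedMemoryRange));
        range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
        range.memory = memory;
        range.offset = offset;
        range.size = size;
        vkInvalidateMappedMemoryRanges(device, 1, &range);
    }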