diff --git a/Doxyfile b/Doxyfile index ae43a98..34e860e 100644 --- a/Doxyfile +++ b/Doxyfile @@ -1,4 +1,4 @@ -# Doxyfile 1.9.1 +# Doxyfile 1.9.2 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -93,14 +93,6 @@ ALLOW_UNICODE_NAMES = NO OUTPUT_LANGUAGE = English -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -258,16 +250,16 @@ TAB_SIZE = 4 # the documentation. An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = @@ -312,8 +304,8 @@ OPTIMIZE_OUTPUT_SLICE = NO # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files @@ -466,7 +458,7 @@ LOOKUP_CACHE_SIZE = 0 # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. Please report any issues you +# which effectively disables parallel processing. Please report any issues you # encounter. 
Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. @@ -610,6 +602,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = NO + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -767,7 +765,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -813,18 +812,26 @@ WARNINGS = YES WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO @@ -888,10 +895,10 @@ INPUT_ENCODING = UTF-8 # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, -# *.ucf, *.qsf and *.ice. +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice. 
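As a hypothetical illustration (not part of this repository or of the Doxyfile itself), the difference between the warning tags described above could look like this in C++ code documented with Doxygen comments: WARN_IF_DOC_ERROR reports a parameter documented twice or a documented parameter that does not exist, while WARN_IF_INCOMPLETE_DOC reports parameters left without documentation.

// --- illustrative snippet, not part of the patch ---
/// Adds two integers.
/// \param a First operand.
/// \param a Documented a second time - reported by WARN_IF_DOC_ERROR.
/// \param c Parameter that does not exist - also reported by WARN_IF_DOC_ERROR.
int Add(int a, int b);
// 'b' is left undocumented above - reported by WARN_IF_INCOMPLETE_DOC.
// --- end of illustrative snippet ---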
FILE_PATTERNS = *.c \ *.cc \ @@ -1159,9 +1166,11 @@ VERBATIM_HEADERS = NO CLANG_ASSISTED_PARSING = NO -# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to -# YES then doxygen will add the directory of each input to the include path. +# If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS +# tag is set to YES then doxygen will add the directory of each input to the +# include path. # The default value is: YES. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_ADD_INC_PATHS = YES @@ -1296,7 +1305,7 @@ HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see +# this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. @@ -1306,7 +1315,7 @@ HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1413,8 +1422,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: -# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1573,16 +1586,28 @@ DISABLE_INDEX = NO # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. 
# The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1655,11 +1680,29 @@ FORMULA_MACROFILE = USE_MATHJAX = NO +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for MathJax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for MathJax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1672,15 +1715,21 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering.
For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = @@ -1860,29 +1909,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! +# special commands can be used inside the footer. See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1927,8 +1978,7 @@ USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. 
+# if errors occur, instead of asking the user for help. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1941,16 +1991,6 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See # https://en.wikipedia.org/wiki/BibTeX and \cite for more info. @@ -2031,16 +2071,6 @@ RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. - -RTF_SOURCE_CODE = NO - #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- @@ -2137,15 +2167,6 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- @@ -2262,7 +2283,7 @@ PREDEFINED = VMA_CALL_PRE= \ VMA_NULLABLE_NON_DISPATCHABLE= \ VMA_VULKAN_VERSION=1002000 \ VMA_EXTERNAL_MEMORY=1 \ - VMA_MEMORY_PRIORITY=1 \ + VMA_MEMORY_PRIORITY=1 # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2657,8 +2678,8 @@ GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. # -# Note: This setting is not only used for dot files but also for msc and -# plantuml temporary files. +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. DOT_CLEANUP = YES diff --git a/README.md b/README.md index d960926..acdefe1 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Additional features: - Debug annotations: Associate string with name or opaque pointer to your own data with every allocation. - JSON dump: Obtain a string in JSON format with detailed map of internal state, including list of allocations and gaps between them. - Convert this JSON dump into a picture to visualize your memory. See [tools/VmaDumpVis](tools/VmaDumpVis/README.md). -- Debugging incorrect memory usage: Enable initialization of all allocated memory with a bit pattern to detect usage of uninitialized or freed memory. 
Enable validation of a magic number before and after every allocation to detect out-of-bounds memory corruption. +- Debugging incorrect memory usage: Enable initialization of all allocated memory with a bit pattern to detect usage of uninitialized or freed memory. Enable validation of a magic number after every allocation to detect out-of-bounds memory corruption. - Support for interoperability with OpenGL. - Virtual allocator: Interface for using core allocation algorithm to allocate any custom data, e.g. pieces of one large buffer. @@ -81,7 +81,7 @@ bufferInfo.size = 65536; bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; VkBuffer buffer; VmaAllocation allocation; diff --git a/include/vk_mem_alloc.h b/include/vk_mem_alloc.h index 11e5817..7341f0e 100644 --- a/include/vk_mem_alloc.h +++ b/include/vk_mem_alloc.h @@ -49,7 +49,6 @@ License: MIT - [Mapping functions](@ref memory_mapping_mapping_functions) - [Persistently mapped memory](@ref memory_mapping_persistently_mapped_memory) - [Cache flush and invalidate](@ref memory_mapping_cache_control) - - [Finding out if memory is mappable](@ref memory_mapping_finding_if_memory_mappable) - \subpage staying_within_budget - [Querying for budget](@ref staying_within_budget_querying_for_budget) - [Controlling memory usage](@ref staying_within_budget_controlling_memory_usage) @@ -80,17 +79,19 @@ License: MIT - [Corruption detection](@ref debugging_memory_usage_corruption_detection) - \subpage opengl_interop - \subpage usage_patterns - - [Common mistakes](@ref usage_patterns_common_mistakes) - - [Simple patterns](@ref usage_patterns_simple) - - [Advanced patterns](@ref usage_patterns_advanced) + - [GPU-only resource](@ref usage_patterns_gpu_only) + - [Staging copy for upload](@ref usage_patterns_staging_copy_upload) + - [Readback](@ref usage_patterns_readback) + - [Advanced data uploading](@ref usage_patterns_advanced_data_uploading) + - [Other use cases](@ref usage_patterns_other_use_cases) - \subpage configuration - [Pointers to Vulkan functions](@ref config_Vulkan_functions) - [Custom host memory allocator](@ref custom_memory_allocator) - [Device memory allocation callbacks](@ref allocation_callbacks) - [Device heap memory limit](@ref heap_memory_limit) - - \subpage vk_khr_dedicated_allocation - - \subpage enabling_buffer_device_address - - \subpage vk_amd_device_coherent_memory +- \subpage vk_khr_dedicated_allocation +- \subpage enabling_buffer_device_address +- \subpage vk_amd_device_coherent_memory - \subpage general_considerations - [Thread safety](@ref general_considerations_thread_safety) - [Validation layer warnings](@ref general_considerations_validation_layer_warnings) @@ -99,8 +100,8 @@ License: MIT \section main_see_also See also -- [Product page on GPUOpen](https://gpuopen.com/gaming-product/vulkan-memory-allocator/) -- [Source repository on GitHub](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) +- [**Product page on GPUOpen**](https://gpuopen.com/gaming-product/vulkan-memory-allocator/) +- [**Source repository on GitHub**](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator) \defgroup group_init Library initialization @@ -174,13 +175,6 @@ extern "C" { #endif // #if VMA_VULKAN_VERSION >= 1001000 #endif // #if defined(__ANDROID__) && VMA_STATIC_VULKAN_FUNCTIONS && VK_NO_PROTOTYPES -#if !defined(VK_VERSION_1_2) - // This one is tricky. 
Vulkan specification defines this code as available since - // Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131. - // See pull request #207. - #define VK_ERROR_UNKNOWN ((VkResult)-13) -#endif - #if !defined(VMA_DEDICATED_ALLOCATION) #if VK_KHR_get_memory_requirements2 && VK_KHR_dedicated_allocation #define VMA_DEDICATED_ALLOCATION 1 @@ -449,56 +443,33 @@ typedef enum VmaMemoryUsage Use other members of VmaAllocationCreateInfo to specify your requirements. */ VMA_MEMORY_USAGE_UNKNOWN = 0, - /** Memory will be used on device only, so fast access from the device is preferred. - It usually means device-local GPU (video) memory. - No need to be mappable on host. - It is roughly equivalent of `D3D12_HEAP_TYPE_DEFAULT`. - - Usage: - - - Resources written and read by device, e.g. images used as attachments. - - Resources transferred from host once (immutable) or infrequently and read by - device multiple times, e.g. textures to be sampled, vertex buffers, uniform - (constant) buffers, and majority of other types of resources used on GPU. - - Allocation may still end up in `HOST_VISIBLE` memory on some implementations. - In such case, you are free to map it. - You can use #VMA_ALLOCATION_CREATE_MAPPED_BIT with this usage type. + /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. */ VMA_MEMORY_USAGE_GPU_ONLY = 1, - /** Memory will be mappable on host. - It usually means CPU (system) memory. - Guarantees to be `HOST_VISIBLE` and `HOST_COHERENT`. - CPU access is typically uncached. Writes may be write-combined. - Resources created in this pool may still be accessible to the device, but access to them can be slow. - It is roughly equivalent of `D3D12_HEAP_TYPE_UPLOAD`. - - Usage: Staging copy of resources used as transfer source. + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` and `VK_MEMORY_PROPERTY_HOST_COHERENT_BIT`. */ VMA_MEMORY_USAGE_CPU_ONLY = 2, /** - Memory that is both mappable on host (guarantees to be `HOST_VISIBLE`) and preferably fast to access by GPU. - CPU access is typically uncached. Writes may be write-combined. - - Usage: Resources written frequently by host (dynamic), read by device. E.g. textures (with LINEAR layout), vertex buffers, uniform buffers updated every frame or every draw call. + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. */ VMA_MEMORY_USAGE_CPU_TO_GPU = 3, - /** Memory mappable on host (guarantees to be `HOST_VISIBLE`) and cached. - It is roughly equivalent of `D3D12_HEAP_TYPE_READBACK`. - - Usage: - - - Resources written by device, read by host - results of some computations, e.g. screen capture, average scene luminance for HDR tone mapping. - - Any resources read or accessed randomly on host, e.g. CPU-side copy of vertex buffer used as source of transfer, but also used for collision detection. + /** + \deprecated Obsolete, preserved for backward compatibility. + Guarantees `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`, prefers `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`. */ VMA_MEMORY_USAGE_GPU_TO_CPU = 4, - /** CPU memory - memory that is preferably not `DEVICE_LOCAL`, but also not guaranteed to be `HOST_VISIBLE`. - - Usage: Staging copy of resources moved from GPU memory to CPU memory as part - of custom paging/residency mechanism, to be moved back to GPU memory when needed. 
+ /** + \deprecated Obsolete, preserved for backward compatibility. + Prefers not `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. */ VMA_MEMORY_USAGE_CPU_COPY = 5, - /** Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. + /** + Lazily allocated GPU memory having `VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT`. Exists mostly on mobile platforms. Using it on desktop PC or other GPUs with no such memory type present will fail the allocation. Usage: Memory for transient attachment images (color attachments, depth attachments etc.), created with `VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT`. @@ -506,6 +477,43 @@ typedef enum VmaMemoryUsage Allocations with this usage are always created as dedicated - it implies #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. */ VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED = 6, + /** + Selects best memory type automatically. + This flag is recommended for most common use cases. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO = 7, + /** + Selects best memory type automatically with preference for GPU (device) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE = 8, + /** + Selects best memory type automatically with preference for CPU (host) memory. + + When using this flag, if you want to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT), + you must pass one of the flags: #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT + in VmaAllocationCreateInfo::flags. + + It can be used only with functions that let the library know `VkBufferCreateInfo` or `VkImageCreateInfo`, e.g. + vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo(), vmaFindMemoryTypeIndexForImageInfo() + and not with generic memory allocation functions. + */ + VMA_MEMORY_USAGE_AUTO_PREFER_HOST = 9, VMA_MEMORY_USAGE_MAX_ENUM = 0x7FFFFFFF } VmaMemoryUsage; @@ -566,11 +574,51 @@ typedef enum VmaAllocationCreateFlagBits */ VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT = 0x00000100, /** \brief Set this flag if the allocated memory will have aliasing resources. - * + Usage of this flag prevents supplying `VkMemoryDedicatedAllocateInfoKHR` when #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT is specified. Otherwise created dedicated memory will not be suitable for aliasing resources, resulting in Vulkan Validation Layer errors. 
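A minimal sketch (not part of this patch) of how the new VMA_MEMORY_USAGE_AUTO value is meant to be combined with the VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags it references; the allocator handle, the source data and the buffer size are assumptions made only for illustration:

// --- illustrative sketch, assuming an existing VmaAllocator 'allocator',
// source data 'srcData'/'srcDataSize', and <cstring> available for memcpy ---
VkBufferCreateInfo stagingBufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
stagingBufInfo.size = 65536;
stagingBufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

VmaAllocationCreateInfo stagingAllocCreateInfo = {};
stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
// With VMA_MEMORY_USAGE_AUTO*, one of the HOST_ACCESS_* flags is required to map the allocation.
stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                               VMA_ALLOCATION_CREATE_MAPPED_BIT;

VkBuffer stagingBuf;
VmaAllocation stagingAlloc;
VmaAllocationInfo stagingAllocInfo;
if(vmaCreateBuffer(allocator, &stagingBufInfo, &stagingAllocCreateInfo,
    &stagingBuf, &stagingAlloc, &stagingAllocInfo) == VK_SUCCESS)
{
    // Persistently mapped via VMA_ALLOCATION_CREATE_MAPPED_BIT; written sequentially,
    // as declared by VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT.
    memcpy(stagingAllocInfo.pMappedData, srcData, srcDataSize);
}
// --- end of illustrative sketch ---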
*/ VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT = 0x00000200, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory will only be written sequentially, e.g. using `memcpy()` or a loop writing number-by-number, + never read or accessed randomly, so a memory type can be selected that is uncached and write-combined. + + \warning Violating this declaration may work correctly, but will likely be very slow. + Watch out for implicit reads introduced by doing e.g. `pMappedData[i] += x;` + Better prepare your data in a local variable and `memcpy()` it to the mapped pointer all at once. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT = 0x00000400, + /** + Requests possibility to map the allocation (using vmaMapMemory() or #VMA_ALLOCATION_CREATE_MAPPED_BIT). + + - If you use #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` value, + you must use this flag to be able to map the allocation. Otherwise, mapping is incorrect. + - If you use other value of #VmaMemoryUsage, this flag is ignored and mapping is always possible in memory types that are `HOST_VISIBLE`. + This includes allocations created in \ref custom_memory_pools. + + Declares that mapped memory can be read, written, and accessed in random order, + so a `HOST_CACHED` memory type is preferred. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT = 0x00000800, + /** + Together with #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, + it says that despite request for host access, a not-`HOST_VISIBLE` memory type can be selected + if it may improve performance. + + By using this flag, you declare that you will check if the allocation ended up in a `HOST_VISIBLE` memory type + (e.g. using vmaGetAllocationMemoryProperties()) and if not, you will create some "staging" buffer and + issue an explicit transfer to write/read your data. + To prepare for this possibility, don't forget to add appropriate flags like + `VK_BUFFER_USAGE_TRANSFER_DST_BIT`, `VK_BUFFER_USAGE_TRANSFER_SRC_BIT` to the parameters of created buffer or image. + */ + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT = 0x00001000, /** Allocation strategy that chooses smallest possible free range for the allocation to minimize memory usage and fragmentation, possibly at the expense of allocation time. */ @@ -1635,12 +1683,6 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex. It internally creates a temporary, dummy buffer that never has memory bound. -It is just a convenience function, equivalent to calling: - -- `vkCreateBuffer` -- `vkGetBufferMemoryRequirements` -- `vmaFindMemoryTypeIndex` -- `vkDestroyBuffer` */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( VmaAllocator VMA_NOT_NULL allocator, @@ -1653,12 +1695,6 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( It can be useful e.g. to determine value to be used as VmaPoolCreateInfo::memoryTypeIndex.
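The check that VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT obliges the caller to perform, as described above, could be sketched as follows; this is an illustration only, with the allocator and allocation handles assumed to already exist:

// --- illustrative sketch, assuming an existing VmaAllocator 'allocator' and a
// VmaAllocation 'alloc' created with the ALLOW_TRANSFER_INSTEAD flag ---
VkMemoryPropertyFlags memProps = 0;
vmaGetAllocationMemoryProperties(allocator, alloc, &memProps);
if((memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0)
{
    // The allocation ended up in mappable memory: write directly through a mapping.
}
else
{
    // Not HOST_VISIBLE: fall back to a staging buffer and an explicit transfer,
    // which is why the resource was created with VK_BUFFER_USAGE_TRANSFER_DST_BIT.
}
// --- end of illustrative sketch ---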
It internally creates a temporary, dummy image that never has memory bound. -It is just a convenience function, equivalent to calling: - -- `vkCreateImage` -- `vkGetImageMemoryRequirements` -- `vmaFindMemoryTypeIndex` -- `vkDestroyImage` */ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( VmaAllocator VMA_NOT_NULL allocator, @@ -2894,6 +2930,17 @@ If providing your own implementation, you need to implement a subset of std::ato #define VMA_DEFAULT_LARGE_HEAP_BLOCK_SIZE (256ull * 1024 * 1024) #endif +/* +Mapping hysteresis is a logic that launches when vmaMapMemory/vmaUnmapMemory is called +or a persistently mapped allocation is created and destroyed several times in a row. +It keeps additional +1 mapping of a device memory block to prevent calling actual +vkMapMemory/vkUnmapMemory too many times, which may improve performance and help +tools like RenderDoc. +*/ +#ifndef VMA_MAPPING_HYSTERESIS_ENABLED + #define VMA_MAPPING_HYSTERESIS_ENABLED 1 +#endif + #ifndef VMA_CLASS_NO_COPY #define VMA_CLASS_NO_COPY(className) \ private: \ @@ -2925,6 +2972,11 @@ static const uint32_t VMA_ALLOCATION_INTERNAL_STRATEGY_MIN_OFFSET = 0x10000000u; static const uint32_t VMA_ALLOCATION_TRY_COUNT = 32; static const uint32_t VMA_VENDOR_ID_AMD = 4098; +// This one is tricky. Vulkan specification defines this code as available since +// Vulkan 1.0, but doesn't actually define it in Vulkan SDK earlier than 1.2.131. +// See pull request #207. +#define VK_ERROR_UNKNOWN_COPY ((VkResult)-13) + #if VMA_STATS_STRING_ENABLED // Correspond to values of enum VmaSuballocationType. @@ -3505,6 +3557,150 @@ static inline void VmaPnextChainPushFront(MainT* mainStruct, NewT* newStruct) mainStruct->pNext = newStruct; } +// This is the main algorithm that guides the selection of a memory type best for an allocation - +// converts usage to required/preferred/not preferred flags. +static bool FindMemoryPreferences( + bool isIntegratedGPU, + const VmaAllocationCreateInfo& allocCreateInfo, + VkFlags bufImgUsage, // VkBufferCreateInfo::usage or VkImageCreateInfo::usage. UINT32_MAX if unknown.
+ VkMemoryPropertyFlags& outRequiredFlags, + VkMemoryPropertyFlags& outPreferredFlags, + VkMemoryPropertyFlags& outNotPreferredFlags) +{ + outRequiredFlags = allocCreateInfo.requiredFlags; + outPreferredFlags = allocCreateInfo.preferredFlags; + outNotPreferredFlags = 0; + + switch(allocCreateInfo.usage) + { + case VMA_MEMORY_USAGE_UNKNOWN: + break; + case VMA_MEMORY_USAGE_GPU_ONLY: + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_CPU_ONLY: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + break; + case VMA_MEMORY_USAGE_CPU_TO_GPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + if(!isIntegratedGPU || (outPreferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + case VMA_MEMORY_USAGE_GPU_TO_CPU: + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + outPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + break; + case VMA_MEMORY_USAGE_CPU_COPY: + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: + outRequiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; + break; + case VMA_MEMORY_USAGE_AUTO: + case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE: + case VMA_MEMORY_USAGE_AUTO_PREFER_HOST: + { + if(bufImgUsage == UINT32_MAX) + { + VMA_ASSERT(0 && "VMA_MEMORY_USAGE_AUTO* values can only be used with functions like vmaCreateBuffer, vmaCreateImage so that the details of the created resource are known."); + return false; + } + // This relies on values of VK_IMAGE_USAGE_TRANSFER* being the same as VK_BUFFER_USAGE_TRANSFER*. + const bool deviceAccess = (bufImgUsage & ~(VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT)) != 0; + const bool hostAccessSequentialWrite = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) != 0; + const bool hostAccessRandom = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) != 0; + const bool hostAccessAllowTransferInstead = (allocCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) != 0; + const bool preferDevice = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + const bool preferHost = allocCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + + // CPU random access - e.g. a buffer written to or transferred from GPU to read back on CPU. + if(hostAccessRandom) + { + if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + // Nice if it will end up in HOST_VISIBLE, but more importantly prefer DEVICE_LOCAL. + // Omitting HOST_VISIBLE here is intentional. + // In case there is DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED, it will pick that one. + // Otherwise, this will give same weight to DEVICE_LOCAL as HOST_VISIBLE | HOST_CACHED and select the former if it occurs first on the list. + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + else + { + // Always CPU memory, cached. + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + } + // CPU sequential write - may be CPU or host-visible GPU memory, uncached and write-combined. + else if(hostAccessSequentialWrite) + { + // Want uncached and write-combined.
+ outNotPreferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + + if(!isIntegratedGPU && deviceAccess && hostAccessAllowTransferInstead && !preferHost) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + } + else + { + outRequiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + // Direct GPU access, CPU sequential write (e.g. a dynamic uniform buffer updated every frame) + if(deviceAccess) + { + // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose GPU memory. + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + // GPU no direct access, CPU sequential write (e.g. an upload buffer to be transferred to the GPU) + else + { + // Could go to CPU memory or GPU BAR/unified. Up to the user to decide. If no preference, choose CPU memory. + if(preferDevice) + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + } + } + // No CPU access + else + { + // GPU access, no CPU access (e.g. a color attachment image) - prefer GPU memory + if(deviceAccess) + { + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + // No direct GPU access, no CPU access, just transfers. + // It may be staging copy intended for e.g. preserving image for next frame (then better GPU memory) or + // a "swap file" copy to free some GPU memory (then better CPU memory). + // Up to the user to decide. If no preference, assume the former and choose GPU memory. + if(preferHost) + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + else + outPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + } + break; + } + default: + VMA_ASSERT(0); + } + + // Avoid DEVICE_COHERENT unless explicitly requested. + if(((allocCreateInfo.requiredFlags | allocCreateInfo.preferredFlags) & + (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) + { + outNotPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY; + } + + return true; +} + //////////////////////////////////////////////////////////////////////////////// // Memory allocation @@ -5560,12 +5756,109 @@ static void VmaPrintStatInfo(VmaJsonWriter& json, const VmaStatInfo& stat) } #endif // _VMA_JSON_WRITER +#ifndef _VMA_MAPPING_HYSTERESIS + +class VmaMappingHysteresis +{ + VMA_CLASS_NO_COPY(VmaMappingHysteresis) +public: + VmaMappingHysteresis() = default; + + uint32_t GetExtraMapping() const { return m_ExtraMapping; } + + // Call when Map was called. + // Returns true if switched to extra +1 mapping reference count. + bool PostMap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING) + { + m_ExtraMapping = 1; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + + // Call when Unmap was called. + void PostUnmap() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 0) + ++m_MajorCounter; + else // m_ExtraMapping == 1 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was made from the memory block.
+ void PostAlloc() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + ++m_MajorCounter; + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + } + + // Call when allocation was freed from the memory block. + // Returns true if switched to extra -1 mapping reference count. + bool PostFree() + { +#if VMA_MAPPING_HYSTERESIS_ENABLED + if(m_ExtraMapping == 1) + { + ++m_MajorCounter; + if(m_MajorCounter >= COUNTER_MIN_EXTRA_MAPPING && + m_MajorCounter > m_MinorCounter + 1) + { + m_ExtraMapping = 0; + m_MajorCounter = 0; + m_MinorCounter = 0; + return true; + } + } + else // m_ExtraMapping == 0 + PostMinorCounter(); +#endif // #if VMA_MAPPING_HYSTERESIS_ENABLED + return false; + } + +private: + static const int32_t COUNTER_MIN_EXTRA_MAPPING = 7; + + uint32_t m_MinorCounter = 0; + uint32_t m_MajorCounter = 0; + uint32_t m_ExtraMapping = 0; // 0 or 1. + + void PostMinorCounter() + { + if(m_MinorCounter < m_MajorCounter) + ++m_MinorCounter; + else if(m_MajorCounter > 0) + --m_MajorCounter, --m_MinorCounter; + } +}; + +#endif // _VMA_MAPPING_HYSTERESIS + #ifndef _VMA_DEVICE_MEMORY_BLOCK /* Represents a single block of device memory (`VkDeviceMemory`) with all the data about its regions (aka suballocations, #VmaAllocation), assigned and free. -Thread-safety: This class must be externally synchronized. +Thread-safety: +- Access to m_pMetadata must be externally synchronized. +- Map, Unmap, Bind* are synchronized internally. */ class VmaDeviceMemoryBlock { @@ -5594,6 +5887,12 @@ public: uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } uint32_t GetId() const { return m_Id; } void* GetMappedData() const { return m_pMappedData; } + uint32_t GetMapRefCount() const { return m_MapCount; } + + // Call when allocation/free was made from m_pMetadata. + // Used for m_MappingHysteresis. + void PostAlloc() { m_MappingHysteresis.PostAlloc(); } + void PostFree(VmaAllocator hAllocator); // Validates all data structures inside this object. If not valid, returns false. bool Validate() const; @@ -5630,7 +5929,8 @@ private: Also protects m_MapCount, m_pMappedData. Allocations, deallocations, any change in m_pMetadata is protected by parent's VmaBlockVector::m_Mutex. */ - VMA_MUTEX m_Mutex; + VMA_MUTEX m_MapAndBindMutex; + VmaMappingHysteresis m_MappingHysteresis; uint32_t m_MapCount; void* m_pMappedData; }; @@ -5641,9 +5941,12 @@ struct VmaAllocation_T { friend struct VmaDedicatedAllocationListItemTraits; - static const uint8_t MAP_COUNT_FLAG_PERSISTENT_MAP = 0x80; - - enum FLAGS { FLAG_USER_DATA_STRING = 0x01 }; + enum FLAGS + { + FLAG_USER_DATA_STRING = 0x01, + FLAG_PERSISTENT_MAP = 0x02, + FLAG_MAPPING_ALLOWED = 0x04, + }; public: enum ALLOCATION_TYPE @@ -5654,7 +5957,7 @@ public: }; // This struct is allocated using VmaPoolAllocator. 
- VmaAllocation_T(bool userDataString); + VmaAllocation_T(bool userDataString, bool mappingAllowed); ~VmaAllocation_T(); void InitBlockAllocation( @@ -5683,7 +5986,8 @@ public: VmaDeviceMemoryBlock* GetBlock() const { VMA_ASSERT(m_Type == ALLOCATION_TYPE_BLOCK); return m_BlockAllocation.m_Block; } uint32_t GetMemoryTypeIndex() const { return m_MemoryTypeIndex; } - bool IsPersistentMap() const { return (m_MapCount & MAP_COUNT_FLAG_PERSISTENT_MAP) != 0; } + bool IsPersistentMap() const { return (m_Flags & FLAG_PERSISTENT_MAP) != 0; } + bool IsMappingAllowed() const { return (m_Flags & FLAG_MAPPING_ALLOWED) != 0; } void SetUserData(VmaAllocator hAllocator, void* pUserData); void ChangeBlockAllocation(VmaAllocator hAllocator, VmaDeviceMemoryBlock* block, VmaAllocHandle allocHandle); @@ -5738,8 +6042,7 @@ private: uint32_t m_MemoryTypeIndex; uint8_t m_Type; // ALLOCATION_TYPE uint8_t m_SuballocationType; // VmaSuballocationType - // Bit 0x80 is set when allocation was created with VMA_ALLOCATION_CREATE_MAPPED_BIT. - // Bits with mask 0x7F are reference counter for vmaMapMemory()/vmaUnmapMemory(). + // Reference counter for vmaMapMemory()/vmaUnmapMemory(). uint8_t m_MapCount; uint8_t m_Flags; // enum FLAGS #if VMA_STATS_STRING_ENABLED @@ -5996,7 +6299,8 @@ public: virtual void AddPoolStats(VmaPoolStats& inoutStats) const = 0; #if VMA_STATS_STRING_ENABLED - virtual void PrintDetailedMap(class VmaJsonWriter& json) const = 0; + // mapRefCount == UINT32_MAX means unspecified. + virtual void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const = 0; #endif // Tries to find a place for suballocation with given parameters inside this block. @@ -6036,10 +6340,12 @@ protected: void DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size, void* userData) const; #if VMA_STATS_STRING_ENABLED + // mapRefCount == UINT32_MAX means unspecified. 
void PrintDetailedMap_Begin(class VmaJsonWriter& json, VkDeviceSize unusedBytes, size_t allocationCount, - size_t unusedRangeCount) const; + size_t unusedRangeCount, + uint32_t mapRefCount) const; void PrintDetailedMap_Allocation(class VmaJsonWriter& json, VkDeviceSize offset, VkDeviceSize size, void* userData) const; void PrintDetailedMap_UnusedRange(class VmaJsonWriter& json, @@ -6111,7 +6417,7 @@ void VmaBlockMetadata::DebugLogAllocation(VkDeviceSize offset, VkDeviceSize size #if VMA_STATS_STRING_ENABLED void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, - VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount) const + VkDeviceSize unusedBytes, size_t allocationCount, size_t unusedRangeCount, uint32_t mapRefCount) const { json.BeginObject(); @@ -6127,6 +6433,12 @@ void VmaBlockMetadata::PrintDetailedMap_Begin(class VmaJsonWriter& json, json.WriteString("UnusedRanges"); json.WriteNumber((uint64_t)unusedRangeCount); + if(mapRefCount != UINT32_MAX) + { + json.WriteString("MapRefCount"); + json.WriteNumber(mapRefCount); + } + json.WriteString("Suballocations"); json.BeginArray(); } @@ -6448,7 +6760,7 @@ public: void AddPoolStats(VmaPoolStats& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const override; + void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; #endif bool CreateAllocationRequest( @@ -6650,12 +6962,13 @@ void VmaBlockMetadata_Generic::AddPoolStats(VmaPoolStats& inoutStats) const } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Generic::PrintDetailedMap(class VmaJsonWriter& json) const +void VmaBlockMetadata_Generic::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { PrintDetailedMap_Begin(json, m_SumFreeSize, // unusedBytes m_Suballocations.size() - (size_t)m_FreeCount, // allocationCount - m_FreeCount); // unusedRangeCount + m_FreeCount, // unusedRangeCount + mapRefCount); for (const auto& suballoc : m_Suballocations) { @@ -6778,7 +7091,7 @@ VkResult VmaBlockMetadata_Generic::CheckCorruption(const void* pBlockData) if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) { VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN; + return VK_ERROR_UNKNOWN_COPY; } } } @@ -7291,7 +7604,7 @@ public: void AddPoolStats(VmaPoolStats& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const override; + void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; #endif bool CreateAllocationRequest( @@ -7879,7 +8192,7 @@ void VmaBlockMetadata_Linear::AddPoolStats(VmaPoolStats& inoutStats) const } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const +void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { const VkDeviceSize size = GetSize(); const SuballocationVectorType& suballocations1st = AccessSuballocations1st(); @@ -8041,7 +8354,7 @@ void VmaBlockMetadata_Linear::PrintDetailedMap(class VmaJsonWriter& json) const } const VkDeviceSize unusedBytes = size - usedBytes; - PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount); + PrintDetailedMap_Begin(json, unusedBytes, alloc1stCount + alloc2ndCount, unusedRangeCount, mapRefCount); // SECOND PASS lastOffset = 0; @@ -8227,7 +8540,7 @@ VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* 
pBlockData) if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) { VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN; + return VK_ERROR_UNKNOWN_COPY; } } } @@ -8241,7 +8554,7 @@ VkResult VmaBlockMetadata_Linear::CheckCorruption(const void* pBlockData) if (!VmaValidateMagicValue(pBlockData, suballoc.offset + suballoc.size)) { VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN; + return VK_ERROR_UNKNOWN_COPY; } } } @@ -8924,7 +9237,7 @@ public: void AddPoolStats(VmaPoolStats& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const override; + void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; #endif bool CreateAllocationRequest( @@ -9153,7 +9466,7 @@ void VmaBlockMetadata_Buddy::AddPoolStats(VmaPoolStats& inoutStats) const } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json) const +void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { VmaStatInfo stat; CalcAllocationStatInfo(stat); @@ -9162,7 +9475,8 @@ void VmaBlockMetadata_Buddy::PrintDetailedMap(class VmaJsonWriter& json) const json, stat.unusedBytes, stat.allocationCount, - stat.unusedRangeCount); + stat.unusedRangeCount, + mapRefCount); PrintDetailedMapNode(json, m_Root, LevelToNodeSize(0)); @@ -9603,7 +9917,7 @@ public: void AddPoolStats(VmaPoolStats& inoutStats) const override; #if VMA_STATS_STRING_ENABLED - void PrintDetailedMap(class VmaJsonWriter& json) const override; + void PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const override; #endif bool CreateAllocationRequest( @@ -9874,7 +10188,7 @@ void VmaBlockMetadata_TLSF::AddPoolStats(VmaPoolStats& inoutStats) const } #if VMA_STATS_STRING_ENABLED -void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const +void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json, uint32_t mapRefCount) const { size_t blockCount = m_AllocCount + m_BlocksFreeCount; VmaStlAllocator allocator(GetAllocationCallbacks()); @@ -9893,7 +10207,8 @@ void VmaBlockMetadata_TLSF::PrintDetailedMap(class VmaJsonWriter& json) const PrintDetailedMap_Begin(json, stat.unusedBytes, stat.allocationCount, - stat.unusedRangeCount); + stat.unusedRangeCount, + mapRefCount); for (; i < blockCount; ++i) { @@ -10054,7 +10369,7 @@ VkResult VmaBlockMetadata_TLSF::CheckCorruption(const void* pBlockData) if (!VmaValidateMagicValue(pBlockData, block->offset + block->size)) { VMA_ASSERT(0 && "MEMORY CORRUPTION DETECTED AFTER VALIDATED ALLOCATION!"); - return VK_ERROR_UNKNOWN; + return VK_ERROR_UNKNOWN_COPY; } } } @@ -10543,9 +10858,6 @@ private: void* const m_pMemoryAllocateNext; VMA_RW_MUTEX m_Mutex; - /* There can be at most one allocation that is completely empty (except when minBlockCount > 0) - - a hysteresis to avoid pessimistic case of alternating creation and destruction of a VkDeviceMemory. */ - bool m_HasEmptyBlock; // Incrementally sorted by sumFreeSize, ascending. VmaVector> m_Blocks; uint32_t m_NextBlockId; @@ -10590,7 +10902,7 @@ private: - updated with new data. 
*/ void FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationStats); - void UpdateHasEmptyBlock(); + bool HasEmptyBlock() const; }; #endif // _VMA_BLOCK_VECTOR @@ -11158,7 +11470,8 @@ void VmaVirtualBlock_T::BuildStatsString(bool detailedMap, VmaStringBuilder& sb) if (detailedMap) { json.WriteString("Details"); - m_Metadata->PrintDetailedMap(json); + m_Metadata->PrintDetailedMap(json, + UINT32_MAX); // mapRefCount } json.EndObject(); @@ -11261,6 +11574,11 @@ public: VkMemoryRequirements& memReq, bool& requiresDedicatedAllocation, bool& prefersDedicatedAllocation) const; + VkResult FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkFlags bufImgUsage, // VkBufferCreateInfo::usage or VkImageCreateInfo::usage. UINT32_MAX if unknown. + uint32_t* pMemoryTypeIndex) const; // Main allocation function. VkResult AllocateMemory( @@ -11268,8 +11586,8 @@ public: bool requiresDedicatedAllocation, bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, // UINT32_MAX when unknown. VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, // UINT32_MAX if unknown. const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, size_t allocationCount, @@ -11413,8 +11731,8 @@ private: VkDeviceSize alignment, bool dedicatedPreferred, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, VmaSuballocationType suballocType, @@ -11432,6 +11750,7 @@ private: const VkMemoryAllocateInfo& allocInfo, bool map, bool isUserDataString, + bool isMappingAllowed, void* pUserData, VmaAllocation* pAllocation); @@ -11444,12 +11763,13 @@ private: uint32_t memTypeIndex, bool map, bool isUserDataString, + bool isMappingAllowed, bool canAliasMemory, void* pUserData, float priority, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, size_t allocationCount, VmaAllocation* pAllocations, const void* pNextChain = nullptr); @@ -11602,6 +11922,19 @@ void VmaDeviceMemoryBlock::Destroy(VmaAllocator allocator) m_pMetadata = VMA_NULL; } +void VmaDeviceMemoryBlock::PostFree(VmaAllocator hAllocator) +{ + if(m_MappingHysteresis.PostFree()) + { + VMA_ASSERT(m_MappingHysteresis.GetExtraMapping() == 0); + if (m_MapCount == 0) + { + m_pMappedData = VMA_NULL; + (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); + } + } +} + bool VmaDeviceMemoryBlock::Validate() const { VMA_VALIDATE((m_hMemory != VK_NULL_HANDLE) && @@ -11633,8 +11966,10 @@ VkResult VmaDeviceMemoryBlock::Map(VmaAllocator hAllocator, uint32_t count, void return VK_SUCCESS; } - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); - if (m_MapCount != 0) + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); + const uint32_t oldTotalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + m_MappingHysteresis.PostMap(); + if (oldTotalMapCount != 0) { m_MapCount += count; VMA_ASSERT(m_pMappedData != VMA_NULL); @@ -11672,15 +12007,17 @@ void VmaDeviceMemoryBlock::Unmap(VmaAllocator hAllocator, uint32_t count) return; } - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); if (m_MapCount >= count) { m_MapCount -= count; - if (m_MapCount == 0) + const uint32_t totalMapCount = m_MapCount + m_MappingHysteresis.GetExtraMapping(); + if 
(totalMapCount == 0) { m_pMappedData = VMA_NULL; (*hAllocator->GetVulkanFunctions().vkUnmapMemory)(hAllocator->m_hDevice, m_hMemory); } + m_MappingHysteresis.PostUnmap(); } else { @@ -11738,7 +12075,7 @@ VkResult VmaDeviceMemoryBlock::BindBufferMemory( "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); return hAllocator->BindVulkanBuffer(m_hMemory, memoryOffset, hBuffer, pNext); } @@ -11755,13 +12092,13 @@ VkResult VmaDeviceMemoryBlock::BindImageMemory( "Invalid allocationLocalOffset. Did you forget that this offset is relative to the beginning of the allocation, not the whole memory block?"); const VkDeviceSize memoryOffset = hAllocation->GetOffset() + allocationLocalOffset; // This lock is important so that we don't call vkBind... and/or vkMap... simultaneously on the same VkDeviceMemory from multiple threads. - VmaMutexLock lock(m_Mutex, hAllocator->m_UseMutex); + VmaMutexLock lock(m_MapAndBindMutex, hAllocator->m_UseMutex); return hAllocator->BindVulkanImage(m_hMemory, memoryOffset, hImage, pNext); } #endif // _VMA_DEVICE_MEMORY_BLOCK_FUNCTIONS #ifndef _VMA_ALLOCATION_T_FUNCTIONS -VmaAllocation_T::VmaAllocation_T(bool userDataString) +VmaAllocation_T::VmaAllocation_T(bool userDataString, bool mappingAllowed) : m_Alignment{ 1 }, m_Size{ 0 }, m_pUserData{ VMA_NULL }, @@ -11769,8 +12106,13 @@ VmaAllocation_T::VmaAllocation_T(bool userDataString) m_Type{ (uint8_t)ALLOCATION_TYPE_NONE }, m_SuballocationType{ (uint8_t)VMA_SUBALLOCATION_TYPE_UNKNOWN }, m_MapCount{ 0 }, - m_Flags{ userDataString ? (uint8_t)FLAG_USER_DATA_STRING : (uint8_t)0 } + m_Flags{ 0 } { + if(userDataString) + m_Flags |= (uint8_t)FLAG_USER_DATA_STRING; + if(mappingAllowed) + m_Flags |= (uint8_t)FLAG_MAPPING_ALLOWED; + #if VMA_STATS_STRING_ENABLED m_BufferImageUsage = 0; #endif @@ -11778,7 +12120,7 @@ VmaAllocation_T::VmaAllocation_T(bool userDataString) VmaAllocation_T::~VmaAllocation_T() { - VMA_ASSERT((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) == 0 && "Allocation was not unmapped before destruction."); + VMA_ASSERT(m_MapCount == 0 && "Allocation was not unmapped before destruction."); // Check if owned string was freed. VMA_ASSERT(m_pUserData == VMA_NULL); @@ -11799,7 +12141,11 @@ void VmaAllocation_T::InitBlockAllocation( m_Alignment = alignment; m_Size = size; m_MemoryTypeIndex = memoryTypeIndex; - m_MapCount = mapped ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; + if(mapped) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } m_SuballocationType = (uint8_t)suballocationType; m_BlockAllocation.m_Block = block; m_BlockAllocation.m_AllocHandle = allocHandle; @@ -11820,7 +12166,11 @@ void VmaAllocation_T::InitDedicatedAllocation( m_Size = size; m_MemoryTypeIndex = memoryTypeIndex; m_SuballocationType = (uint8_t)suballocationType; - m_MapCount = (pMappedData != VMA_NULL) ? MAP_COUNT_FLAG_PERSISTENT_MAP : 0; + if(pMappedData != VMA_NULL) + { + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); + m_Flags |= (uint8_t)FLAG_PERSISTENT_MAP; + } m_DedicatedAllocation.m_hParentPool = hParentPool; m_DedicatedAllocation.m_hMemory = hMemory; m_DedicatedAllocation.m_pMappedData = pMappedData; @@ -11858,7 +12208,7 @@ void VmaAllocation_T::ChangeBlockAllocation( // Move mapping reference counter from old block to new block. if (block != m_BlockAllocation.m_Block) { - uint32_t mapRefCount = m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP; + uint32_t mapRefCount = m_MapCount; if (IsPersistentMap()) ++mapRefCount; m_BlockAllocation.m_Block->Unmap(hAllocator, mapRefCount); @@ -11936,7 +12286,7 @@ void* VmaAllocation_T::GetMappedData() const switch (m_Type) { case ALLOCATION_TYPE_BLOCK: - if (m_MapCount != 0) + if (m_MapCount != 0 || IsPersistentMap()) { void* pBlockData = m_BlockAllocation.m_Block->GetMappedData(); VMA_ASSERT(pBlockData != VMA_NULL); @@ -11948,7 +12298,7 @@ void* VmaAllocation_T::GetMappedData() const } break; case ALLOCATION_TYPE_DEDICATED: - VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0)); + VMA_ASSERT((m_DedicatedAllocation.m_pMappedData != VMA_NULL) == (m_MapCount != 0 || IsPersistentMap())); return m_DedicatedAllocation.m_pMappedData; default: VMA_ASSERT(0); @@ -11972,8 +12322,9 @@ void VmaAllocation_T::DedicatedAllocCalcStatsInfo(VmaStatInfo& outInfo) void VmaAllocation_T::BlockAllocMap() { VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); - if ((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) < 0x7F) + if (m_MapCount < 0xFF) { ++m_MapCount; } @@ -11987,7 +12338,7 @@ void VmaAllocation_T::BlockAllocUnmap() { VMA_ASSERT(GetType() == ALLOCATION_TYPE_BLOCK); - if ((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) != 0) + if (m_MapCount > 0) { --m_MapCount; } @@ -12000,10 +12351,11 @@ void VmaAllocation_T::BlockAllocUnmap() VkResult VmaAllocation_T::DedicatedAllocMap(VmaAllocator hAllocator, void** ppData) { VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); + VMA_ASSERT(IsMappingAllowed() && "Mapping is not allowed on this allocation! 
Please use one of the new VMA_ALLOCATION_CREATE_HOST_ACCESS_* flags when creating it."); - if (m_MapCount != 0) + if (m_MapCount != 0 || IsPersistentMap()) { - if ((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) < 0x7F) + if (m_MapCount < 0xFF) { VMA_ASSERT(m_DedicatedAllocation.m_pMappedData != VMA_NULL); *ppData = m_DedicatedAllocation.m_pMappedData; @@ -12038,10 +12390,10 @@ void VmaAllocation_T::DedicatedAllocUnmap(VmaAllocator hAllocator) { VMA_ASSERT(GetType() == ALLOCATION_TYPE_DEDICATED); - if ((m_MapCount & ~MAP_COUNT_FLAG_PERSISTENT_MAP) != 0) + if (m_MapCount > 0) { --m_MapCount; - if (m_MapCount == 0) + if (m_MapCount == 0 && !IsPersistentMap()) { m_DedicatedAllocation.m_pMappedData = VMA_NULL; (*hAllocator->GetVulkanFunctions().vkUnmapMemory)( @@ -12127,7 +12479,6 @@ VmaBlockVector::VmaBlockVector( m_Priority(priority), m_MinAllocationAlignment(minAllocationAlignment), m_pMemoryAllocateNext(pMemoryAllocateNext), - m_HasEmptyBlock(false), m_Blocks(VmaStlAllocator(hAllocator->GetAllocationCallbacks())), m_NextBlockId(0) {} @@ -12245,8 +12596,6 @@ VkResult VmaBlockVector::AllocatePage( VmaAllocation* pAllocation) { const bool isUpperAddress = (createInfo.flags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; - const bool mapped = (createInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; - const bool isUserDataString = (createInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; VkDeviceSize freeMemory; { @@ -12286,17 +12635,11 @@ VkResult VmaBlockVector::AllocatePage( VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks.back(); VMA_ASSERT(pCurrBlock); VkResult res = AllocateFromBlock( - pCurrBlock, - size, - alignment, - createInfo.flags, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { VMA_DEBUG_LOG(" Returned from last block #%u", pCurrBlock->GetId()); + IncrementallySortBlocks(); return VK_SUCCESS; } } @@ -12305,24 +12648,55 @@ VkResult VmaBlockVector::AllocatePage( { if (strategy != VMA_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT) // MIN_MEMORY or default { - // Forward order in m_Blocks - prefer blocks with smallest amount of free space. - for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + const bool isHostVisible = + (m_hAllocator->m_MemProps.memoryTypes[m_MemoryTypeIndex].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0; + if(isHostVisible) { - VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; - VMA_ASSERT(pCurrBlock); - VkResult res = AllocateFromBlock( - pCurrBlock, - size, - alignment, - createInfo.flags, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); - if (res == VK_SUCCESS) + const bool isMappingAllowed = (createInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; + /* + For non-mappable allocations, check blocks that are not mapped first. + For mappable allocations, check blocks that are already mapped first. + This way, having many blocks, we will separate mappable and non-mappable allocations, + hopefully limiting the number of blocks that are mapped, which will help tools like RenderDoc. + */ + for(size_t mappingI = 0; mappingI < 2; ++mappingI) { - VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); - return VK_SUCCESS; + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. 
+ for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + const bool isBlockMapped = pCurrBlock->GetMappedData() != VMA_NULL; + if((mappingI == 0) == (isMappingAllowed == isBlockMapped)) + { + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } + } + } + } + } + else + { + // Forward order in m_Blocks - prefer blocks with smallest amount of free space. + for (size_t blockIndex = 0; blockIndex < m_Blocks.size(); ++blockIndex) + { + VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; + VMA_ASSERT(pCurrBlock); + VkResult res = AllocateFromBlock( + pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); + if (res == VK_SUCCESS) + { + VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + IncrementallySortBlocks(); + return VK_SUCCESS; + } } } } @@ -12333,18 +12707,11 @@ VkResult VmaBlockVector::AllocatePage( { VmaDeviceMemoryBlock* const pCurrBlock = m_Blocks[blockIndex]; VMA_ASSERT(pCurrBlock); - VkResult res = AllocateFromBlock( - pCurrBlock, - size, - alignment, - createInfo.flags, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); + VkResult res = AllocateFromBlock(pCurrBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { VMA_DEBUG_LOG(" Returned from existing block #%u", pCurrBlock->GetId()); + IncrementallySortBlocks(); return VK_SUCCESS; } } @@ -12407,17 +12774,11 @@ VkResult VmaBlockVector::AllocatePage( VMA_ASSERT(pBlock->m_pMetadata->GetSize() >= size); res = AllocateFromBlock( - pBlock, - size, - alignment, - createInfo.flags, - createInfo.pUserData, - suballocType, - strategy, - pAllocation); + pBlock, size, alignment, createInfo.flags, createInfo.pUserData, suballocType, strategy, pAllocation); if (res == VK_SUCCESS) { VMA_DEBUG_LOG(" Created new block #%u Size=%llu", pBlock->GetId(), newBlockSize); + IncrementallySortBlocks(); return VK_SUCCESS; } else @@ -12461,7 +12822,9 @@ void VmaBlockVector::Free( pBlock->Unmap(m_hAllocator, 1); } + const bool hadEmptyBlockBeforeFree = HasEmptyBlock(); pBlock->m_pMetadata->Free(hAllocation->GetAllocHandle()); + pBlock->PostFree(m_hAllocator); VMA_HEAVY_ASSERT(pBlock->Validate()); VMA_DEBUG_LOG(" Freed from MemoryTypeIndex=%u", m_MemoryTypeIndex); @@ -12470,17 +12833,17 @@ void VmaBlockVector::Free( // pBlock became empty after this deallocation. if (pBlock->m_pMetadata->IsEmpty()) { - // Already has empty block. We don't want to have two, so delete this one. - if ((m_HasEmptyBlock || budgetExceeded) && canDeleteBlock) + // Already had empty block. We don't want to have two, so delete this one. + if ((hadEmptyBlockBeforeFree || budgetExceeded) && canDeleteBlock) { pBlockToDelete = pBlock; Remove(pBlock); } - // else: We now have an empty block - leave it. + // else: We now have one empty block - leave it. A hysteresis to avoid allocating whole block back and forth. } // pBlock didn't become empty, but we have another empty block - find and free that one. // (This is optional, heuristics.) 
- else if (m_HasEmptyBlock && canDeleteBlock) + else if (hadEmptyBlockBeforeFree && canDeleteBlock) { VmaDeviceMemoryBlock* pLastBlock = m_Blocks.back(); if (pLastBlock->m_pMetadata->IsEmpty()) @@ -12490,7 +12853,6 @@ void VmaBlockVector::Free( } } - UpdateHasEmptyBlock(); IncrementallySortBlocks(); } @@ -12560,6 +12922,8 @@ VkResult VmaBlockVector::AllocateFromBlock( const bool isUpperAddress = (allocFlags & VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT) != 0; const bool mapped = (allocFlags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0; const bool isUserDataString = (allocFlags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0; + const bool isMappingAllowed = (allocFlags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0; VmaAllocationRequest currRequest = {}; if (pBlock->m_pMetadata->CreateAllocationRequest( @@ -12570,6 +12934,8 @@ VkResult VmaBlockVector::AllocateFromBlock( strategy, &currRequest)) { + pBlock->PostAlloc(); + // Allocate from pCurrBlock. if (mapped) { @@ -12580,9 +12946,8 @@ VkResult VmaBlockVector::AllocateFromBlock( } } - *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isUserDataString); + *pAllocation = m_hAllocator->m_AllocationObjectAllocator.Allocate(isUserDataString, isMappingAllowed); pBlock->m_pMetadata->Alloc(currRequest, suballocType, *pAllocation); - UpdateHasEmptyBlock(); (*pAllocation)->InitBlockAllocation( pBlock, currRequest.allocHandle, @@ -12894,21 +13259,19 @@ void VmaBlockVector::FreeEmptyBlocks(VmaDefragmentationStats* pDefragmentationSt } } } - UpdateHasEmptyBlock(); } -void VmaBlockVector::UpdateHasEmptyBlock() +bool VmaBlockVector::HasEmptyBlock() const { - m_HasEmptyBlock = false; for (size_t index = 0, count = m_Blocks.size(); index < count; ++index) { VmaDeviceMemoryBlock* const pBlock = m_Blocks[index]; if (pBlock->m_pMetadata->IsEmpty()) { - m_HasEmptyBlock = true; - break; + return true; } } + return false; } #if VMA_STATS_STRING_ENABLED @@ -12967,7 +13330,7 @@ void VmaBlockVector::PrintDetailedMap(class VmaJsonWriter& json) json.ContinueString(m_Blocks[i]->GetId()); json.EndString(); - m_Blocks[i]->m_pMetadata->PrintDetailedMap(json); + m_Blocks[i]->m_pMetadata->PrintDetailedMap(json, m_Blocks[i]->GetMapRefCount()); } json.EndObject(); } @@ -14946,8 +15309,8 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( VkDeviceSize alignment, bool dedicatedPreferred, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, uint32_t memTypeIndex, VmaSuballocationType suballocType, @@ -14978,12 +15341,14 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( memTypeIndex, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, finalCreateInfo.pUserData, finalCreateInfo.priority, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, allocationCount, pAllocations, blockVector.GetAllocationNextPtr()); @@ -15019,12 +15384,14 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( memTypeIndex, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + 
(finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, finalCreateInfo.pUserData, finalCreateInfo.priority, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, allocationCount, pAllocations, blockVector.GetAllocationNextPtr()); @@ -15058,12 +15425,14 @@ VkResult VmaAllocator_T::AllocateMemoryOfType( memTypeIndex, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT) != 0, + (finalCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0, (finalCreateInfo.flags & VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT) != 0, finalCreateInfo.pUserData, finalCreateInfo.priority, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, allocationCount, pAllocations, blockVector.GetAllocationNextPtr()); @@ -15088,12 +15457,13 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( uint32_t memTypeIndex, bool map, bool isUserDataString, + bool isMappingAllowed, bool canAliasMemory, void* pUserData, float priority, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, size_t allocationCount, VmaAllocation* pAllocations, const void* pNextChain) @@ -15133,8 +15503,8 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( bool canContainBufferWithDeviceAddress = true; if(dedicatedBuffer != VK_NULL_HANDLE) { - canContainBufferWithDeviceAddress = dedicatedBufferUsage == UINT32_MAX || // Usage flags unknown - (dedicatedBufferUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) != 0; + canContainBufferWithDeviceAddress = dedicatedBufferImageUsage == UINT32_MAX || // Usage flags unknown + (dedicatedBufferImageUsage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_EXT) != 0; } else if(dedicatedImage != VK_NULL_HANDLE) { @@ -15179,6 +15549,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemory( allocInfo, map, isUserDataString, + isMappingAllowed, pUserData, pAllocations + allocIndex); if(res != VK_SUCCESS) @@ -15233,6 +15604,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( const VkMemoryAllocateInfo& allocInfo, bool map, bool isUserDataString, + bool isMappingAllowed, void* pUserData, VmaAllocation* pAllocation) { @@ -15262,7 +15634,7 @@ VkResult VmaAllocator_T::AllocateDedicatedMemoryPage( } } - *pAllocation = m_AllocationObjectAllocator.Allocate(isUserDataString); + *pAllocation = m_AllocationObjectAllocator.Allocate(isUserDataString, isMappingAllowed); (*pAllocation)->InitDedicatedAllocation(pool, memTypeIndex, hMemory, suballocType, pMappedData, size); (*pAllocation)->SetUserData(this, pUserData); m_Budget.AddAllocation(MemoryTypeIndexToHeapIndex(memTypeIndex), size); @@ -15338,6 +15710,62 @@ void VmaAllocator_T::GetImageMemoryRequirements( } } +VkResult VmaAllocator_T::FindMemoryTypeIndex( + uint32_t memoryTypeBits, + const VmaAllocationCreateInfo* pAllocationCreateInfo, + VkFlags bufImgUsage, + uint32_t* pMemoryTypeIndex) const +{ + memoryTypeBits &= GetGlobalMemoryTypeBits(); + + if(pAllocationCreateInfo->memoryTypeBits != 0) + { + memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; + } + + VkMemoryPropertyFlags requiredFlags = 0, preferredFlags = 0, notPreferredFlags = 0; + if(!FindMemoryPreferences( + IsIntegratedGpu(), + *pAllocationCreateInfo, + bufImgUsage, + 
requiredFlags, preferredFlags, notPreferredFlags)) + { + return VK_ERROR_FEATURE_NOT_PRESENT; + } + + *pMemoryTypeIndex = UINT32_MAX; + uint32_t minCost = UINT32_MAX; + for(uint32_t memTypeIndex = 0, memTypeBit = 1; + memTypeIndex < GetMemoryTypeCount(); + ++memTypeIndex, memTypeBit <<= 1) + { + // This memory type is acceptable according to memoryTypeBits bitmask. + if((memTypeBit & memoryTypeBits) != 0) + { + const VkMemoryPropertyFlags currFlags = + m_MemProps.memoryTypes[memTypeIndex].propertyFlags; + // This memory type contains requiredFlags. + if((requiredFlags & ~currFlags) == 0) + { + // Calculate cost as number of bits from preferredFlags not present in this memory type. + uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags) + + VmaCountBitsSet(currFlags & notPreferredFlags); + // Remember memory type with lowest cost. + if(currCost < minCost) + { + *pMemoryTypeIndex = memTypeIndex; + if(currCost == 0) + { + return VK_SUCCESS; + } + minCost = currCost; + } + } + } + } + return (*pMemoryTypeIndex != UINT32_MAX) ? VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; +} + VkResult VmaAllocator_T::CalcMemTypeParams( VmaAllocationCreateInfo& inoutCreateInfo, uint32_t memTypeIndex, @@ -15370,8 +15798,24 @@ VkResult VmaAllocator_T::CalcAllocationParams( bool dedicatedRequired, bool dedicatedPreferred) { + VMA_ASSERT((inoutCreateInfo.flags & + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != + (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT) && + "Specifying both flags VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT and VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT is incorrect."); + VMA_ASSERT((((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT) == 0 || + (inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0)) && + "Specifying VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT requires also VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + if(inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE || inoutCreateInfo.usage == VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & VMA_ALLOCATION_CREATE_MAPPED_BIT) != 0) + { + VMA_ASSERT((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) != 0 && + "When using VMA_ALLOCATION_CREATE_MAPPED_BIT and usage = VMA_MEMORY_USAGE_AUTO*, you must also specify VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT."); + } + } + + // If memory is lazily allocated, it should be always dedicated. if(dedicatedRequired || - // If memory is lazily allocated, it should be always dedicated. inoutCreateInfo.usage == VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED) { inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; @@ -15400,6 +15844,21 @@ VkResult VmaAllocator_T::CalcAllocationParams( { inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; } + + // Non-auto USAGE values imply HOST_ACCESS flags. + // And so does VMA_MEMORY_USAGE_UNKNOWN because it is used with custom pools. + // Which specific flag is used doesn't matter. They change things only when used with VMA_MEMORY_USAGE_AUTO*. + // Otherwise they just protect from assert on mapping. 
+ if(inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE && + inoutCreateInfo.usage != VMA_MEMORY_USAGE_AUTO_PREFER_HOST) + { + if((inoutCreateInfo.flags & (VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT)) == 0) + { + inoutCreateInfo.flags |= VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + } + } + return VK_SUCCESS; } @@ -15408,8 +15867,8 @@ VkResult VmaAllocator_T::AllocateMemory( bool requiresDedicatedAllocation, bool prefersDedicatedAllocation, VkBuffer dedicatedBuffer, - VkBufferUsageFlags dedicatedBufferUsage, VkImage dedicatedImage, + VkFlags dedicatedBufferImageUsage, const VmaAllocationCreateInfo& createInfo, VmaSuballocationType suballocType, size_t allocationCount, @@ -15438,8 +15897,8 @@ VkResult VmaAllocator_T::AllocateMemory( vkMemReq.alignment, prefersDedicatedAllocation, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, createInfoFinal, blockVector.GetMemoryTypeIndex(), suballocType, @@ -15453,7 +15912,7 @@ VkResult VmaAllocator_T::AllocateMemory( // Bit mask of memory Vulkan types acceptable for this allocation. uint32_t memoryTypeBits = vkMemReq.memoryTypeBits; uint32_t memTypeIndex = UINT32_MAX; - res = vmaFindMemoryTypeIndex(this, memoryTypeBits, &createInfoFinal, &memTypeIndex); + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); // Can't find any single memory type matching requirements. res is VK_ERROR_FEATURE_NOT_PRESENT. if(res != VK_SUCCESS) return res; @@ -15467,8 +15926,8 @@ VkResult VmaAllocator_T::AllocateMemory( vkMemReq.alignment, requiresDedicatedAllocation || prefersDedicatedAllocation, dedicatedBuffer, - dedicatedBufferUsage, dedicatedImage, + dedicatedBufferImageUsage, createInfoFinal, memTypeIndex, suballocType, @@ -15483,7 +15942,7 @@ VkResult VmaAllocator_T::AllocateMemory( // Remove old memTypeIndex from list of possibilities. memoryTypeBits &= ~(1u << memTypeIndex); // Find alternative memTypeIndex. - res = vmaFindMemoryTypeIndex(this, memoryTypeBits, &createInfoFinal, &memTypeIndex); + res = FindMemoryTypeIndex(memoryTypeBits, &createInfoFinal, dedicatedBufferImageUsage, &memTypeIndex); } while(res == VK_SUCCESS); // No other matching memory type index could be found. @@ -16702,91 +17161,7 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndex( VMA_ASSERT(pAllocationCreateInfo != VMA_NULL); VMA_ASSERT(pMemoryTypeIndex != VMA_NULL); - memoryTypeBits &= allocator->GetGlobalMemoryTypeBits(); - - if(pAllocationCreateInfo->memoryTypeBits != 0) - { - memoryTypeBits &= pAllocationCreateInfo->memoryTypeBits; - } - - uint32_t requiredFlags = pAllocationCreateInfo->requiredFlags; - uint32_t preferredFlags = pAllocationCreateInfo->preferredFlags; - uint32_t notPreferredFlags = 0; - - // Convert usage to requiredFlags and preferredFlags. 
- switch(pAllocationCreateInfo->usage) - { - case VMA_MEMORY_USAGE_UNKNOWN: - break; - case VMA_MEMORY_USAGE_GPU_ONLY: - if(!allocator->IsIntegratedGpu() || (preferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - } - break; - case VMA_MEMORY_USAGE_CPU_ONLY: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - break; - case VMA_MEMORY_USAGE_CPU_TO_GPU: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - if(!allocator->IsIntegratedGpu() || (preferredFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) == 0) - { - preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - } - break; - case VMA_MEMORY_USAGE_GPU_TO_CPU: - requiredFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - preferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - break; - case VMA_MEMORY_USAGE_CPU_COPY: - notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - break; - case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: - requiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT; - break; - default: - VMA_ASSERT(0); - break; - } - - // Avoid DEVICE_COHERENT unless explicitly requested. - if(((pAllocationCreateInfo->requiredFlags | pAllocationCreateInfo->preferredFlags) & - (VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY | VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD_COPY)) == 0) - { - notPreferredFlags |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD_COPY; - } - - *pMemoryTypeIndex = UINT32_MAX; - uint32_t minCost = UINT32_MAX; - for(uint32_t memTypeIndex = 0, memTypeBit = 1; - memTypeIndex < allocator->GetMemoryTypeCount(); - ++memTypeIndex, memTypeBit <<= 1) - { - // This memory type is acceptable according to memoryTypeBits bitmask. - if((memTypeBit & memoryTypeBits) != 0) - { - const VkMemoryPropertyFlags currFlags = - allocator->m_MemProps.memoryTypes[memTypeIndex].propertyFlags; - // This memory type contains requiredFlags. - if((requiredFlags & ~currFlags) == 0) - { - // Calculate cost as number of bits from preferredFlags not present in this memory type. - uint32_t currCost = VmaCountBitsSet(preferredFlags & ~currFlags) + - VmaCountBitsSet(currFlags & notPreferredFlags); - // Remember memory type with lowest cost. - if(currCost < minCost) - { - *pMemoryTypeIndex = memTypeIndex; - if(currCost == 0) - { - return VK_SUCCESS; - } - minCost = currCost; - } - } - } - } - return (*pMemoryTypeIndex != UINT32_MAX) ? 
VK_SUCCESS : VK_ERROR_FEATURE_NOT_PRESENT; + return allocator->FindMemoryTypeIndex(memoryTypeBits, pAllocationCreateInfo, UINT32_MAX, pMemoryTypeIndex); } VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( @@ -16811,10 +17186,10 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForBufferInfo( funcs->vkGetBufferMemoryRequirements( hDev, hBuffer, &memReq); - res = vmaFindMemoryTypeIndex( - allocator, + res = allocator->FindMemoryTypeIndex( memReq.memoryTypeBits, pAllocationCreateInfo, + pBufferCreateInfo->usage, pMemoryTypeIndex); funcs->vkDestroyBuffer( @@ -16845,10 +17220,10 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaFindMemoryTypeIndexForImageInfo( funcs->vkGetImageMemoryRequirements( hDev, hImage, &memReq); - res = vmaFindMemoryTypeIndex( - allocator, + res = allocator->FindMemoryTypeIndex( memReq.memoryTypeBits, pAllocationCreateInfo, + pImageCreateInfo->usage, pMemoryTypeIndex); funcs->vkDestroyImage( @@ -16958,8 +17333,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemory( false, // requiresDedicatedAllocation false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + UINT32_MAX, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, 1, // allocationCount @@ -16997,8 +17372,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryPages( false, // requiresDedicatedAllocation false, // prefersDedicatedAllocation VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + UINT32_MAX, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_UNKNOWN, allocationCount, @@ -17040,8 +17415,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForBuffer( requiresDedicatedAllocation, prefersDedicatedAllocation, buffer, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + UINT32_MAX, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -17079,8 +17454,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaAllocateMemoryForImage( requiresDedicatedAllocation, prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage image, // dedicatedImage + UINT32_MAX, // dedicatedBufferImageUsage *pCreateInfo, VMA_SUBALLOCATION_TYPE_IMAGE_UNKNOWN, 1, // allocationCount @@ -17515,8 +17890,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBuffer( requiresDedicatedAllocation, prefersDedicatedAllocation, *pBuffer, // dedicatedBuffer - pBufferCreateInfo->usage, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + pBufferCreateInfo->usage, // dedicatedBufferImageUsage *pAllocationCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -17610,8 +17985,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateBufferWithAlignment( requiresDedicatedAllocation, prefersDedicatedAllocation, *pBuffer, // dedicatedBuffer - pBufferCreateInfo->usage, // dedicatedBufferUsage VK_NULL_HANDLE, // dedicatedImage + pBufferCreateInfo->usage, // dedicatedBufferImageUsage *pAllocationCreateInfo, VMA_SUBALLOCATION_TYPE_BUFFER, 1, // allocationCount @@ -17731,8 +18106,8 @@ VMA_CALL_PRE VkResult VMA_CALL_POST vmaCreateImage( requiresDedicatedAllocation, prefersDedicatedAllocation, VK_NULL_HANDLE, // dedicatedBuffer - UINT32_MAX, // dedicatedBufferUsage *pImage, // dedicatedImage + pImageCreateInfo->usage, // dedicatedBufferImageUsage *pAllocationCreateInfo, suballocType, 1, // allocationCount @@ -18022,7 
+18397,7 @@ bufferInfo.size = 65536; bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; VkBuffer buffer; VmaAllocation allocation; @@ -18046,8 +18421,8 @@ appropriate members of VmaAllocationCreateInfo structure, as described below. You can also combine multiple methods. -# If you just want to find memory type index that meets your requirements, you - can use function: vmaFindMemoryTypeIndex(), vmaFindMemoryTypeIndexForBufferInfo(), - vmaFindMemoryTypeIndexForImageInfo(). + can use function: vmaFindMemoryTypeIndexForBufferInfo(), + vmaFindMemoryTypeIndexForImageInfo(), vmaFindMemoryTypeIndex(). -# If you want to allocate a region of device memory without association with any specific image or buffer, you can use function vmaAllocateMemory(). Usage of this function is not recommended and usually not needed. @@ -18058,9 +18433,10 @@ You can also combine multiple methods. vmaAllocateMemoryForBuffer(), vmaAllocateMemoryForImage(). For binding you should use functions: vmaBindBufferMemory(), vmaBindImageMemory() or their extended versions: vmaBindBufferMemory2(), vmaBindImageMemory2(). --# If you want to create a buffer or an image, allocate memory for it and bind +-# **This is the easiest and recommended way to use this library:** + If you want to create a buffer or an image, allocate memory for it and bind them together, all in one call, you can use function vmaCreateBuffer(), - vmaCreateImage(). This is the easiest and recommended way to use this library. + vmaCreateImage(). When using 3. or 4., the library internally queries Vulkan for memory types supported for that buffer or image (function `vkGetBufferMemoryRequirements()`) @@ -18078,11 +18454,12 @@ It is valid, although not very useful. The easiest way to specify memory requirements is to fill member VmaAllocationCreateInfo::usage using one of the values of enum #VmaMemoryUsage. It defines high level, common usage types. -For more details, see description of this enum. +Since version 3 of the library, it is recommended to use #VMA_MEMORY_USAGE_AUTO to let it select best memory type for your resource automatically. For example, if you want to create a uniform buffer that will be filled using -transfer only once or infrequently and used for rendering every frame, you can -do it using following code: +transfer only once or infrequently and then used for rendering every frame as a uniform buffer, you can +do it using following code. The buffer will most likely end up in a memory type with +`VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT` to be fast to access by the GPU device. \code VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; @@ -18090,13 +18467,56 @@ bufferInfo.size = 65536; bufferInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocInfo = {}; -allocInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocInfo.usage = VMA_MEMORY_USAGE_AUTO; VkBuffer buffer; VmaAllocation allocation; vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullptr); \endcode +If you have a preference for putting the resource in GPU (device) memory or CPU (host) memory +on systems with discrete graphics card that have the memories separate, you can use +#VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST. 
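+
+For illustration, a minimal sketch of explicitly preferring device (VRAM) placement for a
+GPU-heavy buffer; the size and usage flags below are arbitrary placeholders, and this is only
+a preference hint, not a requirement of the library:
+
+\code
+VkBufferCreateInfo storageBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+storageBufCreateInfo.size = 1048576; // Placeholder size.
+storageBufCreateInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
+
+VmaAllocationCreateInfo storageAllocCreateInfo = {};
+// Prefer VRAM on systems with a discrete GPU; it is a preference, so another memory type
+// can still be chosen if needed.
+storageAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
+
+VkBuffer storageBuf;
+VmaAllocation storageAlloc;
+vmaCreateBuffer(allocator, &storageBufCreateInfo, &storageAllocCreateInfo, &storageBuf, &storageAlloc, nullptr);
+\endcode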
+
+When using `VMA_MEMORY_USAGE_AUTO*` while you want to map the allocated memory,
+you also need to specify one of the host access flags:
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+This will help the library decide on a preferred memory type to ensure it has `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+so you can map it.
+
+For example, a staging buffer that will be filled via mapped pointer and then
+used as a source of transfer to the buffer described previously can be created like this.
+It will likely end up in a memory type that is `HOST_VISIBLE` and `HOST_COHERENT`
+but not `HOST_CACHED` (meaning uncached, write-combined) and not `DEVICE_LOCAL` (meaning system RAM).
+
+\code
+VkBufferCreateInfo stagingBufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+stagingBufferInfo.size = 65536;
+stagingBufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
+
+VmaAllocationCreateInfo stagingAllocInfo = {};
+stagingAllocInfo.usage = VMA_MEMORY_USAGE_AUTO;
+stagingAllocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+
+VkBuffer stagingBuffer;
+VmaAllocation stagingAllocation;
+vmaCreateBuffer(allocator, &stagingBufferInfo, &stagingAllocInfo, &stagingBuffer, &stagingAllocation, nullptr);
+\endcode
+
+For more examples of creating different kinds of resources, see chapter \ref usage_patterns.
+
+Usage values `VMA_MEMORY_USAGE_AUTO*` are legal to use only when the library knows
+about the resource being created by having `VkBufferCreateInfo` / `VkImageCreateInfo` passed,
+so they work with functions like: vmaCreateBuffer(), vmaCreateImage(), vmaFindMemoryTypeIndexForBufferInfo() etc.
+If you allocate raw memory using function vmaAllocateMemory(), you have to use other means of selecting
+memory type, as described below.
+
+\note
+Old usage values (`VMA_MEMORY_USAGE_GPU_ONLY`, `VMA_MEMORY_USAGE_CPU_ONLY`,
+`VMA_MEMORY_USAGE_CPU_TO_GPU`, `VMA_MEMORY_USAGE_GPU_TO_CPU`, `VMA_MEMORY_USAGE_CPU_COPY`)
+are still available and work the same way as in previous versions of the library
+for backward compatibility, but they are not recommended.

\section choosing_memory_type_required_preferred_flags Required and preferred flags

You can specify more detailed requirements by filling members
@@ -18110,7 +18530,7 @@ use following code:
VmaAllocationCreateInfo allocInfo = {};
allocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
allocInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
-allocInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
+allocInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;

VkBuffer buffer;
VmaAllocation allocation;
@@ -18120,8 +18540,8 @@ vmaCreateBuffer(allocator, &bufferInfo, &allocInfo, &buffer, &allocation, nullpt
A memory type is chosen that has all the required flags and as many preferred flags set as possible.
-If you use VmaAllocationCreateInfo::usage, it is just internally converted to
-a set of required and preferred flags.
+Value passed in VmaAllocationCreateInfo::usage is internally converted to a set of required and preferred flags,
+plus some extra "magic" (heuristics).

\section choosing_memory_type_explicit_memory_types Explicit memory types

@@ -18188,6 +18608,13 @@ Mapping the same `VkDeviceMemory` block multiple times is illegal - only one map
This includes mapping disjoint regions. Mapping is not reference-counted internally by Vulkan.
Because of this, Vulkan Memory Allocator provides following facilities: +\note If you want to be able to map an allocation, you need to specify one of the flags +#VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT +in VmaAllocationCreateInfo::flags. These flags are required for an allocation to be mappable +when using #VMA_MEMORY_USAGE_AUTO or other `VMA_MEMORY_USAGE_AUTO*` enum values. +For other usage values they are ignored and every such allocation made in `HOST_VISIBLE` memory type is mappable, +but they can still be used for consistency. + \section memory_mapping_mapping_functions Mapping functions The library provides following functions for mapping of a specific #VmaAllocation: vmaMapMemory(), vmaUnmapMemory(). @@ -18200,16 +18627,15 @@ Example: \code // Having these objects initialized: - struct ConstantBuffer { ... }; -ConstantBuffer constantBufferData; +ConstantBuffer constantBufferData = ... -VmaAllocator allocator; -VkBuffer constantBuffer; -VmaAllocation constantBufferAllocation; +VmaAllocator allocator = ... +VkBuffer constantBuffer = ... +VmaAllocation constantBufferAllocation = ... // You can map and fill your buffer using following code: @@ -18246,8 +18672,9 @@ bufCreateInfo.size = sizeof(ConstantBuffer); bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; VkBuffer buf; VmaAllocation alloc; @@ -18258,18 +18685,12 @@ vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allo memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); \endcode -There are some exceptions though, when you should consider mapping memory only for a short period of time: - -- When operating system is Windows 7 or 8.x (Windows 10 is not affected because it uses WDDM2), - device is discrete AMD GPU, - and memory type is the special 256 MiB pool of `DEVICE_LOCAL + HOST_VISIBLE` memory - (selected when you use #VMA_MEMORY_USAGE_CPU_TO_GPU), - then whenever a memory block allocated from this memory type stays mapped - for the time of any call to `vkQueueSubmit()` or `vkQueuePresentKHR()`, this - block is migrated by WDDM to system RAM, which degrades performance. It doesn't - matter if that particular memory block is actually used by the command buffer - being submitted. -- Keeping many large memory blocks mapped may impact performance or stability of some debugging tools. +\note #VMA_ALLOCATION_CREATE_MAPPED_BIT by itself doesn't guarantee that the allocation will end up +in a mappable memory type. +For this, you need to also specify #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT or +#VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT. +#VMA_ALLOCATION_CREATE_MAPPED_BIT only guarantees that if the memory is `HOST_VISIBLE`, the allocation will be mapped on creation. +For an example of how to make use of this fact, see section \ref usage_patterns_advanced_data_uploading. 
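+
+As an illustrative sketch (reusing `allocInfo` and `constantBufferData` from the example above;
+the check mainly matters when the memory type is not guaranteed to be `HOST_VISIBLE`, e.g. with
+#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT), you can inspect
+VmaAllocationInfo::pMappedData to see whether that guarantee applied:
+
+\code
+if(allocInfo.pMappedData != nullptr)
+{
+    // The allocation is in HOST_VISIBLE memory and persistently mapped - write through the pointer.
+    memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData));
+}
+else
+{
+    // The memory is not HOST_VISIBLE - upload through a staging buffer and a transfer instead.
+}
+\endcode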
\section memory_mapping_cache_control Cache flush and invalidate @@ -18290,86 +18711,9 @@ In any memory type that is `HOST_VISIBLE` but not `HOST_COHERENT`, all allocatio within blocks are aligned to this value, so their offsets are always multiply of `nonCoherentAtomSize` and two different allocations never share same "line" of this size. -Please note that memory allocated with #VMA_MEMORY_USAGE_CPU_ONLY is guaranteed to be `HOST_COHERENT`. - -Also, Windows drivers from all 3 **PC** GPU vendors (AMD, Intel, NVIDIA) +Also, Windows drivers from all 3 PC GPU vendors (AMD, Intel, NVIDIA) currently provide `HOST_COHERENT` flag on all memory types that are -`HOST_VISIBLE`, so on this platform you may not need to bother. - -\section memory_mapping_finding_if_memory_mappable Finding out if memory is mappable - -It may happen that your allocation ends up in memory that is `HOST_VISIBLE` (available for mapping) -despite it wasn't explicitly requested. -For example, application may work on integrated graphics with unified memory (like Intel) or -allocation from video memory might have failed, so the library chose system memory as fallback. - -You can detect this case and map such allocation to access its memory on CPU directly, -instead of launching a transfer operation. -In order to do that: call vmaGetAllocationMemoryProperties() -and look for `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT` flag. - -\code -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = sizeof(ConstantBuffer); -bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; - -VkBuffer buf; -VmaAllocation alloc; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr); - -VkMemoryPropertyFlags memFlags; -vmaGetAllocationMemoryProperties(allocator, alloc, &memFlags); -if((memFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0) -{ - // Allocation ended up in mappable memory. You can map it and access it directly. - void* mappedData; - vmaMapMemory(allocator, alloc, &mappedData); - memcpy(mappedData, &constantBufferData, sizeof(constantBufferData)); - vmaUnmapMemory(allocator, alloc); -} -else -{ - // Allocation ended up in non-mappable memory. - // You need to create CPU-side buffer in VMA_MEMORY_USAGE_CPU_ONLY and make a transfer. -} -\endcode - -You can even use #VMA_ALLOCATION_CREATE_MAPPED_BIT flag while creating allocations -that are not necessarily `HOST_VISIBLE` (e.g. using #VMA_MEMORY_USAGE_GPU_ONLY). -If the allocation ends up in memory type that is `HOST_VISIBLE`, it will be persistently mapped and you can use it directly. -If not, the flag is just ignored. -Example: - -\code -VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -bufCreateInfo.size = sizeof(ConstantBuffer); -bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - -VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; -allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - -VkBuffer buf; -VmaAllocation alloc; -VmaAllocationInfo allocInfo; -vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); - -if(allocInfo.pMappedData != nullptr) -{ - // Allocation ended up in mappable memory. - // It is persistently mapped. You can access it directly. 
- memcpy(allocInfo.pMappedData, &constantBufferData, sizeof(constantBufferData)); -} -else -{ - // Allocation ended up in non-mappable memory. - // You need to create CPU-side buffer in VMA_MEMORY_USAGE_CPU_ONLY and make a transfer. -} -\endcode +`HOST_VISIBLE`, so on PC you may not need to bother. \page staying_within_budget Staying within budget @@ -18512,7 +18856,7 @@ finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBi // Validate if(finalMemReq.memoryTypeBits != 0) VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; VmaAllocation alloc; res = vmaAllocateMemory(allocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr); @@ -18541,7 +18885,7 @@ See chapter 11.8. "Memory Aliasing" of Vulkan specification or `VK_IMAGE_CREATE_ - You can create more complex layout where different images and buffers are bound at different offsets inside one large allocation. For example, one can imagine a big texture used in some render passes, aliasing with a set of many small buffers -used between in some further passes. To bind a resource at non-zero offset of an allocation, +used between in some further passes. To bind a resource at non-zero offset in an allocation, use vmaBindBufferMemory2() / vmaBindImageMemory2(). - Before allocating memory for the resources you want to alias, check `memoryTypeBits` returned in memory requirements of each resource to make sure the bits overlap. @@ -18566,6 +18910,7 @@ It can be useful if you want to: - Reserve minimum or fixed amount of Vulkan memory always preallocated for that pool. - Use extra parameters for a set of your allocations that are available in #VmaPoolCreateInfo but not in #VmaAllocationCreateInfo - e.g., custom minimum alignment, custom `pNext` chain. +- Perform defragmentation on a specific subset of your allocations. To use custom memory pools: @@ -18612,6 +18957,14 @@ It is supported only when VmaPoolCreateInfo::blockSize = 0. To use this feature, set VmaAllocationCreateInfo::pool to the pointer to your custom pool and VmaAllocationCreateInfo::flags to #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. +\note Excessive use of custom pools is a common mistake when using this library. +Custom pools may be useful for special purposes - when you want to +keep certain type of resources separate e.g. to reserve minimum amount of memory +for them or limit maximum amount of memory they can occupy. For most +resources this is not needed and so it is not recommended to create #VmaPool +objects and allocations out of them. Allocating from the default pool is sufficient. + + \section custom_memory_pools_MemTypeIndex Choosing memory type index When creating a pool, you must explicitly specify memory type index. @@ -18622,11 +18975,11 @@ that you are going to create in that pool. \code VkBufferCreateInfo exampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -exampleBufCreateInfo.size = 1024; // Whatever. -exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; // Change if needed. +exampleBufCreateInfo.size = 1024; // Doesn't matter +exampleBufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; // Change if needed. 
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; uint32_t memTypeIndex; vmaFindMemoryTypeIndexForBufferInfo(allocator, &exampleBufCreateInfo, &allocCreateInfo, &memTypeIndex); @@ -19032,18 +19385,17 @@ some handle, index, key, ordinal number or any other value that would associate the allocation with your custom metadata. \code -VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; -// Fill bufferInfo... +VkBufferCreateInfo bufCreateInfo = ... MyBufferMetadata* pMetadata = CreateBufferMetadata(); VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; allocCreateInfo.pUserData = pMetadata; VkBuffer buffer; VmaAllocation allocation; -vmaCreateBuffer(allocator, &bufferInfo, &allocCreateInfo, &buffer, &allocation, nullptr); +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buffer, &allocation, nullptr); \endcode The pointer may be later retrieved as VmaAllocationInfo::pUserData: @@ -19057,7 +19409,7 @@ MyBufferMetadata* pMetadata = (MyBufferMetadata*)allocInfo.pUserData; It can also be changed using function vmaSetAllocationUserData(). Values of (non-zero) allocations' `pUserData` are printed in JSON report created by -vmaBuildStatsString(), in hexadecimal form. +vmaBuildStatsString() in hexadecimal form. \section allocation_names Allocation names @@ -19065,19 +19417,18 @@ There is alternative mode available where `pUserData` pointer is used to point t a null-terminated string, giving a name to the allocation. To use this mode, set #VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT flag in VmaAllocationCreateInfo::flags. Then `pUserData` passed as VmaAllocationCreateInfo::pUserData or argument to -vmaSetAllocationUserData() must be either null or pointer to a null-terminated string. +vmaSetAllocationUserData() must be either null or a pointer to a null-terminated string. The library creates internal copy of the string, so the pointer you pass doesn't need to be valid for whole lifetime of the allocation. You can free it after the call. \code -VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO }; -// Fill imageInfo... +VkImageCreateInfo imageInfo = ... std::string imageName = "Texture: "; imageName += fileName; VmaAllocationCreateInfo allocCreateInfo = {}; -allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; allocCreateInfo.pUserData = imageName.c_str(); @@ -19407,33 +19758,14 @@ Contrary to Direct3D 12, Vulkan doesn't have a concept of alignment of the entir \page usage_patterns Recommended usage patterns +Vulkan gives great flexibility in memory allocation. +This chapter shows the most common patterns. + See also slides from talk: [Sawicki, Adam. Advanced Graphics Techniques Tutorial: Memory management in Vulkan and DX12. Game Developers Conference, 2018](https://www.gdcvault.com/play/1025458/Advanced-Graphics-Techniques-Tutorial-New) -\section usage_patterns_common_mistakes Common mistakes - -Use of CPU_TO_GPU instead of CPU_ONLY memory - -#VMA_MEMORY_USAGE_CPU_TO_GPU is recommended only for resources that will be -mapped and written by the CPU, as well as read directly by the GPU - like some -buffers or textures updated every frame (dynamic). 
If you create a staging copy
-of a resource to be written by CPU and then used as a source of transfer to
-another resource placed in the GPU memory, that staging resource should be
-created with #VMA_MEMORY_USAGE_CPU_ONLY. Please read the descriptions of these
-enums carefully for details.
-
-Unnecessary use of custom pools
-
-\ref custom_memory_pools may be useful for special purposes - when you want to
-keep certain type of resources separate e.g. to reserve minimum amount of memory
-for them or limit maximum amount of memory they can occupy. For most
-resources this is not needed and so it is not recommended to create #VmaPool
-objects and allocations out of them. Allocating from the default pool is sufficient.
-
-\section usage_patterns_simple Simple patterns
-
-\subsection usage_patterns_simple_render_targets Render targets
+\section usage_patterns_gpu_only GPU-only resource

When:
Any resources that you frequently write and read on GPU,
@@ -19441,123 +19773,216 @@ e.g. images used as color attachments (aka "render targets"), depth-stencil atta
images/buffers used as storage image/buffer (aka "Unordered Access View (UAV)").

What to do:
-Create them in video memory that is fastest to access from GPU using
-#VMA_MEMORY_USAGE_GPU_ONLY.
+Let the library select the optimal memory type, which will likely have `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`.
-Consider using [VK_KHR_dedicated_allocation](@ref vk_khr_dedicated_allocation) extension
-and/or manually creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT,
-especially if they are large or if you plan to destroy and recreate them e.g. when
-display resolution changes.
+\code
+VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
+imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
+imgCreateInfo.extent.width = 3840;
+imgCreateInfo.extent.height = 2160;
+imgCreateInfo.extent.depth = 1;
+imgCreateInfo.mipLevels = 1;
+imgCreateInfo.arrayLayers = 1;
+imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
+imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
+imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
+imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
+imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
+
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
+
+VkImage img;
+VmaAllocation alloc;
+vmaCreateImage(allocator, &imgCreateInfo, &allocCreateInfo, &img, &alloc, nullptr);
+\endcode
+
+Also consider:
+Creating them as dedicated allocations using #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT,
+especially if they are large or if you plan to destroy and recreate them with different sizes
+e.g. when display resolution changes.
Prefer to create such resources first and all other GPU resources (like textures and vertex buffers) later.
-\subsection usage_patterns_simple_immutable_resources Immutable resources
+
+\section usage_patterns_staging_copy_upload Staging copy for upload

When:
-Any resources that you fill on CPU only once (aka "immutable") or infrequently
-and then read frequently on GPU,
-e.g. textures, vertex and index buffers, constant buffers that don't change often.
+A "staging" buffer that you want to map and fill from CPU code, then use as a source of transfer
+to some GPU resource.

What to do:
-Create them in video memory that is fastest to access from GPU using
-#VMA_MEMORY_USAGE_GPU_ONLY.
+Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT.
+Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`.
-To initialize content of such resource, create a CPU-side (aka "staging") copy of it
-in system memory - #VMA_MEMORY_USAGE_CPU_ONLY, map it, fill it,
-and submit a transfer from it to the GPU resource.
-You can keep the staging copy if you need it for another upload transfer in the future.
-If you don't, you can destroy it or reuse this buffer for uploading different resource
-after the transfer finishes.
+\code
+VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
+bufCreateInfo.size = 65536;
+bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
-Prefer to create just buffers in system memory rather than images, even for uploading textures.
-Use `vkCmdCopyBufferToImage()`.
-Dont use images with `VK_IMAGE_TILING_LINEAR`.
+VmaAllocationCreateInfo allocCreateInfo = {};
+allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
+allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+    VMA_ALLOCATION_CREATE_MAPPED_BIT;
-\subsection usage_patterns_dynamic_resources Dynamic resources
+VkBuffer buf;
+VmaAllocation alloc;
+VmaAllocationInfo allocInfo;
+vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
+
+...
+
+memcpy(allocInfo.pMappedData, myData, myDataSize);
+\endcode
+
+Also consider:
+You can map the allocation using vmaMapMemory() or you can create it as persistently mapped
+using #VMA_ALLOCATION_CREATE_MAPPED_BIT, as in the example above.
+
+
+\section usage_patterns_readback Readback

When:
-Any resources that change frequently (aka "dynamic"), e.g. every frame or every draw call,
-written on CPU, read on GPU.
-
-What to do:
-Create them using #VMA_MEMORY_USAGE_CPU_TO_GPU.
-You can map it and write to it directly on CPU, as well as read from it on GPU.
-
-This is a more complex situation. Different solutions are possible,
-and the best one depends on specific GPU type, but you can use this simple approach for the start.
-Prefer to write to such resource sequentially (e.g. using `memcpy`).
-Don't perform random access or any reads from it on CPU, as it may be very slow.
-Also note that textures written directly from the host through a mapped pointer need to be in LINEAR not OPTIMAL layout.
-\subsection usage_patterns_readback Readback
-
-When:
-Resources that contain data written by GPU that you want to read back on CPU,
+Buffers for data written by or transferred from the GPU that you want to read back on the CPU,
e.g. results of some computations.

What to do:
-Create them using #VMA_MEMORY_USAGE_GPU_TO_CPU.
-You can write to them directly on GPU, as well as map and read them on CPU.
+Use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT.
+Let the library select the optimal memory type, which will always have `VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT`
+and `VK_MEMORY_PROPERTY_HOST_CACHED_BIT`.
-\section usage_patterns_advanced Advanced patterns +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; -\subsection usage_patterns_integrated_graphics Detecting integrated graphics +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; -You can support integrated graphics (like Intel HD Graphics, AMD APU) better -by detecting it in Vulkan. -To do it, call `vkGetPhysicalDeviceProperties()`, inspect -`VkPhysicalDeviceProperties::deviceType` and look for `VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU`. -When you find it, you can assume that memory is unified and all memory types are comparably fast -to access from GPU, regardless of `VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT`. +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); -You can then sum up sizes of all available memory heaps and treat them as useful for -your GPU resources, instead of only `DEVICE_LOCAL` ones. -You can also prefer to create your resources in memory types that are `HOST_VISIBLE` to map them -directly instead of submitting explicit transfer (see below). + -\subsection usage_patterns_direct_vs_transfer Direct access versus transfer +... -For resources that you frequently write on CPU and read on GPU, many solutions are possible: --# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY, - second copy in system memory using #VMA_MEMORY_USAGE_CPU_ONLY and submit explicit transfer each time. --# Create just a single copy using #VMA_MEMORY_USAGE_CPU_TO_GPU, map it and fill it on CPU, - read it directly on GPU. --# Create just a single copy using #VMA_MEMORY_USAGE_CPU_ONLY, map it and fill it on CPU, - read it directly on GPU. +const float* downloadedData = (const float*)allocInfo.pMappedData; +\endcode -Which solution is the most efficient depends on your resource and especially on the GPU. -It is best to measure it and then make the decision. -Some general recommendations: +\section usage_patterns_advanced_data_uploading Advanced data uploading -- On integrated graphics use (2) or (3) to avoid unnecessary time and memory overhead - related to using a second copy and making transfer. -- For small resources (e.g. constant buffers) use (2). - Discrete AMD cards have special 256 MiB pool of video memory that is directly mappable. - Even if the resource ends up in system memory, its data may be cached on GPU after first - fetch over PCIe bus. -- For larger resources (e.g. textures), decide between (1) and (2). - You may want to differentiate NVIDIA and AMD, e.g. by looking for memory type that is - both `DEVICE_LOCAL` and `HOST_VISIBLE`. When you find it, use (2), otherwise use (1). +For resources that you frequently write on CPU via mapped pointer and +frequently read on GPU e.g. as a uniform buffer (also called "dynamic"), multiple options are possible: +-# The easiest solution is to have one copy of the resource in `HOST_VISIBLE` memory, + even if it means system RAM (not `DEVICE_LOCAL`) on systems with a discrete graphics card, + and make the device reach out to that resource directly. + - Reads performed by the device will then go through PCI Express bus.
+ The performance of this access may be limited, but it may be fine depending on the size + of this resource (whether it is small enough to quickly end up in GPU cache) and the sparsity + of access. +-# On systems with unified memory (e.g. AMD APU or Intel integrated graphics, mobile chips), + a memory type may be available that is both `HOST_VISIBLE` (available for mapping) and `DEVICE_LOCAL` + (fast to access from the GPU). Then, it is likely the best choice for such type of resource. +-# Systems with a discrete graphics card and separate video memory may or may not expose + a memory type that is both `HOST_VISIBLE` and `DEVICE_LOCAL`, also known as Base Address Register (BAR). + If they do, it represents a piece of VRAM (or entire VRAM, if ReBAR is enabled in the motherboard BIOS) + that is available to CPU for mapping. + - Writes performed by the host to that memory go through PCI Express bus. + The performance of these writes may be limited, but it may be fine, especially on PCIe 4.0, + as long as rules of using uncached and write-combined memory are followed - only sequential writes and no reads. +-# Finally, you may need or prefer to create a separate copy of the resource in `DEVICE_LOCAL` memory, + a separate "staging" copy in `HOST_VISIBLE` memory and perform an explicit transfer command between them. -Similarly, for resources that you frequently write on GPU and read on CPU, multiple -solutions are possible: +Thankfully, VMA offers an aid to create and use such resources in the way optimal +for the current Vulkan device. To help the library make the best choice, +use flag #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT together with +#VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT. +It will then prefer a memory type that is both `DEVICE_LOCAL` and `HOST_VISIBLE` (integrated memory or BAR), +but if no such memory type is available or allocation from it fails +(PC graphics cards have only 256 MB of BAR by default, unless ReBAR is supported and enabled in BIOS), +it will fall back to `DEVICE_LOCAL` memory for fast GPU access. +It is then up to you to detect that the allocation ended up in a memory type that is not `HOST_VISIBLE`, +so you need to create another "staging" allocation and perform explicit transfers. --# Create one copy in video memory using #VMA_MEMORY_USAGE_GPU_ONLY, - second copy in system memory using #VMA_MEMORY_USAGE_GPU_TO_CPU and submit explicit tranfer each time. --# Create just single copy using #VMA_MEMORY_USAGE_GPU_TO_CPU, write to it directly on GPU, - map it and read it on CPU. +\code +VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; +bufCreateInfo.size = 65536; +bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + +VmaAllocationCreateInfo allocCreateInfo = {}; +allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; +allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + +VkBuffer buf; +VmaAllocation alloc; +VmaAllocationInfo allocInfo; +vmaCreateBuffer(allocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); -You should take some measurements to decide which option is faster in case of your specific -resource.
+VkMemoryPropertyFlags memPropFlags; +vmaGetAllocationMemoryProperties(allocator, alloc, &memPropFlags); -Note that textures accessed directly from the host through a mapped pointer need to be in LINEAR layout, -which may slow down their usage on the device. -Textures accessed only by the device and transfer operations can use OPTIMAL layout. +if(memPropFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) +{ + // Allocation ended up in a mappable memory and is already mapped - write to it directly. -If you don't want to specialize your code for specific types of GPUs, you can still make -an simple optimization for cases when your resource ends up in mappable memory to use it -directly in this case instead of creating CPU-side staging copy. -For details see [Finding out if memory is mappable](@ref memory_mapping_finding_if_memory_mappable). + // [Executed in runtime]: + memcpy(allocInfo.pMappedData, myData, myDataSize); +} +else +{ + // Allocation ended up in a non-mappable memory - need to transfer. + VkBufferCreateInfo stagingBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; + stagingBufCreateInfo.size = 65536; + stagingBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + + VmaAllocationCreateInfo stagingAllocCreateInfo = {}; + stagingAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + stagingAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | + VMA_ALLOCATION_CREATE_MAPPED_BIT; + + VkBuffer stagingBuf; + VmaAllocation stagingAlloc; + VmaAllocationInfo stagingAllocInfo; + vmaCreateBuffer(allocator, &stagingBufCreateInfo, &stagingAllocCreateInfo, + &stagingBuf, &stagingAlloc, &stagingAllocInfo); + + // [Executed in runtime]: + memcpy(stagingAllocInfo.pMappedData, myData, myDataSize); + VkBufferCopy bufCopy = { + 0, // srcOffset + 0, // dstOffset + myDataSize }; // size + vkCmdCopyBuffer(cmdBuf, stagingBuf, buf, 1, &bufCopy); +} +\endcode + +\section usage_patterns_other_use_cases Other use cases + +Here are some other, less obvious use cases and their recommended settings: + +- An image that is used only as transfer source and destination, but it should stay on the device, + as it is used to temporarily store a copy of some texture, e.g. from the current to the next frame, + for temporal antialiasing or other temporal effects. + - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO +- An image that is used only as transfer source and destination, but it should be placed + in the system RAM even though it doesn't need to be mapped, because it serves as a "swap" copy to evict + least recently used textures from VRAM. + - Use `VkImageCreateInfo::usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + as VMA needs a hint here to differentiate from the previous case. +- A buffer that you want to map and write from the CPU, directly read from the GPU + (e.g. as a uniform or vertex buffer), but you have a clear preference to place it in device or + host memory due to its large size.
+ - Use `VkBufferCreateInfo::usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT` + - Use VmaAllocationCreateInfo::usage = #VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE or #VMA_MEMORY_USAGE_AUTO_PREFER_HOST + - Use VmaAllocationCreateInfo::flags = #VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT \page configuration Configuration @@ -19596,6 +20021,8 @@ by using a helper library like [volk](https://github.com/zeux/volk). Third, VMA tries to fetch remaining pointers that are still null by calling `vkGetInstanceProcAddr` and `vkGetDeviceProcAddr` on its own. +You only need to fill in VmaVulkanFunctions::vkGetInstanceProcAddr and VmaVulkanFunctions::vkGetDeviceProcAddr. +Other pointers will be fetched automatically. If you want to disable this feature, set configuration macro: `#define VMA_DYNAMIC_VULKAN_FUNCTIONS 0`. Finally, all the function pointers required by the library (considering selected @@ -19620,7 +20047,7 @@ VmaAllocatorCreateInfo::pDeviceMemoryCallbacks. \section heap_memory_limit Device heap memory limit When device memory of certain heap runs out of free space, new allocations may -fail (returning error code) or they may succeed, silently pushing some existing +fail (returning error code) or they may succeed, silently pushing some existing memory blocks from GPU VRAM to system RAM (which degrades performance). This behavior is implementation-dependent - it depends on GPU vendor and graphics driver. @@ -19641,10 +20068,14 @@ VK_KHR_dedicated_allocation is a Vulkan extension which can be used to improve performance on some GPUs. It augments Vulkan API with possibility to query driver whether it prefers particular buffer or image to have its own, dedicated allocation (separate `VkDeviceMemory` block) for better efficiency - to be able -to do some internal optimizations. +to do some internal optimizations. The extension is supported by this library. +It will be used automatically when enabled. -The extension is supported by this library. It will be used automatically when -enabled. To enable it: +It has been promoted to core Vulkan 1.1, so if you use an eligible Vulkan version +and inform VMA about it by setting VmaAllocatorCreateInfo::vulkanApiVersion, +you are all set. + +Otherwise, if you want to use it as an extension: 1 . When creating Vulkan device, check if following 2 device extensions are supported (call `vkEnumerateDeviceExtensionProperties()`). @@ -19656,7 +20087,7 @@ If yes, enable them (fill `VkDeviceCreateInfo::ppEnabledExtensionNames`). If you enabled these extensions: 2 . Use #VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT flag when creating -your #VmaAllocator`to inform the library that you enabled required extensions +your #VmaAllocator to inform the library that you enabled required extensions and you want the library to use them. \code @@ -19671,7 +20102,7 @@ buffer using vmaCreateBuffer() or image using vmaCreateImage(). When using the extension together with Vulkan Validation Layer, you will receive warnings like this: - vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer. +_vkBindBufferMemory(): Binding memory to buffer 0x33 but vkGetBufferMemoryRequirements() has not been called on that buffer._ It is OK, you should just ignore it. It happens because you use function `vkGetBufferMemoryRequirements2KHR()` instead of standard @@ -19728,7 +20159,7 @@ out of the special `DEVICE_COHERENT` and `DEVICE_UNCACHED` memory types on eligi devices.
There are multiple ways to do it, for example: - You can request or prefer to allocate out of such memory types by adding - `VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags + `VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD` to VmaAllocationCreateInfo::requiredFlags or VmaAllocationCreateInfo::preferredFlags. Those flags can be freely mixed with other ways of \ref choosing_memory_type, like setting VmaAllocationCreateInfo::usage. - If you manually found memory type index to use for this purpose, force allocation @@ -19746,7 +20177,7 @@ accompanying this library. Device extension VK_KHR_buffer_device_address allow to fetch raw GPU pointer to a buffer and pass it for usage in a shader code. -It is promoted to core Vulkan 1.2. +It has been promoted to core Vulkan 1.2. If you want to use this feature in connection with VMA, follow these steps: @@ -19807,7 +20238,7 @@ accompanying this library. you must not call vmaGetAllocationInfo() and vmaMapMemory() from different threads at the same time if you pass the same #VmaAllocation object to these functions. -- #VmaVirtualBlock is also not safe to be used from multiple threads simultaneously. +- #VmaVirtualBlock is not safe to be used from multiple threads simultaneously. \section general_considerations_validation_layer_warnings Validation layer warnings @@ -19831,7 +20262,7 @@ The library uses following algorithm for allocation, in order: -# Try to find free range of memory in existing blocks. -# If failed, try to create a new block of `VkDeviceMemory`, with preferred block size. --# If failed, try to create such block with size/2, size/4, size/8. +-# If failed, try to create such block with size / 2, size / 4, size / 8. -# If failed, try to allocate separate `VkDeviceMemory` for this allocation, just like when you use #VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT. -# If failed, choose other memory type that meets the requirements specified in @@ -19842,28 +20273,29 @@ The library uses following algorithm for allocation, in order: Features deliberately excluded from the scope of this library: -- **Data transfer.** Uploading (streaming) and downloading data of buffers and images - between CPU and GPU memory and related synchronization is responsibility of the user. - Defining some "texture" object that would automatically stream its data from a - staging copy in CPU memory to GPU memory would rather be a feature of another, - higher-level library implemented on top of VMA. -- **Recreation of buffers and images.** Although the library has functions for - buffer and image creation (vmaCreateBuffer(), vmaCreateImage()), you need to - recreate these objects yourself after defragmentation. That is because the big - structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in - #VmaAllocation object. -- **Handling CPU memory allocation failures.** When dynamically creating small C++ - objects in CPU memory (not Vulkan memory), allocation failures are not checked - and handled gracefully, because that would complicate code significantly and - is usually not needed in desktop PC applications anyway. - Success of an allocation is just checked with an assert. -- **Code free of any compiler warnings.** Maintaining the library to compile and - work correctly on so many different platforms is hard enough. Being free of - any warnings, on any version of any compiler, is simply not feasible. 
- There are many preprocessor macros that make some variables unused, function parameters unreferenced, - or conditional expressions constant in some configurations. - The code of this library should not be bigger or more complicated just to silence these warnings. - It is recommended to disable such warnings instead. -- This is a C++ library with C interface. **Bindings or ports to any other programming languages** are welcome as external projects but - are not going to be included into this repository. +-# **Data transfer.** Uploading (streaming) and downloading data of buffers and images + between CPU and GPU memory and related synchronization is the responsibility of the user. + Defining some "texture" object that would automatically stream its data from a + staging copy in CPU memory to GPU memory would rather be a feature of another, + higher-level library implemented on top of VMA. + VMA doesn't record any commands to a `VkCommandBuffer`. It just allocates memory + (a rough sketch of such a user-side copy follows this list). +-# **Recreation of buffers and images.** Although the library has functions for + buffer and image creation: vmaCreateBuffer(), vmaCreateImage(), you need to + recreate these objects yourself after defragmentation. That is because the big + structures `VkBufferCreateInfo`, `VkImageCreateInfo` are not stored in + #VmaAllocation object. +-# **Handling CPU memory allocation failures.** When dynamically creating small C++ + objects in CPU memory (not Vulkan memory), allocation failures are not checked + and handled gracefully, because that would complicate code significantly and + is usually not needed in desktop PC applications anyway. + Success of an allocation is just checked with an assert. +-# **Code free of any compiler warnings.** Maintaining the library to compile and + work correctly on so many different platforms is hard enough. Being free of + any warnings, on any version of any compiler, is simply not feasible. + There are many preprocessor macros that make some variables unused, function parameters unreferenced, + or conditional expressions constant in some configurations. + The code of this library should not be bigger or more complicated just to silence these warnings. + It is recommended to disable such warnings instead. +-# This is a C++ library with C interface. **Bindings or ports to any other programming languages** are welcome as external projects but + are not going to be included in this repository.
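+
+For illustration only, here is a rough sketch of such a user-side upload (not part of the library;
+`cmdBuf`, `stagingBuf`, `deviceBuf`, and `dataSize` are hypothetical, and synchronization such as barriers,
+queue submission, and waiting is omitted):
+
+\code
+// Record a copy from the host-visible staging buffer into the device-local buffer.
+VkBufferCopy region = {};
+region.srcOffset = 0;
+region.dstOffset = 0;
+region.size = dataSize;
+vkCmdCopyBuffer(cmdBuf, stagingBuf, deviceBuf, 1, &region);
+\endcode
+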
*/ diff --git a/src/SparseBindingTest.cpp b/src/SparseBindingTest.cpp index ed7cadf..c5383c8 100644 --- a/src/SparseBindingTest.cpp +++ b/src/SparseBindingTest.cpp @@ -146,8 +146,8 @@ void BaseImage::UploadContent() srcBufCreateInfo.size = 4 * m_CreateInfo.extent.width * m_CreateInfo.extent.height; VmaAllocationCreateInfo srcBufAllocCreateInfo = {}; - srcBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; - srcBufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + srcBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + srcBufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; VkBuffer srcBuf = nullptr; VmaAllocation srcBufAlloc = nullptr; @@ -261,8 +261,8 @@ void BaseImage::ValidateContent(RandomNumberGenerator& rand) dstBufCreateInfo.size = valueCount * sizeof(uint32_t) * 3; VmaAllocationCreateInfo dstBufAllocCreateInfo = {}; - dstBufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; - dstBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_TO_CPU; + dstBufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; + dstBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; VkBuffer dstBuf = nullptr; VmaAllocation dstBufAlloc = nullptr; @@ -438,7 +438,7 @@ void TraditionalImage::Init(RandomNumberGenerator& rand) FillImageCreateInfo(rand); VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; // Default BEST_FIT is clearly better. //allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT; @@ -484,7 +484,7 @@ void SparseBindingImage::Init(RandomNumberGenerator& rand) const uint32_t pageCount = (uint32_t)ceil_div(imageMemReq.size, pageSize); VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; VkMemoryRequirements pageMemReq = imageMemReq; pageMemReq.size = pageSize; diff --git a/src/Tests.cpp b/src/Tests.cpp index 9212fe2..0e4485d 100644 --- a/src/Tests.cpp +++ b/src/Tests.cpp @@ -855,8 +855,8 @@ bool StagingBufferCollection::AcquireBuffer(VkDeviceSize size, VkBuffer& outBuff bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; - allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; VmaAllocationInfo allocInfo; VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo); @@ -1406,7 +1406,8 @@ void TestDefragmentationSimple() bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo exampleAllocCreateInfo = {}; - exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + exampleAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; uint32_t memTypeIndex = UINT32_MAX; vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex); @@ -1595,7 +1596,8 @@ void TestDefragmentationWholePool() bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo exampleAllocCreateInfo = {}; - exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + 
exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + exampleAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; uint32_t memTypeIndex = UINT32_MAX; vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex); @@ -1770,7 +1772,7 @@ static void TestDefragmentationGpu() VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; allocCreateInfo.flags = 0; // Create all intended buffers. @@ -2099,7 +2101,7 @@ static void TestDefragmentationIncrementalBasic() imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; allocCreateInfo.flags = 0; // Create all intended images. @@ -2283,7 +2285,7 @@ void TestDefragmentationIncrementalComplex() imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; allocCreateInfo.flags = 0; // Create all intended images. @@ -2484,7 +2486,7 @@ static void TestUserData() void* pointerToSomething = &res; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; allocCreateInfo.pUserData = numberAsPointer; if(testIndex == 1) allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; @@ -2514,7 +2516,7 @@ static void TestUserData() strcpy_s(name1Buf, name1Len + 1, name1); VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT; allocCreateInfo.pUserData = name1Buf; if(testIndex == 1) @@ -2549,7 +2551,6 @@ static void TestInvalidAllocations() VkResult res; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; // Try to allocate 0 bytes. { @@ -2615,8 +2616,9 @@ static void TestMemoryRequirements() TEST(res == VK_SUCCESS); vmaDestroyBuffer(g_hAllocator, buf, alloc); - // Usage. - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + // Usage = auto + host access. + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; allocCreateInfo.requiredFlags = 0; allocCreateInfo.preferredFlags = 0; allocCreateInfo.memoryTypeBits = UINT32_MAX; @@ -2628,6 +2630,7 @@ static void TestMemoryRequirements() // Required flags, preferred flags. allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN; + allocCreateInfo.flags = 0; allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; allocCreateInfo.memoryTypeBits = 0; @@ -2640,7 +2643,8 @@ static void TestMemoryRequirements() // memoryTypeBits. 
const uint32_t memType = allocInfo.memoryType; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN; + allocCreateInfo.flags = 0; allocCreateInfo.requiredFlags = 0; allocCreateInfo.preferredFlags = 0; allocCreateInfo.memoryTypeBits = 1u << memType; @@ -2680,8 +2684,8 @@ static void TestBasics() bufCreateInfo.size = 128; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; - allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo; res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); @@ -2689,7 +2693,7 @@ static void TestBasics() vmaDestroyBuffer(g_hAllocator, buf, alloc); - // Same with OWN_MEMORY. + // Same with DEDICATED_MEMORY. allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo); @@ -3023,11 +3027,15 @@ static void TestAllocationVersusResourceSize() bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; for(uint32_t i = 0; i < 2; ++i) { - allocCreateInfo.flags = (i == 1) ? VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT : 0; + if(i == 1) + allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; + else + allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; AllocInfo info; info.CreateBuffer(bufCreateInfo, allocCreateInfo); @@ -3062,7 +3070,7 @@ static void TestPool_MinBlockCount() static const VkDeviceSize BLOCK_SIZE = ALLOC_SIZE * 2; // Each block can fit 2 allocations. 
VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_COPY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; @@ -3133,7 +3141,7 @@ static void TestPool_MinAllocationAlignment() static const VkDeviceSize MIN_ALLOCATION_ALIGNMENT = 64 * 1024; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_COPY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO }; bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; @@ -3340,7 +3348,7 @@ void TestHeapSizeLimit() VmaAllocationInfo dedicatedAllocInfo; { VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; bufCreateInfo.size = BLOCK_SIZE / 2; @@ -3422,7 +3430,8 @@ static void TestDebugMargin() bufInfo.size = 256; // Doesn't matter VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; VmaPoolCreateInfo poolCreateInfo = {}; TEST(vmaFindMemoryTypeIndexForBufferInfo( @@ -3541,7 +3550,7 @@ static void TestLinearAllocator() sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; VmaAllocationCreateInfo sampleAllocCreateInfo = {}; - sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; VmaPoolCreateInfo poolCreateInfo = {}; VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex); @@ -3857,7 +3866,7 @@ static void TestLinearAllocatorMultiBlock() sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo sampleAllocCreateInfo = {}; - sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; VmaPoolCreateInfo poolCreateInfo = {}; poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT; @@ -4180,7 +4189,7 @@ static void ManuallyTestLinearAllocator() sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; VmaAllocationCreateInfo sampleAllocCreateInfo = {}; - sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; VmaPoolCreateInfo poolCreateInfo = {}; VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex); @@ -4300,7 +4309,7 @@ static void BenchmarkAlgorithmsCase(FILE* file, sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; VmaAllocationCreateInfo sampleAllocCreateInfo = {}; - sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; VmaPoolCreateInfo poolCreateInfo = {}; VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex); @@ -4447,7 +4456,7 @@ 
static void TestBufferDeviceAddress() VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; // !!! VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; for(uint32_t testIndex = 0; testIndex < 2; ++testIndex) { @@ -4481,7 +4490,7 @@ static void TestMemoryPriority() bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; allocCreateInfo.priority = 1.f; for(uint32_t testIndex = 0; testIndex < 2; ++testIndex) @@ -4607,7 +4616,7 @@ static void TestPool_SameSize() } VmaAllocationCreateInfo poolAllocInfo = {}; - poolAllocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + poolAllocInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT; uint32_t memTypeIndex; res = vmaFindMemoryTypeIndex( g_hAllocator, @@ -4821,7 +4830,8 @@ static void TestAllocationsInitialization() bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo dummyBufAllocCreateInfo = {}; - dummyBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + dummyBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + dummyBufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; VmaPoolCreateInfo poolCreateInfo = {}; poolCreateInfo.blockSize = BUF_SIZE * 10; @@ -4978,7 +4988,7 @@ static void TestPool_Benchmark( while(memoryTypeBits) { VmaAllocationCreateInfo dummyAllocCreateInfo = {}; - dummyAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + dummyAllocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; vmaFindMemoryTypeIndex(g_hAllocator, memoryTypeBits, &dummyAllocCreateInfo, &poolCreateInfo.memoryTypeIndex); const uint32_t heapIndex = memProps->memoryTypes[poolCreateInfo.memoryTypeIndex].heapIndex; @@ -5667,7 +5677,7 @@ static void TestBudget() bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; if(testIndex == 0) { allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; @@ -5786,7 +5796,7 @@ static void TestAliasing() img1MemReq.memoryTypeBits, img2MemReq.memoryTypeBits, finalMemReq.memoryTypeBits); VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; VmaAllocation alloc = VK_NULL_HANDLE; ERR_GUARD_VULKAN(vmaAllocateMemory(g_hAllocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr)); @@ -5830,7 +5840,7 @@ static void TestAllocationAliasing() VmaAllocationCreateInfo allocationCreateInfo = {}; allocationCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; - allocationCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocationCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; // Bind 2 textures together into same memory without VMA_ALLOCATION_CREATE_CAN_ALIAS_BIT and then with flag set /* @@ -5909,7 +5919,8 @@ static void TestMapping() bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; allocCreateInfo.pool = pool; if(testIndex == 
TEST_DEDICATED) allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; @@ -6059,7 +6070,8 @@ static void TestMappingMultithreaded() bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; allocCreateInfo.pool = pool; if(testIndex == TEST_DEDICATED) allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; @@ -6895,7 +6907,6 @@ static void BasicTestBuddyAllocator() sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; VmaAllocationCreateInfo sampleAllocCreateInfo = {}; - sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; VmaPoolCreateInfo poolCreateInfo = {}; VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex); @@ -7000,7 +7011,8 @@ static void BasicTestAllocatePages() sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo sampleAllocCreateInfo = {}; - sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + sampleAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; VmaPoolCreateInfo poolCreateInfo = {}; VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex); @@ -7062,7 +7074,7 @@ static void BasicTestAllocatePages() memReq.size = 4 * 1024; VmaAllocationCreateInfo dedicatedAllocCreateInfo = {}; - dedicatedAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; + dedicatedAllocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; dedicatedAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT; res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &dedicatedAllocCreateInfo, allocCount, alloc.data(), allocInfo.data()); TEST(res == VK_SUCCESS); @@ -7104,7 +7116,7 @@ static void TestGpuData() info.m_BufferInfo.size = 1024 * 1024 * (rand.Generate() % 9 + 1); VmaAllocationCreateInfo allocCreateInfo = {}; - allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; VkResult res = vmaCreateBuffer(g_hAllocator, &info.m_BufferInfo, &allocCreateInfo, &info.m_Buffer, &info.m_Allocation, nullptr); TEST(res == VK_SUCCESS); @@ -7251,6 +7263,142 @@ static void TestVirtualBlocksAlgorithmsBenchmark() } } +static void TestMappingHysteresis() +{ + /* + We have no way to check here if hysteresis worked as expected, + but at least we provoke some cases and make sure it doesn't crash or assert. + You can always check details with the debugger. 
+ */ + + wprintf(L"Test mapping hysteresis\n"); + + VkBufferCreateInfo bufCreateInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + bufCreateInfo.size = 0x10000; + + VmaAllocationCreateInfo templateAllocCreateInfo = {}; + templateAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + templateAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + + VmaPoolCreateInfo poolCreateInfo = {}; + poolCreateInfo.blockSize = 10 * MEGABYTE; + poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1; + TEST(vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, + &bufCreateInfo, &templateAllocCreateInfo, &poolCreateInfo.memoryTypeIndex) == VK_SUCCESS); + + constexpr uint32_t BUF_COUNT = 30; + bool endOfScenarios = false; + for(uint32_t scenarioIndex = 0; !endOfScenarios; ++scenarioIndex) + { + VmaPool pool; + TEST(vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) == VK_SUCCESS); + + BufferInfo buf; + VmaAllocationInfo allocInfo; + + std::vector<BufferInfo> bufs; + + // Scenario: Create + destroy buffers without mapping. Hysteresis should not launch. + if(scenarioIndex == 0) + { + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.pool = pool; + + for(uint32_t bufIndex = 0; bufIndex < BUF_COUNT; ++bufIndex) + { + TEST(vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf.Buffer, &buf.Allocation, &allocInfo) == VK_SUCCESS); + TEST(allocInfo.pMappedData == nullptr); + vmaDestroyBuffer(g_hAllocator, buf.Buffer, buf.Allocation); + } + } + // Scenario: + // - Create one buffer mapped that stays there. + // - Create + destroy mapped buffers back and forth. Hysteresis should launch. + else if(scenarioIndex == 1) + { + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.pool = pool; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + + TEST(vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf.Buffer, &buf.Allocation, &allocInfo) == VK_SUCCESS); + TEST(allocInfo.pMappedData != nullptr); + bufs.push_back(buf); + + for(uint32_t bufIndex = 0; bufIndex < BUF_COUNT; ++bufIndex) + { + TEST(vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf.Buffer, &buf.Allocation, &allocInfo) == VK_SUCCESS); + TEST(allocInfo.pMappedData != nullptr); + vmaDestroyBuffer(g_hAllocator, buf.Buffer, buf.Allocation); + } + } + // Scenario: Create + destroy mapped buffers. + // Hysteresis should launch as it maps and unmaps back and forth. + else if(scenarioIndex == 2) + { + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.pool = pool; + allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + + for(uint32_t bufIndex = 0; bufIndex < BUF_COUNT; ++bufIndex) + { + TEST(vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf.Buffer, &buf.Allocation, &allocInfo) == VK_SUCCESS); + TEST(allocInfo.pMappedData != nullptr); + vmaDestroyBuffer(g_hAllocator, buf.Buffer, buf.Allocation); + } + } + // Scenario: Create one buffer and map it back and forth. Hysteresis should launch.
+ else if(scenarioIndex == 3) + { + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.pool = pool; + + TEST(vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf.Buffer, &buf.Allocation, &allocInfo) == VK_SUCCESS); + + for(uint32_t i = 0; i < BUF_COUNT; ++i) + { + void* mappedData = nullptr; + TEST(vmaMapMemory(g_hAllocator, buf.Allocation, &mappedData) == VK_SUCCESS); + TEST(mappedData != nullptr); + vmaUnmapMemory(g_hAllocator, buf.Allocation); + } + + vmaDestroyBuffer(g_hAllocator, buf.Buffer, buf.Allocation); + } + // Scenario: + // - Create many buffers + // - Map + unmap one of them many times. Hysteresis should launch. + // - Hysteresis should unmap during freeing the buffers. + else if(scenarioIndex == 4) + { + VmaAllocationCreateInfo allocCreateInfo = {}; + allocCreateInfo.pool = pool; + + for(uint32_t bufIndex = 0; bufIndex < BUF_COUNT; ++bufIndex) + { + TEST(vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf.Buffer, &buf.Allocation, &allocInfo) == VK_SUCCESS); + TEST(allocInfo.pMappedData == nullptr); + bufs.push_back(buf); + } + + for(uint32_t i = 0; i < BUF_COUNT; ++i) + { + void* mappedData = nullptr; + TEST(vmaMapMemory(g_hAllocator, buf.Allocation, &mappedData) == VK_SUCCESS); + TEST(mappedData != nullptr); + vmaUnmapMemory(g_hAllocator, buf.Allocation); + } + } + else + endOfScenarios = true; + + for(size_t i = bufs.size(); i--; ) + vmaDestroyBuffer(g_hAllocator, bufs[i].Buffer, bufs[i].Allocation); + + vmaDestroyPool(g_hAllocator, pool); + } +} + void Test() { wprintf(L"TESTING:\n"); @@ -7288,6 +7436,7 @@ void Test() TestAliasing(); TestAllocationAliasing(); TestMapping(); + TestMappingHysteresis(); TestDeviceLocalMapped(); TestMappingMultithreaded(); TestLinearAllocator(); diff --git a/src/VmaUsage.h b/src/VmaUsage.h index 7a7d770..47cc87d 100644 --- a/src/VmaUsage.h +++ b/src/VmaUsage.h @@ -57,7 +57,9 @@ include all public interface declarations. 
Example: //#define VMA_USE_STL_SHARED_MUTEX 0 //#define VMA_MEMORY_BUDGET 0 //#define VMA_STATS_STRING_ENABLED 0 +//#define VMA_MAPPING_HYSTERESIS_ENABLED 0 +//#define VMA_VULKAN_VERSION 1003000 // Vulkan 1.3 #define VMA_VULKAN_VERSION 1002000 // Vulkan 1.2 //#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1 //#define VMA_VULKAN_VERSION 1000000 // Vulkan 1.0 diff --git a/src/VulkanSample.cpp b/src/VulkanSample.cpp index caff8af..87f8a42 100644 --- a/src/VulkanSample.cpp +++ b/src/VulkanSample.cpp @@ -393,7 +393,9 @@ static VkExtent2D ChooseSwapExtent() static constexpr uint32_t GetVulkanApiVersion() { -#if VMA_VULKAN_VERSION == 1002000 +#if VMA_VULKAN_VERSION == 1003000 + return VK_API_VERSION_1_3; +#elif VMA_VULKAN_VERSION == 1002000 return VK_API_VERSION_1_2; #elif VMA_VULKAN_VERSION == 1001000 return VK_API_VERSION_1_1; @@ -486,6 +488,7 @@ void VulkanUsage::Init() case VK_API_VERSION_1_0: wprintf(L"1.0\n"); break; case VK_API_VERSION_1_1: wprintf(L"1.1\n"); break; case VK_API_VERSION_1_2: wprintf(L"1.2\n"); break; + case VK_API_VERSION_1_3: wprintf(L"1.3\n"); break; default: assert(0); } @@ -672,8 +675,8 @@ static void CreateMesh() vbInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VmaAllocationCreateInfo vbAllocCreateInfo = {}; - vbAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; - vbAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + vbAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + vbAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; VkBuffer stagingVertexBuffer = VK_NULL_HANDLE; VmaAllocation stagingVertexBufferAlloc = VK_NULL_HANDLE; @@ -685,7 +688,6 @@ static void CreateMesh() // No need to flush stagingVertexBuffer memory because CPU_ONLY memory is always HOST_COHERENT. vbInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; - vbAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; vbAllocCreateInfo.flags = 0; ERR_GUARD_VULKAN( vmaCreateBuffer(g_hAllocator, &vbInfo, &vbAllocCreateInfo, &g_hVertexBuffer, &g_hVertexBufferAlloc, nullptr) ); @@ -697,8 +699,8 @@ static void CreateMesh() ibInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; VmaAllocationCreateInfo ibAllocCreateInfo = {}; - ibAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; - ibAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + ibAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + ibAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; VkBuffer stagingIndexBuffer = VK_NULL_HANDLE; VmaAllocation stagingIndexBufferAlloc = VK_NULL_HANDLE; @@ -710,7 +712,6 @@ static void CreateMesh() // No need to flush stagingIndexBuffer memory because CPU_ONLY memory is always HOST_COHERENT. 
ibInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT; - ibAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; ibAllocCreateInfo.flags = 0; ERR_GUARD_VULKAN( vmaCreateBuffer(g_hAllocator, &ibInfo, &ibAllocCreateInfo, &g_hIndexBuffer, &g_hIndexBufferAlloc, nullptr) ); @@ -747,8 +748,8 @@ static void CreateTexture(uint32_t sizeX, uint32_t sizeY) stagingBufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; VmaAllocationCreateInfo stagingBufAllocCreateInfo = {}; - stagingBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY; - stagingBufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT; + stagingBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; + stagingBufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT; VkBuffer stagingBuf = VK_NULL_HANDLE; VmaAllocation stagingBufAlloc = VK_NULL_HANDLE; @@ -792,7 +793,7 @@ static void CreateTexture(uint32_t sizeX, uint32_t sizeY) imageInfo.flags = 0; VmaAllocationCreateInfo imageAllocCreateInfo = {}; - imageAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + imageAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; ERR_GUARD_VULKAN( vmaCreateImage(g_hAllocator, &imageInfo, &imageAllocCreateInfo, &g_hTextureImage, &g_hTextureImageAlloc, nullptr) ); @@ -1021,7 +1022,7 @@ static void CreateSwapchain() depthImageInfo.flags = 0; VmaAllocationCreateInfo depthImageAllocCreateInfo = {}; - depthImageAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY; + depthImageAllocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO; ERR_GUARD_VULKAN( vmaCreateImage(g_hAllocator, &depthImageInfo, &depthImageAllocCreateInfo, &g_hDepthImage, &g_hDepthImageAlloc, nullptr) );