From 0f3bea06ef2f1f232da22ef8339b92230f446742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nathan=20Gau=C3=ABr?= Date: Fri, 7 Jul 2023 16:41:52 +0200 Subject: [PATCH] NFC: rewrite EnumSet to handle larger enums. (#5289) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current EnumSet implementation is only efficient for enums with values smaller than 64. The reason is that the first 64 values (0 to 63) are stored as a bitmask in a 64-bit unsigned integer, and the other values are stored in a std::set. For small enums, this is fine (most SPIR-V enums have IDs smaller than 64), but performance starts to drop with larger enums (Capabilities, opcodes). Design considerations: ---------------------- This PR changes the internal behavior of the EnumSet to handle enums with arbitrary values while staying performant. The idea is to extend the 64-bit buckets sparsely: - each bucket can store 64 values, starting from a multiple of 64. This could be considered as a hashset with linear probing. - For small enums, there is a slight memory overhead due to the bucket storage, but lookup is still constant. - For linearly distributed values, lookup is constant. - Worst case for storage is enums with values which are multiples of 64. But lookup is constant. - Worst case for lookup is enums with a lot of small ranges scattered in the space (requires linear probing). For enums like capabilities/opcodes, this bucketing is useful as values are usually scattered in distinct, but almost contiguous blocks. (vendors usually have allocated ranges, like [5000;5500], while [1000;5000] is mostly unused). Benchmarking: ------------- Benchmarking was done in 2 ways: - a benchmark built for the occasion, which only measures the EnumSet performance. - SPIRV-Tools tests, to measure a more realistic scenario. Running SPIR-V tests with both implementations shows the same performance (delta < noise). So it seems like we have no regressions. 
This method is noisy by nature (I/O, etc.), but is the most representative of a real-life scenario. Protocol: - run spirv-tests with no stdout using perf, multiple times. Result: - measurement noise is larger than the observed difference. The custom benchmark was testing EnumSet interfaces using SPIRV enums. It performs thousands of insertions/deletions/lookups, with 2 kinds of scenarios: - add once, lookup many times. - add/delete/lookup many times. For small enums, results are similar (delta < noise). This is consistent with the previously observed results as most SPIRV enums are small, and SPIRV-Tools is not doing that many intensive operations on EnumSets. Performance on large enums (opcode/capabilities) shows an improvement: +-----------------------------+---------+---------+---------+ | Metric | Old | New | Delta % | +-----------------------------+---------+---------+---------+ | Execution time | 27s | 7s | -72% | | Instruction count | 174b | 129b | -25% | | Branch count | 28b | 33b | +17% | | Branch miss | 490m | 26m | -94% | | Cache-misses | 149k | 26k | -82% | +-----------------------------+---------+---------+---------+ Future work ----------- This was by-design an NFC change to compare apples-to-apples. The next PR aims to add STL-like iterators to the EnumSet to allow using it with STL algorithms, and range-based for loops. Signed-off-by: Nathan Gauër --- source/enum_set.h | 388 +++++++++++++++---------- source/extensions.h | 3 +- source/val/validation_state.cpp | 1 + test/enum_set_test.cpp | 490 +++++++++++++++++++++++++++----- 4 files changed, 666 insertions(+), 216 deletions(-) diff --git a/source/enum_set.h b/source/enum_set.h index 28ee5fee8..9b0bb5b4e 100644 --- a/source/enum_set.h +++ b/source/enum_set.h @@ -1,4 +1,4 @@ -// Copyright (c) 2016 Google Inc. +// Copyright (c) 2023 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -12,195 +12,289 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include +#include +#include +#include +#include +#include + #ifndef SOURCE_ENUM_SET_H_ #define SOURCE_ENUM_SET_H_ -#include -#include -#include -#include -#include - #include "source/latest_version_spirv_header.h" -#include "source/util/make_unique.h" namespace spvtools { -// A set of values of a 32-bit enum type. -// It is fast and compact for the common case, where enum values -// are at most 63. But it can represent enums with larger values, -// as may appear in extensions. -template +// This container is optimized to store and retrieve unsigned enum values. +// The base model for this implementation is an open-addressing hashtable with +// linear probing. For small enums (max index < 64), all operations are O(1). +// +// - Enums are stored in buckets (64 contiguous values max per bucket) +// - Buckets ranges don't overlap, but don't have to be contiguous. +// - Enums are packed into 64-bits buckets, using 1 bit per enum value. +// +// Example: +// - MyEnum { A = 0, B = 1, C = 64, D = 65 } +// - 2 buckets are required: +// - bucket 0, storing values in the range [ 0; 64[ +// - bucket 1, storing values in the range [64; 128[ +// +// - Buckets are stored in a sorted vector (sorted by bucket range). +// - Retrieval is done by computing the theoretical bucket index using the enum +// value, and +// doing a linear scan from this position. +// - Insertion is done by retrieving the bucket and either: +// - inserting a new bucket in the sorted vector when no buckets has a +// compatible range. +// - setting the corresponding bit in the bucket. +// This means insertion in the middle/beginning can cause a memmove when no +// bucket is available. In our case, this happens at most 23 times for the +// largest enum we have (Opcodes). 
+template class EnumSet { private: - // The ForEach method will call the functor on enum values in - // enum value order (lowest to highest). To make that easier, use - // an ordered set for the overflow values. - using OverflowSetType = std::set; + using BucketType = uint64_t; + using ElementType = std::underlying_type_t; + static_assert(std::is_enum_v, "EnumSets only works with enums."); + static_assert(std::is_signed_v == false, + "EnumSet doesn't supports signed enums."); + + // Each bucket can hold up to `kBucketSize` distinct, contiguous enum values. + // The first value a bucket can hold must be aligned on `kBucketSize`. + struct Bucket { + // bit mask to store `kBucketSize` enums. + BucketType data; + // 1st enum this bucket can represent. + T start; + + friend bool operator==(const Bucket& lhs, const Bucket& rhs) { + return lhs.start == rhs.start && lhs.data == rhs.data; + } + }; + + // How many distinct values can a bucket hold? 1 bit per value. + static constexpr size_t kBucketSize = sizeof(BucketType) * 8ULL; public: - // Construct an empty set. - EnumSet() {} - // Construct an set with just the given enum value. - explicit EnumSet(EnumType c) { Add(c); } - // Construct an set from an initializer list of enum values. - EnumSet(std::initializer_list cs) { - for (auto c : cs) Add(c); - } - EnumSet(uint32_t count, const EnumType* ptr) { - for (uint32_t i = 0; i < count; ++i) Add(ptr[i]); - } - // Copy constructor. - EnumSet(const EnumSet& other) { *this = other; } - // Move constructor. The moved-from set is emptied. - EnumSet(EnumSet&& other) { - mask_ = other.mask_; - overflow_ = std::move(other.overflow_); - other.mask_ = 0; - other.overflow_.reset(nullptr); - } - // Assignment operator. - EnumSet& operator=(const EnumSet& other) { - if (&other != this) { - mask_ = other.mask_; - overflow_.reset(other.overflow_ ? new OverflowSetType(*other.overflow_) - : nullptr); + // Creates an empty set. 
+ EnumSet() : buckets_(0) {} + + // Creates a set and store `value` in it. + EnumSet(T value) : EnumSet() { Add(value); } + + // Creates a set and stores each `values` in it. + EnumSet(std::initializer_list values) : EnumSet() { + for (auto item : values) { + Add(item); } + } + + // Creates a set, and insert `count` enum values pointed by `array` in it. + EnumSet(ElementType count, const T* array) : EnumSet() { + for (ElementType i = 0; i < count; i++) { + Add(array[i]); + } + } + + // Copies the EnumSet `other` into a new EnumSet. + EnumSet(const EnumSet& other) : buckets_(other.buckets_) {} + + // Moves the EnumSet `other` into a new EnumSet. + EnumSet(EnumSet&& other) : buckets_(std::move(other.buckets_)) {} + + // Deep-copies the EnumSet `other` into this EnumSet. + EnumSet& operator=(const EnumSet& other) { + buckets_ = other.buckets_; return *this; } - friend bool operator==(const EnumSet& a, const EnumSet& b) { - if (a.mask_ != b.mask_) { + // Add the enum value `value` into the set. + // The set is unchanged if the value already exists. + void Add(T value) { + const size_t index = FindBucketForValue(value); + if (index >= buckets_.size() || + buckets_[index].start != ComputeBucketStart(value)) { + InsertBucketFor(index, value); + return; + } + auto& bucket = buckets_[index]; + bucket.data |= ComputeMaskForValue(value); + } + + // Removes the value `value` into the set. + // The set is unchanged if the value is not in the set. + void Remove(T value) { + const size_t index = FindBucketForValue(value); + if (index >= buckets_.size() || + buckets_[index].start != ComputeBucketStart(value)) { + return; + } + auto& bucket = buckets_[index]; + bucket.data &= ~ComputeMaskForValue(value); + if (bucket.data == 0) { + buckets_.erase(buckets_.cbegin() + index); + } + } + + // Returns true if `value` is present in the set. 
+ bool Contains(T value) const { + const size_t index = FindBucketForValue(value); + if (index >= buckets_.size() || + buckets_[index].start != ComputeBucketStart(value)) { return false; } + auto& bucket = buckets_[index]; + return bucket.data & ComputeMaskForValue(value); + } - if (a.overflow_ == nullptr && b.overflow_ == nullptr) { + // Calls `unaryFunction` once for each value in the set. + // Values are sorted in increasing order using their numerical values. + void ForEach(std::function unaryFunction) const { + for (const auto& bucket : buckets_) { + for (uint8_t i = 0; i < kBucketSize; i++) { + if (bucket.data & (1ULL << i)) { + unaryFunction(GetValueFromBucket(bucket, i)); + } + } + } + } + + // Returns true if the set is holds no values. + bool IsEmpty() const { return buckets_.size() == 0; } + + // Returns true if this set contains at least one value contained in `in_set`. + // Note: If `in_set` is empty, this function returns true. + bool HasAnyOf(const EnumSet& in_set) const { + if (in_set.IsEmpty()) { return true; } - if (a.overflow_ == nullptr || b.overflow_ == nullptr) { - return false; - } + auto lhs = buckets_.cbegin(); + auto rhs = in_set.buckets_.cbegin(); - return *a.overflow_ == *b.overflow_; - } + while (lhs != buckets_.cend() && rhs != in_set.buckets_.cend()) { + if (lhs->start == rhs->start) { + if (lhs->data & rhs->data) { + // At least 1 bit is shared. Early return. + return true; + } - friend bool operator!=(const EnumSet& a, const EnumSet& b) { - return !(a == b); - } + lhs++; + rhs++; + continue; + } - // Adds the given enum value to the set. This has no effect if the - // enum value is already in the set. - void Add(EnumType c) { AddWord(ToWord(c)); } + // LHS bucket is smaller than the current RHS bucket. Catching up on RHS. + if (lhs->start < rhs->start) { + lhs++; + continue; + } - // Removes the given enum value from the set. This has no effect if the - // enum value is not in the set. 
- void Remove(EnumType c) { RemoveWord(ToWord(c)); } - - // Returns true if this enum value is in the set. - bool Contains(EnumType c) const { return ContainsWord(ToWord(c)); } - - // Applies f to each enum in the set, in order from smallest enum - // value to largest. - void ForEach(std::function f) const { - for (uint32_t i = 0; i < 64; ++i) { - if (mask_ & AsMask(i)) f(static_cast(i)); - } - if (overflow_) { - for (uint32_t c : *overflow_) f(static_cast(c)); - } - } - - // Returns true if the set is empty. - bool IsEmpty() const { - if (mask_) return false; - if (overflow_ && !overflow_->empty()) return false; - return true; - } - - // Returns true if the set contains ANY of the elements of |in_set|, - // or if |in_set| is empty. - bool HasAnyOf(const EnumSet& in_set) const { - if (in_set.IsEmpty()) return true; - - if (mask_ & in_set.mask_) return true; - - if (!overflow_ || !in_set.overflow_) return false; - - for (uint32_t item : *in_set.overflow_) { - if (overflow_->find(item) != overflow_->end()) return true; + // Otherwise, RHS needs to catch up on LHS. + rhs++; } return false; } private: - // Adds the given enum value (as a 32-bit word) to the set. This has no - // effect if the enum value is already in the set. - void AddWord(uint32_t word) { - if (auto new_bits = AsMask(word)) { - mask_ |= new_bits; - } else { - Overflow().insert(word); + // Returns the index of the last bucket in which `value` could be stored. + static constexpr inline size_t ComputeLargestPossibleBucketIndexFor(T value) { + return static_cast(value) / kBucketSize; + } + + // Returns the smallest enum value that could be contained in the same bucket + // as `value`. + static constexpr inline T ComputeBucketStart(T value) { + return static_cast(kBucketSize * + ComputeLargestPossibleBucketIndexFor(value)); + } + + // Returns the index of the bit that corresponds to `value` in the bucket. 
+ static constexpr inline size_t ComputeBucketOffset(T value) { + return static_cast(value) % kBucketSize; + } + + // Returns the bitmask used to represent the enum `value` in its bucket. + static constexpr inline BucketType ComputeMaskForValue(T value) { + return 1ULL << ComputeBucketOffset(value); + } + + // Returns the `enum` stored in `bucket` at `offset`. + // `offset` is the bit-offset in the bucket storage. + static constexpr inline T GetValueFromBucket(const Bucket& bucket, + ElementType offset) { + return static_cast(static_cast(bucket.start) + offset); + } + + // For a given enum `value`, finds the bucket index that could contain this + // value. If no such bucket is found, the index at which the new bucket should + // be inserted is returned. + size_t FindBucketForValue(T value) const { + // Set is empty, insert at 0. + if (buckets_.size() == 0) { + return 0; } - } - // Removes the given enum value (as a 32-bit word) from the set. This has no - // effect if the enum value is not in the set. - void RemoveWord(uint32_t word) { - if (auto new_bits = AsMask(word)) { - mask_ &= ~new_bits; - } else { - auto itr = Overflow().find(word); - if (itr != Overflow().end()) Overflow().erase(itr); + const T wanted_start = ComputeBucketStart(value); + assert(buckets_.size() > 0 && + "Size must not be 0 here. Has the code above changed?"); + size_t index = std::min(buckets_.size() - 1, + ComputeLargestPossibleBucketIndexFor(value)); + + // This loops behaves like std::upper_bound with a reverse iterator. + // Buckets are sorted. 3 main cases: + // - The bucket matches + // => returns the bucket index. + // - The found bucket is larger + // => scans left until it finds the correct bucket, or insertion point. + // - The found bucket is smaller + // => We are at the end, so we return past-end index for insertion. 
+ for (; buckets_[index].start >= wanted_start; index--) { + if (index == 0) { + return 0; + } } + + return index + 1; } - // Returns true if the enum represented as a 32-bit word is in the set. - bool ContainsWord(uint32_t word) const { - // We shouldn't call Overflow() since this is a const method. - if (auto bits = AsMask(word)) { - return (mask_ & bits) != 0; - } else if (auto overflow = overflow_.get()) { - return overflow->find(word) != overflow->end(); + // Creates a new bucket to store `value` and inserts it at `index`. + // If the `index` is past the end, the bucket is inserted at the end of the + // vector. + void InsertBucketFor(size_t index, T value) { + const T bucket_start = ComputeBucketStart(value); + Bucket bucket = {1ULL << ComputeBucketOffset(value), bucket_start}; + auto it = buckets_.emplace(buckets_.begin() + index, std::move(bucket)); +#if defined(NDEBUG) + (void)it; // Silencing unused variable warning. +#else + assert(std::next(it) == buckets_.end() || + std::next(it)->start > bucket_start); + assert(it == buckets_.begin() || std::prev(it)->start < bucket_start); +#endif + } + + // Returns true if `lhs` and `rhs` hold the exact same values. + friend bool operator==(const EnumSet& lhs, const EnumSet& rhs) { + if (lhs.buckets_.size() != rhs.buckets_.size()) { + return false; } - // The word is large, but the set doesn't have large members, so - // it doesn't have an overflow set. - return false; + return lhs.buckets_ == rhs.buckets_; } - // Returns the enum value as a uint32_t. - uint32_t ToWord(EnumType value) const { - static_assert(sizeof(EnumType) <= sizeof(uint32_t), - "EnumType must statically castable to uint32_t"); - return static_cast(value); + // Returns true if `lhs` and `rhs` hold at least 1 different value. + friend bool operator!=(const EnumSet& lhs, const EnumSet& rhs) { + return !(lhs == rhs); } - // Determines whether the given enum value can be represented - // as a bit in a uint64_t mask. If so, then returns that mask bit. 
- // Otherwise, returns 0. - uint64_t AsMask(uint32_t word) const { - if (word > 63) return 0; - return uint64_t(1) << word; - } - - // Ensures that overflow_set_ references a set. A new empty set is - // allocated if one doesn't exist yet. Returns overflow_set_. - OverflowSetType& Overflow() { - if (overflow_.get() == nullptr) { - overflow_ = MakeUnique(); - } - return *overflow_; - } - - // Enums with values up to 63 are stored as bits in this mask. - uint64_t mask_ = 0; - // Enums with values larger than 63 are stored in this set. - // This set should normally be empty or very small. - std::unique_ptr overflow_ = {}; + // Storage for the buckets. + std::vector buckets_; }; -// A set of spv::Capability, optimized for small capability values. +// A set of spv::Capability. using CapabilitySet = EnumSet; } // namespace spvtools diff --git a/source/extensions.h b/source/extensions.h index 8023444c3..cda4924a4 100644 --- a/source/extensions.h +++ b/source/extensions.h @@ -15,6 +15,7 @@ #ifndef SOURCE_EXTENSIONS_H_ #define SOURCE_EXTENSIONS_H_ +#include #include #include "source/enum_set.h" @@ -23,7 +24,7 @@ namespace spvtools { // The known SPIR-V extensions. -enum Extension { +enum Extension : uint32_t { #include "extension_enum.inc" }; diff --git a/source/val/validation_state.cpp b/source/val/validation_state.cpp index 14e205162..d1572591e 100644 --- a/source/val/validation_state.cpp +++ b/source/val/validation_state.cpp @@ -21,6 +21,7 @@ #include "source/opcode.h" #include "source/spirv_constant.h" #include "source/spirv_target_env.h" +#include "source/util/make_unique.h" #include "source/val/basic_block.h" #include "source/val/construct.h" #include "source/val/function.h" diff --git a/test/enum_set_test.cpp b/test/enum_set_test.cpp index 1f727158e..bf9e4432b 100644 --- a/test/enum_set_test.cpp +++ b/test/enum_set_test.cpp @@ -12,12 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "source/enum_set.h" + #include +#include +#include #include #include #include "gmock/gmock.h" -#include "source/enum_set.h" #include "test/unit_spirv.h" namespace spvtools { @@ -25,159 +28,439 @@ namespace { using spvtest::ElementsIn; using ::testing::Eq; +using ::testing::Values; using ::testing::ValuesIn; +enum class TestEnum : uint32_t { + ZERO = 0, + ONE = 1, + TWO = 2, + THREE = 3, + FOUR = 4, + FIVE = 5, + EIGHT = 8, + TWENTY = 20, + TWENTY_FOUR = 24, + THIRTY = 30, + ONE_HUNDRED = 100, + ONE_HUNDRED_FIFTY = 150, + TWO_HUNDRED = 200, + THREE_HUNDRED = 300, + FOUR_HUNDRED = 400, + FIVE_HUNDRED = 500, + SIX_HUNDRED = 600, +}; + +constexpr std::array kCapabilities{ + spv::Capability::Matrix, + spv::Capability::Shader, + spv::Capability::Geometry, + spv::Capability::Tessellation, + spv::Capability::Addresses, + spv::Capability::Linkage, + spv::Capability::Kernel, + spv::Capability::Vector16, + spv::Capability::Float16Buffer, + spv::Capability::Float16, + spv::Capability::Float64, + spv::Capability::Int64, + spv::Capability::Int64Atomics, + spv::Capability::ImageBasic, + spv::Capability::ImageReadWrite, + spv::Capability::ImageMipmap, + spv::Capability::Pipes, + spv::Capability::Groups, + spv::Capability::DeviceEnqueue, + spv::Capability::LiteralSampler, + spv::Capability::AtomicStorage, + spv::Capability::Int16, + spv::Capability::TessellationPointSize, + spv::Capability::GeometryPointSize, + spv::Capability::ImageGatherExtended, + spv::Capability::StorageImageMultisample, + spv::Capability::UniformBufferArrayDynamicIndexing, + spv::Capability::SampledImageArrayDynamicIndexing, + spv::Capability::StorageBufferArrayDynamicIndexing, + spv::Capability::StorageImageArrayDynamicIndexing, + spv::Capability::ClipDistance, + spv::Capability::CullDistance, + spv::Capability::ImageCubeArray, + spv::Capability::SampleRateShading, + spv::Capability::ImageRect, + spv::Capability::SampledRect, + spv::Capability::GenericPointer, + spv::Capability::Int8, + 
spv::Capability::InputAttachment, + spv::Capability::SparseResidency, + spv::Capability::MinLod, + spv::Capability::Sampled1D, + spv::Capability::Image1D, + spv::Capability::SampledCubeArray, + spv::Capability::SampledBuffer, + spv::Capability::ImageBuffer, + spv::Capability::ImageMSArray, + spv::Capability::StorageImageExtendedFormats, + spv::Capability::ImageQuery, + spv::Capability::DerivativeControl, + spv::Capability::InterpolationFunction, + spv::Capability::TransformFeedback, + spv::Capability::GeometryStreams, + spv::Capability::StorageImageReadWithoutFormat, + spv::Capability::StorageImageWriteWithoutFormat, + spv::Capability::MultiViewport, + spv::Capability::SubgroupDispatch, + spv::Capability::NamedBarrier, + spv::Capability::PipeStorage, + spv::Capability::GroupNonUniform, + spv::Capability::GroupNonUniformVote, + spv::Capability::GroupNonUniformArithmetic, + spv::Capability::GroupNonUniformBallot, + spv::Capability::GroupNonUniformShuffle, + spv::Capability::GroupNonUniformShuffleRelative, + spv::Capability::GroupNonUniformClustered, + spv::Capability::GroupNonUniformQuad, + spv::Capability::ShaderLayer, + spv::Capability::ShaderViewportIndex, + spv::Capability::UniformDecoration, + spv::Capability::CoreBuiltinsARM, + spv::Capability::FragmentShadingRateKHR, + spv::Capability::SubgroupBallotKHR, + spv::Capability::DrawParameters, + spv::Capability::WorkgroupMemoryExplicitLayoutKHR, + spv::Capability::WorkgroupMemoryExplicitLayout8BitAccessKHR, + spv::Capability::WorkgroupMemoryExplicitLayout16BitAccessKHR, + spv::Capability::SubgroupVoteKHR, + spv::Capability::StorageBuffer16BitAccess, + spv::Capability::StorageUniformBufferBlock16, + spv::Capability::StorageUniform16, + spv::Capability::UniformAndStorageBuffer16BitAccess, + spv::Capability::StoragePushConstant16, + spv::Capability::StorageInputOutput16, + spv::Capability::DeviceGroup, + spv::Capability::MultiView, + spv::Capability::VariablePointersStorageBuffer, + spv::Capability::VariablePointers, 
+ spv::Capability::AtomicStorageOps, + spv::Capability::SampleMaskPostDepthCoverage, + spv::Capability::StorageBuffer8BitAccess, + spv::Capability::UniformAndStorageBuffer8BitAccess, + spv::Capability::StoragePushConstant8, + spv::Capability::DenormPreserve, + spv::Capability::DenormFlushToZero, + spv::Capability::SignedZeroInfNanPreserve, + spv::Capability::RoundingModeRTE, + spv::Capability::RoundingModeRTZ, + spv::Capability::RayQueryProvisionalKHR, + spv::Capability::RayQueryKHR, + spv::Capability::RayTraversalPrimitiveCullingKHR, + spv::Capability::RayTracingKHR, + spv::Capability::Float16ImageAMD, + spv::Capability::ImageGatherBiasLodAMD, + spv::Capability::FragmentMaskAMD, + spv::Capability::StencilExportEXT, + spv::Capability::ImageReadWriteLodAMD, + spv::Capability::Int64ImageEXT, + spv::Capability::ShaderClockKHR, + spv::Capability::SampleMaskOverrideCoverageNV, + spv::Capability::GeometryShaderPassthroughNV, + spv::Capability::ShaderViewportIndexLayerEXT, + spv::Capability::ShaderViewportIndexLayerNV, + spv::Capability::ShaderViewportMaskNV, + spv::Capability::ShaderStereoViewNV, + spv::Capability::PerViewAttributesNV, + spv::Capability::FragmentFullyCoveredEXT, + spv::Capability::MeshShadingNV, + spv::Capability::ImageFootprintNV, + spv::Capability::MeshShadingEXT, + spv::Capability::FragmentBarycentricKHR, + spv::Capability::FragmentBarycentricNV, + spv::Capability::ComputeDerivativeGroupQuadsNV, + spv::Capability::FragmentDensityEXT, + spv::Capability::ShadingRateNV, + spv::Capability::GroupNonUniformPartitionedNV, + spv::Capability::ShaderNonUniform, + spv::Capability::ShaderNonUniformEXT, + spv::Capability::RuntimeDescriptorArray, + spv::Capability::RuntimeDescriptorArrayEXT, + spv::Capability::InputAttachmentArrayDynamicIndexing, + spv::Capability::InputAttachmentArrayDynamicIndexingEXT, + spv::Capability::UniformTexelBufferArrayDynamicIndexing, + spv::Capability::UniformTexelBufferArrayDynamicIndexingEXT, + 
spv::Capability::StorageTexelBufferArrayDynamicIndexing, + spv::Capability::StorageTexelBufferArrayDynamicIndexingEXT, + spv::Capability::UniformBufferArrayNonUniformIndexing, + spv::Capability::UniformBufferArrayNonUniformIndexingEXT, + spv::Capability::SampledImageArrayNonUniformIndexing, + spv::Capability::SampledImageArrayNonUniformIndexingEXT, + spv::Capability::StorageBufferArrayNonUniformIndexing, + spv::Capability::StorageBufferArrayNonUniformIndexingEXT, + spv::Capability::StorageImageArrayNonUniformIndexing, + spv::Capability::StorageImageArrayNonUniformIndexingEXT, + spv::Capability::InputAttachmentArrayNonUniformIndexing, + spv::Capability::InputAttachmentArrayNonUniformIndexingEXT, + spv::Capability::UniformTexelBufferArrayNonUniformIndexing, + spv::Capability::UniformTexelBufferArrayNonUniformIndexingEXT, + spv::Capability::StorageTexelBufferArrayNonUniformIndexing, + spv::Capability::StorageTexelBufferArrayNonUniformIndexingEXT, + spv::Capability::RayTracingNV, + spv::Capability::RayTracingMotionBlurNV, + spv::Capability::VulkanMemoryModel, + spv::Capability::VulkanMemoryModelKHR, + spv::Capability::VulkanMemoryModelDeviceScope, + spv::Capability::VulkanMemoryModelDeviceScopeKHR, + spv::Capability::PhysicalStorageBufferAddresses, + spv::Capability::PhysicalStorageBufferAddressesEXT, + spv::Capability::ComputeDerivativeGroupLinearNV, + spv::Capability::RayTracingProvisionalKHR, + spv::Capability::CooperativeMatrixNV, + spv::Capability::FragmentShaderSampleInterlockEXT, + spv::Capability::FragmentShaderShadingRateInterlockEXT, + spv::Capability::ShaderSMBuiltinsNV, + spv::Capability::FragmentShaderPixelInterlockEXT, + spv::Capability::DemoteToHelperInvocation, + spv::Capability::DemoteToHelperInvocationEXT, + spv::Capability::RayTracingOpacityMicromapEXT, + spv::Capability::ShaderInvocationReorderNV, + spv::Capability::BindlessTextureNV, + spv::Capability::SubgroupShuffleINTEL, + spv::Capability::SubgroupBufferBlockIOINTEL, + 
spv::Capability::SubgroupImageBlockIOINTEL, + spv::Capability::SubgroupImageMediaBlockIOINTEL, + spv::Capability::RoundToInfinityINTEL, + spv::Capability::FloatingPointModeINTEL, + spv::Capability::IntegerFunctions2INTEL, + spv::Capability::FunctionPointersINTEL, + spv::Capability::IndirectReferencesINTEL, + spv::Capability::AsmINTEL, + spv::Capability::AtomicFloat32MinMaxEXT, + spv::Capability::AtomicFloat64MinMaxEXT, + spv::Capability::AtomicFloat16MinMaxEXT, + spv::Capability::VectorComputeINTEL, + spv::Capability::VectorAnyINTEL, + spv::Capability::ExpectAssumeKHR, + spv::Capability::SubgroupAvcMotionEstimationINTEL, + spv::Capability::SubgroupAvcMotionEstimationIntraINTEL, + spv::Capability::SubgroupAvcMotionEstimationChromaINTEL, + spv::Capability::VariableLengthArrayINTEL, + spv::Capability::FunctionFloatControlINTEL, + spv::Capability::FPGAMemoryAttributesINTEL, + spv::Capability::FPFastMathModeINTEL, + spv::Capability::ArbitraryPrecisionIntegersINTEL, + spv::Capability::ArbitraryPrecisionFloatingPointINTEL, + spv::Capability::UnstructuredLoopControlsINTEL, + spv::Capability::FPGALoopControlsINTEL, + spv::Capability::KernelAttributesINTEL, + spv::Capability::FPGAKernelAttributesINTEL, + spv::Capability::FPGAMemoryAccessesINTEL, + spv::Capability::FPGAClusterAttributesINTEL, + spv::Capability::LoopFuseINTEL, + spv::Capability::FPGADSPControlINTEL, + spv::Capability::MemoryAccessAliasingINTEL, + spv::Capability::FPGAInvocationPipeliningAttributesINTEL, + spv::Capability::FPGABufferLocationINTEL, + spv::Capability::ArbitraryPrecisionFixedPointINTEL, + spv::Capability::USMStorageClassesINTEL, + spv::Capability::RuntimeAlignedAttributeINTEL, + spv::Capability::IOPipesINTEL, + spv::Capability::BlockingPipesINTEL, + spv::Capability::FPGARegINTEL, + spv::Capability::DotProductInputAll, + spv::Capability::DotProductInputAllKHR, + spv::Capability::DotProductInput4x8Bit, + spv::Capability::DotProductInput4x8BitKHR, + spv::Capability::DotProductInput4x8BitPacked, + 
spv::Capability::DotProductInput4x8BitPackedKHR, + spv::Capability::DotProduct, + spv::Capability::DotProductKHR, + spv::Capability::RayCullMaskKHR, + spv::Capability::BitInstructions, + spv::Capability::GroupNonUniformRotateKHR, + spv::Capability::AtomicFloat32AddEXT, + spv::Capability::AtomicFloat64AddEXT, + spv::Capability::LongConstantCompositeINTEL, + spv::Capability::OptNoneINTEL, + spv::Capability::AtomicFloat16AddEXT, + spv::Capability::DebugInfoModuleINTEL, + spv::Capability::SplitBarrierINTEL, + spv::Capability::GroupUniformArithmeticKHR, + spv::Capability::Max, +}; + TEST(EnumSet, IsEmpty1) { - EnumSet set; + EnumSet set; EXPECT_TRUE(set.IsEmpty()); - set.Add(0); + set.Add(TestEnum::ZERO); EXPECT_FALSE(set.IsEmpty()); } TEST(EnumSet, IsEmpty2) { - EnumSet set; + EnumSet set; EXPECT_TRUE(set.IsEmpty()); - set.Add(150); + set.Add(TestEnum::ONE_HUNDRED_FIFTY); EXPECT_FALSE(set.IsEmpty()); } TEST(EnumSet, IsEmpty3) { - EnumSet set(4); + EnumSet set(TestEnum::FOUR); EXPECT_FALSE(set.IsEmpty()); } TEST(EnumSet, IsEmpty4) { - EnumSet set(300); + EnumSet set(TestEnum::THREE_HUNDRED); EXPECT_FALSE(set.IsEmpty()); } TEST(EnumSetHasAnyOf, EmptySetEmptyQuery) { - const EnumSet set; - const EnumSet empty; + const EnumSet set; + const EnumSet empty; EXPECT_TRUE(set.HasAnyOf(empty)); - EXPECT_TRUE(EnumSet().HasAnyOf(EnumSet())); + EXPECT_TRUE(EnumSet().HasAnyOf(EnumSet())); } TEST(EnumSetHasAnyOf, MaskSetEmptyQuery) { - EnumSet set; - const EnumSet empty; - set.Add(5); - set.Add(8); + EnumSet set; + const EnumSet empty; + set.Add(TestEnum::FIVE); + set.Add(TestEnum::EIGHT); EXPECT_TRUE(set.HasAnyOf(empty)); } TEST(EnumSetHasAnyOf, OverflowSetEmptyQuery) { - EnumSet set; - const EnumSet empty; - set.Add(200); - set.Add(300); + EnumSet set; + const EnumSet empty; + set.Add(TestEnum::TWO_HUNDRED); + set.Add(TestEnum::THREE_HUNDRED); EXPECT_TRUE(set.HasAnyOf(empty)); } TEST(EnumSetHasAnyOf, EmptyQuery) { - EnumSet set; - const EnumSet empty; - set.Add(5); - set.Add(8); - 
set.Add(200); - set.Add(300); + EnumSet set; + const EnumSet empty; + set.Add(TestEnum::FIVE); + set.Add(TestEnum::EIGHT); + set.Add(TestEnum::TWO_HUNDRED); + set.Add(TestEnum::THREE_HUNDRED); EXPECT_TRUE(set.HasAnyOf(empty)); } TEST(EnumSetHasAnyOf, EmptyQueryAlwaysTrue) { - EnumSet set; - const EnumSet empty; + EnumSet set; + const EnumSet empty; EXPECT_TRUE(set.HasAnyOf(empty)); - set.Add(5); + set.Add(TestEnum::FIVE); EXPECT_TRUE(set.HasAnyOf(empty)); - EXPECT_TRUE(EnumSet(100).HasAnyOf(EnumSet())); + EXPECT_TRUE( + EnumSet(TestEnum::ONE_HUNDRED).HasAnyOf(EnumSet())); } TEST(EnumSetHasAnyOf, ReflexiveMask) { - EnumSet set(3); - set.Add(24); - set.Add(30); + EnumSet set(TestEnum::THREE); + set.Add(TestEnum::TWENTY_FOUR); + set.Add(TestEnum::THIRTY); EXPECT_TRUE(set.HasAnyOf(set)); } TEST(EnumSetHasAnyOf, ReflexiveOverflow) { - EnumSet set(200); - set.Add(300); - set.Add(400); + EnumSet set(TestEnum::TWO_HUNDRED); + set.Add(TestEnum::TWO_HUNDRED); + set.Add(TestEnum::FOUR_HUNDRED); EXPECT_TRUE(set.HasAnyOf(set)); } TEST(EnumSetHasAnyOf, Reflexive) { - EnumSet set(3); - set.Add(24); - set.Add(300); - set.Add(400); + EnumSet set(TestEnum::THREE); + set.Add(TestEnum::TWENTY_FOUR); + set.Add(TestEnum::THREE_HUNDRED); + set.Add(TestEnum::FOUR_HUNDRED); EXPECT_TRUE(set.HasAnyOf(set)); } TEST(EnumSetHasAnyOf, EmptySetHasNone) { - EnumSet set; - EnumSet items; + EnumSet set; + EnumSet items; for (uint32_t i = 0; i < 200; ++i) { - items.Add(i); + TestEnum enumValue = static_cast(i); + items.Add(enumValue); EXPECT_FALSE(set.HasAnyOf(items)); - EXPECT_FALSE(set.HasAnyOf(EnumSet(i))); + EXPECT_FALSE(set.HasAnyOf(EnumSet(enumValue))); } } TEST(EnumSetHasAnyOf, MaskSetMaskQuery) { - EnumSet set(0); - EnumSet items(1); + EnumSet set(TestEnum::ZERO); + EnumSet items(TestEnum::ONE); EXPECT_FALSE(set.HasAnyOf(items)); - set.Add(2); - items.Add(3); + set.Add(TestEnum::TWO); + items.Add(TestEnum::THREE); EXPECT_FALSE(set.HasAnyOf(items)); - set.Add(3); + set.Add(TestEnum::THREE); 
EXPECT_TRUE(set.HasAnyOf(items)); - set.Add(4); + set.Add(TestEnum::FOUR); EXPECT_TRUE(set.HasAnyOf(items)); } TEST(EnumSetHasAnyOf, OverflowSetOverflowQuery) { - EnumSet set(100); - EnumSet items(200); + EnumSet set(TestEnum::ONE_HUNDRED); + EnumSet items(TestEnum::TWO_HUNDRED); EXPECT_FALSE(set.HasAnyOf(items)); - set.Add(300); - items.Add(400); + set.Add(TestEnum::THREE_HUNDRED); + items.Add(TestEnum::FOUR_HUNDRED); EXPECT_FALSE(set.HasAnyOf(items)); - set.Add(200); + set.Add(TestEnum::TWO_HUNDRED); EXPECT_TRUE(set.HasAnyOf(items)); - set.Add(500); + set.Add(TestEnum::FIVE_HUNDRED); EXPECT_TRUE(set.HasAnyOf(items)); } TEST(EnumSetHasAnyOf, GeneralCase) { - EnumSet set(0); - EnumSet items(100); + EnumSet set(TestEnum::ZERO); + EnumSet items(TestEnum::ONE_HUNDRED); EXPECT_FALSE(set.HasAnyOf(items)); - set.Add(300); - items.Add(4); + set.Add(TestEnum::THREE_HUNDRED); + items.Add(TestEnum::FOUR); EXPECT_FALSE(set.HasAnyOf(items)); - set.Add(5); - items.Add(500); + set.Add(TestEnum::FIVE); + items.Add(TestEnum::FIVE_HUNDRED); EXPECT_FALSE(set.HasAnyOf(items)); - set.Add(500); + set.Add(TestEnum::FIVE_HUNDRED); EXPECT_TRUE(set.HasAnyOf(items)); - EXPECT_FALSE(set.HasAnyOf(EnumSet(20))); - EXPECT_FALSE(set.HasAnyOf(EnumSet(600))); - EXPECT_TRUE(set.HasAnyOf(EnumSet(5))); - EXPECT_TRUE(set.HasAnyOf(EnumSet(300))); - EXPECT_TRUE(set.HasAnyOf(EnumSet(0))); + EXPECT_FALSE(set.HasAnyOf(EnumSet(TestEnum::TWENTY))); + EXPECT_FALSE(set.HasAnyOf(EnumSet(TestEnum::SIX_HUNDRED))); + EXPECT_TRUE(set.HasAnyOf(EnumSet(TestEnum::FIVE))); + EXPECT_TRUE(set.HasAnyOf(EnumSet(TestEnum::THREE_HUNDRED))); + EXPECT_TRUE(set.HasAnyOf(EnumSet(TestEnum::ZERO))); } TEST(EnumSet, DefaultIsEmpty) { - EnumSet set; + EnumSet set; for (uint32_t i = 0; i < 1000; ++i) { - EXPECT_FALSE(set.Contains(i)); + EXPECT_FALSE(set.Contains(static_cast(i))); } } +TEST(CapabilitySet, ForEachOrderIsEnumOrder) { + constexpr size_t kValueCount = 500; + std::vector orderedValues(kValueCount); + for (size_t i = 0; i < 
kValueCount; i++) { + orderedValues[i] = static_cast(i); + } + std::vector shuffledValues(orderedValues.cbegin(), orderedValues.cend()); + std::mt19937 rng(0); + std::shuffle(shuffledValues.begin(), shuffledValues.end(), rng); + + EnumSet set; + for (auto value : shuffledValues) { + set.Add(value); + } + + size_t index = 0; + set.ForEach([&orderedValues, &index](auto value) { + EXPECT_THAT(value, Eq(orderedValues[index])); + index++; + }); +} + TEST(CapabilitySet, ConstructSingleMemberMatrix) { CapabilitySet s(spv::Capability::Matrix); EXPECT_TRUE(s.Contains(spv::Capability::Matrix)); @@ -230,6 +513,62 @@ TEST(CapabilitySet, InitializerListEmpty) { } } +TEST(CapabilitySet, LargeSetHasInsertedElements) { + CapabilitySet set; + for (auto c : kCapabilities) { + EXPECT_FALSE(set.Contains(c)); + } + + for (auto c : kCapabilities) { + set.Add(c); + EXPECT_TRUE(set.Contains(c)); + } + + for (auto c : kCapabilities) { + EXPECT_TRUE(set.Contains(c)); + } +} + +TEST(CapabilitySet, LargeSetHasUnsortedInsertedElements) { + std::vector shuffledCapabilities(kCapabilities.cbegin(), + kCapabilities.cend()); + std::mt19937 rng(0); + std::shuffle(shuffledCapabilities.begin(), shuffledCapabilities.end(), rng); + CapabilitySet set; + for (auto c : shuffledCapabilities) { + EXPECT_FALSE(set.Contains(c)); + } + + for (auto c : shuffledCapabilities) { + set.Add(c); + EXPECT_TRUE(set.Contains(c)); + } + + for (auto c : shuffledCapabilities) { + EXPECT_TRUE(set.Contains(c)); + } +} + +TEST(CapabilitySet, LargeSetHasUnsortedRemovedElement) { + std::vector shuffledCapabilities(kCapabilities.cbegin(), + kCapabilities.cend()); + std::mt19937 rng(0); + std::shuffle(shuffledCapabilities.begin(), shuffledCapabilities.end(), rng); + CapabilitySet set; + for (auto c : shuffledCapabilities) { + set.Add(c); + EXPECT_TRUE(set.Contains(c)); + } + + for (auto c : kCapabilities) { + set.Remove(c); + } + + for (auto c : shuffledCapabilities) { + EXPECT_FALSE(set.Contains(c)); + } +} + struct ForEachCase { 
CapabilitySet capabilities; std::vector expected; @@ -287,5 +626,20 @@ INSTANTIATE_TEST_SUITE_P( static_cast(0x7fffffff)}}, })); +using BoundaryTestWithParam = ::testing::TestWithParam; + +TEST_P(BoundaryTestWithParam, InsertedContains) { + CapabilitySet set; + set.Add(GetParam()); + EXPECT_TRUE(set.Contains(GetParam())); +} + +INSTANTIATE_TEST_SUITE_P( + Samples, BoundaryTestWithParam, + Values(static_cast(0), static_cast(63), + static_cast(64), static_cast(65), + static_cast(127), static_cast(128), + static_cast(129))); + } // namespace } // namespace spvtools