From 3a858a91faa5c0910769fe5072726b041923a6ab Mon Sep 17 00:00:00 2001 From: Jakob Gruber Date: Thu, 4 Nov 2021 14:01:05 +0100 Subject: [PATCH] [base] Extend SmallVector for use with Zone storage This CL adds an Allocator to SmallVector to control how dynamic storage is managed. The default value uses the plain old C++ std::allocator, i.e. acts like malloc/free. For use with zone memory, one can pass a ZoneAllocator as follows: // Allocates in zone memory. base::SmallVector> xs(ZoneAllocator(zone)); Note: this is a follow-up to crrev.com/c/3240823. Drive-by: hide the internal `reset` function. It doesn't free the dynamic backing store; that's a surprise and should not be exposed to external use. Change-Id: I1f92f184924541e2269493fb52c30f2fdec032be Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3257711 Commit-Queue: Jakob Gruber Reviewed-by: Leszek Swirski Reviewed-by: Igor Sheludko Cr-Commit-Position: refs/heads/main@{#77755} --- include/v8config.h | 25 ++++++++++++ src/base/small-vector.h | 72 +++++++++++++++++++++++------------ src/regexp/regexp-parser.cc | 76 ++++++++++++++++++++----------------- 3 files changed, 115 insertions(+), 58 deletions(-) diff --git a/include/v8config.h b/include/v8config.h index ecb992822c..c4d0894e0f 100644 --- a/include/v8config.h +++ b/include/v8config.h @@ -293,6 +293,8 @@ path. Add it with -I to the command line // V8_HAS_ATTRIBUTE_WARN_UNUSED_RESULT - __attribute__((warn_unused_result)) // supported // V8_HAS_CPP_ATTRIBUTE_NODISCARD - [[nodiscard]] supported +// V8_HAS_CPP_ATTRIBUTE_NO_UNIQUE_ADDRESS +// - [[no_unique_address]] supported // V8_HAS_BUILTIN_BSWAP16 - __builtin_bswap16() supported // V8_HAS_BUILTIN_BSWAP32 - __builtin_bswap32() supported // V8_HAS_BUILTIN_BSWAP64 - __builtin_bswap64() supported @@ -337,6 +339,8 @@ path. Add it with -I to the command line (__has_attribute(warn_unused_result)) # define V8_HAS_CPP_ATTRIBUTE_NODISCARD (V8_HAS_CPP_ATTRIBUTE(nodiscard)) +# define V8_HAS_CPP_ATTRIBUTE_NO_UNIQUE_ADDRESS \ + (V8_HAS_CPP_ATTRIBUTE(no_unique_address)) # define V8_HAS_BUILTIN_ASSUME_ALIGNED (__has_builtin(__builtin_assume_aligned)) # define V8_HAS_BUILTIN_BSWAP16 (__has_builtin(__builtin_bswap16)) @@ -507,6 +511,27 @@ path. Add it with -I to the command line #define V8_NODISCARD /* NOT SUPPORTED */ #endif +// The no_unique_address attribute allows tail padding in a non-static data +// member to overlap other members of the enclosing class (and in the special +// case when the type is empty, permits it to fully overlap other members). The +// field is laid out as if a base class were encountered at the corresponding +// point within the class (except that it does not share a vptr with the +// enclosing object). +// +// Apply to a data member like: +// +// class Foo { +// V8_NO_UNIQUE_ADDRESS Bar bar_; +// }; +// +// [[no_unique_address]] comes in C++20 but supported in clang with +// -std >= c++11. +#if V8_HAS_CPP_ATTRIBUTE_NO_UNIQUE_ADDRESS +#define V8_NO_UNIQUE_ADDRESS [[no_unique_address]] +#else +#define V8_NO_UNIQUE_ADDRESS /* NOT SUPPORTED */ +#endif + // Helper macro to define no_sanitize attributes only with clang. #if defined(__clang__) && defined(__has_attribute) #if __has_attribute(no_sanitize) diff --git a/src/base/small-vector.h b/src/base/small-vector.h index 9b866dde6b..30850013dc 100644 --- a/src/base/small-vector.h +++ b/src/base/small-vector.h @@ -11,14 +11,13 @@ #include "src/base/bits.h" #include "src/base/macros.h" -#include "src/base/platform/wrappers.h" namespace v8 { namespace base { // Minimal SmallVector implementation. Uses inline storage first, switches to -// malloc when it overflows. -template +// dynamic storage when it overflows. +template > class SmallVector { // Currently only support trivially copyable and trivially destructible data // types, as it uses memcpy to copy elements and never calls destructors. @@ -28,17 +27,31 @@ class SmallVector { public: static constexpr size_t kInlineSize = kSize; - SmallVector() = default; - explicit SmallVector(size_t size) { resize_no_init(size); } - SmallVector(const SmallVector& other) V8_NOEXCEPT { *this = other; } - SmallVector(SmallVector&& other) V8_NOEXCEPT { *this = std::move(other); } - SmallVector(std::initializer_list init) { + explicit SmallVector(const Allocator& allocator = Allocator()) + : allocator_(allocator) {} + explicit SmallVector(size_t size, const Allocator& allocator = Allocator()) + : allocator_(allocator) { + resize_no_init(size); + } + SmallVector(const SmallVector& other, + const Allocator& allocator = Allocator()) V8_NOEXCEPT + : allocator_(allocator) { + *this = other; + } + SmallVector(SmallVector&& other, + const Allocator& allocator = Allocator()) V8_NOEXCEPT + : allocator_(allocator) { + *this = std::move(other); + } + SmallVector(std::initializer_list init, + const Allocator& allocator = Allocator()) + : allocator_(allocator) { resize_no_init(init.size()); memcpy(begin_, init.begin(), sizeof(T) * init.size()); } ~SmallVector() { - if (is_big()) base::Free(begin_); + if (is_big()) FreeDynamicStorage(); } SmallVector& operator=(const SmallVector& other) V8_NOEXCEPT { @@ -46,8 +59,8 @@ class SmallVector { size_t other_size = other.size(); if (capacity() < other_size) { // Create large-enough heap-allocated storage. - if (is_big()) base::Free(begin_); - begin_ = reinterpret_cast(base::Malloc(sizeof(T) * other_size)); + if (is_big()) FreeDynamicStorage(); + begin_ = AllocateDynamicStorage(other_size); end_of_storage_ = begin_ + other_size; } memcpy(begin_, other.begin_, sizeof(T) * other_size); @@ -58,11 +71,11 @@ class SmallVector { SmallVector& operator=(SmallVector&& other) V8_NOEXCEPT { if (this == &other) return *this; if (other.is_big()) { - if (is_big()) base::Free(begin_); + if (is_big()) FreeDynamicStorage(); begin_ = other.begin_; end_ = other.end_; end_of_storage_ = other.end_of_storage_; - other.reset(); + other.reset_to_inline_storage(); } else { DCHECK_GE(capacity(), other.size()); // Sanity check. size_t other_size = other.size(); @@ -126,17 +139,12 @@ class SmallVector { end_ = begin_ + new_size; } - // Clear without freeing any storage. + // Clear without reverting back to inline storage. void clear() { end_ = begin_; } - // Clear and go back to inline storage. - void reset() { - begin_ = inline_storage_begin(); - end_ = begin_; - end_of_storage_ = begin_ + kInlineSize; - } - private: + V8_NO_UNIQUE_ADDRESS Allocator allocator_; + T* begin_ = inline_storage_begin(); T* end_ = begin_; T* end_of_storage_ = begin_ + kInlineSize; @@ -152,8 +160,7 @@ class SmallVector { size_t in_use = end_ - begin_; size_t new_capacity = base::bits::RoundUpToPowerOfTwo(std::max(min_capacity, 2 * capacity())); - T* new_storage = - reinterpret_cast(base::Malloc(sizeof(T) * new_capacity)); + T* new_storage = AllocateDynamicStorage(new_capacity); if (new_storage == nullptr) { // Should be: V8::FatalProcessOutOfMemory, but we don't include V8 from // base. The message is intentionally the same as FatalProcessOutOfMemory @@ -162,13 +169,30 @@ class SmallVector { FATAL("Fatal process out of memory: base::SmallVector::Grow"); } memcpy(new_storage, begin_, sizeof(T) * in_use); - if (is_big()) base::Free(begin_); + if (is_big()) FreeDynamicStorage(); begin_ = new_storage; end_ = new_storage + in_use; end_of_storage_ = new_storage + new_capacity; return end_; } + T* AllocateDynamicStorage(size_t number_of_elements) { + return allocator_.allocate(number_of_elements); + } + + void FreeDynamicStorage() { + DCHECK(is_big()); + allocator_.deallocate(begin_, end_of_storage_ - begin_); + } + + // Clear and go back to inline storage. Dynamic storage is *not* freed. For + // internal use only. + void reset_to_inline_storage() { + begin_ = inline_storage_begin(); + end_ = begin_; + end_of_storage_ = begin_ + kInlineSize; + } + bool is_big() const { return begin_ != inline_storage_begin(); } T* inline_storage_begin() { return reinterpret_cast(&inline_storage_); } diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc index dcab6b2429..aa5cb2ad0d 100644 --- a/src/regexp/regexp-parser.cc +++ b/src/regexp/regexp-parser.cc @@ -4,6 +4,7 @@ #include "src/regexp/regexp-parser.h" +#include "src/base/small-vector.h" #include "src/execution/isolate.h" #include "src/objects/string-inl.h" #include "src/regexp/property-sequences.h" @@ -13,6 +14,7 @@ #include "src/strings/char-predicates-inl.h" #include "src/utils/ostreams.h" #include "src/utils/utils.h" +#include "src/zone/zone-allocator.h" #include "src/zone/zone-list-inl.h" #ifdef V8_INTL_SUPPORT @@ -37,9 +39,9 @@ class RegExpBuilder { RegExpBuilder(Zone* zone, RegExpFlags flags) : zone_(zone), flags_(flags), - terms_(2, zone), - text_(2, zone), - alternatives_(2, zone) {} + terms_(ZoneAllocator{zone}), + text_(ZoneAllocator{zone}), + alternatives_(ZoneAllocator{zone}) {} void AddCharacter(base::uc16 character); void AddUnicodeCharacter(base::uc32 character); void AddEscapedUnicodeCharacter(base::uc32 character); @@ -79,9 +81,12 @@ class RegExpBuilder { const RegExpFlags flags_; ZoneList* characters_ = nullptr; base::uc16 pending_surrogate_ = kNoPendingSurrogate; - ZoneList terms_; - ZoneList text_; - ZoneList alternatives_; + + using SmallRegExpTreeVector = + base::SmallVector>; + SmallRegExpTreeVector terms_; + SmallRegExpTreeVector text_; + SmallRegExpTreeVector alternatives_; #ifdef DEBUG enum { ADD_NONE, @@ -2100,26 +2105,26 @@ void RegExpBuilder::FlushCharacters() { if (characters_ != nullptr) { RegExpTree* atom = zone()->New(characters_->ToConstVector()); characters_ = nullptr; - text_.Add(atom, zone()); + text_.emplace_back(atom); LAST(ADD_ATOM); } } void RegExpBuilder::FlushText() { FlushCharacters(); - int num_text = text_.length(); + size_t num_text = text_.size(); if (num_text == 0) { return; } else if (num_text == 1) { - terms_.Add(text_.last(), zone()); + terms_.emplace_back(text_.back()); } else { RegExpText* text = zone()->New(zone()); - for (int i = 0; i < num_text; i++) { + for (size_t i = 0; i < num_text; i++) { text_[i]->AppendToText(text, zone()); } - terms_.Add(text, zone()); + terms_.emplace_back(text); } - text_.Rewind(0); + text_.clear(); } void RegExpBuilder::AddCharacter(base::uc16 c) { @@ -2182,23 +2187,23 @@ void RegExpBuilder::AddAtom(RegExpTree* term) { } if (term->IsTextElement()) { FlushCharacters(); - text_.Add(term, zone()); + text_.emplace_back(term); } else { FlushText(); - terms_.Add(term, zone()); + terms_.emplace_back(term); } LAST(ADD_ATOM); } void RegExpBuilder::AddTerm(RegExpTree* term) { FlushText(); - terms_.Add(term, zone()); + terms_.emplace_back(term); LAST(ADD_ATOM); } void RegExpBuilder::AddAssertion(RegExpTree* assert) { FlushText(); - terms_.Add(assert, zone()); + terms_.emplace_back(assert); LAST(ADD_ASSERT); } @@ -2206,18 +2211,19 @@ void RegExpBuilder::NewAlternative() { FlushTerms(); } void RegExpBuilder::FlushTerms() { FlushText(); - int num_terms = terms_.length(); + size_t num_terms = terms_.size(); RegExpTree* alternative; if (num_terms == 0) { alternative = zone()->New(); } else if (num_terms == 1) { - alternative = terms_.last(); + alternative = terms_.back(); } else { - alternative = zone()->New( - zone()->New>(terms_, zone())); + alternative = + zone()->New(zone()->New>( + base::VectorOf(terms_.begin(), terms_.size()), zone())); } - alternatives_.Add(alternative, zone()); - terms_.Rewind(0); + alternatives_.emplace_back(alternative); + terms_.clear(); LAST(ADD_NONE); } @@ -2256,11 +2262,11 @@ bool RegExpBuilder::NeedsDesugaringForIgnoreCase(base::uc32 c) { RegExpTree* RegExpBuilder::ToRegExp() { FlushTerms(); - int num_alternatives = alternatives_.length(); + size_t num_alternatives = alternatives_.size(); if (num_alternatives == 0) return zone()->New(); - if (num_alternatives == 1) return alternatives_.last(); - return zone()->New( - zone()->New>(alternatives_, zone())); + if (num_alternatives == 1) return alternatives_.back(); + return zone()->New(zone()->New>( + base::VectorOf(alternatives_.begin(), alternatives_.size()), zone())); } bool RegExpBuilder::AddQuantifierToAtom( @@ -2279,19 +2285,21 @@ bool RegExpBuilder::AddQuantifierToAtom( if (num_chars > 1) { base::Vector prefix = char_vector.SubVector(0, num_chars - 1); - text_.Add(zone()->New(prefix), zone()); + text_.emplace_back(zone()->New(prefix)); char_vector = char_vector.SubVector(num_chars - 1, num_chars); } characters_ = nullptr; atom = zone()->New(char_vector); FlushText(); - } else if (text_.length() > 0) { + } else if (text_.size() > 0) { DCHECK(last_added_ == ADD_ATOM); - atom = text_.RemoveLast(); + atom = text_.back(); + text_.pop_back(); FlushText(); - } else if (terms_.length() > 0) { + } else if (terms_.size() > 0) { DCHECK(last_added_ == ADD_ATOM); - atom = terms_.RemoveLast(); + atom = terms_.back(); + terms_.pop_back(); if (atom->IsLookaround()) { // With /u, lookarounds are not quantifiable. if (unicode()) return false; @@ -2306,15 +2314,15 @@ bool RegExpBuilder::AddQuantifierToAtom( if (min == 0) { return true; } - terms_.Add(atom, zone()); + terms_.emplace_back(atom); return true; } } else { // Only call immediately after adding an atom or character! UNREACHABLE(); } - terms_.Add(zone()->New(min, max, quantifier_type, atom), - zone()); + terms_.emplace_back( + zone()->New(min, max, quantifier_type, atom)); LAST(ADD_TERM); return true; }