[base] Extend SmallVector for use with Zone storage

This CL adds an Allocator to SmallVector to control how dynamic
storage is managed. The default value uses the plain old C++
std::allocator<T>, i.e. acts like malloc/free.

For use with zone memory, one can pass a ZoneAllocator as follows:

  // Allocates in zone memory.
  base::SmallVector<int, kInitialSize, ZoneAllocator<int>>
    xs(ZoneAllocator<int>(zone));

Note: this is a follow-up to crrev.com/c/3240823.

Drive-by: hide the internal `reset` function. It doesn't free the
dynamic backing store; that's a surprise and should not be exposed to
external use.

Change-Id: I1f92f184924541e2269493fb52c30f2fdec032be
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3257711
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77755}
This commit is contained in:
Jakob Gruber 2021-11-04 14:01:05 +01:00 committed by V8 LUCI CQ
parent 5bf06b536d
commit 3a858a91fa
3 changed files with 115 additions and 58 deletions

View File

@ -293,6 +293,8 @@ path. Add it with -I<path> to the command line
// V8_HAS_ATTRIBUTE_WARN_UNUSED_RESULT - __attribute__((warn_unused_result))
// supported
// V8_HAS_CPP_ATTRIBUTE_NODISCARD - [[nodiscard]] supported
// V8_HAS_CPP_ATTRIBUTE_NO_UNIQUE_ADDRESS
// - [[no_unique_address]] supported
// V8_HAS_BUILTIN_BSWAP16 - __builtin_bswap16() supported
// V8_HAS_BUILTIN_BSWAP32 - __builtin_bswap32() supported
// V8_HAS_BUILTIN_BSWAP64 - __builtin_bswap64() supported
@ -337,6 +339,8 @@ path. Add it with -I<path> to the command line
(__has_attribute(warn_unused_result))
# define V8_HAS_CPP_ATTRIBUTE_NODISCARD (V8_HAS_CPP_ATTRIBUTE(nodiscard))
# define V8_HAS_CPP_ATTRIBUTE_NO_UNIQUE_ADDRESS \
(V8_HAS_CPP_ATTRIBUTE(no_unique_address))
# define V8_HAS_BUILTIN_ASSUME_ALIGNED (__has_builtin(__builtin_assume_aligned))
# define V8_HAS_BUILTIN_BSWAP16 (__has_builtin(__builtin_bswap16))
@ -507,6 +511,27 @@ path. Add it with -I<path> to the command line
#define V8_NODISCARD /* NOT SUPPORTED */
#endif
// The no_unique_address attribute allows tail padding in a non-static data
// member to overlap other members of the enclosing class (and in the special
// case when the type is empty, permits it to fully overlap other members). The
// field is laid out as if a base class were encountered at the corresponding
// point within the class (except that it does not share a vptr with the
// enclosing object).
//
// Apply to a data member like:
//
// class Foo {
// V8_NO_UNIQUE_ADDRESS Bar bar_;
// };
//
// [[no_unique_address]] comes in C++20 but supported in clang with
// -std >= c++11.
#if V8_HAS_CPP_ATTRIBUTE_NO_UNIQUE_ADDRESS
#define V8_NO_UNIQUE_ADDRESS [[no_unique_address]]
#else
#define V8_NO_UNIQUE_ADDRESS /* NOT SUPPORTED */
#endif
// Helper macro to define no_sanitize attributes only with clang.
#if defined(__clang__) && defined(__has_attribute)
#if __has_attribute(no_sanitize)

View File

@ -11,14 +11,13 @@
#include "src/base/bits.h"
#include "src/base/macros.h"
#include "src/base/platform/wrappers.h"
namespace v8 {
namespace base {
// Minimal SmallVector implementation. Uses inline storage first, switches to
// malloc when it overflows.
template <typename T, size_t kSize>
// dynamic storage when it overflows.
template <typename T, size_t kSize, typename Allocator = std::allocator<T>>
class SmallVector {
// Currently only support trivially copyable and trivially destructible data
// types, as it uses memcpy to copy elements and never calls destructors.
@ -28,17 +27,31 @@ class SmallVector {
public:
static constexpr size_t kInlineSize = kSize;
SmallVector() = default;
explicit SmallVector(size_t size) { resize_no_init(size); }
SmallVector(const SmallVector& other) V8_NOEXCEPT { *this = other; }
SmallVector(SmallVector&& other) V8_NOEXCEPT { *this = std::move(other); }
SmallVector(std::initializer_list<T> init) {
explicit SmallVector(const Allocator& allocator = Allocator())
: allocator_(allocator) {}
explicit SmallVector(size_t size, const Allocator& allocator = Allocator())
: allocator_(allocator) {
resize_no_init(size);
}
SmallVector(const SmallVector& other,
const Allocator& allocator = Allocator()) V8_NOEXCEPT
: allocator_(allocator) {
*this = other;
}
SmallVector(SmallVector&& other,
const Allocator& allocator = Allocator()) V8_NOEXCEPT
: allocator_(allocator) {
*this = std::move(other);
}
SmallVector(std::initializer_list<T> init,
const Allocator& allocator = Allocator())
: allocator_(allocator) {
resize_no_init(init.size());
memcpy(begin_, init.begin(), sizeof(T) * init.size());
}
~SmallVector() {
if (is_big()) base::Free(begin_);
if (is_big()) FreeDynamicStorage();
}
SmallVector& operator=(const SmallVector& other) V8_NOEXCEPT {
@ -46,8 +59,8 @@ class SmallVector {
size_t other_size = other.size();
if (capacity() < other_size) {
// Create large-enough heap-allocated storage.
if (is_big()) base::Free(begin_);
begin_ = reinterpret_cast<T*>(base::Malloc(sizeof(T) * other_size));
if (is_big()) FreeDynamicStorage();
begin_ = AllocateDynamicStorage(other_size);
end_of_storage_ = begin_ + other_size;
}
memcpy(begin_, other.begin_, sizeof(T) * other_size);
@ -58,11 +71,11 @@ class SmallVector {
SmallVector& operator=(SmallVector&& other) V8_NOEXCEPT {
if (this == &other) return *this;
if (other.is_big()) {
if (is_big()) base::Free(begin_);
if (is_big()) FreeDynamicStorage();
begin_ = other.begin_;
end_ = other.end_;
end_of_storage_ = other.end_of_storage_;
other.reset();
other.reset_to_inline_storage();
} else {
DCHECK_GE(capacity(), other.size()); // Sanity check.
size_t other_size = other.size();
@ -126,17 +139,12 @@ class SmallVector {
end_ = begin_ + new_size;
}
// Clear without freeing any storage.
// Clear without reverting back to inline storage.
void clear() { end_ = begin_; }
// Clear and go back to inline storage.
void reset() {
begin_ = inline_storage_begin();
end_ = begin_;
end_of_storage_ = begin_ + kInlineSize;
}
private:
V8_NO_UNIQUE_ADDRESS Allocator allocator_;
T* begin_ = inline_storage_begin();
T* end_ = begin_;
T* end_of_storage_ = begin_ + kInlineSize;
@ -152,8 +160,7 @@ class SmallVector {
size_t in_use = end_ - begin_;
size_t new_capacity =
base::bits::RoundUpToPowerOfTwo(std::max(min_capacity, 2 * capacity()));
T* new_storage =
reinterpret_cast<T*>(base::Malloc(sizeof(T) * new_capacity));
T* new_storage = AllocateDynamicStorage(new_capacity);
if (new_storage == nullptr) {
// Should be: V8::FatalProcessOutOfMemory, but we don't include V8 from
// base. The message is intentionally the same as FatalProcessOutOfMemory
@ -162,13 +169,30 @@ class SmallVector {
FATAL("Fatal process out of memory: base::SmallVector::Grow");
}
memcpy(new_storage, begin_, sizeof(T) * in_use);
if (is_big()) base::Free(begin_);
if (is_big()) FreeDynamicStorage();
begin_ = new_storage;
end_ = new_storage + in_use;
end_of_storage_ = new_storage + new_capacity;
return end_;
}
T* AllocateDynamicStorage(size_t number_of_elements) {
return allocator_.allocate(number_of_elements);
}
void FreeDynamicStorage() {
DCHECK(is_big());
allocator_.deallocate(begin_, end_of_storage_ - begin_);
}
// Clear and go back to inline storage. Dynamic storage is *not* freed. For
// internal use only.
void reset_to_inline_storage() {
begin_ = inline_storage_begin();
end_ = begin_;
end_of_storage_ = begin_ + kInlineSize;
}
bool is_big() const { return begin_ != inline_storage_begin(); }
T* inline_storage_begin() { return reinterpret_cast<T*>(&inline_storage_); }

View File

@ -4,6 +4,7 @@
#include "src/regexp/regexp-parser.h"
#include "src/base/small-vector.h"
#include "src/execution/isolate.h"
#include "src/objects/string-inl.h"
#include "src/regexp/property-sequences.h"
@ -13,6 +14,7 @@
#include "src/strings/char-predicates-inl.h"
#include "src/utils/ostreams.h"
#include "src/utils/utils.h"
#include "src/zone/zone-allocator.h"
#include "src/zone/zone-list-inl.h"
#ifdef V8_INTL_SUPPORT
@ -37,9 +39,9 @@ class RegExpBuilder {
RegExpBuilder(Zone* zone, RegExpFlags flags)
: zone_(zone),
flags_(flags),
terms_(2, zone),
text_(2, zone),
alternatives_(2, zone) {}
terms_(ZoneAllocator<RegExpTree*>{zone}),
text_(ZoneAllocator<RegExpTree*>{zone}),
alternatives_(ZoneAllocator<RegExpTree*>{zone}) {}
void AddCharacter(base::uc16 character);
void AddUnicodeCharacter(base::uc32 character);
void AddEscapedUnicodeCharacter(base::uc32 character);
@ -79,9 +81,12 @@ class RegExpBuilder {
const RegExpFlags flags_;
ZoneList<base::uc16>* characters_ = nullptr;
base::uc16 pending_surrogate_ = kNoPendingSurrogate;
ZoneList<RegExpTree*> terms_;
ZoneList<RegExpTree*> text_;
ZoneList<RegExpTree*> alternatives_;
using SmallRegExpTreeVector =
base::SmallVector<RegExpTree*, 8, ZoneAllocator<RegExpTree*>>;
SmallRegExpTreeVector terms_;
SmallRegExpTreeVector text_;
SmallRegExpTreeVector alternatives_;
#ifdef DEBUG
enum {
ADD_NONE,
@ -2100,26 +2105,26 @@ void RegExpBuilder::FlushCharacters() {
if (characters_ != nullptr) {
RegExpTree* atom = zone()->New<RegExpAtom>(characters_->ToConstVector());
characters_ = nullptr;
text_.Add(atom, zone());
text_.emplace_back(atom);
LAST(ADD_ATOM);
}
}
void RegExpBuilder::FlushText() {
FlushCharacters();
int num_text = text_.length();
size_t num_text = text_.size();
if (num_text == 0) {
return;
} else if (num_text == 1) {
terms_.Add(text_.last(), zone());
terms_.emplace_back(text_.back());
} else {
RegExpText* text = zone()->New<RegExpText>(zone());
for (int i = 0; i < num_text; i++) {
for (size_t i = 0; i < num_text; i++) {
text_[i]->AppendToText(text, zone());
}
terms_.Add(text, zone());
terms_.emplace_back(text);
}
text_.Rewind(0);
text_.clear();
}
void RegExpBuilder::AddCharacter(base::uc16 c) {
@ -2182,23 +2187,23 @@ void RegExpBuilder::AddAtom(RegExpTree* term) {
}
if (term->IsTextElement()) {
FlushCharacters();
text_.Add(term, zone());
text_.emplace_back(term);
} else {
FlushText();
terms_.Add(term, zone());
terms_.emplace_back(term);
}
LAST(ADD_ATOM);
}
void RegExpBuilder::AddTerm(RegExpTree* term) {
FlushText();
terms_.Add(term, zone());
terms_.emplace_back(term);
LAST(ADD_ATOM);
}
void RegExpBuilder::AddAssertion(RegExpTree* assert) {
FlushText();
terms_.Add(assert, zone());
terms_.emplace_back(assert);
LAST(ADD_ASSERT);
}
@ -2206,18 +2211,19 @@ void RegExpBuilder::NewAlternative() { FlushTerms(); }
void RegExpBuilder::FlushTerms() {
FlushText();
int num_terms = terms_.length();
size_t num_terms = terms_.size();
RegExpTree* alternative;
if (num_terms == 0) {
alternative = zone()->New<RegExpEmpty>();
} else if (num_terms == 1) {
alternative = terms_.last();
alternative = terms_.back();
} else {
alternative = zone()->New<RegExpAlternative>(
zone()->New<ZoneList<RegExpTree*>>(terms_, zone()));
alternative =
zone()->New<RegExpAlternative>(zone()->New<ZoneList<RegExpTree*>>(
base::VectorOf(terms_.begin(), terms_.size()), zone()));
}
alternatives_.Add(alternative, zone());
terms_.Rewind(0);
alternatives_.emplace_back(alternative);
terms_.clear();
LAST(ADD_NONE);
}
@ -2256,11 +2262,11 @@ bool RegExpBuilder::NeedsDesugaringForIgnoreCase(base::uc32 c) {
RegExpTree* RegExpBuilder::ToRegExp() {
FlushTerms();
int num_alternatives = alternatives_.length();
size_t num_alternatives = alternatives_.size();
if (num_alternatives == 0) return zone()->New<RegExpEmpty>();
if (num_alternatives == 1) return alternatives_.last();
return zone()->New<RegExpDisjunction>(
zone()->New<ZoneList<RegExpTree*>>(alternatives_, zone()));
if (num_alternatives == 1) return alternatives_.back();
return zone()->New<RegExpDisjunction>(zone()->New<ZoneList<RegExpTree*>>(
base::VectorOf(alternatives_.begin(), alternatives_.size()), zone()));
}
bool RegExpBuilder::AddQuantifierToAtom(
@ -2279,19 +2285,21 @@ bool RegExpBuilder::AddQuantifierToAtom(
if (num_chars > 1) {
base::Vector<const base::uc16> prefix =
char_vector.SubVector(0, num_chars - 1);
text_.Add(zone()->New<RegExpAtom>(prefix), zone());
text_.emplace_back(zone()->New<RegExpAtom>(prefix));
char_vector = char_vector.SubVector(num_chars - 1, num_chars);
}
characters_ = nullptr;
atom = zone()->New<RegExpAtom>(char_vector);
FlushText();
} else if (text_.length() > 0) {
} else if (text_.size() > 0) {
DCHECK(last_added_ == ADD_ATOM);
atom = text_.RemoveLast();
atom = text_.back();
text_.pop_back();
FlushText();
} else if (terms_.length() > 0) {
} else if (terms_.size() > 0) {
DCHECK(last_added_ == ADD_ATOM);
atom = terms_.RemoveLast();
atom = terms_.back();
terms_.pop_back();
if (atom->IsLookaround()) {
// With /u, lookarounds are not quantifiable.
if (unicode()) return false;
@ -2306,15 +2314,15 @@ bool RegExpBuilder::AddQuantifierToAtom(
if (min == 0) {
return true;
}
terms_.Add(atom, zone());
terms_.emplace_back(atom);
return true;
}
} else {
// Only call immediately after adding an atom or character!
UNREACHABLE();
}
terms_.Add(zone()->New<RegExpQuantifier>(min, max, quantifier_type, atom),
zone());
terms_.emplace_back(
zone()->New<RegExpQuantifier>(min, max, quantifier_type, atom));
LAST(ADD_TERM);
return true;
}