[regexp] Use ZoneVector in parser and compiler

For a variety of reasons related to OOM handling and custom
allocators, SpiderMonkey wants to be able to see all memory
allocations. To enforce this, we have a static analysis that verifies
that we don't link in malloc, operator new, etc. in unexpected places.
One consequence of this is that we can't use STL containers without a
custom allocator, because they call operator new internally.

This is mostly not an issue in irregexp, which makes heavy use of zone
allocation. The main exceptions are a handful of uses of std::vector
in regexp-compiler.* and regexp-parser.*. This change converts those
vectors to ZoneVectors, which satisfies our static analysis.

R=jgruber@chromium.org

Bug: v8:10303
Change-Id: I8b14a2eb54d3b20959e3fbe878f77effae124a2c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2091402
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66674}
This commit is contained in:
Iain Ireland 2020-03-06 12:13:01 -05:00 committed by Commit Bot
parent 9b5141ca84
commit 5b44c16915
4 changed files with 21 additions and 19 deletions

View File

@@ -252,7 +252,7 @@ RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
#endif
macro_assembler_ = macro_assembler;
std::vector<RegExpNode*> work_list;
ZoneVector<RegExpNode*> work_list(zone());
work_list_ = &work_list;
Label fail;
macro_assembler_->PushBacktrack(&fail);

View File

@@ -576,7 +576,7 @@ class RegExpCompiler {
int next_register_;
int unicode_lookaround_stack_register_;
int unicode_lookaround_position_register_;
std::vector<RegExpNode*>* work_list_;
ZoneVector<RegExpNode*>* work_list_;
int recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
bool one_byte_;

View File

@@ -347,7 +347,8 @@ RegExpTree* RegExpParser::ParseDisjunction() {
if (unicode()) {
ZoneList<CharacterRange>* ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
std::vector<char> name_1, name_2;
ZoneVector<char> name_1(zone());
ZoneVector<char> name_2(zone());
if (ParsePropertyClassName(&name_1, &name_2)) {
if (AddPropertyClassRange(ranges, p == 'P', name_1, name_2)) {
RegExpCharacterClass* cc = new (zone())
@@ -1385,8 +1386,8 @@ bool IsUnicodePropertyValueCharacter(char c) {
} // anonymous namespace
bool RegExpParser::ParsePropertyClassName(std::vector<char>* name_1,
std::vector<char>* name_2) {
bool RegExpParser::ParsePropertyClassName(ZoneVector<char>* name_1,
ZoneVector<char>* name_2) {
DCHECK(name_1->empty());
DCHECK(name_2->empty());
// Parse the property class as follows:
@@ -1425,8 +1426,8 @@ bool RegExpParser::ParsePropertyClassName(std::vector<char>* name_1,
bool RegExpParser::AddPropertyClassRange(ZoneList<CharacterRange>* add_to,
bool negate,
const std::vector<char>& name_1,
const std::vector<char>& name_2) {
const ZoneVector<char>& name_1,
const ZoneVector<char>& name_2) {
if (name_2.empty()) {
// First attempt to interpret as general category property value name.
const char* name = name_1.data();
@@ -1463,7 +1464,7 @@ bool RegExpParser::AddPropertyClassRange(ZoneList<CharacterRange>* add_to,
}
}
RegExpTree* RegExpParser::GetPropertySequence(const std::vector<char>& name_1) {
RegExpTree* RegExpParser::GetPropertySequence(const ZoneVector<char>& name_1) {
if (!FLAG_harmony_regexp_sequence) return nullptr;
const char* name = name_1.data();
const uc32* sequence_list = nullptr;
@@ -1529,19 +1530,19 @@ RegExpTree* RegExpParser::GetPropertySequence(const std::vector<char>& name_1) {
#else // V8_INTL_SUPPORT
bool RegExpParser::ParsePropertyClassName(std::vector<char>* name_1,
std::vector<char>* name_2) {
bool RegExpParser::ParsePropertyClassName(ZoneVector<char>* name_1,
ZoneVector<char>* name_2) {
return false;
}
bool RegExpParser::AddPropertyClassRange(ZoneList<CharacterRange>* add_to,
bool negate,
const std::vector<char>& name_1,
const std::vector<char>& name_2) {
const ZoneVector<char>& name_1,
const ZoneVector<char>& name_2) {
return false;
}
RegExpTree* RegExpParser::GetPropertySequence(const std::vector<char>& name) {
RegExpTree* RegExpParser::GetPropertySequence(const ZoneVector<char>& name) {
return nullptr;
}
@@ -1710,7 +1711,8 @@ void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
if (unicode()) {
bool negate = Next() == 'P';
Advance(2);
std::vector<char> name_1, name_2;
ZoneVector<char> name_1(zone);
ZoneVector<char> name_2(zone);
if (!ParsePropertyClassName(&name_1, &name_2) ||
!AddPropertyClassRange(ranges, negate, name_1, name_2)) {
ReportError(CStrVector("Invalid property name in character class"));

View File

@@ -177,13 +177,13 @@ class V8_EXPORT_PRIVATE RegExpParser {
bool ParseUnicodeEscape(uc32* value);
bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
bool ParsePropertyClassName(std::vector<char>* name_1,
std::vector<char>* name_2);
bool ParsePropertyClassName(ZoneVector<char>* name_1,
ZoneVector<char>* name_2);
bool AddPropertyClassRange(ZoneList<CharacterRange>* add_to, bool negate,
const std::vector<char>& name_1,
const std::vector<char>& name_2);
const ZoneVector<char>& name_1,
const ZoneVector<char>& name_2);
RegExpTree* GetPropertySequence(const std::vector<char>& name_1);
RegExpTree* GetPropertySequence(const ZoneVector<char>& name_1);
RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
uc32 ParseOctalLiteral();