Reland "[regexp] Limit the size of inlined choice nodes"

This is a reland of 6a0e7224f3

Original change's description:
> [regexp] Limit the size of inlined choice nodes
>
> Codegen for Unicode property escapes (e.g. /\p{L}/u) can produce huge
> code objects. This effect can be further magnified through inlining,
> leading to code growth that is exponential in the size of the pattern.
>
> This CL is a (fairly hacky) way to avoid exponential growth. We
> recognize choice nodes with 'many' choices and disable inlining for
> them. In the future we should fix this properly, either by using the
> code size budget correctly, or by improving codegen for property
> escapes.
>
> Bug: v8:10441
> Change-Id: I817f145251ec8b1b9906cc735c9e9bdb004c98ed
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2170229
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Yang Guo <yangguo@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#67433}

Tbr: yangguo@chromium.org
Bug: v8:10441
Change-Id: I9a16cc9e8248cb46d3d16a4e2d250968cc1b7b39
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2172679
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67462}
This commit is contained in:
Jakob Gruber 2020-04-29 08:31:16 +02:00 committed by Commit Bot
parent 22242cb18b
commit 10842cad3c
5 changed files with 45 additions and 2 deletions

View File

@ -135,9 +135,10 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
}
// This could be a Smi kUninitializedValue or Code.
Object Code(bool is_latin1) const;
V8_EXPORT_PRIVATE Object Code(bool is_latin1) const;
// This could be a Smi kUninitializedValue or ByteArray.
Object Bytecode(bool is_latin1) const;
V8_EXPORT_PRIVATE Object Bytecode(bool is_latin1) const;
bool ShouldProduceBytecode();
inline bool HasCompiledCode() const;
inline void DiscardCompiledCodeForSerialization();

View File

@ -439,6 +439,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
static constexpr int kMaxRangesToInline = 32; // Arbitrary.
if (ranges->length() > kMaxRangesToInline) result->SetDoNotInline();
return result;
}
} else {

View File

@ -237,6 +237,15 @@ class RegExpNode : public ZoneObject {
eats_at_least_ = eats_at_least;
}
// Marks this node as one that must not be inlined.
//
// TODO(v8:10441): This is a hacky way to avoid exponential code size growth
// for very large choice nodes that can be generated by Unicode property
// escapes. In order to avoid inlining (i.e. trace recursion), we pretend to
// have generated the maximum count of code copies already by setting
// trace_count_ to kMaxCopiesCodeGenerated.
// We should instead fix this properly, e.g. by using the code size budget
// (flush_budget) or by generating property escape matches as calls to a C
// function.
void SetDoNotInline() { trace_count_ = kMaxCopiesCodeGenerated; }
BoyerMooreLookahead* bm_info(bool not_at_start) {
return bm_info_[not_at_start ? 1 : 0];
}

View File

@ -621,4 +621,9 @@
'test-cpu-profiler/DeoptUntrackedFunction': [SKIP],
}], # variant == turboprop
##############################################################################
['no_i18n == True', {
'test-regexp/UnicodePropertyEscapeCodeSize': [SKIP],
}], # no_i18n == True
]

View File

@ -50,6 +50,7 @@
#include "src/utils/ostreams.h"
#include "src/zone/zone-list-inl.h"
#include "test/cctest/cctest.h"
#include "test/common/wasm/flag-utils.h"
namespace v8 {
namespace internal {
@ -2341,6 +2342,31 @@ TEST(PeepholeLabelFixupsComplex) {
}
}
// Regression test for v8:10441: a pattern made of several Unicode property
// escapes must compile to a regexp code object (or bytecode array) whose size
// stays below a fixed limit.
TEST(UnicodePropertyEscapeCodeSize) {
  // Run with regexp tier-up disabled.
  i::FlagScope<bool> no_tier_up(&v8::internal::FLAG_regexp_tier_up, false);

  LocalContext context;
  v8::HandleScope handle_scope(CcTest::isolate());

  // Build the regexp in script and execute it once against a two-byte
  // subject before inspecting what was generated.
  v8::Local<v8::RegExp> api_regexp =
      CompileRun("const r = /\\p{L}\\p{L}\\p{L}/u; r.exec('\\u200b'); r;")
          .As<v8::RegExp>();
  i::Handle<i::JSRegExp> regexp = Utils::OpenHandle(*api_regexp);

  static constexpr int kSizeLimit = 150 * KB;
  static constexpr bool kIsLatin1 = false;

  Object code_slot = regexp->Code(kIsLatin1);
  Object bytecode_slot = regexp->Bytecode(kIsLatin1);

  if (bytecode_slot.IsByteArray()) {
    // Bytecode was produced. With excessive inlining this was >250KB on x64.
    CHECK_LT(ByteArray::cast(bytecode_slot).Size(), kSizeLimit);
    return;
  }

  if (code_slot.IsCode()) {
    // Native code was produced. With excessive inlining this was >360KB on
    // x64.
    CHECK_LT(Code::cast(code_slot).Size(), kSizeLimit);
    CHECK_EQ(Code::cast(code_slot).kind(), Code::REGEXP);
    return;
  }

  // Neither bytecode nor native code is present after executing the regexp.
  UNREACHABLE();
}
#undef CHECK_PARSE_ERROR
#undef CHECK_SIMPLE
#undef CHECK_MIN_MAX