Reland "[regexp] Limit the size of inlined choice nodes"
This is a reland of 6a0e7224f3
Original change's description:
> [regexp] Limit the size of inlined choice nodes
>
> Codegen for Unicode property escapes (e.g. /\p{L}/u) can produce huge
> code objects. This effect can be further magnified through inlining,
> leading to code size that grows exponentially with the size of the
> pattern.
>
> This CL is a (fairly hacky) way to avoid exponential growth. We
> recognize choice nodes with 'many' choices and disable inlining for
> them. In the future we should fix this properly, either by using the
> code size budget correctly, or by improving codegen for property
> escapes.
>
> Bug: v8:10441
> Change-Id: I817f145251ec8b1b9906cc735c9e9bdb004c98ed
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2170229
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Yang Guo <yangguo@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#67433}
Tbr: yangguo@chromium.org
Bug: v8:10441
Change-Id: I9a16cc9e8248cb46d3d16a4e2d250968cc1b7b39
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2172679
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67462}
This commit is contained in:
parent
22242cb18b
commit
10842cad3c
@ -135,9 +135,10 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
|
||||
}
|
||||
|
||||
// This could be a Smi kUninitializedValue or Code.
|
||||
Object Code(bool is_latin1) const;
|
||||
V8_EXPORT_PRIVATE Object Code(bool is_latin1) const;
|
||||
// This could be a Smi kUninitializedValue or ByteArray.
|
||||
Object Bytecode(bool is_latin1) const;
|
||||
V8_EXPORT_PRIVATE Object Bytecode(bool is_latin1) const;
|
||||
|
||||
bool ShouldProduceBytecode();
|
||||
inline bool HasCompiledCode() const;
|
||||
inline void DiscardCompiledCodeForSerialization();
|
||||
|
@ -439,6 +439,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
|
||||
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
|
||||
AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
|
||||
AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
|
||||
static constexpr int kMaxRangesToInline = 32; // Arbitrary.
|
||||
if (ranges->length() > kMaxRangesToInline) result->SetDoNotInline();
|
||||
return result;
|
||||
}
|
||||
} else {
|
||||
|
@ -237,6 +237,15 @@ class RegExpNode : public ZoneObject {
|
||||
eats_at_least_ = eats_at_least;
|
||||
}
|
||||
|
||||
// Marks this node as not inlinable by setting trace_count_ to
// kMaxCopiesCodeGenerated (see the rationale below).
// TODO(v8:10441): This is a hacky way to avoid exponential code size growth
|
||||
// for very large choice nodes that can be generated by unicode property
|
||||
// escapes. In order to avoid inlining (i.e. trace recursion), we pretend to
|
||||
// have generated the maximum count of code copies already.
|
||||
// We should instead fix this properly, e.g. by using the code size budget
|
||||
// (flush_budget) or by generating property escape matches as calls to a C
|
||||
// function.
|
||||
void SetDoNotInline() { trace_count_ = kMaxCopiesCodeGenerated; }
|
||||
|
||||
// Returns the BoyerMooreLookahead stored for the given `not_at_start` state.
// bm_info_ is indexed with 1 when `not_at_start` is true and 0 otherwise.
BoyerMooreLookahead* bm_info(bool not_at_start) {
|
||||
return bm_info_[not_at_start ? 1 : 0];
|
||||
}
|
||||
|
@ -621,4 +621,9 @@
|
||||
'test-cpu-profiler/DeoptUntrackedFunction': [SKIP],
|
||||
}], # variant == turboprop
|
||||
|
||||
##############################################################################
|
||||
['no_i18n == True', {
|
||||
'test-regexp/UnicodePropertyEscapeCodeSize': [SKIP],
|
||||
}], # no_i18n == True
|
||||
|
||||
]
|
||||
|
@ -50,6 +50,7 @@
|
||||
#include "src/utils/ostreams.h"
|
||||
#include "src/zone/zone-list-inl.h"
|
||||
#include "test/cctest/cctest.h"
|
||||
#include "test/common/wasm/flag-utils.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
@ -2341,6 +2342,31 @@ TEST(PeepholeLabelFixupsComplex) {
|
||||
}
|
||||
}
|
||||
|
||||
// Regression test for v8:10441: checks that the compiled artifact (bytecode
// or native code) of a regexp consisting of three consecutive unicode
// property escapes stays below kMaxSize.
TEST(UnicodePropertyEscapeCodeSize) {
|
||||
// Sets FLAG_regexp_tier_up to false for this test (presumably restored when
// `f` goes out of scope -- confirm against FlagScope).
i::FlagScope<bool> f(&v8::internal::FLAG_regexp_tier_up, false);
|
||||
|
||||
LocalContext env;
|
||||
v8::HandleScope scope(CcTest::isolate());
|
||||
// Create the regexp in JS, exec it once against U+200B, and open the
// resulting v8::RegExp as an internal JSRegExp handle. The exec call is
// presumably what triggers compilation -- TODO confirm.
i::Handle<i::JSRegExp> re = Utils::OpenHandle(
|
||||
*CompileRun("const r = /\\p{L}\\p{L}\\p{L}/u; r.exec('\\u200b'); r;")
|
||||
.As<v8::RegExp>());
|
||||
|
||||
// Upper bound applied to whichever artifact exists; the pre-fix sizes noted
// in the branches below (>250KB and >360KB) both exceed it.
static constexpr int kMaxSize = 150 * KB;
|
||||
// Passed as `is_latin1` to Code()/Bytecode() to select the non-Latin-1
// variant; the subject character U+200B lies outside Latin-1.
static constexpr bool kIsNotLatin1 = false;
|
||||
// Either accessor may yield a Smi kUninitializedValue instead of an artifact
// (per the comments on JSRegExp::Code/Bytecode), hence the type checks below.
Object maybe_code = re->Code(kIsNotLatin1);
|
||||
Object maybe_bytecode = re->Bytecode(kIsNotLatin1);
|
||||
if (maybe_bytecode.IsByteArray()) {
|
||||
// On x64, excessive inlining produced >250KB.
|
||||
CHECK_LT(ByteArray::cast(maybe_bytecode).Size(), kMaxSize);
|
||||
} else if (maybe_code.IsCode()) {
|
||||
// On x64, excessive inlining produced >360KB.
|
||||
CHECK_LT(Code::cast(maybe_code).Size(), kMaxSize);
|
||||
CHECK_EQ(Code::cast(maybe_code).kind(), Code::REGEXP);
|
||||
} else {
|
||||
// Neither a ByteArray nor a Code object is present; the test treats this
// as a failure.
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
#undef CHECK_PARSE_ERROR
|
||||
#undef CHECK_SIMPLE
|
||||
#undef CHECK_MIN_MAX
|
||||
|
Loading…
Reference in New Issue
Block a user