[regexp] Prototype new linear time EXPERIMENTAL regexp engine
This adds the new JSRegExp::Type EXPERIMENTAL, which should eventually be implemented with an automata-based algorithm. Currently the new engine handles plain search strings only, i.e. regexps that do not contain operators or escape sequences. R=jgruber@chromium.org Bug: v8:10765 Change-Id: I6a10d9cdf4605d219dbe7cc1989df3bfa7349ff8 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2339094 Reviewed-by: Dominik Inführ <dinfuehr@chromium.org> Reviewed-by: Jakob Gruber <jgruber@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#69442}
This commit is contained in:
parent
491f979424
commit
46bf70a567
2
BUILD.gn
2
BUILD.gn
@ -3046,6 +3046,8 @@ v8_source_set("v8_base_without_compiler") {
|
||||
"src/profiler/tick-sample.h",
|
||||
"src/profiler/tracing-cpu-profiler.cc",
|
||||
"src/profiler/tracing-cpu-profiler.h",
|
||||
"src/regexp/experimental/experimental.cc",
|
||||
"src/regexp/experimental/experimental.h",
|
||||
"src/regexp/property-sequences.cc",
|
||||
"src/regexp/property-sequences.h",
|
||||
"src/regexp/regexp-ast.cc",
|
||||
|
@ -700,6 +700,7 @@ namespace internal {
|
||||
TFS(RegExpExecAtom, kRegExp, kString, kLastIndex, kMatchInfo) \
|
||||
TFS(RegExpExecInternal, kRegExp, kString, kLastIndex, kMatchInfo) \
|
||||
ASM(RegExpInterpreterTrampoline, CCall) \
|
||||
ASM(RegExpExperimentalTrampoline, CCall) \
|
||||
\
|
||||
/* Set */ \
|
||||
TFJ(SetConstructor, kDontAdaptArgumentsSentinel) \
|
||||
|
@ -28,7 +28,15 @@ using compiler::Node;
|
||||
// static
|
||||
void Builtins::Generate_RegExpInterpreterTrampoline(MacroAssembler* masm) {
|
||||
ExternalReference interpreter_code_entry =
|
||||
ExternalReference::re_match_for_call_from_js(masm->isolate());
|
||||
ExternalReference::re_match_for_call_from_js();
|
||||
masm->Jump(interpreter_code_entry);
|
||||
}
|
||||
|
||||
// Tail calls the experimental regular expression engine.
|
||||
// static
|
||||
void Builtins::Generate_RegExpExperimentalTrampoline(MacroAssembler* masm) {
|
||||
ExternalReference interpreter_code_entry =
|
||||
ExternalReference::re_experimental_match_for_call_from_js();
|
||||
masm->Jump(interpreter_code_entry);
|
||||
}
|
||||
|
||||
@ -399,9 +407,9 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
|
||||
int32_t values[] = {
|
||||
JSRegExp::IRREGEXP,
|
||||
JSRegExp::ATOM,
|
||||
JSRegExp::NOT_COMPILED,
|
||||
JSRegExp::EXPERIMENTAL,
|
||||
};
|
||||
Label* labels[] = {&next, &atom, &runtime};
|
||||
Label* labels[] = {&next, &atom, &next};
|
||||
|
||||
STATIC_ASSERT(arraysize(values) == arraysize(labels));
|
||||
Switch(tag, &unreachable, values, labels, arraysize(values));
|
||||
|
@ -4,8 +4,10 @@
|
||||
|
||||
specific_include_rules = {
|
||||
"external-reference.cc": [
|
||||
# Required to call IrregexpInterpreter::NativeMatch from builtin.
|
||||
# Required to call into IrregexpInterpreter and RegexpExperimental from
|
||||
# builtin.
|
||||
"+src/regexp/regexp-interpreter.h",
|
||||
"+src/regexp/experimental/experimental.h",
|
||||
"+src/regexp/regexp-macro-assembler-arch.h",
|
||||
],
|
||||
}
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include "src/objects/elements.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "src/objects/ordered-hash-table.h"
|
||||
#include "src/regexp/experimental/experimental.h"
|
||||
#include "src/regexp/regexp-interpreter.h"
|
||||
#include "src/regexp/regexp-macro-assembler-arch.h"
|
||||
#include "src/regexp/regexp-stack.h"
|
||||
@ -511,8 +512,11 @@ FUNCTION_REFERENCE_WITH_ISOLATE(re_check_stack_guard_state, re_stack_check_func)
|
||||
FUNCTION_REFERENCE_WITH_ISOLATE(re_grow_stack,
|
||||
NativeRegExpMacroAssembler::GrowStack)
|
||||
|
||||
FUNCTION_REFERENCE_WITH_ISOLATE(re_match_for_call_from_js,
|
||||
IrregexpInterpreter::MatchForCallFromJs)
|
||||
FUNCTION_REFERENCE(re_match_for_call_from_js,
|
||||
IrregexpInterpreter::MatchForCallFromJs)
|
||||
|
||||
FUNCTION_REFERENCE(re_experimental_match_for_call_from_js,
|
||||
ExperimentalRegExp::MatchForCallFromJs)
|
||||
|
||||
FUNCTION_REFERENCE_WITH_ISOLATE(
|
||||
re_case_insensitive_compare_unicode,
|
||||
|
@ -84,7 +84,6 @@ class StatsCounter;
|
||||
V(re_check_stack_guard_state, \
|
||||
"RegExpMacroAssembler*::CheckStackGuardState()") \
|
||||
V(re_grow_stack, "NativeRegExpMacroAssembler::GrowStack()") \
|
||||
V(re_match_for_call_from_js, "IrregexpInterpreter::MatchForCallFromJs") \
|
||||
V(re_word_character_map, "NativeRegExpMacroAssembler::word_character_map")
|
||||
|
||||
#define EXTERNAL_REFERENCE_LIST(V) \
|
||||
@ -231,6 +230,9 @@ class StatsCounter;
|
||||
"atomic_pair_compare_exchange_function") \
|
||||
V(js_finalization_registry_remove_cell_from_unregister_token_map, \
|
||||
"JSFinalizationRegistry::RemoveCellFromUnregisterTokenMap") \
|
||||
V(re_match_for_call_from_js, "IrregexpInterpreter::MatchForCallFromJs") \
|
||||
V(re_experimental_match_for_call_from_js, \
|
||||
"ExperimentalRegExp::MatchForCallFromJs") \
|
||||
EXTERNAL_REFERENCE_LIST_INTL(V)
|
||||
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
|
@ -1219,6 +1219,42 @@ void JSRegExp::JSRegExpVerify(Isolate* isolate) {
|
||||
CHECK(arr.get(JSRegExp::kAtomPatternIndex).IsString());
|
||||
break;
|
||||
}
|
||||
case JSRegExp::EXPERIMENTAL: {
|
||||
FixedArray arr = FixedArray::cast(data());
|
||||
Smi uninitialized = Smi::FromInt(JSRegExp::kUninitializedValue);
|
||||
|
||||
Object latin1_code = arr.get(JSRegExp::kIrregexpLatin1CodeIndex);
|
||||
Object uc16_code = arr.get(JSRegExp::kIrregexpUC16CodeIndex);
|
||||
Object experimental_pattern =
|
||||
arr.get(JSRegExp::kExperimentalPatternIndex);
|
||||
if (latin1_code.IsCode()) {
|
||||
// `this` should be a compiled regexp.
|
||||
CHECK(latin1_code.IsCode());
|
||||
CHECK_EQ(Code::cast(latin1_code).builtin_index(),
|
||||
Builtins::kRegExpExperimentalTrampoline);
|
||||
|
||||
CHECK(uc16_code.IsCode());
|
||||
CHECK_EQ(Code::cast(uc16_code).builtin_index(),
|
||||
Builtins::kRegExpExperimentalTrampoline);
|
||||
|
||||
CHECK(experimental_pattern.IsString());
|
||||
} else {
|
||||
CHECK_EQ(latin1_code, uninitialized);
|
||||
CHECK_EQ(uc16_code, uninitialized);
|
||||
CHECK_EQ(experimental_pattern, uninitialized);
|
||||
}
|
||||
|
||||
CHECK_EQ(arr.get(JSRegExp::kIrregexpMaxRegisterCountIndex),
|
||||
uninitialized);
|
||||
// TODO(mbid,v8:10765): Once the EXPERIMENTAL regexps support captures,
|
||||
// the capture count should be allowed to be a Smi >= 0.
|
||||
CHECK_EQ(arr.get(JSRegExp::kIrregexpCaptureCountIndex), Smi::FromInt(0));
|
||||
CHECK_EQ(arr.get(JSRegExp::kIrregexpCaptureNameMapIndex), uninitialized);
|
||||
CHECK_EQ(arr.get(JSRegExp::kIrregexpTicksUntilTierUpIndex),
|
||||
uninitialized);
|
||||
CHECK_EQ(arr.get(JSRegExp::kIrregexpBacktrackLimit), uninitialized);
|
||||
break;
|
||||
}
|
||||
case JSRegExp::IRREGEXP: {
|
||||
bool can_be_interpreted = RegExp::CanGenerateBytecode();
|
||||
|
||||
|
@ -3292,12 +3292,11 @@ Handle<StoreHandler> Factory::NewStoreHandler(int data_count) {
|
||||
return handle(StoreHandler::cast(New(map, AllocationType::kOld)), isolate());
|
||||
}
|
||||
|
||||
void Factory::SetRegExpAtomData(Handle<JSRegExp> regexp, JSRegExp::Type type,
|
||||
Handle<String> source, JSRegExp::Flags flags,
|
||||
Handle<Object> data) {
|
||||
void Factory::SetRegExpAtomData(Handle<JSRegExp> regexp, Handle<String> source,
|
||||
JSRegExp::Flags flags, Handle<Object> data) {
|
||||
Handle<FixedArray> store = NewFixedArray(JSRegExp::kAtomDataSize);
|
||||
|
||||
store->set(JSRegExp::kTagIndex, Smi::FromInt(type));
|
||||
store->set(JSRegExp::kTagIndex, Smi::FromInt(JSRegExp::ATOM));
|
||||
store->set(JSRegExp::kSourceIndex, *source);
|
||||
store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags));
|
||||
store->set(JSRegExp::kAtomPatternIndex, *data);
|
||||
@ -3305,7 +3304,7 @@ void Factory::SetRegExpAtomData(Handle<JSRegExp> regexp, JSRegExp::Type type,
|
||||
}
|
||||
|
||||
void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
|
||||
JSRegExp::Type type, Handle<String> source,
|
||||
Handle<String> source,
|
||||
JSRegExp::Flags flags, int capture_count,
|
||||
uint32_t backtrack_limit) {
|
||||
DCHECK(Smi::IsValid(backtrack_limit));
|
||||
@ -3314,7 +3313,7 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
|
||||
Smi ticks_until_tier_up = FLAG_regexp_tier_up
|
||||
? Smi::FromInt(FLAG_regexp_tier_up_ticks)
|
||||
: uninitialized;
|
||||
store->set(JSRegExp::kTagIndex, Smi::FromInt(type));
|
||||
store->set(JSRegExp::kTagIndex, Smi::FromInt(JSRegExp::IRREGEXP));
|
||||
store->set(JSRegExp::kSourceIndex, *source);
|
||||
store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags));
|
||||
store->set(JSRegExp::kIrregexpLatin1CodeIndex, uninitialized);
|
||||
@ -3329,6 +3328,29 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
|
||||
regexp->set_data(*store);
|
||||
}
|
||||
|
||||
// Allocates the data array for an EXPERIMENTAL regexp and installs it on
// `regexp`. The array uses the IRREGEXP layout plus one trailing pattern
// slot; apart from tag, source, flags and capture count, every slot starts
// out as JSRegExp::kUninitializedValue.
void Factory::SetRegExpExperimentalData(Handle<JSRegExp> regexp,
                                        Handle<String> source,
                                        JSRegExp::Flags flags,
                                        int capture_count) {
  Handle<FixedArray> data = NewFixedArray(JSRegExp::kExperimentalDataSize);
  Smi unset = Smi::FromInt(JSRegExp::kUninitializedValue);

  // Fields that carry a real value from the start.
  data->set(JSRegExp::kTagIndex, Smi::FromInt(JSRegExp::EXPERIMENTAL));
  data->set(JSRegExp::kSourceIndex, *source);
  data->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags));
  data->set(JSRegExp::kIrregexpCaptureCountIndex, Smi::FromInt(capture_count));

  // Everything else is left unset until (and unless) something fills it in.
  const int unset_slots[] = {
      JSRegExp::kIrregexpLatin1CodeIndex,
      JSRegExp::kIrregexpUC16CodeIndex,
      JSRegExp::kIrregexpLatin1BytecodeIndex,
      JSRegExp::kIrregexpUC16BytecodeIndex,
      JSRegExp::kIrregexpMaxRegisterCountIndex,
      JSRegExp::kIrregexpCaptureNameMapIndex,
      JSRegExp::kIrregexpTicksUntilTierUpIndex,
      JSRegExp::kIrregexpBacktrackLimit,
      JSRegExp::kExperimentalPatternIndex,
  };
  for (int slot : unset_slots) {
    data->set(slot, unset);
  }

  regexp->set_data(*data);
}
|
||||
|
||||
Handle<RegExpMatchInfo> Factory::NewRegExpMatchInfo() {
|
||||
// Initially, the last match info consists of all fixed fields plus space for
|
||||
// the match itself (i.e., 2 capture indices).
|
||||
|
@ -761,15 +761,19 @@ class V8_EXPORT_PRIVATE Factory : public FactoryBase<Factory> {
|
||||
|
||||
// Creates a new FixedArray that holds the data associated with the
|
||||
// atom regexp and stores it in the regexp.
|
||||
void SetRegExpAtomData(Handle<JSRegExp> regexp, JSRegExp::Type type,
|
||||
Handle<String> source, JSRegExp::Flags flags,
|
||||
Handle<Object> match_pattern);
|
||||
void SetRegExpAtomData(Handle<JSRegExp> regexp, Handle<String> source,
|
||||
JSRegExp::Flags flags, Handle<Object> match_pattern);
|
||||
|
||||
// Creates a new FixedArray that holds the data associated with the
|
||||
// irregexp regexp and stores it in the regexp.
|
||||
void SetRegExpIrregexpData(Handle<JSRegExp> regexp, JSRegExp::Type type,
|
||||
Handle<String> source, JSRegExp::Flags flags,
|
||||
int capture_count, uint32_t backtrack_limit);
|
||||
void SetRegExpIrregexpData(Handle<JSRegExp> regexp, Handle<String> source,
|
||||
JSRegExp::Flags flags, int capture_count,
|
||||
uint32_t backtrack_limit);
|
||||
|
||||
// Creates a new FixedArray that holds the data associated with the
|
||||
// experimental regexp and stores it in the regexp.
|
||||
void SetRegExpExperimentalData(Handle<JSRegExp> regexp, Handle<String> source,
|
||||
JSRegExp::Flags flags, int capture_count);
|
||||
|
||||
// Returns the value for a known global constant (a property of the global
|
||||
// object which is neither configurable nor writable) like 'undefined'.
|
||||
|
@ -38,6 +38,7 @@ int JSRegExp::CaptureCount() const {
|
||||
switch (TypeTag()) {
|
||||
case ATOM:
|
||||
return 0;
|
||||
case EXPERIMENTAL:
|
||||
case IRREGEXP:
|
||||
return Smi::ToInt(DataAt(kIrregexpCaptureCountIndex));
|
||||
default:
|
||||
|
@ -247,14 +247,20 @@ bool JSRegExp::ShouldProduceBytecode() {
|
||||
(FLAG_regexp_tier_up && !MarkedForTierUp());
|
||||
}
|
||||
|
||||
// An irregexp is considered to be marked for tier up if the tier-up ticks value
|
||||
// reaches zero. An atom is not subject to tier-up implementation, so the
|
||||
// tier-up ticks value is not set.
|
||||
// Only irregexps are subject to tier-up.
|
||||
bool JSRegExp::CanTierUp() {
|
||||
return FLAG_regexp_tier_up && TypeTag() == JSRegExp::IRREGEXP;
|
||||
}
|
||||
|
||||
// An irregexp is considered to be marked for tier up if the tier-up ticks
|
||||
// value reaches zero.
|
||||
bool JSRegExp::MarkedForTierUp() {
|
||||
DCHECK(data().IsFixedArray());
|
||||
if (TypeTag() == JSRegExp::ATOM || !FLAG_regexp_tier_up) {
|
||||
|
||||
if (!CanTierUp()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return Smi::ToInt(DataAt(kIrregexpTicksUntilTierUpIndex)) == 0;
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
|
||||
// NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
|
||||
// ATOM: A simple string to match against using an indexOf operation.
|
||||
// IRREGEXP: Compiled with Irregexp.
|
||||
enum Type { NOT_COMPILED, ATOM, IRREGEXP };
|
||||
// EXPERIMENTAL: Compiled to use the new linear time engine.
|
||||
enum Type { NOT_COMPILED, ATOM, IRREGEXP, EXPERIMENTAL };
|
||||
DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()
|
||||
|
||||
static constexpr base::Optional<Flag> FlagFromChar(char c) {
|
||||
@ -81,6 +82,7 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
|
||||
static Flags FlagsFromString(Isolate* isolate, Handle<String> flags,
|
||||
bool* success);
|
||||
|
||||
bool CanTierUp();
|
||||
bool MarkedForTierUp();
|
||||
void ResetLastTierUpTick();
|
||||
void TierUpTick();
|
||||
@ -187,6 +189,19 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
|
||||
static const int kIrregexpBacktrackLimit = kDataIndex + 8;
|
||||
static const int kIrregexpDataSize = kDataIndex + 9;
|
||||
|
||||
// TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array is an
|
||||
// extension of IRREGEXP data, with most fields set to some
|
||||
// default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP
|
||||
// regexps take the same code path in
|
||||
// `RegExpBuiltinsAssembler::RegExpExecInternal`, which reads off various
|
||||
// fields from the `store` array. `RegExpExecInternal` should probably
|
||||
// distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of
|
||||
// all the IRREGEXP only fields.
|
||||
|
||||
// The same as kAtomPatternIndex for atom regexps.
|
||||
static constexpr int kExperimentalPatternIndex = kIrregexpDataSize;
|
||||
static constexpr int kExperimentalDataSize = kIrregexpDataSize + 1;
|
||||
|
||||
// In-object fields.
|
||||
static const int kLastIndexFieldIndex = 0;
|
||||
static const int kInObjectFieldCount = 1;
|
||||
|
179
src/regexp/experimental/experimental.cc
Normal file
179
src/regexp/experimental/experimental.cc
Normal file
@ -0,0 +1,179 @@
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/regexp/experimental/experimental.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "src/objects/js-regexp-inl.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// Marks `re` as an EXPERIMENTAL regexp by installing a fresh, not yet
// compiled data array for it. Optionally traces the source being handled.
void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,
                                    Handle<String> source,
                                    JSRegExp::Flags flags, int capture_count) {
  if (FLAG_trace_experimental_regexp_engine) {
    std::cout << "Using experimental regexp engine for: " << *source
              << std::endl;
  }

  auto* factory = isolate->factory();
  factory->SetRegExpExperimentalData(re, source, flags, capture_count);
}
|
||||
|
||||
// A regexp counts as compiled once its pattern slot holds a String; until
// Compile() runs, that slot holds the uninitialized marker instead.
bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re) {
  Object pattern_slot = re->DataAt(JSRegExp::kExperimentalPatternIndex);
  return pattern_slot.IsString();
}
|
||||
|
||||
// "Compiles" an EXPERIMENTAL regexp. For now this only records the source
// string as the pattern to search for and points both code slots at the
// experimental trampoline builtin.
void ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
  DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL);
  // TODO(mbid,v8:10765): Actually compile here.
  Handle<FixedArray> regexp_data(FixedArray::cast(re->data()), isolate);
  Handle<Code> experimental_entry =
      BUILTIN_CODE(isolate, RegExpExperimentalTrampoline);

  // The pattern is simply the source string.
  regexp_data->set(JSRegExp::kExperimentalPatternIndex,
                   regexp_data->get(JSRegExp::kSourceIndex));

  // The same trampoline is installed for both subject encodings.
  regexp_data->set(JSRegExp::kIrregexpUC16CodeIndex, *experimental_entry);
  regexp_data->set(JSRegExp::kIrregexpLatin1CodeIndex, *experimental_entry);
}
|
||||
|
||||
struct match_range {
|
||||
int32_t begin;
|
||||
int32_t end;
|
||||
};
|
||||
|
||||
// Returns the number of matches.
|
||||
int32_t ExperimentalRegExp::ExecRaw(JSRegExp regexp, String subject,
|
||||
int32_t* output_registers,
|
||||
int32_t output_register_count,
|
||||
int32_t subject_index) {
|
||||
String needle =
|
||||
String::cast(regexp.DataAt(JSRegExp::kExperimentalPatternIndex));
|
||||
|
||||
if (FLAG_trace_experimental_regexp_engine) {
|
||||
std::cout << "Searching for " << output_register_count / 2
|
||||
<< " occurences of " << needle << " in " << subject << std::endl;
|
||||
}
|
||||
|
||||
DCHECK(needle.IsFlat());
|
||||
DCHECK(subject.IsFlat());
|
||||
|
||||
const int needle_len = needle.length();
|
||||
const int subject_len = subject.length();
|
||||
|
||||
DCHECK_GT(needle_len, 0);
|
||||
|
||||
DCHECK_EQ(output_register_count % 2, 0);
|
||||
|
||||
if (subject_index + needle_len > subject_len) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
match_range* matches = reinterpret_cast<match_range*>(output_registers);
|
||||
const int32_t max_match_num = output_register_count / 2;
|
||||
|
||||
// `state_num` does not overflow because the max length of strings is
|
||||
// strictly less than INT_MAX.
|
||||
const int state_num = needle_len + 1;
|
||||
const int start_state = 0;
|
||||
const int accepting_state = needle_len;
|
||||
// TODO(mbid,v8:10765): We probably don't want to allocate a new vector here
|
||||
// in every execution.
|
||||
std::vector<int8_t> in_state(state_num, false);
|
||||
in_state[start_state] = true;
|
||||
|
||||
DisallowHeapAllocation no_gc;
|
||||
String::FlatContent needle_content = needle.GetFlatContent(no_gc);
|
||||
String::FlatContent subject_content = subject.GetFlatContent(no_gc);
|
||||
|
||||
DCHECK(needle_content.IsFlat());
|
||||
DCHECK(subject_content.IsFlat());
|
||||
|
||||
int32_t match_num = 0;
|
||||
while (subject_index != subject_len && match_num != max_match_num) {
|
||||
uc16 subject_char = subject_content.Get(subject_index);
|
||||
|
||||
for (int needle_index = needle_len - 1; needle_index >= 0; --needle_index) {
|
||||
uc16 needle_char = needle_content.Get(needle_index);
|
||||
if (in_state[needle_index] && needle_char == subject_char) {
|
||||
in_state[needle_index + 1] = true;
|
||||
} else {
|
||||
in_state[needle_index + 1] = false;
|
||||
}
|
||||
}
|
||||
if (in_state[accepting_state]) {
|
||||
match_range& match = matches[match_num];
|
||||
match.end = subject_index + 1;
|
||||
match.begin = match.end - needle_len;
|
||||
if (FLAG_trace_experimental_regexp_engine) {
|
||||
std::cout << "Found match at [" << match.begin << ", " << match.end
|
||||
<< ")" << std::endl;
|
||||
}
|
||||
++match_num;
|
||||
in_state.assign(state_num, false);
|
||||
in_state[start_state] = true;
|
||||
}
|
||||
++subject_index;
|
||||
}
|
||||
|
||||
return match_num;
|
||||
}
|
||||
|
||||
// C++ entry point targeted by the RegExpExperimentalTrampoline builtin.
// `subject` and `regexp` are raw tagged addresses of a String and a JSRegExp.
// `input_start`, `input_end` and `backtrack_stack` are accepted but not used
// by this engine. Returns the number of matches written to
// `output_registers`.
int32_t ExperimentalRegExp::MatchForCallFromJs(
    Address subject, int32_t start_position, Address input_start,
    Address input_end, int* output_registers, int32_t output_register_count,
    Address backtrack_stack, RegExp::CallOrigin call_origin, Isolate* isolate,
    Address regexp) {
  DCHECK_NOT_NULL(isolate);
  DCHECK_NOT_NULL(output_registers);
  DCHECK(call_origin == RegExp::CallOrigin::kFromJs);

  // Only raw objects are handled below, so allocation, JS execution and
  // handle usage are all forbidden for the duration of the call.
  DisallowHeapAllocation no_gc;
  DisallowJavascriptExecution no_js(isolate);
  DisallowHandleAllocation no_handles;
  DisallowHandleDereference no_deref;

  String raw_subject = String::cast(Object(subject));
  JSRegExp raw_regexp = JSRegExp::cast(Object(regexp));

  const int32_t match_count =
      ExecRaw(raw_regexp, raw_subject, output_registers, output_register_count,
              start_position);
  return match_count;
}
|
||||
|
||||
// Runs `regexp` against `subject` starting at `subject_index`, compiling the
// regexp first if needed. Returns null when there is no match; otherwise
// records the match in `last_match_info` and returns it.
MaybeHandle<Object> ExperimentalRegExp::Exec(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
    int subject_index, Handle<RegExpMatchInfo> last_match_info) {
  if (!IsCompiled(regexp)) {
    Compile(isolate, regexp);
  }

  // ExecRaw requires a flat subject.
  subject = String::Flatten(isolate, subject);

  // Room for exactly one (begin, end) pair, i.e. a single match.
  match_range match;

  int32_t* output_registers = &match.begin;
  int32_t output_register_count = sizeof(match_range) / sizeof(int32_t);

  int capture_count = regexp->CaptureCount();

  int num_matches = ExecRaw(*regexp, *subject, output_registers,
                            output_register_count, subject_index);

  if (num_matches == 0) {
    return isolate->factory()->null_value();
  }

  DCHECK_EQ(num_matches, 1);
  return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
                                  capture_count, output_registers);
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
41
src/regexp/experimental/experimental.h
Normal file
41
src/regexp/experimental/experimental.h
Normal file
@ -0,0 +1,41 @@
|
||||
// Copyright 2020 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_H_
|
||||
#define V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_H_
|
||||
|
||||
#include "src/regexp/regexp.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
class ExperimentalRegExp final : public AllStatic {
|
||||
public:
|
||||
// Initialization & Compilation:
|
||||
static void Initialize(Isolate* isolate, Handle<JSRegExp> re,
|
||||
Handle<String> pattern, JSRegExp::Flags flags,
|
||||
int capture_count);
|
||||
static bool IsCompiled(Handle<JSRegExp> re);
|
||||
static void Compile(Isolate* isolate, Handle<JSRegExp> re);
|
||||
|
||||
// Execution:
|
||||
static int32_t MatchForCallFromJs(Address subject, int32_t start_position,
|
||||
Address input_start, Address input_end,
|
||||
int* output_registers,
|
||||
int32_t output_register_count,
|
||||
Address backtrack_stack,
|
||||
RegExp::CallOrigin call_origin,
|
||||
Isolate* isolate, Address regexp);
|
||||
static MaybeHandle<Object> Exec(Isolate* isolate, Handle<JSRegExp> regexp,
|
||||
Handle<String> subject, int index,
|
||||
Handle<RegExpMatchInfo> last_match_info);
|
||||
static int32_t ExecRaw(JSRegExp regexp, String subject,
|
||||
int32_t* output_registers,
|
||||
int32_t output_register_count, int32_t subject_index);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#endif // V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_H_
|
@ -1114,6 +1114,8 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
|
||||
|
||||
DisallowHeapAllocation no_gc;
|
||||
DisallowJavascriptExecution no_js(isolate);
|
||||
DisallowHandleAllocation no_handles;
|
||||
DisallowHandleDereference no_deref;
|
||||
|
||||
String subject_string = String::cast(Object(subject));
|
||||
JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "src/diagnostics/code-tracer.h"
|
||||
#include "src/heap/heap-inl.h"
|
||||
#include "src/objects/js-regexp-inl.h"
|
||||
#include "src/regexp/experimental/experimental.h"
|
||||
#include "src/regexp/regexp-bytecode-generator.h"
|
||||
#include "src/regexp/regexp-bytecodes.h"
|
||||
#include "src/regexp/regexp-compiler.h"
|
||||
@ -173,14 +174,24 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
|
||||
|
||||
bool has_been_compiled = false;
|
||||
|
||||
if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
|
||||
!HasFewDifferentCharacters(pattern)) {
|
||||
if (FLAG_enable_experimental_regexp_engine && parse_result.simple &&
|
||||
!IgnoreCase(flags) && !IsSticky(flags)) {
|
||||
// Parse-tree is a single atom that is equal to the pattern. For now we let
|
||||
// the experimental regexp engine deal with this case instead of string
|
||||
// search via ATOM (modulo some performance-related heuristic).
|
||||
int capture_count = 0;
|
||||
ExperimentalRegExp::Initialize(isolate, re, pattern, flags, capture_count);
|
||||
has_been_compiled = true;
|
||||
} else if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
|
||||
!HasFewDifferentCharacters(pattern)) {
|
||||
// Parse-tree is a single atom that is equal to the pattern.
|
||||
RegExpImpl::AtomCompile(isolate, re, pattern, flags, pattern);
|
||||
has_been_compiled = true;
|
||||
} else if (parse_result.tree->IsAtom() && !IsSticky(flags) &&
|
||||
parse_result.capture_count == 0) {
|
||||
RegExpAtom* atom = parse_result.tree->AsAtom();
|
||||
// The pattern source might (?) contain escape sequences, but they're
|
||||
// resolved in atom_string.
|
||||
Vector<const uc16> atom_pattern = atom->data();
|
||||
Handle<String> atom_string;
|
||||
ASSIGN_RETURN_ON_EXCEPTION(
|
||||
@ -211,15 +222,17 @@ MaybeHandle<Object> RegExp::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
|
||||
Handle<String> subject, int index,
|
||||
Handle<RegExpMatchInfo> last_match_info) {
|
||||
switch (regexp->TypeTag()) {
|
||||
case JSRegExp::NOT_COMPILED:
|
||||
UNREACHABLE();
|
||||
case JSRegExp::ATOM:
|
||||
return RegExpImpl::AtomExec(isolate, regexp, subject, index,
|
||||
last_match_info);
|
||||
case JSRegExp::IRREGEXP: {
|
||||
case JSRegExp::IRREGEXP:
|
||||
return RegExpImpl::IrregexpExec(isolate, regexp, subject, index,
|
||||
last_match_info);
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
case JSRegExp::EXPERIMENTAL:
|
||||
return ExperimentalRegExp::Exec(isolate, regexp, subject, index,
|
||||
last_match_info);
|
||||
}
|
||||
}
|
||||
|
||||
@ -228,8 +241,7 @@ MaybeHandle<Object> RegExp::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
|
||||
void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
|
||||
Handle<String> pattern, JSRegExp::Flags flags,
|
||||
Handle<String> match_pattern) {
|
||||
isolate->factory()->SetRegExpAtomData(re, JSRegExp::ATOM, pattern, flags,
|
||||
match_pattern);
|
||||
isolate->factory()->SetRegExpAtomData(re, pattern, flags, match_pattern);
|
||||
}
|
||||
|
||||
static void SetAtomLastCapture(Isolate* isolate,
|
||||
@ -487,8 +499,8 @@ void RegExpImpl::IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
|
||||
JSRegExp::Flags flags, int capture_count,
|
||||
uint32_t backtrack_limit) {
|
||||
// Initialize compiled code entries to null.
|
||||
isolate->factory()->SetRegExpIrregexpData(
|
||||
re, JSRegExp::IRREGEXP, pattern, flags, capture_count, backtrack_limit);
|
||||
isolate->factory()->SetRegExpIrregexpData(re, pattern, flags, capture_count,
|
||||
backtrack_limit);
|
||||
}
|
||||
|
||||
// static
|
||||
@ -871,17 +883,28 @@ RegExpGlobalCache::RegExpGlobalCache(Handle<JSRegExp> regexp,
|
||||
isolate_(isolate) {
|
||||
bool interpreted = regexp->ShouldProduceBytecode();
|
||||
|
||||
if (regexp_->TypeTag() == JSRegExp::ATOM) {
|
||||
static const int kAtomRegistersPerMatch = 2;
|
||||
registers_per_match_ = kAtomRegistersPerMatch;
|
||||
// There is no distinction between interpreted and native for atom regexps.
|
||||
interpreted = false;
|
||||
} else {
|
||||
registers_per_match_ = RegExp::IrregexpPrepare(isolate_, regexp_, subject_);
|
||||
if (registers_per_match_ < 0) {
|
||||
num_matches_ = -1; // Signal exception.
|
||||
return;
|
||||
switch (regexp_->TypeTag()) {
|
||||
case JSRegExp::NOT_COMPILED:
|
||||
UNREACHABLE();
|
||||
case JSRegExp::EXPERIMENTAL:
|
||||
// TODO(mbid,v8:10765): At the moment experimental regexps can't deal with
|
||||
// captures; this should change in the future.
|
||||
case JSRegExp::ATOM: {
|
||||
static const int kAtomRegistersPerMatch = 2;
|
||||
registers_per_match_ = kAtomRegistersPerMatch;
|
||||
// There is no distinction between interpreted and native for atom
|
||||
// regexps.
|
||||
interpreted = false;
|
||||
break;
|
||||
}
|
||||
case JSRegExp::IRREGEXP:
|
||||
registers_per_match_ =
|
||||
RegExp::IrregexpPrepare(isolate_, regexp_, subject_);
|
||||
if (registers_per_match_ < 0) {
|
||||
num_matches_ = -1; // Signal exception.
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
DCHECK(IsGlobal(regexp->GetFlags()));
|
||||
@ -947,23 +970,39 @@ int32_t* RegExpGlobalCache::FetchNext() {
|
||||
&register_array_[(current_match_index_ - 1) * registers_per_match_];
|
||||
int last_end_index = last_match[1];
|
||||
|
||||
if (regexp_->TypeTag() == JSRegExp::ATOM) {
|
||||
num_matches_ =
|
||||
RegExpImpl::AtomExecRaw(isolate_, regexp_, subject_, last_end_index,
|
||||
register_array_, register_array_size_);
|
||||
} else {
|
||||
int last_start_index = last_match[0];
|
||||
if (last_start_index == last_end_index) {
|
||||
// Zero-length match. Advance by one code point.
|
||||
last_end_index = AdvanceZeroLength(last_end_index);
|
||||
switch (regexp_->TypeTag()) {
|
||||
case JSRegExp::NOT_COMPILED:
|
||||
UNREACHABLE();
|
||||
case JSRegExp::ATOM:
|
||||
num_matches_ =
|
||||
RegExpImpl::AtomExecRaw(isolate_, regexp_, subject_, last_end_index,
|
||||
register_array_, register_array_size_);
|
||||
break;
|
||||
case JSRegExp::EXPERIMENTAL: {
|
||||
if (!ExperimentalRegExp::IsCompiled(regexp_)) {
|
||||
ExperimentalRegExp::Compile(isolate_, regexp_);
|
||||
}
|
||||
DisallowHeapAllocation no_gc;
|
||||
num_matches_ =
|
||||
ExperimentalRegExp::ExecRaw(*regexp_, *subject_, register_array_,
|
||||
register_array_size_, last_end_index);
|
||||
break;
|
||||
}
|
||||
if (last_end_index > subject_->length()) {
|
||||
num_matches_ = 0; // Signal failed match.
|
||||
return nullptr;
|
||||
case JSRegExp::IRREGEXP: {
|
||||
int last_start_index = last_match[0];
|
||||
if (last_start_index == last_end_index) {
|
||||
// Zero-length match. Advance by one code point.
|
||||
last_end_index = AdvanceZeroLength(last_end_index);
|
||||
}
|
||||
if (last_end_index > subject_->length()) {
|
||||
num_matches_ = 0; // Signal failed match.
|
||||
return nullptr;
|
||||
}
|
||||
num_matches_ = RegExpImpl::IrregexpExecRaw(
|
||||
isolate_, regexp_, subject_, last_end_index, register_array_,
|
||||
register_array_size_);
|
||||
break;
|
||||
}
|
||||
num_matches_ = RegExpImpl::IrregexpExecRaw(
|
||||
isolate_, regexp_, subject_, last_end_index, register_array_,
|
||||
register_array_size_);
|
||||
}
|
||||
|
||||
if (num_matches_ <= 0) return nullptr;
|
||||
|
@ -642,8 +642,7 @@ static Handle<JSRegExp> CreateJSRegExp(Handle<String> source, Handle<Code> code,
|
||||
Handle<JSRegExp> regexp =
|
||||
Handle<JSRegExp>::cast(factory->NewJSObject(constructor));
|
||||
|
||||
factory->SetRegExpIrregexpData(regexp, JSRegExp::IRREGEXP, source,
|
||||
JSRegExp::kNone, 0,
|
||||
factory->SetRegExpIrregexpData(regexp, source, JSRegExp::kNone, 0,
|
||||
JSRegExp::kNoBacktrackLimit);
|
||||
regexp->SetDataAt(is_unicode ? JSRegExp::kIrregexpUC16CodeIndex
|
||||
: JSRegExp::kIrregexpLatin1CodeIndex,
|
||||
|
Loading…
Reference in New Issue
Block a user