[regexp] Further narrow public API and restrict includes to regexp.h

This CL renames jsregexp.{h,cc} to regexp.{h,cc}, hides all non-public
functions of RegExpImpl in the .cc file, and renames the public parts
of RegExpImpl to just RegExp. Include directives from outside the
src/regexp directory are limited to regexp.h, regexp-stack.h, and
regexp-utils.h. We also expose all result codes that can be returned
by irregexp code (including RETRY) on the public header since they
are needed elsewhere, e.g. in builtins.

Bug: v8:9359
Change-Id: Iae1a01ac9f6e1e4dc168f3fbe8fe8679cb6b1259
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1662297
Reviewed-by: Michael Achenbach <machenbach@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62240}
This commit is contained in:
Jakob Gruber 2019-06-17 17:16:39 +02:00 committed by Commit Bot
parent 92fdbc1c42
commit a8c62102e1
26 changed files with 227 additions and 249 deletions

View File

@ -2677,8 +2677,6 @@ v8_source_set("v8_base_without_compiler") {
"src/profiler/tick-sample.h",
"src/profiler/tracing-cpu-profiler.cc",
"src/profiler/tracing-cpu-profiler.h",
"src/regexp/jsregexp.cc",
"src/regexp/jsregexp.h",
"src/regexp/property-sequences.cc",
"src/regexp/property-sequences.h",
"src/regexp/regexp-ast.cc",
@ -2706,6 +2704,8 @@ v8_source_set("v8_base_without_compiler") {
"src/regexp/regexp-stack.h",
"src/regexp/regexp-utils.cc",
"src/regexp/regexp-utils.h",
"src/regexp/regexp.cc",
"src/regexp/regexp.h",
"src/roots/roots-inl.h",
"src/roots/roots.cc",
"src/roots/roots.h",

View File

@ -30,6 +30,10 @@ include_rules = [
"+src/interpreter/interpreter.h",
"+src/interpreter/interpreter-generator.h",
"+src/interpreter/setup-interpreter.h",
"-src/regexp",
"+src/regexp/regexp.h",
"+src/regexp/regexp-stack.h",
"+src/regexp/regexp-utils.h",
"-src/trap-handler",
"+src/trap-handler/handler-inside-posix.h",
"+src/trap-handler/handler-inside-win.h",

View File

@ -15,7 +15,7 @@
#include "src/objects/js-regexp-string-iterator.h"
#include "src/objects/js-regexp.h"
#include "src/objects/regexp-match-info.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp.h"
namespace v8 {
namespace internal {
@ -550,19 +550,18 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
// We expect exactly one result since we force the called regexp to behave
// as non-global.
TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
GotoIf(
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
&if_success);
GotoIf(
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
&if_failure);
GotoIf(IntPtrEqual(int_result,
IntPtrConstant(NativeRegExpMacroAssembler::SUCCESS)),
&if_success);
GotoIf(IntPtrEqual(int_result,
IntPtrConstant(NativeRegExpMacroAssembler::FAILURE)),
&if_failure);
GotoIf(IntPtrEqual(int_result,
IntPtrConstant(NativeRegExpMacroAssembler::EXCEPTION)),
IntPtrConstant(RegExp::kInternalRegExpException)),
&if_exception);
CSA_ASSERT(this,
IntPtrEqual(int_result,
IntPtrConstant(NativeRegExpMacroAssembler::RETRY)));
CSA_ASSERT(this, IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpRetry)));
Goto(&runtime);
}
@ -1028,7 +1027,7 @@ TF_BUILTIN(RegExpPrototypeExecSlow, RegExpBuiltinsAssembler) {
// Fast path stub for ATOM regexps. String matching is done by StringIndexOf,
// and {match_info} is updated on success.
// The slow path is implemented in RegExpImpl::AtomExec.
// The slow path is implemented in RegExpImpl::AtomExec (src/regexp/regexp.cc),
// which is reached through the public RegExp::Exec entry point.
TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) {
TNode<JSRegExp> regexp = CAST(Parameter(Descriptor::kRegExp));
TNode<String> subject_string = CAST(Parameter(Descriptor::kString));

View File

@ -6,8 +6,8 @@
#include "src/builtins/builtins.h"
#include "src/logging/counters.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-utils.h"
#include "src/regexp/regexp.h"
#include "src/strings/string-builder-inl.h"
namespace v8 {

9
src/codegen/DEPS Normal file
View File

@ -0,0 +1,9 @@
# Copyright 2019 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Per-file include allowances for src/codegen.
# external-reference.cc is additionally permitted to include the regexp
# macro-assembler arch header.
# NOTE(review): presumably this is an exception to a broader restriction on
# src/regexp includes declared in a parent DEPS file -- confirm there.
specific_include_rules = {
"external-reference.cc": [
"+src/regexp/regexp-macro-assembler-arch.h",
],
}

View File

@ -1,28 +0,0 @@
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#if V8_TARGET_ARCH_PPC
#include "src/api/api-arguments-inl.h"
#include "src/base/bits.h"
#include "src/code-stubs.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/execution/frame-constants.h"
#include "src/execution/frames.h"
#include "src/execution/isolate.h"
#include "src/ic/ic.h"
#include "src/ic/stub-cache.h"
#include "src/init/bootstrapper.h"
#include "src/numbers/double.h"
#include "src/objects/api-callbacks.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/runtime/runtime.h"
namespace v8 {
namespace internal {} // namespace internal
} // namespace v8
#endif // V8_TARGET_ARCH_PPC

View File

@ -1,27 +0,0 @@
// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#if V8_TARGET_ARCH_S390
#include "src/api/api-arguments-inl.h"
#include "src/base/bits.h"
#include "src/code-stubs.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/execution/frame-constants.h"
#include "src/execution/frames.h"
#include "src/execution/isolate.h"
#include "src/ic/ic.h"
#include "src/ic/stub-cache.h"
#include "src/init/bootstrapper.h"
#include "src/objects/api-callbacks.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/runtime/runtime.h"
namespace v8 {
namespace internal {} // namespace internal
} // namespace v8
#endif // V8_TARGET_ARCH_S390

View File

@ -63,7 +63,7 @@
#include "src/objects/struct-inl.h"
#include "src/objects/template-objects-inl.h"
#include "src/objects/transitions-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "src/utils/ostreams.h"
#include "src/wasm/wasm-objects-inl.h"
#include "torque-generated/class-verifiers-tq.h"
@ -1457,7 +1457,7 @@ void JSRegExp::JSRegExpVerify(Isolate* isolate) {
break;
}
case JSRegExp::IRREGEXP: {
bool is_native = RegExpImpl::UsesNativeRegExp();
bool is_native = RegExp::GeneratesNativeCode();
FixedArray arr = FixedArray::cast(data());
Object one_byte_data = arr.get(JSRegExp::kIrregexpLatin1CodeIndex);

View File

@ -60,7 +60,7 @@
#include "src/objects/struct-inl.h"
#include "src/objects/template-objects-inl.h"
#include "src/objects/transitions-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "src/utils/ostreams.h"
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-engine.h"

View File

@ -64,7 +64,7 @@
#include "src/objects/shared-function-info.h"
#include "src/objects/slots-atomic-inl.h"
#include "src/objects/slots-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "src/snapshot/embedded/embedded-data.h"
#include "src/snapshot/natives.h"
#include "src/snapshot/serializer-common.h"

View File

@ -40,7 +40,7 @@
#include "src/objects/stack-frame-info.h"
#include "src/objects/string.h"
#include "src/objects/template-objects-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "src/wasm/wasm-objects.h"
namespace v8 {

View File

@ -104,7 +104,7 @@
#include "src/objects/template-objects-inl.h"
#include "src/objects/transitions-inl.h"
#include "src/parsing/preparse-data.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "src/strings/string-builder-inl.h"
#include "src/strings/string-search.h"
#include "src/strings/string-stream.h"
@ -6341,8 +6341,8 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
ASSIGN_RETURN_ON_EXCEPTION(isolate, escaped_source,
EscapeRegExpSource(isolate, source), JSRegExp);
RETURN_ON_EXCEPTION(
isolate, RegExpImpl::Compile(isolate, regexp, source, flags), JSRegExp);
RETURN_ON_EXCEPTION(isolate, RegExp::Compile(isolate, regexp, source, flags),
JSRegExp);
regexp->set_source(*escaped_source);
regexp->set_flags(Smi::FromInt(flags));

View File

@ -392,7 +392,7 @@ class V8_EXPORT_PRIVATE Scanner {
// Returns true if a pattern is scanned.
bool ScanRegExpPattern();
// Scans the input as regular expression flags. Returns the flags on success.
Maybe<RegExp::Flags> ScanRegExpFlags();
Maybe<v8::RegExp::Flags> ScanRegExpFlags();
// Scans the input as a template literal
Token::Value ScanTemplateContinuation() {

View File

@ -5,7 +5,7 @@
#include "src/regexp/regexp-compiler.h"
#include "src/execution/isolate.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "src/strings/unicode-inl.h"
#include "src/utils/splay-tree-inl.h"
#include "src/zone/zone-list-inl.h"

View File

@ -9,9 +9,9 @@
#include "src/ast/ast.h"
#include "src/base/small-vector.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-bytecodes.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp.h"
#include "src/strings/unicode.h"
#include "src/utils/utils.h"

View File

@ -7,17 +7,19 @@
#ifndef V8_REGEXP_REGEXP_INTERPRETER_H_
#define V8_REGEXP_REGEXP_INTERPRETER_H_
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
namespace v8 {
namespace internal {
class V8_EXPORT_PRIVATE IrregexpInterpreter {
public:
enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
STATIC_ASSERT(EXCEPTION == static_cast<int>(RegExpImpl::RE_EXCEPTION));
STATIC_ASSERT(FAILURE == static_cast<int>(RegExpImpl::RE_FAILURE));
STATIC_ASSERT(SUCCESS == static_cast<int>(RegExpImpl::RE_SUCCESS));
enum Result {
FAILURE = RegExp::kInternalRegExpFailure,
SUCCESS = RegExp::kInternalRegExpSuccess,
EXCEPTION = RegExp::kInternalRegExpException,
RETRY = RegExp::kInternalRegExpRetry,
};
// The caller is responsible for initializing registers before each call.
static Result Match(Isolate* isolate, Handle<ByteArray> code_array,

View File

@ -7,6 +7,7 @@
#include "src/codegen/label.h"
#include "src/regexp/regexp-ast.h"
#include "src/regexp/regexp.h"
namespace v8 {
namespace internal {
@ -206,7 +207,12 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
// FAILURE: Matching failed.
// SUCCESS: Matching succeeded, and the output array has been filled with
// capture positions.
enum Result { RETRY = -2, EXCEPTION = -1, FAILURE = 0, SUCCESS = 1 };
enum Result {
FAILURE = RegExp::kInternalRegExpFailure,
SUCCESS = RegExp::kInternalRegExpSuccess,
EXCEPTION = RegExp::kInternalRegExpException,
RETRY = RegExp::kInternalRegExpRetry,
};
NativeRegExpMacroAssembler(Isolate* isolate, Zone* zone);
~NativeRegExpMacroAssembler() override;

View File

@ -9,9 +9,9 @@
#include "src/execution/isolate.h"
#include "src/heap/factory.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/property-sequences.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/regexp/regexp.h"
#include "src/strings/char-predicates-inl.h"
#include "src/utils/ostreams.h"
#include "src/utils/utils.h"

View File

@ -8,7 +8,7 @@
#include "src/heap/factory.h"
#include "src/objects/js-regexp-inl.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
namespace v8 {
namespace internal {

View File

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "src/codegen/compilation-cache.h"
#include "src/heap/heap-inl.h"
@ -20,13 +20,79 @@ namespace internal {
using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
// Implementation helpers behind the public RegExp interface declared in
// regexp.h. The class is declared in this .cc file, so none of these functions
// are visible to code that only includes the header; RegExp::Compile,
// RegExp::Exec, RegExp::IrregexpPrepare and RegExp::CompileForTesting forward
// to them.
class RegExpImpl final : public AllStatic {
public:
// Returns a string representation of a regular expression.
// Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
// This function calls the garbage collector if necessary.
static Handle<String> ToString(Handle<Object> value);
// Prepares a JSRegExp object with Irregexp-specific data.
// RegExp::Compile calls this when the pattern was not compiled as an atom.
static void IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
int capture_register_count);
// Sets up {re} as an atom regexp (simple string search).
// RegExp::Compile passes either the pattern itself or the atom string
// extracted from the parse tree as {match_pattern}.
static void AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
Handle<String> match_pattern);
// Runs an atom regexp on {subject} starting at {index}, writing
// (start, end) index pairs into {output}, two slots per match.
// Returns RegExp::RE_FAILURE if the needle cannot fit into the remainder of
// the subject; otherwise the return value is a number of matches.
static int AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index, int32_t* output,
int output_size);
// Runs an atom regexp once via AtomExecRaw. Returns the null value if
// nothing matched; otherwise records the match in {last_match_info} and
// returns it.
static Handle<Object> AtomExec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info);
// Execute a regular expression on the subject, starting from index.
// If matching succeeds, return the number of matches. This can be larger
// than one in the case of global regular expressions.
// The captures and subcaptures are stored into the registers vector.
// If matching fails, returns RegExp::RE_FAILURE.
// If execution fails, sets a pending exception and returns
// RegExp::RE_EXCEPTION.
static int IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index, int32_t* output,
int output_size);
// Execute an Irregexp pattern.
// On a successful match, the result is the RegExpMatchInfo returned by
// RegExp::SetLastMatchInfo. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
V8_WARN_UNUSED_RESULT static MaybeHandle<Object> IrregexpExec(
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int index, Handle<RegExpMatchInfo> last_match_info);
// Compiles the Irregexp code for {re}.
// NOTE(review): presumably the boolean result signals success, matching
// EnsureCompiledIrregexp below -- confirm against the definition.
static bool CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject, bool is_one_byte);
// Makes sure {re} holds compiled code for the given subject representation.
// A false result makes RegExp::IrregexpPrepare return -1.
static inline bool EnsureCompiledIrregexp(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte);
// Returns true on success, false on failure.
// RegExp::CompileForTesting forwards directly to this overload.
static bool Compile(Isolate* isolate, Zone* zone, RegExpCompileData* input,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte);
// For acting on the JSRegExp data FixedArray.
// RegExp::IrregexpPrepare uses the capture and register counts to compute
// how many output registers a match needs.
static int IrregexpMaxRegisterCount(FixedArray re);
static void SetIrregexpMaxRegisterCount(FixedArray re, int value);
static void SetIrregexpCaptureNameMap(FixedArray re,
Handle<FixedArray> value);
static int IrregexpNumberOfCaptures(FixedArray re);
static int IrregexpNumberOfRegisters(FixedArray re);
static ByteArray IrregexpByteCode(FixedArray re, bool is_one_byte);
static Code IrregexpNativeCode(FixedArray re, bool is_one_byte);
};
V8_WARN_UNUSED_RESULT
static inline MaybeHandle<Object> ThrowRegExpException(
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
Handle<String> error_text) {
THROW_NEW_ERROR(isolate, NewSyntaxError(MessageTemplate::kMalformedRegExp,
pattern, error_text),
Object);
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kMalformedRegExp, pattern, error_text),
Object);
}
inline void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
@ -35,11 +101,8 @@ inline void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
error_text));
}
ContainedInLattice AddRange(ContainedInLattice containment,
const int* ranges,
int ranges_length,
Interval new_range) {
ContainedInLattice AddRange(ContainedInLattice containment, const int* ranges,
int ranges_length, Interval new_range) {
DCHECK_EQ(1, ranges_length & 1);
DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1]);
if (containment == kLatticeUnknown) return containment;
@ -83,9 +146,10 @@ static bool HasFewDifferentCharacters(Handle<String> pattern) {
// Generic RegExp methods. Dispatches to implementation specific methods.
MaybeHandle<Object> RegExpImpl::Compile(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags) {
// static
MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags) {
DCHECK(pattern->IsFlat());
Zone zone(isolate->allocator(), ZONE_NAME);
@ -113,7 +177,7 @@ MaybeHandle<Object> RegExpImpl::Compile(Isolate* isolate, Handle<JSRegExp> re,
if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
!HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
AtomCompile(isolate, re, pattern, flags, pattern);
RegExpImpl::AtomCompile(isolate, re, pattern, flags, pattern);
has_been_compiled = true;
} else if (parse_result.tree->IsAtom() && !IsSticky(flags) &&
parse_result.capture_count == 0) {
@ -124,12 +188,13 @@ MaybeHandle<Object> RegExpImpl::Compile(Isolate* isolate, Handle<JSRegExp> re,
isolate, atom_string,
isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
AtomCompile(isolate, re, pattern, flags, atom_string);
RegExpImpl::AtomCompile(isolate, re, pattern, flags, atom_string);
has_been_compiled = true;
}
}
if (!has_been_compiled) {
IrregexpInitialize(isolate, re, pattern, flags, parse_result.capture_count);
RegExpImpl::IrregexpInitialize(isolate, re, pattern, flags,
parse_result.capture_count);
}
DCHECK(re->data().IsFixedArray());
// Compilation succeeded so the data is set on the regexp
@ -140,21 +205,23 @@ MaybeHandle<Object> RegExpImpl::Compile(Isolate* isolate, Handle<JSRegExp> re,
return re;
}
MaybeHandle<Object> RegExpImpl::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info) {
// static
MaybeHandle<Object> RegExp::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info) {
switch (regexp->TypeTag()) {
case JSRegExp::ATOM:
return AtomExec(isolate, regexp, subject, index, last_match_info);
return RegExpImpl::AtomExec(isolate, regexp, subject, index,
last_match_info);
case JSRegExp::IRREGEXP: {
return IrregexpExec(isolate, regexp, subject, index, last_match_info);
return RegExpImpl::IrregexpExec(isolate, regexp, subject, index,
last_match_info);
}
default:
UNREACHABLE();
}
}
// RegExp Atom implementation: Simple string search using indexOf.
void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
@ -190,7 +257,7 @@ int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
DCHECK_LT(0, needle_len);
if (index + needle_len > subject->length()) {
return RegExpImpl::RE_FAILURE;
return RegExp::RE_FAILURE;
}
for (int i = 0; i < output_size; i += 2) {
@ -215,7 +282,7 @@ int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
return i / 2; // Return number of matches.
} else {
output[i] = index;
output[i+1] = index + needle_len;
output[i + 1] = index + needle_len;
index += needle_len;
}
}
@ -232,16 +299,15 @@ Handle<Object> RegExpImpl::AtomExec(Isolate* isolate, Handle<JSRegExp> re,
int res =
AtomExecRaw(isolate, re, subject, index, output_registers, kNumRegisters);
if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value();
if (res == RegExp::RE_FAILURE) return isolate->factory()->null_value();
DCHECK_EQ(res, RegExpImpl::RE_SUCCESS);
DCHECK_EQ(res, RegExp::RE_SUCCESS);
SealHandleScope shs(isolate);
SetAtomLastCapture(isolate, last_match_info, *subject, output_registers[0],
output_registers[1]);
return last_match_info;
}
// Irregexp implementation.
// Ensures that the regexp object contains a compiled version of the
@ -352,25 +418,31 @@ void RegExpImpl::IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
flags, capture_count);
}
int RegExpImpl::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject) {
// static
int RegExp::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject) {
DCHECK(subject->IsFlat());
// Check representation of the underlying storage.
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
if (!EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte)) return -1;
if (!RegExpImpl::EnsureCompiledIrregexp(isolate, regexp, subject,
is_one_byte)) {
return -1;
}
DisallowHeapAllocation no_gc;
FixedArray data = FixedArray::cast(regexp->data());
if (FLAG_regexp_interpret_all) {
// Byte-code regexp needs space allocated for all its registers.
// The result captures are copied to the start of the registers array
// if the match succeeds. This way those registers are not clobbered
// when we set the last match info from last successful match.
return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
(IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
return RegExpImpl::IrregexpNumberOfRegisters(data) +
(RegExpImpl::IrregexpNumberOfCaptures(data) + 1) * 2;
} else {
// Native regexp only needs room to output captures. Registers are handled
// internally.
return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
return (RegExpImpl::IrregexpNumberOfCaptures(data) + 1) * 2;
}
}
@ -400,11 +472,11 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||
isolate->has_pending_exception());
STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) ==
RE_SUCCESS);
RegExp::RE_SUCCESS);
STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::FAILURE) ==
RE_FAILURE);
RegExp::RE_FAILURE);
STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION) ==
RE_EXCEPTION);
RegExp::RE_EXCEPTION);
return res;
}
// If result is RETRY, the string has changed representation, and we
@ -413,7 +485,7 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
// the, potentially, different subject (the string can switch between
// being internal and external, and even between being Latin1 and UC16,
// but the characters are always the same).
IrregexpPrepare(isolate, regexp, subject);
RegExp::IrregexpPrepare(isolate, regexp, subject);
is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
} while (true);
UNREACHABLE();
@ -477,8 +549,7 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
PrintF("\n\nSubject string: '%s'\n\n", subject->ToCString().get());
}
#endif
int required_registers =
RegExpImpl::IrregexpPrepare(isolate, regexp, subject);
int required_registers = RegExp::IrregexpPrepare(isolate, regexp, subject);
if (required_registers < 0) {
// Compiling failed with an exception.
DCHECK(isolate->has_pending_exception());
@ -497,21 +568,22 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
int res =
RegExpImpl::IrregexpExecRaw(isolate, regexp, subject, previous_index,
output_registers, required_registers);
if (res == RE_SUCCESS) {
if (res == RegExp::RE_SUCCESS) {
int capture_count =
IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
return SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
output_registers);
return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
capture_count, output_registers);
}
if (res == RE_EXCEPTION) {
if (res == RegExp::RE_EXCEPTION) {
DCHECK(isolate->has_pending_exception());
return MaybeHandle<Object>();
}
DCHECK(res == RE_FAILURE);
DCHECK(res == RegExp::RE_FAILURE);
return isolate->factory()->null_value();
}
Handle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo(
// static
Handle<RegExpMatchInfo> RegExp::SetLastMatchInfo(
Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
Handle<String> subject, int capture_count, int32_t* match) {
// This is the only place where match infos can grow. If, after executing the
@ -546,8 +618,9 @@ Handle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo(
return result;
}
void RegExpImpl::DotPrintForTesting(const char* label, RegExpNode* node,
bool ignore_case) {
// static
void RegExp::DotPrintForTesting(const char* label, RegExpNode* node,
bool ignore_case) {
DotPrinter::DotPrint(label, node, ignore_case);
}
@ -565,21 +638,21 @@ bool TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern) {
static constexpr size_t kRegExpCompiledLimit = 1 * MB;
Heap* heap = isolate->heap();
if (pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize) return true;
if (pattern->length() > RegExp::kRegExpTooLargeToOptimize) return true;
return (isolate->total_regexp_code_generated() > kRegExpCompiledLimit &&
heap->CommittedMemoryExecutable() > kRegExpExecutableMemoryLimit);
}
} // namespace
bool RegExpImpl::CompileForTesting(Isolate* isolate, Zone* zone,
RegExpCompileData* data,
JSRegExp::Flags flags,
Handle<String> pattern,
Handle<String> sample_subject,
bool is_one_byte) {
return Compile(isolate, zone, data, flags, pattern, sample_subject,
is_one_byte);
// static
bool RegExp::CompileForTesting(Isolate* isolate, Zone* zone,
RegExpCompileData* data, JSRegExp::Flags flags,
Handle<String> pattern,
Handle<String> sample_subject,
bool is_one_byte) {
return RegExpImpl::Compile(isolate, zone, data, flags, pattern,
sample_subject, is_one_byte);
}
bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
@ -760,8 +833,7 @@ RegExpGlobalCache::RegExpGlobalCache(Handle<JSRegExp> regexp,
// There is no distinction between interpreted and native for atom regexps.
interpreted = false;
} else {
registers_per_match_ =
RegExpImpl::IrregexpPrepare(isolate_, regexp_, subject_);
registers_per_match_ = RegExp::IrregexpPrepare(isolate_, regexp_, subject_);
if (registers_per_match_ < 0) {
num_matches_ = -1; // Signal exception.
return;

View File

@ -2,8 +2,8 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_JSREGEXP_H_
#define V8_REGEXP_JSREGEXP_H_
#ifndef V8_REGEXP_REGEXP_H_
#define V8_REGEXP_REGEXP_H_
#include "src/objects/js-regexp.h"
@ -48,10 +48,10 @@ struct RegExpCompileData {
int register_count = 0;
};
class RegExpImpl final : public AllStatic {
class RegExp final : public AllStatic {
public:
// Whether the irregexp engine generates native code or interpreter bytecode.
static bool UsesNativeRegExp() { return !FLAG_regexp_interpret_all; }
static bool GeneratesNativeCode() { return !FLAG_regexp_interpret_all; }
// Parses the RegExp pattern and prepares the JSRegExp object with
// generic data and choice of implementation - as well as what
@ -67,7 +67,17 @@ class RegExpImpl final : public AllStatic {
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int index, Handle<RegExpMatchInfo> last_match_info);
enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 };
// Integral return values used throughout regexp code layers.
static constexpr int kInternalRegExpFailure = 0;
static constexpr int kInternalRegExpSuccess = 1;
static constexpr int kInternalRegExpException = -1;
static constexpr int kInternalRegExpRetry = -2;
enum IrregexpResult {
RE_FAILURE = kInternalRegExpFailure,
RE_SUCCESS = kInternalRegExpSuccess,
RE_EXCEPTION = kInternalRegExpException,
};
// Prepare a RegExp for being executed one or more times (using
// IrregexpExecOnce) on the subject.
@ -97,71 +107,6 @@ class RegExpImpl final : public AllStatic {
bool ignore_case);
static const int kRegExpTooLargeToOptimize = 20 * KB;
private:
// Returns a string representation of a regular expression.
// Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
// This function calls the garbage collector if necessary.
static Handle<String> ToString(Handle<Object> value);
// Prepares a JSRegExp object with Irregexp-specific data.
static void IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
int capture_register_count);
static void AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
Handle<String> match_pattern);
static int AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index, int32_t* output,
int output_size);
static Handle<Object> AtomExec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info);
// Execute a regular expression on the subject, starting from index.
// If matching succeeds, return the number of matches. This can be larger
// than one in the case of global regular expressions.
// The captures and subcaptures are stored into the registers vector.
// If matching fails, returns RE_FAILURE.
// If execution fails, sets a pending exception and returns RE_EXCEPTION.
static int IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index, int32_t* output,
int output_size);
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
V8_WARN_UNUSED_RESULT static MaybeHandle<Object> IrregexpExec(
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int index, Handle<RegExpMatchInfo> last_match_info);
static bool CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject, bool is_one_byte);
static inline bool EnsureCompiledIrregexp(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte);
// Returns true on success, false on failure.
static bool Compile(Isolate* isolate, Zone* zone, RegExpCompileData* input,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte);
// For acting on the JSRegExp data FixedArray.
static int IrregexpMaxRegisterCount(FixedArray re);
static void SetIrregexpMaxRegisterCount(FixedArray re, int value);
static void SetIrregexpCaptureNameMap(FixedArray re,
Handle<FixedArray> value);
static int IrregexpNumberOfCaptures(FixedArray re);
static int IrregexpNumberOfRegisters(FixedArray re);
static ByteArray IrregexpByteCode(FixedArray re, bool is_one_byte);
static Code IrregexpNativeCode(FixedArray re, bool is_one_byte);
friend class RegExpGlobalCache;
};
// Uses a special global mode of irregexp-generated code to perform a global
@ -230,4 +175,4 @@ class RegExpResultsCache final : public AllStatic {
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_JSREGEXP_H_
#endif // V8_REGEXP_REGEXP_H_

View File

@ -12,8 +12,8 @@
#include "src/numbers/conversions-inl.h"
#include "src/objects/js-array-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-utils.h"
#include "src/regexp/regexp.h"
#include "src/runtime/runtime-utils.h"
#include "src/strings/string-builder-inl.h"
#include "src/strings/string-search.h"
@ -595,8 +595,7 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
}
int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
match_indices);
RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
TruncateRegexpIndicesList(isolate);
@ -615,7 +614,7 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
JSRegExp::Type typeTag = regexp->TypeTag();
if (typeTag == JSRegExp::IRREGEXP) {
// Ensure the RegExp is compiled so we can access the capture-name map.
if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
if (RegExp::IrregexpPrepare(isolate, regexp, subject) == -1) {
DCHECK(isolate->has_pending_exception());
return ReadOnlyRoots(isolate).exception();
}
@ -680,8 +679,8 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
builder.AddSubjectSlice(prev, subject_length);
}
RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
global_cache.LastSuccessfulMatch());
RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
global_cache.LastSuccessfulMatch());
RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
}
@ -750,8 +749,8 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
global_cache.LastSuccessfulMatch());
RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
global_cache.LastSuccessfulMatch());
if (prev < subject_length) {
// Add substring subject[prev;length] to answer string.
@ -878,8 +877,8 @@ RUNTIME_FUNCTION(Runtime_RegExpExec) {
CHECK_LE(0, index);
CHECK_GE(subject->length(), index);
isolate->counters()->regexp_entry_runtime()->Increment();
RETURN_RESULT_OR_FAILURE(isolate, RegExpImpl::Exec(isolate, regexp, subject,
index, last_match_info));
RETURN_RESULT_OR_FAILURE(
isolate, RegExp::Exec(isolate, regexp, subject, index, last_match_info));
}
namespace {
@ -1109,8 +1108,8 @@ static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
isolate->factory()->CopyFixedArrayWithMap(
cached_fixed_array, isolate->factory()->fixed_array_map());
JSArray::SetContent(result_array, copied_fixed_array);
RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
capture_count, last_match);
RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
capture_count, last_match);
DeleteArray(last_match);
return *result_array;
}
@ -1217,9 +1216,8 @@ static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
subject_length);
}
RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
capture_count,
global_cache.LastSuccessfulMatch());
RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
global_cache.LastSuccessfulMatch());
if (subject_length > kMinLengthToCache) {
// Store the last successful match into the array for caching.
@ -1283,10 +1281,10 @@ V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
// A lastIndex exceeding the string length always returns null (signalling
// failure) in RegExpBuiltinExec, thus we can skip the call.
if (last_index <= static_cast<uint32_t>(string->length())) {
ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
RegExpImpl::Exec(isolate, regexp, string,
last_index, last_match_info),
String);
ASSIGN_RETURN_ON_EXCEPTION(
isolate, match_indices_obj,
RegExp::Exec(isolate, regexp, string, last_index, last_match_info),
String);
}
if (match_indices_obj->IsNull(isolate)) {
@ -1415,8 +1413,7 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
if (last_index <= static_cast<uint32_t>(subject->length())) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, match_indices_obj,
RegExpImpl::Exec(isolate, regexp, subject, last_index,
last_match_info));
RegExp::Exec(isolate, regexp, subject, last_index, last_match_info));
}
if (match_indices_obj->IsNull(isolate)) {

View File

@ -56,7 +56,7 @@
#include "src/objects/objects-inl.h"
#include "src/objects/slots.h"
#include "src/objects/transitions.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "src/snapshot/snapshot.h"
#include "src/utils/ostreams.h"
#include "test/cctest/cctest.h"
@ -1510,8 +1510,8 @@ TEST(TestSizeOfRegExpCode) {
LocalContext context;
// Adjust source below and this check to match
// RegExpImple::kRegExpTooLargeToOptimize.
CHECK_EQ(i::RegExpImpl::kRegExpTooLargeToOptimize, 20 * KB);
// RegExp::kRegExpTooLargeToOptimize.
CHECK_EQ(i::RegExp::kRegExpTooLargeToOptimize, 20 * KB);
// Compile a regexp that is much larger if we are using regexp optimizations.
CompileRun(

View File

@ -36,12 +36,12 @@
#include "src/codegen/macro-assembler.h"
#include "src/init/v8.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-compiler.h"
#include "src/regexp/regexp-interpreter.h"
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-macro-assembler-irregexp.h"
#include "src/regexp/regexp-parser.h"
#include "src/regexp/regexp.h"
#include "src/strings/char-predicates-inl.h"
#include "src/strings/string-stream.h"
#include "src/strings/unicode-inl.h"
@ -547,8 +547,8 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
.ToHandleChecked();
Handle<String> sample_subject =
isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
RegExpImpl::CompileForTesting(isolate, zone, &compile_data, flags, pattern,
sample_subject, is_one_byte);
RegExp::CompileForTesting(isolate, zone, &compile_data, flags, pattern,
sample_subject, is_one_byte);
return compile_data.node;
}
@ -561,7 +561,7 @@ static void Execute(const char* input, bool multiline, bool unicode,
USE(node);
#ifdef DEBUG
if (dot_output) {
RegExpImpl::DotPrintForTesting(input, node, false);
RegExp::DotPrintForTesting(input, node, false);
}
#endif // DEBUG
}

View File

@ -12,7 +12,7 @@
#include "include/v8.h"
#include "src/heap/factory.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "test/fuzzer/fuzzer-support.h"
// This is a hexdump of test/fuzzer/regexp_builtins/mjsunit.js generated using

View File

@ -9,7 +9,7 @@
#include "include/v8.h"
#include "src/heap/factory.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp.h"
#include "test/fuzzer/fuzzer-support.h"
namespace i = v8::internal;
@ -19,8 +19,7 @@ void Test(v8::Isolate* isolate, i::Handle<i::JSRegExp> regexp,
i::Handle<i::RegExpMatchInfo> results_array) {
v8::TryCatch try_catch(isolate);
i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate);
if (i::RegExpImpl::Exec(i_isolate, regexp, subject, 0, results_array)
.is_null()) {
if (i::RegExp::Exec(i_isolate, regexp, subject, 0, results_array).is_null()) {
i_isolate->OptionalRescheduleException(true);
}
}