[regexp] Break dependency on JSRegExp::Flags
The JSRegExp heap object should not be the source of truth for regexp flags, which are also relevant in places that don't need or want to care about the heap object layout (e.g.: the regexp parser). Introduce RegExpFlags as a new source of truth, and base everything else on these flags. As a first change, remove the js-regexp.h dependency from the regexp parser. Other files in src/regexp/ should be updated in follow-up work. Change-Id: Id9a6706c7f09e93f743b08b647b211d0cb0b9c76 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3103306 Reviewed-by: Leszek Swirski <leszeks@chromium.org> Reviewed-by: Patrick Thier <pthier@chromium.org> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/main@{#76379}
This commit is contained in:
parent
2c70eb764a
commit
d586518a36
@ -1754,6 +1754,7 @@ filegroup(
|
||||
"src/regexp/regexp-dotprinter.h",
|
||||
"src/regexp/regexp-error.cc",
|
||||
"src/regexp/regexp-error.h",
|
||||
"src/regexp/regexp-flags.h",
|
||||
"src/regexp/regexp-interpreter.cc",
|
||||
"src/regexp/regexp-interpreter.h",
|
||||
"src/regexp/regexp-macro-assembler-arch.h",
|
||||
|
1
BUILD.gn
1
BUILD.gn
@ -3088,6 +3088,7 @@ v8_header_set("v8_internal_headers") {
|
||||
"src/regexp/regexp-compiler.h",
|
||||
"src/regexp/regexp-dotprinter.h",
|
||||
"src/regexp/regexp-error.h",
|
||||
"src/regexp/regexp-flags.h",
|
||||
"src/regexp/regexp-interpreter.h",
|
||||
"src/regexp/regexp-macro-assembler-arch.h",
|
||||
"src/regexp/regexp-macro-assembler-tracer.h",
|
||||
|
1
src/DEPS
1
src/DEPS
@ -52,6 +52,7 @@ include_rules = [
|
||||
"+src/interpreter/setup-interpreter.h",
|
||||
"-src/regexp",
|
||||
"+src/regexp/regexp.h",
|
||||
"+src/regexp/regexp-flags.h",
|
||||
"+src/regexp/regexp-stack.h",
|
||||
"+src/regexp/regexp-utils.h",
|
||||
"-src/trap-handler",
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "src/base/vector.h"
|
||||
#include "src/common/globals.h"
|
||||
#include "src/objects/objects-inl.h"
|
||||
#include "src/regexp/regexp-flags.h"
|
||||
#include "src/strings/string-builder-inl.h"
|
||||
|
||||
namespace v8 {
|
||||
@ -72,6 +73,12 @@ void CallPrinter::Find(AstNode* node, bool print) {
|
||||
}
|
||||
}
|
||||
|
||||
// Appends the single character `c` to builder_ and increments num_prints_.
// Does nothing unless found_ is set and done_ is not.
void CallPrinter::Print(char c) {
|
||||
if (!found_ || done_) return;
|
||||
num_prints_++;
|
||||
builder_->AppendCharacter(c);
|
||||
}
|
||||
|
||||
void CallPrinter::Print(const char* str) {
|
||||
if (!found_ || done_) return;
|
||||
num_prints_++;
|
||||
@ -269,13 +276,10 @@ void CallPrinter::VisitRegExpLiteral(RegExpLiteral* node) {
|
||||
Print("/");
|
||||
PrintLiteral(node->pattern(), false);
|
||||
Print("/");
|
||||
if (node->flags() & RegExp::kHasIndices) Print("d");
|
||||
if (node->flags() & RegExp::kGlobal) Print("g");
|
||||
if (node->flags() & RegExp::kIgnoreCase) Print("i");
|
||||
if (node->flags() & RegExp::kLinear) Print("l");
|
||||
if (node->flags() & RegExp::kMultiline) Print("m");
|
||||
if (node->flags() & RegExp::kUnicode) Print("u");
|
||||
if (node->flags() & RegExp::kSticky) Print("y");
|
||||
#define V(Lower, Camel, LowerCamel, Char, Bit) \
|
||||
if (node->flags() & RegExp::k##Camel) Print(Char);
|
||||
REGEXP_FLAG_LIST(V)
|
||||
#undef V
|
||||
}
|
||||
|
||||
|
||||
@ -1189,13 +1193,10 @@ void AstPrinter::VisitRegExpLiteral(RegExpLiteral* node) {
|
||||
PrintLiteralIndented("PATTERN", node->raw_pattern(), false);
|
||||
int i = 0;
|
||||
base::EmbeddedVector<char, 128> buf;
|
||||
if (node->flags() & RegExp::kHasIndices) buf[i++] = 'd';
|
||||
if (node->flags() & RegExp::kGlobal) buf[i++] = 'g';
|
||||
if (node->flags() & RegExp::kIgnoreCase) buf[i++] = 'i';
|
||||
if (node->flags() & RegExp::kLinear) buf[i++] = 'l';
|
||||
if (node->flags() & RegExp::kMultiline) buf[i++] = 'm';
|
||||
if (node->flags() & RegExp::kUnicode) buf[i++] = 'u';
|
||||
if (node->flags() & RegExp::kSticky) buf[i++] = 'y';
|
||||
#define V(Lower, Camel, LowerCamel, Char, Bit) \
|
||||
if (node->flags() & RegExp::k##Camel) buf[i++] = Char;
|
||||
REGEXP_FLAG_LIST(V)
|
||||
#undef V
|
||||
buf[i] = '\0';
|
||||
PrintIndented("FLAGS ");
|
||||
Print("%s", buf.begin());
|
||||
|
@ -52,6 +52,7 @@ class CallPrinter final : public AstVisitor<CallPrinter> {
|
||||
#undef DECLARE_VISIT
|
||||
|
||||
private:
|
||||
void Print(char c);
|
||||
void Print(const char* str);
|
||||
void Print(Handle<String> str);
|
||||
|
||||
|
@ -17,7 +17,6 @@
|
||||
#include "src/objects/contexts.h"
|
||||
#include "src/objects/field-index-inl.h"
|
||||
#include "src/objects/js-array-inl.h"
|
||||
#include "src/objects/js-regexp-inl.h"
|
||||
#include "src/objects/module-inl.h"
|
||||
#include "src/objects/property-details.h"
|
||||
#include "src/objects/prototype.h"
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "src/objects/js-regexp-string-iterator.h"
|
||||
#include "src/objects/js-regexp.h"
|
||||
#include "src/objects/regexp-match-info.h"
|
||||
#include "src/regexp/regexp-flags.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
@ -1041,23 +1042,16 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
|
||||
CAST(LoadObjectField(CAST(regexp), JSRegExp::kFlagsOffset));
|
||||
var_flags = SmiUntag(flags_smi);
|
||||
|
||||
#define CASE_FOR_FLAG(FLAG) \
|
||||
do { \
|
||||
Label next(this); \
|
||||
GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next); \
|
||||
var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \
|
||||
Goto(&next); \
|
||||
BIND(&next); \
|
||||
} while (false)
|
||||
#define CASE_FOR_FLAG(Lower, Camel, ...) \
|
||||
do { \
|
||||
Label next(this); \
|
||||
GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
|
||||
var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \
|
||||
Goto(&next); \
|
||||
BIND(&next); \
|
||||
} while (false);
|
||||
|
||||
CASE_FOR_FLAG(JSRegExp::kHasIndices);
|
||||
CASE_FOR_FLAG(JSRegExp::kGlobal);
|
||||
CASE_FOR_FLAG(JSRegExp::kIgnoreCase);
|
||||
CASE_FOR_FLAG(JSRegExp::kLinear);
|
||||
CASE_FOR_FLAG(JSRegExp::kMultiline);
|
||||
CASE_FOR_FLAG(JSRegExp::kDotAll);
|
||||
CASE_FOR_FLAG(JSRegExp::kUnicode);
|
||||
CASE_FOR_FLAG(JSRegExp::kSticky);
|
||||
REGEXP_FLAG_LIST(CASE_FOR_FLAG)
|
||||
#undef CASE_FOR_FLAG
|
||||
} else {
|
||||
DCHECK(!is_fastpath);
|
||||
@ -1123,26 +1117,19 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
|
||||
TVARIABLE(IntPtrT, var_offset,
|
||||
IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
|
||||
|
||||
#define CASE_FOR_FLAG(FLAG, CHAR) \
|
||||
do { \
|
||||
Label next(this); \
|
||||
GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next); \
|
||||
const TNode<Int32T> value = Int32Constant(CHAR); \
|
||||
StoreNoWriteBarrier(MachineRepresentation::kWord8, string, \
|
||||
var_offset.value(), value); \
|
||||
var_offset = IntPtrAdd(var_offset.value(), int_one); \
|
||||
Goto(&next); \
|
||||
BIND(&next); \
|
||||
} while (false)
|
||||
#define CASE_FOR_FLAG(Lower, Camel, LowerCamel, Char, ...) \
|
||||
do { \
|
||||
Label next(this); \
|
||||
GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
|
||||
const TNode<Int32T> value = Int32Constant(Char); \
|
||||
StoreNoWriteBarrier(MachineRepresentation::kWord8, string, \
|
||||
var_offset.value(), value); \
|
||||
var_offset = IntPtrAdd(var_offset.value(), int_one); \
|
||||
Goto(&next); \
|
||||
BIND(&next); \
|
||||
} while (false);
|
||||
|
||||
CASE_FOR_FLAG(JSRegExp::kHasIndices, 'd');
|
||||
CASE_FOR_FLAG(JSRegExp::kGlobal, 'g');
|
||||
CASE_FOR_FLAG(JSRegExp::kIgnoreCase, 'i');
|
||||
CASE_FOR_FLAG(JSRegExp::kLinear, 'l');
|
||||
CASE_FOR_FLAG(JSRegExp::kMultiline, 'm');
|
||||
CASE_FOR_FLAG(JSRegExp::kDotAll, 's');
|
||||
CASE_FOR_FLAG(JSRegExp::kUnicode, 'u');
|
||||
CASE_FOR_FLAG(JSRegExp::kSticky, 'y');
|
||||
REGEXP_FLAG_LIST(CASE_FOR_FLAG)
|
||||
#undef CASE_FOR_FLAG
|
||||
|
||||
if (is_fastpath) {
|
||||
@ -1391,29 +1378,12 @@ TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context,
|
||||
switch (flag) {
|
||||
case JSRegExp::kNone:
|
||||
UNREACHABLE();
|
||||
case JSRegExp::kGlobal:
|
||||
name = isolate()->factory()->global_string();
|
||||
break;
|
||||
case JSRegExp::kIgnoreCase:
|
||||
name = isolate()->factory()->ignoreCase_string();
|
||||
break;
|
||||
case JSRegExp::kMultiline:
|
||||
name = isolate()->factory()->multiline_string();
|
||||
break;
|
||||
case JSRegExp::kDotAll:
|
||||
UNREACHABLE(); // Never called for dotAll.
|
||||
case JSRegExp::kSticky:
|
||||
name = isolate()->factory()->sticky_string();
|
||||
break;
|
||||
case JSRegExp::kUnicode:
|
||||
name = isolate()->factory()->unicode_string();
|
||||
break;
|
||||
case JSRegExp::kHasIndices:
|
||||
name = isolate()->factory()->has_indices_string();
|
||||
break;
|
||||
case JSRegExp::kLinear:
|
||||
name = isolate()->factory()->linear_string();
|
||||
break;
|
||||
#define V(Lower, Camel, LowerCamel, Char, Bit) \
|
||||
case JSRegExp::k##Camel: \
|
||||
name = isolate()->factory()->LowerCamel##_string(); \
|
||||
break;
|
||||
REGEXP_FLAG_LIST(V)
|
||||
#undef V
|
||||
}
|
||||
|
||||
TNode<Object> value = GetProperty(context, regexp, name);
|
||||
|
@ -2462,7 +2462,7 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
|
||||
Builtin::kRegExpPrototypeFlagsGetter, true);
|
||||
SimpleInstallGetter(isolate_, prototype, factory->global_string(),
|
||||
Builtin::kRegExpPrototypeGlobalGetter, true);
|
||||
SimpleInstallGetter(isolate(), prototype, factory->has_indices_string(),
|
||||
SimpleInstallGetter(isolate(), prototype, factory->hasIndices_string(),
|
||||
Builtin::kRegExpPrototypeHasIndicesGetter, true);
|
||||
SimpleInstallGetter(isolate_, prototype, factory->ignoreCase_string(),
|
||||
Builtin::kRegExpPrototypeIgnoreCaseGetter, true);
|
||||
|
@ -227,7 +227,7 @@
|
||||
V(_, groups_string, "groups") \
|
||||
V(_, growable_string, "growable") \
|
||||
V(_, has_string, "has") \
|
||||
V(_, has_indices_string, "hasIndices") \
|
||||
V(_, hasIndices_string, "hasIndices") \
|
||||
V(_, ignoreCase_string, "ignoreCase") \
|
||||
V(_, illegal_access_string, "illegal access") \
|
||||
V(_, illegal_argument_string, "illegal argument") \
|
||||
|
@ -59,7 +59,7 @@ int JSRegExp::MaxRegisterCount() const {
|
||||
return Smi::ToInt(DataAt(kIrregexpMaxRegisterCountIndex));
|
||||
}
|
||||
|
||||
JSRegExp::Flags JSRegExp::GetFlags() {
|
||||
JSRegExp::Flags JSRegExp::GetFlags() const {
|
||||
DCHECK(this->data().IsFixedArray());
|
||||
Object data = this->data();
|
||||
Smi smi = Smi::cast(FixedArray::cast(data).get(kFlagsIndex));
|
||||
|
@ -111,64 +111,38 @@ uint32_t JSRegExp::BacktrackLimit() const {
|
||||
}
|
||||
|
||||
// static
|
||||
JSRegExp::Flags JSRegExp::FlagsFromString(Isolate* isolate,
|
||||
Handle<String> flags, bool* success) {
|
||||
int length = flags->length();
|
||||
if (length == 0) {
|
||||
*success = true;
|
||||
return JSRegExp::kNone;
|
||||
}
|
||||
base::Optional<JSRegExp::Flags> JSRegExp::FlagsFromString(
|
||||
Isolate* isolate, Handle<String> flags) {
|
||||
const int length = flags->length();
|
||||
|
||||
// A longer flags string cannot be valid.
|
||||
if (length > JSRegExp::kFlagCount) return JSRegExp::Flags(0);
|
||||
JSRegExp::Flags value(0);
|
||||
if (flags->IsSeqOneByteString()) {
|
||||
DisallowGarbageCollection no_gc;
|
||||
SeqOneByteString seq_flags = SeqOneByteString::cast(*flags);
|
||||
for (int i = 0; i < length; i++) {
|
||||
base::Optional<JSRegExp::Flag> maybe_flag =
|
||||
JSRegExp::FlagFromChar(seq_flags.Get(i));
|
||||
if (!maybe_flag.has_value()) return JSRegExp::Flags(0);
|
||||
JSRegExp::Flag flag = *maybe_flag;
|
||||
// Duplicate flag.
|
||||
if (value & flag) return JSRegExp::Flags(0);
|
||||
value |= flag;
|
||||
}
|
||||
} else {
|
||||
flags = String::Flatten(isolate, flags);
|
||||
DisallowGarbageCollection no_gc;
|
||||
String::FlatContent flags_content = flags->GetFlatContent(no_gc);
|
||||
for (int i = 0; i < length; i++) {
|
||||
base::Optional<JSRegExp::Flag> maybe_flag =
|
||||
JSRegExp::FlagFromChar(flags_content.Get(i));
|
||||
if (!maybe_flag.has_value()) return JSRegExp::Flags(0);
|
||||
JSRegExp::Flag flag = *maybe_flag;
|
||||
// Duplicate flag.
|
||||
if (value & flag) return JSRegExp::Flags(0);
|
||||
value |= flag;
|
||||
}
|
||||
if (length > JSRegExp::kFlagCount) return {};
|
||||
|
||||
JSRegExp::Flags value;
|
||||
FlatStringReader reader(isolate, String::Flatten(isolate, flags));
|
||||
|
||||
for (int i = 0; i < length; i++) {
|
||||
base::Optional<JSRegExp::Flag> flag = JSRegExp::FlagFromChar(reader.Get(i));
|
||||
if (!flag.has_value()) return {};
|
||||
if (value & flag.value()) return {}; // Duplicate.
|
||||
value |= flag.value();
|
||||
}
|
||||
*success = true;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
// static
|
||||
Handle<String> JSRegExp::StringFromFlags(Isolate* isolate,
|
||||
JSRegExp::Flags flags) {
|
||||
// Ensure that this function is up-to-date with the supported flag options.
|
||||
constexpr size_t kFlagCount = JSRegExp::kFlagCount;
|
||||
STATIC_ASSERT(kFlagCount == 8);
|
||||
|
||||
// Translate to the lexicographically smaller string.
|
||||
static constexpr int kStringTerminator = 1;
|
||||
int cursor = 0;
|
||||
char buffer[kFlagCount] = {'\0'};
|
||||
if (flags & JSRegExp::kHasIndices) buffer[cursor++] = 'd';
|
||||
if (flags & JSRegExp::kGlobal) buffer[cursor++] = 'g';
|
||||
if (flags & JSRegExp::kIgnoreCase) buffer[cursor++] = 'i';
|
||||
if (flags & JSRegExp::kLinear) buffer[cursor++] = 'l';
|
||||
if (flags & JSRegExp::kMultiline) buffer[cursor++] = 'm';
|
||||
if (flags & JSRegExp::kDotAll) buffer[cursor++] = 's';
|
||||
if (flags & JSRegExp::kUnicode) buffer[cursor++] = 'u';
|
||||
if (flags & JSRegExp::kSticky) buffer[cursor++] = 'y';
|
||||
char buffer[kFlagCount + kStringTerminator];
|
||||
#define V(Lower, Camel, LowerCamel, Char, Bit) \
|
||||
if (flags & JSRegExp::k##Camel) buffer[cursor++] = Char;
|
||||
REGEXP_FLAG_LIST(V)
|
||||
#undef V
|
||||
buffer[cursor++] = '\0';
|
||||
DCHECK_LE(cursor, kFlagCount + kStringTerminator);
|
||||
return isolate->factory()->NewStringFromAsciiChecked(buffer);
|
||||
}
|
||||
|
||||
@ -247,15 +221,15 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
|
||||
Handle<String> source,
|
||||
Handle<String> flags_string) {
|
||||
Isolate* isolate = regexp->GetIsolate();
|
||||
bool success = false;
|
||||
Flags flags = JSRegExp::FlagsFromString(isolate, flags_string, &success);
|
||||
if (!success) {
|
||||
base::Optional<Flags> flags =
|
||||
JSRegExp::FlagsFromString(isolate, flags_string);
|
||||
if (!flags.has_value()) {
|
||||
THROW_NEW_ERROR(
|
||||
isolate,
|
||||
NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string),
|
||||
JSRegExp);
|
||||
}
|
||||
return Initialize(regexp, source, flags);
|
||||
return Initialize(regexp, source, flags.value());
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "src/objects/contexts.h"
|
||||
#include "src/objects/js-array.h"
|
||||
#include "src/regexp/regexp-flags.h"
|
||||
#include "torque-generated/bit-fields.h"
|
||||
|
||||
// Has to be the last include (doesn't have include guards):
|
||||
@ -43,32 +44,39 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
|
||||
enum Type { NOT_COMPILED, ATOM, IRREGEXP, EXPERIMENTAL };
|
||||
DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()
|
||||
|
||||
// Converts a single RegExpFlag into a JSRegExp::Flag with a plain static_cast
// (no value remapping is performed here).
static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
|
||||
return static_cast<Flag>(f);
|
||||
}
|
||||
// Optional-preserving variant of AsJSRegExpFlag: an engaged input is converted
// via AsJSRegExpFlag, an empty input yields an empty Optional<Flag>.
static constexpr base::Optional<Flag> AsOptionalJSRegExpFlag(
|
||||
base::Optional<RegExpFlag> f) {
|
||||
return f.has_value() ? base::Optional<Flag>{AsJSRegExpFlag(f.value())}
|
||||
: base::Optional<Flag>{};
|
||||
}
|
||||
// Converts a RegExpFlags bit set into JSRegExp::Flags by passing its integer
// value through unchanged.
static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
|
||||
return Flags{static_cast<int>(f)};
|
||||
}
|
||||
// Inverse of AsJSRegExpFlags: converts JSRegExp::Flags into a RegExpFlags bit
// set by passing its integer value through unchanged.
static constexpr RegExpFlags AsRegExpFlags(Flags f) {
|
||||
return RegExpFlags{static_cast<int>(f)};
|
||||
}
|
||||
|
||||
static base::Optional<Flag> FlagFromChar(char c) {
|
||||
STATIC_ASSERT(kFlagCount == 8);
|
||||
// clang-format off
|
||||
return c == 'g' ? base::Optional<Flag>(kGlobal)
|
||||
: c == 'i' ? base::Optional<Flag>(kIgnoreCase)
|
||||
: c == 'm' ? base::Optional<Flag>(kMultiline)
|
||||
: c == 'y' ? base::Optional<Flag>(kSticky)
|
||||
: c == 'u' ? base::Optional<Flag>(kUnicode)
|
||||
: c == 's' ? base::Optional<Flag>(kDotAll)
|
||||
: c == 'd' ? base::Optional<Flag>(kHasIndices)
|
||||
: (FLAG_enable_experimental_regexp_engine && c == 'l')
|
||||
? base::Optional<Flag>(kLinear)
|
||||
: base::Optional<Flag>();
|
||||
// clang-format on
|
||||
base::Optional<Flag> f = AsOptionalJSRegExpFlag(TryRegExpFlagFromChar(c));
|
||||
if (!f.has_value()) return f;
|
||||
if (f.value() == kLinear && !FLAG_enable_experimental_regexp_engine) {
|
||||
return {};
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
|
||||
STATIC_ASSERT(static_cast<int>(kGlobal) == v8::RegExp::kGlobal);
|
||||
STATIC_ASSERT(static_cast<int>(kIgnoreCase) == v8::RegExp::kIgnoreCase);
|
||||
STATIC_ASSERT(static_cast<int>(kMultiline) == v8::RegExp::kMultiline);
|
||||
STATIC_ASSERT(static_cast<int>(kSticky) == v8::RegExp::kSticky);
|
||||
STATIC_ASSERT(static_cast<int>(kUnicode) == v8::RegExp::kUnicode);
|
||||
STATIC_ASSERT(static_cast<int>(kDotAll) == v8::RegExp::kDotAll);
|
||||
STATIC_ASSERT(static_cast<int>(kLinear) == v8::RegExp::kLinear);
|
||||
STATIC_ASSERT(static_cast<int>(kHasIndices) == v8::RegExp::kHasIndices);
|
||||
#define V(_, Camel, ...) \
|
||||
STATIC_ASSERT(static_cast<int>(k##Camel) == v8::RegExp::k##Camel); \
|
||||
STATIC_ASSERT(static_cast<int>(k##Camel) == \
|
||||
static_cast<int>(RegExpFlag::k##Camel));
|
||||
REGEXP_FLAG_LIST(V)
|
||||
#undef V
|
||||
STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
|
||||
STATIC_ASSERT(kFlagCount == kRegExpFlagCount);
|
||||
|
||||
DECL_ACCESSORS(last_index, Object)
|
||||
|
||||
@ -86,8 +94,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
|
||||
Handle<String> source,
|
||||
Handle<String> flags_string);
|
||||
|
||||
static Flags FlagsFromString(Isolate* isolate, Handle<String> flags,
|
||||
bool* success);
|
||||
static base::Optional<Flags> FlagsFromString(Isolate* isolate,
|
||||
Handle<String> flags);
|
||||
|
||||
V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate,
|
||||
Flags flags);
|
||||
@ -112,7 +120,7 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
|
||||
static int RegistersForCaptureCount(int count) { return (count + 1) * 2; }
|
||||
|
||||
inline int MaxRegisterCount() const;
|
||||
inline Flags GetFlags();
|
||||
inline Flags GetFlags() const;
|
||||
inline String Pattern();
|
||||
inline String EscapedPattern();
|
||||
inline Object CaptureNameMap();
|
||||
|
@ -69,14 +69,14 @@ base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
|
||||
Zone zone(isolate->allocator(), ZONE_NAME);
|
||||
|
||||
Handle<String> source(regexp->Pattern(), isolate);
|
||||
JSRegExp::Flags flags = regexp->GetFlags();
|
||||
|
||||
// Parse and compile the regexp source.
|
||||
RegExpCompileData parse_result;
|
||||
DCHECK(!isolate->has_pending_exception());
|
||||
|
||||
bool parse_success = RegExpParser::ParseRegExpFromHeapString(
|
||||
isolate, &zone, source, flags, &parse_result);
|
||||
isolate, &zone, source, JSRegExp::AsRegExpFlags(regexp->GetFlags()),
|
||||
&parse_result);
|
||||
if (!parse_success) {
|
||||
// The pattern was already parsed successfully during initialization, so
|
||||
// the only way parsing can fail now is because of stack overflow.
|
||||
@ -86,8 +86,8 @@ base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
|
||||
return base::nullopt;
|
||||
}
|
||||
|
||||
ZoneList<RegExpInstruction> bytecode =
|
||||
ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone);
|
||||
ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(
|
||||
parse_result.tree, regexp->GetFlags(), &zone);
|
||||
|
||||
CompilationResult result;
|
||||
result.bytecode = VectorToByteArray(isolate, bytecode.ToVector());
|
||||
|
@ -518,7 +518,7 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
DCHECK_LT(first_atom, alternatives->length());
|
||||
DCHECK_LE(i, alternatives->length());
|
||||
DCHECK_LE(first_atom, i);
|
||||
if (IgnoreCase(compiler->flags())) {
|
||||
if (IsIgnoreCase(compiler->flags())) {
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
|
||||
i - first_atom);
|
||||
@ -570,14 +570,14 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
icu::UnicodeString new_prefix(atom->data().at(0));
|
||||
if (new_prefix != common_prefix) {
|
||||
if (!IgnoreCase(compiler->flags())) break;
|
||||
if (!IsIgnoreCase(compiler->flags())) break;
|
||||
if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
|
||||
break;
|
||||
}
|
||||
#else
|
||||
unibrow::uchar new_prefix = atom->data().at(0);
|
||||
if (new_prefix != common_prefix) {
|
||||
if (!IgnoreCase(compiler->flags())) break;
|
||||
if (!IsIgnoreCase(compiler->flags())) break;
|
||||
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
|
||||
compiler->isolate()->regexp_macro_assembler_canonicalize();
|
||||
new_prefix = Canonical(canonicalize, new_prefix);
|
||||
|
@ -1589,7 +1589,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
|
||||
QuickCheckDetails::Position* pos =
|
||||
details->positions(characters_filled_in);
|
||||
base::uc16 c = quarks[i];
|
||||
if (IgnoreCase(compiler->flags())) {
|
||||
if (IsIgnoreCase(compiler->flags())) {
|
||||
unibrow::uchar chars[4];
|
||||
int length = GetCaseIndependentLetters(
|
||||
isolate, c, compiler->one_byte(), chars, 4);
|
||||
@ -1861,7 +1861,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
|
||||
base::Vector<const base::uc16> quarks = elm.atom()->data();
|
||||
for (int j = 0; j < quarks.length(); j++) {
|
||||
base::uc16 c = quarks[j];
|
||||
if (IgnoreCase(flags)) {
|
||||
if (IsIgnoreCase(flags)) {
|
||||
c = unibrow::Latin1::TryConvertToLatin1(c);
|
||||
}
|
||||
if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
|
||||
@ -1880,7 +1880,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
|
||||
if (range_count != 0 && ranges->at(0).from() == 0 &&
|
||||
ranges->at(0).to() >= String::kMaxOneByteCharCode) {
|
||||
// This will be handled in a later filter.
|
||||
if (IgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
|
||||
if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
|
||||
continue;
|
||||
}
|
||||
return set_replacement(nullptr);
|
||||
@ -1889,7 +1889,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
|
||||
if (range_count == 0 ||
|
||||
ranges->at(0).from() > String::kMaxOneByteCharCode) {
|
||||
// This will be handled in a later filter.
|
||||
if (IgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
|
||||
if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
|
||||
continue;
|
||||
}
|
||||
return set_replacement(nullptr);
|
||||
@ -2321,13 +2321,13 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
|
||||
TextElement elm = elements()->at(i);
|
||||
int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
|
||||
if (elm.text_type() == TextElement::ATOM) {
|
||||
if (SkipPass(pass, IgnoreCase(compiler->flags()))) continue;
|
||||
if (SkipPass(pass, IsIgnoreCase(compiler->flags()))) continue;
|
||||
base::Vector<const base::uc16> quarks = elm.atom()->data();
|
||||
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
|
||||
if (first_element_checked && i == 0 && j == 0) continue;
|
||||
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
|
||||
base::uc16 quark = quarks[j];
|
||||
if (IgnoreCase(compiler->flags())) {
|
||||
if (IsIgnoreCase(compiler->flags())) {
|
||||
// Everywhere else we assume that a non-Latin-1 character cannot match
|
||||
// a Latin-1 character. Avoid the cases where this assumption is
|
||||
// invalid by using the Latin1 equivalent instead.
|
||||
@ -2492,7 +2492,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
|
||||
|
||||
void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
|
||||
JSRegExp::Flags flags) {
|
||||
if (!IgnoreCase(flags)) return;
|
||||
if (!IsIgnoreCase(flags)) return;
|
||||
#ifdef V8_INTL_SUPPORT
|
||||
if (NeedsUnicodeCaseEquivalents(flags)) return;
|
||||
#endif
|
||||
@ -3444,7 +3444,7 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
|
||||
RecursionCheck rc(compiler);
|
||||
|
||||
DCHECK_EQ(start_reg_ + 1, end_reg_);
|
||||
if (IgnoreCase(flags_)) {
|
||||
if (IsIgnoreCase(flags_)) {
|
||||
bool unicode = IsUnicode(flags_);
|
||||
assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
|
||||
unicode, trace->backtrack());
|
||||
@ -3809,7 +3809,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
|
||||
return;
|
||||
}
|
||||
base::uc16 character = atom->data()[j];
|
||||
if (IgnoreCase(bm->compiler()->flags())) {
|
||||
if (IsIgnoreCase(bm->compiler()->flags())) {
|
||||
unibrow::uchar chars[4];
|
||||
int length = GetCaseIndependentLetters(
|
||||
isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
|
||||
|
@ -9,6 +9,7 @@
|
||||
|
||||
#include "src/base/small-vector.h"
|
||||
#include "src/base/strings.h"
|
||||
#include "src/regexp/regexp-flags.h"
|
||||
#include "src/regexp/regexp-nodes.h"
|
||||
|
||||
namespace v8 {
|
||||
@ -49,34 +50,17 @@ constexpr int kPatternTooShortForBoyerMoore = 2;
|
||||
|
||||
} // namespace regexp_compiler_constants
|
||||
|
||||
inline bool IgnoreCase(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kIgnoreCase) != 0;
|
||||
}
|
||||
|
||||
inline bool IsUnicode(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kUnicode) != 0;
|
||||
}
|
||||
|
||||
inline bool IsSticky(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kSticky) != 0;
|
||||
}
|
||||
|
||||
inline bool IsGlobal(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kGlobal) != 0;
|
||||
}
|
||||
|
||||
inline bool DotAll(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kDotAll) != 0;
|
||||
}
|
||||
|
||||
inline bool Multiline(JSRegExp::Flags flags) {
|
||||
return (flags & JSRegExp::kMultiline) != 0;
|
||||
}
|
||||
// Generates one Is<Camel>(JSRegExp::Flags) overload per REGEXP_FLAG_LIST entry.
// Each overload converts its argument with JSRegExp::AsRegExpFlags and forwards
// to the Is<Camel> overload taking RegExpFlags.
#define V(Lower, Camel, LowerCamel, Char, Bit) \
|
||||
inline bool Is##Camel(JSRegExp::Flags flags) { \
|
||||
return Is##Camel(JSRegExp::AsRegExpFlags(flags)); \
|
||||
}
|
||||
REGEXP_FLAG_LIST(V)
|
||||
#undef V
|
||||
|
||||
inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
|
||||
// Both unicode and ignore_case flags are set. We need to use ICU to find
|
||||
// the closure over case equivalents.
|
||||
return IsUnicode(flags) && IgnoreCase(flags);
|
||||
return IsUnicode(flags) && IsIgnoreCase(flags);
|
||||
}
|
||||
|
||||
// Details of a quick mask-compare check that can look ahead in the
|
||||
|
67
src/regexp/regexp-flags.h
Normal file
67
src/regexp/regexp-flags.h
Normal file
@ -0,0 +1,67 @@
|
||||
// Copyright 2021 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_REGEXP_REGEXP_FLAGS_H_
|
||||
#define V8_REGEXP_REGEXP_FLAGS_H_
|
||||
|
||||
#include "src/base/flags.h"
|
||||
#include "src/base/optional.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// Order is important! Sorted in alphabetic order by the flag char. Note this
|
||||
// means that flag bits are shuffled. Take care to keep them contiguous when
|
||||
// adding/removing flags.
|
||||
#define REGEXP_FLAG_LIST(V) \
|
||||
V(has_indices, HasIndices, hasIndices, 'd', 7) \
|
||||
V(global, Global, global, 'g', 0) \
|
||||
V(ignore_case, IgnoreCase, ignoreCase, 'i', 1) \
|
||||
V(linear, Linear, linear, 'l', 6) \
|
||||
V(multiline, Multiline, multiline, 'm', 2) \
|
||||
V(dot_all, DotAll, dotAll, 's', 5) \
|
||||
V(unicode, Unicode, unicode, 'u', 4) \
|
||||
V(sticky, Sticky, sticky, 'y', 3)
|
||||
|
||||
// One enumerator k<Camel> per REGEXP_FLAG_LIST entry, valued as the single bit
// (1 << Bit) given in that entry.
#define V(Lower, Camel, LowerCamel, Char, Bit) k##Camel = 1 << Bit,
|
||||
enum class RegExpFlag { REGEXP_FLAG_LIST(V) };
|
||||
#undef V
|
||||
|
||||
// Number of flags in REGEXP_FLAG_LIST: every entry expands to `+1`, so the
// initializer is a sum with one term per entry.
#define V(...) +1
|
||||
constexpr int kRegExpFlagCount = REGEXP_FLAG_LIST(V);
|
||||
#undef V
|
||||
|
||||
// Assert alpha-sorted chars.
|
||||
// Each entry expands to `< Char) && (Char`, so the asserted expression becomes
// (('a' - 1) < c0) && (c0 < c1) && ... && (cN <= 'z'): the flag chars must be
// strictly increasing in list order and lie within 'a'..'z'.
#define V(Lower, Camel, LowerCamel, Char, Bit) < Char) && (Char
|
||||
static_assert((('a' - 1) REGEXP_FLAG_LIST(V) <= 'z'), "alpha-sort chars");
|
||||
#undef V
|
||||
|
||||
// Assert contiguous indices.
|
||||
// ORs together (1 << Bit) for every entry and requires the result to equal the
// low kRegExpFlagCount bits all set; a skipped or duplicated Bit value makes
// the two sides differ.
#define V(Lower, Camel, LowerCamel, Char, Bit) | (1 << Bit)
|
||||
static_assert(((1 << kRegExpFlagCount) - 1) == (0 REGEXP_FLAG_LIST(V)),
|
||||
"contiguous bits");
|
||||
#undef V
|
||||
|
||||
using RegExpFlags = base::Flags<RegExpFlag>;
|
||||
|
||||
// Generates one constexpr predicate Is<Camel>(RegExpFlags) per flag in
// REGEXP_FLAG_LIST (e.g. IsGlobal, IsIgnoreCase); each returns true when the
// corresponding RegExpFlag::k<Camel> bit is set in f.
#define V(Lower, Camel, ...) \
|
||||
constexpr bool Is##Camel(RegExpFlags f) { \
|
||||
return (f & RegExpFlag::k##Camel) != 0; \
|
||||
}
|
||||
REGEXP_FLAG_LIST(V)
|
||||
#undef V
|
||||
|
||||
// clang-format off
|
||||
// Maps a flag character to its RegExpFlag. V expands to one
// `c == Char ? RegExpFlag::k<Camel> :` arm per REGEXP_FLAG_LIST entry, forming
// a ternary chain whose final alternative is an empty Optional, returned when
// c matches none of the listed chars.
#define V(Lower, Camel, LowerCamel, Char, Bit) \
|
||||
c == Char ? RegExpFlag::k##Camel :
|
||||
constexpr base::Optional<RegExpFlag> TryRegExpFlagFromChar(char c) {
|
||||
return REGEXP_FLAG_LIST(V) base::Optional<RegExpFlag>{};
|
||||
}
|
||||
#undef V
|
||||
// clang-format on
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#endif // V8_REGEXP_REGEXP_FLAGS_H_
|
@ -106,7 +106,7 @@ class BufferedZoneList {
|
||||
// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
|
||||
class RegExpBuilder : public ZoneObject {
|
||||
public:
|
||||
RegExpBuilder(Zone* zone, JSRegExp::Flags flags);
|
||||
RegExpBuilder(Zone* zone, RegExpFlags flags);
|
||||
void AddCharacter(base::uc16 character);
|
||||
void AddUnicodeCharacter(base::uc32 character);
|
||||
void AddEscapedUnicodeCharacter(base::uc32 character);
|
||||
@ -123,12 +123,11 @@ class RegExpBuilder : public ZoneObject {
|
||||
RegExpQuantifier::QuantifierType type);
|
||||
void FlushText();
|
||||
RegExpTree* ToRegExp();
|
||||
JSRegExp::Flags flags() const { return flags_; }
|
||||
void set_flags(JSRegExp::Flags flags) { flags_ = flags; }
|
||||
RegExpFlags flags() const { return flags_; }
|
||||
|
||||
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
|
||||
bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }
|
||||
bool dotall() const { return (flags_ & JSRegExp::kDotAll) != 0; }
|
||||
bool ignore_case() const { return IsIgnoreCase(flags_); }
|
||||
bool multiline() const { return IsMultiline(flags_); }
|
||||
bool dotall() const { return IsDotAll(flags_); }
|
||||
|
||||
private:
|
||||
static const base::uc16 kNoPendingSurrogate = 0;
|
||||
@ -142,9 +141,9 @@ class RegExpBuilder : public ZoneObject {
|
||||
Zone* zone() const { return zone_; }
|
||||
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
|
||||
|
||||
Zone* zone_;
|
||||
Zone* const zone_;
|
||||
bool pending_empty_;
|
||||
JSRegExp::Flags flags_;
|
||||
const RegExpFlags flags_;
|
||||
ZoneList<base::uc16>* characters_;
|
||||
base::uc16 pending_surrogate_;
|
||||
BufferedZoneList<RegExpTree, 2> terms_;
|
||||
@ -174,7 +173,7 @@ class RegExpParserState : public ZoneObject {
|
||||
RegExpLookaround::Type lookaround_type,
|
||||
int disjunction_capture_index,
|
||||
const ZoneVector<base::uc16>* capture_name,
|
||||
JSRegExp::Flags flags, Zone* zone)
|
||||
RegExpFlags flags, Zone* zone)
|
||||
: previous_state_(previous_state),
|
||||
builder_(zone->New<RegExpBuilder>(zone, flags)),
|
||||
group_type_(group_type),
|
||||
@ -242,7 +241,7 @@ class RegExpParserState : public ZoneObject {
|
||||
template <class CharT>
|
||||
class RegExpParserImpl final {
|
||||
private:
|
||||
RegExpParserImpl(const CharT* input, int input_length, JSRegExp::Flags flags,
|
||||
RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags,
|
||||
Isolate* isolate, Zone* zone,
|
||||
const DisallowGarbageCollection& no_gc);
|
||||
|
||||
@ -378,7 +377,7 @@ class RegExpParserImpl final {
|
||||
// These are the flags specified outside the regexp syntax, i.e. after the
|
||||
// terminating '/' or in the second argument to the constructor. The current
|
||||
// flags are stored on the RegExpBuilder.
|
||||
const JSRegExp::Flags top_level_flags_;
|
||||
const RegExpFlags top_level_flags_;
|
||||
int next_pos_;
|
||||
int captures_started_;
|
||||
int capture_count_; // Only valid after we have scanned for captures.
|
||||
@ -391,14 +390,14 @@ class RegExpParserImpl final {
|
||||
|
||||
friend bool RegExpParser::ParseRegExpFromHeapString(Isolate*, Zone*,
|
||||
Handle<String>,
|
||||
JSRegExp::Flags,
|
||||
RegExpFlags,
|
||||
RegExpCompileData*);
|
||||
};
|
||||
|
||||
template <class CharT>
|
||||
RegExpParserImpl<CharT>::RegExpParserImpl(
|
||||
const CharT* input, int input_length, JSRegExp::Flags flags,
|
||||
Isolate* isolate, Zone* zone, const DisallowGarbageCollection& no_gc)
|
||||
const CharT* input, int input_length, RegExpFlags flags, Isolate* isolate,
|
||||
Zone* zone, const DisallowGarbageCollection& no_gc)
|
||||
: isolate_(isolate),
|
||||
zone_(zone),
|
||||
captures_(nullptr),
|
||||
@ -778,7 +777,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
|
||||
} else {
|
||||
RegExpCapture* capture = GetCapture(index);
|
||||
RegExpTree* atom = zone()->template New<RegExpBackReference>(
|
||||
capture, builder->flags());
|
||||
capture, JSRegExp::AsJSRegExpFlags(builder->flags()));
|
||||
builder->AddAtom(atom);
|
||||
}
|
||||
break;
|
||||
@ -976,8 +975,6 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
|
||||
RegExpParserState* state) {
|
||||
RegExpLookaround::Type lookaround_type = state->lookaround_type();
|
||||
bool is_named_capture = false;
|
||||
JSRegExp::Flags switch_on = JSRegExp::kNone;
|
||||
JSRegExp::Flags switch_off = JSRegExp::kNone;
|
||||
const ZoneVector<base::uc16>* capture_name = nullptr;
|
||||
SubexpressionType subexpr_type = CAPTURE;
|
||||
Advance();
|
||||
@ -1030,11 +1027,10 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
|
||||
capture_name = ParseCaptureGroupName(CHECK_FAILED);
|
||||
}
|
||||
}
|
||||
JSRegExp::Flags flags = (state->builder()->flags() | switch_on) & ~switch_off;
|
||||
// Store current state and begin new disjunction parsing.
|
||||
return zone()->template New<RegExpParserState>(
|
||||
state, subexpr_type, lookaround_type, captures_started_, capture_name,
|
||||
flags, zone());
|
||||
state->builder()->flags(), zone());
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
@ -1256,8 +1252,8 @@ bool RegExpParserImpl<CharT>::ParseNamedBackReference(
|
||||
if (state->IsInsideCaptureGroup(name)) {
|
||||
builder->AddEmpty();
|
||||
} else {
|
||||
RegExpBackReference* atom =
|
||||
zone()->template New<RegExpBackReference>(builder->flags());
|
||||
RegExpBackReference* atom = zone()->template New<RegExpBackReference>(
|
||||
JSRegExp::AsJSRegExpFlags(builder->flags()));
|
||||
atom->set_name(name);
|
||||
|
||||
builder->AddAtom(atom);
|
||||
@ -1753,7 +1749,7 @@ RegExpTree* RegExpParserImpl<CharT>::GetPropertySequence(
|
||||
if (!FLAG_harmony_regexp_sequence) return nullptr;
|
||||
const char* name = name_1.data();
|
||||
const base::uc32* sequence_list = nullptr;
|
||||
JSRegExp::Flags flags = JSRegExp::kUnicode;
|
||||
RegExpFlags flags = RegExpFlag::kUnicode;
|
||||
if (NameEquals(name, "Emoji_Flag_Sequence")) {
|
||||
sequence_list = UnicodePropertySequences::kEmojiFlagSequences;
|
||||
} else if (NameEquals(name, "Emoji_Tag_Sequence")) {
|
||||
@ -2114,7 +2110,7 @@ bool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) {
|
||||
return !failed();
|
||||
}
|
||||
|
||||
RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags)
|
||||
RegExpBuilder::RegExpBuilder(Zone* zone, RegExpFlags flags)
|
||||
: zone_(zone),
|
||||
pending_empty_(false),
|
||||
flags_(flags),
|
||||
@ -2406,7 +2402,7 @@ template class RegExpParserImpl<base::uc16>;
|
||||
// static
|
||||
bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
|
||||
Handle<String> input,
|
||||
JSRegExp::Flags flags,
|
||||
RegExpFlags flags,
|
||||
RegExpCompileData* result) {
|
||||
DisallowGarbageCollection no_gc;
|
||||
String::FlatContent content = input->GetFlatContent(no_gc);
|
||||
@ -2425,8 +2421,7 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
|
||||
|
||||
// static
|
||||
bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
|
||||
Handle<String> input,
|
||||
JSRegExp::Flags flags,
|
||||
Handle<String> input, RegExpFlags flags,
|
||||
RegExpCompileData* result,
|
||||
const DisallowGarbageCollection&) {
|
||||
return ParseRegExpFromHeapString(isolate, zone, input, flags, result);
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
#include "src/common/assert-scope.h"
|
||||
#include "src/handles/handles.h"
|
||||
#include "src/objects/js-regexp.h" // Move the Flags definition elsewhere.
|
||||
#include "src/regexp/regexp-flags.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
@ -20,13 +20,12 @@ struct RegExpCompileData;
|
||||
class V8_EXPORT_PRIVATE RegExpParser : public AllStatic {
|
||||
public:
|
||||
static bool ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
|
||||
Handle<String> input,
|
||||
JSRegExp::Flags flags,
|
||||
Handle<String> input, RegExpFlags flags,
|
||||
RegExpCompileData* result);
|
||||
|
||||
// Used by the SpiderMonkey embedding of irregexp.
|
||||
static bool VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
|
||||
Handle<String> input, JSRegExp::Flags flags,
|
||||
Handle<String> input, RegExpFlags flags,
|
||||
RegExpCompileData* result,
|
||||
const DisallowGarbageCollection& no_gc);
|
||||
};
|
||||
|
@ -181,7 +181,8 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
|
||||
PostponeInterruptsScope postpone(isolate);
|
||||
RegExpCompileData parse_result;
|
||||
DCHECK(!isolate->has_pending_exception());
|
||||
if (!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, pattern, flags,
|
||||
if (!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, pattern,
|
||||
JSRegExp::AsRegExpFlags(flags),
|
||||
&parse_result)) {
|
||||
// Throw an exception if we fail to parse the pattern.
|
||||
return RegExp::ThrowRegExpException(isolate, re, pattern,
|
||||
@ -209,7 +210,7 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
|
||||
ExperimentalRegExp::Initialize(isolate, re, pattern, flags,
|
||||
parse_result.capture_count);
|
||||
has_been_compiled = true;
|
||||
} else if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
|
||||
} else if (parse_result.simple && !IsIgnoreCase(flags) && !IsSticky(flags) &&
|
||||
!HasFewDifferentCharacters(pattern)) {
|
||||
// Parse-tree is a single atom that is equal to the pattern.
|
||||
RegExpImpl::AtomCompile(isolate, re, pattern, flags, pattern);
|
||||
@ -224,7 +225,7 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
|
||||
ASSIGN_RETURN_ON_EXCEPTION(
|
||||
isolate, atom_string,
|
||||
isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
|
||||
if (!IgnoreCase(flags) && !HasFewDifferentCharacters(atom_string)) {
|
||||
if (!IsIgnoreCase(flags) && !HasFewDifferentCharacters(atom_string)) {
|
||||
RegExpImpl::AtomCompile(isolate, re, pattern, flags, atom_string);
|
||||
has_been_compiled = true;
|
||||
}
|
||||
@ -506,7 +507,8 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
|
||||
Handle<String> pattern(re->Pattern(), isolate);
|
||||
pattern = String::Flatten(isolate, pattern);
|
||||
RegExpCompileData compile_data;
|
||||
if (!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, pattern, flags,
|
||||
if (!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, pattern,
|
||||
JSRegExp::AsRegExpFlags(flags),
|
||||
&compile_data)) {
|
||||
// Throw an exception if we fail to parse the pattern.
|
||||
// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
|
||||
|
@ -1390,10 +1390,8 @@ RUNTIME_FUNCTION(Runtime_NewRegExpWithBacktrackLimit) {
|
||||
CONVERT_ARG_HANDLE_CHECKED(String, flags_string, 1);
|
||||
CONVERT_UINT32_ARG_CHECKED(backtrack_limit, 2);
|
||||
|
||||
bool success = false;
|
||||
JSRegExp::Flags flags =
|
||||
JSRegExp::FlagsFromString(isolate, flags_string, &success);
|
||||
CHECK(success);
|
||||
JSRegExp::FlagsFromString(isolate, flags_string).value();
|
||||
|
||||
RETURN_RESULT_OR_FAILURE(
|
||||
isolate, JSRegExp::New(isolate, pattern, flags, backtrack_limit));
|
||||
|
@ -1513,15 +1513,14 @@ void WebSnapshotDeserializer::ReadValue(
|
||||
case ValueType::REGEXP: {
|
||||
Handle<String> pattern = ReadString(false);
|
||||
Handle<String> flags_string = ReadString(false);
|
||||
bool success = false;
|
||||
JSRegExp::Flags flags =
|
||||
JSRegExp::FlagsFromString(isolate_, flags_string, &success);
|
||||
if (!success) {
|
||||
base::Optional<JSRegExp::Flags> flags =
|
||||
JSRegExp::FlagsFromString(isolate_, flags_string);
|
||||
if (!flags.has_value()) {
|
||||
Throw("Web snapshot: Malformed flags in regular expression");
|
||||
return;
|
||||
}
|
||||
MaybeHandle<JSRegExp> maybe_regexp =
|
||||
JSRegExp::New(isolate_, pattern, flags);
|
||||
JSRegExp::New(isolate_, pattern, flags.value());
|
||||
if (!maybe_regexp.ToHandle(&value)) {
|
||||
Throw("Web snapshot: Malformed RegExp");
|
||||
return;
|
||||
|
@ -64,8 +64,8 @@ static bool CheckParse(const char* input) {
|
||||
Zone zone(isolate->allocator(), ZONE_NAME);
|
||||
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
|
||||
RegExpCompileData result;
|
||||
return RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str,
|
||||
JSRegExp::kNone, &result);
|
||||
return RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, {},
|
||||
&result);
|
||||
}
|
||||
|
||||
static void CheckParseEq(const char* input, const char* expected,
|
||||
@ -76,8 +76,8 @@ static void CheckParseEq(const char* input, const char* expected,
|
||||
Zone zone(isolate->allocator(), ZONE_NAME);
|
||||
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
|
||||
RegExpCompileData result;
|
||||
JSRegExp::Flags flags = JSRegExp::kNone;
|
||||
if (unicode) flags |= JSRegExp::kUnicode;
|
||||
RegExpFlags flags;
|
||||
if (unicode) flags |= RegExpFlag::kUnicode;
|
||||
CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, flags,
|
||||
&result));
|
||||
CHECK_NOT_NULL(result.tree);
|
||||
@ -97,8 +97,8 @@ static bool CheckSimple(const char* input) {
|
||||
Zone zone(isolate->allocator(), ZONE_NAME);
|
||||
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
|
||||
RegExpCompileData result;
|
||||
CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str,
|
||||
JSRegExp::kNone, &result));
|
||||
CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, {},
|
||||
&result));
|
||||
CHECK_NOT_NULL(result.tree);
|
||||
CHECK(result.error == RegExpError::kNone);
|
||||
return result.simple;
|
||||
@ -116,8 +116,8 @@ static MinMaxPair CheckMinMaxMatch(const char* input) {
|
||||
Zone zone(isolate->allocator(), ZONE_NAME);
|
||||
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
|
||||
RegExpCompileData result;
|
||||
CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str,
|
||||
JSRegExp::kNone, &result));
|
||||
CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, {},
|
||||
&result));
|
||||
CHECK_NOT_NULL(result.tree);
|
||||
CHECK(result.error == RegExpError::kNone);
|
||||
int min_match = result.tree->min_match();
|
||||
@ -430,8 +430,8 @@ static void ExpectError(const char* input, const char* expected,
|
||||
Zone zone(isolate->allocator(), ZONE_NAME);
|
||||
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
|
||||
RegExpCompileData result;
|
||||
JSRegExp::Flags flags = JSRegExp::kNone;
|
||||
if (unicode) flags |= JSRegExp::kUnicode;
|
||||
RegExpFlags flags;
|
||||
if (unicode) flags |= RegExpFlag::kUnicode;
|
||||
CHECK(!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, flags,
|
||||
&result));
|
||||
CHECK_NULL(result.tree);
|
||||
@ -533,9 +533,9 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
|
||||
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
|
||||
RegExpCompileData compile_data;
|
||||
compile_data.compilation_target = RegExpCompilationTarget::kNative;
|
||||
JSRegExp::Flags flags = JSRegExp::kNone;
|
||||
if (multiline) flags = JSRegExp::kMultiline;
|
||||
if (unicode) flags = JSRegExp::kUnicode;
|
||||
RegExpFlags flags;
|
||||
if (multiline) flags |= RegExpFlag::kMultiline;
|
||||
if (unicode) flags |= RegExpFlag::kUnicode;
|
||||
if (!RegExpParser::ParseRegExpFromHeapString(isolate, zone, str, flags,
|
||||
&compile_data)) {
|
||||
return nullptr;
|
||||
@ -546,7 +546,8 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
|
||||
Handle<String> sample_subject = isolate->factory()
|
||||
->NewStringFromUtf8(base::CStrVector(""))
|
||||
.ToHandleChecked();
|
||||
RegExp::CompileForTesting(isolate, zone, &compile_data, flags, pattern,
|
||||
RegExp::CompileForTesting(isolate, zone, &compile_data,
|
||||
JSRegExp::AsJSRegExpFlags(flags), pattern,
|
||||
sample_subject, is_one_byte);
|
||||
return compile_data.node;
|
||||
}
|
||||
@ -638,7 +639,7 @@ static Handle<JSRegExp> CreateJSRegExp(Handle<String> source, Handle<Code> code,
|
||||
Handle<JSRegExp> regexp =
|
||||
Handle<JSRegExp>::cast(factory->NewJSObject(constructor));
|
||||
|
||||
factory->SetRegExpIrregexpData(regexp, source, JSRegExp::kNone, 0,
|
||||
factory->SetRegExpIrregexpData(regexp, source, {}, 0,
|
||||
JSRegExp::kNoBacktrackLimit);
|
||||
regexp->SetDataAt(is_unicode ? JSRegExp::kIrregexpUC16CodeIndex
|
||||
: JSRegExp::kIrregexpLatin1CodeIndex,
|
||||
|
Loading…
Reference in New Issue
Block a user