[regexp] Add support for dotAll flag

The dotAll flag changes behavior of the dot '.' character to match every
possible single character instead of excluding certain line terminators.

The implementation is staged behind --harmony-regexp-dotall.

Spec proposal: https://github.com/mathiasbynens/es-regexp-dotall-flag

BUG=v8:6172

Review-Url: https://codereview.chromium.org/2780173002
Cr-Commit-Position: refs/heads/master@{#44295}
This commit is contained in:
jgruber 2017-03-31 02:20:13 -07:00 committed by Commit bot
parent 85ff725cf4
commit cec39ad1ad
17 changed files with 316 additions and 13 deletions

View File

@ -4692,11 +4692,12 @@ class V8_EXPORT RegExp : public Object {
*/
enum Flags {
kNone = 0,
kGlobal = 1,
kIgnoreCase = 2,
kMultiline = 4,
kSticky = 8,
kUnicode = 16
kGlobal = 1 << 0,
kIgnoreCase = 1 << 1,
kMultiline = 1 << 2,
kSticky = 1 << 3,
kUnicode = 1 << 4,
kDotAll = 1 << 5,
};
/**

View File

@ -1228,6 +1228,11 @@ ExternalReference ExternalReference::address_of_regexp_stack_limit(
return ExternalReference(isolate->regexp_stack()->limit_address());
}
// Returns an ExternalReference wrapping the address of the
// FLAG_harmony_regexp_dotall runtime flag, so that the flag can be referred
// to by address. The |isolate| argument is not used by the body.
ExternalReference ExternalReference::address_of_regexp_dotall_flag(
Isolate* isolate) {
return ExternalReference(&FLAG_harmony_regexp_dotall);
}
ExternalReference ExternalReference::store_buffer_top(Isolate* isolate) {
return ExternalReference(isolate->heap()->store_buffer_top_address());
}

View File

@ -923,6 +923,9 @@ class ExternalReference BASE_EMBEDDED {
// Static variable RegExpStack::limit_address()
static ExternalReference address_of_regexp_stack_limit(Isolate* isolate);
// Direct access to FLAG_harmony_regexp_dotall.
static ExternalReference address_of_regexp_dotall_flag(Isolate* isolate);
// Static variables for RegExp.
static ExternalReference address_of_static_offsets_vector(Isolate* isolate);
static ExternalReference address_of_regexp_stack_memory_address(

View File

@ -4004,6 +4004,22 @@ void Genesis::InitializeGlobal_harmony_promise_finally() {
}
}
void Genesis::InitializeGlobal_harmony_regexp_dotall() {
if (!FLAG_harmony_regexp_dotall) return;
Handle<JSFunction> constructor(native_context()->regexp_function());
Handle<JSObject> prototype(JSObject::cast(constructor->instance_prototype()));
SimpleInstallGetter(prototype, isolate()->factory()->dotAll_string(),
Builtins::kRegExpPrototypeDotAllGetter, true);
// The regexp prototype map has changed because we added a property
// to it, so we update the saved map.
Handle<Map> prototype_map(prototype->map());
Map::SetShouldBeFastPrototypeMap(prototype_map, true, isolate());
native_context()->set_regexp_prototype_map(*prototype_map);
}
#ifdef V8_I18N_SUPPORT
void Genesis::InitializeGlobal_datetime_format_to_parts() {
if (!FLAG_datetime_format_to_parts) return;

View File

@ -737,6 +737,8 @@ namespace internal {
TFJ(RegExpPrototypeCompile, 2, kPattern, kFlags) \
/* ES #sec-regexp.prototype.exec */ \
TFJ(RegExpPrototypeExec, 1, kString) \
/* ES #sec-get-regexp.prototype.dotAll */ \
TFJ(RegExpPrototypeDotAllGetter, 0) \
/* ES #sec-get-regexp.prototype.flags */ \
TFJ(RegExpPrototypeFlagsGetter, 0) \
/* ES #sec-get-regexp.prototype.global */ \

View File

@ -781,6 +781,8 @@ Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
Variable var_length(this, MachineType::PointerRepresentation(), int_zero);
Variable var_flags(this, MachineType::PointerRepresentation());
Node* const is_dotall_enabled = IsDotAllEnabled(isolate);
// First, count the number of characters we will need and check which flags
// are set.
@ -802,6 +804,13 @@ Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
CASE_FOR_FLAG(JSRegExp::kGlobal);
CASE_FOR_FLAG(JSRegExp::kIgnoreCase);
CASE_FOR_FLAG(JSRegExp::kMultiline);
{
Label next(this);
GotoIfNot(is_dotall_enabled, &next);
CASE_FOR_FLAG(JSRegExp::kDotAll);
Goto(&next);
Bind(&next);
}
CASE_FOR_FLAG(JSRegExp::kUnicode);
CASE_FOR_FLAG(JSRegExp::kSticky);
#undef CASE_FOR_FLAG
@ -828,6 +837,13 @@ Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
CASE_FOR_FLAG("global", JSRegExp::kGlobal);
CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase);
CASE_FOR_FLAG("multiline", JSRegExp::kMultiline);
{
Label next(this);
GotoIfNot(is_dotall_enabled, &next);
CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll);
Goto(&next);
Bind(&next);
}
CASE_FOR_FLAG("unicode", JSRegExp::kUnicode);
CASE_FOR_FLAG("sticky", JSRegExp::kSticky);
#undef CASE_FOR_FLAG
@ -859,6 +875,13 @@ Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
CASE_FOR_FLAG(JSRegExp::kGlobal, 'g');
CASE_FOR_FLAG(JSRegExp::kIgnoreCase, 'i');
CASE_FOR_FLAG(JSRegExp::kMultiline, 'm');
{
Label next(this);
GotoIfNot(is_dotall_enabled, &next);
CASE_FOR_FLAG(JSRegExp::kDotAll, 's');
Goto(&next);
Bind(&next);
}
CASE_FOR_FLAG(JSRegExp::kUnicode, 'u');
CASE_FOR_FLAG(JSRegExp::kSticky, 'y');
#undef CASE_FOR_FLAG
@ -1211,6 +1234,9 @@ Node* RegExpBuiltinsAssembler::SlowFlagGetter(Node* const context,
case JSRegExp::kMultiline:
name = factory->multiline_string();
break;
case JSRegExp::kDotAll:
UNREACHABLE(); // Never called for dotAll.
break;
case JSRegExp::kSticky:
name = factory->sticky_string();
break;
@ -1251,8 +1277,7 @@ Node* RegExpBuiltinsAssembler::FlagGetter(Node* const context,
}
void RegExpBuiltinsAssembler::FlagGetter(Node* context, Node* receiver,
JSRegExp::Flag flag,
v8::Isolate::UseCounterFeature counter,
JSRegExp::Flag flag, int counter,
const char* method_name) {
Isolate* isolate = this->isolate();
@ -1290,8 +1315,10 @@ void RegExpBuiltinsAssembler::FlagGetter(Node* context, Node* receiver,
Bind(&if_isprototype);
{
Node* const counter_smi = SmiConstant(Smi::FromInt(counter));
CallRuntime(Runtime::kIncrementUseCounter, context, counter_smi);
if (counter != -1) {
Node* const counter_smi = SmiConstant(Smi::FromInt(counter));
CallRuntime(Runtime::kIncrementUseCounter, context, counter_smi);
}
Return(UndefinedConstant());
}
@ -1338,6 +1365,23 @@ TF_BUILTIN(RegExpPrototypeMultilineGetter, RegExpBuiltinsAssembler) {
"RegExp.prototype.multiline");
}
// Emits code that reads FLAG_harmony_regexp_dotall when the builtin runs and
// returns a boolean node that is true iff the flag is set.
//
// The flag is a boolean flag, i.e. it occupies a single byte, so it is read
// with a byte-sized load. A pointer-sized load would also pick up whatever
// happens to be stored next to the flag and could report "enabled" while the
// flag itself is off.
Node* RegExpBuiltinsAssembler::IsDotAllEnabled(Isolate* isolate) {
  Node* const flag_address = ExternalConstant(
      ExternalReference::address_of_regexp_dotall_flag(isolate));
  Node* const flag_value = Load(MachineType::Uint8(), flag_address);
  return Word32NotEqual(flag_value, Int32Constant(0));
}
// ES #sec-get-regexp.prototype.dotAll
// Getter for RegExp.prototype.dotAll. Forwards to the shared FlagGetter
// helper for the kDotAll bit, passing -1 in place of a use counter.
TF_BUILTIN(RegExpPrototypeDotAllGetter, RegExpBuiltinsAssembler) {
  constexpr int kNoUseCounter = -1;

  Node* const ctx = Parameter(Descriptor::kContext);
  Node* const maybe_regexp = Parameter(Descriptor::kReceiver);

  CSA_ASSERT(this, IsDotAllEnabled(isolate()));

  FlagGetter(ctx, maybe_regexp, JSRegExp::kDotAll, kNoUseCounter,
             "RegExp.prototype.dotAll");
}
// ES6 21.2.5.12.
// ES #sec-get-regexp.prototype.sticky
TF_BUILTIN(RegExpPrototypeStickyGetter, RegExpBuiltinsAssembler) {

View File

@ -72,8 +72,10 @@ class RegExpBuiltinsAssembler : public CodeStubAssembler {
Node* FlagGetter(Node* const context, Node* const regexp, JSRegExp::Flag flag,
bool is_fastpath);
void FlagGetter(Node* context, Node* receiver, JSRegExp::Flag flag,
v8::Isolate::UseCounterFeature counter,
const char* method_name);
int counter, const char* method_name);
// Utility method, remove once dotall is unstaged.
Node* IsDotAllEnabled(Isolate* isolate);
Node* IsRegExp(Node* const context, Node* const maybe_receiver);
Node* RegExpInitialize(Node* const context, Node* const regexp,

View File

@ -273,6 +273,9 @@ void ExternalReferenceTable::AddReferences(Isolate* isolate) {
Add(ExternalReference::debug_restart_fp_address(isolate).address(),
"Debug::restart_fp_address()");
Add(ExternalReference::address_of_regexp_dotall_flag(isolate).address(),
"FLAG_harmony_regexp_dotall");
#ifndef V8_INTERPRETED_REGEXP
Add(ExternalReference::re_case_insensitive_compare_uc16(isolate).address(),
"NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16()");

View File

@ -204,6 +204,7 @@ DEFINE_IMPLICATION(es_staging, move_object_start)
V(harmony_tailcalls, "harmony tail calls") \
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_regexp_dotall, "harmony regexp dotall flag") \
V(harmony_regexp_named_captures, "harmony regexp named captures") \
V(harmony_regexp_property, "harmony unicode regexp property classes") \
V(harmony_class_fields, "harmony public fields in class literals") \

View File

@ -54,6 +54,7 @@
V(did_handle_string, "didHandle") \
V(display_name_string, "displayName") \
V(done_string, "done") \
V(dotAll_string, "dotAll") \
V(dot_catch_string, ".catch") \
V(dot_for_string, ".for") \
V(dot_generator_object_string, ".generator_object") \

View File

@ -16144,6 +16144,13 @@ JSRegExp::Flags RegExpFlagsFromString(Handle<String> flags, bool* success) {
case 'm':
flag = JSRegExp::kMultiline;
break;
case 's':
if (FLAG_harmony_regexp_dotall) {
flag = JSRegExp::kDotAll;
} else {
return JSRegExp::Flags(0);
}
break;
case 'u':
flag = JSRegExp::kUnicode;
break;

View File

@ -8305,6 +8305,7 @@ class JSRegExp: public JSObject {
kMultiline = 1 << 2,
kSticky = 1 << 3,
kUnicode = 1 << 4,
kDotAll = 1 << 5,
};
typedef base::Flags<Flag> Flags;

View File

@ -1717,6 +1717,13 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
case 'm':
flag = RegExp::kMultiline;
break;
case 's':
if (FLAG_harmony_regexp_dotall) {
flag = RegExp::kDotAll;
} else {
return Nothing<RegExp::Flags>();
}
break;
case 'u':
flag = RegExp::kUnicode;
break;

View File

@ -29,6 +29,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
named_back_references_(NULL),
in_(in),
current_(kEndMarker),
dotall_(flags & JSRegExp::kDotAll),
ignore_case_(flags & JSRegExp::kIgnoreCase),
multiline_(flags & JSRegExp::kMultiline),
unicode_(flags & JSRegExp::kUnicode),
@ -40,6 +41,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
contains_anchor_(false),
is_scanned_for_captures_(false),
failed_(false) {
DCHECK_IMPLIES(dotall(), FLAG_harmony_regexp_dotall);
Advance();
}
@ -270,10 +272,18 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
case '.': {
Advance();
// everything except \x0a, \x0d, \u2028 and \u2029
ZoneList<CharacterRange>* ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
CharacterRange::AddClassEscape('.', ranges, false, zone());
if (dotall()) {
// Everything.
DCHECK(FLAG_harmony_regexp_dotall);
CharacterRange::AddClassEscape('*', ranges, false, zone());
} else {
// Everything except \x0a, \x0d, \u2028 and \u2029
CharacterRange::AddClassEscape('.', ranges, false, zone());
}
RegExpCharacterClass* cc =
new (zone()) RegExpCharacterClass(ranges, false);
builder->AddCharacterClass(cc);

View File

@ -199,6 +199,7 @@ class RegExpParser BASE_EMBEDDED {
int captures_started() { return captures_started_; }
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
bool dotall() const { return dotall_; }
bool ignore_case() const { return ignore_case_; }
bool multiline() const { return multiline_; }
bool unicode() const { return unicode_; }
@ -312,6 +313,7 @@ class RegExpParser BASE_EMBEDDED {
ZoneList<RegExpBackReference*>* named_back_references_;
FlatStringReader* in_;
uc32 current_;
bool dotall_;
bool ignore_case_;
bool multiline_;
bool unicode_;

View File

@ -0,0 +1,69 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This tests that RegExp dotall features are not enabled when
// --harmony-regexp-dotall is not passed.
// Without the flag, the 's' flag is rejected: the literal form and both
// constructor forms are expected to throw a SyntaxError, just like an
// unknown flag string does.
{
assertThrows("/./s", SyntaxError);
assertThrows(() => RegExp(".", "s"), SyntaxError);
assertThrows(() => new RegExp(".", "s"), SyntaxError);
assertThrows(() => new RegExp(".", "wtf"), SyntaxError);
}
// The flags accessors. Without the flag, regexps expose no dotAll accessor,
// and reading `flags` must not consult a user-defined dotAll getter.
{
  let re = /./gimyu;
  assertEquals("gimuy", re.flags);
  assertTrue(re.global);
  assertTrue(re.ignoreCase);
  assertTrue(re.multiline);
  assertTrue(re.sticky);
  assertTrue(re.unicode);

  // Expected value first, matching the other assertEquals calls here, so a
  // failure message labels expected and actual correctly.
  assertEquals(undefined, re.dotAll);
  assertFalse("dotAll" in re);

  let callCount = 0;
  re.__defineGetter__("dotAll", () => { callCount++; return undefined; });
  assertEquals("gimuy", re.flags);
  assertEquals(0, callCount);
}
// Default '.' behavior.
{
let re = /^.$/;
assertTrue(re.test("a"));
assertTrue(re.test("3"));
assertTrue(re.test("π"));
assertTrue(re.test("\u2027"));
assertTrue(re.test("\u0085"));
assertTrue(re.test("\v"));
assertTrue(re.test("\f"));
assertTrue(re.test("\u180E"));
assertFalse(re.test("\u{10300}")); // Supplementary plane.
assertFalse(re.test("\n"));
assertFalse(re.test("\r"));
assertFalse(re.test("\u2028"));
assertFalse(re.test("\u2029"));
}
// Default '.' behavior (unicode).
{
let re = /^.$/u;
assertTrue(re.test("a"));
assertTrue(re.test("3"));
assertTrue(re.test("π"));
assertTrue(re.test("\u2027"));
assertTrue(re.test("\u0085"));
assertTrue(re.test("\v"));
assertTrue(re.test("\f"));
assertTrue(re.test("\u180E"));
assertTrue(re.test("\u{10300}")); // Supplementary plane.
assertFalse(re.test("\n"));
assertFalse(re.test("\r"));
assertFalse(re.test("\u2028"));
assertFalse(re.test("\u2029"));
}

View File

@ -0,0 +1,129 @@
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-dotall
// Test helper: gives |re| an own `exec` property that forwards to the builtin
// RegExp.prototype.exec with |re| as the receiver, then returns the same
// object. Going by the name, this is presumably meant to push |re| off the
// engine's fast path -- confirm against the regexp builtins.
function toSlowMode(re) {
re.exec = (str) => RegExp.prototype.exec.call(re, str);
return re;
}
// Construction does not throw.
{
let re = /./s;
re = RegExp(".", "s");
re = new RegExp(".", "s");
assertThrows(() => new RegExp(".", "wtf"), SyntaxError);
}
// The flags accessors.
{
let re = /./s;
assertEquals("s", re.flags);
assertFalse(re.global);
assertFalse(re.ignoreCase);
assertFalse(re.multiline);
assertFalse(re.sticky);
assertFalse(re.unicode);
assertTrue(re.dotAll);
re = toSlowMode(/./s);
assertEquals("s", re.flags);
assertFalse(re.global);
assertFalse(re.ignoreCase);
assertFalse(re.multiline);
assertFalse(re.sticky);
assertFalse(re.unicode);
assertTrue(re.dotAll);
re = /./gimyus;
assertEquals("gimsuy", re.flags);
assertTrue(re.global);
assertTrue(re.ignoreCase);
assertTrue(re.multiline);
assertTrue(re.sticky);
assertTrue(re.unicode);
assertTrue(re.dotAll);
re = /./gimyu;
assertEquals("gimuy", re.flags);
assertTrue(re.global);
assertTrue(re.ignoreCase);
assertTrue(re.multiline);
assertTrue(re.sticky);
assertTrue(re.unicode);
assertFalse(re.dotAll);
}
// Default '.' behavior.
{
let re = /^.$/;
assertTrue(re.test("a"));
assertTrue(re.test("3"));
assertTrue(re.test("π"));
assertTrue(re.test("\u2027"));
assertTrue(re.test("\u0085"));
assertTrue(re.test("\v"));
assertTrue(re.test("\f"));
assertTrue(re.test("\u180E"));
assertFalse(re.test("\u{10300}")); // Supplementary plane.
assertFalse(re.test("\n"));
assertFalse(re.test("\r"));
assertFalse(re.test("\u2028"));
assertFalse(re.test("\u2029"));
}
// Default '.' behavior (unicode).
{
let re = /^.$/u;
assertTrue(re.test("a"));
assertTrue(re.test("3"));
assertTrue(re.test("π"));
assertTrue(re.test("\u2027"));
assertTrue(re.test("\u0085"));
assertTrue(re.test("\v"));
assertTrue(re.test("\f"));
assertTrue(re.test("\u180E"));
assertTrue(re.test("\u{10300}")); // Supplementary plane.
assertFalse(re.test("\n"));
assertFalse(re.test("\r"));
assertFalse(re.test("\u2028"));
assertFalse(re.test("\u2029"));
}
// DotAll '.' behavior.
{
let re = /^.$/s;
assertTrue(re.test("a"));
assertTrue(re.test("3"));
assertTrue(re.test("π"));
assertTrue(re.test("\u2027"));
assertTrue(re.test("\u0085"));
assertTrue(re.test("\v"));
assertTrue(re.test("\f"));
assertTrue(re.test("\u180E"));
assertFalse(re.test("\u{10300}")); // Supplementary plane.
assertTrue(re.test("\n"));
assertTrue(re.test("\r"));
assertTrue(re.test("\u2028"));
assertTrue(re.test("\u2029"));
}
// DotAll '.' behavior (unicode).
{
let re = /^.$/su;
assertTrue(re.test("a"));
assertTrue(re.test("3"));
assertTrue(re.test("π"));
assertTrue(re.test("\u2027"));
assertTrue(re.test("\u0085"));
assertTrue(re.test("\v"));
assertTrue(re.test("\f"));
assertTrue(re.test("\u180E"));
assertTrue(re.test("\u{10300}")); // Supplementary plane.
assertTrue(re.test("\n"));
assertTrue(re.test("\r"));
assertTrue(re.test("\u2028"));
assertTrue(re.test("\u2029"));
}