[regexp] Add UseCounter for invalid regexp with /v, but valid with /u
Some patterns that were valid with /u are invalid with /v. This CL adds a UseCounter for such usages in /u to get an idea how often they are used in the wild. This is important information w.r.t the proposal to use /v instead of /u for the pattern attribute (http://go/gh/whatwg/html/pull/7908). Chromium CL: https://crrev.com/c/4221395 Bug: v8:11935 Change-Id: Idc023ceba9ce03eee578d6c387ce8a8f37db292f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4212393 Reviewed-by: Camillo Bruni <cbruni@chromium.org> Reviewed-by: Jakob Linke <jgruber@chromium.org> Reviewed-by: Mathias Bynens <mathias@chromium.org> Commit-Queue: Patrick Thier <pthier@chromium.org> Cr-Commit-Position: refs/heads/main@{#85639}
This commit is contained in:
parent
d3a3d73384
commit
5080c7727a
@ -538,6 +538,7 @@ class V8_EXPORT Isolate {
|
|||||||
kAsyncStackTaggingCreateTaskCall = 116,
|
kAsyncStackTaggingCreateTaskCall = 116,
|
||||||
kDurationFormat = 117,
|
kDurationFormat = 117,
|
||||||
kInvalidatedNumberStringPrototypeNoReplaceProtector = 118,
|
kInvalidatedNumberStringPrototypeNoReplaceProtector = 118,
|
||||||
|
kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode = 119,
|
||||||
|
|
||||||
// If you add new values here, you'll also need to update Chromium's:
|
// If you add new values here, you'll also need to update Chromium's:
|
||||||
// web_feature.mojom, use_counter_callback.cc, and enums.xml. V8 changes to
|
// web_feature.mojom, use_counter_callback.cc, and enums.xml. V8 changes to
|
||||||
|
@ -429,8 +429,8 @@ class RegExpParserState : public ZoneObject {
|
|||||||
template <class CharT>
|
template <class CharT>
|
||||||
class RegExpParserImpl final {
|
class RegExpParserImpl final {
|
||||||
private:
|
private:
|
||||||
RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags,
|
RegExpParserImpl(Isolate* isolate, const CharT* input, int input_length,
|
||||||
uintptr_t stack_limit, Zone* zone,
|
RegExpFlags flags, uintptr_t stack_limit, Zone* zone,
|
||||||
const DisallowGarbageCollection& no_gc);
|
const DisallowGarbageCollection& no_gc);
|
||||||
|
|
||||||
bool Parse(RegExpCompileData* result);
|
bool Parse(RegExpCompileData* result);
|
||||||
@ -563,6 +563,7 @@ class RegExpParserImpl final {
|
|||||||
bool HasNamedCaptures(InClassEscapeState in_class_escape_state);
|
bool HasNamedCaptures(InClassEscapeState in_class_escape_state);
|
||||||
|
|
||||||
Zone* zone() const { return zone_; }
|
Zone* zone() const { return zone_; }
|
||||||
|
Isolate* isolate() const { return isolate_; }
|
||||||
|
|
||||||
base::uc32 current() const { return current_; }
|
base::uc32 current() const { return current_; }
|
||||||
bool has_more() const { return has_more_; }
|
bool has_more() const { return has_more_; }
|
||||||
@ -603,6 +604,10 @@ class RegExpParserImpl final {
|
|||||||
|
|
||||||
const DisallowGarbageCollection no_gc_;
|
const DisallowGarbageCollection no_gc_;
|
||||||
Zone* const zone_;
|
Zone* const zone_;
|
||||||
|
// TODO(pthier, v8:11935): Isolate is only used to increment the UseCounter
|
||||||
|
// for unicode set incompabilities in unicode mode. Remove when the counter
|
||||||
|
// is removed.
|
||||||
|
Isolate* const isolate_;
|
||||||
RegExpError error_ = RegExpError::kNone;
|
RegExpError error_ = RegExpError::kNone;
|
||||||
int error_pos_ = 0;
|
int error_pos_ = 0;
|
||||||
ZoneList<RegExpCapture*>* captures_;
|
ZoneList<RegExpCapture*>* captures_;
|
||||||
@ -629,9 +634,10 @@ class RegExpParserImpl final {
|
|||||||
|
|
||||||
template <class CharT>
|
template <class CharT>
|
||||||
RegExpParserImpl<CharT>::RegExpParserImpl(
|
RegExpParserImpl<CharT>::RegExpParserImpl(
|
||||||
const CharT* input, int input_length, RegExpFlags flags,
|
Isolate* isolate, const CharT* input, int input_length, RegExpFlags flags,
|
||||||
uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc)
|
uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc)
|
||||||
: zone_(zone),
|
: zone_(zone),
|
||||||
|
isolate_(isolate),
|
||||||
captures_(nullptr),
|
captures_(nullptr),
|
||||||
named_captures_(nullptr),
|
named_captures_(nullptr),
|
||||||
named_back_references_(nullptr),
|
named_back_references_(nullptr),
|
||||||
@ -2417,6 +2423,21 @@ void RegExpParserImpl<CharT>::ParseClassEscape(
|
|||||||
if (current() != '\\') {
|
if (current() != '\\') {
|
||||||
// Not a ClassEscape.
|
// Not a ClassEscape.
|
||||||
*char_out = current();
|
*char_out = current();
|
||||||
|
// Count usages of patterns that would break when replacing /u with /v.
|
||||||
|
// This is only temporarily enabled and should give us an idea if it is
|
||||||
|
// feasible to enable unicode sets for usage in the pattern attribute.
|
||||||
|
// TODO(pthier, v8:11935): Remove for M113.
|
||||||
|
// IsUnicodeMode() is true for both /u and /v, but this method is only
|
||||||
|
// called for /u.
|
||||||
|
if (IsUnicodeMode() && isolate() != nullptr) {
|
||||||
|
const bool unicode_sets_invalid =
|
||||||
|
IsClassSetSyntaxCharacter(*char_out) ||
|
||||||
|
IsClassSetReservedDoublePunctuator(*char_out);
|
||||||
|
if (unicode_sets_invalid) {
|
||||||
|
isolate()->CountUsage(
|
||||||
|
v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode);
|
||||||
|
}
|
||||||
|
}
|
||||||
Advance();
|
Advance();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -3113,13 +3134,13 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
|
|||||||
String::FlatContent content = input->GetFlatContent(no_gc);
|
String::FlatContent content = input->GetFlatContent(no_gc);
|
||||||
if (content.IsOneByte()) {
|
if (content.IsOneByte()) {
|
||||||
base::Vector<const uint8_t> v = content.ToOneByteVector();
|
base::Vector<const uint8_t> v = content.ToOneByteVector();
|
||||||
return RegExpParserImpl<uint8_t>{v.begin(), v.length(), flags,
|
return RegExpParserImpl<uint8_t>{isolate, v.begin(), v.length(), flags,
|
||||||
stack_limit, zone, no_gc}
|
stack_limit, zone, no_gc}
|
||||||
.Parse(result);
|
.Parse(result);
|
||||||
} else {
|
} else {
|
||||||
base::Vector<const base::uc16> v = content.ToUC16Vector();
|
base::Vector<const base::uc16> v = content.ToUC16Vector();
|
||||||
return RegExpParserImpl<base::uc16>{v.begin(), v.length(), flags,
|
return RegExpParserImpl<base::uc16>{
|
||||||
stack_limit, zone, no_gc}
|
isolate, v.begin(), v.length(), flags, stack_limit, zone, no_gc}
|
||||||
.Parse(result);
|
.Parse(result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3131,8 +3152,14 @@ bool RegExpParser::VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit,
|
|||||||
RegExpFlags flags,
|
RegExpFlags flags,
|
||||||
RegExpCompileData* result,
|
RegExpCompileData* result,
|
||||||
const DisallowGarbageCollection& no_gc) {
|
const DisallowGarbageCollection& no_gc) {
|
||||||
return RegExpParserImpl<CharT>{input, input_length, flags,
|
// TODO(pthier, v8:11935): Isolate is only temporarily used to increment the
|
||||||
stack_limit, zone, no_gc}
|
// UseCounter for unicode set incompabilities in unicode mode.
|
||||||
|
// This method is only used in the parser for early-errors. To avoid passing
|
||||||
|
// the isolate through we simply pass a nullptr. This also has the positive
|
||||||
|
// side-effect of not incrementing the UseCounter multiple times.
|
||||||
|
Isolate* isolate = nullptr;
|
||||||
|
return RegExpParserImpl<CharT>{isolate, input, input_length, flags,
|
||||||
|
stack_limit, zone, no_gc}
|
||||||
.Parse(result);
|
.Parse(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1654,6 +1654,42 @@ void MockUseCounterCallback(v8::Isolate* isolate,
|
|||||||
v8::Isolate::UseCounterFeature feature) {
|
v8::Isolate::UseCounterFeature feature) {
|
||||||
++global_use_counts[feature];
|
++global_use_counts[feature];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CheckRegExpUnicodeSetIncompatibilitiesUseCounter(
|
||||||
|
v8::Isolate* isolate, const char* check_pattern) {
|
||||||
|
int* use_counts = global_use_counts;
|
||||||
|
int old_count = use_counts
|
||||||
|
[v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode];
|
||||||
|
Local<v8::Context> context = isolate->GetCurrentContext();
|
||||||
|
{
|
||||||
|
v8_flags.harmony_regexp_unicode_sets = true;
|
||||||
|
std::ostringstream os;
|
||||||
|
os << "/[" << check_pattern << "]/v";
|
||||||
|
Local<v8::String> v8_source =
|
||||||
|
v8::String::NewFromUtf8(isolate, os.str().c_str()).ToLocalChecked();
|
||||||
|
MaybeLocal<v8::Script> script = v8::Script::Compile(context, v8_source);
|
||||||
|
CHECK(script.IsEmpty());
|
||||||
|
CHECK_EQ(
|
||||||
|
old_count,
|
||||||
|
use_counts
|
||||||
|
[v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode]);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
std::ostringstream os;
|
||||||
|
os << "/[" << check_pattern << "]/u";
|
||||||
|
Local<v8::String> v8_source =
|
||||||
|
v8::String::NewFromUtf8(isolate, os.str().c_str()).ToLocalChecked();
|
||||||
|
MaybeLocal<v8::Script> script = v8::Script::Compile(context, v8_source);
|
||||||
|
Local<v8::Value> result =
|
||||||
|
script.ToLocalChecked()->Run(context).ToLocalChecked();
|
||||||
|
CHECK(result->IsRegExp());
|
||||||
|
CHECK_EQ(
|
||||||
|
old_count + 1,
|
||||||
|
use_counts
|
||||||
|
[v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
using RegExpTestWithContext = TestWithContext;
|
using RegExpTestWithContext = TestWithContext;
|
||||||
@ -1720,6 +1756,14 @@ TEST_F(RegExpTestWithContext, UseCountRegExp) {
|
|||||||
CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
|
CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
|
||||||
CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
|
CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
|
||||||
CHECK(resultToStringError->IsObject());
|
CHECK(resultToStringError->IsObject());
|
||||||
|
|
||||||
|
const char* incompatible_patterns[] = {
|
||||||
|
"(", ")", "[", "{", "}", "/", "-", "|", "&&",
|
||||||
|
"!!", "##", "$$", "%%", "**", "++", ",,", "..", "::",
|
||||||
|
";;", "<<", "==", ">>", "??", "@@", "^^^", "``", "~~"};
|
||||||
|
for (auto pattern : incompatible_patterns) {
|
||||||
|
CheckRegExpUnicodeSetIncompatibilitiesUseCounter(v8_isolate(), pattern);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class UncachedExternalString
|
class UncachedExternalString
|
||||||
|
Loading…
Reference in New Issue
Block a user