[regexp] Add UseCounter for invalid regexp with /v, but valid with /u
Some patterns that were valid with /u are invalid with /v. This CL adds a UseCounter for such usages in /u to get an idea how often they are used in the wild. This is important information w.r.t the proposal to use /v instead of /u for the pattern attribute (http://go/gh/whatwg/html/pull/7908). Chromium CL: https://crrev.com/c/4221395 Bug: v8:11935 Change-Id: Idc023ceba9ce03eee578d6c387ce8a8f37db292f Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/4212393 Reviewed-by: Camillo Bruni <cbruni@chromium.org> Reviewed-by: Jakob Linke <jgruber@chromium.org> Reviewed-by: Mathias Bynens <mathias@chromium.org> Commit-Queue: Patrick Thier <pthier@chromium.org> Cr-Commit-Position: refs/heads/main@{#85639}
This commit is contained in:
parent
d3a3d73384
commit
5080c7727a
@ -538,6 +538,7 @@ class V8_EXPORT Isolate {
|
||||
kAsyncStackTaggingCreateTaskCall = 116,
|
||||
kDurationFormat = 117,
|
||||
kInvalidatedNumberStringPrototypeNoReplaceProtector = 118,
|
||||
kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode = 119,
|
||||
|
||||
// If you add new values here, you'll also need to update Chromium's:
|
||||
// web_feature.mojom, use_counter_callback.cc, and enums.xml. V8 changes to
|
||||
|
@ -429,8 +429,8 @@ class RegExpParserState : public ZoneObject {
|
||||
template <class CharT>
|
||||
class RegExpParserImpl final {
|
||||
private:
|
||||
RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags,
|
||||
uintptr_t stack_limit, Zone* zone,
|
||||
RegExpParserImpl(Isolate* isolate, const CharT* input, int input_length,
|
||||
RegExpFlags flags, uintptr_t stack_limit, Zone* zone,
|
||||
const DisallowGarbageCollection& no_gc);
|
||||
|
||||
bool Parse(RegExpCompileData* result);
|
||||
@ -563,6 +563,7 @@ class RegExpParserImpl final {
|
||||
bool HasNamedCaptures(InClassEscapeState in_class_escape_state);
|
||||
|
||||
Zone* zone() const { return zone_; }
|
||||
Isolate* isolate() const { return isolate_; }
|
||||
|
||||
base::uc32 current() const { return current_; }
|
||||
bool has_more() const { return has_more_; }
|
||||
@ -603,6 +604,10 @@ class RegExpParserImpl final {
|
||||
|
||||
const DisallowGarbageCollection no_gc_;
|
||||
Zone* const zone_;
|
||||
// TODO(pthier, v8:11935): Isolate is only used to increment the UseCounter
|
||||
// for unicode set incompabilities in unicode mode. Remove when the counter
|
||||
// is removed.
|
||||
Isolate* const isolate_;
|
||||
RegExpError error_ = RegExpError::kNone;
|
||||
int error_pos_ = 0;
|
||||
ZoneList<RegExpCapture*>* captures_;
|
||||
@ -629,9 +634,10 @@ class RegExpParserImpl final {
|
||||
|
||||
template <class CharT>
|
||||
RegExpParserImpl<CharT>::RegExpParserImpl(
|
||||
const CharT* input, int input_length, RegExpFlags flags,
|
||||
Isolate* isolate, const CharT* input, int input_length, RegExpFlags flags,
|
||||
uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc)
|
||||
: zone_(zone),
|
||||
isolate_(isolate),
|
||||
captures_(nullptr),
|
||||
named_captures_(nullptr),
|
||||
named_back_references_(nullptr),
|
||||
@ -2417,6 +2423,21 @@ void RegExpParserImpl<CharT>::ParseClassEscape(
|
||||
if (current() != '\\') {
|
||||
// Not a ClassEscape.
|
||||
*char_out = current();
|
||||
// Count usages of patterns that would break when replacing /u with /v.
|
||||
// This is only temporarily enabled and should give us an idea if it is
|
||||
// feasible to enable unicode sets for usage in the pattern attribute.
|
||||
// TODO(pthier, v8:11935): Remove for M113.
|
||||
// IsUnicodeMode() is true for both /u and /v, but this method is only
|
||||
// called for /u.
|
||||
if (IsUnicodeMode() && isolate() != nullptr) {
|
||||
const bool unicode_sets_invalid =
|
||||
IsClassSetSyntaxCharacter(*char_out) ||
|
||||
IsClassSetReservedDoublePunctuator(*char_out);
|
||||
if (unicode_sets_invalid) {
|
||||
isolate()->CountUsage(
|
||||
v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode);
|
||||
}
|
||||
}
|
||||
Advance();
|
||||
return;
|
||||
}
|
||||
@ -3113,13 +3134,13 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
|
||||
String::FlatContent content = input->GetFlatContent(no_gc);
|
||||
if (content.IsOneByte()) {
|
||||
base::Vector<const uint8_t> v = content.ToOneByteVector();
|
||||
return RegExpParserImpl<uint8_t>{v.begin(), v.length(), flags,
|
||||
stack_limit, zone, no_gc}
|
||||
return RegExpParserImpl<uint8_t>{isolate, v.begin(), v.length(), flags,
|
||||
stack_limit, zone, no_gc}
|
||||
.Parse(result);
|
||||
} else {
|
||||
base::Vector<const base::uc16> v = content.ToUC16Vector();
|
||||
return RegExpParserImpl<base::uc16>{v.begin(), v.length(), flags,
|
||||
stack_limit, zone, no_gc}
|
||||
return RegExpParserImpl<base::uc16>{
|
||||
isolate, v.begin(), v.length(), flags, stack_limit, zone, no_gc}
|
||||
.Parse(result);
|
||||
}
|
||||
}
|
||||
@ -3131,8 +3152,14 @@ bool RegExpParser::VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit,
|
||||
RegExpFlags flags,
|
||||
RegExpCompileData* result,
|
||||
const DisallowGarbageCollection& no_gc) {
|
||||
return RegExpParserImpl<CharT>{input, input_length, flags,
|
||||
stack_limit, zone, no_gc}
|
||||
// TODO(pthier, v8:11935): Isolate is only temporarily used to increment the
|
||||
// UseCounter for unicode set incompabilities in unicode mode.
|
||||
// This method is only used in the parser for early-errors. To avoid passing
|
||||
// the isolate through we simply pass a nullptr. This also has the positive
|
||||
// side-effect of not incrementing the UseCounter multiple times.
|
||||
Isolate* isolate = nullptr;
|
||||
return RegExpParserImpl<CharT>{isolate, input, input_length, flags,
|
||||
stack_limit, zone, no_gc}
|
||||
.Parse(result);
|
||||
}
|
||||
|
||||
|
@ -1654,6 +1654,42 @@ void MockUseCounterCallback(v8::Isolate* isolate,
|
||||
v8::Isolate::UseCounterFeature feature) {
|
||||
++global_use_counts[feature];
|
||||
}
|
||||
|
||||
void CheckRegExpUnicodeSetIncompatibilitiesUseCounter(
|
||||
v8::Isolate* isolate, const char* check_pattern) {
|
||||
int* use_counts = global_use_counts;
|
||||
int old_count = use_counts
|
||||
[v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode];
|
||||
Local<v8::Context> context = isolate->GetCurrentContext();
|
||||
{
|
||||
v8_flags.harmony_regexp_unicode_sets = true;
|
||||
std::ostringstream os;
|
||||
os << "/[" << check_pattern << "]/v";
|
||||
Local<v8::String> v8_source =
|
||||
v8::String::NewFromUtf8(isolate, os.str().c_str()).ToLocalChecked();
|
||||
MaybeLocal<v8::Script> script = v8::Script::Compile(context, v8_source);
|
||||
CHECK(script.IsEmpty());
|
||||
CHECK_EQ(
|
||||
old_count,
|
||||
use_counts
|
||||
[v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode]);
|
||||
}
|
||||
{
|
||||
std::ostringstream os;
|
||||
os << "/[" << check_pattern << "]/u";
|
||||
Local<v8::String> v8_source =
|
||||
v8::String::NewFromUtf8(isolate, os.str().c_str()).ToLocalChecked();
|
||||
MaybeLocal<v8::Script> script = v8::Script::Compile(context, v8_source);
|
||||
Local<v8::Value> result =
|
||||
script.ToLocalChecked()->Run(context).ToLocalChecked();
|
||||
CHECK(result->IsRegExp());
|
||||
CHECK_EQ(
|
||||
old_count + 1,
|
||||
use_counts
|
||||
[v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
using RegExpTestWithContext = TestWithContext;
|
||||
@ -1720,6 +1756,14 @@ TEST_F(RegExpTestWithContext, UseCountRegExp) {
|
||||
CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
|
||||
CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
|
||||
CHECK(resultToStringError->IsObject());
|
||||
|
||||
const char* incompatible_patterns[] = {
|
||||
"(", ")", "[", "{", "}", "/", "-", "|", "&&",
|
||||
"!!", "##", "$$", "%%", "**", "++", ",,", "..", "::",
|
||||
";;", "<<", "==", ">>", "??", "@@", "^^^", "``", "~~"};
|
||||
for (auto pattern : incompatible_patterns) {
|
||||
CheckRegExpUnicodeSetIncompatibilitiesUseCounter(v8_isolate(), pattern);
|
||||
}
|
||||
}
|
||||
|
||||
class UncachedExternalString
|
||||
|
Loading…
Reference in New Issue
Block a user