diff --git a/include/v8-isolate.h b/include/v8-isolate.h index 4571b2c3b7..8e93cd6b70 100644 --- a/include/v8-isolate.h +++ b/include/v8-isolate.h @@ -538,6 +538,7 @@ class V8_EXPORT Isolate { kAsyncStackTaggingCreateTaskCall = 116, kDurationFormat = 117, kInvalidatedNumberStringPrototypeNoReplaceProtector = 118, + kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode = 119, // If you add new values here, you'll also need to update Chromium's: // web_feature.mojom, use_counter_callback.cc, and enums.xml. V8 changes to diff --git a/src/regexp/regexp-parser.cc b/src/regexp/regexp-parser.cc index fff26b4a58..06e612321b 100644 --- a/src/regexp/regexp-parser.cc +++ b/src/regexp/regexp-parser.cc @@ -429,8 +429,8 @@ class RegExpParserState : public ZoneObject { template class RegExpParserImpl final { private: - RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags, - uintptr_t stack_limit, Zone* zone, + RegExpParserImpl(Isolate* isolate, const CharT* input, int input_length, + RegExpFlags flags, uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc); bool Parse(RegExpCompileData* result); @@ -563,6 +563,7 @@ class RegExpParserImpl final { bool HasNamedCaptures(InClassEscapeState in_class_escape_state); Zone* zone() const { return zone_; } + Isolate* isolate() const { return isolate_; } base::uc32 current() const { return current_; } bool has_more() const { return has_more_; } @@ -603,6 +604,10 @@ class RegExpParserImpl final { const DisallowGarbageCollection no_gc_; Zone* const zone_; + // TODO(pthier, v8:11935): Isolate is only used to increment the UseCounter + // for unicode set incompatibilities in unicode mode. Remove when the counter + // is removed. 
+ Isolate* const isolate_; RegExpError error_ = RegExpError::kNone; int error_pos_ = 0; ZoneList* captures_; @@ -629,9 +634,10 @@ class RegExpParserImpl final { template RegExpParserImpl::RegExpParserImpl( - const CharT* input, int input_length, RegExpFlags flags, + Isolate* isolate, const CharT* input, int input_length, RegExpFlags flags, uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc) : zone_(zone), + isolate_(isolate), captures_(nullptr), named_captures_(nullptr), named_back_references_(nullptr), @@ -2417,6 +2423,21 @@ void RegExpParserImpl::ParseClassEscape( if (current() != '\\') { // Not a ClassEscape. *char_out = current(); + // Count usages of patterns that would break when replacing /u with /v. + // This is only temporarily enabled and should give us an idea if it is + // feasible to enable unicode sets for usage in the pattern attribute. + // TODO(pthier, v8:11935): Remove for M113. + // IsUnicodeMode() is true for both /u and /v, but this method is only + // called for /u. 
+ if (IsUnicodeMode() && isolate() != nullptr) { + const bool unicode_sets_invalid = + IsClassSetSyntaxCharacter(*char_out) || + IsClassSetReservedDoublePunctuator(*char_out); + if (unicode_sets_invalid) { + isolate()->CountUsage( + v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode); + } + } Advance(); return; } @@ -3113,13 +3134,13 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone, String::FlatContent content = input->GetFlatContent(no_gc); if (content.IsOneByte()) { base::Vector v = content.ToOneByteVector(); - return RegExpParserImpl{v.begin(), v.length(), flags, - stack_limit, zone, no_gc} + return RegExpParserImpl{isolate, v.begin(), v.length(), flags, + stack_limit, zone, no_gc} .Parse(result); } else { base::Vector v = content.ToUC16Vector(); - return RegExpParserImpl{v.begin(), v.length(), flags, - stack_limit, zone, no_gc} + return RegExpParserImpl{ + isolate, v.begin(), v.length(), flags, stack_limit, zone, no_gc} .Parse(result); } } @@ -3131,8 +3152,14 @@ bool RegExpParser::VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit, RegExpFlags flags, RegExpCompileData* result, const DisallowGarbageCollection& no_gc) { - return RegExpParserImpl{input, input_length, flags, - stack_limit, zone, no_gc} + // TODO(pthier, v8:11935): Isolate is only temporarily used to increment the + // UseCounter for unicode set incompatibilities in unicode mode. + // This method is only used in the parser for early-errors. To avoid passing + // the isolate through we simply pass a nullptr. This also has the positive + // side-effect of not incrementing the UseCounter multiple times. 
+ Isolate* isolate = nullptr; + return RegExpParserImpl{isolate, input, input_length, flags, + stack_limit, zone, no_gc} .Parse(result); } diff --git a/test/unittests/regexp/regexp-unittest.cc b/test/unittests/regexp/regexp-unittest.cc index dcdee75927..f42b455b39 100644 --- a/test/unittests/regexp/regexp-unittest.cc +++ b/test/unittests/regexp/regexp-unittest.cc @@ -1654,6 +1654,42 @@ void MockUseCounterCallback(v8::Isolate* isolate, v8::Isolate::UseCounterFeature feature) { ++global_use_counts[feature]; } + +void CheckRegExpUnicodeSetIncompatibilitiesUseCounter( + v8::Isolate* isolate, const char* check_pattern) { + int* use_counts = global_use_counts; + int old_count = use_counts + [v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode]; + Local context = isolate->GetCurrentContext(); + { + v8_flags.harmony_regexp_unicode_sets = true; + std::ostringstream os; + os << "/[" << check_pattern << "]/v"; + Local v8_source = + v8::String::NewFromUtf8(isolate, os.str().c_str()).ToLocalChecked(); + MaybeLocal script = v8::Script::Compile(context, v8_source); + CHECK(script.IsEmpty()); + CHECK_EQ( + old_count, + use_counts + [v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode]); + } + { + std::ostringstream os; + os << "/[" << check_pattern << "]/u"; + Local v8_source = + v8::String::NewFromUtf8(isolate, os.str().c_str()).ToLocalChecked(); + MaybeLocal script = v8::Script::Compile(context, v8_source); + Local result = + script.ToLocalChecked()->Run(context).ToLocalChecked(); + CHECK(result->IsRegExp()); + CHECK_EQ( + old_count + 1, + use_counts + [v8::Isolate::kRegExpUnicodeSetIncompatibilitiesWithUnicodeMode]); + } +} + } // namespace using RegExpTestWithContext = TestWithContext; @@ -1720,6 +1756,14 @@ TEST_F(RegExpTestWithContext, UseCountRegExp) { CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]); CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]); CHECK(resultToStringError->IsObject()); + + const char* 
incompatible_patterns[] = { + "(", ")", "[", "{", "}", "/", "-", "|", "&&", + "!!", "##", "$$", "%%", "**", "++", ",,", "..", "::", + ";;", "<<", "==", ">>", "??", "@@", "^^^", "``", "~~"}; + for (auto pattern : incompatible_patterns) { + CheckRegExpUnicodeSetIncompatibilitiesUseCounter(v8_isolate(), pattern); + } } class UncachedExternalString