diff --git a/src/bootstrapper.cc b/src/bootstrapper.cc index af31309698..8da4d864cd 100644 --- a/src/bootstrapper.cc +++ b/src/bootstrapper.cc @@ -2609,19 +2609,9 @@ void Genesis::InitializeGlobal(Handle global_object, writable, Representation::Tagged()); initial_map->AppendDescriptor(isolate(), &d); - { // Internal: RegExpInternalMatch - Handle function = - SimpleCreateFunction(isolate_, isolate_->factory()->empty_string(), - Builtins::kRegExpInternalMatch, 2, true); - native_context()->set(Context::REGEXP_INTERNAL_MATCH, *function); - } - - // Create the last match info. One for external use, and one for internal - // use when we don't want to modify the externally visible match info. + // Create the last match info. Handle last_match_info = factory->NewRegExpMatchInfo(); native_context()->set_regexp_last_match_info(*last_match_info); - Handle internal_match_info = factory->NewRegExpMatchInfo(); - native_context()->set_regexp_internal_match_info(*internal_match_info); // Force the RegExp constructor to fast properties, so that we can use the // fast paths for various things like diff --git a/src/builtins/builtins-definitions.h b/src/builtins/builtins-definitions.h index ee40142bbe..3490f9c8e8 100644 --- a/src/builtins/builtins-definitions.h +++ b/src/builtins/builtins-definitions.h @@ -937,7 +937,6 @@ namespace internal { CPP(RegExpCapture9Getter) \ /* ES #sec-regexp-pattern-flags */ \ TFJ(RegExpConstructor, 2, kReceiver, kPattern, kFlags) \ - TFJ(RegExpInternalMatch, 2, kReceiver, kRegExp, kString) \ CPP(RegExpInputGetter) \ CPP(RegExpInputSetter) \ CPP(RegExpLastMatchGetter) \ diff --git a/src/builtins/builtins-regexp-gen.cc b/src/builtins/builtins-regexp-gen.cc index 97c3779bf9..c59063c71f 100644 --- a/src/builtins/builtins-regexp-gen.cc +++ b/src/builtins/builtins-regexp-gen.cc @@ -3091,30 +3091,6 @@ TF_BUILTIN(RegExpPrototypeReplace, RegExpBuiltinsAssembler) { string, replace_value)); } -// Simple string matching functionality for internal use which does not modify -// the last match info. -TF_BUILTIN(RegExpInternalMatch, RegExpBuiltinsAssembler) { - TNode regexp = CAST(Parameter(Descriptor::kRegExp)); - TNode string = CAST(Parameter(Descriptor::kString)); - TNode context = CAST(Parameter(Descriptor::kContext)); - - TNode native_context = LoadNativeContext(context); - TNode internal_match_info = CAST(LoadContextElement( - native_context, Context::REGEXP_INTERNAL_MATCH_INFO_INDEX)); - TNode maybe_match_indices = - CAST(CallBuiltin(Builtins::kRegExpExecInternal, context, regexp, string, - SmiZero(), internal_match_info)); - TNode null = NullConstant(); - Label if_matched(this); - GotoIfNot(WordEqual(maybe_match_indices, null), &if_matched); - Return(null); - - BIND(&if_matched); - TNode match_indices = CAST(maybe_match_indices); - Return( - ConstructNewResultFromMatchInfo(context, regexp, match_indices, string)); -} - class RegExpStringIteratorAssembler : public RegExpBuiltinsAssembler { public: explicit RegExpStringIteratorAssembler(compiler::CodeAssemblerState* state) diff --git a/src/builtins/builtins-string-gen.cc b/src/builtins/builtins-string-gen.cc index acfa3ce707..e2db979452 100644 --- a/src/builtins/builtins-string-gen.cc +++ b/src/builtins/builtins-string-gen.cc @@ -2572,8 +2572,8 @@ class StringHtmlAssembler : public StringBuiltinsAssembler { const char* method_name, const char* tag_name, const char* attr, Node* const value) { Node* const string = ToThisString(context, receiver, method_name); - Node* const value_string = - EscapeQuotes(context, ToString_Inline(context, value)); + TNode value_string = + EscapeQuotes(CAST(context), ToString_Inline(context, value)); std::string open_tag_attr = "<" + std::string(tag_name) + " " + std::string(attr) + "=\""; std::string close_tag = ""; @@ -2593,20 +2593,8 @@ class StringHtmlAssembler : public StringBuiltinsAssembler { return var_result.value(); } - Node* EscapeQuotes(Node* const context, Node* const string) { - CSA_ASSERT(this, IsString(string)); - Node* const regexp_function = LoadContextElement( - LoadNativeContext(context), Context::REGEXP_FUNCTION_INDEX); - Node* const initial_map = LoadObjectField( - regexp_function, JSFunction::kPrototypeOrInitialMapOffset); - // TODO(pwong): Refactor to not allocate RegExp - Node* const regexp = - CallRuntime(Runtime::kRegExpInitializeAndCompile, context, - AllocateJSObjectFromMap(initial_map), StringConstant("\""), - StringConstant("g")); - - return CallRuntime(Runtime::kRegExpInternalReplace, context, regexp, string, - StringConstant(""")); + TNode EscapeQuotes(TNode context, TNode string) { + return CAST(CallRuntime(Runtime::kStringEscapeQuotes, context, string)); } }; diff --git a/src/contexts.h b/src/contexts.h index 57e67f52d7..fa36b76955 100644 --- a/src/contexts.h +++ b/src/contexts.h @@ -54,7 +54,6 @@ enum ContextLookupFlags { V(OBJECT_IS_FROZEN, JSFunction, object_is_frozen) \ V(OBJECT_IS_SEALED, JSFunction, object_is_sealed) \ V(OBJECT_KEYS, JSFunction, object_keys) \ - V(REGEXP_INTERNAL_MATCH, JSFunction, regexp_internal_match) \ V(REFLECT_APPLY_INDEX, JSFunction, reflect_apply) \ V(REFLECT_CONSTRUCT_INDEX, JSFunction, reflect_construct) \ V(REFLECT_DEFINE_PROPERTY_INDEX, JSFunction, reflect_define_property) \ @@ -234,8 +233,6 @@ enum ContextLookupFlags { V(REGEXP_EXEC_FUNCTION_INDEX, JSFunction, regexp_exec_function) \ V(REGEXP_FUNCTION_INDEX, JSFunction, regexp_function) \ V(REGEXP_LAST_MATCH_INFO_INDEX, RegExpMatchInfo, regexp_last_match_info) \ - V(REGEXP_INTERNAL_MATCH_INFO_INDEX, RegExpMatchInfo, \ - regexp_internal_match_info) \ V(REGEXP_PROTOTYPE_MAP_INDEX, Map, regexp_prototype_map) \ V(INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX, Map, \ initial_regexp_string_iterator_prototype_map) \ diff --git a/src/debug/debug-evaluate.cc b/src/debug/debug-evaluate.cc index 2d533caa7d..1c60786c54 100644 --- a/src/debug/debug-evaluate.cc +++ b/src/debug/debug-evaluate.cc @@ -283,7 +283,6 @@ bool IntrinsicHasNoSideEffect(Runtime::FunctionId id) { V(ThrowReferenceError) \ V(ThrowSymbolIteratorInvalid) \ /* Strings */ \ - V(RegExpInternalReplace) \ V(StringIncludes) \ V(StringIndexOf) \ V(StringReplaceOneCharWithString) \ diff --git a/src/regexp/jsregexp.cc b/src/regexp/jsregexp.cc index 9e0d6109e4..84e44733ac 100644 --- a/src/regexp/jsregexp.cc +++ b/src/regexp/jsregexp.cc @@ -564,13 +564,8 @@ Handle RegExpImpl::SetLastMatchInfo( result->SetNumberOfCaptureRegisters(capture_register_count); if (*result != *last_match_info) { - // The match info has been reallocated, update the corresponding reference - // on the native context. - if (*last_match_info == *isolate->regexp_last_match_info()) { - isolate->native_context()->set_regexp_last_match_info(*result); - } else if (*last_match_info == *isolate->regexp_internal_match_info()) { - isolate->native_context()->set_regexp_internal_match_info(*result); - } + DCHECK_EQ(*last_match_info, *isolate->regexp_last_match_info()); + isolate->native_context()->set_regexp_last_match_info(*result); } DisallowHeapAllocation no_allocation; diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc index aa4c900838..43bbb4af95 100644 --- a/src/runtime/runtime-regexp.cc +++ b/src/runtime/runtime-regexp.cc @@ -791,33 +791,6 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString( return *answer; } -namespace { - -Object StringReplaceGlobalRegExpWithStringHelper( - Isolate* isolate, Handle regexp, Handle subject, - Handle replacement, Handle last_match_info) { - CHECK(regexp->GetFlags() & JSRegExp::kGlobal); - - subject = String::Flatten(isolate, subject); - - if (replacement->length() == 0) { - if (subject->HasOnlyOneByteChars()) { - return StringReplaceGlobalRegExpWithEmptyString( - isolate, subject, regexp, last_match_info); - } else { - return StringReplaceGlobalRegExpWithEmptyString( - isolate, subject, regexp, last_match_info); - } - } - - replacement = String::Flatten(isolate, replacement); - - return StringReplaceGlobalRegExpWithString(isolate, subject, regexp, - replacement, last_match_info); -} - -} // namespace - RUNTIME_FUNCTION(Runtime_StringSplit) { HandleScope handle_scope(isolate); DCHECK_EQ(3, args.length()); @@ -915,20 +888,6 @@ RUNTIME_FUNCTION(Runtime_RegExpExec) { index, last_match_info)); } -RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) { - HandleScope scope(isolate); - DCHECK_EQ(3, args.length()); - CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0); - CONVERT_ARG_HANDLE_CHECKED(String, subject, 1); - CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); - - Handle internal_match_info = - isolate->regexp_internal_match_info(); - - return StringReplaceGlobalRegExpWithStringHelper( - isolate, regexp, subject, replacement, internal_match_info); -} - namespace { class MatchInfoBackedMatch : public String::Match { diff --git a/src/runtime/runtime-strings.cc b/src/runtime/runtime-strings.cc index 136ba7f759..1fa7c35625 100644 --- a/src/runtime/runtime-strings.cc +++ b/src/runtime/runtime-strings.cc @@ -736,5 +736,56 @@ RUNTIME_FUNCTION(Runtime_StringCompareSequence) { return ReadOnlyRoots(isolate).true_value(); } + +RUNTIME_FUNCTION(Runtime_StringEscapeQuotes) { + HandleScope handle_scope(isolate); + DCHECK_EQ(1, args.length()); + CONVERT_ARG_HANDLE_CHECKED(String, string, 0); + + // Equivalent to global replacement `string.replace(/"/g, """)`, but this + // does not modify any global state (e.g. the regexp match info). + + const int string_length = string->length(); + Handle quotes = + isolate->factory()->LookupSingleCharacterStringFromCode('"'); + + int index = String::IndexOf(isolate, string, quotes, 0); + + // No quotes, nothing to do. + if (index == -1) return *string; + + // Find all quotes. + std::vector indices = {index}; + while (index + 1 < string_length) { + index = String::IndexOf(isolate, string, quotes, index + 1); + if (index == -1) break; + indices.emplace_back(index); + } + + // Build the replacement string. + Handle replacement = + isolate->factory()->NewStringFromAsciiChecked("""); + const int estimated_part_count = static_cast(indices.size()) * 2 + 1; + ReplacementStringBuilder builder(isolate->heap(), string, + estimated_part_count); + + int prev_index = -1; // Start at -1 to avoid special-casing the first match. + for (int index : indices) { + const int slice_start = prev_index + 1; + const int slice_end = index; + if (slice_end > slice_start) { + builder.AddSubjectSlice(slice_start, slice_end); + } + builder.AddString(replacement); + prev_index = index; + } + + if (prev_index < string_length - 1) { + builder.AddSubjectSlice(prev_index + 1, string_length); + } + + return *builder.ToString().ToHandleChecked(); +} + } // namespace internal } // namespace v8 diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index abdd564f00..39aee6b3af 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -371,7 +371,6 @@ namespace internal { F(RegExpExec, 4, 1) \ F(RegExpExecMultiple, 4, 1) \ F(RegExpInitializeAndCompile, 3, 1) \ - F(RegExpInternalReplace, 3, 1) \ F(RegExpReplace, 3, 1) \ F(RegExpSplit, 3, 1) \ F(StringReplaceNonGlobalRegExpWithFunction, 3, 1) \ @@ -413,6 +412,7 @@ namespace internal { F(StringBuilderJoin, 3, 1) \ F(StringCharCodeAt, 2, 1) \ F(StringEqual, 2, 1) \ + F(StringEscapeQuotes, 1, 1) \ F(StringGreaterThan, 2, 1) \ F(StringGreaterThanOrEqual, 2, 1) \ F(StringIncludes, 3, 1) \ diff --git a/test/intl/assert.js b/test/intl/assert.js index c11e7c0bbf..a6367a8cf2 100644 --- a/test/intl/assert.js +++ b/test/intl/assert.js @@ -200,15 +200,68 @@ function assertInstanceof(obj, type) { } } - /** * Split a BCP 47 language tag into locale and extension. */ function splitLanguageTag(tag) { - var extRe = /(-[0-9A-Za-z](-[0-9A-Za-z]{2,8})+)+$/; - var match = %regexp_internal_match(extRe, tag); - if (match) { - return { locale: tag.slice(0, match.index), extension: match[0] }; + // Search for the beginning of one or more extension tags, each of which + // contains a singleton tag followed by one or more subtags. The equivalent + // regexp is: /(-[0-9A-Za-z](-[0-9A-Za-z]{2,8})+)+$/. For example, in + // 'de-DE-u-co-phonebk' the matched extension tags are '-u-co-phonebk'. + // + // The below is a mini-parser that reads backwards from the end of the string. + + function charCode(char) { return char.charCodeAt(0); } + function isAlphaNumeric(code) { + return (charCode("0") <= code && code <= charCode("9")) || + (charCode("A") <= code && code <= charCode("Z")) || + (charCode("a") <= code && code <= charCode("z")); + } + + const MATCH_SUBTAG = 0; + const MATCH_SINGLETON_OR_SUBTAG = 1; + let state = MATCH_SUBTAG; + + const MINIMUM_TAG_LENGTH = 2; + const MAXIMUM_TAG_LENGTH = 8; + let currentTagLength = 0; + + // -1 signifies failure, a non-negative integer is the start index of the + // extension tag. + let extensionTagStartIndex = -1; + + for (let i = tag.length - 1; i >= 0; i--) { + const currentCharCode = tag.charCodeAt(i); + if (currentCharCode == charCode("-")) { + if (state == MATCH_SINGLETON_OR_SUBTAG && currentTagLength == 1) { + // Found the singleton tag, the match succeeded. + // Save the matched index, and reset the state. After this point, we + // definitely have a match, but we may still find another extension tag + // sequence. + extensionTagStartIndex = i; + state = MATCH_SUBTAG; + currentTagLength = 0; + } else if (MINIMUM_TAG_LENGTH <= currentTagLength && + currentTagLength <= MAXIMUM_TAG_LENGTH) { + // Found a valid subtag. + state = MATCH_SINGLETON_OR_SUBTAG; + currentTagLength = 0; + } else { + // Invalid subtag (too short or too long). + break; + } + } else if (isAlphaNumeric(currentCharCode)) { + // An alphanumeric character is potentially part of a tag. + currentTagLength++; + } else { + // Any other character is invalid. + break; + } + } + + if (extensionTagStartIndex != -1) { + return { locale: tag.substring(0, extensionTagStartIndex), + extension: tag.substring(extensionTagStartIndex) }; } return { locale: tag, extension: '' };