[regexp] Remove internal match and replace methods

These are no longer needed now that JS builtins have been fully
removed.

The internal match usage in assert.js has been replaced by a custom
miniparser. The internal replace use in various string builtins was
replaced by manual global string replacement in a runtime function.

Bug: v8:8842,v8:7624
Change-Id: Ieb49c694662a13e84fd9fd2fe5d0412b8e0574da
Reviewed-on: https://chromium-review.googlesource.com/c/1473030
Reviewed-by: Yang Guo <yangguo@chromium.org>
Reviewed-by: Peter Wong <peter.wm.wong@gmail.com>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59689}
This commit is contained in:
Jakob Gruber 2019-02-14 15:01:43 +01:00 committed by Commit Bot
parent d342122f26
commit 29e7c165a4
11 changed files with 117 additions and 110 deletions

View File

@ -2609,19 +2609,9 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
writable, Representation::Tagged());
initial_map->AppendDescriptor(isolate(), &d);
{ // Internal: RegExpInternalMatch
Handle<JSFunction> function =
SimpleCreateFunction(isolate_, isolate_->factory()->empty_string(),
Builtins::kRegExpInternalMatch, 2, true);
native_context()->set(Context::REGEXP_INTERNAL_MATCH, *function);
}
// Create the last match info. One for external use, and one for internal
// use when we don't want to modify the externally visible match info.
// Create the last match info.
Handle<RegExpMatchInfo> last_match_info = factory->NewRegExpMatchInfo();
native_context()->set_regexp_last_match_info(*last_match_info);
Handle<RegExpMatchInfo> internal_match_info = factory->NewRegExpMatchInfo();
native_context()->set_regexp_internal_match_info(*internal_match_info);
// Force the RegExp constructor to fast properties, so that we can use the
// fast paths for various things like

View File

@ -937,7 +937,6 @@ namespace internal {
CPP(RegExpCapture9Getter) \
/* ES #sec-regexp-pattern-flags */ \
TFJ(RegExpConstructor, 2, kReceiver, kPattern, kFlags) \
TFJ(RegExpInternalMatch, 2, kReceiver, kRegExp, kString) \
CPP(RegExpInputGetter) \
CPP(RegExpInputSetter) \
CPP(RegExpLastMatchGetter) \

View File

@ -3091,30 +3091,6 @@ TF_BUILTIN(RegExpPrototypeReplace, RegExpBuiltinsAssembler) {
string, replace_value));
}
// Simple string matching functionality for internal use which does not modify
// the last match info.
//
// Calls the RegExpExecInternal builtin with (regexp, string, SmiZero()) and the
// native context's internal match info rather than the externally visible last
// match info. Returns null when that call yields null; otherwise returns the
// result built by ConstructNewResultFromMatchInfo.
TF_BUILTIN(RegExpInternalMatch, RegExpBuiltinsAssembler) {
TNode<JSRegExp> regexp = CAST(Parameter(Descriptor::kRegExp));
TNode<String> string = CAST(Parameter(Descriptor::kString));
TNode<Context> context = CAST(Parameter(Descriptor::kContext));
TNode<Context> native_context = LoadNativeContext(context);
// Load the internal match info slot so the regular last match info is not
// the one handed to the exec builtin.
TNode<RegExpMatchInfo> internal_match_info = CAST(LoadContextElement(
native_context, Context::REGEXP_INTERNAL_MATCH_INFO_INDEX));
// SmiZero() is presumably the start index of the search (TODO confirm against
// RegExpExecInternal's descriptor). The result is compared against null below
// and otherwise treated as a RegExpMatchInfo.
TNode<HeapObject> maybe_match_indices =
CAST(CallBuiltin(Builtins::kRegExpExecInternal, context, regexp, string,
SmiZero(), internal_match_info));
TNode<Oddball> null = NullConstant();
Label if_matched(this);
GotoIfNot(WordEqual(maybe_match_indices, null), &if_matched);
// The exec result was null: hand null back to the caller.
Return(null);
BIND(&if_matched);
// Non-null result: build the result object from the match indices.
TNode<RegExpMatchInfo> match_indices = CAST(maybe_match_indices);
Return(
ConstructNewResultFromMatchInfo(context, regexp, match_indices, string));
}
class RegExpStringIteratorAssembler : public RegExpBuiltinsAssembler {
public:
explicit RegExpStringIteratorAssembler(compiler::CodeAssemblerState* state)

View File

@ -2572,8 +2572,8 @@ class StringHtmlAssembler : public StringBuiltinsAssembler {
const char* method_name, const char* tag_name,
const char* attr, Node* const value) {
Node* const string = ToThisString(context, receiver, method_name);
Node* const value_string =
EscapeQuotes(context, ToString_Inline(context, value));
TNode<String> value_string =
EscapeQuotes(CAST(context), ToString_Inline(context, value));
std::string open_tag_attr =
"<" + std::string(tag_name) + " " + std::string(attr) + "=\"";
std::string close_tag = "</" + std::string(tag_name) + ">";
@ -2593,20 +2593,8 @@ class StringHtmlAssembler : public StringBuiltinsAssembler {
return var_result.value();
}
Node* EscapeQuotes(Node* const context, Node* const string) {
CSA_ASSERT(this, IsString(string));
Node* const regexp_function = LoadContextElement(
LoadNativeContext(context), Context::REGEXP_FUNCTION_INDEX);
Node* const initial_map = LoadObjectField(
regexp_function, JSFunction::kPrototypeOrInitialMapOffset);
// TODO(pwong): Refactor to not allocate RegExp
Node* const regexp =
CallRuntime(Runtime::kRegExpInitializeAndCompile, context,
AllocateJSObjectFromMap(initial_map), StringConstant("\""),
StringConstant("g"));
return CallRuntime(Runtime::kRegExpInternalReplace, context, regexp, string,
StringConstant("&quot;"));
TNode<String> EscapeQuotes(TNode<Context> context, TNode<String> string) {
return CAST(CallRuntime(Runtime::kStringEscapeQuotes, context, string));
}
};

View File

@ -54,7 +54,6 @@ enum ContextLookupFlags {
V(OBJECT_IS_FROZEN, JSFunction, object_is_frozen) \
V(OBJECT_IS_SEALED, JSFunction, object_is_sealed) \
V(OBJECT_KEYS, JSFunction, object_keys) \
V(REGEXP_INTERNAL_MATCH, JSFunction, regexp_internal_match) \
V(REFLECT_APPLY_INDEX, JSFunction, reflect_apply) \
V(REFLECT_CONSTRUCT_INDEX, JSFunction, reflect_construct) \
V(REFLECT_DEFINE_PROPERTY_INDEX, JSFunction, reflect_define_property) \
@ -234,8 +233,6 @@ enum ContextLookupFlags {
V(REGEXP_EXEC_FUNCTION_INDEX, JSFunction, regexp_exec_function) \
V(REGEXP_FUNCTION_INDEX, JSFunction, regexp_function) \
V(REGEXP_LAST_MATCH_INFO_INDEX, RegExpMatchInfo, regexp_last_match_info) \
V(REGEXP_INTERNAL_MATCH_INFO_INDEX, RegExpMatchInfo, \
regexp_internal_match_info) \
V(REGEXP_PROTOTYPE_MAP_INDEX, Map, regexp_prototype_map) \
V(INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX, Map, \
initial_regexp_string_iterator_prototype_map) \

View File

@ -283,7 +283,6 @@ bool IntrinsicHasNoSideEffect(Runtime::FunctionId id) {
V(ThrowReferenceError) \
V(ThrowSymbolIteratorInvalid) \
/* Strings */ \
V(RegExpInternalReplace) \
V(StringIncludes) \
V(StringIndexOf) \
V(StringReplaceOneCharWithString) \

View File

@ -564,13 +564,8 @@ Handle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo(
result->SetNumberOfCaptureRegisters(capture_register_count);
if (*result != *last_match_info) {
// The match info has been reallocated, update the corresponding reference
// on the native context.
if (*last_match_info == *isolate->regexp_last_match_info()) {
isolate->native_context()->set_regexp_last_match_info(*result);
} else if (*last_match_info == *isolate->regexp_internal_match_info()) {
isolate->native_context()->set_regexp_internal_match_info(*result);
}
DCHECK_EQ(*last_match_info, *isolate->regexp_last_match_info());
isolate->native_context()->set_regexp_last_match_info(*result);
}
DisallowHeapAllocation no_allocation;

View File

@ -791,33 +791,6 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
return *answer;
}
namespace {

// Dispatches a global regexp replacement on |subject| to the appropriate
// implementation, forwarding |last_match_info| unchanged.
//
// |regexp| must carry the global flag (enforced with CHECK). |subject| is
// always flattened first. A zero-length |replacement| is routed to the
// empty-string specialization, templated on one-byte vs. two-byte result
// strings depending on the subject's contents; any other replacement is
// flattened and passed to the general string-replacement routine.
Object StringReplaceGlobalRegExpWithStringHelper(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
    Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
  CHECK(regexp->GetFlags() & JSRegExp::kGlobal);

  subject = String::Flatten(isolate, subject);

  // General case: a non-empty replacement string.
  if (replacement->length() != 0) {
    replacement = String::Flatten(isolate, replacement);
    return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
                                               replacement, last_match_info);
  }

  // Empty replacement: pick the specialization matching the subject's width.
  if (subject->HasOnlyOneByteChars()) {
    return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
        isolate, subject, regexp, last_match_info);
  }
  return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
      isolate, subject, regexp, last_match_info);
}

}  // namespace
RUNTIME_FUNCTION(Runtime_StringSplit) {
HandleScope handle_scope(isolate);
DCHECK_EQ(3, args.length());
@ -915,20 +888,6 @@ RUNTIME_FUNCTION(Runtime_RegExpExec) {
index, last_match_info));
}
// Runtime entry point for a global regexp replace that uses the isolate's
// internal match info instead of the regular last match info.
//
// Arguments: (0) JSRegExp, (1) subject String, (2) replacement String.
// Returns whatever StringReplaceGlobalRegExpWithStringHelper returns.
RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());

  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);

  return StringReplaceGlobalRegExpWithStringHelper(
      isolate, regexp, subject, replacement,
      isolate->regexp_internal_match_info());
}
namespace {
class MatchInfoBackedMatch : public String::Match {

View File

@ -736,5 +736,56 @@ RUNTIME_FUNCTION(Runtime_StringCompareSequence) {
return ReadOnlyRoots(isolate).true_value();
}
// Replaces every '"' character in the argument string with "&quot;".
//
// Arguments: (0) the String to escape.
// Returns the argument itself when it contains no quotes, otherwise a newly
// built string. If the builder cannot produce the result (ToString() yields an
// empty MaybeHandle), the exception sentinel is returned rather than aborting
// the process.
RUNTIME_FUNCTION(Runtime_StringEscapeQuotes) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, string, 0);

  // Equivalent to the global replacement `string.replace(/"/g, "&quot;")`,
  // but this does not modify any global state (e.g. the regexp match info).

  const int string_length = string->length();
  Handle<String> quotes =
      isolate->factory()->LookupSingleCharacterStringFromCode('"');

  int quote_index = String::IndexOf(isolate, string, quotes, 0);

  // No quotes, nothing to do.
  if (quote_index == -1) return *string;

  // Find all quotes.
  std::vector<int> indices = {quote_index};
  while (quote_index + 1 < string_length) {
    quote_index = String::IndexOf(isolate, string, quotes, quote_index + 1);
    if (quote_index == -1) break;
    indices.emplace_back(quote_index);
  }

  // Build the replacement string.
  Handle<String> replacement =
      isolate->factory()->NewStringFromAsciiChecked("&quot;");
  const int estimated_part_count = static_cast<int>(indices.size()) * 2 + 1;
  ReplacementStringBuilder builder(isolate->heap(), string,
                                   estimated_part_count);

  int prev_index = -1;  // Start at -1 to avoid special-casing the first match.
  for (const int match_index : indices) {
    // Copy the unescaped run between the previous quote and this one, if any.
    const int slice_start = prev_index + 1;
    const int slice_end = match_index;
    if (slice_end > slice_start) {
      builder.AddSubjectSlice(slice_start, slice_end);
    }
    builder.AddString(replacement);
    prev_index = match_index;
  }

  // Copy whatever follows the last quote.
  if (prev_index < string_length - 1) {
    builder.AddSubjectSlice(prev_index + 1, string_length);
  }

  // Each quote grows from one to six characters, so the result can be much
  // longer than the input. Do not use ToHandleChecked() here: it would crash
  // on an empty MaybeHandle.
  // NOTE(review): assumes ToString() leaves a pending exception on the isolate
  // whenever it returns an empty MaybeHandle -- confirm.
  Handle<String> result;
  if (!builder.ToString().ToHandle(&result)) {
    return ReadOnlyRoots(isolate).exception();
  }
  return *result;
}
} // namespace internal
} // namespace v8

View File

@ -371,7 +371,6 @@ namespace internal {
F(RegExpExec, 4, 1) \
F(RegExpExecMultiple, 4, 1) \
F(RegExpInitializeAndCompile, 3, 1) \
F(RegExpInternalReplace, 3, 1) \
F(RegExpReplace, 3, 1) \
F(RegExpSplit, 3, 1) \
F(StringReplaceNonGlobalRegExpWithFunction, 3, 1) \
@ -413,6 +412,7 @@ namespace internal {
F(StringBuilderJoin, 3, 1) \
F(StringCharCodeAt, 2, 1) \
F(StringEqual, 2, 1) \
F(StringEscapeQuotes, 1, 1) \
F(StringGreaterThan, 2, 1) \
F(StringGreaterThanOrEqual, 2, 1) \
F(StringIncludes, 3, 1) \

View File

@ -200,15 +200,68 @@ function assertInstanceof(obj, type) {
}
}
/**
* Split a BCP 47 language tag into locale and extension.
*/
function splitLanguageTag(tag) {
var extRe = /(-[0-9A-Za-z](-[0-9A-Za-z]{2,8})+)+$/;
var match = %regexp_internal_match(extRe, tag);
if (match) {
return { locale: tag.slice(0, match.index), extension: match[0] };
// Search for the beginning of one or more extension tags, each of which
// contains a singleton tag followed by one or more subtags. The equivalent
// regexp is: /(-[0-9A-Za-z](-[0-9A-Za-z]{2,8})+)+$/. For example, in
// 'de-DE-u-co-phonebk' the matched extension tags are '-u-co-phonebk'.
//
// The below is a mini-parser that reads backwards from the end of the string.
function charCode(char) { return char.charCodeAt(0); }
function isAlphaNumeric(code) {
return (charCode("0") <= code && code <= charCode("9")) ||
(charCode("A") <= code && code <= charCode("Z")) ||
(charCode("a") <= code && code <= charCode("z"));
}
const MATCH_SUBTAG = 0;
const MATCH_SINGLETON_OR_SUBTAG = 1;
let state = MATCH_SUBTAG;
const MINIMUM_TAG_LENGTH = 2;
const MAXIMUM_TAG_LENGTH = 8;
let currentTagLength = 0;
// -1 signifies failure, a non-negative integer is the start index of the
// extension tag.
let extensionTagStartIndex = -1;
for (let i = tag.length - 1; i >= 0; i--) {
const currentCharCode = tag.charCodeAt(i);
if (currentCharCode == charCode("-")) {
if (state == MATCH_SINGLETON_OR_SUBTAG && currentTagLength == 1) {
// Found the singleton tag, the match succeeded.
// Save the matched index, and reset the state. After this point, we
// definitely have a match, but we may still find another extension tag
// sequence.
extensionTagStartIndex = i;
state = MATCH_SUBTAG;
currentTagLength = 0;
} else if (MINIMUM_TAG_LENGTH <= currentTagLength &&
currentTagLength <= MAXIMUM_TAG_LENGTH) {
// Found a valid subtag.
state = MATCH_SINGLETON_OR_SUBTAG;
currentTagLength = 0;
} else {
// Invalid subtag (too short or too long).
break;
}
} else if (isAlphaNumeric(currentCharCode)) {
// An alphanumeric character is potentially part of a tag.
currentTagLength++;
} else {
// Any other character is invalid.
break;
}
}
if (extensionTagStartIndex != -1) {
return { locale: tag.substring(0, extensionTagStartIndex),
extension: tag.substring(extensionTagStartIndex) };
}
return { locale: tag, extension: '' };