[regexp] Remove internal match and replace methods
These are no longer needed now that JS builtins have been fully removed. The internal match usage in assert.js has been replaced by a custom miniparser. The internal replace use in various string builtins was replaced by manual global string replacement in a runtime function. Bug: v8:8842,v8:7624 Change-Id: Ieb49c694662a13e84fd9fd2fe5d0412b8e0574da Reviewed-on: https://chromium-review.googlesource.com/c/1473030 Reviewed-by: Yang Guo <yangguo@chromium.org> Reviewed-by: Peter Wong <peter.wm.wong@gmail.com> Commit-Queue: Jakob Gruber <jgruber@chromium.org> Cr-Commit-Position: refs/heads/master@{#59689}
This commit is contained in:
parent
d342122f26
commit
29e7c165a4
@ -2609,19 +2609,9 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
|
||||
writable, Representation::Tagged());
|
||||
initial_map->AppendDescriptor(isolate(), &d);
|
||||
|
||||
{ // Internal: RegExpInternalMatch
|
||||
Handle<JSFunction> function =
|
||||
SimpleCreateFunction(isolate_, isolate_->factory()->empty_string(),
|
||||
Builtins::kRegExpInternalMatch, 2, true);
|
||||
native_context()->set(Context::REGEXP_INTERNAL_MATCH, *function);
|
||||
}
|
||||
|
||||
// Create the last match info. One for external use, and one for internal
|
||||
// use when we don't want to modify the externally visible match info.
|
||||
// Create the last match info.
|
||||
Handle<RegExpMatchInfo> last_match_info = factory->NewRegExpMatchInfo();
|
||||
native_context()->set_regexp_last_match_info(*last_match_info);
|
||||
Handle<RegExpMatchInfo> internal_match_info = factory->NewRegExpMatchInfo();
|
||||
native_context()->set_regexp_internal_match_info(*internal_match_info);
|
||||
|
||||
// Force the RegExp constructor to fast properties, so that we can use the
|
||||
// fast paths for various things like
|
||||
|
@ -937,7 +937,6 @@ namespace internal {
|
||||
CPP(RegExpCapture9Getter) \
|
||||
/* ES #sec-regexp-pattern-flags */ \
|
||||
TFJ(RegExpConstructor, 2, kReceiver, kPattern, kFlags) \
|
||||
TFJ(RegExpInternalMatch, 2, kReceiver, kRegExp, kString) \
|
||||
CPP(RegExpInputGetter) \
|
||||
CPP(RegExpInputSetter) \
|
||||
CPP(RegExpLastMatchGetter) \
|
||||
|
@ -3091,30 +3091,6 @@ TF_BUILTIN(RegExpPrototypeReplace, RegExpBuiltinsAssembler) {
|
||||
string, replace_value));
|
||||
}
|
||||
|
||||
// Simple string matching functionality for internal use which does not modify
|
||||
// the last match info.
|
||||
TF_BUILTIN(RegExpInternalMatch, RegExpBuiltinsAssembler) {
|
||||
TNode<JSRegExp> regexp = CAST(Parameter(Descriptor::kRegExp));
|
||||
TNode<String> string = CAST(Parameter(Descriptor::kString));
|
||||
TNode<Context> context = CAST(Parameter(Descriptor::kContext));
|
||||
|
||||
TNode<Context> native_context = LoadNativeContext(context);
|
||||
TNode<RegExpMatchInfo> internal_match_info = CAST(LoadContextElement(
|
||||
native_context, Context::REGEXP_INTERNAL_MATCH_INFO_INDEX));
|
||||
TNode<HeapObject> maybe_match_indices =
|
||||
CAST(CallBuiltin(Builtins::kRegExpExecInternal, context, regexp, string,
|
||||
SmiZero(), internal_match_info));
|
||||
TNode<Oddball> null = NullConstant();
|
||||
Label if_matched(this);
|
||||
GotoIfNot(WordEqual(maybe_match_indices, null), &if_matched);
|
||||
Return(null);
|
||||
|
||||
BIND(&if_matched);
|
||||
TNode<RegExpMatchInfo> match_indices = CAST(maybe_match_indices);
|
||||
Return(
|
||||
ConstructNewResultFromMatchInfo(context, regexp, match_indices, string));
|
||||
}
|
||||
|
||||
class RegExpStringIteratorAssembler : public RegExpBuiltinsAssembler {
|
||||
public:
|
||||
explicit RegExpStringIteratorAssembler(compiler::CodeAssemblerState* state)
|
||||
|
@ -2572,8 +2572,8 @@ class StringHtmlAssembler : public StringBuiltinsAssembler {
|
||||
const char* method_name, const char* tag_name,
|
||||
const char* attr, Node* const value) {
|
||||
Node* const string = ToThisString(context, receiver, method_name);
|
||||
Node* const value_string =
|
||||
EscapeQuotes(context, ToString_Inline(context, value));
|
||||
TNode<String> value_string =
|
||||
EscapeQuotes(CAST(context), ToString_Inline(context, value));
|
||||
std::string open_tag_attr =
|
||||
"<" + std::string(tag_name) + " " + std::string(attr) + "=\"";
|
||||
std::string close_tag = "</" + std::string(tag_name) + ">";
|
||||
@ -2593,20 +2593,8 @@ class StringHtmlAssembler : public StringBuiltinsAssembler {
|
||||
return var_result.value();
|
||||
}
|
||||
|
||||
Node* EscapeQuotes(Node* const context, Node* const string) {
|
||||
CSA_ASSERT(this, IsString(string));
|
||||
Node* const regexp_function = LoadContextElement(
|
||||
LoadNativeContext(context), Context::REGEXP_FUNCTION_INDEX);
|
||||
Node* const initial_map = LoadObjectField(
|
||||
regexp_function, JSFunction::kPrototypeOrInitialMapOffset);
|
||||
// TODO(pwong): Refactor to not allocate RegExp
|
||||
Node* const regexp =
|
||||
CallRuntime(Runtime::kRegExpInitializeAndCompile, context,
|
||||
AllocateJSObjectFromMap(initial_map), StringConstant("\""),
|
||||
StringConstant("g"));
|
||||
|
||||
return CallRuntime(Runtime::kRegExpInternalReplace, context, regexp, string,
|
||||
StringConstant("""));
|
||||
TNode<String> EscapeQuotes(TNode<Context> context, TNode<String> string) {
|
||||
return CAST(CallRuntime(Runtime::kStringEscapeQuotes, context, string));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -54,7 +54,6 @@ enum ContextLookupFlags {
|
||||
V(OBJECT_IS_FROZEN, JSFunction, object_is_frozen) \
|
||||
V(OBJECT_IS_SEALED, JSFunction, object_is_sealed) \
|
||||
V(OBJECT_KEYS, JSFunction, object_keys) \
|
||||
V(REGEXP_INTERNAL_MATCH, JSFunction, regexp_internal_match) \
|
||||
V(REFLECT_APPLY_INDEX, JSFunction, reflect_apply) \
|
||||
V(REFLECT_CONSTRUCT_INDEX, JSFunction, reflect_construct) \
|
||||
V(REFLECT_DEFINE_PROPERTY_INDEX, JSFunction, reflect_define_property) \
|
||||
@ -234,8 +233,6 @@ enum ContextLookupFlags {
|
||||
V(REGEXP_EXEC_FUNCTION_INDEX, JSFunction, regexp_exec_function) \
|
||||
V(REGEXP_FUNCTION_INDEX, JSFunction, regexp_function) \
|
||||
V(REGEXP_LAST_MATCH_INFO_INDEX, RegExpMatchInfo, regexp_last_match_info) \
|
||||
V(REGEXP_INTERNAL_MATCH_INFO_INDEX, RegExpMatchInfo, \
|
||||
regexp_internal_match_info) \
|
||||
V(REGEXP_PROTOTYPE_MAP_INDEX, Map, regexp_prototype_map) \
|
||||
V(INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX, Map, \
|
||||
initial_regexp_string_iterator_prototype_map) \
|
||||
|
@ -283,7 +283,6 @@ bool IntrinsicHasNoSideEffect(Runtime::FunctionId id) {
|
||||
V(ThrowReferenceError) \
|
||||
V(ThrowSymbolIteratorInvalid) \
|
||||
/* Strings */ \
|
||||
V(RegExpInternalReplace) \
|
||||
V(StringIncludes) \
|
||||
V(StringIndexOf) \
|
||||
V(StringReplaceOneCharWithString) \
|
||||
|
@ -564,13 +564,8 @@ Handle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo(
|
||||
result->SetNumberOfCaptureRegisters(capture_register_count);
|
||||
|
||||
if (*result != *last_match_info) {
|
||||
// The match info has been reallocated, update the corresponding reference
|
||||
// on the native context.
|
||||
if (*last_match_info == *isolate->regexp_last_match_info()) {
|
||||
isolate->native_context()->set_regexp_last_match_info(*result);
|
||||
} else if (*last_match_info == *isolate->regexp_internal_match_info()) {
|
||||
isolate->native_context()->set_regexp_internal_match_info(*result);
|
||||
}
|
||||
DCHECK_EQ(*last_match_info, *isolate->regexp_last_match_info());
|
||||
isolate->native_context()->set_regexp_last_match_info(*result);
|
||||
}
|
||||
|
||||
DisallowHeapAllocation no_allocation;
|
||||
|
@ -791,33 +791,6 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
|
||||
return *answer;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
// Dispatches a global regexp replacement on |subject| to the appropriate
// implementation. |regexp| must have the global flag set (CHECKed). The
// subject is always flattened first. An empty |replacement| is routed to the
// WithEmptyString variant, specialized on whether the subject has only
// one-byte characters; any other replacement is flattened and handed to
// StringReplaceGlobalRegExpWithString.
Object StringReplaceGlobalRegExpWithStringHelper(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
    Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
  CHECK(regexp->GetFlags() & JSRegExp::kGlobal);

  subject = String::Flatten(isolate, subject);

  if (replacement->length() != 0) {
    // General case: non-empty replacement string.
    replacement = String::Flatten(isolate, replacement);
    return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
                                               replacement, last_match_info);
  }

  // Empty replacement: pick the variant matching the subject's encoding.
  if (!subject->HasOnlyOneByteChars()) {
    return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
        isolate, subject, regexp, last_match_info);
  }
  return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
      isolate, subject, regexp, last_match_info);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
RUNTIME_FUNCTION(Runtime_StringSplit) {
|
||||
HandleScope handle_scope(isolate);
|
||||
DCHECK_EQ(3, args.length());
|
||||
@ -915,20 +888,6 @@ RUNTIME_FUNCTION(Runtime_RegExpExec) {
|
||||
index, last_match_info));
|
||||
}
|
||||
|
||||
// Runtime entry point taking three arguments: (regexp, subject, replacement).
// Forwards to StringReplaceGlobalRegExpWithStringHelper, passing the
// isolate's internal match info rather than the last match info.
RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);

  return StringReplaceGlobalRegExpWithStringHelper(
      isolate, regexp, subject, replacement,
      isolate->regexp_internal_match_info());
}
|
||||
|
||||
namespace {
|
||||
|
||||
class MatchInfoBackedMatch : public String::Match {
|
||||
|
@ -736,5 +736,56 @@ RUNTIME_FUNCTION(Runtime_StringCompareSequence) {
|
||||
|
||||
return ReadOnlyRoots(isolate).true_value();
|
||||
}
|
||||
|
||||
// Runtime function taking a single String argument. Returns a string in which
// every double-quote character has been replaced by the HTML entity "&quot;".
// If the argument contains no double quotes, the argument itself is returned.
RUNTIME_FUNCTION(Runtime_StringEscapeQuotes) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, string, 0);

  // Equivalent to global replacement `string.replace(/"/g, "&quot;")`, but
  // this does not modify any global state (e.g. the regexp match info).

  const int string_length = string->length();
  Handle<String> quotes =
      isolate->factory()->LookupSingleCharacterStringFromCode('"');

  int index = String::IndexOf(isolate, string, quotes, 0);

  // No quotes, nothing to do.
  if (index == -1) return *string;

  // Find all quotes. The loop condition stops the search once the previous
  // hit was the final character, so IndexOf is never asked to start past the
  // end of the string.
  std::vector<int> indices = {index};
  while (index + 1 < string_length) {
    index = String::IndexOf(isolate, string, quotes, index + 1);
    if (index == -1) break;
    indices.emplace_back(index);
  }

  // Build the replacement string.
  Handle<String> replacement =
      isolate->factory()->NewStringFromAsciiChecked("&quot;");
  // Each quote contributes at most one preceding slice plus one replacement;
  // the extra +1 accounts for the trailing slice after the last quote.
  const int estimated_part_count = static_cast<int>(indices.size()) * 2 + 1;
  ReplacementStringBuilder builder(isolate->heap(), string,
                                   estimated_part_count);

  int prev_index = -1;  // Start at -1 to avoid special-casing the first match.
  for (const int quote_index : indices) {
    const int slice_start = prev_index + 1;
    const int slice_end = quote_index;
    // Skip empty slices (adjacent quotes, or a quote at position 0).
    if (slice_end > slice_start) {
      builder.AddSubjectSlice(slice_start, slice_end);
    }
    builder.AddString(replacement);
    prev_index = quote_index;
  }

  // Append whatever follows the last quote, if anything.
  if (prev_index < string_length - 1) {
    builder.AddSubjectSlice(prev_index + 1, string_length);
  }

  return *builder.ToString().ToHandleChecked();
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
@ -371,7 +371,6 @@ namespace internal {
|
||||
F(RegExpExec, 4, 1) \
|
||||
F(RegExpExecMultiple, 4, 1) \
|
||||
F(RegExpInitializeAndCompile, 3, 1) \
|
||||
F(RegExpInternalReplace, 3, 1) \
|
||||
F(RegExpReplace, 3, 1) \
|
||||
F(RegExpSplit, 3, 1) \
|
||||
F(StringReplaceNonGlobalRegExpWithFunction, 3, 1) \
|
||||
@ -413,6 +412,7 @@ namespace internal {
|
||||
F(StringBuilderJoin, 3, 1) \
|
||||
F(StringCharCodeAt, 2, 1) \
|
||||
F(StringEqual, 2, 1) \
|
||||
F(StringEscapeQuotes, 1, 1) \
|
||||
F(StringGreaterThan, 2, 1) \
|
||||
F(StringGreaterThanOrEqual, 2, 1) \
|
||||
F(StringIncludes, 3, 1) \
|
||||
|
@ -200,15 +200,68 @@ function assertInstanceof(obj, type) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Split a BCP 47 language tag into locale and extension.
|
||||
*/
|
||||
function splitLanguageTag(tag) {
|
||||
var extRe = /(-[0-9A-Za-z](-[0-9A-Za-z]{2,8})+)+$/;
|
||||
var match = %regexp_internal_match(extRe, tag);
|
||||
if (match) {
|
||||
return { locale: tag.slice(0, match.index), extension: match[0] };
|
||||
// Search for the beginning of one or more extension tags, each of which
|
||||
// contains a singleton tag followed by one or more subtags. The equivalent
|
||||
// regexp is: /(-[0-9A-Za-z](-[0-9A-Za-z]{2,8})+)+$/. For example, in
|
||||
// 'de-DE-u-co-phonebk' the matched extension tags are '-u-co-phonebk'.
|
||||
//
|
||||
// The below is a mini-parser that reads backwards from the end of the string.
|
||||
|
||||
function charCode(char) { return char.charCodeAt(0); }
|
||||
function isAlphaNumeric(code) {
|
||||
return (charCode("0") <= code && code <= charCode("9")) ||
|
||||
(charCode("A") <= code && code <= charCode("Z")) ||
|
||||
(charCode("a") <= code && code <= charCode("z"));
|
||||
}
|
||||
|
||||
const MATCH_SUBTAG = 0;
|
||||
const MATCH_SINGLETON_OR_SUBTAG = 1;
|
||||
let state = MATCH_SUBTAG;
|
||||
|
||||
const MINIMUM_TAG_LENGTH = 2;
|
||||
const MAXIMUM_TAG_LENGTH = 8;
|
||||
let currentTagLength = 0;
|
||||
|
||||
// -1 signifies failure, a non-negative integer is the start index of the
|
||||
// extension tag.
|
||||
let extensionTagStartIndex = -1;
|
||||
|
||||
for (let i = tag.length - 1; i >= 0; i--) {
|
||||
const currentCharCode = tag.charCodeAt(i);
|
||||
if (currentCharCode == charCode("-")) {
|
||||
if (state == MATCH_SINGLETON_OR_SUBTAG && currentTagLength == 1) {
|
||||
// Found the singleton tag, the match succeeded.
|
||||
// Save the matched index, and reset the state. After this point, we
|
||||
// definitely have a match, but we may still find another extension tag
|
||||
// sequence.
|
||||
extensionTagStartIndex = i;
|
||||
state = MATCH_SUBTAG;
|
||||
currentTagLength = 0;
|
||||
} else if (MINIMUM_TAG_LENGTH <= currentTagLength &&
|
||||
currentTagLength <= MAXIMUM_TAG_LENGTH) {
|
||||
// Found a valid subtag.
|
||||
state = MATCH_SINGLETON_OR_SUBTAG;
|
||||
currentTagLength = 0;
|
||||
} else {
|
||||
// Invalid subtag (too short or too long).
|
||||
break;
|
||||
}
|
||||
} else if (isAlphaNumeric(currentCharCode)) {
|
||||
// An alphanumeric character is potentially part of a tag.
|
||||
currentTagLength++;
|
||||
} else {
|
||||
// Any other character is invalid.
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (extensionTagStartIndex != -1) {
|
||||
return { locale: tag.substring(0, extensionTagStartIndex),
|
||||
extension: tag.substring(extensionTagStartIndex) };
|
||||
}
|
||||
|
||||
return { locale: tag, extension: '' };
|
||||
|
Loading…
Reference in New Issue
Block a user