[builtins] Port StringTrim to Torque

StringPrototypeTrim, StringPrototypeTrimStart, StringPrototypeTrimEnd

Bug: v8:8996
Change-Id: Ic1155b072d7de888f81a739236d224d00ae46c79
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2511529
Commit-Queue: Z Nguyen-Huu <duongn@microsoft.com>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70995}
This commit is contained in:
Z Nguyen-Huu 2020-11-05 12:29:59 -08:00 committed by Commit Bot
parent d6192968d7
commit 289d25c1ac
9 changed files with 188 additions and 208 deletions

View File

@ -1221,8 +1221,9 @@ torque_files = [
"src/builtins/string-replaceall.tq",
"src/builtins/string-slice.tq",
"src/builtins/string-startswith.tq",
"src/builtins/string-substring.tq",
"src/builtins/string-substr.tq",
"src/builtins/string-substring.tq",
"src/builtins/string-trim.tq",
"src/builtins/symbol.tq",
"src/builtins/torque-internal.tq",
"src/builtins/typed-array-createtypedarray.tq",

View File

@ -834,6 +834,10 @@ extern operator '==' macro
ConstexprInt31Equal(constexpr int31, constexpr int31): constexpr bool;
extern operator '!=' macro
ConstexprInt31NotEqual(constexpr int31, constexpr int31): constexpr bool;
// Compile-time (constexpr) equality and inequality for uint32 values. The
// operators are bound to the external macros ConstexprUint32Equal and
// ConstexprUint32NotEqual and yield a constexpr bool.
extern operator '==' macro
ConstexprUint32Equal(constexpr uint32, constexpr uint32): constexpr bool;
extern operator '!=' macro
ConstexprUint32NotEqual(constexpr uint32, constexpr uint32): constexpr bool;
extern operator '>=' macro
ConstexprInt31GreaterThanEqual(
constexpr int31, constexpr int31): constexpr bool;

View File

@ -763,9 +763,6 @@ namespace internal {
TFJ(StringPrototypeSearch, 1, kReceiver, kRegexp) \
/* ES6 #sec-string.prototype.split */ \
TFJ(StringPrototypeSplit, kDontAdaptArgumentsSentinel) \
TFJ(StringPrototypeTrim, kDontAdaptArgumentsSentinel) \
TFJ(StringPrototypeTrimEnd, kDontAdaptArgumentsSentinel) \
TFJ(StringPrototypeTrimStart, kDontAdaptArgumentsSentinel) \
/* ES6 #sec-string.raw */ \
CPP(StringRaw) \
\

View File

@ -1803,178 +1803,6 @@ TF_BUILTIN(StringSubstring, StringBuiltinsAssembler) {
Return(SubString(string, from, to));
}
// ES6 #sec-string.prototype.trim
// Entry point for String.prototype.trim: forwards the actual argument count
// and the context to the shared generator in kTrim mode.
TF_BUILTIN(StringPrototypeTrim, StringTrimAssembler) {
  const TNode<Int32T> raw_argc =
      UncheckedParameter<Int32T>(Descriptor::kJSActualArgumentsCount);
  const TNode<IntPtrT> arg_count = ChangeInt32ToIntPtr(raw_argc);
  Generate(String::kTrim, "String.prototype.trim", arg_count,
           Parameter<Context>(Descriptor::kContext));
}
// https://github.com/tc39/proposal-string-left-right-trim
// Entry point for trimming the start of the receiver: forwards the actual
// argument count and the context to the shared generator in kTrimStart mode.
TF_BUILTIN(StringPrototypeTrimStart, StringTrimAssembler) {
  const TNode<Int32T> raw_argc =
      UncheckedParameter<Int32T>(Descriptor::kJSActualArgumentsCount);
  const TNode<IntPtrT> arg_count = ChangeInt32ToIntPtr(raw_argc);
  Generate(String::kTrimStart, "String.prototype.trimLeft", arg_count,
           Parameter<Context>(Descriptor::kContext));
}
// https://github.com/tc39/proposal-string-left-right-trim
// Entry point for trimming the end of the receiver: forwards the actual
// argument count and the context to the shared generator in kTrimEnd mode.
TF_BUILTIN(StringPrototypeTrimEnd, StringTrimAssembler) {
  const TNode<Int32T> raw_argc =
      UncheckedParameter<Int32T>(Descriptor::kJSActualArgumentsCount);
  const TNode<IntPtrT> arg_count = ChangeInt32ToIntPtr(raw_argc);
  Generate(String::kTrimEnd, "String.prototype.trimRight", arg_count,
           Parameter<Context>(Descriptor::kContext));
}
// Emits the body shared by the three trim builtins.
//   mode        - which end(s) of the string are scanned and trimmed.
//   method_name - name handed to ToThisString for the receiver conversion.
//   argc        - actual argument count, used to build CodeStubArguments.
//   context     - context used by ToThisString and the runtime fallback.
// Every exit pops the arguments and returns a value.
void StringTrimAssembler::Generate(String::TrimMode mode,
const char* method_name, TNode<IntPtrT> argc,
TNode<Context> context) {
Label return_emptystring(this), if_runtime(this);
CodeStubArguments arguments(this, argc);
TNode<Object> receiver = arguments.GetReceiver();
// Check that {receiver} is coercible to Object and convert it to a String.
const TNode<String> string = ToThisString(context, receiver, method_name);
const TNode<IntPtrT> string_length = LoadStringLengthAsWord(string);
// Obtain a raw pointer to the character data; either step may jump to
// if_runtime, where the work is delegated to Runtime::kStringTrim.
ToDirectStringAssembler to_direct(state(), string);
to_direct.TryToDirect(&if_runtime);
const TNode<RawPtrT> string_data = to_direct.PointerToData(&if_runtime);
const TNode<Int32T> instance_type = to_direct.instance_type();
const TNode<BoolT> is_stringonebyte =
IsOneByteStringInstanceType(instance_type);
const TNode<IntPtrT> string_data_offset = to_direct.offset();
// var_start / var_end are the inclusive indices of the first and last
// character to keep; they start at the two ends of the string.
TVARIABLE(IntPtrT, var_start, IntPtrConstant(0));
TVARIABLE(IntPtrT, var_end, IntPtrSub(string_length, IntPtrConstant(1)));
// Forward scan: advance var_start by +1; reaching string_length means every
// character was whitespace, so the result is the empty string.
if (mode == String::kTrimStart || mode == String::kTrim) {
ScanForNonWhiteSpaceOrLineTerminator(string_data, string_data_offset,
is_stringonebyte, &var_start,
string_length, 1, &return_emptystring);
}
// Backward scan: move var_end by -1; reaching -1 means nothing is left.
if (mode == String::kTrimEnd || mode == String::kTrim) {
ScanForNonWhiteSpaceOrLineTerminator(
string_data, string_data_offset, is_stringonebyte, &var_end,
IntPtrConstant(-1), -1, &return_emptystring);
}
// var_end is inclusive, so add one to form the end argument of SubString.
arguments.PopAndReturn(
SubString(string, var_start.value(),
IntPtrAdd(var_end.value(), IntPtrConstant(1))));
// Fallback for strings whose data could not be accessed directly.
BIND(&if_runtime);
arguments.PopAndReturn(
CallRuntime(Runtime::kStringTrim, context, string, SmiConstant(mode)));
BIND(&return_emptystring);
arguments.PopAndReturn(EmptyStringConstant());
}
// Moves |*var_index| by |increment| until it refers to a character that is
// not whitespace / a line terminator, or until it equals |end|, in which case
// control jumps to |if_none_found|. Dispatches on |is_stringonebyte| to pick
// the character width used for loading; both variants continue at the local
// |out| label once a non-whitespace character is found.
void StringTrimAssembler::ScanForNonWhiteSpaceOrLineTerminator(
const TNode<RawPtrT> string_data, const TNode<IntPtrT> string_data_offset,
const TNode<BoolT> is_stringonebyte, TVariable<IntPtrT>* const var_index,
const TNode<IntPtrT> end, int increment, Label* const if_none_found) {
Label if_stringisonebyte(this), out(this);
GotoIf(is_stringonebyte, &if_stringisonebyte);
// Two Byte String
// The element index (index + offset) is shifted left by one to form the
// offset passed to the 16-bit load.
BuildLoop<Uint16T>(
var_index, end, increment, if_none_found, &out,
[&](const TNode<IntPtrT> index) {
return Load<Uint16T>(
string_data,
WordShl(IntPtrAdd(index, string_data_offset), IntPtrConstant(1)));
});
// One Byte String: the element index is used as the load offset unchanged.
BIND(&if_stringisonebyte);
BuildLoop<Uint8T>(var_index, end, increment, if_none_found, &out,
[&](const TNode<IntPtrT> index) {
return Load<Uint8T>(string_data,
IntPtrAdd(index, string_data_offset));
});
BIND(&out);
}
// Emits the scanning loop for one character width T.
//   var_index     - current index; updated in place by |increment| per step.
//   end           - sentinel index; reaching it jumps to |if_none_found|.
//   out           - target taken as soon as a non-whitespace character is seen.
//   get_character - loads the character at a given index.
template <typename T>
void StringTrimAssembler::BuildLoop(
TVariable<IntPtrT>* const var_index, const TNode<IntPtrT> end,
int increment, Label* const if_none_found, Label* const out,
const std::function<TNode<T>(const TNode<IntPtrT>)>& get_character) {
// |var_index| is registered with the loop label because it is modified inside
// the loop body.
Label loop(this, var_index);
Goto(&loop);
BIND(&loop);
{
TNode<IntPtrT> index = var_index->value();
// The sentinel check comes first so no character is loaded at |end|.
GotoIf(IntPtrEqual(index, end), if_none_found);
GotoIfNotWhiteSpaceOrLineTerminator(
UncheckedCast<Uint32T>(get_character(index)), out);
Increment(var_index, increment);
Goto(&loop);
}
}
// Jumps to |if_not_whitespace| when |char_code| is not one of the whitespace
// or line terminator code units listed below; otherwise execution falls
// through past the end of this helper. The checks are ordered so that range
// comparisons reject common non-whitespace characters early.
void StringTrimAssembler::GotoIfNotWhiteSpaceOrLineTerminator(
const TNode<Word32T> char_code, Label* const if_not_whitespace) {
Label out(this);
// 0x0020 - SPACE (Intentionally out of order to fast path a common case)
GotoIf(Word32Equal(char_code, Int32Constant(0x0020)), &out);
// 0x0009 - HORIZONTAL TAB
GotoIf(Uint32LessThan(char_code, Int32Constant(0x0009)), if_not_whitespace);
// 0x000A - LINE FEED OR NEW LINE
// 0x000B - VERTICAL TAB
// 0x000C - FORMFEED
// 0x000D - CARRIAGE RETURN
GotoIf(Uint32LessThanOrEqual(char_code, Int32Constant(0x000D)), &out);
// Common Non-whitespace characters
GotoIf(Uint32LessThan(char_code, Int32Constant(0x00A0)), if_not_whitespace);
// 0x00A0 - NO-BREAK SPACE
GotoIf(Word32Equal(char_code, Int32Constant(0x00A0)), &out);
// 0x1680 - Ogham Space Mark
GotoIf(Word32Equal(char_code, Int32Constant(0x1680)), &out);
// 0x2000 - EN QUAD
GotoIf(Uint32LessThan(char_code, Int32Constant(0x2000)), if_not_whitespace);
// 0x2001 - EM QUAD
// 0x2002 - EN SPACE
// 0x2003 - EM SPACE
// 0x2004 - THREE-PER-EM SPACE
// 0x2005 - FOUR-PER-EM SPACE
// 0x2006 - SIX-PER-EM SPACE
// 0x2007 - FIGURE SPACE
// 0x2008 - PUNCTUATION SPACE
// 0x2009 - THIN SPACE
// 0x200A - HAIR SPACE
GotoIf(Uint32LessThanOrEqual(char_code, Int32Constant(0x200A)), &out);
// 0x2028 - LINE SEPARATOR
GotoIf(Word32Equal(char_code, Int32Constant(0x2028)), &out);
// 0x2029 - PARAGRAPH SEPARATOR
GotoIf(Word32Equal(char_code, Int32Constant(0x2029)), &out);
// 0x202F - NARROW NO-BREAK SPACE
GotoIf(Word32Equal(char_code, Int32Constant(0x202F)), &out);
// 0x205F - MEDIUM MATHEMATICAL SPACE
GotoIf(Word32Equal(char_code, Int32Constant(0x205F)), &out);
// 0xFEFF - BYTE ORDER MARK
GotoIf(Word32Equal(char_code, Int32Constant(0xFEFF)), &out);
// 0x3000 - IDEOGRAPHIC SPACE
Branch(Word32Equal(char_code, Int32Constant(0x3000)), &out,
if_not_whitespace);
BIND(&out);
}
// Return the |word32| codepoint at {index}. Supports SeqStrings and
// ExternalStrings.

View File

@ -184,30 +184,6 @@ class StringIncludesIndexOfAssembler : public StringBuiltinsAssembler {
TNode<Context> context);
};
// Assembler used to emit the String.prototype.trim / trimStart / trimEnd
// builtins.
class StringTrimAssembler : public StringBuiltinsAssembler {
public:
explicit StringTrimAssembler(compiler::CodeAssemblerState* state)
: StringBuiltinsAssembler(state) {}
// Jumps to |if_not_whitespace| unless |char_code| is a whitespace or line
// terminator code unit.
V8_EXPORT_PRIVATE void GotoIfNotWhiteSpaceOrLineTerminator(
const TNode<Word32T> char_code, Label* const if_not_whitespace);
protected:
// Emits the complete builtin body for the given trim |mode|; |method| is the
// name used for the receiver-to-string conversion.
void Generate(String::TrimMode mode, const char* method, TNode<IntPtrT> argc,
TNode<Context> context);
// Steps |var_index| by |increment| until a non-whitespace character is found
// or |end| is reached (then jumps to |if_none_found|).
void ScanForNonWhiteSpaceOrLineTerminator(
const TNode<RawPtrT> string_data, const TNode<IntPtrT> string_data_offset,
const TNode<BoolT> is_stringonebyte, TVariable<IntPtrT>* const var_index,
const TNode<IntPtrT> end, int increment, Label* const if_none_found);
// Scanning loop for a single character width T; |get_character| loads the
// character at an index and |out| is taken on the first non-whitespace one.
template <typename T>
void BuildLoop(
TVariable<IntPtrT>* const var_index, const TNode<IntPtrT> end,
int increment, Label* const if_none_found, Label* const out,
const std::function<TNode<T>(const TNode<IntPtrT>)>& get_character);
};
} // namespace internal
} // namespace v8

View File

@ -115,6 +115,11 @@ FromConstexpr<IterationKind, constexpr IterationKind>(
return %RawDownCast<IterationKind>(Unsigned(%FromConstexpr<int32>(c)));
}
// Converts a constexpr string::TrimMode into its runtime counterpart: the
// constant is materialized as an int32, reinterpreted as unsigned, and then
// down-cast (unchecked) to string::TrimMode.
FromConstexpr<string::TrimMode, string::constexpr TrimMode>(
c: string::constexpr TrimMode): string::TrimMode {
return %RawDownCast<string::TrimMode>(Unsigned(%FromConstexpr<int32>(c)));
}
macro Convert<To: type, From: type>(i: From): To {
return i;
}

168
src/builtins/string-trim.tq Normal file
View File

@ -0,0 +1,168 @@
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include 'src/builtins/builtins-string-gen.h'
namespace string {
// Torque declaration of the C++ enum String::TrimMode. StringTrim scans the
// start of the string for kTrim and kTrimStart, and the end of the string for
// kTrim and kTrimEnd.
extern enum TrimMode extends uint31 constexpr 'String::TrimMode' {
kTrim,
kTrimStart,
kTrimEnd
}
// Returns true if |charCode| is one of the whitespace or line terminator code
// units listed below, and false otherwise. The checks are ordered so that
// range comparisons reject common non-whitespace characters early.
@export
macro IsWhiteSpaceOrLineTerminator(charCode: int32): bool {
// 0x0020 - SPACE (Intentionally out of order to fast path a common case)
if (charCode == Int32Constant(0x0020)) {
return true;
}
// 0x0009 - HORIZONTAL TAB
if (charCode < Int32Constant(0x0009)) {
return false;
}
// 0x000A - LINE FEED OR NEW LINE
// 0x000B - VERTICAL TAB
// 0x000C - FORMFEED
// 0x000D - CARRIAGE RETURN
if (charCode <= Int32Constant(0x000D)) {
return true;
}
// Common Non-whitespace characters
if (charCode < Int32Constant(0x00A0)) {
return false;
}
// 0x00A0 - NO-BREAK SPACE
if (charCode == Int32Constant(0x00A0)) {
return true;
}
// 0x1680 - Ogham Space Mark
if (charCode == Int32Constant(0x1680)) {
return true;
}
// 0x2000 - EN QUAD
if (charCode < Int32Constant(0x2000)) {
return false;
}
// 0x2001 - EM QUAD
// 0x2002 - EN SPACE
// 0x2003 - EM SPACE
// 0x2004 - THREE-PER-EM SPACE
// 0x2005 - FOUR-PER-EM SPACE
// 0x2006 - SIX-PER-EM SPACE
// 0x2007 - FIGURE SPACE
// 0x2008 - PUNCTUATION SPACE
// 0x2009 - THIN SPACE
// 0x200A - HAIR SPACE
if (charCode <= Int32Constant(0x200A)) {
return true;
}
// 0x2028 - LINE SEPARATOR
if (charCode == Int32Constant(0x2028)) {
return true;
}
// 0x2029 - PARAGRAPH SEPARATOR
if (charCode == Int32Constant(0x2029)) {
return true;
}
// 0x202F - NARROW NO-BREAK SPACE
if (charCode == Int32Constant(0x202F)) {
return true;
}
// 0x205F - MEDIUM MATHEMATICAL SPACE
if (charCode == Int32Constant(0x205F)) {
return true;
}
// 0xFEFF - BYTE ORDER MARK
if (charCode == Int32Constant(0xFEFF)) {
return true;
}
// 0x3000 - IDEOGRAPHIC SPACE
if (charCode == Int32Constant(0x3000)) {
return true;
}
return false;
}
// Shared implementation of the trim builtins.
//   receiver    - the JS receiver; converted with ToThisString.
//   _arguments  - unused.
//   methodName  - name passed to ToThisString.
//   variant     - which end(s) of the string to trim.
// Returns the trimmed string, or the empty string when every scanned
// character is whitespace / a line terminator.
transitioning macro StringTrim(implicit context: Context)(
    receiver: JSAny, _arguments: Arguments, methodName: constexpr string,
    variant: constexpr TrimMode): String {
  const str: String = ToThisString(receiver, methodName);
  const length: intptr = str.length_intptr;

  // Strings that are not a DirectString are handed to the runtime.
  const direct = Cast<DirectString>(str)
      otherwise return runtime::StringTrim(str, SmiTag<TrimMode>(variant));

  // Inclusive bounds of the part of the string that is kept.
  let first: intptr = 0;
  let last: intptr = length - 1;

  // TODO(duongn): It would probably be more efficient to turn StringTrim into a
  // template for the different string types and specialize the loop for them.
  if (variant == TrimMode::kTrim || variant == TrimMode::kTrimStart) {
    // Skip leading whitespace; stop at the end of the string.
    while (first != length &&
           IsWhiteSpaceOrLineTerminator(
               StringCharCodeAt(direct, Unsigned(first)))) {
      first++;
    }
    if (first == length) {
      return EmptyStringConstant();
    }
  }

  if (variant == TrimMode::kTrim || variant == TrimMode::kTrimEnd) {
    // Skip trailing whitespace; stop once the index runs off the front.
    while (last != -1 &&
           IsWhiteSpaceOrLineTerminator(
               StringCharCodeAt(direct, Unsigned(last)))) {
      last--;
    }
    if (last == -1) {
      return EmptyStringConstant();
    }
  }

  // |last| is inclusive, SubString's end argument is last + 1.
  return SubString(str, Unsigned(first), Unsigned(last + 1));
}
// ES6 #sec-string.prototype.trim
// JavaScript-visible builtin; delegates to StringTrim in kTrim mode.
transitioning javascript builtin
StringPrototypeTrim(
    js-implicit context: NativeContext, receiver: JSAny)(...arguments): String {
  return StringTrim(
      receiver, arguments, 'String.prototype.trim', TrimMode::kTrim);
}
// https://github.com/tc39/proposal-string-left-right-trim
// JavaScript-visible builtin; delegates to StringTrim in kTrimStart mode.
transitioning javascript builtin
StringPrototypeTrimStart(
    js-implicit context: NativeContext, receiver: JSAny)(...arguments): String {
  return StringTrim(
      receiver, arguments, 'String.prototype.trimLeft', TrimMode::kTrimStart);
}
// https://github.com/tc39/proposal-string-left-right-trim
// JavaScript-visible builtin; delegates to StringTrim in kTrimEnd mode.
transitioning javascript builtin
StringPrototypeTrimEnd(
    js-implicit context: NativeContext, receiver: JSAny)(...arguments): String {
  return StringTrim(
      receiver, arguments, 'String.prototype.trimRight', TrimMode::kTrimEnd);
}
}
namespace runtime {
// Runtime function taking the string and the Smi-tagged trim mode and
// returning the trimmed string. string::StringTrim calls it when the receiver
// string cannot be cast to a DirectString.
extern runtime StringTrim(implicit context: Context)(
String, SmiTagged<string::TrimMode>): String;
}

View File

@ -3481,6 +3481,8 @@ class V8_EXPORT_PRIVATE CodeStubAssembler
bool ConstexprInt31Equal(int31_t a, int31_t b) { return a == b; }
bool ConstexprInt31NotEqual(int31_t a, int31_t b) { return a != b; }
bool ConstexprInt31GreaterThanEqual(int31_t a, int31_t b) { return a >= b; }
// Plain uint32 equality / inequality, evaluated at code-generation time; these
// are the C++ bodies of the Torque constexpr uint32 '==' and '!=' operators.
bool ConstexprUint32Equal(uint32_t a, uint32_t b) { return a == b; }
bool ConstexprUint32NotEqual(uint32_t a, uint32_t b) { return a != b; }
bool ConstexprInt32Equal(int32_t a, int32_t b) { return a == b; }
bool ConstexprInt32NotEqual(int32_t a, int32_t b) { return a != b; }
bool ConstexprInt32GreaterThanEqual(int32_t a, int32_t b) { return a >= b; }

View File

@ -3135,21 +3135,20 @@ TEST(LoadJSArrayElementsMap) {
}
}
TEST(GotoIfNotWhiteSpaceOrLineTerminator) {
TEST(IsWhiteSpaceOrLineTerminator) {
Isolate* isolate(CcTest::InitIsolateOnce());
const int kNumParams = 1;
CodeAssemblerTester asm_tester(isolate, kNumParams + 1); // Include receiver.
StringTrimAssembler m(asm_tester.state());
{ // Returns true if whitespace, false otherwise.
Label if_not_whitespace(&m);
m.GotoIfNotWhiteSpaceOrLineTerminator(m.SmiToInt32(m.Parameter<Smi>(1)),
&if_not_whitespace);
CodeStubAssembler m(asm_tester.state());
Label if_true(&m), if_false(&m);
m.Branch(m.IsWhiteSpaceOrLineTerminator(m.SmiToInt32(m.Parameter<Smi>(1))),
&if_true, &if_false);
m.BIND(&if_true);
m.Return(m.TrueConstant());
m.BIND(&if_not_whitespace);
m.BIND(&if_false);
m.Return(m.FalseConstant());
}
FunctionTester ft(asm_tester.GenerateCode(), kNumParams);