Reland "[turbofan] Add simplified operator for String.prototype.codePointAt"
This is a reland of 22d894c83e
Original change's description:
> [turbofan] Add simplified operator for String.prototype.codePointAt
>
> This CL adds the simplified operators
> StringCodePointAt
> SeqStringCodePointAt
> and associated lowering.
>
> Bug: v8:7270
> Change-Id: I5aeefddeec39c3119b2d225e92a3116f802e7b45
> Reviewed-on: https://chromium-review.googlesource.com/861789
> Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
> Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
> Reviewed-by: Peter Marshall <petermarshall@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#50547}
Bug: v8:7270
Change-Id: Ia08d18543fc165fc2312cd393ed51f4ec98d7a58
Reviewed-on: https://chromium-review.googlesource.com/866311
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Sigurd Schneider <sigurds@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50599}
This commit is contained in:
parent
83df2d84ce
commit
508750180d
@ -91,8 +91,9 @@ namespace internal {
|
||||
ASM(StackCheck) \
|
||||
\
|
||||
/* String helpers */ \
|
||||
TFC(StringCharAt, StringCharAt, 1) \
|
||||
TFC(StringCharCodeAt, StringCharCodeAt, 1) \
|
||||
TFC(StringCharAt, StringAt, 1) \
|
||||
TFC(StringCharCodeAt, StringAt, 1) \
|
||||
TFC(StringCodePointAt, StringAt, 1) \
|
||||
TFC(StringEqual, Compare, 1) \
|
||||
TFC(StringGreaterThan, Compare, 1) \
|
||||
TFC(StringGreaterThanOrEqual, Compare, 1) \
|
||||
|
@ -576,6 +576,21 @@ TF_BUILTIN(StringCharCodeAt, StringBuiltinsAssembler) {
|
||||
Return(result);
|
||||
}
|
||||
|
||||
// Builtin invoked by the StringCodePointAt lowering: reads the code at
// {position} in {receiver} (requesting UTF-32 encoding from
// LoadSurrogatePairAt) and returns it tagged as a Smi.
TF_BUILTIN(StringCodePointAt, StringBuiltinsAssembler) {
  Node* const string = Parameter(Descriptor::kReceiver);
  Node* const index = Parameter(Descriptor::kPosition);

  // TODO(sigurds) Figure out if passing length as argument pays off.
  TNode<IntPtrT> const string_length = LoadStringLengthAsWord(string);

  // Load the code at {index} from {string}.
  TNode<Int32T> const code_point = LoadSurrogatePairAt(
      string, string_length, index, UnicodeEncoding::UTF32);

  // The value is handed back as a TaggedSigned value.
  // TODO(turbofan): Allow builtins to return values untagged.
  Return(SmiFromWord32(code_point));
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// ES6 section 21.1 String Objects
|
||||
|
||||
|
@ -847,6 +847,12 @@ bool EffectControlLinearizer::TryWireInStateEffect(Node* node,
|
||||
case IrOpcode::kSeqStringCharCodeAt:
|
||||
result = LowerSeqStringCharCodeAt(node);
|
||||
break;
|
||||
case IrOpcode::kStringCodePointAt:
|
||||
result = LowerStringCodePointAt(node);
|
||||
break;
|
||||
case IrOpcode::kSeqStringCodePointAt:
|
||||
result = LowerSeqStringCharCodeAt(node);
|
||||
break;
|
||||
case IrOpcode::kStringToLowerCaseIntl:
|
||||
result = LowerStringToLowerCaseIntl(node);
|
||||
break;
|
||||
@ -2687,19 +2693,25 @@ Node* EffectControlLinearizer::LowerStringCharCodeAt(Node* node) {
|
||||
__ NoContextConstant());
|
||||
}
|
||||
|
||||
Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) {
|
||||
// Lowers a StringCodePointAt node into a call of the StringCodePointAt
// builtin, forwarding the node's two value inputs (receiver, position).
Node* EffectControlLinearizer::LowerStringCodePointAt(Node* node) {
  Node* const string = node->InputAt(0);
  Node* const index = node->InputAt(1);

  Callable const builtin =
      Builtins::CallableFor(isolate(), Builtins::kStringCodePointAt);

  // The call is described as non-throwing and non-writing, with a
  // TaggedSigned return value and no stack parameters.
  CallDescriptor::Flags const call_flags = CallDescriptor::kNoFlags;
  Operator::Properties const call_properties =
      Operator::kNoThrow | Operator::kNoWrite;
  CallDescriptor* const call_descriptor = Linkage::GetStubCallDescriptor(
      isolate(), graph()->zone(), builtin.descriptor(), 0, call_flags,
      call_properties, MachineType::TaggedSigned());

  Node* const target = __ HeapConstant(builtin.code());
  return __ Call(call_descriptor, target, string, index,
                 __ NoContextConstant());
}
|
||||
|
||||
Node* EffectControlLinearizer::LoadFromString(Node* receiver, Node* position,
|
||||
Node* is_one_byte) {
|
||||
auto one_byte_load = __ MakeLabel();
|
||||
auto done = __ MakeLabel(MachineRepresentation::kWord32);
|
||||
|
||||
Node* map = __ LoadField(AccessBuilder::ForMap(), receiver);
|
||||
Node* instance_type = __ LoadField(AccessBuilder::ForMapInstanceType(), map);
|
||||
Node* is_one_byte = __ Word32Equal(
|
||||
__ Word32And(instance_type, __ Int32Constant(kStringEncodingMask)),
|
||||
__ Int32Constant(kOneByteStringTag));
|
||||
|
||||
__ GotoIf(is_one_byte, &one_byte_load);
|
||||
Node* two_byte_result = __ LoadElement(
|
||||
AccessBuilder::ForSeqTwoByteStringCharacter(), receiver, position);
|
||||
@ -2714,6 +2726,85 @@ Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) {
|
||||
return done.PhiAt(0);
|
||||
}
|
||||
|
||||
// Lowers a SeqStringCharCodeAt node: determines the receiver's encoding from
// its map's instance type and delegates the character load to
// LoadFromString.
Node* EffectControlLinearizer::LowerSeqStringCharCodeAt(Node* node) {
  Node* const string = node->InputAt(0);
  Node* const index = node->InputAt(1);

  // Mask the encoding bits out of the instance type and compare them against
  // the one-byte tag.
  Node* const string_map = __ LoadField(AccessBuilder::ForMap(), string);
  Node* const type_bits =
      __ LoadField(AccessBuilder::ForMapInstanceType(), string_map);
  Node* const encoding_bits =
      __ Word32And(type_bits, __ Int32Constant(kStringEncodingMask));
  Node* const has_one_byte_encoding =
      __ Word32Equal(encoding_bits, __ Int32Constant(kOneByteStringTag));

  return LoadFromString(string, index, has_one_byte_encoding);
}
|
||||
|
||||
// Lowers a SeqStringCodePointAt node. Loads the code unit at {position}; if
// it is a lead surrogate that is directly followed by a trail surrogate, the
// two units are combined according to {encoding}. In every other case the
// first code unit is returned unchanged.
//
// node:     value input 0 is the receiver string, value input 1 the position.
// encoding: UTF16 packs both code units into one word32, UTF32 yields the
//           decoded code point.
Node* EffectControlLinearizer::LowerSeqStringCodePointAt(
    Node* node, UnicodeEncoding encoding) {
  Node* receiver = node->InputAt(0);
  Node* position = node->InputAt(1);

  Node* map = __ LoadField(AccessBuilder::ForMap(), receiver);
  Node* instance_type = __ LoadField(AccessBuilder::ForMapInstanceType(), map);
  Node* is_one_byte = __ Word32Equal(
      __ Word32And(instance_type, __ Int32Constant(kStringEncodingMask)),
      __ Int32Constant(kOneByteStringTag));

  Node* first_char_code = LoadFromString(receiver, position, is_one_byte);

  auto return_result = __ MakeLabel(MachineRepresentation::kWord32);

  // Check if first character code is a lead surrogate, i.e. lies inside the
  // interval [0xD800, 0xDBFF].
  Node* first_is_lead =
      __ Word32Equal(__ Word32And(first_char_code, __ Int32Constant(0xFC00)),
                     __ Int32Constant(0xD800));
  // Anything that is not a lead surrogate cannot start a pair: return the
  // first character code as is.
  __ GotoIfNot(first_is_lead, &return_result, first_char_code);

  // Check if position + 1 is still in range.
  // NOTE(review): {length} is compared as a raw word32 below. Confirm that
  // AccessBuilder::ForStringLength() yields an untagged value here; if it
  // loads a Smi, a conversion (e.g. ChangeSmiToInt32) is required first.
  Node* length = __ LoadField(AccessBuilder::ForStringLength(), receiver);
  Node* next_position = __ Int32Add(position, __ Int32Constant(1));
  Node* next_position_in_range = __ Int32LessThan(next_position, length);
  // A lead surrogate at the very end of the string stands alone; bail out
  // instead of loading past the last character.
  __ GotoIfNot(next_position_in_range, &return_result, first_char_code);

  // Load second character code.
  Node* second_char_code = LoadFromString(receiver, next_position, is_one_byte);
  // Check if second character code is a trail surrogate, i.e. lies inside the
  // interval [0xDC00, 0xDFFF].
  Node* second_is_trail =
      __ Word32Equal(__ Word32And(second_char_code, __ Int32Constant(0xFC00)),
                     __ Int32Constant(0xDC00));
  // An unpaired lead surrogate is returned unchanged.
  __ GotoIfNot(second_is_trail, &return_result, first_char_code);

  Node* result;
  switch (encoding) {
    case UnicodeEncoding::UTF16:
      result = __ Word32Or(
// Need to swap the order for big-endian platforms
#if V8_TARGET_BIG_ENDIAN
          __ Word32Shl(first_char_code, __ Int32Constant(16)),
          second_char_code);
#else
          __ Word32Shl(second_char_code, __ Int32Constant(16)),
          first_char_code);
#endif
      break;
    case UnicodeEncoding::UTF32: {
      // Convert UTF16 surrogate pair into |word32| code point, encoded as
      // UTF32.
      Node* surrogate_offset =
          __ Int32Constant(0x10000 - (0xD800 << 10) - 0xDC00);

      // (lead << 10) + trail + SURROGATE_OFFSET
      result = __ Int32Add(__ Word32Shl(first_char_code, __ Int32Constant(10)),
                           __ Int32Add(second_char_code, surrogate_offset));
      break;
    }
  }
  __ Goto(&return_result, result);

  __ Bind(&return_result);
  return return_result.PhiAt(0);
}
|
||||
|
||||
Node* EffectControlLinearizer::LowerStringFromCharCode(Node* node) {
|
||||
Node* value = node->InputAt(0);
|
||||
|
||||
|
@ -118,6 +118,8 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer {
|
||||
Node* LowerStringCharAt(Node* node);
|
||||
Node* LowerStringCharCodeAt(Node* node);
|
||||
Node* LowerSeqStringCharCodeAt(Node* node);
|
||||
Node* LowerStringCodePointAt(Node* node);
|
||||
Node* LowerSeqStringCodePointAt(Node* node, UnicodeEncoding encoding);
|
||||
Node* LowerStringToLowerCaseIntl(Node* node);
|
||||
Node* LowerStringToUpperCaseIntl(Node* node);
|
||||
Node* LowerStringFromCharCode(Node* node);
|
||||
@ -179,6 +181,7 @@ class V8_EXPORT_PRIVATE EffectControlLinearizer {
|
||||
Node* ChangeSmiToIntPtr(Node* value);
|
||||
Node* ChangeSmiToInt32(Node* value);
|
||||
Node* ObjectIsSmi(Node* value);
|
||||
Node* LoadFromString(Node* receiver, Node* position, Node* is_one_byte);
|
||||
|
||||
Node* SmiMaxValueConstant();
|
||||
Node* SmiShiftBitsConstant();
|
||||
|
@ -333,6 +333,8 @@
|
||||
V(StringCharAt) \
|
||||
V(StringCharCodeAt) \
|
||||
V(SeqStringCharCodeAt) \
|
||||
V(StringCodePointAt) \
|
||||
V(SeqStringCodePointAt) \
|
||||
V(StringFromCharCode) \
|
||||
V(StringFromCodePoint) \
|
||||
V(StringIndexOf) \
|
||||
|
@ -2353,6 +2353,22 @@ class RepresentationSelector {
|
||||
}
|
||||
return;
|
||||
}
|
||||
case IrOpcode::kStringCodePointAt: {
|
||||
Type* string_type = TypeOf(node->InputAt(0));
|
||||
if (string_type->Is(Type::SeqString())) {
|
||||
VisitBinop(node, UseInfo::AnyTagged(), UseInfo::TruncatingWord32(),
|
||||
MachineRepresentation::kWord32);
|
||||
if (lower()) {
|
||||
NodeProperties::ChangeOp(node,
|
||||
simplified()->SeqStringCodePointAt());
|
||||
}
|
||||
} else {
|
||||
// TODO(turbofan): Allow builtins to return untagged values.
|
||||
VisitBinop(node, UseInfo::AnyTagged(), UseInfo::TruncatingWord32(),
|
||||
MachineRepresentation::kTaggedSigned);
|
||||
}
|
||||
return;
|
||||
}
|
||||
case IrOpcode::kStringFromCharCode: {
|
||||
VisitUnop(node, UseInfo::TruncatingWord32(),
|
||||
MachineRepresentation::kTaggedPointer);
|
||||
|
@ -657,6 +657,8 @@ bool operator==(CheckMinusZeroParameters const& lhs,
|
||||
V(StringCharAt, Operator::kNoProperties, 2, 1) \
|
||||
V(StringCharCodeAt, Operator::kNoProperties, 2, 1) \
|
||||
V(SeqStringCharCodeAt, Operator::kNoProperties, 2, 1) \
|
||||
V(StringCodePointAt, Operator::kNoProperties, 2, 1) \
|
||||
V(SeqStringCodePointAt, Operator::kNoProperties, 2, 1) \
|
||||
V(StringFromCharCode, Operator::kNoProperties, 1, 0) \
|
||||
V(StringIndexOf, Operator::kNoProperties, 3, 0) \
|
||||
V(StringLength, Operator::kNoProperties, 1, 0) \
|
||||
|
@ -503,6 +503,8 @@ class V8_EXPORT_PRIVATE SimplifiedOperatorBuilder final
|
||||
const Operator* StringCharAt();
|
||||
const Operator* StringCharCodeAt();
|
||||
const Operator* SeqStringCharCodeAt();
|
||||
const Operator* StringCodePointAt();
|
||||
const Operator* SeqStringCodePointAt();
|
||||
const Operator* StringFromCharCode();
|
||||
const Operator* StringFromCodePoint(UnicodeEncoding encoding);
|
||||
const Operator* StringIndexOf();
|
||||
|
@ -1948,6 +1948,14 @@ Type* Typer::Visitor::TypeSeqStringCharCodeAt(Node* node) {
|
||||
return typer_->cache_.kUint16;
|
||||
}
|
||||
|
||||
// Types a StringCodePointAt node as the numeric range from 0 to
// String::kMaxCodePoint.
Type* Typer::Visitor::TypeStringCodePointAt(Node* node) {
  double const min_code_point = 0.0;
  double const max_code_point = String::kMaxCodePoint;
  return Type::Range(min_code_point, max_code_point, zone());
}
|
||||
|
||||
// Types a SeqStringCodePointAt node as the numeric range from 0 to
// String::kMaxCodePoint.
Type* Typer::Visitor::TypeSeqStringCodePointAt(Node* node) {
  double const min_code_point = 0.0;
  double const max_code_point = String::kMaxCodePoint;
  return Type::Range(min_code_point, max_code_point, zone());
}
|
||||
|
||||
Type* Typer::Visitor::TypeStringFromCharCode(Node* node) {
|
||||
return TypeUnaryOp(node, StringFromCharCodeTyper);
|
||||
}
|
||||
|
@ -1054,6 +1054,18 @@ void Verifier::Visitor::Check(Node* node, const AllNodes& all) {
|
||||
CheckValueInputIs(node, 1, Type::Unsigned32());
|
||||
CheckTypeIs(node, Type::UnsignedSmall());
|
||||
break;
|
||||
case IrOpcode::kStringCodePointAt:
|
||||
// (String, Unsigned32) -> UnsignedSmall
|
||||
CheckValueInputIs(node, 0, Type::String());
|
||||
CheckValueInputIs(node, 1, Type::Unsigned32());
|
||||
CheckTypeIs(node, Type::UnsignedSmall());
|
||||
break;
|
||||
case IrOpcode::kSeqStringCodePointAt:
|
||||
// (String, Unsigned32) -> UnsignedSmall
|
||||
CheckValueInputIs(node, 0, Type::String());
|
||||
CheckValueInputIs(node, 1, Type::Unsigned32());
|
||||
CheckTypeIs(node, Type::UnsignedSmall());
|
||||
break;
|
||||
case IrOpcode::kStringFromCharCode:
|
||||
// Number -> String
|
||||
CheckValueInputIs(node, 0, Type::Number());
|
||||
|
@ -269,21 +269,7 @@ void StoreNamedTransitionDescriptor::InitializePlatformSpecific(
|
||||
data->InitializePlatformSpecific(len, registers);
|
||||
}
|
||||
|
||||
void StringCharAtDescriptor::InitializePlatformIndependent(
|
||||
CallInterfaceDescriptorData* data) {
|
||||
// kReceiver, kPosition
|
||||
MachineType machine_types[] = {MachineType::AnyTagged(),
|
||||
MachineType::IntPtr()};
|
||||
data->InitializePlatformIndependent(arraysize(machine_types), 0,
|
||||
machine_types);
|
||||
}
|
||||
|
||||
void StringCharAtDescriptor::InitializePlatformSpecific(
|
||||
CallInterfaceDescriptorData* data) {
|
||||
DefaultInitializePlatformSpecific(data, kParameterCount);
|
||||
}
|
||||
|
||||
void StringCharCodeAtDescriptor::InitializePlatformIndependent(
|
||||
void StringAtDescriptor::InitializePlatformIndependent(
|
||||
CallInterfaceDescriptorData* data) {
|
||||
// kReceiver, kPosition
|
||||
// TODO(turbofan): Allow builtins to return untagged values.
|
||||
@ -293,7 +279,7 @@ void StringCharCodeAtDescriptor::InitializePlatformIndependent(
|
||||
machine_types);
|
||||
}
|
||||
|
||||
void StringCharCodeAtDescriptor::InitializePlatformSpecific(
|
||||
void StringAtDescriptor::InitializePlatformSpecific(
|
||||
CallInterfaceDescriptorData* data) {
|
||||
DefaultInitializePlatformSpecific(data, kParameterCount);
|
||||
}
|
||||
|
@ -62,8 +62,7 @@ class PlatformInterfaceDescriptor;
|
||||
V(Compare) \
|
||||
V(BinaryOp) \
|
||||
V(StringAdd) \
|
||||
V(StringCharAt) \
|
||||
V(StringCharCodeAt) \
|
||||
V(StringAt) \
|
||||
V(ForInPrepare) \
|
||||
V(GetProperty) \
|
||||
V(ArgumentAdaptor) \
|
||||
@ -762,17 +761,12 @@ class StringAddDescriptor : public CallInterfaceDescriptor {
|
||||
DECLARE_DESCRIPTOR(StringAddDescriptor, CallInterfaceDescriptor)
|
||||
};
|
||||
|
||||
class StringCharAtDescriptor final : public CallInterfaceDescriptor {
|
||||
// This descriptor is shared among String.p.charAt/charCodeAt/codePointAt
|
||||
// as they all have the same interface.
|
||||
class StringAtDescriptor final : public CallInterfaceDescriptor {
|
||||
public:
|
||||
DEFINE_PARAMETERS(kReceiver, kPosition)
|
||||
DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringCharAtDescriptor,
|
||||
CallInterfaceDescriptor)
|
||||
};
|
||||
|
||||
class StringCharCodeAtDescriptor final : public CallInterfaceDescriptor {
|
||||
public:
|
||||
DEFINE_PARAMETERS(kReceiver, kPosition)
|
||||
DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringCharCodeAtDescriptor,
|
||||
DECLARE_DESCRIPTOR_WITH_CUSTOM_FUNCTION_TYPE(StringAtDescriptor,
|
||||
CallInterfaceDescriptor)
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user