From c0ba94db41d3480163f501990da0eeda7ce33f20 Mon Sep 17 00:00:00 2001 From: Darius M Date: Fri, 16 Sep 2022 09:18:11 +0200 Subject: [PATCH] [compiler] fix bug with string concatenation folding We can't freely concatenate strings in the background because they could be mutated by the main thread (eg, flattened, internalized, externalized...). So, when there is a JSAdd between 2 constant strings, we first checked if they are "safe" (= internalized, I think), and if so, we concatenate them at compile time. If they are "unsafe", then we don't. It turns out that this wasn't an issue with delayed constant strings, since the content of the strings were never accessed: the actual concatenations were done on the main thread, where it's safe to do. This CL fixes that for most cases: - if the strings really cannot be read from the background, but the length of their concatenation is more than ConsString::kMinLength, then we create a ConsString. - I added a set to record which strings we created in the turbofan: those strings can safely be accessed from turbofan regardless of their type. The only case where delayed constant strings could be a bit better is when there is a concatenation of 2 small non-internalized string, because right now, we wouldn't fold it. Still, it should happen very rarely, if ever. Bug: chromium:1359941 Change-Id: I651b834273de89f1e3c60654094a4606dd9c62f0 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3891252 Reviewed-by: Tobias Tebbi Commit-Queue: Darius Mercadier Cr-Commit-Position: refs/heads/main@{#83251} --- .../js-native-context-specialization.cc | 112 ++++++++++++++---- .../js-native-context-specialization.h | 15 +++ 2 files changed, 107 insertions(+), 20 deletions(-) diff --git a/src/compiler/js-native-context-specialization.cc b/src/compiler/js-native-context-specialization.cc index 05dcf59096..cc1fe26b6f 100644 --- a/src/compiler/js-native-context-specialization.cc +++ b/src/compiler/js-native-context-specialization.cc @@ -65,7 +65,8 @@ JSNativeContextSpecialization::JSNativeContextSpecialization( dependencies_(dependencies), zone_(zone), shared_zone_(shared_zone), - type_cache_(TypeCache::Get()) {} + type_cache_(TypeCache::Get()), + created_strings_(zone) {} Reduction JSNativeContextSpecialization::Reduce(Node* node) { switch (node->opcode()) { @@ -135,7 +136,6 @@ base::Optional JSNativeContextSpecialization::GetMaxStringLength( HeapObjectMatcher matcher(node); if (matcher.HasResolvedValue() && matcher.Ref(broker).IsString()) { StringRef input = matcher.Ref(broker).AsString(); - if (!input.IsContentAccessible()) return base::nullopt; return input.length(); } @@ -196,6 +196,13 @@ Handle JSNativeContextSpecialization::CreateStringConstant(Node* node) { ->local_isolate_or_isolate() ->factory() ->NewNumber(number_matcher.ResolvedValue()); + // Note that we do not store the result of NumberToString in + // {created_strings_}, because the latter is used to know if strings are + // safe to be used in the background, but we always have as additional + // information the node from which the string was created ({node} is that + // case), and if this node is a kHeapNumber, then we know that we must have + // created the string, and that there it is safe to read. So, we don't need + // {created_strings_} in that case. return broker()->local_isolate_or_isolate()->factory()->NumberToString( num_obj); } else { @@ -213,6 +220,15 @@ bool IsStringConstant(JSHeapBroker* broker, Node* node) { HeapObjectMatcher matcher(node); return matcher.HasResolvedValue() && matcher.Ref(broker).IsString(); } + +bool IsStringWithNonAccessibleContent(JSHeapBroker* broker, Node* node) { + HeapObjectMatcher matcher(node); + if (matcher.HasResolvedValue() && matcher.Ref(broker).IsString()) { + StringRef input = matcher.Ref(broker).AsString(); + return !input.IsContentAccessible(); + } + return false; +} } // namespace Reduction JSNativeContextSpecialization::ReduceJSAsyncFunctionEnter( @@ -321,16 +337,14 @@ Reduction JSNativeContextSpecialization::ReduceJSAsyncFunctionResolve( return Replace(promise); } -namespace { - // Concatenates {left} and {right}. The result is fairly similar to creating a // new ConsString with {left} and {right} and then flattening it, which we don't // do because String::Flatten does not support background threads. Rather than // implementing a full String::Flatten for background threads, we prefered to // implement this Concatenate function, which, unlike String::Flatten, doesn't // need to replace ConsStrings by ThinStrings. -Handle Concatenate(Handle left, Handle right, - JSHeapBroker* broker) { +Handle JSNativeContextSpecialization::Concatenate( + Handle left, Handle right) { if (left->length() == 0) return right; if (right->length() == 0) return left; @@ -357,7 +371,8 @@ Handle Concatenate(Handle left, Handle right, // generational write-barrier supports background threads. if (!LocalHeap::Current() || (!ObjectInYoungGeneration(*left) && !ObjectInYoungGeneration(*right))) { - return broker->local_isolate_or_isolate() + return broker() + ->local_isolate_or_isolate() ->factory() ->NewConsString(left, right, AllocationType::kOld) .ToHandleChecked(); @@ -367,19 +382,24 @@ Handle Concatenate(Handle left, Handle right, // If one of the string is not in readonly space, then we need a // SharedStringAccessGuardIfNeeded before accessing its content. bool require_guard = SharedStringAccessGuardIfNeeded::IsNeeded( - *left, broker->local_isolate_or_isolate()) || + *left, broker()->local_isolate_or_isolate()) || SharedStringAccessGuardIfNeeded::IsNeeded( - *right, broker->local_isolate_or_isolate()); + *right, broker()->local_isolate_or_isolate()); SharedStringAccessGuardIfNeeded access_guard( - require_guard ? broker->local_isolate_or_isolate() : nullptr); + require_guard ? broker()->local_isolate_or_isolate() : nullptr); if (left->IsOneByteRepresentation() && right->IsOneByteRepresentation()) { // {left} and {right} are 1-byte ==> the result will be 1-byte. - Handle flat = - broker->local_isolate_or_isolate() + // Note that we need a canonical handle, because we insert in + // {created_strings_} the handle's address, which is kinda meaningless if + // the handle isn't canonical. + Handle flat = broker()->CanonicalPersistentHandle( + broker() + ->local_isolate_or_isolate() ->factory() ->NewRawOneByteString(length, AllocationType::kOld) - .ToHandleChecked(); + .ToHandleChecked()); + created_strings_.insert(flat); DisallowGarbageCollection no_gc; String::WriteToFlat(*left, flat->GetChars(no_gc, access_guard), 0, left->length(), GetPtrComprCageBase(*left), @@ -391,11 +411,13 @@ Handle Concatenate(Handle left, Handle right, } else { // One (or both) of {left} and {right} is 2-byte ==> the result will be // 2-byte. - Handle flat = - broker->local_isolate_or_isolate() + Handle flat = broker()->CanonicalPersistentHandle( + broker() + ->local_isolate_or_isolate() ->factory() ->NewRawTwoByteString(length, AllocationType::kOld) - .ToHandleChecked(); + .ToHandleChecked()); + created_strings_.insert(flat); DisallowGarbageCollection no_gc; String::WriteToFlat(*left, flat->GetChars(no_gc, access_guard), 0, left->length(), GetPtrComprCageBase(*left), @@ -407,7 +429,22 @@ Handle Concatenate(Handle left, Handle right, } } -} // namespace +bool JSNativeContextSpecialization::StringCanSafelyBeRead(Node* const node, + Handle str) { + DCHECK(node->opcode() == IrOpcode::kHeapConstant || + node->opcode() == IrOpcode::kNumberConstant); + if (broker()->IsMainThread()) { + // All strings are safe to be read on the main thread. + return true; + } + if (node->opcode() == IrOpcode::kNumberConstant) { + // If {node} is a number constant, then {str} is the stringification of this + // number which we must have created ourselves. + return true; + } + return !IsStringWithNonAccessibleContent(broker(), node) || + created_strings_.find(str) != created_strings_.end(); +} Reduction JSNativeContextSpecialization::ReduceJSAdd(Node* node) { // TODO(turbofan): This has to run together with the inlining and @@ -427,10 +464,45 @@ Reduction JSNativeContextSpecialization::ReduceJSAdd(Node* node) { // addition won't throw due to too long result. if (*lhs_len + *rhs_len <= String::kMaxLength && (IsStringConstant(broker(), lhs) || IsStringConstant(broker(), rhs))) { - Handle left = CreateStringConstant(lhs); - Handle right = CreateStringConstant(rhs); + // We need canonical handles for {left} and {right}, in order to be able to + // search {created_strings_} if needed. + Handle left = + broker()->CanonicalPersistentHandle(CreateStringConstant(lhs)); + Handle right = + broker()->CanonicalPersistentHandle(CreateStringConstant(rhs)); - Handle concatenated = Concatenate(left, right, broker()); + if (!(StringCanSafelyBeRead(lhs, left) && + StringCanSafelyBeRead(rhs, right))) { + // One of {lhs} or {rhs} is not safe to be read in the background. + + if (left->length() + right->length() > ConsString::kMinLength && + (!LocalHeap::Current() || (!ObjectInYoungGeneration(*left) && + !ObjectInYoungGeneration(*right)))) { + // We can create a ConsString with {left} and {right}, without needing + // to read their content (and this ConsString will not introduce + // old-to-new pointers from the background). + Handle concatenated = + broker() + ->local_isolate_or_isolate() + ->factory() + ->NewConsString(left, right, AllocationType::kOld) + .ToHandleChecked(); + Node* reduced = graph()->NewNode(common()->HeapConstant( + broker()->CanonicalPersistentHandle(concatenated))); + ReplaceWithValue(node, reduced); + return Replace(reduced); + } else { + // Concatenating those strings would not produce a ConsString but rather + // a flat string (because the result is small). And, since the strings + // are not safe to be read in the background, this wouldn't be safe. + // Or, one of the string is in the young generation, and since the + // generational barrier doesn't support background threads, we cannot + // create the ConsString. + return NoChange(); + } + } + + Handle concatenated = Concatenate(left, right); Node* reduced = graph()->NewNode(common()->HeapConstant( broker()->CanonicalPersistentHandle(concatenated))); diff --git a/src/compiler/js-native-context-specialization.h b/src/compiler/js-native-context-specialization.h index 4996cec82e..36db56416d 100644 --- a/src/compiler/js-native-context-specialization.h +++ b/src/compiler/js-native-context-specialization.h @@ -10,6 +10,7 @@ #include "src/compiler/graph-reducer.h" #include "src/compiler/js-heap-broker.h" #include "src/deoptimizer/deoptimize-reason.h" +#include "src/zone/zone-containers.h" namespace v8 { namespace internal { @@ -205,6 +206,17 @@ class V8_EXPORT_PRIVATE JSNativeContextSpecialization final Node* BuildCheckEqualsName(NameRef const& name, Node* value, Node* effect, Node* control); + // Concatenates {left} and {right}. + Handle Concatenate(Handle left, Handle right); + + // Returns true if {str} can safely be read: + // - if we are on the main thread, then any string can safely be read + // - in the background, we can only read some string shapes, except if we + // created the string ourselves. + // {node} is the node from which we got {str}, but which is still taken as + // parameter to simplify the checks. + bool StringCanSafelyBeRead(Node* const node, Handle str); + // Checks if we can turn the hole into undefined when loading an element // from an object with one of the {receiver_maps}; sets up appropriate // code dependencies and might use the array protector cell. @@ -264,6 +276,9 @@ class V8_EXPORT_PRIVATE JSNativeContextSpecialization final Zone* const zone_; Zone* const shared_zone_; TypeCache const* type_cache_; + ZoneUnorderedSet, Handle::hash, + Handle::equal_to> + created_strings_; }; DEFINE_OPERATORS_FOR_FLAGS(JSNativeContextSpecialization::Flags)