From b71af5c38a2197d2fe8cbb0295f0c698712ee22e Mon Sep 17 00:00:00 2001 From: Igor Sheludko Date: Wed, 23 Oct 2019 11:01:59 +0200 Subject: [PATCH] [builtins] Use uintptr as index in String builtins, pt.1 The CL refactors the following builtins: String.prototype.startsWith String.prototype.endsWith to use ClampToIndexRange(x, len) instead of NumberMin(NumberMax(x, 0), len). Bug: v8:8996 Change-Id: I20ab42088168e517840385cc2db435361004d9c0 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1873702 Commit-Queue: Igor Sheludko Reviewed-by: Jakob Gruber Cr-Commit-Position: refs/heads/master@{#64500} --- src/builtins/base.tq | 9 +++++- src/builtins/builtins-regexp-gen.cc | 15 ++++++--- src/builtins/builtins-string-gen.cc | 8 +++-- src/builtins/string-endswith.tq | 34 +++++++++------------ src/builtins/string-startswith.tq | 24 +++++++-------- src/builtins/string.tq | 47 ++++++++++++++++++++--------- src/codegen/code-stub-assembler.cc | 19 ++++++------ src/codegen/code-stub-assembler.h | 6 ++-- src/ic/accessor-assembler.cc | 9 +++--- 9 files changed, 100 insertions(+), 71 deletions(-) diff --git a/src/builtins/base.tq b/src/builtins/base.tq index 9d747bb86f..05db520dfe 100644 --- a/src/builtins/base.tq +++ b/src/builtins/base.tq @@ -1409,7 +1409,10 @@ const kMaxTypedArrayInHeap: const kMaxSafeInteger: constexpr float64 generates 'kMaxSafeInteger'; const kSmiMaxValue: constexpr uintptr generates 'kSmiMaxValue'; const kSmiMax: uintptr = kSmiMaxValue; +// TODO(v8:8996): Use uintptr version instead and drop this one. const kStringMaxLength: constexpr int31 generates 'String::kMaxLength'; +const kStringMaxLengthUintptr: + constexpr uintptr generates 'String::kMaxLength'; const kFixedArrayMaxLength: constexpr int31 generates 'FixedArray::kMaxLength'; const kObjectAlignmentMask: constexpr intptr @@ -1962,7 +1965,7 @@ extern macro LoadBufferSmi(RawPtr, constexpr int32): Smi; extern runtime StringEqual(Context, String, String): Oddball; extern builtin StringLessThan(Context, String, String): Boolean; -extern macro StringCharCodeAt(String, intptr): int32; +extern macro StringCharCodeAt(String, uintptr): int32; extern runtime StringCompareSequence(Context, String, String, Number): Boolean; extern macro StringFromSingleCharCode(int32): String; @@ -2169,6 +2172,8 @@ extern macro TryIntPtrAdd(intptr, intptr): intptr labels Overflow; extern macro TryIntPtrSub(intptr, intptr): intptr labels Overflow; extern macro TryInt32Mul(int32, int32): int32 labels Overflow; +extern operator '<' macro ConstexprUintPtrLessThan( + constexpr uintptr, constexpr uintptr): constexpr bool; extern operator '<<' macro ConstexprUintPtrShl( constexpr uintptr, constexpr int31): constexpr uintptr; extern operator '>>>' macro ConstexprUintPtrShr( @@ -3685,6 +3690,7 @@ macro ConvertToRelativeIndex(indexNumber: Number, length: uintptr): uintptr { } } case (indexHeapNumber: HeapNumber): { + assert(IsNumberNormalized(indexHeapNumber)); const indexDouble: float64 = Convert(indexHeapNumber); // NaNs must already be handled by ConvertToRelativeIndex() version // above accepting JSAny indices. @@ -3726,6 +3732,7 @@ macro ClampToIndexRange(indexNumber: Number, limit: uintptr): uintptr { return index; } case (indexHeapNumber: HeapNumber): { + assert(IsNumberNormalized(indexHeapNumber)); const indexDouble: float64 = Convert(indexHeapNumber); // NaNs must already be handled by ClampToIndexRange() version // above accepting JSAny indices. diff --git a/src/builtins/builtins-regexp-gen.cc b/src/builtins/builtins-regexp-gen.cc index b32671527a..846d1849fd 100644 --- a/src/builtins/builtins-regexp-gen.cc +++ b/src/builtins/builtins-regexp-gen.cc @@ -1427,6 +1427,9 @@ TNode RegExpBuiltinsAssembler::AdvanceStringIndex( TNode index_plus_one = NumberInc(index); TVARIABLE(Number, var_result, index_plus_one); + // TODO(v8:9880): Given that we have to convert index from Number to UintPtrT + // anyway, consider using UintPtrT index to simplify the code below. + // Advancing the index has some subtle issues involving the distinction // between Smis and HeapNumbers. There's three cases: // * {index} is a Smi, {index_plus_one} is a Smi. The standard case. @@ -1451,16 +1454,18 @@ TNode RegExpBuiltinsAssembler::AdvanceStringIndex( BIND(&if_isunicode); { - TNode const string_length = LoadStringLengthAsWord(string); - TNode untagged_plus_one = SmiUntag(CAST(index_plus_one)); - GotoIfNot(IntPtrLessThan(untagged_plus_one, string_length), &out); + TNode string_length = Unsigned(LoadStringLengthAsWord(string)); + TNode untagged_plus_one = + Unsigned(SmiUntag(CAST(index_plus_one))); + GotoIfNot(UintPtrLessThan(untagged_plus_one, string_length), &out); - TNode const lead = StringCharCodeAt(string, SmiUntag(CAST(index))); + TNode lead = + StringCharCodeAt(string, Unsigned(SmiUntag(CAST(index)))); GotoIfNot(Word32Equal(Word32And(lead, Int32Constant(0xFC00)), Int32Constant(0xD800)), &out); - TNode const trail = StringCharCodeAt(string, untagged_plus_one); + TNode trail = StringCharCodeAt(string, untagged_plus_one); GotoIfNot(Word32Equal(Word32And(trail, Int32Constant(0xFC00)), Int32Constant(0xDC00)), &out); diff --git a/src/builtins/builtins-string-gen.cc b/src/builtins/builtins-string-gen.cc index 3b2749df59..4515f5c2cf 100644 --- a/src/builtins/builtins-string-gen.cc +++ b/src/builtins/builtins-string-gen.cc @@ -1977,13 +1977,14 @@ void StringTrimAssembler::GotoIfNotWhiteSpaceOrLineTerminator( // Return the |word32| codepoint at {index}. Supports SeqStrings and // ExternalStrings. +// TODO(v8:9880): Use UintPtrT here. TNode StringBuiltinsAssembler::LoadSurrogatePairAt( SloppyTNode string, SloppyTNode length, SloppyTNode index, UnicodeEncoding encoding) { Label handle_surrogate_pair(this), return_result(this); TVARIABLE(Int32T, var_result); TVARIABLE(Int32T, var_trail); - var_result = StringCharCodeAt(string, index); + var_result = StringCharCodeAt(string, Unsigned(index)); var_trail = Int32Constant(0); GotoIf(Word32NotEqual(Word32And(var_result.value(), Int32Constant(0xFC00)), @@ -1992,7 +1993,7 @@ TNode StringBuiltinsAssembler::LoadSurrogatePairAt( TNode next_index = IntPtrAdd(index, IntPtrConstant(1)); GotoIfNot(IntPtrLessThan(next_index, length), &return_result); - var_trail = StringCharCodeAt(string, next_index); + var_trail = StringCharCodeAt(string, Unsigned(next_index)); Branch(Word32Equal(Word32And(var_trail.value(), Int32Constant(0xFC00)), Int32Constant(0xDC00)), &handle_surrogate_pair, &return_result); @@ -2154,6 +2155,7 @@ TNode StringBuiltinsAssembler::AllocAndCopyStringCharacters( return var_result.value(); } +// TODO(v8:9880): Use UintPtrT here. TNode StringBuiltinsAssembler::SubString(TNode string, TNode from, TNode to) { @@ -2261,7 +2263,7 @@ TNode StringBuiltinsAssembler::SubString(TNode string, // Substrings of length 1 are generated through CharCodeAt and FromCharCode. BIND(&single_char); { - TNode char_code = StringCharCodeAt(string, from); + TNode char_code = StringCharCodeAt(string, Unsigned(from)); var_result = StringFromSingleCharCode(char_code); Goto(&end); } diff --git a/src/builtins/string-endswith.tq b/src/builtins/string-endswith.tq index 9590b853e7..d53d72758d 100644 --- a/src/builtins/string-endswith.tq +++ b/src/builtins/string-endswith.tq @@ -4,17 +4,15 @@ namespace string { macro TryFastStringCompareSequence( - string: String, searchStr: String, start: Number, - searchLength: Smi): Boolean labels Slow { + string: String, searchStr: String, start: uintptr, + searchLength: uintptr): Boolean labels Slow { const directString = Cast(string) otherwise Slow; const directSearchStr = Cast(searchStr) otherwise Slow; - const stringIndexSmi: Smi = Cast(start) otherwise Slow; - let searchIndex: intptr = 0; - let stringIndex = Convert(stringIndexSmi); - const searchLengthInteger = Convert(searchLength); + let searchIndex: uintptr = 0; + let stringIndex: uintptr = start; - while (searchIndex < searchLengthInteger) { + while (searchIndex < searchLength) { if (StringCharCodeAt(directSearchStr, searchIndex) != StringCharCodeAt(directString, stringIndex)) { return False; @@ -34,10 +32,8 @@ namespace string { const kBuiltinName: constexpr string = 'String.prototype.endsWith'; // 1. Let O be ? RequireObjectCoercible(this value). - const object: JSAny = RequireObjectCoercible(receiver, kBuiltinName); - // 2. Let S be ? ToString(O). - const string: String = ToString_Inline(context, object); + const string: String = ToThisString(receiver, kBuiltinName); // 3. Let isRegExp be ? IsRegExp(searchString). // 4. If isRegExp is true, throw a TypeError exception. @@ -49,25 +45,22 @@ namespace string { const searchStr: String = ToString_Inline(context, searchString); // 6. Let len be the length of S. - const len: Number = string.length_smi; + const len: uintptr = string.length_uintptr; // 7. If endPosition is undefined, let pos be len, // else let pos be ? ToInteger(endPosition). - const pos: Number = (endPosition == Undefined) ? - len : - ToInteger_Inline(context, endPosition); - // 8. Let end be min(max(pos, 0), len). - const end: Number = NumberMin(NumberMax(pos, 0), len); + const end: uintptr = + (endPosition != Undefined) ? ClampToIndexRange(endPosition, len) : len; // 9. Let searchLength be the length of searchStr. - const searchLength: Smi = searchStr.length_smi; + const searchLength: uintptr = searchStr.length_uintptr; // 10. Let start be end - searchLength. - const start = end - searchLength; + const start: uintptr = end - searchLength; // 11. If start is less than 0, return false. - if (start < 0) return False; + if (Signed(start) < 0) return False; // 12. If the sequence of code units of S starting at start of length // searchLength is the same as the full code unit sequence of searchStr, @@ -80,7 +73,8 @@ namespace string { } label Slow { // Slow Path: If either of the string is indirect, bail into runtime. - return StringCompareSequence(context, string, searchStr, start); + return StringCompareSequence( + context, string, searchStr, Convert(start)); } } } diff --git a/src/builtins/string-startswith.tq b/src/builtins/string-startswith.tq index 3238f52b86..32a1d0c1c6 100644 --- a/src/builtins/string-startswith.tq +++ b/src/builtins/string-startswith.tq @@ -13,10 +13,8 @@ namespace string { const kBuiltinName: constexpr string = 'String.prototype.startsWith'; // 1. Let O be ? RequireObjectCoercible(this value). - const object: JSAny = RequireObjectCoercible(receiver, kBuiltinName); - // 2. Let S be ? ToString(O). - const string: String = ToString_Inline(context, object); + const string: String = ToThisString(receiver, kBuiltinName); // 3. Let isRegExp be ? IsRegExp(searchString). // 4. If isRegExp is true, throw a TypeError exception. @@ -27,21 +25,22 @@ namespace string { // 5. Let searchStr be ? ToString(searchString). const searchStr: String = ToString_Inline(context, searchString); - // 6. Let pos be ? ToInteger(position). - const pos: Number = ToInteger_Inline(context, position); - - // 7. Assert: If position is undefined, then pos is 0. // 8. Let len be the length of S. - const len: Number = string.length_smi; + const len: uintptr = string.length_uintptr; + // 6. Let pos be ? ToInteger(position). + // 7. Assert: If position is undefined, then pos is 0. // 9. Let start be min(max(pos, 0), len). - const start: Number = NumberMin(NumberMax(pos, 0), len); + const start: uintptr = + (position != Undefined) ? ClampToIndexRange(position, len) : 0; // 10. Let searchLength be the length of searchStr. - const searchLength: Smi = searchStr.length_smi; + const searchLength: uintptr = searchStr.length_uintptr; // 11. If searchLength + start is greater than len, return false. - if (searchLength + start > len) return False; + // The comparison is rephrased to be overflow-friendly with unsigned + // indices. + if (searchLength > len - start) return False; // 12. If the sequence of code units of S starting at start of length // searchLength is the same as the full code unit sequence of searchStr, @@ -54,7 +53,8 @@ namespace string { } label Slow { // Slow Path: If either of the string is indirect, bail into runtime. - return StringCompareSequence(context, string, searchStr, start); + return StringCompareSequence( + context, string, searchStr, Convert(start)); } } } diff --git a/src/builtins/string.tq b/src/builtins/string.tq index 4f2c342fd5..dbee1c25f9 100644 --- a/src/builtins/string.tq +++ b/src/builtins/string.tq @@ -57,18 +57,36 @@ namespace string { transitioning macro GenerateStringAt(implicit context: Context)( receiver: JSAny, position: JSAny, methodName: constexpr string): never labels - IfInBounds(String, intptr, intptr), IfOutOfBounds { - // Check that {receiver} is coercible to Object and convert it to a String. + IfInBounds(String, uintptr, uintptr), IfOutOfBounds { + // 1. Let O be ? RequireObjectCoercible(this value). + // 2. Let S be ? ToString(O). const string: String = ToThisString(receiver, methodName); - // Convert the {position} to a Smi and check that it's in bounds of - // the {string}. + + // 3. Let position be ? ToInteger(pos). const indexNumber: Number = ToInteger_Inline(context, position, kTruncateMinusZero); - if (TaggedIsNotSmi(indexNumber)) goto IfOutOfBounds; - const index: intptr = SmiUntag(UnsafeCast(indexNumber)); - const length: intptr = string.length_intptr; - if (Convert(index) >= Convert(length)) goto IfOutOfBounds; - goto IfInBounds(string, index, length); + + // Convert the {position} to a uintptr and check that it's in bounds of + // the {string}. + typeswitch (indexNumber) { + case (indexSmi: Smi): { + const length: uintptr = string.length_uintptr; + const index: uintptr = Unsigned(Convert(indexSmi)); + // Max string length fits Smi range, so we can do an unsigned bounds + // check. + const kMaxStringLengthFitsSmi: constexpr bool = + kStringMaxLengthUintptr < kSmiMaxValue; + StaticAssert(kMaxStringLengthFitsSmi); + if (index >= length) goto IfOutOfBounds; + goto IfInBounds(string, index, length); + } + case (indexHeapNumber: HeapNumber): { + assert(IsNumberNormalized(indexHeapNumber)); + // Valid string indices fit into Smi range, so HeapNumber index is + // definitely an out of bounds case. + goto IfOutOfBounds; + } + } } // ES6 #sec-string.prototype.charat @@ -78,7 +96,7 @@ namespace string { GenerateStringAt(receiver, position, 'String.prototype.charAt') otherwise IfInBounds, IfOutOfBounds; } - label IfInBounds(string: String, index: intptr, _length: intptr) { + label IfInBounds(string: String, index: uintptr, _length: uintptr) { const code: int32 = StringCharCodeAt(string, index); return StringFromSingleCharCode(code); } @@ -94,7 +112,7 @@ namespace string { GenerateStringAt(receiver, position, 'String.prototype.charCodeAt') otherwise IfInBounds, IfOutOfBounds; } - label IfInBounds(string: String, index: intptr, _length: intptr) { + label IfInBounds(string: String, index: uintptr, _length: uintptr) { const code: int32 = StringCharCodeAt(string, index); return Convert(code); } @@ -110,10 +128,11 @@ namespace string { GenerateStringAt(receiver, position, 'String.prototype.codePointAt') otherwise IfInBounds, IfOutOfBounds; } - label IfInBounds(string: String, index: intptr, length: intptr) { + label IfInBounds(string: String, index: uintptr, length: uintptr) { // This is always a call to a builtin from Javascript, so we need to // produce UTF32. - const code: int32 = LoadSurrogatePairAt(string, length, index, UTF32); + const code: int32 = + LoadSurrogatePairAt(string, Signed(length), Signed(index), UTF32); return Convert(code); } label IfOutOfBounds { @@ -190,7 +209,7 @@ namespace string { } builtin StringCharAt(implicit context: Context)( - receiver: String, position: intptr): String { + receiver: String, position: uintptr): String { // Load the character code at the {position} from the {receiver}. const code: int32 = StringCharCodeAt(receiver, position); // And return the single character string with only that {code} diff --git a/src/codegen/code-stub-assembler.cc b/src/codegen/code-stub-assembler.cc index bd02d4a6eb..f7c7b5de89 100644 --- a/src/codegen/code-stub-assembler.cc +++ b/src/codegen/code-stub-assembler.cc @@ -6765,12 +6765,9 @@ Node* CodeStubAssembler::FixedArraySizeDoesntFitInNewSpace(Node* element_count, element_count, IntPtrOrSmiConstant(max_newspace_elements, mode), mode); } -TNode CodeStubAssembler::StringCharCodeAt(SloppyTNode string, - SloppyTNode index) { - CSA_ASSERT(this, IsString(string)); - - CSA_ASSERT(this, IntPtrGreaterThanOrEqual(index, IntPtrConstant(0))); - CSA_ASSERT(this, IntPtrLessThan(index, LoadStringLengthAsWord(string))); +TNode CodeStubAssembler::StringCharCodeAt(TNode string, + TNode index) { + CSA_ASSERT(this, UintPtrLessThan(index, LoadStringLengthAsWord(string))); TVARIABLE(Int32T, var_result); @@ -6779,7 +6776,8 @@ TNode CodeStubAssembler::StringCharCodeAt(SloppyTNode string, ToDirectStringAssembler to_direct(state(), string); to_direct.TryToDirect(&if_runtime); - TNode const offset = IntPtrAdd(index, to_direct.offset()); + TNode const offset = + UintPtrAdd(index, Unsigned(to_direct.offset())); TNode const instance_type = to_direct.instance_type(); TNode const string_data = to_direct.PointerToData(&if_runtime); @@ -6804,8 +6802,9 @@ TNode CodeStubAssembler::StringCharCodeAt(SloppyTNode string, BIND(&if_runtime); { - TNode result = CallRuntime( - Runtime::kStringCharCodeAt, NoContextConstant(), string, SmiTag(index)); + TNode result = + CallRuntime(Runtime::kStringCharCodeAt, NoContextConstant(), string, + ChangeUintPtrToTagged(index)); var_result = SmiToInt32(CAST(result)); Goto(&return_result); } @@ -9395,7 +9394,7 @@ void CodeStubAssembler::BranchIfMaybeSpecialIndex(TNode name_string, // If the first character of name is not a digit or '-', or we can't match it // to Infinity or NaN, then this is not a special index. - TNode first_char = StringCharCodeAt(name_string, IntPtrConstant(0)); + TNode first_char = StringCharCodeAt(name_string, UintPtrConstant(0)); // If the name starts with '-', it can be a negative index. GotoIf(Word32Equal(first_char, Int32Constant('-')), if_maybe_special_index); // If the name starts with 'I', it can be "Infinity". diff --git a/src/codegen/code-stub-assembler.h b/src/codegen/code-stub-assembler.h index 290f1108d2..1abf396d52 100644 --- a/src/codegen/code-stub-assembler.h +++ b/src/codegen/code-stub-assembler.h @@ -2606,8 +2606,7 @@ class V8_EXPORT_PRIVATE CodeStubAssembler // String helpers. // Load a character from a String (might flatten a ConsString). - TNode StringCharCodeAt(SloppyTNode string, - SloppyTNode index); + TNode StringCharCodeAt(TNode string, TNode index); // Return the single character string with only {code}. TNode StringFromSingleCharCode(TNode code); @@ -3581,6 +3580,8 @@ class V8_EXPORT_PRIVATE CodeStubAssembler return val; } + bool ConstexprUintPtrLessThan(uintptr_t a, uintptr_t b) { return a < b; } + void PerformStackCheck(TNode context); void SetPropertyLength(TNode context, TNode array, @@ -3961,6 +3962,7 @@ class ToDirectStringAssembler : public CodeStubAssembler { TVariable var_string_; TVariable var_instance_type_; + // TODO(v8:9880): Use UintPtrT here. TVariable var_offset_; TVariable var_is_external_; diff --git a/src/ic/accessor-assembler.cc b/src/ic/accessor-assembler.cc index 835e5dcb9d..662360c697 100644 --- a/src/ic/accessor-assembler.cc +++ b/src/ic/accessor-assembler.cc @@ -397,10 +397,11 @@ void AccessorAssembler::HandleLoadICSmiHandlerCase( Comment("indexed string"); TNode string_holder = CAST(holder); - TNode intptr_index = TryToIntptr(p->name(), miss); - TNode length = LoadStringLengthAsWord(string_holder); - GotoIf(UintPtrGreaterThanOrEqual(intptr_index, length), &if_oob); - TNode code = StringCharCodeAt(string_holder, intptr_index); + TNode index = Unsigned(TryToIntptr(p->name(), miss)); + TNode length = + Unsigned(LoadStringLengthAsWord(string_holder)); + GotoIf(UintPtrGreaterThanOrEqual(index, length), &if_oob); + TNode code = StringCharCodeAt(string_holder, index); TNode result = StringFromSingleCharCode(code); Return(result);