From af51befe694fe039db3554d4b9165f7d6baceb77 Mon Sep 17 00:00:00 2001 From: jkummerow Date: Tue, 10 Jan 2017 02:58:03 -0800 Subject: [PATCH] Internalize strings in-place using newly introduced ThinStrings, which store a pointer to the actual, internalized string they represent. BUG=v8:4520 Review-Url: https://codereview.chromium.org/2549773002 Cr-Commit-Position: refs/heads/master@{#42168} --- include/v8.h | 8 +- src/arm/code-stubs-arm.cc | 16 +++- src/arm/codegen-arm.cc | 12 ++- src/arm64/code-stubs-arm64.cc | 26 ++++-- src/arm64/codegen-arm64.cc | 13 ++- src/ast/ast-types.cc | 2 + src/builtins/builtins-object.cc | 32 ++++--- src/code-stub-assembler.cc | 110 +++++++++++++++++----- src/code-stub-assembler.h | 3 +- src/code-stubs.cc | 2 +- src/compiler/access-builder.cc | 9 ++ src/compiler/access-builder.h | 3 + src/compiler/types.cc | 2 + src/elements.cc | 9 +- src/factory.cc | 70 ++++++++++---- src/factory.h | 5 + src/heap/heap-inl.h | 3 + src/heap/heap.cc | 55 ++++++++--- src/heap/heap.h | 6 +- src/heap/mark-compact.cc | 8 +- src/heap/objects-visiting-inl.h | 8 ++ src/heap/objects-visiting.cc | 3 + src/heap/objects-visiting.h | 1 + src/heap/scavenger.cc | 27 ++++++ src/ia32/code-stubs-ia32.cc | 12 ++- src/ia32/codegen-ia32.cc | 14 ++- src/ia32/macro-assembler-ia32.cc | 8 +- src/ic/accessor-assembler.cc | 8 +- src/ic/ic.cc | 2 + src/ic/keyed-store-generic.cc | 10 +- src/mips/code-stubs-mips.cc | 15 ++- src/mips/codegen-mips.cc | 12 ++- src/mips64/code-stubs-mips64.cc | 15 ++- src/mips64/codegen-mips64.cc | 12 ++- src/objects-body-descriptors-inl.h | 2 + src/objects-debug.cc | 12 ++- src/objects-inl.h | 23 +++++ src/objects-printer.cc | 2 + src/objects.cc | 118 ++++++++++++++++++++---- src/objects.h | 73 +++++++++++---- src/ppc/code-stubs-ppc.cc | 12 ++- src/ppc/codegen-ppc.cc | 17 +++- src/profiler/heap-snapshot-generator.cc | 4 + src/regexp/regexp-macro-assembler.cc | 4 + src/runtime/runtime-i18n.cc | 4 +- src/runtime/runtime-internal.cc | 2 + 
src/runtime/runtime-object.cc | 8 ++ src/runtime/runtime-regexp.cc | 3 + src/runtime/runtime-strings.cc | 3 + src/s390/code-stubs-s390.cc | 12 ++- src/s390/codegen-s390.cc | 17 +++- src/value-serializer.cc | 11 ++- src/x64/code-stubs-x64.cc | 12 ++- src/x64/codegen-x64.cc | 14 ++- src/x64/macro-assembler-x64.cc | 8 +- src/x87/code-stubs-x87.cc | 12 ++- src/x87/codegen-x87.cc | 14 ++- src/x87/macro-assembler-x87.cc | 8 +- test/cctest/test-code-stub-assembler.cc | 47 +++++++--- test/cctest/test-strings.cc | 25 +++++ test/mjsunit/thin-strings.js | 38 ++++++++ 61 files changed, 827 insertions(+), 219 deletions(-) create mode 100644 test/mjsunit/thin-strings.js diff --git a/include/v8.h b/include/v8.h index fa238742d5..f7e2b376a3 100644 --- a/include/v8.h +++ b/include/v8.h @@ -2313,7 +2313,7 @@ class V8_EXPORT String : public Name { enum Encoding { UNKNOWN_ENCODING = 0x1, TWO_BYTE_ENCODING = 0x0, - ONE_BYTE_ENCODING = 0x4 + ONE_BYTE_ENCODING = 0x8 }; /** * Returns the number of characters in this string. 
@@ -8406,10 +8406,10 @@ class Internals { static const int kFixedArrayHeaderSize = 2 * kApiPointerSize; static const int kContextHeaderSize = 2 * kApiPointerSize; static const int kContextEmbedderDataIndex = 5; - static const int kFullStringRepresentationMask = 0x07; - static const int kStringEncodingMask = 0x4; + static const int kFullStringRepresentationMask = 0x0f; + static const int kStringEncodingMask = 0x8; static const int kExternalTwoByteRepresentationTag = 0x02; - static const int kExternalOneByteRepresentationTag = 0x06; + static const int kExternalOneByteRepresentationTag = 0x0a; static const int kIsolateEmbedderDataOffset = 0 * kApiPointerSize; static const int kExternalMemoryOffset = 4 * kApiPointerSize; diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc index 9ad4a64457..fd52b27082 100644 --- a/src/arm/code-stubs-arm.cc +++ b/src/arm/code-stubs-arm.cc @@ -1298,7 +1298,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (6) External string. Make it, offset-wise, look like a sequential string. // Go to (4). // (7) Short external string or not a string? If yes, bail out to runtime. - // (8) Sliced string. Replace subject with parent. Go to (1). + // (8) Sliced or thin string. Replace subject with parent. Go to (1). Label seq_string /* 4 */, external_string /* 6 */, check_underlying /* 1 */, not_seq_nor_cons /* 5 */, not_long_external /* 7 */; @@ -1320,6 +1320,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (2) Sequential or cons? If not, go to (5). 
STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmp(r1, Operand(kExternalStringTag)); @@ -1347,10 +1348,10 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ b(ls, &runtime); __ SmiUntag(r1); - STATIC_ASSERT(4 == kOneByteStringTag); + STATIC_ASSERT(8 == kOneByteStringTag); STATIC_ASSERT(kTwoByteStringTag == 0); __ and_(r0, r0, Operand(kStringEncodingMask)); - __ mov(r3, Operand(r0, ASR, 2), SetCC); + __ mov(r3, Operand(r0, ASR, 3), SetCC); __ ldr(r6, FieldMemOperand(regexp_data, JSRegExp::kDataOneByteCodeOffset), ne); __ ldr(r6, FieldMemOperand(regexp_data, JSRegExp::kDataUC16CodeOffset), eq); @@ -1584,12 +1585,19 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ tst(r1, Operand(kIsNotStringMask | kShortExternalStringMask)); __ b(ne, &runtime); - // (8) Sliced string. Replace subject with parent. Go to (4). + // (8) Sliced or thin string. Replace subject with parent. Go to (4). + Label thin_string; + __ cmp(r1, Operand(kThinStringTag)); + __ b(eq, &thin_string); // Load offset into r9 and replace subject string with parent. __ ldr(r9, FieldMemOperand(subject, SlicedString::kOffsetOffset)); __ SmiUntag(r9); __ ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); __ jmp(&check_underlying); // Go to (4). + + __ bind(&thin_string); + __ ldr(subject, FieldMemOperand(subject, ThinString::kActualOffset)); + __ jmp(&check_underlying); // Go to (4). #endif // V8_INTERPRETED_REGEXP } diff --git a/src/arm/codegen-arm.cc b/src/arm/codegen-arm.cc index 06e92168b6..2152d6837a 100644 --- a/src/arm/codegen-arm.cc +++ b/src/arm/codegen-arm.cc @@ -332,9 +332,12 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ b(eq, &check_sequential); // Dispatch on the indirect string shape: slice or cons. 
- Label cons_string; - __ tst(result, Operand(kSlicedNotConsMask)); + Label cons_string, thin_string; + __ and_(result, result, Operand(kStringRepresentationMask)); + __ cmp(result, Operand(kConsStringTag)); __ b(eq, &cons_string); + __ cmp(result, Operand(kThinStringTag)); + __ b(eq, &thin_string); // Handle slices. Label indirect_string_loaded; @@ -343,6 +346,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ add(index, index, Operand::SmiUntag(result)); __ jmp(&indirect_string_loaded); + // Handle thin strings. + __ bind(&thin_string); + __ ldr(string, FieldMemOperand(string, ThinString::kActualOffset)); + __ jmp(&indirect_string_loaded); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not diff --git a/src/arm64/code-stubs-arm64.cc b/src/arm64/code-stubs-arm64.cc index c9718cab73..f61df949b0 100644 --- a/src/arm64/code-stubs-arm64.cc +++ b/src/arm64/code-stubs-arm64.cc @@ -1445,7 +1445,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (6) External string. Make it, offset-wise, look like a sequential string. // Go to (4). // (7) Short external string or not a string? If yes, bail out to runtime. - // (8) Sliced string. Replace subject with parent. Go to (1). + // (8) Sliced or thin string. Replace subject with parent. Go to (1). Label check_underlying; // (1) Label seq_string; // (4) @@ -1479,6 +1479,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (2) Sequential or cons? If not, go to (5). 
STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ Cmp(string_representation, kExternalStringTag); @@ -1506,10 +1507,10 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // before entering the exit frame. __ SmiUntag(x1, x10); - // The third bit determines the string encoding in string_type. - STATIC_ASSERT(kOneByteStringTag == 0x04); + // The fourth bit determines the string encoding in string_type. + STATIC_ASSERT(kOneByteStringTag == 0x08); STATIC_ASSERT(kTwoByteStringTag == 0x00); - STATIC_ASSERT(kStringEncodingMask == 0x04); + STATIC_ASSERT(kStringEncodingMask == 0x08); // Find the code object based on the assumptions above. // kDataOneByteCodeOffset and kDataUC16CodeOffset are adjacent, adds an offset @@ -1517,7 +1518,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { STATIC_ASSERT(JSRegExp::kDataOneByteCodeOffset + kPointerSize == JSRegExp::kDataUC16CodeOffset); __ Mov(x10, kPointerSize); - // We will need the encoding later: Latin1 = 0x04 + // We will need the encoding later: Latin1 = 0x08 // UC16 = 0x00 __ Ands(string_encoding, string_type, kStringEncodingMask); __ CzeroX(x10, ne); @@ -1565,10 +1566,10 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ Ldr(length, UntagSmiFieldMemOperand(subject, String::kLengthOffset)); // Handle UC16 encoding, two bytes make one character. 
- // string_encoding: if Latin1: 0x04 + // string_encoding: if Latin1: 0x08 // if UC16: 0x00 - STATIC_ASSERT(kStringEncodingMask == 0x04); - __ Ubfx(string_encoding, string_encoding, 2, 1); + STATIC_ASSERT(kStringEncodingMask == 0x08); + __ Ubfx(string_encoding, string_encoding, 3, 1); __ Eor(string_encoding, string_encoding, 1); // string_encoding: if Latin1: 0 // if UC16: 1 @@ -1781,11 +1782,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { kShortExternalStringMask | kIsNotStringMask, &runtime); - // (8) Sliced string. Replace subject with parent. + // (8) Sliced or thin string. Replace subject with parent. + Label thin_string; + __ Cmp(string_representation, kThinStringTag); + __ B(eq, &thin_string); __ Ldr(sliced_string_offset, UntagSmiFieldMemOperand(subject, SlicedString::kOffsetOffset)); __ Ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); __ B(&check_underlying); // Go to (1). + + __ bind(&thin_string); + __ Ldr(subject, FieldMemOperand(subject, ThinString::kActualOffset)); + __ B(&check_underlying); // Go to (1). #endif } diff --git a/src/arm64/codegen-arm64.cc b/src/arm64/codegen-arm64.cc index e6ddcfadb8..10dc83bb6e 100644 --- a/src/arm64/codegen-arm64.cc +++ b/src/arm64/codegen-arm64.cc @@ -108,8 +108,12 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ TestAndBranchIfAllClear(result, kIsIndirectStringMask, &check_sequential); // Dispatch on the indirect string shape: slice or cons. - Label cons_string; - __ TestAndBranchIfAllClear(result, kSlicedNotConsMask, &cons_string); + Label cons_string, thin_string; + __ And(result, result, kStringRepresentationMask); + __ Cmp(result, kConsStringTag); + __ B(eq, &cons_string); + __ Cmp(result, kThinStringTag); + __ B(eq, &thin_string); // Handle slices. Label indirect_string_loaded; @@ -119,6 +123,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ Add(index, index, result.W()); __ B(&indirect_string_loaded); + // Handle thin strings. 
+ __ Bind(&thin_string); + __ Ldr(string, FieldMemOperand(string, ThinString::kActualOffset)); + __ B(&indirect_string_loaded); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not diff --git a/src/ast/ast-types.cc b/src/ast/ast-types.cc index 83879215fc..c50ae27291 100644 --- a/src/ast/ast-types.cc +++ b/src/ast/ast-types.cc @@ -157,6 +157,8 @@ AstType::bitset AstBitsetType::Lub(i::Map* map) { case ONE_BYTE_STRING_TYPE: case CONS_STRING_TYPE: case CONS_ONE_BYTE_STRING_TYPE: + case THIN_STRING_TYPE: + case THIN_ONE_BYTE_STRING_TYPE: case SLICED_STRING_TYPE: case SLICED_ONE_BYTE_STRING_TYPE: case EXTERNAL_STRING_TYPE: diff --git a/src/builtins/builtins-object.cc b/src/builtins/builtins-object.cc index 6b2d586510..f216508c23 100644 --- a/src/builtins/builtins-object.cc +++ b/src/builtins/builtins-object.cc @@ -37,24 +37,26 @@ void Builtins::Generate_ObjectHasOwnProperty( Node* map = assembler.LoadMap(object); Node* instance_type = assembler.LoadMapInstanceType(map); - Variable var_index(&assembler, MachineType::PointerRepresentation()); + { + Variable var_index(&assembler, MachineType::PointerRepresentation()); + Variable var_unique(&assembler, MachineRepresentation::kTagged); - Label keyisindex(&assembler), if_iskeyunique(&assembler); - assembler.TryToName(key, &keyisindex, &var_index, &if_iskeyunique, - &call_runtime); + Label keyisindex(&assembler), if_iskeyunique(&assembler); + assembler.TryToName(key, &keyisindex, &var_index, &if_iskeyunique, + &var_unique, &call_runtime); - assembler.Bind(&if_iskeyunique); - assembler.TryHasOwnProperty(object, map, instance_type, key, &return_true, - &return_false, &call_runtime); - - assembler.Bind(&keyisindex); - // Handle negative keys in the runtime. 
- assembler.GotoIf( - assembler.IntPtrLessThan(var_index.value(), assembler.IntPtrConstant(0)), - &call_runtime); - assembler.TryLookupElement(object, map, instance_type, var_index.value(), - &return_true, &return_false, &call_runtime); + assembler.Bind(&if_iskeyunique); + assembler.TryHasOwnProperty(object, map, instance_type, var_unique.value(), + &return_true, &return_false, &call_runtime); + assembler.Bind(&keyisindex); + // Handle negative keys in the runtime. + assembler.GotoIf(assembler.IntPtrLessThan(var_index.value(), + assembler.IntPtrConstant(0)), + &call_runtime); + assembler.TryLookupElement(object, map, instance_type, var_index.value(), + &return_true, &return_false, &call_runtime); + } assembler.Bind(&return_true); assembler.Return(assembler.BooleanConstant(true)); diff --git a/src/code-stub-assembler.cc b/src/code-stub-assembler.cc index 1fb8a2152e..c569e8b927 100644 --- a/src/code-stub-assembler.cc +++ b/src/code-stub-assembler.cc @@ -1565,6 +1565,9 @@ Node* CodeStubAssembler::AllocateHeapNumberWithValue(Node* value, Node* CodeStubAssembler::AllocateSeqOneByteString(int length, AllocationFlags flags) { Comment("AllocateSeqOneByteString"); + if (length == 0) { + return LoadRoot(Heap::kempty_stringRootIndex); + } Node* result = Allocate(SeqOneByteString::SizeFor(length), flags); DCHECK(Heap::RootIsImmortalImmovable(Heap::kOneByteStringMapRootIndex)); StoreMapNoWriteBarrier(result, Heap::kOneByteStringMapRootIndex); @@ -1584,8 +1587,10 @@ Node* CodeStubAssembler::AllocateSeqOneByteString(Node* context, Node* length, Variable var_result(this, MachineRepresentation::kTagged); // Compute the SeqOneByteString size and check if it fits into new space. 
- Label if_sizeissmall(this), if_notsizeissmall(this, Label::kDeferred), - if_join(this); + Label if_lengthiszero(this), if_sizeissmall(this), + if_notsizeissmall(this, Label::kDeferred), if_join(this); + GotoIf(WordEqual(length, IntPtrOrSmiConstant(0, mode)), &if_lengthiszero); + Node* raw_size = GetArrayAllocationSize( length, UINT8_ELEMENTS, mode, SeqOneByteString::kHeaderSize + kObjectAlignmentMask); @@ -1618,6 +1623,12 @@ Node* CodeStubAssembler::AllocateSeqOneByteString(Node* context, Node* length, Goto(&if_join); } + Bind(&if_lengthiszero); + { + var_result.Bind(LoadRoot(Heap::kempty_stringRootIndex)); + Goto(&if_join); + } + Bind(&if_join); return var_result.value(); } @@ -1625,6 +1636,9 @@ Node* CodeStubAssembler::AllocateSeqOneByteString(Node* context, Node* length, Node* CodeStubAssembler::AllocateSeqTwoByteString(int length, AllocationFlags flags) { Comment("AllocateSeqTwoByteString"); + if (length == 0) { + return LoadRoot(Heap::kempty_stringRootIndex); + } Node* result = Allocate(SeqTwoByteString::SizeFor(length), flags); DCHECK(Heap::RootIsImmortalImmovable(Heap::kStringMapRootIndex)); StoreMapNoWriteBarrier(result, Heap::kStringMapRootIndex); @@ -1644,8 +1658,10 @@ Node* CodeStubAssembler::AllocateSeqTwoByteString(Node* context, Node* length, Variable var_result(this, MachineRepresentation::kTagged); // Compute the SeqTwoByteString size and check if it fits into new space. 
- Label if_sizeissmall(this), if_notsizeissmall(this, Label::kDeferred), - if_join(this); + Label if_lengthiszero(this), if_sizeissmall(this), + if_notsizeissmall(this, Label::kDeferred), if_join(this); + GotoIf(WordEqual(length, IntPtrOrSmiConstant(0, mode)), &if_lengthiszero); + Node* raw_size = GetArrayAllocationSize( length, UINT16_ELEMENTS, mode, SeqOneByteString::kHeaderSize + kObjectAlignmentMask); @@ -1680,6 +1696,12 @@ Node* CodeStubAssembler::AllocateSeqTwoByteString(Node* context, Node* length, Goto(&if_join); } + Bind(&if_lengthiszero); + { + var_result.Bind(LoadRoot(Heap::kempty_stringRootIndex)); + Goto(&if_join); + } + Bind(&if_join); return var_result.value(); } @@ -3134,14 +3156,29 @@ Node* CodeStubAssembler::StringCharCodeAt(Node* string, Node* index, Bind(&if_stringisnotexternal); { - // The {string} is a SlicedString, continue with its parent. - Node* string_offset = - LoadAndUntagObjectField(string, SlicedString::kOffsetOffset); - Node* string_parent = - LoadObjectField(string, SlicedString::kParentOffset); - var_index.Bind(IntPtrAdd(index, string_offset)); - var_string.Bind(string_parent); - Goto(&loop); + Label if_stringissliced(this), if_stringisthin(this); + Branch( + Word32Equal(Word32And(string_instance_type, + Int32Constant(kStringRepresentationMask)), + Int32Constant(kSlicedStringTag)), + &if_stringissliced, &if_stringisthin); + Bind(&if_stringissliced); + { + // The {string} is a SlicedString, continue with its parent. + Node* string_offset = + LoadAndUntagObjectField(string, SlicedString::kOffsetOffset); + Node* string_parent = + LoadObjectField(string, SlicedString::kParentOffset); + var_index.Bind(IntPtrAdd(index, string_offset)); + var_string.Bind(string_parent); + Goto(&loop); + } + Bind(&if_stringisthin); + { + // The {string} is a ThinString, continue with its actual value. 
+ var_string.Bind(LoadObjectField(string, ThinString::kActualOffset)); + Goto(&loop); + } } } } @@ -3317,7 +3354,8 @@ Node* CodeStubAssembler::SubString(Node* context, Node* string, Node* from, // and put the underlying string into var_string. // If the string is not indirect, it can only be sequential or external. - STATIC_ASSERT(kIsIndirectStringMask == (kSlicedStringTag & kConsStringTag)); + STATIC_ASSERT(kIsIndirectStringMask == + (kSlicedStringTag & kConsStringTag & kThinStringTag)); STATIC_ASSERT(kIsIndirectStringMask != 0); Label underlying_unpacked(this); GotoIf(Word32Equal( @@ -3325,13 +3363,13 @@ Node* CodeStubAssembler::SubString(Node* context, Node* string, Node* from, Int32Constant(0)), &underlying_unpacked); - // The subject string is either a sliced or cons string. + // The subject string is a sliced, cons, or thin string. - Label sliced_string(this); - GotoIf(Word32NotEqual( - Word32And(instance_type, Int32Constant(kSlicedNotConsMask)), - Int32Constant(0)), - &sliced_string); + Label sliced_string(this), thin_or_sliced(this); + Node* representation = + Word32And(instance_type, Int32Constant(kStringRepresentationMask)); + GotoIf(Word32NotEqual(representation, Int32Constant(kConsStringTag)), + &thin_or_sliced); // Cons string. Check whether it is flat, then fetch first part. // Flat cons strings have an empty second part. @@ -3347,6 +3385,16 @@ Node* CodeStubAssembler::SubString(Node* context, Node* string, Node* from, Goto(&underlying_unpacked); } + Bind(&thin_or_sliced); + { + GotoIf(Word32Equal(representation, Int32Constant(kSlicedStringTag)), + &sliced_string); + Node* actual_string = LoadObjectField(string, ThinString::kActualOffset); + var_string.Bind(actual_string); + var_instance_type.Bind(LoadInstanceType(actual_string)); + Goto(&underlying_unpacked); + } + Bind(&sliced_string); { // Fetch parent and correct start index by offset. 
@@ -4303,17 +4351,19 @@ void CodeStubAssembler::Use(Label* label) { void CodeStubAssembler::TryToName(Node* key, Label* if_keyisindex, Variable* var_index, Label* if_keyisunique, - Label* if_bailout) { + Variable* var_unique, Label* if_bailout) { DCHECK_EQ(MachineType::PointerRepresentation(), var_index->rep()); + DCHECK_EQ(MachineRepresentation::kTagged, var_unique->rep()); Comment("TryToName"); - Label if_hascachedindex(this), if_keyisnotindex(this); + Label if_hascachedindex(this), if_keyisnotindex(this), if_thinstring(this); // Handle Smi and HeapNumber keys. var_index->Bind(TryToIntptr(key, &if_keyisnotindex)); Goto(if_keyisindex); Bind(&if_keyisnotindex); Node* key_map = LoadMap(key); + var_unique->Bind(key); // Symbols are unique. GotoIf(IsSymbolMap(key_map), if_keyisunique); Node* key_instance_type = LoadMapInstanceType(key_map); @@ -4330,6 +4380,12 @@ void CodeStubAssembler::TryToName(Node* key, Label* if_keyisindex, Node* not_an_index = Word32And(hash, Int32Constant(Name::kIsNotArrayIndexMask)); GotoIf(Word32Equal(not_an_index, Int32Constant(0)), if_bailout); + // Check if we have a ThinString. + GotoIf(Word32Equal(key_instance_type, Int32Constant(THIN_STRING_TYPE)), + &if_thinstring); + GotoIf( + Word32Equal(key_instance_type, Int32Constant(THIN_ONE_BYTE_STRING_TYPE)), + &if_thinstring); // Finally, check if |key| is internalized. 
STATIC_ASSERT(kNotInternalizedTag != 0); Node* not_internalized = @@ -4337,6 +4393,10 @@ void CodeStubAssembler::TryToName(Node* key, Label* if_keyisindex, GotoIf(Word32NotEqual(not_internalized, Int32Constant(0)), if_bailout); Goto(if_keyisunique); + Bind(&if_thinstring); + var_unique->Bind(LoadObjectField(key, ThinString::kActualOffset)); + Goto(if_keyisunique); + Bind(&if_hascachedindex); var_index->Bind(DecodeWordFromWord32(hash)); Goto(if_keyisindex); @@ -5186,9 +5246,11 @@ void CodeStubAssembler::TryPrototypeChainLookup( } Variable var_index(this, MachineType::PointerRepresentation()); + Variable var_unique(this, MachineRepresentation::kTagged); Label if_keyisindex(this), if_iskeyunique(this); - TryToName(key, &if_keyisindex, &var_index, &if_iskeyunique, if_bailout); + TryToName(key, &if_keyisindex, &var_index, &if_iskeyunique, &var_unique, + if_bailout); Bind(&if_iskeyunique); { @@ -5210,8 +5272,8 @@ void CodeStubAssembler::TryPrototypeChainLookup( Label next_proto(this); lookup_property_in_holder(receiver, var_holder.value(), holder_map, - holder_instance_type, key, &next_proto, - if_bailout); + holder_instance_type, var_unique.value(), + &next_proto, if_bailout); Bind(&next_proto); // Bailout if it can be an integer indexed exotic case. diff --git a/src/code-stub-assembler.h b/src/code-stub-assembler.h index 77066b2bd0..4c0e2a6073 100644 --- a/src/code-stub-assembler.h +++ b/src/code-stub-assembler.h @@ -835,7 +835,8 @@ class V8_EXPORT_PRIVATE CodeStubAssembler : public compiler::CodeAssembler { // Various building blocks for stubs doing property lookups. void TryToName(Node* key, Label* if_keyisindex, Variable* var_index, - Label* if_keyisunique, Label* if_bailout); + Label* if_keyisunique, Variable* var_unique, + Label* if_bailout); // Calculates array index for given dictionary entry and entry field. // See Dictionary::EntryToIndex(). 
diff --git a/src/code-stubs.cc b/src/code-stubs.cc index 812012c3dc..c607155413 100644 --- a/src/code-stubs.cc +++ b/src/code-stubs.cc @@ -73,7 +73,7 @@ void CodeStubDescriptor::Initialize(Register stack_parameter_count, bool CodeStub::FindCodeInCache(Code** code_out) { UnseededNumberDictionary* stubs = isolate()->heap()->code_stubs(); - int index = stubs->FindEntry(GetKey()); + int index = stubs->FindEntry(isolate(), GetKey()); if (index != UnseededNumberDictionary::kNotFound) { *code_out = Code::cast(stubs->ValueAt(index)); return true; diff --git a/src/compiler/access-builder.cc b/src/compiler/access-builder.cc index 6ad041df3b..3bd63cc3d5 100644 --- a/src/compiler/access-builder.cc +++ b/src/compiler/access-builder.cc @@ -502,6 +502,15 @@ FieldAccess AccessBuilder::ForConsStringSecond() { return access; } +// static +FieldAccess AccessBuilder::ForThinStringActual() { + FieldAccess access = {kTaggedBase, ThinString::kActualOffset, + Handle<Name>(), MaybeHandle<Map>(), + Type::String(), MachineType::TaggedPointer(), + kPointerWriteBarrier}; + return access; +} + // static FieldAccess AccessBuilder::ForSlicedStringOffset() { FieldAccess access = {kTaggedBase, SlicedString::kOffsetOffset, diff --git a/src/compiler/access-builder.h b/src/compiler/access-builder.h index 1929040b12..cbe92bae4e 100644 --- a/src/compiler/access-builder.h +++ b/src/compiler/access-builder.h @@ -167,6 +167,9 @@ class V8_EXPORT_PRIVATE AccessBuilder final // Provides access to ConsString::second() field. static FieldAccess ForConsStringSecond(); + // Provides access to ThinString::actual() field. + static FieldAccess ForThinStringActual(); + // Provides access to SlicedString::offset() field. 
static FieldAccess ForSlicedStringOffset(); diff --git a/src/compiler/types.cc b/src/compiler/types.cc index a2af190d9d..8a6484ec49 100644 --- a/src/compiler/types.cc +++ b/src/compiler/types.cc @@ -152,6 +152,8 @@ Type::bitset BitsetType::Lub(i::Map* map) { case ONE_BYTE_STRING_TYPE: case CONS_STRING_TYPE: case CONS_ONE_BYTE_STRING_TYPE: + case THIN_STRING_TYPE: + case THIN_ONE_BYTE_STRING_TYPE: case SLICED_STRING_TYPE: case SLICED_ONE_BYTE_STRING_TYPE: case EXTERNAL_STRING_TYPE: diff --git a/src/elements.cc b/src/elements.cc index 32581bd8ae..01842630da 100644 --- a/src/elements.cc +++ b/src/elements.cc @@ -187,7 +187,7 @@ static void CopyDictionaryToObjectElements( : SKIP_WRITE_BARRIER; Isolate* isolate = from->GetIsolate(); for (int i = 0; i < copy_size; i++) { - int entry = from->FindEntry(i + from_start); + int entry = from->FindEntry(isolate, i + from_start); if (entry != SeededNumberDictionary::kNotFound) { Object* value = from->ValueAt(entry); DCHECK(!value->IsTheHole(isolate)); @@ -417,8 +417,9 @@ static void CopyDictionaryToDoubleElements(FixedArrayBase* from_base, if (to_start + copy_size > to_length) { copy_size = to_length - to_start; } + Isolate* isolate = from->GetIsolate(); for (int i = 0; i < copy_size; i++) { - int entry = from->FindEntry(i + from_start); + int entry = from->FindEntry(isolate, i + from_start); if (entry != SeededNumberDictionary::kNotFound) { to->set(i + to_start, from->ValueAt(entry)->Number()); } else { @@ -1628,7 +1629,7 @@ class DictionaryElementsAccessor // Iterate through entire range, as accessing elements out of order is // observable for (uint32_t k = start_from; k < length; ++k) { - int entry = dictionary->FindEntry(k); + int entry = dictionary->FindEntry(isolate, k); if (entry == SeededNumberDictionary::kNotFound) { if (search_for_hole) return Just(true); continue; @@ -1694,7 +1695,7 @@ class DictionaryElementsAccessor // Iterate through entire range, as accessing elements out of order is // observable. 
for (uint32_t k = start_from; k < length; ++k) { - int entry = dictionary->FindEntry(k); + int entry = dictionary->FindEntry(isolate, k); if (entry == SeededNumberDictionary::kNotFound) { continue; } diff --git a/src/factory.cc b/src/factory.cc index 382c1f20c2..05acbebd43 100644 --- a/src/factory.cc +++ b/src/factory.cc @@ -276,6 +276,7 @@ Handle<String> Factory::InternalizeStringWithKey(StringTableKey* key) { MaybeHandle<String> Factory::NewStringFromOneByte(Vector<const uint8_t> string, PretenureFlag pretenure) { int length = string.length(); + if (length == 0) return empty_string(); if (length == 1) return LookupSingleCharacterStringFromCode(string[0]); Handle<SeqOneByteString> result; ASSIGN_RETURN_ON_EXCEPTION( @@ -369,6 +370,7 @@ MaybeHandle<String> Factory::NewStringFromUtf8SubString( MaybeHandle<String> Factory::NewStringFromTwoByte(const uc16* string, int length, PretenureFlag pretenure) { + if (length == 0) return empty_string(); if (String::IsOneByte(string, length)) { if (length == 1) return LookupSingleCharacterStringFromCode(string[0]); Handle<SeqOneByteString> result; @@ -453,38 +455,63 @@ Handle<String> Factory::NewInternalizedStringImpl( String); } +namespace { + +MaybeHandle<Map> GetInternalizedStringMap(Factory* f, Handle<String> string) { + switch (string->map()->instance_type()) { + case STRING_TYPE: + return f->internalized_string_map(); + case ONE_BYTE_STRING_TYPE: + return f->one_byte_internalized_string_map(); + case EXTERNAL_STRING_TYPE: + return f->external_internalized_string_map(); + case EXTERNAL_ONE_BYTE_STRING_TYPE: + return f->external_one_byte_internalized_string_map(); + case EXTERNAL_STRING_WITH_ONE_BYTE_DATA_TYPE: + return f->external_internalized_string_with_one_byte_data_map(); + case SHORT_EXTERNAL_STRING_TYPE: + return f->short_external_internalized_string_map(); + case SHORT_EXTERNAL_ONE_BYTE_STRING_TYPE: + return f->short_external_one_byte_internalized_string_map(); + case SHORT_EXTERNAL_STRING_WITH_ONE_BYTE_DATA_TYPE: + return f->short_external_internalized_string_with_one_byte_data_map(); + default: return MaybeHandle<Map>(); 
// No match found. + } +} + +} // namespace MaybeHandle Factory::InternalizedStringMapForString( Handle string) { // If the string is in new space it cannot be used as internalized. if (isolate()->heap()->InNewSpace(*string)) return MaybeHandle(); - // Find the corresponding internalized string map for strings. - switch (string->map()->instance_type()) { - case STRING_TYPE: return internalized_string_map(); - case ONE_BYTE_STRING_TYPE: - return one_byte_internalized_string_map(); - case EXTERNAL_STRING_TYPE: return external_internalized_string_map(); - case EXTERNAL_ONE_BYTE_STRING_TYPE: - return external_one_byte_internalized_string_map(); - case EXTERNAL_STRING_WITH_ONE_BYTE_DATA_TYPE: - return external_internalized_string_with_one_byte_data_map(); - case SHORT_EXTERNAL_STRING_TYPE: - return short_external_internalized_string_map(); - case SHORT_EXTERNAL_ONE_BYTE_STRING_TYPE: - return short_external_one_byte_internalized_string_map(); - case SHORT_EXTERNAL_STRING_WITH_ONE_BYTE_DATA_TYPE: - return short_external_internalized_string_with_one_byte_data_map(); - default: return MaybeHandle(); // No match found. 
- } + return GetInternalizedStringMap(this, string); } +template +Handle Factory::InternalizeExternalString(Handle string) { + Handle cast_string = Handle::cast(string); + Handle map = GetInternalizedStringMap(this, string).ToHandleChecked(); + Handle external_string = New(map, OLD_SPACE); + external_string->set_length(cast_string->length()); + external_string->set_hash_field(cast_string->hash_field()); + external_string->set_resource(nullptr); + isolate()->heap()->RegisterExternalString(*external_string); + return external_string; +} + +template Handle + Factory::InternalizeExternalString(Handle); +template Handle + Factory::InternalizeExternalString(Handle); MaybeHandle Factory::NewRawOneByteString( int length, PretenureFlag pretenure) { if (length > String::kMaxLength || length < 0) { THROW_NEW_ERROR(isolate(), NewInvalidStringLengthError(), SeqOneByteString); } + DCHECK(length > 0); // Use Factory::empty_string() instead. CALL_HEAP_FUNCTION( isolate(), isolate()->heap()->AllocateRawOneByteString(length, pretenure), @@ -497,6 +524,7 @@ MaybeHandle Factory::NewRawTwoByteString( if (length > String::kMaxLength || length < 0) { THROW_NEW_ERROR(isolate(), NewInvalidStringLengthError(), SeqTwoByteString); } + DCHECK(length > 0); // Use Factory::empty_string() instead. 
CALL_HEAP_FUNCTION( isolate(), isolate()->heap()->AllocateRawTwoByteString(length, pretenure), @@ -585,6 +613,12 @@ Handle ConcatStringContent(Handle result, MaybeHandle Factory::NewConsString(Handle left, Handle right) { + if (left->IsThinString()) { + left = handle(Handle::cast(left)->actual(), isolate()); + } + if (right->IsThinString()) { + right = handle(Handle::cast(right)->actual(), isolate()); + } int left_length = left->length(); if (left_length == 0) return right; int right_length = right->length(); diff --git a/src/factory.h b/src/factory.h index 49008e2da6..c79edcd27e 100644 --- a/src/factory.h +++ b/src/factory.h @@ -227,6 +227,11 @@ class V8_EXPORT_PRIVATE Factory final { MUST_USE_RESULT MaybeHandle InternalizedStringMapForString( Handle string); + // Creates an internalized copy of an external string. |string| must be + // of type StringClass. + template + Handle InternalizeExternalString(Handle string); + // Allocates and partially initializes an one-byte or two-byte String. The // characters of the string are uninitialized. Currently used in regexp code // only, where they are pretenured. diff --git a/src/heap/heap-inl.h b/src/heap/heap-inl.h index 4d060f8e43..67d65bfc85 100644 --- a/src/heap/heap-inl.h +++ b/src/heap/heap-inl.h @@ -225,6 +225,8 @@ AllocationResult Heap::AllocateInternalizedStringImpl(T t, int chars, AllocationResult Heap::AllocateOneByteInternalizedString( Vector str, uint32_t hash_field) { CHECK_GE(String::kMaxLength, str.length()); + // The canonical empty_string is the only zero-length string we allow. + DCHECK_IMPLIES(str.length() == 0, roots_[kempty_stringRootIndex] == nullptr); // Compute map and object size. 
Map* map = one_byte_internalized_string_map(); int size = SeqOneByteString::SizeFor(str.length()); @@ -256,6 +258,7 @@ AllocationResult Heap::AllocateOneByteInternalizedString( AllocationResult Heap::AllocateTwoByteInternalizedString(Vector str, uint32_t hash_field) { CHECK_GE(String::kMaxLength, str.length()); + DCHECK_NE(0, str.length()); // Use Heap::empty_string() instead. // Compute map and object size. Map* map = internalized_string_map(); int size = SeqTwoByteString::SizeFor(str.length()); diff --git a/src/heap/heap.cc b/src/heap/heap.cc index 316c5772a0..62ec7f5937 100644 --- a/src/heap/heap.cc +++ b/src/heap/heap.cc @@ -1733,12 +1733,21 @@ String* Heap::UpdateNewSpaceReferenceInExternalStringTableEntry(Heap* heap, if (!first_word.IsForwardingAddress()) { // Unreachable external string can be finalized. - heap->FinalizeExternalString(String::cast(*p)); + String* string = String::cast(*p); + if (!string->IsExternalString()) { + // Original external string has been internalized. + DCHECK(string->IsThinString()); + return NULL; + } + heap->FinalizeExternalString(string); return NULL; } // String is still reachable. - return String::cast(first_word.ToForwardingAddress()); + String* string = String::cast(first_word.ToForwardingAddress()); + if (string->IsThinString()) string = ThinString::cast(string)->actual(); + // Internalization can replace external strings with non-external strings. + return string->IsExternalString() ? 
string : nullptr; } @@ -6396,14 +6405,19 @@ void Heap::ExternalStringTable::CleanUpNewSpaceStrings() { int last = 0; Isolate* isolate = heap_->isolate(); for (int i = 0; i < new_space_strings_.length(); ++i) { - if (new_space_strings_[i]->IsTheHole(isolate)) { + Object* o = new_space_strings_[i]; + if (o->IsTheHole(isolate)) { continue; } - DCHECK(new_space_strings_[i]->IsExternalString()); - if (heap_->InNewSpace(new_space_strings_[i])) { - new_space_strings_[last++] = new_space_strings_[i]; + if (o->IsThinString()) { + o = ThinString::cast(o)->actual(); + if (!o->IsExternalString()) continue; + } + DCHECK(o->IsExternalString()); + if (heap_->InNewSpace(o)) { + new_space_strings_[last++] = o; } else { - old_space_strings_.Add(new_space_strings_[i]); + old_space_strings_.Add(o); } } new_space_strings_.Rewind(last); @@ -6415,12 +6429,17 @@ void Heap::ExternalStringTable::CleanUpAll() { int last = 0; Isolate* isolate = heap_->isolate(); for (int i = 0; i < old_space_strings_.length(); ++i) { - if (old_space_strings_[i]->IsTheHole(isolate)) { + Object* o = old_space_strings_[i]; + if (o->IsTheHole(isolate)) { continue; } - DCHECK(old_space_strings_[i]->IsExternalString()); - DCHECK(!heap_->InNewSpace(old_space_strings_[i])); - old_space_strings_[last++] = old_space_strings_[i]; + if (o->IsThinString()) { + o = ThinString::cast(o)->actual(); + if (!o->IsExternalString()) continue; + } + DCHECK(o->IsExternalString()); + DCHECK(!heap_->InNewSpace(o)); + old_space_strings_[last++] = o; } old_space_strings_.Rewind(last); old_space_strings_.Trim(); @@ -6433,11 +6452,21 @@ void Heap::ExternalStringTable::CleanUpAll() { void Heap::ExternalStringTable::TearDown() { for (int i = 0; i < new_space_strings_.length(); ++i) { - heap_->FinalizeExternalString(ExternalString::cast(new_space_strings_[i])); + Object* o = new_space_strings_[i]; + if (o->IsThinString()) { + o = ThinString::cast(o)->actual(); + if (!o->IsExternalString()) continue; + } + 
heap_->FinalizeExternalString(ExternalString::cast(o)); } new_space_strings_.Free(); for (int i = 0; i < old_space_strings_.length(); ++i) { - heap_->FinalizeExternalString(ExternalString::cast(old_space_strings_[i])); + Object* o = old_space_strings_[i]; + if (o->IsThinString()) { + o = ThinString::cast(o)->actual(); + if (!o->IsExternalString()) continue; + } + heap_->FinalizeExternalString(ExternalString::cast(o)); } old_space_strings_.Free(); } diff --git a/src/heap/heap.h b/src/heap/heap.h index b8d9166cb7..4a7ba35e42 100644 --- a/src/heap/heap.h +++ b/src/heap/heap.h @@ -100,6 +100,8 @@ using v8::MemoryPressureLevel; V(Map, string_map, StringMap) \ V(Map, cons_one_byte_string_map, ConsOneByteStringMap) \ V(Map, cons_string_map, ConsStringMap) \ + V(Map, thin_one_byte_string_map, ThinOneByteStringMap) \ + V(Map, thin_string_map, ThinStringMap) \ V(Map, sliced_string_map, SlicedStringMap) \ V(Map, sliced_one_byte_string_map, SlicedOneByteStringMap) \ V(Map, external_string_map, ExternalStringMap) \ @@ -2118,10 +2120,6 @@ class Heap { MUST_USE_RESULT AllocationResult AllocateCode(int object_size, bool immovable); - MUST_USE_RESULT AllocationResult InternalizeStringWithKey(HashTableKey* key); - - MUST_USE_RESULT AllocationResult InternalizeString(String* str); - // =========================================================================== void set_force_oom(bool value) { force_oom_ = value; } diff --git a/src/heap/mark-compact.cc b/src/heap/mark-compact.cc index 427185a5c8..d76dd42982 100644 --- a/src/heap/mark-compact.cc +++ b/src/heap/mark-compact.cc @@ -1437,8 +1437,12 @@ class StringTableCleaner : public ObjectVisitor { if (o->IsHeapObject()) { if (Marking::IsWhite(ObjectMarking::MarkBitFrom(HeapObject::cast(o)))) { if (finalize_external_strings) { - DCHECK(o->IsExternalString()); - heap_->FinalizeExternalString(String::cast(*p)); + if (o->IsExternalString()) { + heap_->FinalizeExternalString(String::cast(*p)); + } else { + // The original external string 
may have been internalized. + DCHECK(o->IsThinString()); + } } else { pointers_removed_++; } diff --git a/src/heap/objects-visiting-inl.h b/src/heap/objects-visiting-inl.h index d86406bf5f..53a1310ed9 100644 --- a/src/heap/objects-visiting-inl.h +++ b/src/heap/objects-visiting-inl.h @@ -31,6 +31,10 @@ void StaticNewSpaceVisitor::Initialize() { kVisitConsString, &FixedBodyVisitor::Visit); + table_.Register( + kVisitThinString, + &FixedBodyVisitor::Visit); + table_.Register(kVisitSlicedString, &FixedBodyVisitor::Visit); @@ -117,6 +121,10 @@ void StaticMarkingVisitor::Initialize() { &FixedBodyVisitor::Visit); + table_.Register(kVisitThinString, + &FixedBodyVisitor::Visit); + table_.Register(kVisitSlicedString, &FixedBodyVisitor::Visit); diff --git a/src/heap/objects-visiting.cc b/src/heap/objects-visiting.cc index 146aa58675..a2651bfb98 100644 --- a/src/heap/objects-visiting.cc +++ b/src/heap/objects-visiting.cc @@ -41,6 +41,9 @@ StaticVisitorBase::VisitorId StaticVisitorBase::GetVisitorId( case kExternalStringTag: return GetVisitorIdForSize(kVisitDataObject, kVisitDataObjectGeneric, instance_size, has_unboxed_fields); + + case kThinStringTag: + return kVisitThinString; } UNREACHABLE(); } diff --git a/src/heap/objects-visiting.h b/src/heap/objects-visiting.h index e35e47c3aa..f10f370314 100644 --- a/src/heap/objects-visiting.h +++ b/src/heap/objects-visiting.h @@ -79,6 +79,7 @@ class StaticVisitorBase : public AllStatic { V(StructGeneric) \ V(ConsString) \ V(SlicedString) \ + V(ThinString) \ V(Symbol) \ V(Oddball) \ V(Code) \ diff --git a/src/heap/scavenger.cc b/src/heap/scavenger.cc index aea41dd552..1173bb60c6 100644 --- a/src/heap/scavenger.cc +++ b/src/heap/scavenger.cc @@ -30,6 +30,7 @@ class ScavengingVisitor : public StaticVisitorBase { table_.Register(kVisitSeqOneByteString, &EvacuateSeqOneByteString); table_.Register(kVisitSeqTwoByteString, &EvacuateSeqTwoByteString); table_.Register(kVisitShortcutCandidate, &EvacuateShortcutCandidate); + 
table_.Register(kVisitThinString, &EvacuateThinString); table_.Register(kVisitByteArray, &EvacuateByteArray); table_.Register(kVisitFixedArray, &EvacuateFixedArray); table_.Register(kVisitFixedDoubleArray, &EvacuateFixedDoubleArray); @@ -89,6 +90,12 @@ class ScavengingVisitor : public StaticVisitorBase { return &table_; } + static void EvacuateThinStringNoShortcut(Map* map, HeapObject** slot, + HeapObject* object) { + EvacuateObject(map, slot, object, + ThinString::kSize); + } + private: enum ObjectContents { DATA_OBJECT, POINTER_OBJECT }; @@ -335,6 +342,22 @@ class ScavengingVisitor : public StaticVisitorBase { object_size); } + static inline void EvacuateThinString(Map* map, HeapObject** slot, + HeapObject* object) { + if (marks_handling == IGNORE_MARKS) { + HeapObject* actual = ThinString::cast(object)->actual(); + *slot = actual; + // ThinStrings always refer to internalized strings, which are + // always in old space. + DCHECK(!map->GetHeap()->InNewSpace(actual)); + object->set_map_word(MapWord::FromForwardingAddress(actual)); + return; + } + + EvacuateObject(map, slot, object, + ThinString::kSize); + } + template class ObjectEvacuationStrategy { public: @@ -419,6 +442,10 @@ void Scavenger::SelectScavengingVisitorsTable() { StaticVisitorBase::kVisitShortcutCandidate, scavenging_visitors_table_.GetVisitorById( StaticVisitorBase::kVisitConsString)); + scavenging_visitors_table_.Register( + StaticVisitorBase::kVisitThinString, + &ScavengingVisitor:: + EvacuateThinStringNoShortcut); } } } diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index c1701be519..a61db8f6df 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -604,7 +604,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (8) Is the external string one byte? If yes, go to (5). // (9) Two byte sequential. Load regexp code for two byte. Go to (E). // (10) Short external string or not a string? If yes, bail out to runtime. - // (11) Sliced string. 
Replace subject with parent. Go to (1). + // (11) Sliced or thin string. Replace subject with parent. Go to (1). Label seq_one_byte_string /* 5 */, seq_two_byte_string /* 9 */, external_string /* 7 */, check_underlying /* 1 */, @@ -634,6 +634,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // have already been covered. STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmp(ebx, Immediate(kExternalStringTag)); @@ -912,11 +913,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ test(ebx, Immediate(kIsNotStringMask | kShortExternalStringTag)); __ j(not_zero, &runtime); - // (11) Sliced string. Replace subject with parent. Go to (1). + // (11) Sliced or thin string. Replace subject with parent. Go to (1). + Label thin_string; + __ cmp(ebx, Immediate(kThinStringTag)); + __ j(equal, &thin_string, Label::kNear); // Load offset into edi and replace subject string with parent. __ mov(edi, FieldOperand(eax, SlicedString::kOffsetOffset)); __ mov(eax, FieldOperand(eax, SlicedString::kParentOffset)); __ jmp(&check_underlying); // Go to (1). + + __ bind(&thin_string); + __ mov(eax, FieldOperand(eax, ThinString::kActualOffset)); + __ jmp(&check_underlying); // Go to (1). #endif // V8_INTERPRETED_REGEXP } diff --git a/src/ia32/codegen-ia32.cc b/src/ia32/codegen-ia32.cc index ccd159e299..339aa716f3 100644 --- a/src/ia32/codegen-ia32.cc +++ b/src/ia32/codegen-ia32.cc @@ -501,9 +501,12 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ j(zero, &check_sequential, Label::kNear); // Dispatch on the indirect string shape: slice or cons. 
- Label cons_string; - __ test(result, Immediate(kSlicedNotConsMask)); - __ j(zero, &cons_string, Label::kNear); + Label cons_string, thin_string; + __ and_(result, Immediate(kStringRepresentationMask)); + __ cmp(result, Immediate(kConsStringTag)); + __ j(equal, &cons_string, Label::kNear); + __ cmp(result, Immediate(kThinStringTag)); + __ j(equal, &thin_string, Label::kNear); // Handle slices. Label indirect_string_loaded; @@ -513,6 +516,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ mov(string, FieldOperand(string, SlicedString::kParentOffset)); __ jmp(&indirect_string_loaded, Label::kNear); + // Handle thin strings. + __ bind(&thin_string); + __ mov(string, FieldOperand(string, ThinString::kActualOffset)); + __ jmp(&indirect_string_loaded, Label::kNear); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index 008f7b93b8..d93552b2a9 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -2566,11 +2566,13 @@ void MacroAssembler::JumpIfNotBothSequentialOneByteStrings(Register object1, const int kFlatOneByteStringTag = kStringTag | kOneByteStringTag | kSeqStringTag; // Interleave bits from both instance types and compare them in one check. 
- DCHECK_EQ(0, kFlatOneByteStringMask & (kFlatOneByteStringMask << 3)); + const int kShift = 8; + DCHECK_EQ(0, kFlatOneByteStringMask & (kFlatOneByteStringMask << kShift)); and_(scratch1, kFlatOneByteStringMask); and_(scratch2, kFlatOneByteStringMask); - lea(scratch1, Operand(scratch1, scratch2, times_8, 0)); - cmp(scratch1, kFlatOneByteStringTag | (kFlatOneByteStringTag << 3)); + shl(scratch2, kShift); + or_(scratch1, scratch2); + cmp(scratch1, kFlatOneByteStringTag | (kFlatOneByteStringTag << kShift)); j(not_equal, failure); } diff --git a/src/ic/accessor-assembler.cc b/src/ic/accessor-assembler.cc index 6a27045f12..707ecff5e1 100644 --- a/src/ic/accessor-assembler.cc +++ b/src/ic/accessor-assembler.cc @@ -1301,6 +1301,8 @@ void AccessorAssemblerImpl::KeyedLoadIC(const LoadICParameters* p) { void AccessorAssemblerImpl::KeyedLoadICGeneric(const LoadICParameters* p) { Variable var_index(this, MachineType::PointerRepresentation()); + Variable var_unique(this, MachineRepresentation::kTagged); + var_unique.Bind(p->name); // Dummy initialization. Variable var_details(this, MachineRepresentation::kWord32); Variable var_value(this, MachineRepresentation::kTagged); Label if_index(this), if_unique_name(this), if_element_hole(this), @@ -1317,8 +1319,8 @@ void AccessorAssemblerImpl::KeyedLoadICGeneric(const LoadICParameters* p) { Int32Constant(LAST_CUSTOM_ELEMENTS_RECEIVER)), &slow); - Node* key = p->name; - TryToName(key, &if_index, &var_index, &if_unique_name, &slow); + TryToName(p->name, &if_index, &var_index, &if_unique_name, &var_unique, + &slow); Bind(&if_index); { @@ -1367,6 +1369,7 @@ void AccessorAssemblerImpl::KeyedLoadICGeneric(const LoadICParameters* p) { Bind(&if_unique_name); { Comment("key is unique name"); + Node* key = var_unique.value(); // Check if the receiver has fast or slow properties. 
properties = LoadProperties(receiver); Node* properties_map = LoadMap(properties); @@ -1425,6 +1428,7 @@ void AccessorAssemblerImpl::KeyedLoadICGeneric(const LoadICParameters* p) { // We checked for LAST_CUSTOM_ELEMENTS_RECEIVER before, which rules out // seeing global objects here (which would need special handling). + Node* key = var_unique.value(); Variable var_name_index(this, MachineType::PointerRepresentation()); Label dictionary_found(this, &var_name_index); NameDictionaryLookup(properties, key, &dictionary_found, diff --git a/src/ic/ic.cc b/src/ic/ic.cc index c29100ab3f..6cd1641ade 100644 --- a/src/ic/ic.cc +++ b/src/ic/ic.cc @@ -1578,6 +1578,8 @@ static Handle TryConvertKey(Handle key, Isolate* isolate) { } } else if (key->IsUndefined(isolate)) { key = isolate->factory()->undefined_string(); + } else if (key->IsString()) { + key = isolate->factory()->InternalizeString(Handle::cast(key)); } return key; } diff --git a/src/ic/keyed-store-generic.cc b/src/ic/keyed-store-generic.cc index cf1c49f0c8..549bb6af39 100644 --- a/src/ic/keyed-store-generic.cc +++ b/src/ic/keyed-store-generic.cc @@ -452,6 +452,8 @@ void KeyedStoreGenericAssembler::EmitGenericElementStore( // Out-of-capacity accesses (index >= capacity) jump here. Additionally, // an ElementsKind transition might be necessary. + // The index can also be negative at this point! Jump to the runtime in that + // case to convert it to a named property. Bind(&if_grow); { Comment("Grow backing store"); @@ -747,6 +749,8 @@ void KeyedStoreGenericAssembler::KeyedStoreGeneric(LanguageMode language_mode) { Node* context = Parameter(Descriptor::kContext); Variable var_index(this, MachineType::PointerRepresentation()); + Variable var_unique(this, MachineRepresentation::kTagged); + var_unique.Bind(name); // Dummy initialization. 
Label if_index(this), if_unique_name(this), slow(this); GotoIf(TaggedIsSmi(receiver), &slow); @@ -758,7 +762,7 @@ void KeyedStoreGenericAssembler::KeyedStoreGeneric(LanguageMode language_mode) { Int32Constant(LAST_CUSTOM_ELEMENTS_RECEIVER)), &slow); - TryToName(name, &if_index, &var_index, &if_unique_name, &slow); + TryToName(name, &if_index, &var_index, &if_unique_name, &var_unique, &slow); Bind(&if_index); { @@ -770,8 +774,8 @@ void KeyedStoreGenericAssembler::KeyedStoreGeneric(LanguageMode language_mode) { Bind(&if_unique_name); { Comment("key is unique name"); - KeyedStoreGenericAssembler::StoreICParameters p(context, receiver, name, - value, slot, vector); + StoreICParameters p(context, receiver, var_unique.value(), value, slot, + vector); EmitGenericPropertyStore(receiver, receiver_map, &p, &slow, language_mode); } diff --git a/src/mips/code-stubs-mips.cc b/src/mips/code-stubs-mips.cc index f829f4c5a0..741dd3e3c9 100644 --- a/src/mips/code-stubs-mips.cc +++ b/src/mips/code-stubs-mips.cc @@ -1423,7 +1423,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (6) External string. Make it, offset-wise, look like a sequential string. // Go to (4). // (7) Short external string or not a string? If yes, bail out to runtime. - // (8) Sliced string. Replace subject with parent. Go to (1). + // (8) Sliced or thin string. Replace subject with parent. Go to (1). Label seq_string /* 4 */, external_string /* 6 */, check_underlying /* 1 */, not_seq_nor_cons /* 5 */, not_long_external /* 7 */; @@ -1444,6 +1444,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (2) Sequential or cons? If not, go to (5). STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); // Go to (5). 
@@ -1470,8 +1471,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ Branch(&runtime, ls, a3, Operand(a1)); __ sra(a1, a1, kSmiTagSize); // Untag the Smi. - STATIC_ASSERT(kStringEncodingMask == 4); - STATIC_ASSERT(kOneByteStringTag == 4); + STATIC_ASSERT(kStringEncodingMask == 8); + STATIC_ASSERT(kOneByteStringTag == 8); STATIC_ASSERT(kTwoByteStringTag == 0); __ And(a0, a0, Operand(kStringEncodingMask)); // Non-zero for one-byte. __ lw(t9, FieldMemOperand(regexp_data, JSRegExp::kDataOneByteCodeOffset)); @@ -1720,12 +1721,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ And(at, a1, Operand(kIsNotStringMask | kShortExternalStringMask)); __ Branch(&runtime, ne, at, Operand(zero_reg)); - // (8) Sliced string. Replace subject with parent. Go to (4). + // (8) Sliced or thin string. Replace subject with parent. Go to (4). + Label thin_string; + __ Branch(&thin_string, eq, a1, Operand(kThinStringTag)); // Load offset into t0 and replace subject string with parent. __ lw(t0, FieldMemOperand(subject, SlicedString::kOffsetOffset)); __ sra(t0, t0, kSmiTagSize); __ lw(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); __ jmp(&check_underlying); // Go to (4). + + __ bind(&thin_string); + __ lw(subject, FieldMemOperand(subject, ThinString::kActualOffset)); + __ jmp(&check_underlying); // Go to (4). #endif // V8_INTERPRETED_REGEXP } diff --git a/src/mips/codegen-mips.cc b/src/mips/codegen-mips.cc index a57299abf6..15011023cc 100644 --- a/src/mips/codegen-mips.cc +++ b/src/mips/codegen-mips.cc @@ -620,9 +620,10 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ Branch(&check_sequential, eq, at, Operand(zero_reg)); // Dispatch on the indirect string shape: slice or cons. 
- Label cons_string; - __ And(at, result, Operand(kSlicedNotConsMask)); - __ Branch(&cons_string, eq, at, Operand(zero_reg)); + Label cons_string, thin_string; + __ And(at, result, Operand(kStringRepresentationMask)); + __ Branch(&cons_string, eq, at, Operand(kConsStringTag)); + __ Branch(&thin_string, eq, at, Operand(kThinStringTag)); // Handle slices. Label indirect_string_loaded; @@ -632,6 +633,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ Addu(index, index, at); __ jmp(&indirect_string_loaded); + // Handle thin strings. + __ bind(&thin_string); + __ lw(string, FieldMemOperand(string, ThinString::kActualOffset)); + __ jmp(&indirect_string_loaded); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not diff --git a/src/mips64/code-stubs-mips64.cc b/src/mips64/code-stubs-mips64.cc index 200ecfa049..ad67d5f2c1 100644 --- a/src/mips64/code-stubs-mips64.cc +++ b/src/mips64/code-stubs-mips64.cc @@ -1420,7 +1420,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (6) External string. Make it, offset-wise, look like a sequential string. // Go to (4). // (7) Short external string or not a string? If yes, bail out to runtime. - // (8) Sliced string. Replace subject with parent. Go to (1). + // (8) Sliced or thin string. Replace subject with parent. Go to (1). Label check_underlying; // (1) Label seq_string; // (4) @@ -1444,6 +1444,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (2) Sequential or cons? If not, go to (5). STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); // Go to (5). 
@@ -1470,8 +1471,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ Branch(&runtime, ls, a3, Operand(a1)); __ SmiUntag(a1); - STATIC_ASSERT(kStringEncodingMask == 4); - STATIC_ASSERT(kOneByteStringTag == 4); + STATIC_ASSERT(kStringEncodingMask == 8); + STATIC_ASSERT(kOneByteStringTag == 8); STATIC_ASSERT(kTwoByteStringTag == 0); __ And(a0, a0, Operand(kStringEncodingMask)); // Non-zero for one_byte. __ ld(t9, FieldMemOperand(regexp_data, JSRegExp::kDataOneByteCodeOffset)); @@ -1721,12 +1722,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ And(at, a1, Operand(kIsNotStringMask | kShortExternalStringMask)); __ Branch(&runtime, ne, at, Operand(zero_reg)); - // (8) Sliced string. Replace subject with parent. Go to (4). + // (8) Sliced or thin string. Replace subject with parent. Go to (4). + Label thin_string; + __ Branch(&thin_string, eq, a1, Operand(kThinStringTag)); // Load offset into t0 and replace subject string with parent. __ ld(t0, FieldMemOperand(subject, SlicedString::kOffsetOffset)); __ SmiUntag(t0); __ ld(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); __ jmp(&check_underlying); // Go to (1). + + __ bind(&thin_string); + __ ld(subject, FieldMemOperand(subject, ThinString::kActualOffset)); + __ jmp(&check_underlying); // Go to (1). #endif // V8_INTERPRETED_REGEXP } diff --git a/src/mips64/codegen-mips64.cc b/src/mips64/codegen-mips64.cc index 134fe4dd88..78652846df 100644 --- a/src/mips64/codegen-mips64.cc +++ b/src/mips64/codegen-mips64.cc @@ -622,9 +622,10 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ Branch(&check_sequential, eq, at, Operand(zero_reg)); // Dispatch on the indirect string shape: slice or cons. 
- Label cons_string; - __ And(at, result, Operand(kSlicedNotConsMask)); - __ Branch(&cons_string, eq, at, Operand(zero_reg)); + Label cons_string, thin_string; + __ And(at, result, Operand(kStringRepresentationMask)); + __ Branch(&cons_string, eq, at, Operand(kConsStringTag)); + __ Branch(&thin_string, eq, at, Operand(kThinStringTag)); // Handle slices. Label indirect_string_loaded; @@ -634,6 +635,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ Daddu(index, index, at); __ jmp(&indirect_string_loaded); + // Handle thin strings. + __ bind(&thin_string); + __ ld(string, FieldMemOperand(string, ThinString::kActualOffset)); + __ jmp(&indirect_string_loaded); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not diff --git a/src/objects-body-descriptors-inl.h b/src/objects-body-descriptors-inl.h index bffc8bdb3e..608d1847b9 100644 --- a/src/objects-body-descriptors-inl.h +++ b/src/objects-body-descriptors-inl.h @@ -440,6 +440,8 @@ ReturnType BodyDescriptorApply(InstanceType type, T1 p1, T2 p2, T3 p3) { return ReturnType(); case kConsStringTag: return Op::template apply(p1, p2, p3); + case kThinStringTag: + return Op::template apply(p1, p2, p3); case kSlicedStringTag: return Op::template apply(p1, p2, p3); case kExternalStringTag: diff --git a/src/objects-debug.cc b/src/objects-debug.cc index ee39fdd069..5caf12a7be 100644 --- a/src/objects-debug.cc +++ b/src/objects-debug.cc @@ -555,6 +555,8 @@ void String::StringVerify() { ConsString::cast(this)->ConsStringVerify(); } else if (IsSlicedString()) { SlicedString::cast(this)->SlicedStringVerify(); + } else if (IsThinString()) { + ThinString::cast(this)->ThinStringVerify(); } } @@ -566,12 +568,16 @@ void ConsString::ConsStringVerify() { CHECK(this->length() >= ConsString::kMinLength); CHECK(this->length() == this->first()->length() + this->second()->length()); if (this->IsFlat()) { - // A flat cons can 
only be created by String::SlowTryFlatten. - // Afterwards, the first part may be externalized. - CHECK(this->first()->IsSeqString() || this->first()->IsExternalString()); + // A flat cons can only be created by String::SlowFlatten. + // Afterwards, the first part may be externalized or internalized. + CHECK(this->first()->IsSeqString() || this->first()->IsExternalString() || + this->first()->IsThinString()); } } +void ThinString::ThinStringVerify() { + CHECK(this->actual()->IsInternalizedString()); +} void SlicedString::SlicedStringVerify() { CHECK(!this->parent()->IsConsString()); diff --git a/src/objects-inl.h b/src/objects-inl.h index b56efe305b..597f51e34c 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -274,6 +274,11 @@ bool HeapObject::IsConsString() const { return StringShape(String::cast(this)).IsCons(); } +bool HeapObject::IsThinString() const { + if (!IsString()) return false; + return StringShape(String::cast(this)).IsThin(); +} + bool HeapObject::IsSlicedString() const { if (!IsString()) return false; return StringShape(String::cast(this)).IsSliced(); @@ -697,6 +702,7 @@ CAST_ACCESSOR(StringTable) CAST_ACCESSOR(Struct) CAST_ACCESSOR(Symbol) CAST_ACCESSOR(TemplateInfo) +CAST_ACCESSOR(ThinString) CAST_ACCESSOR(Uint16x8) CAST_ACCESSOR(Uint32x4) CAST_ACCESSOR(Uint8x16) @@ -843,6 +849,10 @@ bool StringShape::IsCons() { return (type_ & kStringRepresentationMask) == kConsStringTag; } +bool StringShape::IsThin() { + return (type_ & kStringRepresentationMask) == kThinStringTag; +} + bool StringShape::IsSliced() { return (type_ & kStringRepresentationMask) == kSlicedStringTag; } @@ -3719,6 +3729,9 @@ bool String::Equals(Handle one, Handle two) { Handle String::Flatten(Handle string, PretenureFlag pretenure) { + if (string->IsThinString()) { + return handle(Handle::cast(string)->actual()); + } if (!string->IsConsString()) return string; Handle cons = Handle::cast(string); if (cons->IsFlat()) return handle(cons->first()); @@ -3743,6 +3756,9 @@ uint16_t 
String::Get(int index) { case kSlicedStringTag | kOneByteStringTag: case kSlicedStringTag | kTwoByteStringTag: return SlicedString::cast(this)->SlicedStringGet(index); + case kThinStringTag | kOneByteStringTag: + case kThinStringTag | kTwoByteStringTag: + return ThinString::cast(this)->ThinStringGet(index); default: break; } @@ -3774,6 +3790,7 @@ String* String::GetUnderlying() { DCHECK(this->IsFlat()); DCHECK(StringShape(this).IsIndirect()); STATIC_ASSERT(ConsString::kFirstOffset == SlicedString::kParentOffset); + STATIC_ASSERT(ConsString::kFirstOffset == ThinString::kActualOffset); const int kUnderlyingOffset = SlicedString::kParentOffset; return String::cast(READ_FIELD(this, kUnderlyingOffset)); } @@ -3825,6 +3842,11 @@ ConsString* String::VisitFlat(Visitor* visitor, case kConsStringTag | kTwoByteStringTag: return ConsString::cast(string); + case kThinStringTag | kOneByteStringTag: + case kThinStringTag | kTwoByteStringTag: + string = ThinString::cast(string)->actual(); + continue; + default: UNREACHABLE(); return NULL; @@ -3956,6 +3978,7 @@ void ConsString::set_second(String* value, WriteBarrierMode mode) { CONDITIONAL_WRITE_BARRIER(GetHeap(), this, kSecondOffset, value, mode); } +ACCESSORS(ThinString, actual, String, kActualOffset); bool ExternalString::is_short() { InstanceType type = map()->instance_type(); diff --git a/src/objects-printer.cc b/src/objects-printer.cc index 4c1c32b046..9e7cc25d0b 100644 --- a/src/objects-printer.cc +++ b/src/objects-printer.cc @@ -858,6 +858,8 @@ void String::StringPrint(std::ostream& os) { // NOLINT os << "#"; } else if (StringShape(this).IsCons()) { os << "c\""; + } else if (StringShape(this).IsThin()) { + os << ">\""; } else { os << "\""; } diff --git a/src/objects.cc b/src/objects.cc index 947a6e076f..9bd276c20d 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -2512,7 +2512,7 @@ bool String::MakeExternal(v8::String::ExternalStringResource* resource) { Heap* heap = GetHeap(); bool is_one_byte = 
this->IsOneByteRepresentation(); bool is_internalized = this->IsInternalizedString(); - bool has_pointers = this->IsConsString() || this->IsSlicedString(); + bool has_pointers = StringShape(this).IsIndirect(); // Morph the string to an external string by replacing the map and // reinitializing the fields. This won't work if the space the existing @@ -2584,7 +2584,7 @@ bool String::MakeExternal(v8::String::ExternalOneByteStringResource* resource) { if (size < ExternalString::kShortSize) return false; Heap* heap = GetHeap(); bool is_internalized = this->IsInternalizedString(); - bool has_pointers = this->IsConsString() || this->IsSlicedString(); + bool has_pointers = StringShape(this).IsIndirect(); // Morph the string to an external string by replacing the map and // reinitializing the fields. This won't work if the space the existing @@ -10928,11 +10928,7 @@ Handle String::Trim(Handle string, TrimMode mode) { return isolate->factory()->NewSubString(string, left, right); } -bool String::LooksValid() { - if (!GetIsolate()->heap()->Contains(this)) return false; - return true; -} - +bool String::LooksValid() { return GetIsolate()->heap()->Contains(this); } // static MaybeHandle Name::ToFunctionName(Handle name) { @@ -11066,6 +11062,10 @@ String::FlatContent String::GetFlatContent() { } string = cons->first(); shape = StringShape(string); + } else if (shape.representation_tag() == kThinStringTag) { + ThinString* thin = ThinString::cast(string); + string = thin->actual(); + shape = StringShape(string); } if (shape.representation_tag() == kSlicedStringTag) { SlicedString* slice = SlicedString::cast(string); @@ -11160,6 +11160,7 @@ const uc16* String::GetTwoByteData(unsigned start) { return slice->parent()->GetTwoByteData(start + slice->offset()); } case kConsStringTag: + case kThinStringTag: UNREACHABLE(); return NULL; } @@ -11426,6 +11427,7 @@ uint16_t ConsString::ConsStringGet(int index) { return 0; } +uint16_t ThinString::ThinStringGet(int index) { return 
actual()->Get(index); } uint16_t SlicedString::SlicedStringGet(int index) { return parent()->Get(offset() + index); @@ -11520,6 +11522,10 @@ void String::WriteToFlat(String* src, WriteToFlat(slice->parent(), sink, from + offset, to + offset); return; } + case kOneByteStringTag | kThinStringTag: + case kTwoByteStringTag | kThinStringTag: + source = ThinString::cast(source)->actual(); + break; } } } @@ -11741,6 +11747,17 @@ bool String::SlowEquals(String* other) { if (len != other->length()) return false; if (len == 0) return true; + // Fast check: if at least one ThinString is involved, dereference it/them + // and restart. + if (this->IsThinString() || other->IsThinString()) { + if (other->IsThinString()) other = ThinString::cast(other)->actual(); + if (this->IsThinString()) { + return ThinString::cast(this)->actual()->Equals(other); + } else { + return this->Equals(other); + } + } + // Fast check: if hash code is computed for both strings // a fast negative check can be performed. if (HasHashCode() && other->HasHashCode()) { @@ -11782,6 +11799,14 @@ bool String::SlowEquals(Handle one, Handle two) { if (one_length != two->length()) return false; if (one_length == 0) return true; + // Fast check: if at least one ThinString is involved, dereference it/them + // and restart. + if (one->IsThinString() || two->IsThinString()) { + if (one->IsThinString()) one = handle(ThinString::cast(*one)->actual()); + if (two->IsThinString()) two = handle(ThinString::cast(*two)->actual()); + return String::Equals(one, two); + } + // Fast check: if hash code is computed for both strings // a fast negative check can be performed. if (one->HasHashCode() && two->HasHashCode()) { @@ -16837,6 +16862,14 @@ class InternalizedStringKey : public HashTableKey { DCHECK(string_->IsInternalizedString()); return string_; } + // External strings get special treatment, to avoid copying their contents. 
+ if (string_->IsExternalOneByteString()) { + return isolate->factory() + ->InternalizeExternalString(string_); + } else if (string_->IsExternalTwoByteString()) { + return isolate->factory() + ->InternalizeExternalString(string_); + } // Otherwise allocate a new internalized string. return isolate->factory()->NewInternalizedStringImpl( string_, string_->length(), string_->hash_field()); @@ -16846,6 +16879,7 @@ class InternalizedStringKey : public HashTableKey { return String::cast(obj)->Hash(); } + private: Handle string_; }; @@ -17780,6 +17814,9 @@ MaybeHandle StringTable::InternalizeStringIfExists( if (string->IsInternalizedString()) { return string; } + if (string->IsThinString()) { + return handle(Handle::cast(string)->actual(), isolate); + } return LookupStringIfExists(isolate, string); } @@ -17826,31 +17863,72 @@ void StringTable::EnsureCapacityForDeserialization(Isolate* isolate, isolate->heap()->SetRootStringTable(*table); } +namespace { + +template +void MigrateExternalStringResource(Isolate* isolate, Handle from, + Handle to) { + Handle cast_from = Handle::cast(from); + Handle cast_to = Handle::cast(to); + const typename StringClass::Resource* to_resource = cast_to->resource(); + if (to_resource == nullptr) { + // |to| is a just-created internalized copy of |from|. Migrate the resource. + cast_to->set_resource(cast_from->resource()); + // Zap |from|'s resource pointer to reflect the fact that |from| has + // relinquished ownership of its resource. + cast_from->set_resource(nullptr); + } else if (to_resource != cast_from->resource()) { + // |to| already existed and has its own resource. Finalize |from|. 
+ isolate->heap()->FinalizeExternalString(*from); + } +} + +} // namespace Handle StringTable::LookupString(Isolate* isolate, Handle string) { + if (string->IsThinString()) { + DCHECK(Handle::cast(string)->actual()->IsInternalizedString()); + return handle(Handle::cast(string)->actual(), isolate); + } if (string->IsConsString() && string->IsFlat()) { - string = String::Flatten(string); + string = handle(Handle::cast(string)->first(), isolate); if (string->IsInternalizedString()) return string; } InternalizedStringKey key(string); Handle result = LookupKey(isolate, &key); - if (string->IsConsString()) { - Handle cons = Handle::cast(string); - cons->set_first(*result); - cons->set_second(isolate->heap()->empty_string()); - } else if (string->IsSlicedString()) { - STATIC_ASSERT(ConsString::kSize == SlicedString::kSize); + if (string->IsExternalString()) { + if (result->IsExternalOneByteString()) { + MigrateExternalStringResource(isolate, string, + result); + } else if (result->IsExternalTwoByteString()) { + MigrateExternalStringResource(isolate, string, + result); + } + } + + // The LookupKey() call above tries to internalize the string in-place. + // In cases where that wasn't possible (e.g. new-space strings), turn them + // into ThinStrings referring to their internalized versions now. + if (!string->IsInternalizedString()) { DisallowHeapAllocation no_gc; bool one_byte = result->IsOneByteRepresentation(); - Handle map = one_byte ? isolate->factory()->cons_one_byte_string_map() - : isolate->factory()->cons_string_map(); - string->set_map(*map); - Handle cons = Handle::cast(string); - cons->set_first(*result); - cons->set_second(isolate->heap()->empty_string()); + Handle map = one_byte ? 
isolate->factory()->thin_one_byte_string_map() + : isolate->factory()->thin_string_map(); + int old_size = string->Size(); + DCHECK(old_size >= ThinString::kSize); + string->synchronized_set_map(*map); + Handle thin = Handle::cast(string); + thin->set_actual(*result); + Address thin_end = thin->address() + ThinString::kSize; + int size_delta = old_size - ThinString::kSize; + if (size_delta != 0) { + Heap* heap = isolate->heap(); + heap->CreateFillerObjectAt(thin_end, size_delta, ClearRecordedSlots::kNo); + heap->AdjustLiveBytes(*thin, -size_delta); + } } return result; } diff --git a/src/objects.h b/src/objects.h index 53c5e18c53..2d9235f23e 100644 --- a/src/objects.h +++ b/src/objects.h @@ -108,6 +108,7 @@ // - SeqTwoByteString // - SlicedString // - ConsString +// - ThinString // - ExternalString // - ExternalOneByteString // - ExternalTwoByteString @@ -333,10 +334,12 @@ const int kStubMinorKeyBits = kSmiValueSize - kStubMajorKeyBits - 1; V(CONS_STRING_TYPE) \ V(EXTERNAL_STRING_TYPE) \ V(SLICED_STRING_TYPE) \ + V(THIN_STRING_TYPE) \ V(ONE_BYTE_STRING_TYPE) \ V(CONS_ONE_BYTE_STRING_TYPE) \ V(EXTERNAL_ONE_BYTE_STRING_TYPE) \ V(SLICED_ONE_BYTE_STRING_TYPE) \ + V(THIN_ONE_BYTE_STRING_TYPE) \ V(EXTERNAL_STRING_WITH_ONE_BYTE_DATA_TYPE) \ V(SHORT_EXTERNAL_STRING_TYPE) \ V(SHORT_EXTERNAL_ONE_BYTE_STRING_TYPE) \ @@ -520,7 +523,10 @@ const int kStubMinorKeyBits = kSmiValueSize - kStubMajorKeyBits - 1; V(SHORT_EXTERNAL_INTERNALIZED_STRING_WITH_ONE_BYTE_DATA_TYPE, \ ExternalTwoByteString::kShortSize, \ short_external_internalized_string_with_one_byte_data, \ - ShortExternalInternalizedStringWithOneByteData) + ShortExternalInternalizedStringWithOneByteData) \ + V(THIN_STRING_TYPE, ThinString::kSize, thin_string, ThinString) \ + V(THIN_ONE_BYTE_STRING_TYPE, ThinString::kSize, thin_one_byte_string, \ + ThinOneByteString) // A struct is a simple object a set of object-valued fields. 
Including an // object type in this causes the compiler to generate most of the boilerplate @@ -572,20 +578,21 @@ const uint32_t kIsNotInternalizedMask = 0x40; const uint32_t kNotInternalizedTag = 0x40; const uint32_t kInternalizedTag = 0x0; -// If bit 7 is clear then bit 2 indicates whether the string consists of +// If bit 7 is clear then bit 3 indicates whether the string consists of // two-byte characters or one-byte characters. -const uint32_t kStringEncodingMask = 0x4; +const uint32_t kStringEncodingMask = 0x8; const uint32_t kTwoByteStringTag = 0x0; -const uint32_t kOneByteStringTag = 0x4; +const uint32_t kOneByteStringTag = 0x8; -// If bit 7 is clear, the low-order 2 bits indicate the representation +// If bit 7 is clear, the low-order 3 bits indicate the representation // of the string. -const uint32_t kStringRepresentationMask = 0x03; +const uint32_t kStringRepresentationMask = 0x07; enum StringRepresentationTag { kSeqStringTag = 0x0, kConsStringTag = 0x1, kExternalStringTag = 0x2, - kSlicedStringTag = 0x3 + kSlicedStringTag = 0x3, + kThinStringTag = 0x5 }; const uint32_t kIsIndirectStringMask = 0x1; const uint32_t kIsIndirectStringTag = 0x1; @@ -595,22 +602,17 @@ STATIC_ASSERT((kConsStringTag & kIsIndirectStringMask) == kIsIndirectStringTag); // NOLINT STATIC_ASSERT((kSlicedStringTag & kIsIndirectStringMask) == kIsIndirectStringTag); // NOLINT +STATIC_ASSERT((kThinStringTag & kIsIndirectStringMask) == kIsIndirectStringTag); -// Use this mask to distinguish between cons and slice only after making -// sure that the string is one of the two (an indirect string). -const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag; -STATIC_ASSERT(IS_POWER_OF_TWO(kSlicedNotConsMask)); - -// If bit 7 is clear, then bit 3 indicates whether this two-byte +// If bit 7 is clear, then bit 4 indicates whether this two-byte // string actually contains one byte data. 
-const uint32_t kOneByteDataHintMask = 0x08; -const uint32_t kOneByteDataHintTag = 0x08; +const uint32_t kOneByteDataHintMask = 0x10; +const uint32_t kOneByteDataHintTag = 0x10; // If bit 7 is clear and string representation indicates an external string, -// then bit 4 indicates whether the data pointer is cached. -const uint32_t kShortExternalStringMask = 0x10; -const uint32_t kShortExternalStringTag = 0x10; - +// then bit 5 indicates whether the data pointer is cached. +const uint32_t kShortExternalStringMask = 0x20; +const uint32_t kShortExternalStringTag = 0x20; // A ConsString with an empty string as the right side is a candidate // for being shortcut by the garbage collector. We don't allocate any @@ -674,6 +676,9 @@ enum InstanceType { SHORT_EXTERNAL_STRING_WITH_ONE_BYTE_DATA_TYPE = SHORT_EXTERNAL_INTERNALIZED_STRING_WITH_ONE_BYTE_DATA_TYPE | kNotInternalizedTag, + THIN_STRING_TYPE = kTwoByteStringTag | kThinStringTag | kNotInternalizedTag, + THIN_ONE_BYTE_STRING_TYPE = + kOneByteStringTag | kThinStringTag | kNotInternalizedTag, // Non-string names SYMBOL_TYPE = kNotStringTag, // FIRST_NONSTRING_TYPE, LAST_NAME_TYPE @@ -1028,6 +1033,7 @@ template inline bool Is(Object* obj); V(SeqTwoByteString) \ V(SeqOneByteString) \ V(InternalizedString) \ + V(ThinString) \ V(Symbol) \ \ V(FixedTypedArrayBase) \ @@ -9319,6 +9325,7 @@ class StringShape BASE_EMBEDDED { inline bool IsExternal(); inline bool IsCons(); inline bool IsSliced(); + inline bool IsThin(); inline bool IsIndirect(); inline bool IsExternalOneByte(); inline bool IsExternalTwoByte(); @@ -10032,6 +10039,34 @@ class ConsString: public String { DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString); }; +// The ThinString class describes string objects that are just references +// to another string object. 
They are used for in-place internalization when +// the original string cannot actually be internalized in-place: in these +// cases, the original string is converted to a ThinString pointing at its +// internalized version (which is allocated as a new object). +// In terms of memory layout and most algorithms operating on strings, +// ThinStrings can be thought of as "one-part cons strings". +class ThinString : public String { + public: + // Actual string that this ThinString refers to. + inline String* actual() const; + inline void set_actual(String* s, + WriteBarrierMode mode = UPDATE_WRITE_BARRIER); + + V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index); + + DECLARE_CAST(ThinString) + DECLARE_VERIFIER(ThinString) + + // Layout description. + static const int kActualOffset = String::kSize; + static const int kSize = kActualOffset + kPointerSize; + + typedef FixedBodyDescriptor BodyDescriptor; + + private: + DISALLOW_COPY_AND_ASSIGN(ThinString); +}; // The Sliced String class describes strings that are substrings of another // sequential string. The motivation is to save time and memory when creating diff --git a/src/ppc/code-stubs-ppc.cc b/src/ppc/code-stubs-ppc.cc index 9b84f5c0a6..3b3d9dcb49 100644 --- a/src/ppc/code-stubs-ppc.cc +++ b/src/ppc/code-stubs-ppc.cc @@ -1374,7 +1374,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (6) External string. Make it, offset-wise, look like a sequential string. // Go to (4). // (7) Short external string or not a string? If yes, bail out to runtime. - // (8) Sliced string. Replace subject with parent. Go to (1). + // (8) Sliced or thin string. Replace subject with parent. Go to (1). Label seq_string /* 4 */, external_string /* 6 */, check_underlying /* 1 */, not_seq_nor_cons /* 5 */, not_long_external /* 7 */; @@ -1395,6 +1395,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (2) Sequential or cons? If not, go to (5). 
STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); STATIC_ASSERT(kExternalStringTag < 0xffffu); @@ -1671,12 +1672,19 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ andi(r0, r4, Operand(kIsNotStringMask | kShortExternalStringMask)); __ bne(&runtime, cr0); - // (8) Sliced string. Replace subject with parent. Go to (4). + // (8) Sliced or thin string. Replace subject with parent. Go to (4). + Label thin_string; + __ cmpi(r4, Operand(kThinStringTag)); + __ beq(&thin_string); // Load offset into r11 and replace subject string with parent. __ LoadP(r11, FieldMemOperand(subject, SlicedString::kOffsetOffset)); __ SmiUntag(r11); __ LoadP(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); __ b(&check_underlying); // Go to (4). + + __ bind(&thin_string); + __ LoadP(subject, FieldMemOperand(subject, ThinString::kActualOffset)); + __ b(&check_underlying); // Go to (4). #endif // V8_INTERPRETED_REGEXP } diff --git a/src/ppc/codegen-ppc.cc b/src/ppc/codegen-ppc.cc index bb365b4e63..9109ca3fca 100644 --- a/src/ppc/codegen-ppc.cc +++ b/src/ppc/codegen-ppc.cc @@ -86,11 +86,13 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, Register string, __ andi(r0, result, Operand(kIsIndirectStringMask)); __ beq(&check_sequential, cr0); - // Dispatch on the indirect string shape: slice or cons. - Label cons_string; - __ mov(ip, Operand(kSlicedNotConsMask)); - __ and_(r0, result, ip, SetRC); - __ beq(&cons_string, cr0); + // Dispatch on the indirect string shape: slice or cons or thin. + Label cons_string, thin_string; + __ andi(ip, result, Operand(kStringRepresentationMask)); + __ cmpi(ip, Operand(kConsStringTag)); + __ beq(&cons_string); + __ cmpi(ip, Operand(kThinStringTag)); + __ beq(&thin_string); // Handle slices. 
Label indirect_string_loaded; @@ -100,6 +102,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, Register string, __ add(index, index, ip); __ b(&indirect_string_loaded); + // Handle thin strings. + __ bind(&thin_string); + __ LoadP(string, FieldMemOperand(string, ThinString::kActualOffset)); + __ b(&indirect_string_loaded); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not diff --git a/src/profiler/heap-snapshot-generator.cc b/src/profiler/heap-snapshot-generator.cc index fbb4e973d6..c84892b069 100644 --- a/src/profiler/heap-snapshot-generator.cc +++ b/src/profiler/heap-snapshot-generator.cc @@ -1165,6 +1165,10 @@ void V8HeapExplorer::ExtractStringReferences(int entry, String* string) { SlicedString* ss = SlicedString::cast(string); SetInternalReference(ss, entry, "parent", ss->parent(), SlicedString::kParentOffset); + } else if (string->IsThinString()) { + ThinString* ts = ThinString::cast(string); + SetInternalReference(ts, entry, "actual", ts->actual(), + ThinString::kActualOffset); } } diff --git a/src/regexp/regexp-macro-assembler.cc b/src/regexp/regexp-macro-assembler.cc index 0a7f5c1b9e..025ffb5660 100644 --- a/src/regexp/regexp-macro-assembler.cc +++ b/src/regexp/regexp-macro-assembler.cc @@ -133,6 +133,8 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition( } else if (subject->IsSlicedString()) { start_index += SlicedString::cast(subject)->offset(); subject = SlicedString::cast(subject)->parent(); + } else if (subject->IsThinString()) { + subject = ThinString::cast(subject)->actual(); } DCHECK(start_index >= 0); DCHECK(start_index <= subject->length()); @@ -238,6 +240,8 @@ NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match( SlicedString* slice = SlicedString::cast(subject_ptr); subject_ptr = slice->parent(); slice_offset = slice->offset(); + } else if (StringShape(subject_ptr).IsThin()) { + subject_ptr 
= ThinString::cast(subject_ptr)->actual(); } // Ensure that an underlying string has the same representation. bool is_one_byte = subject_ptr->IsOneByteRepresentation(); diff --git a/src/runtime/runtime-i18n.cc b/src/runtime/runtime-i18n.cc index 7554f99cbb..2d0c55eb21 100644 --- a/src/runtime/runtime-i18n.cc +++ b/src/runtime/runtime-i18n.cc @@ -865,6 +865,8 @@ MUST_USE_RESULT Object* LocaleConvertCase(Handle s, Isolate* isolate, Handle result; std::unique_ptr sap; + if (dest_length == 0) return isolate->heap()->empty_string(); + // This is not a real loop. It'll be executed only once (no overflow) or // twice (overflow). for (int i = 0; i < 2; ++i) { @@ -1086,7 +1088,7 @@ RUNTIME_FUNCTION(Runtime_StringToUpperCaseI18N) { int32_t length = s->length(); s = String::Flatten(s); - if (s->HasOnlyOneByteChars()) { + if (s->HasOnlyOneByteChars() && length > 0) { Handle result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked(); diff --git a/src/runtime/runtime-internal.cc b/src/runtime/runtime-internal.cc index 33534a786e..d9113753d0 100644 --- a/src/runtime/runtime-internal.cc +++ b/src/runtime/runtime-internal.cc @@ -298,6 +298,7 @@ RUNTIME_FUNCTION(Runtime_AllocateSeqOneByteString) { HandleScope scope(isolate); DCHECK_EQ(1, args.length()); CONVERT_SMI_ARG_CHECKED(length, 0); + if (length == 0) return isolate->heap()->empty_string(); Handle result; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, result, isolate->factory()->NewRawOneByteString(length)); @@ -308,6 +309,7 @@ RUNTIME_FUNCTION(Runtime_AllocateSeqTwoByteString) { HandleScope scope(isolate); DCHECK_EQ(1, args.length()); CONVERT_SMI_ARG_CHECKED(length, 0); + if (length == 0) return isolate->heap()->empty_string(); Handle result; ASSIGN_RETURN_FAILURE_ON_EXCEPTION( isolate, result, isolate->factory()->NewRawTwoByteString(length)); diff --git a/src/runtime/runtime-object.cc b/src/runtime/runtime-object.cc index 9d7a4569e7..397ad6c383 100644 --- a/src/runtime/runtime-object.cc +++ 
b/src/runtime/runtime-object.cc @@ -56,6 +56,14 @@ static MaybeHandle KeyedGetObjectProperty(Isolate* isolate, DisallowHeapAllocation no_allocation; Handle receiver = Handle::cast(receiver_obj); Handle key = Handle::cast(key_obj); + // Get to a ThinString's referenced internalized string, but don't + // otherwise force internalization. We assume that internalization + // (which is a dictionary lookup with a non-internalized key) is + // about as expensive as doing the property dictionary lookup with + // the non-internalized key directly. + if (key->IsThinString()) { + key = handle(Handle::cast(key)->actual(), isolate); + } if (receiver->IsJSGlobalObject()) { // Attempt dictionary lookup. GlobalDictionary* dictionary = receiver->global_dictionary(); diff --git a/src/runtime/runtime-regexp.cc b/src/runtime/runtime-regexp.cc index 9a489ecff8..4fd2e37aa7 100644 --- a/src/runtime/runtime-regexp.cc +++ b/src/runtime/runtime-regexp.cc @@ -431,6 +431,9 @@ MUST_USE_RESULT static Object* StringReplaceGlobalAtomRegExpWithString( } else { result_len = static_cast(result_len_64); } + if (result_len == 0) { + return isolate->heap()->empty_string(); + } int subject_pos = 0; int result_pos = 0; diff --git a/src/runtime/runtime-strings.cc b/src/runtime/runtime-strings.cc index 31d9f1fc6e..a126e8cd63 100644 --- a/src/runtime/runtime-strings.cc +++ b/src/runtime/runtime-strings.cc @@ -263,6 +263,9 @@ RUNTIME_FUNCTION(Runtime_StringBuilderConcat) { if (length == -1) { return isolate->Throw(isolate->heap()->illegal_argument_string()); } + if (length == 0) { + return isolate->heap()->empty_string(); + } if (one_byte) { Handle answer; diff --git a/src/s390/code-stubs-s390.cc b/src/s390/code-stubs-s390.cc index e670c2e3a4..ed3c423bfb 100644 --- a/src/s390/code-stubs-s390.cc +++ b/src/s390/code-stubs-s390.cc @@ -1371,7 +1371,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (6) External string. Make it, offset-wise, look like a sequential string. // Go to (4). 
// (7) Short external string or not a string? If yes, bail out to runtime. - // (8) Sliced string. Replace subject with parent. Go to (1). + // (8) Sliced or thin string. Replace subject with parent. Go to (1). Label seq_string /* 4 */, external_string /* 6 */, check_underlying /* 1 */, not_seq_nor_cons /* 5 */, not_long_external /* 7 */; @@ -1393,6 +1393,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (2) Sequential or cons? If not, go to (5). STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); STATIC_ASSERT(kExternalStringTag < 0xffffu); @@ -1680,12 +1681,19 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ AndP(r0, r3); __ bne(&runtime); - // (8) Sliced string. Replace subject with parent. Go to (4). + // (8) Sliced or thin string. Replace subject with parent. Go to (4). + Label thin_string; + __ CmpP(r3, Operand(kThinStringTag)); + __ beq(&thin_string); // Load offset into ip and replace subject string with parent. __ LoadP(ip, FieldMemOperand(subject, SlicedString::kOffsetOffset)); __ SmiUntag(ip); __ LoadP(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); __ b(&check_underlying); // Go to (4). + + __ bind(&thin_string); + __ LoadP(subject, FieldMemOperand(subject, ThinString::kActualOffset)); + __ b(&check_underlying); // Go to (4). #endif // V8_INTERPRETED_REGEXP } diff --git a/src/s390/codegen-s390.cc b/src/s390/codegen-s390.cc index 02cc8c206c..7ab1444357 100644 --- a/src/s390/codegen-s390.cc +++ b/src/s390/codegen-s390.cc @@ -81,11 +81,13 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, Register string, __ beq(&check_sequential, Label::kNear /*, cr0*/); // Dispatch on the indirect string shape: slice or cons. 
- Label cons_string; - __ mov(ip, Operand(kSlicedNotConsMask)); - __ LoadRR(r0, result); - __ AndP(r0, ip /*, SetRC*/); // Should be okay to remove RC - __ beq(&cons_string, Label::kNear /*, cr0*/); + Label cons_string, thin_string; + __ LoadRR(ip, result); + __ nilf(ip, Operand(kStringRepresentationMask)); + __ CmpP(ip, Operand(kConsStringTag)); + __ beq(&cons_string); + __ CmpP(ip, Operand(kThinStringTag)); + __ beq(&thin_string); // Handle slices. Label indirect_string_loaded; @@ -95,6 +97,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, Register string, __ AddP(index, ip); __ b(&indirect_string_loaded, Label::kNear); + // Handle thin strings. + __ bind(&thin_string); + __ LoadP(string, FieldMemOperand(string, ThinString::kActualOffset)); + __ b(&indirect_string_loaded, Label::kNear); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not diff --git a/src/value-serializer.cc b/src/value-serializer.cc index 0e3a6a6e1e..90cb9c92af 100644 --- a/src/value-serializer.cc +++ b/src/value-serializer.cc @@ -1135,8 +1135,9 @@ MaybeHandle ValueDeserializer::ReadUtf8String() { if (!ReadVarint().To(&utf8_length) || utf8_length > static_cast(std::numeric_limits::max()) || - !ReadRawBytes(utf8_length).To(&utf8_bytes)) + !ReadRawBytes(utf8_length).To(&utf8_bytes)) { return MaybeHandle(); + } return isolate_->factory()->NewStringFromUtf8( Vector::cast(utf8_bytes), pretenure_); } @@ -1147,16 +1148,20 @@ MaybeHandle ValueDeserializer::ReadTwoByteString() { if (!ReadVarint().To(&byte_length) || byte_length > static_cast(std::numeric_limits::max()) || - byte_length % sizeof(uc16) != 0 || !ReadRawBytes(byte_length).To(&bytes)) + byte_length % sizeof(uc16) != 0 || + !ReadRawBytes(byte_length).To(&bytes)) { return MaybeHandle(); + } // Allocate an uninitialized string so that we can do a raw memcpy into the // string on the heap (regardless of alignment). 
+ if (byte_length == 0) return isolate_->factory()->empty_string(); Handle string; if (!isolate_->factory() ->NewRawTwoByteString(byte_length / sizeof(uc16), pretenure_) - .ToHandle(&string)) + .ToHandle(&string)) { return MaybeHandle(); + } // Copy the bytes directly into the new string. // Warning: this uses host endianness. diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc index 614ce93efc..a0b77c2aa2 100644 --- a/src/x64/code-stubs-x64.cc +++ b/src/x64/code-stubs-x64.cc @@ -484,7 +484,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (8) Is the external string one byte? If yes, go to (5). // (9) Two byte sequential. Load regexp code for two byte. Go to (E). // (10) Short external string or not a string? If yes, bail out to runtime. - // (11) Sliced string. Replace subject with parent. Go to (1). + // (11) Sliced or thin string. Replace subject with parent. Go to (1). Label seq_one_byte_string /* 5 */, seq_two_byte_string /* 9 */, external_string /* 7 */, check_underlying /* 1 */, @@ -514,6 +514,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // have already been covered. STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmpp(rbx, Immediate(kExternalStringTag)); @@ -802,11 +803,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ testb(rbx, Immediate(kIsNotStringMask | kShortExternalStringMask)); __ j(not_zero, &runtime); - // (11) Sliced string. Replace subject with parent. Go to (1). + // (11) Sliced or thin string. Replace subject with parent. Go to (1). + Label thin_string; + __ cmpl(rbx, Immediate(kThinStringTag)); + __ j(equal, &thin_string, Label::kNear); // Load offset into r14 and replace subject string with parent. 
__ SmiToInteger32(r14, FieldOperand(rdi, SlicedString::kOffsetOffset)); __ movp(rdi, FieldOperand(rdi, SlicedString::kParentOffset)); __ jmp(&check_underlying); + + __ bind(&thin_string); + __ movp(rdi, FieldOperand(rdi, ThinString::kActualOffset)); + __ jmp(&check_underlying); #endif // V8_INTERPRETED_REGEXP } diff --git a/src/x64/codegen-x64.cc b/src/x64/codegen-x64.cc index 2432d7ed4f..19a459bf9b 100644 --- a/src/x64/codegen-x64.cc +++ b/src/x64/codegen-x64.cc @@ -77,9 +77,12 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ j(zero, &check_sequential, Label::kNear); // Dispatch on the indirect string shape: slice or cons. - Label cons_string; - __ testb(result, Immediate(kSlicedNotConsMask)); - __ j(zero, &cons_string, Label::kNear); + Label cons_string, thin_string; + __ andl(result, Immediate(kStringRepresentationMask)); + __ cmpl(result, Immediate(kConsStringTag)); + __ j(equal, &cons_string, Label::kNear); + __ cmpl(result, Immediate(kThinStringTag)); + __ j(equal, &thin_string, Label::kNear); // Handle slices. Label indirect_string_loaded; @@ -88,6 +91,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ movp(string, FieldOperand(string, SlicedString::kParentOffset)); __ jmp(&indirect_string_loaded, Label::kNear); + // Handle thin strings. + __ bind(&thin_string); + __ movp(string, FieldOperand(string, ThinString::kActualOffset)); + __ jmp(&indirect_string_loaded, Label::kNear); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). 
If that is not diff --git a/src/x64/macro-assembler-x64.cc b/src/x64/macro-assembler-x64.cc index d695d0ed90..26a3dae1c6 100644 --- a/src/x64/macro-assembler-x64.cc +++ b/src/x64/macro-assembler-x64.cc @@ -2540,10 +2540,12 @@ void MacroAssembler::JumpIfNotBothSequentialOneByteStrings( andl(scratch1, Immediate(kFlatOneByteStringMask)); andl(scratch2, Immediate(kFlatOneByteStringMask)); // Interleave the bits to check both scratch1 and scratch2 in one test. - DCHECK_EQ(0, kFlatOneByteStringMask & (kFlatOneByteStringMask << 3)); - leap(scratch1, Operand(scratch1, scratch2, times_8, 0)); + const int kShift = 8; + DCHECK_EQ(0, kFlatOneByteStringMask & (kFlatOneByteStringMask << kShift)); + shlp(scratch2, Immediate(kShift)); + orp(scratch1, scratch2); cmpl(scratch1, - Immediate(kFlatOneByteStringTag + (kFlatOneByteStringTag << 3))); + Immediate(kFlatOneByteStringTag + (kFlatOneByteStringTag << kShift))); j(not_equal, on_fail, near_jump); } diff --git a/src/x87/code-stubs-x87.cc b/src/x87/code-stubs-x87.cc index 31ee17bdcf..4d161d4fed 100644 --- a/src/x87/code-stubs-x87.cc +++ b/src/x87/code-stubs-x87.cc @@ -425,7 +425,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // (8) Is the external string one byte? If yes, go to (5). // (9) Two byte sequential. Load regexp code for two byte. Go to (E). // (10) Short external string or not a string? If yes, bail out to runtime. - // (11) Sliced string. Replace subject with parent. Go to (1). + // (11) Sliced or thin string. Replace subject with parent. Go to (1). Label seq_one_byte_string /* 5 */, seq_two_byte_string /* 9 */, external_string /* 7 */, check_underlying /* 1 */, @@ -455,6 +455,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // have already been covered. 
STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); + STATIC_ASSERT(kThinStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmp(ebx, Immediate(kExternalStringTag)); @@ -733,11 +734,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ test(ebx, Immediate(kIsNotStringMask | kShortExternalStringTag)); __ j(not_zero, &runtime); - // (11) Sliced string. Replace subject with parent. Go to (1). + // (11) Sliced or thin string. Replace subject with parent. Go to (1). + Label thin_string; + __ cmp(ebx, Immediate(kThinStringTag)); + __ j(equal, &thin_string, Label::kNear); // Load offset into edi and replace subject string with parent. __ mov(edi, FieldOperand(eax, SlicedString::kOffsetOffset)); __ mov(eax, FieldOperand(eax, SlicedString::kParentOffset)); __ jmp(&check_underlying); // Go to (1). + + __ bind(&thin_string); + __ mov(eax, FieldOperand(eax, ThinString::kActualOffset)); + __ jmp(&check_underlying); // Go to (1). #endif // V8_INTERPRETED_REGEXP } diff --git a/src/x87/codegen-x87.cc b/src/x87/codegen-x87.cc index a2bba1dcd7..846b2987e1 100644 --- a/src/x87/codegen-x87.cc +++ b/src/x87/codegen-x87.cc @@ -228,9 +228,12 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ j(zero, &check_sequential, Label::kNear); // Dispatch on the indirect string shape: slice or cons. - Label cons_string; - __ test(result, Immediate(kSlicedNotConsMask)); - __ j(zero, &cons_string, Label::kNear); + Label cons_string, thin_string; + __ and_(result, Immediate(kStringRepresentationMask)); + __ cmp(result, Immediate(kConsStringTag)); + __ j(equal, &cons_string, Label::kNear); + __ cmp(result, Immediate(kThinStringTag)); + __ j(equal, &thin_string, Label::kNear); // Handle slices. 
Label indirect_string_loaded; @@ -240,6 +243,11 @@ void StringCharLoadGenerator::Generate(MacroAssembler* masm, __ mov(string, FieldOperand(string, SlicedString::kParentOffset)); __ jmp(&indirect_string_loaded, Label::kNear); + // Handle thin strings. + __ bind(&thin_string); + __ mov(string, FieldOperand(string, ThinString::kActualOffset)); + __ jmp(&indirect_string_loaded, Label::kNear); + // Handle cons strings. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not diff --git a/src/x87/macro-assembler-x87.cc b/src/x87/macro-assembler-x87.cc index 005fa053ac..3b3dcaf53f 100644 --- a/src/x87/macro-assembler-x87.cc +++ b/src/x87/macro-assembler-x87.cc @@ -2415,11 +2415,13 @@ void MacroAssembler::JumpIfNotBothSequentialOneByteStrings(Register object1, const int kFlatOneByteStringTag = kStringTag | kOneByteStringTag | kSeqStringTag; // Interleave bits from both instance types and compare them in one check. - DCHECK_EQ(0, kFlatOneByteStringMask & (kFlatOneByteStringMask << 3)); + const int kShift = 8; + DCHECK_EQ(0, kFlatOneByteStringMask & (kFlatOneByteStringMask << kShift)); and_(scratch1, kFlatOneByteStringMask); and_(scratch2, kFlatOneByteStringMask); - lea(scratch1, Operand(scratch1, scratch2, times_8, 0)); - cmp(scratch1, kFlatOneByteStringTag | (kFlatOneByteStringTag << 3)); + shl(scratch2, kShift); + or_(scratch1, scratch2); + cmp(scratch1, kFlatOneByteStringTag | (kFlatOneByteStringTag << kShift)); j(not_equal, failure); } diff --git a/test/cctest/test-code-stub-assembler.cc b/test/cctest/test-code-stub-assembler.cc index 7fcb32f6c5..eaa09eee84 100644 --- a/test/cctest/test-code-stub-assembler.cc +++ b/test/cctest/test-code-stub-assembler.cc @@ -239,22 +239,26 @@ TEST(TryToName) { Label passed(&m), failed(&m); Label if_keyisindex(&m), if_keyisunique(&m), if_bailout(&m); - Variable var_index(&m, MachineType::PointerRepresentation()); + { + Variable var_index(&m, 
MachineType::PointerRepresentation()); + Variable var_unique(&m, MachineRepresentation::kTagged); - m.TryToName(key, &if_keyisindex, &var_index, &if_keyisunique, &if_bailout); + m.TryToName(key, &if_keyisindex, &var_index, &if_keyisunique, &var_unique, + &if_bailout); - m.Bind(&if_keyisindex); - m.GotoUnless( - m.WordEqual(expected_result, m.SmiConstant(Smi::FromInt(kKeyIsIndex))), - &failed); - m.Branch(m.WordEqual(m.SmiUntag(expected_arg), var_index.value()), &passed, - &failed); + m.Bind(&if_keyisindex); + m.GotoUnless(m.WordEqual(expected_result, + m.SmiConstant(Smi::FromInt(kKeyIsIndex))), + &failed); + m.Branch(m.WordEqual(m.SmiUntag(expected_arg), var_index.value()), + &passed, &failed); - m.Bind(&if_keyisunique); - m.GotoUnless( - m.WordEqual(expected_result, m.SmiConstant(Smi::FromInt(kKeyIsUnique))), - &failed); - m.Branch(m.WordEqual(expected_arg, key), &passed, &failed); + m.Bind(&if_keyisunique); + m.GotoUnless(m.WordEqual(expected_result, + m.SmiConstant(Smi::FromInt(kKeyIsUnique))), + &failed); + m.Branch(m.WordEqual(expected_arg, var_unique.value()), &passed, &failed); + } m.Bind(&if_bailout); m.Branch( @@ -350,6 +354,23 @@ TEST(TryToName) { Handle key = isolate->factory()->NewStringFromAsciiChecked("test"); ft.CheckTrue(key, expect_bailout); } + + { + // TryToName() => internalized version. + Handle s = isolate->factory()->NewStringFromAsciiChecked("foo"); + Handle internalized = isolate->factory()->InternalizeString(s); + ft.CheckTrue(s, expect_unique, internalized); + } + + { + // TryToName() => internalized version. 
+    uc16 array1[] = {2001, 2002, 2003};
+    Vector<const uc16> str1(array1);
+    Handle<String> s =
+        isolate->factory()->NewStringFromTwoByte(str1).ToHandleChecked();
+    Handle<String> internalized = isolate->factory()->InternalizeString(s);
+    ft.CheckTrue(s, expect_unique, internalized);
+  }
 }
 
 namespace {
diff --git a/test/cctest/test-strings.cc b/test/cctest/test-strings.cc
index d7b51f8274..ff3b7bddaa 100644
--- a/test/cctest/test-strings.cc
+++ b/test/cctest/test-strings.cc
@@ -1195,6 +1195,31 @@ class OneByteVectorResource : public v8::String::ExternalOneByteStringResource {
   i::Vector<const char> data_;
 };
 
+TEST(InternalizeExternal) {
+  i::Isolate* isolate = CcTest::i_isolate();
+  Factory* factory = isolate->factory();
+  // This won't leak; the external string mechanism will call Dispose() on it.
+  OneByteVectorResource* resource =
+      new OneByteVectorResource(i::Vector<const char>("prop", 4));
+  {
+    v8::HandleScope scope(CcTest::isolate());
+    v8::Local<v8::String> ext_string =
+        v8::String::NewExternalOneByte(CcTest::isolate(), resource)
+            .ToLocalChecked();
+    Handle<String> string = v8::Utils::OpenHandle(*ext_string);
+    CHECK(string->IsExternalString());  // Preconditions: external,
+    CHECK(!string->IsInternalizedString());  // not yet internalized,
+    CHECK(isolate->heap()->InNewSpace(*string));  // and in new space.
+    factory->InternalizeName(string);  // Result unused; |string| is rechecked.
+    CHECK(string->IsThinString());  // Same handle now reports a ThinString.
+    CcTest::CollectGarbage(i::NEW_SPACE);
+    CcTest::CollectGarbage(i::NEW_SPACE);
+    CHECK(string->IsInternalizedString());  // State after two new-space GCs.
+    CHECK(!isolate->heap()->InNewSpace(*string));
+  }
+  CcTest::CollectGarbage(i::OLD_SPACE);  // Handle scope is closed by now.
+  CcTest::CollectGarbage(i::OLD_SPACE);
+}
 
 TEST(SliceFromExternal) {
   FLAG_string_slices = true;
diff --git a/test/mjsunit/thin-strings.js b/test/mjsunit/thin-strings.js
new file mode 100644
index 0000000000..817570de11
--- /dev/null
+++ b/test/mjsunit/thin-strings.js
@@ -0,0 +1,38 @@
+// Copyright 2016 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Flags: --allow-natives-syntax
+
+function get_thin_string(a, b) {  // Returns a + b, used once as a key.
+  var str = a + b;  // Concatenate; meant to make a ConsString.
+  var o = {};
+  o[str];  // Keyed lookup on str; meant to turn it into a ThinString.
+  return str;
+}
+
+var str = get_thin_string("foo", "bar");
+
+var re = /.o+ba./;  // Expected below to match the whole of "foobar".
+assertEquals(["foobar"], re.exec(str));
+assertEquals(["foobar"], re.exec(str));
+assertEquals(["foobar"], re.exec(str));
+
+function CheckCS() {  // Same checks as CheckTF, minus the try/catch.
+  assertEquals("o", str.substring(1, 2));
+  assertEquals("f".charCodeAt(0), str.charCodeAt(0));
+  assertEquals("f", str.split(/oo/)[0]);
+}
+CheckCS();  // Runs before optimization is requested.
+%OptimizeFunctionOnNextCall(CheckCS);
+CheckCS();  // Runs after optimization was requested.
+
+function CheckTF() {
+  try {} catch(e) {}  // Turbofan (presumably steers compilation; confirm).
+  assertEquals("o", str.substring(1, 2));
+  assertEquals("f".charCodeAt(0), str.charCodeAt(0));
+  assertEquals("f", str.split(/oo/)[0]);
+}
+CheckTF();  // Runs before optimization is requested.
+%OptimizeFunctionOnNextCall(CheckTF);
+CheckTF();  // Runs after optimization was requested.