// Copyright 2014 the V8 project authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef V8_STRING_BUILDER_H_ #define V8_STRING_BUILDER_H_ #include "src/assert-scope.h" #include "src/factory.h" #include "src/handles.h" #include "src/isolate.h" #include "src/objects.h" #include "src/utils.h" namespace v8 { namespace internal { const int kStringBuilderConcatHelperLengthBits = 11; const int kStringBuilderConcatHelperPositionBits = 19; typedef BitField StringBuilderSubstringLength; typedef BitField StringBuilderSubstringPosition; template static inline void StringBuilderConcatHelper(String* special, sinkchar* sink, FixedArray* fixed_array, int array_length) { DisallowHeapAllocation no_gc; int position = 0; for (int i = 0; i < array_length; i++) { Object* element = fixed_array->get(i); if (element->IsSmi()) { // Smi encoding of position and length. int encoded_slice = Smi::ToInt(element); int pos; int len; if (encoded_slice > 0) { // Position and length encoded in one smi. pos = StringBuilderSubstringPosition::decode(encoded_slice); len = StringBuilderSubstringLength::decode(encoded_slice); } else { // Position and length encoded in two smis. Object* obj = fixed_array->get(++i); DCHECK(obj->IsSmi()); pos = Smi::ToInt(obj); len = -encoded_slice; } String::WriteToFlat(special, sink + position, pos, pos + len); position += len; } else { String* string = String::cast(element); int element_length = string->length(); String::WriteToFlat(string, sink + position, 0, element_length); position += element_length; } } } // Returns the result length of the concatenation. // On illegal argument, -1 is returned. static inline int StringBuilderConcatLength(int special_length, FixedArray* fixed_array, int array_length, bool* one_byte) { DisallowHeapAllocation no_gc; int position = 0; for (int i = 0; i < array_length; i++) { int increment = 0; Object* elt = fixed_array->get(i); if (elt->IsSmi()) { // Smi encoding of position and length. int smi_value = Smi::ToInt(elt); int pos; int len; if (smi_value > 0) { // Position and length encoded in one smi. pos = StringBuilderSubstringPosition::decode(smi_value); len = StringBuilderSubstringLength::decode(smi_value); } else { // Position and length encoded in two smis. len = -smi_value; // Get the position and check that it is a positive smi. i++; if (i >= array_length) return -1; Object* next_smi = fixed_array->get(i); if (!next_smi->IsSmi()) return -1; pos = Smi::ToInt(next_smi); if (pos < 0) return -1; } DCHECK_GE(pos, 0); DCHECK_GE(len, 0); if (pos > special_length || len > special_length - pos) return -1; increment = len; } else if (elt->IsString()) { String* element = String::cast(elt); int element_length = element->length(); increment = element_length; if (*one_byte && !element->HasOnlyOneByteChars()) { *one_byte = false; } } else { return -1; } if (increment > String::kMaxLength - position) { return kMaxInt; // Provoke throw on allocation. } position += increment; } return position; } class FixedArrayBuilder { public: explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity) : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)), length_(0), has_non_smi_elements_(false) { // Require a non-zero initial size. Ensures that doubling the size to // extend the array will work. DCHECK_GT(initial_capacity, 0); } explicit FixedArrayBuilder(Handle backing_store) : array_(backing_store), length_(0), has_non_smi_elements_(false) { // Require a non-zero initial size. Ensures that doubling the size to // extend the array will work. DCHECK_GT(backing_store->length(), 0); } bool HasCapacity(int elements) { int length = array_->length(); int required_length = length_ + elements; return (length >= required_length); } void EnsureCapacity(int elements) { int length = array_->length(); int required_length = length_ + elements; if (length < required_length) { int new_length = length; do { new_length *= 2; } while (new_length < required_length); Handle extended_array = array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length); array_->CopyTo(0, *extended_array, 0, length_); array_ = extended_array; } } void Add(Object* value) { DCHECK(!value->IsSmi()); DCHECK(length_ < capacity()); array_->set(length_, value); length_++; has_non_smi_elements_ = true; } void Add(Smi* value) { DCHECK(value->IsSmi()); DCHECK(length_ < capacity()); array_->set(length_, value); length_++; } Handle array() { return array_; } int length() { return length_; } int capacity() { return array_->length(); } Handle ToJSArray(Handle target_array) { JSArray::SetContent(target_array, array_); target_array->set_length(Smi::FromInt(length_)); return target_array; } private: Handle array_; int length_; bool has_non_smi_elements_; }; class ReplacementStringBuilder { public: ReplacementStringBuilder(Heap* heap, Handle subject, int estimated_part_count) : heap_(heap), array_builder_(heap->isolate(), estimated_part_count), subject_(subject), character_count_(0), is_one_byte_(subject->IsOneByteRepresentation()) { // Require a non-zero initial size. Ensures that doubling the size to // extend the array will work. DCHECK_GT(estimated_part_count, 0); } static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from, int to) { DCHECK_GE(from, 0); int length = to - from; DCHECK_GT(length, 0); if (StringBuilderSubstringLength::is_valid(length) && StringBuilderSubstringPosition::is_valid(from)) { int encoded_slice = StringBuilderSubstringLength::encode(length) | StringBuilderSubstringPosition::encode(from); builder->Add(Smi::FromInt(encoded_slice)); } else { // Otherwise encode as two smis. builder->Add(Smi::FromInt(-length)); builder->Add(Smi::FromInt(from)); } } void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); } void AddSubjectSlice(int from, int to) { AddSubjectSlice(&array_builder_, from, to); IncrementCharacterCount(to - from); } void AddString(Handle string) { int length = string->length(); DCHECK_GT(length, 0); AddElement(*string); if (!string->IsOneByteRepresentation()) { is_one_byte_ = false; } IncrementCharacterCount(length); } MaybeHandle ToString(); void IncrementCharacterCount(int by) { if (character_count_ > String::kMaxLength - by) { STATIC_ASSERT(String::kMaxLength < kMaxInt); character_count_ = kMaxInt; } else { character_count_ += by; } } private: void AddElement(Object* element) { DCHECK(element->IsSmi() || element->IsString()); DCHECK(array_builder_.capacity() > array_builder_.length()); array_builder_.Add(element); } Heap* heap_; FixedArrayBuilder array_builder_; Handle subject_; int character_count_; bool is_one_byte_; }; class IncrementalStringBuilder { public: explicit IncrementalStringBuilder(Isolate* isolate); INLINE(String::Encoding CurrentEncoding()) { return encoding_; } template INLINE(void Append(SrcChar c)); INLINE(void AppendCharacter(uint8_t c)) { if (encoding_ == String::ONE_BYTE_ENCODING) { Append(c); } else { Append(c); } } INLINE(void AppendCString(const char* s)) { const uint8_t* u = reinterpret_cast(s); if (encoding_ == String::ONE_BYTE_ENCODING) { while (*u != '\0') Append(*(u++)); } else { while (*u != '\0') Append(*(u++)); } } INLINE(void AppendCString(const uc16* s)) { if (encoding_ == String::ONE_BYTE_ENCODING) { while (*s != '\0') Append(*(s++)); } else { while (*s != '\0') Append(*(s++)); } } INLINE(bool CurrentPartCanFit(int length)) { return part_length_ - current_index_ > length; } // We make a rough estimate to find out if the current string can be // serialized without allocating a new string part. The worst case length of // an escaped character is 6. Shifting the remaining string length right by 3 // is a more pessimistic estimate, but faster to calculate. INLINE(int EscapedLengthIfCurrentPartFits(int length)) { if (length > kMaxPartLength) return 0; STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength); // This shift will not overflow because length is already less than the // maximum part length. int worst_case_length = length << 3; return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0; } void AppendString(Handle string); MaybeHandle Finish(); INLINE(bool HasOverflowed()) const { return overflowed_; } INLINE(int Length()) const { return accumulator_->length() + current_index_; } // Change encoding to two-byte. void ChangeEncoding() { DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); ShrinkCurrentPart(); encoding_ = String::TWO_BYTE_ENCODING; Extend(); } template class NoExtend { public: explicit NoExtend(Handle string, int offset) { DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString()); if (sizeof(DestChar) == 1) { start_ = reinterpret_cast( Handle::cast(string)->GetChars() + offset); } else { start_ = reinterpret_cast( Handle::cast(string)->GetChars() + offset); } cursor_ = start_; } INLINE(void Append(DestChar c)) { *(cursor_++) = c; } INLINE(void AppendCString(const char* s)) { const uint8_t* u = reinterpret_cast(s); while (*u != '\0') Append(*(u++)); } int written() { return static_cast(cursor_ - start_); } private: DestChar* start_; DestChar* cursor_; DisallowHeapAllocation no_gc_; }; template class NoExtendString : public NoExtend { public: NoExtendString(Handle string, int required_length) : NoExtend(string, 0), string_(string) { DCHECK(string->length() >= required_length); } Handle Finalize() { Handle string = Handle::cast(string_); int length = NoExtend::written(); Handle result = SeqString::Truncate(string, length); string_ = Handle(); return result; } private: Handle string_; }; template class NoExtendBuilder : public NoExtend { public: NoExtendBuilder(IncrementalStringBuilder* builder, int required_length) : NoExtend(builder->current_part(), builder->current_index_), builder_(builder) { DCHECK(builder->CurrentPartCanFit(required_length)); } ~NoExtendBuilder() { builder_->current_index_ += NoExtend::written(); } private: IncrementalStringBuilder* builder_; }; private: Factory* factory() { return isolate_->factory(); } INLINE(Handle accumulator()) { return accumulator_; } INLINE(void set_accumulator(Handle string)) { *accumulator_.location() = *string; } INLINE(Handle current_part()) { return current_part_; } INLINE(void set_current_part(Handle string)) { *current_part_.location() = *string; } // Add the current part to the accumulator. void Accumulate(Handle new_part); // Finish the current part and allocate a new part. void Extend(); // Shrink current part to the right size. void ShrinkCurrentPart() { DCHECK(current_index_ < part_length_); set_current_part(SeqString::Truncate( Handle::cast(current_part()), current_index_)); } static const int kInitialPartLength = 32; static const int kMaxPartLength = 16 * 1024; static const int kPartLengthGrowthFactor = 2; Isolate* isolate_; String::Encoding encoding_; bool overflowed_; int part_length_; int current_index_; Handle accumulator_; Handle current_part_; }; template void IncrementalStringBuilder::Append(SrcChar c) { DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1); if (sizeof(DestChar) == 1) { DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_); SeqOneByteString::cast(*current_part_) ->SeqOneByteStringSet(current_index_++, c); } else { DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_); SeqTwoByteString::cast(*current_part_) ->SeqTwoByteStringSet(current_index_++, c); } if (current_index_ == part_length_) Extend(); } } // namespace internal } // namespace v8 #endif // V8_STRING_BUILDER_H_