From 9eb7d4a53c5e7d40d1151a187da7d1e097034f78 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Tue, 23 Aug 2011 12:22:12 +0000 Subject: [PATCH] Replace ToAsciiVector and ToUC16Vector with single function that returns a tagged value. The tag tells whether the content is ASCII or UC16, or even if the string wasn't flat. BUG: v8:1633 Review URL: http://codereview.chromium.org/7709024 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8999 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/handles.cc | 8 +- src/hydrogen-instructions.cc | 3 +- src/interpreter-irregexp.cc | 10 +- src/jsregexp.cc | 48 ++++---- src/objects-inl.h | 5 + src/objects.cc | 129 ++++++++++----------- src/objects.h | 60 +++++++++- src/runtime.cc | 219 +++++++++++++++++++---------------- test/mjsunit/string-split.js | 11 ++ 9 files changed, 288 insertions(+), 205 deletions(-) diff --git a/src/handles.cc b/src/handles.cc index c9984aa92f..e7d0f9b7e9 100644 --- a/src/handles.cc +++ b/src/handles.cc @@ -617,15 +617,17 @@ Handle CalculateLineEnds(Handle src, { AssertNoAllocation no_heap_allocation; // ensure vectors stay valid. // Dispatch on type of strings. 
- if (src->IsAsciiRepresentation()) { + String::FlatContent content = src->GetFlatContent(no_heap_allocation); + ASSERT(content.IsFlat()); + if (content.IsAscii()) { CalculateLineEnds(isolate, &line_ends, - src->ToAsciiVector(), + content.ToAsciiVector(), with_last_line); } else { CalculateLineEnds(isolate, &line_ends, - src->ToUC16Vector(), + content.ToUC16Vector(), with_last_line); } } diff --git a/src/hydrogen-instructions.cc b/src/hydrogen-instructions.cc index 176218616e..df22bfde93 100644 --- a/src/hydrogen-instructions.cc +++ b/src/hydrogen-instructions.cc @@ -778,7 +778,8 @@ void HHasInstanceTypeAndBranch::PrintDataTo(StringStream* stream) { void HTypeofIsAndBranch::PrintDataTo(StringStream* stream) { value()->PrintNameTo(stream); stream->Add(" == "); - stream->Add(type_literal_->ToAsciiVector()); + AssertNoAllocation no_alloc; + stream->Add(type_literal_->GetFlatContent(no_alloc).ToAsciiVector()); } diff --git a/src/interpreter-irregexp.cc b/src/interpreter-irregexp.cc index 1c6c52ca89..45f60755da 100644 --- a/src/interpreter-irregexp.cc +++ b/src/interpreter-irregexp.cc @@ -1,4 +1,4 @@ -// Copyright 2008 the V8 project authors. All rights reserved. +// Copyright 2011 the V8 project authors. All rights reserved. 
// Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -635,8 +635,9 @@ bool IrregexpInterpreter::Match(Isolate* isolate, AssertNoAllocation a; const byte* code_base = code_array->GetDataStartAddress(); uc16 previous_char = '\n'; - if (subject->IsAsciiRepresentation()) { - Vector subject_vector = subject->ToAsciiVector(); + String::FlatContent subject_content = subject->GetFlatContent(a); + if (subject_content.IsAscii()) { + Vector subject_vector = subject_content.ToAsciiVector(); if (start_position != 0) previous_char = subject_vector[start_position - 1]; return RawMatch(isolate, code_base, @@ -645,7 +646,8 @@ bool IrregexpInterpreter::Match(Isolate* isolate, start_position, previous_char); } else { - Vector subject_vector = subject->ToUC16Vector(); + ASSERT(subject_content.IsTwoByte()); + Vector subject_vector = subject_content.ToUC16Vector(); if (start_position != 0) previous_char = subject_vector[start_position - 1]; return RawMatch(isolate, code_base, diff --git a/src/jsregexp.cc b/src/jsregexp.cc index af644376b5..ed7b9689ff 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -212,19 +212,7 @@ static void SetAtomLastCapture(FixedArray* array, RegExpImpl::SetCapture(array, 1, to); } - /* template , typename PatternChar> -static int ReStringMatch(Vector sub_vector, - Vector pat_vector, - int start_index) { - int pattern_length = pat_vector.length(); - if (pattern_length == 0) return start_index; - - int subject_length = sub_vector.length(); - if (start_index + pattern_length > subject_length) return -1; - return SearchString(sub_vector, pat_vector, start_index); -} - */ Handle RegExpImpl::AtomExec(Handle re, Handle subject, int index, @@ -237,35 +225,41 @@ Handle RegExpImpl::AtomExec(Handle re, if (!subject->IsFlat()) FlattenString(subject); AssertNoAllocation no_heap_allocation; // ensure vectors stay valid // Extract flattened substrings of cons strings 
before determining asciiness. - String* seq_sub = *subject; - if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); int needle_len = needle->length(); + ASSERT(needle->IsFlat()); if (needle_len != 0) { - if (index + needle_len > subject->length()) - return isolate->factory()->null_value(); + if (index + needle_len > subject->length()) { + return isolate->factory()->null_value(); + } + String::FlatContent needle_content = + needle->GetFlatContent(no_heap_allocation); + String::FlatContent subject_content = + subject->GetFlatContent(no_heap_allocation); + ASSERT(needle_content.IsFlat()); + ASSERT(subject_content.IsFlat()); // dispatch on type of strings - index = (needle->IsAsciiRepresentation() - ? (seq_sub->IsAsciiRepresentation() + index = (needle_content.IsAscii() + ? (subject_content.IsAscii() ? SearchString(isolate, - seq_sub->ToAsciiVector(), - needle->ToAsciiVector(), + subject_content.ToAsciiVector(), + needle_content.ToAsciiVector(), index) : SearchString(isolate, - seq_sub->ToUC16Vector(), - needle->ToAsciiVector(), + subject_content.ToUC16Vector(), + needle_content.ToAsciiVector(), index)) - : (seq_sub->IsAsciiRepresentation() + : (subject_content.IsAscii() ? 
SearchString(isolate, - seq_sub->ToAsciiVector(), - needle->ToUC16Vector(), + subject_content.ToAsciiVector(), + needle_content.ToUC16Vector(), index) : SearchString(isolate, - seq_sub->ToUC16Vector(), - needle->ToUC16Vector(), + subject_content.ToUC16Vector(), + needle_content.ToUC16Vector(), index))); if (index == -1) return isolate->factory()->null_value(); } diff --git a/src/objects-inl.h b/src/objects-inl.h index c5fda89e42..098bd7a5c7 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -297,6 +297,11 @@ StringRepresentationTag StringShape::representation_tag() { } +uint32_t StringShape::encoding_tag() { + return type_ & kStringEncodingMask; +} + + uint32_t StringShape::full_representation_tag() { return (type_ & (kStringRepresentationMask | kStringEncodingMask)); } diff --git a/src/objects.cc b/src/objects.cc index 1b29071324..031c8bba22 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -5038,55 +5038,38 @@ int String::Utf8Length() { } -Vector String::ToAsciiVector() { - ASSERT(IsAsciiRepresentation()); - ASSERT(IsFlat()); - - int offset = 0; +String::FlatContent String::GetFlatContent(const AssertNoAllocation& promise) { + // Argument isn't used, it's only there to ensure that the user is + // aware that the extracted vectors may not survive a GC. 
int length = this->length(); - StringRepresentationTag string_tag = StringShape(this).representation_tag(); + StringShape shape(this); String* string = this; - if (string_tag == kConsStringTag) { + if (shape.representation_tag() == kConsStringTag) { ConsString* cons = ConsString::cast(string); - ASSERT(cons->second()->length() == 0); + if (cons->second()->length() != 0) { + return FlatContent(); + } string = cons->first(); - string_tag = StringShape(string).representation_tag(); + shape = StringShape(string); } - if (string_tag == kSeqStringTag) { - SeqAsciiString* seq = SeqAsciiString::cast(string); - char* start = seq->GetChars(); - return Vector(start + offset, length); + if (shape.encoding_tag() == kAsciiStringTag) { + const char* start; + if (shape.representation_tag() == kSeqStringTag) { + start = SeqAsciiString::cast(string)->GetChars(); + } else { + start = ExternalAsciiString::cast(string)->resource()->data(); + } + return FlatContent(Vector(start, length)); + } else { + ASSERT(shape.encoding_tag() == kTwoByteStringTag); + const uc16* start; + if (shape.representation_tag() == kSeqStringTag) { + start = SeqTwoByteString::cast(string)->GetChars(); + } else { + start = ExternalTwoByteString::cast(string)->resource()->data(); + } + return FlatContent(Vector(start, length)); } - ASSERT(string_tag == kExternalStringTag); - ExternalAsciiString* ext = ExternalAsciiString::cast(string); - const char* start = ext->resource()->data(); - return Vector(start + offset, length); -} - - -Vector String::ToUC16Vector() { - ASSERT(IsTwoByteRepresentation()); - ASSERT(IsFlat()); - - int offset = 0; - int length = this->length(); - StringRepresentationTag string_tag = StringShape(this).representation_tag(); - String* string = this; - if (string_tag == kConsStringTag) { - ConsString* cons = ConsString::cast(string); - ASSERT(cons->second()->length() == 0); - string = cons->first(); - string_tag = StringShape(string).representation_tag(); - } - if (string_tag == kSeqStringTag) 
{ - SeqTwoByteString* seq = SeqTwoByteString::cast(string); - return Vector(seq->GetChars() + offset, length); - } - ASSERT(string_tag == kExternalStringTag); - ExternalTwoByteString* ext = ExternalTwoByteString::cast(string); - const uc16* start = - reinterpret_cast(ext->resource()->data()); - return Vector(start + offset, length); } @@ -5536,11 +5519,14 @@ void FlatStringReader::PostGarbageCollection() { if (str_ == NULL) return; Handle str(str_); ASSERT(str->IsFlat()); - is_ascii_ = str->IsAsciiRepresentation(); + AssertNoAllocation no_alloc; + String::FlatContent content = str->GetFlatContent(no_alloc); + ASSERT(content.IsFlat()); + is_ascii_ = content.IsAscii(); if (is_ascii_) { - start_ = str->ToAsciiVector().start(); + start_ = content.ToAsciiVector().start(); } else { - start_ = str->ToUC16Vector().start(); + start_ = content.ToUC16Vector().start(); } } @@ -5860,12 +5846,14 @@ template static inline bool CompareStringContentsPartial(Isolate* isolate, IteratorA* ia, String* b) { - if (b->IsFlat()) { - if (b->IsAsciiRepresentation()) { - VectorIterator ib(b->ToAsciiVector()); + AssertNoAllocation no_alloc; + String::FlatContent content = b->GetFlatContent(no_alloc); + if (content.IsFlat()) { + if (content.IsAscii()) { + VectorIterator ib(content.ToAsciiVector()); return CompareStringContents(ia, &ib); } else { - VectorIterator ib(b->ToUC16Vector()); + VectorIterator ib(content.ToUC16Vector()); return CompareStringContents(ia, &ib); } } else { @@ -5895,6 +5883,8 @@ bool String::SlowEquals(String* other) { String* lhs = this->TryFlattenGetString(); String* rhs = other->TryFlattenGetString(); + AssertNoAllocation no_alloc; + if (StringShape(lhs).IsSequentialAscii() && StringShape(rhs).IsSequentialAscii()) { const char* str1 = SeqAsciiString::cast(lhs)->GetChars(); @@ -5904,16 +5894,18 @@ bool String::SlowEquals(String* other) { } Isolate* isolate = GetIsolate(); - if (lhs->IsFlat()) { - if (lhs->IsAsciiRepresentation()) { - Vector vec1 = lhs->ToAsciiVector(); - 
if (rhs->IsFlat()) { - if (rhs->IsAsciiRepresentation()) { - Vector vec2 = rhs->ToAsciiVector(); + String::FlatContent lhs_content = lhs->GetFlatContent(no_alloc); + String::FlatContent rhs_content = rhs->GetFlatContent(no_alloc); + if (lhs_content.IsFlat()) { + if (lhs_content.IsAscii()) { + Vector vec1 = lhs_content.ToAsciiVector(); + if (rhs_content.IsFlat()) { + if (rhs_content.IsAscii()) { + Vector vec2 = rhs_content.ToAsciiVector(); return CompareRawStringContents(vec1, vec2); } else { VectorIterator buf1(vec1); - VectorIterator ib(rhs->ToUC16Vector()); + VectorIterator ib(rhs_content.ToUC16Vector()); return CompareStringContents(&buf1, &ib); } } else { @@ -5923,14 +5915,14 @@ bool String::SlowEquals(String* other) { isolate->objects_string_compare_buffer_b()); } } else { - Vector vec1 = lhs->ToUC16Vector(); - if (rhs->IsFlat()) { - if (rhs->IsAsciiRepresentation()) { + Vector vec1 = lhs_content.ToUC16Vector(); + if (rhs_content.IsFlat()) { + if (rhs_content.IsAscii()) { VectorIterator buf1(vec1); - VectorIterator ib(rhs->ToAsciiVector()); + VectorIterator ib(rhs_content.ToAsciiVector()); return CompareStringContents(&buf1, &ib); } else { - Vector vec2(rhs->ToUC16Vector()); + Vector vec2(rhs_content.ToUC16Vector()); return CompareRawStringContents(vec1, vec2); } } else { @@ -5981,10 +5973,13 @@ bool String::IsEqualTo(Vector str) { bool String::IsAsciiEqualTo(Vector str) { + AssertNoAllocation no_alloc; int slen = length(); if (str.length() != slen) return false; - if (IsFlat() && IsAsciiRepresentation()) { - return CompareChars(ToAsciiVector().start(), str.start(), slen) == 0; + FlatContent content = GetFlatContent(no_alloc); + if (content.IsAscii()) { + return CompareChars(content.ToAsciiVector().start(), + str.start(), slen) == 0; } for (int i = 0; i < slen; i++) { if (Get(i) != static_cast(str[i])) return false; @@ -5994,10 +5989,12 @@ bool String::IsAsciiEqualTo(Vector str) { bool String::IsTwoByteEqualTo(Vector str) { + AssertNoAllocation no_alloc; int 
slen = length(); if (str.length() != slen) return false; - if (IsFlat() && IsTwoByteRepresentation()) { - return CompareChars(ToUC16Vector().start(), str.start(), slen) == 0; + FlatContent content = GetFlatContent(no_alloc); + if (content.IsTwoByte()) { + return CompareChars(content.ToUC16Vector().start(), str.start(), slen) == 0; } for (int i = 0; i < slen; i++) { if (Get(i) != str[i]) return false; diff --git a/src/objects.h b/src/objects.h index 93f7a1d119..76fc0d5b48 100644 --- a/src/objects.h +++ b/src/objects.h @@ -5789,6 +5789,7 @@ class StringShape BASE_EMBEDDED { inline bool IsSequentialTwoByte(); inline bool IsSymbol(); inline StringRepresentationTag representation_tag(); + inline uint32_t encoding_tag(); inline uint32_t full_representation_tag(); inline uint32_t size_tag(); #ifdef DEBUG @@ -5820,6 +5821,51 @@ class StringShape BASE_EMBEDDED { // All string values have a length field. class String: public HeapObject { public: + // Representation of the flat content of a String. + // A non-flat string doesn't have flat content. + // A flat string has content that's encoded as a sequence of either + // ASCII chars or two-byte UC16. + // Returned by String::GetFlatContent(). + class FlatContent { + public: + // Returns true if the string is flat and this structure contains content. + bool IsFlat() { return state_ != NON_FLAT; } + // Returns true if the structure contains ASCII content. + bool IsAscii() { return state_ == ASCII; } + // Returns true if the structure contains two-byte content. + bool IsTwoByte() { return state_ == TWO_BYTE; } + + // Return the ASCII content of the string. Only use if IsAscii() returns + // true. + Vector ToAsciiVector() { + ASSERT_EQ(ASCII, state_); + return Vector::cast(buffer_); + } + // Return the two-byte content of the string. Only use if IsTwoByte() + // returns true. 
+ Vector ToUC16Vector() { + ASSERT_EQ(TWO_BYTE, state_); + return Vector::cast(buffer_); + } + + private: + enum State { NON_FLAT, ASCII, TWO_BYTE }; + + // Constructors only used by String::GetFlatContent(). + explicit FlatContent(Vector chars) + : buffer_(Vector::cast(chars)), + state_(ASCII) { } + explicit FlatContent(Vector chars) + : buffer_(Vector::cast(chars)), + state_(TWO_BYTE) { } + FlatContent() : buffer_(), state_(NON_FLAT) { } + + Vector buffer_; + State state_; + + friend class String; + }; + // Get and set the length of the string. inline int length(); inline void set_length(int value); @@ -5831,10 +5877,10 @@ class String: public HeapObject { inline bool IsAsciiRepresentation(); inline bool IsTwoByteRepresentation(); - // Returns whether this string has ascii chars, i.e. all of them can - // be ascii encoded. This might be the case even if the string is + // Returns whether this string has only ASCII chars, i.e. all of them can + // be ASCII encoded. This might be the case even if the string is // two-byte. Such strings may appear when the embedder prefers - // two-byte external representations even for ascii data. + // two-byte external representations even for ASCII data. // // NOTE: this should be considered only a hint. False negatives are // possible. @@ -5868,8 +5914,12 @@ class String: public HeapObject { // string. inline String* TryFlattenGetString(PretenureFlag pretenure = NOT_TENURED); - Vector ToAsciiVector(); - Vector ToUC16Vector(); + // Tries to return the content of a flat string as a structure holding either + // a flat vector of char or of uc16. + // If the string isn't flat, and therefore doesn't have flat content, the + // returned structure will report so, and can't provide a vector of either + // kind. + FlatContent GetFlatContent(const AssertNoAllocation& safety_promise); // Mark the string as an undetectable object. It only applies to // ascii and two byte string types. 
diff --git a/src/runtime.cc b/src/runtime.cc index 6c9fe1badf..f9420470e2 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2663,21 +2663,22 @@ class CompiledReplacement { void CompiledReplacement::Compile(Handle replacement, int capture_count, int subject_length) { - ASSERT(replacement->IsFlat()); - if (replacement->IsAsciiRepresentation()) { + { AssertNoAllocation no_alloc; - ParseReplacementPattern(&parts_, - replacement->ToAsciiVector(), - capture_count, - subject_length); - } else { - ASSERT(replacement->IsTwoByteRepresentation()); - AssertNoAllocation no_alloc; - - ParseReplacementPattern(&parts_, - replacement->ToUC16Vector(), - capture_count, - subject_length); + String::FlatContent content = replacement->GetFlatContent(no_alloc); + ASSERT(content.IsFlat()); + if (content.IsAscii()) { + ParseReplacementPattern(&parts_, + content.ToAsciiVector(), + capture_count, + subject_length); + } else { + ASSERT(content.IsTwoByte()); + ParseReplacementPattern(&parts_, + content.ToUC16Vector(), + capture_count, + subject_length); + } } Isolate* isolate = replacement->GetIsolate(); // Find substrings of replacement string and create them as String objects. @@ -3049,34 +3050,32 @@ int Runtime::StringMatch(Isolate* isolate, AssertNoAllocation no_heap_allocation; // ensure vectors stay valid // Extract flattened substrings of cons strings before determining asciiness. 
- String* seq_sub = *sub; - if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); - String* seq_pat = *pat; - if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first(); + String::FlatContent seq_sub = sub->GetFlatContent(no_heap_allocation); + String::FlatContent seq_pat = pat->GetFlatContent(no_heap_allocation); // dispatch on type of strings - if (seq_pat->IsAsciiRepresentation()) { - Vector pat_vector = seq_pat->ToAsciiVector(); - if (seq_sub->IsAsciiRepresentation()) { + if (seq_pat.IsAscii()) { + Vector pat_vector = seq_pat.ToAsciiVector(); + if (seq_sub.IsAscii()) { return SearchString(isolate, - seq_sub->ToAsciiVector(), + seq_sub.ToAsciiVector(), pat_vector, start_index); } return SearchString(isolate, - seq_sub->ToUC16Vector(), + seq_sub.ToUC16Vector(), pat_vector, start_index); } - Vector pat_vector = seq_pat->ToUC16Vector(); - if (seq_sub->IsAsciiRepresentation()) { + Vector pat_vector = seq_pat.ToUC16Vector(); + if (seq_sub.IsAscii()) { return SearchString(isolate, - seq_sub->ToAsciiVector(), + seq_sub.ToAsciiVector(), pat_vector, start_index); } return SearchString(isolate, - seq_sub->ToUC16Vector(), + seq_sub.ToUC16Vector(), pat_vector, start_index); } @@ -3161,31 +3160,29 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringLastIndexOf) { int position = -1; AssertNoAllocation no_heap_allocation; // ensure vectors stay valid - // Extract flattened substrings of cons strings before determining asciiness. 
- String* seq_sub = *sub; - if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); - String* seq_pat = *pat; - if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first(); - if (seq_pat->IsAsciiRepresentation()) { - Vector pat_vector = seq_pat->ToAsciiVector(); - if (seq_sub->IsAsciiRepresentation()) { - position = StringMatchBackwards(seq_sub->ToAsciiVector(), + String::FlatContent sub_content = sub->GetFlatContent(no_heap_allocation); + String::FlatContent pat_content = pat->GetFlatContent(no_heap_allocation); + + if (pat_content.IsAscii()) { + Vector pat_vector = pat_content.ToAsciiVector(); + if (sub_content.IsAscii()) { + position = StringMatchBackwards(sub_content.ToAsciiVector(), pat_vector, start_index); } else { - position = StringMatchBackwards(seq_sub->ToUC16Vector(), + position = StringMatchBackwards(sub_content.ToUC16Vector(), pat_vector, start_index); } } else { - Vector pat_vector = seq_pat->ToUC16Vector(); - if (seq_sub->IsAsciiRepresentation()) { - position = StringMatchBackwards(seq_sub->ToAsciiVector(), + Vector pat_vector = pat_content.ToUC16Vector(); + if (sub_content.IsAscii()) { + position = StringMatchBackwards(sub_content.ToAsciiVector(), pat_vector, start_index); } else { - position = StringMatchBackwards(seq_sub->ToUC16Vector(), + position = StringMatchBackwards(sub_content.ToUC16Vector(), pat_vector, start_index); } @@ -3403,36 +3400,38 @@ static bool SearchStringMultiple(Isolate* isolate, for (;;) { // Break when search complete. 
builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); AssertNoAllocation no_gc; - if (subject->IsAsciiRepresentation()) { - Vector subject_vector = subject->ToAsciiVector(); - if (pattern->IsAsciiRepresentation()) { + String::FlatContent subject_content = subject->GetFlatContent(no_gc); + String::FlatContent pattern_content = pattern->GetFlatContent(no_gc); + if (subject_content.IsAscii()) { + Vector subject_vector = subject_content.ToAsciiVector(); + if (pattern_content.IsAscii()) { if (SearchStringMultiple(isolate, subject_vector, - pattern->ToAsciiVector(), + pattern_content.ToAsciiVector(), *pattern, builder, &match_pos)) break; } else { if (SearchStringMultiple(isolate, subject_vector, - pattern->ToUC16Vector(), + pattern_content.ToUC16Vector(), *pattern, builder, &match_pos)) break; } } else { - Vector subject_vector = subject->ToUC16Vector(); - if (pattern->IsAsciiRepresentation()) { + Vector subject_vector = subject_content.ToUC16Vector(); + if (pattern_content.IsAscii()) { if (SearchStringMultiple(isolate, subject_vector, - pattern->ToAsciiVector(), + pattern_content.ToAsciiVector(), *pattern, builder, &match_pos)) break; } else { if (SearchStringMultiple(isolate, subject_vector, - pattern->ToUC16Vector(), + pattern_content.ToUC16Vector(), *pattern, builder, &match_pos)) break; @@ -5420,12 +5419,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONString) { str = String::cast(flat); ASSERT(str->IsFlat()); } - if (str->IsTwoByteRepresentation()) { + AssertNoAllocation no_alloc; + String::FlatContent flat = str->GetFlatContent(no_alloc); + ASSERT(flat.IsFlat()); + if (flat.IsTwoByte()) { return QuoteJsonString(isolate, - str->ToUC16Vector()); + flat.ToUC16Vector()); } else { return QuoteJsonString(isolate, - str->ToAsciiVector()); + flat.ToAsciiVector()); } } @@ -5442,12 +5444,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_QuoteJSONStringComma) { str = String::cast(flat); ASSERT(str->IsFlat()); } - if (str->IsTwoByteRepresentation()) { + 
AssertNoAllocation no_alloc; + String::FlatContent flat = str->GetFlatContent(no_alloc); + if (flat.IsTwoByte()) { return QuoteJsonString(isolate, - str->ToUC16Vector()); + flat.ToUC16Vector()); } else { return QuoteJsonString(isolate, - str->ToAsciiVector()); + flat.ToAsciiVector()); } } @@ -5482,14 +5486,16 @@ static MaybeObject* QuoteJsonStringArray(Isolate* isolate, for (int i = 0; i < length; i++) { if (i != 0) *(write_cursor++) = ','; String* str = String::cast(array->get(i)); - if (str->IsTwoByteRepresentation()) { + String::FlatContent content = str->GetFlatContent(no_gc); + ASSERT(content.IsFlat()); + if (content.IsTwoByte()) { write_cursor = WriteQuoteJsonString(isolate, write_cursor, - str->ToUC16Vector()); + content.ToUC16Vector()); } else { write_cursor = WriteQuoteJsonString(isolate, write_cursor, - str->ToAsciiVector()); + content.ToAsciiVector()); } } *(write_cursor++) = ']'; @@ -5968,11 +5974,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { // No allocation block. 
{ - AssertNoAllocation nogc; - if (subject->IsAsciiRepresentation()) { - Vector subject_vector = subject->ToAsciiVector(); - if (pattern->IsAsciiRepresentation()) { - Vector pattern_vector = pattern->ToAsciiVector(); + AssertNoAllocation no_gc; + String::FlatContent subject_content = subject->GetFlatContent(no_gc); + String::FlatContent pattern_content = pattern->GetFlatContent(no_gc); + ASSERT(subject_content.IsFlat()); + ASSERT(pattern_content.IsFlat()); + if (subject_content.IsAscii()) { + Vector subject_vector = subject_content.ToAsciiVector(); + if (pattern_content.IsAscii()) { + Vector pattern_vector = pattern_content.ToAsciiVector(); if (pattern_vector.length() == 1) { FindAsciiStringIndices(subject_vector, pattern_vector[0], @@ -5988,22 +5998,22 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { } else { FindStringIndices(isolate, subject_vector, - pattern->ToUC16Vector(), + pattern_content.ToUC16Vector(), &indices, limit); } } else { - Vector subject_vector = subject->ToUC16Vector(); + Vector subject_vector = subject_content.ToUC16Vector(); - if (pattern->IsAsciiRepresentation()) { + if (pattern_content.IsAscii()) { FindStringIndices(isolate, subject_vector, - pattern->ToAsciiVector(), + pattern_content.ToAsciiVector(), &indices, limit); } else { FindStringIndices(isolate, subject_vector, - pattern->ToUC16Vector(), + pattern_content.ToUC16Vector(), &indices, limit); } @@ -6085,36 +6095,40 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringToArray) { CONVERT_ARG_CHECKED(String, s, 0); CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[1]); - s->TryFlatten(); + s = FlattenGetString(s); const int length = static_cast(Min(s->length(), limit)); Handle elements; + int position = 0; if (s->IsFlat() && s->IsAsciiRepresentation()) { + // Try using cached chars where possible. 
Object* obj; { MaybeObject* maybe_obj = isolate->heap()->AllocateUninitializedFixedArray(length); if (!maybe_obj->ToObject(&obj)) return maybe_obj; } + AssertNoAllocation no_alloc; elements = Handle(FixedArray::cast(obj), isolate); - - Vector chars = s->ToAsciiVector(); - // Note, this will initialize all elements (not only the prefix) - // to prevent GC from seeing partially initialized array. - int num_copied_from_cache = CopyCachedAsciiCharsToArray(isolate->heap(), - chars.start(), - *elements, - length); - - for (int i = num_copied_from_cache; i < length; ++i) { - Handle str = LookupSingleCharacterStringFromCode(chars[i]); - elements->set(i, *str); + String::FlatContent content = s->GetFlatContent(no_alloc); + if (content.IsAscii()) { + Vector chars = content.ToAsciiVector(); + // Note, this will initialize all elements (not only the prefix) + // to prevent GC from seeing partially initialized array. + position = CopyCachedAsciiCharsToArray(isolate->heap(), + chars.start(), + *elements, + length); + } else { + MemsetPointer(elements->data_start(), + isolate->heap()->undefined_value(), + length); } } else { elements = isolate->factory()->NewFixedArray(length); - for (int i = 0; i < length; ++i) { - Handle str = LookupSingleCharacterStringFromCode(s->Get(i)); - elements->set(i, *str); - } + } + for (int i = position; i < length; ++i) { + Handle str = LookupSingleCharacterStringFromCode(s->Get(i)); + elements->set(i, *str); } #ifdef DEBUG @@ -6916,6 +6930,7 @@ static Object* StringInputBufferCompare(RuntimeState* state, static Object* FlatStringCompare(String* x, String* y) { ASSERT(x->IsFlat()); ASSERT(y->IsFlat()); + AssertNoAllocation no_alloc; Object* equal_prefix_result = Smi::FromInt(EQUAL); int prefix_length = x->length(); if (y->length() < prefix_length) { @@ -6925,22 +6940,24 @@ static Object* FlatStringCompare(String* x, String* y) { equal_prefix_result = Smi::FromInt(LESS); } int r; - if (x->IsAsciiRepresentation()) { - Vector x_chars = 
x->ToAsciiVector(); - if (y->IsAsciiRepresentation()) { - Vector y_chars = y->ToAsciiVector(); + String::FlatContent x_content = x->GetFlatContent(no_alloc); + String::FlatContent y_content = y->GetFlatContent(no_alloc); + if (x_content.IsAscii()) { + Vector x_chars = x_content.ToAsciiVector(); + if (y_content.IsAscii()) { + Vector y_chars = y_content.ToAsciiVector(); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); } else { - Vector y_chars = y->ToUC16Vector(); + Vector y_chars = y_content.ToUC16Vector(); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); } } else { - Vector x_chars = x->ToUC16Vector(); - if (y->IsAsciiRepresentation()) { - Vector y_chars = y->ToAsciiVector(); + Vector x_chars = x_content.ToUC16Vector(); + if (y_content.IsAscii()) { + Vector y_chars = y_content.ToAsciiVector(); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); } else { - Vector y_chars = y->ToUC16Vector(); + Vector y_chars = y_content.ToUC16Vector(); r = CompareChars(x_chars.start(), y_chars.start(), prefix_length); } } @@ -8821,13 +8838,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_DateParseString) { FixedArray* output_array = FixedArray::cast(output->elements()); RUNTIME_ASSERT(output_array->length() >= DateParser::OUTPUT_SIZE); bool result; - if (str->IsAsciiRepresentation()) { - result = DateParser::Parse(str->ToAsciiVector(), + String::FlatContent str_content = str->GetFlatContent(no_allocation); + if (str_content.IsAscii()) { + result = DateParser::Parse(str_content.ToAsciiVector(), output_array, isolate->unicode_cache()); } else { - ASSERT(str->IsTwoByteRepresentation()); - result = DateParser::Parse(str->ToUC16Vector(), + ASSERT(str_content.IsTwoByte()); + result = DateParser::Parse(str_content.ToUC16Vector(), output_array, isolate->unicode_cache()); } @@ -12805,9 +12823,12 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_ListNatives) { RUNTIME_FUNCTION(MaybeObject*, Runtime_Log) { ASSERT(args.length() == 2); + AssertNoAllocation 
no_alloc; CONVERT_CHECKED(String, format, args[0]); CONVERT_CHECKED(JSArray, elms, args[1]); - Vector chars = format->ToAsciiVector(); + String::FlatContent format_content = format->GetFlatContent(no_alloc); + RUNTIME_ASSERT(format_content.IsAscii()); + Vector chars = format_content.ToAsciiVector(); LOGGER->LogRuntime(chars, elms); return isolate->heap()->undefined_value(); } diff --git a/test/mjsunit/string-split.js b/test/mjsunit/string-split.js index 6fcf55799e..9a5bb43f4a 100644 --- a/test/mjsunit/string-split.js +++ b/test/mjsunit/string-split.js @@ -116,3 +116,14 @@ assertEquals(["a", "b", "c"], "abc".split("", 3)); assertEquals(["a", "b", "c"], "abc".split("", numberObj(3))); assertEquals(["a", "b", "c"], "abc".split("", 4)); assertEquals(["a", "b", "c"], "abc".split("", numberObj(4))); + +var all_ascii_chars = []; +for (var i = 0; i < 128; i++) all_ascii_chars[i] = String.fromCharCode(i); +var all_ascii_string = all_ascii_chars.join(""); + +var split_chars = all_ascii_string.split(""); +assertEquals(128, split_chars.length); +for (var i = 0; i < 128; i++) { + assertEquals(1, split_chars[i].length); + assertEquals(i, split_chars[i].charCodeAt(0)); +} \ No newline at end of file