Remove InputBuffer
R=yangguo@chromium.org BUG= Review URL: https://chromiumcodereview.appspot.com/11727004 Patch from Dan Carney <dcarney@google.com>. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13298 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
a50259f75c
commit
04ccb975f4
419
src/objects.cc
419
src/objects.cc
@ -6693,252 +6693,6 @@ const uc16* SeqTwoByteString::SeqTwoByteStringGetData(unsigned start) {
|
||||
}
|
||||
|
||||
|
||||
// Encodes up to max_chars code units of this sequential two-byte string,
// starting at index *offset_ptr, into rbb->util_buffer.  Code units up to
// kMaxAsciiCharCode go through EncodeAsciiCharacter, everything else through
// EncodeNonAsciiCharacter; the loop ends early as soon as either encoder
// reports failure.  On return *offset_ptr has been advanced past the consumed
// code units and rbb->remaining has grown by the number consumed.
void SeqTwoByteString::SeqTwoByteStringReadBlockIntoBuffer(ReadBlockBuffer* rbb,
                                                           unsigned* offset_ptr,
                                                           unsigned max_chars) {
  // Untagged address of the first character stored after the header.
  char* payload = reinterpret_cast<char*>(this) - kHeapObjectTag + kHeaderSize;
  unsigned position = *offset_ptr;
  unsigned consumed = 0;
  for (; consumed < max_chars; consumed++, position++) {
    uint16_t code_unit =
        *reinterpret_cast<uint16_t*>(payload + position * kShortSize);
    bool stored;
    if (code_unit <= kMaxAsciiCharCode) {
      // Fast case for ASCII characters.  The cursor is updated in place.
      stored = unibrow::CharacterStream::EncodeAsciiCharacter(
          code_unit, rbb->util_buffer, rbb->capacity, rbb->cursor);
    } else {
      stored = unibrow::CharacterStream::EncodeNonAsciiCharacter(
          code_unit, rbb->util_buffer, rbb->capacity, rbb->cursor);
    }
    // A failed encode leaves this code unit unconsumed.
    if (!stored) break;
  }
  *offset_ptr = position;
  rbb->remaining += consumed;
}
|
||||
|
||||
|
||||
// Returns a pointer directly into this string's character storage at index
// *offset_ptr; nothing is copied.  Reports max_chars through *remaining and
// advances *offset_ptr by max_chars.
const unibrow::byte* SeqOneByteString::SeqOneByteStringReadBlock(
    unsigned* remaining,
    unsigned* offset_ptr,
    unsigned max_chars) {
  // Untagged address of the character at the current offset.
  const unibrow::byte* block_start =
      reinterpret_cast<unibrow::byte*>(this) - kHeapObjectTag + kHeaderSize +
      *offset_ptr * kCharSize;
  *remaining = max_chars;
  *offset_ptr += max_chars;
  return block_start;
}
|
||||
|
||||
|
||||
// This will iterate unless the block of string data spans two 'halves' of
|
||||
// a ConsString, in which case it will recurse. Since the block of string
|
||||
// data to be read has a maximum size this limits the maximum recursion
|
||||
// depth to something sane. Since C++ does not have tail call recursion
|
||||
// elimination, the iteration must be explicit. Since this is not an
|
||||
// -IntoBuffer method it can delegate to one of the efficient
|
||||
// *AsciiStringReadBlock routines.
|
||||
const unibrow::byte* ConsString::ConsStringReadBlock(ReadBlockBuffer* rbb,
|
||||
unsigned* offset_ptr,
|
||||
unsigned max_chars) {
|
||||
ConsString* current = this;
|
||||
unsigned offset = *offset_ptr;
|
||||
int offset_correction = 0;
|
||||
|
||||
while (true) {
|
||||
String* left = current->first();
|
||||
unsigned left_length = (unsigned)left->length();
|
||||
if (left_length > offset &&
|
||||
(max_chars <= left_length - offset ||
|
||||
(rbb->capacity <= left_length - offset &&
|
||||
(max_chars = left_length - offset, true)))) { // comma operator!
|
||||
// Left hand side only - iterate unless we have reached the bottom of
|
||||
// the cons tree. The assignment on the left of the comma operator is
|
||||
// in order to make use of the fact that the -IntoBuffer routines can
|
||||
// produce at most 'capacity' characters. This enables us to postpone
|
||||
// the point where we switch to the -IntoBuffer routines (below) in order
|
||||
// to maximize the chances of delegating a big chunk of work to the
|
||||
// efficient *AsciiStringReadBlock routines.
|
||||
if (StringShape(left).IsCons()) {
|
||||
current = ConsString::cast(left);
|
||||
continue;
|
||||
} else {
|
||||
const unibrow::byte* answer =
|
||||
String::ReadBlock(left, rbb, &offset, max_chars);
|
||||
*offset_ptr = offset + offset_correction;
|
||||
return answer;
|
||||
}
|
||||
} else if (left_length <= offset) {
|
||||
// Right hand side only - iterate unless we have reached the bottom of
|
||||
// the cons tree.
|
||||
String* right = current->second();
|
||||
offset -= left_length;
|
||||
offset_correction += left_length;
|
||||
if (StringShape(right).IsCons()) {
|
||||
current = ConsString::cast(right);
|
||||
continue;
|
||||
} else {
|
||||
const unibrow::byte* answer =
|
||||
String::ReadBlock(right, rbb, &offset, max_chars);
|
||||
*offset_ptr = offset + offset_correction;
|
||||
return answer;
|
||||
}
|
||||
} else {
|
||||
// The block to be read spans two sides of the ConsString, so we call the
|
||||
// -IntoBuffer version, which will recurse. The -IntoBuffer methods
|
||||
// are able to assemble data from several part strings because they use
|
||||
// the util_buffer to store their data and never return direct pointers
|
||||
// to their storage. We don't try to read more than the buffer capacity
|
||||
// here or we can get too much recursion.
|
||||
ASSERT(rbb->remaining == 0);
|
||||
ASSERT(rbb->cursor == 0);
|
||||
current->ConsStringReadBlockIntoBuffer(
|
||||
rbb,
|
||||
&offset,
|
||||
max_chars > rbb->capacity ? rbb->capacity : max_chars);
|
||||
*offset_ptr = offset + offset_correction;
|
||||
return rbb->util_buffer;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Returns a pointer into the external character data at index *offset_ptr;
// nothing is copied.  Reports max_chars through *remaining and advances
// *offset_ptr by max_chars.
const unibrow::byte* ExternalAsciiString::ExternalAsciiStringReadBlock(
    unsigned* remaining,
    unsigned* offset_ptr,
    unsigned max_chars) {
  // GetChars() yields const char*; unibrow::byte differs only in signedness.
  const unibrow::byte* chars =
      reinterpret_cast<const unibrow::byte*>(GetChars());
  const unibrow::byte* block_start = chars + *offset_ptr;
  *remaining = max_chars;
  *offset_ptr += max_chars;
  return block_start;
}
|
||||
|
||||
|
||||
// Encodes up to max_chars code units of the external two-byte data, starting
// at index *offset_ptr, into rbb->util_buffer.  The loop ends early as soon
// as an encoder reports failure.  On return *offset_ptr has been advanced
// past the consumed code units and rbb->remaining has grown by the number
// consumed.
void ExternalTwoByteString::ExternalTwoByteStringReadBlockIntoBuffer(
    ReadBlockBuffer* rbb,
    unsigned* offset_ptr,
    unsigned max_chars) {
  const uint16_t* data = GetChars();
  unsigned position = *offset_ptr;
  unsigned consumed = 0;
  for (; consumed < max_chars; consumed++, position++) {
    uint16_t code_unit = data[position];
    bool stored;
    if (code_unit <= kMaxAsciiCharCode) {
      // Fast case for ASCII characters.  The cursor is updated in place.
      stored = unibrow::CharacterStream::EncodeAsciiCharacter(
          code_unit, rbb->util_buffer, rbb->capacity, rbb->cursor);
    } else {
      stored = unibrow::CharacterStream::EncodeNonAsciiCharacter(
          code_unit, rbb->util_buffer, rbb->capacity, rbb->cursor);
    }
    // A failed encode leaves this code unit unconsumed.
    if (!stored) break;
  }
  *offset_ptr = position;
  rbb->remaining += consumed;
}
|
||||
|
||||
|
||||
// Copies characters of this sequential one-byte string, starting at index
// *offset_ptr, into rbb->util_buffer at rbb->cursor.  The copy is limited to
// max_chars and to the space left between the cursor and the capacity.
// *offset_ptr, rbb->cursor and rbb->remaining all advance by the number of
// bytes copied.
void SeqOneByteString::SeqOneByteStringReadBlockIntoBuffer(ReadBlockBuffer* rbb,
                                                           unsigned* offset_ptr,
                                                           unsigned max_chars) {
  unsigned room = rbb->capacity - rbb->cursor;
  unsigned count = (max_chars > room) ? room : max_chars;
  // Untagged address of the character at the current offset.
  char* source = reinterpret_cast<char*>(this) - kHeapObjectTag + kHeaderSize +
                 *offset_ptr * kCharSize;
  memcpy(rbb->util_buffer + rbb->cursor, source, count);
  rbb->remaining += count;
  *offset_ptr += count;
  rbb->cursor += count;
}
|
||||
|
||||
|
||||
// Copies characters of the external ASCII data, starting at index
// *offset_ptr, into rbb->util_buffer at rbb->cursor.  The copy is limited to
// max_chars and to the space left between the cursor and the capacity.
// *offset_ptr, rbb->cursor and rbb->remaining all advance by the number of
// bytes copied.
void ExternalAsciiString::ExternalAsciiStringReadBlockIntoBuffer(
    ReadBlockBuffer* rbb,
    unsigned* offset_ptr,
    unsigned max_chars) {
  unsigned room = rbb->capacity - rbb->cursor;
  unsigned count = (max_chars > room) ? room : max_chars;
  memcpy(rbb->util_buffer + rbb->cursor, GetChars() + *offset_ptr, count);
  rbb->remaining += count;
  *offset_ptr += count;
  rbb->cursor += count;
}
|
||||
|
||||
|
||||
// This method determines the type of string involved and then copies
|
||||
// a whole chunk of characters into a buffer, or returns a pointer to a buffer
|
||||
// where they can be found. The pointer is not necessarily valid across a GC
|
||||
// (see AsciiStringReadBlock).
|
||||
const unibrow::byte* String::ReadBlock(String* input,
|
||||
ReadBlockBuffer* rbb,
|
||||
unsigned* offset_ptr,
|
||||
unsigned max_chars) {
|
||||
ASSERT(*offset_ptr <= static_cast<unsigned>(input->length()));
|
||||
if (max_chars == 0) {
|
||||
rbb->remaining = 0;
|
||||
return NULL;
|
||||
}
|
||||
switch (StringShape(input).representation_tag()) {
|
||||
case kSeqStringTag:
|
||||
if (input->IsOneByteRepresentation()) {
|
||||
SeqOneByteString* str = SeqOneByteString::cast(input);
|
||||
return str->SeqOneByteStringReadBlock(&rbb->remaining,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
} else {
|
||||
SeqTwoByteString* str = SeqTwoByteString::cast(input);
|
||||
str->SeqTwoByteStringReadBlockIntoBuffer(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
return rbb->util_buffer;
|
||||
}
|
||||
case kConsStringTag:
|
||||
return ConsString::cast(input)->ConsStringReadBlock(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
case kExternalStringTag:
|
||||
if (input->IsOneByteRepresentation()) {
|
||||
return ExternalAsciiString::cast(input)->ExternalAsciiStringReadBlock(
|
||||
&rbb->remaining,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
} else {
|
||||
ExternalTwoByteString::cast(input)->
|
||||
ExternalTwoByteStringReadBlockIntoBuffer(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
return rbb->util_buffer;
|
||||
}
|
||||
case kSlicedStringTag:
|
||||
return SlicedString::cast(input)->SlicedStringReadBlock(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void Relocatable::PostGarbageCollectionProcessing() {
|
||||
Isolate* isolate = Isolate::Current();
|
||||
Relocatable* current = isolate->relocatable_top();
|
||||
@ -7023,11 +6777,6 @@ void FlatStringReader::PostGarbageCollection() {
|
||||
}
|
||||
|
||||
|
||||
void StringInputBuffer::Seek(unsigned pos) {
|
||||
Reset(pos, input_);
|
||||
}
|
||||
|
||||
|
||||
String* ConsStringIteratorOp::Operate(String* string,
|
||||
unsigned* offset_out,
|
||||
int32_t* type_out,
|
||||
@ -7163,154 +6912,6 @@ String* ConsStringIteratorOp::NextLeaf(bool* blew_stack,
|
||||
}
|
||||
|
||||
|
||||
// This method determines the type of string involved and then copies
|
||||
// a whole chunk of characters into a buffer. It can be used with strings
|
||||
// that have been glued together to form a ConsString and which must cooperate
|
||||
// to fill up a buffer.
|
||||
void String::ReadBlockIntoBuffer(String* input,
|
||||
ReadBlockBuffer* rbb,
|
||||
unsigned* offset_ptr,
|
||||
unsigned max_chars) {
|
||||
ASSERT(*offset_ptr <= (unsigned)input->length());
|
||||
if (max_chars == 0) return;
|
||||
|
||||
switch (StringShape(input).representation_tag()) {
|
||||
case kSeqStringTag:
|
||||
if (input->IsOneByteRepresentation()) {
|
||||
SeqOneByteString::cast(input)->SeqOneByteStringReadBlockIntoBuffer(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
return;
|
||||
} else {
|
||||
SeqTwoByteString::cast(input)->SeqTwoByteStringReadBlockIntoBuffer(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
return;
|
||||
}
|
||||
case kConsStringTag:
|
||||
ConsString::cast(input)->ConsStringReadBlockIntoBuffer(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
return;
|
||||
case kExternalStringTag:
|
||||
if (input->IsOneByteRepresentation()) {
|
||||
ExternalAsciiString::cast(input)->
|
||||
ExternalAsciiStringReadBlockIntoBuffer(rbb, offset_ptr, max_chars);
|
||||
} else {
|
||||
ExternalTwoByteString::cast(input)->
|
||||
ExternalTwoByteStringReadBlockIntoBuffer(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
}
|
||||
return;
|
||||
case kSlicedStringTag:
|
||||
SlicedString::cast(input)->SlicedStringReadBlockIntoBuffer(rbb,
|
||||
offset_ptr,
|
||||
max_chars);
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Entry point taking a raw String*.  Requests every character from
// *offset_ptr to the end of |input| through the ReadBlockBuffer-based
// ReadBlock, then reports how many characters the returned block holds via
// *remaining.  The result may point at |util_buffer| or wherever the
// delegated ReadBlock found the characters.
const unibrow::byte* String::ReadBlock(String* input,
                                       unibrow::byte* util_buffer,
                                       unsigned capacity,
                                       unsigned* remaining,
                                       unsigned* offset_ptr) {
  ASSERT(*offset_ptr <= static_cast<unsigned>(input->length()));
  unsigned chars_left = input->length() - *offset_ptr;
  // Start with an empty buffer: cursor 0 and nothing remaining.
  ReadBlockBuffer block(util_buffer, 0, capacity, 0);
  const unibrow::byte* result =
      ReadBlock(input, &block, offset_ptr, chars_left);
  ASSERT(block.remaining <= static_cast<unsigned>(input->length()));
  *remaining = block.remaining;
  return result;
}
|
||||
|
||||
|
||||
// Entry point taking a String** location.  Always goes through
// ReadBlockIntoBuffer, so the result is |util_buffer| itself.  At most
// |capacity| characters are requested per call; *remaining receives the
// number of characters placed in the buffer.
const unibrow::byte* String::ReadBlock(String** raw_input,
                                       unibrow::byte* util_buffer,
                                       unsigned capacity,
                                       unsigned* remaining,
                                       unsigned* offset_ptr) {
  Handle<String> input(raw_input);
  ASSERT(*offset_ptr <= static_cast<unsigned>(input->length()));
  unsigned chars_left = input->length() - *offset_ptr;
  unsigned request = (chars_left > capacity) ? capacity : chars_left;
  // Start with an empty buffer: cursor 0 and nothing remaining.
  ReadBlockBuffer block(util_buffer, 0, capacity, 0);
  ReadBlockIntoBuffer(*input, &block, offset_ptr, request);
  ASSERT(block.remaining <= static_cast<unsigned>(input->length()));
  *remaining = block.remaining;
  return block.util_buffer;
}
|
||||
|
||||
|
||||
// This will iterate unless the block of string data spans two 'halves' of
|
||||
// a ConsString, in which case it will recurse. Since the block of string
|
||||
// data to be read has a maximum size this limits the maximum recursion
|
||||
// depth to something sane. Since C++ does not have tail call recursion
|
||||
// elimination, the iteration must be explicit.
|
||||
void ConsString::ConsStringReadBlockIntoBuffer(ReadBlockBuffer* rbb,
|
||||
unsigned* offset_ptr,
|
||||
unsigned max_chars) {
|
||||
ConsString* current = this;
|
||||
unsigned offset = *offset_ptr;
|
||||
int offset_correction = 0;
|
||||
|
||||
while (true) {
|
||||
String* left = current->first();
|
||||
unsigned left_length = (unsigned)left->length();
|
||||
if (left_length > offset &&
|
||||
max_chars <= left_length - offset) {
|
||||
// Left hand side only - iterate unless we have reached the bottom of
|
||||
// the cons tree.
|
||||
if (StringShape(left).IsCons()) {
|
||||
current = ConsString::cast(left);
|
||||
continue;
|
||||
} else {
|
||||
String::ReadBlockIntoBuffer(left, rbb, &offset, max_chars);
|
||||
*offset_ptr = offset + offset_correction;
|
||||
return;
|
||||
}
|
||||
} else if (left_length <= offset) {
|
||||
// Right hand side only - iterate unless we have reached the bottom of
|
||||
// the cons tree.
|
||||
offset -= left_length;
|
||||
offset_correction += left_length;
|
||||
String* right = current->second();
|
||||
if (StringShape(right).IsCons()) {
|
||||
current = ConsString::cast(right);
|
||||
continue;
|
||||
} else {
|
||||
String::ReadBlockIntoBuffer(right, rbb, &offset, max_chars);
|
||||
*offset_ptr = offset + offset_correction;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
// The block to be read spans two sides of the ConsString, so we recurse.
|
||||
// First recurse on the left.
|
||||
max_chars -= left_length - offset;
|
||||
String::ReadBlockIntoBuffer(left, rbb, &offset, left_length - offset);
|
||||
// We may have reached the max or there may not have been enough space
|
||||
// in the buffer for the characters in the left hand side.
|
||||
if (offset == left_length) {
|
||||
// Recurse on the right.
|
||||
String* right = String::cast(current->second());
|
||||
offset -= left_length;
|
||||
offset_correction += left_length;
|
||||
String::ReadBlockIntoBuffer(right, rbb, &offset, max_chars);
|
||||
}
|
||||
*offset_ptr = offset + offset_correction;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
uint16_t ConsString::ConsStringGet(int index) {
|
||||
ASSERT(index >= 0 && index < this->length());
|
||||
|
||||
@ -7347,26 +6948,6 @@ uint16_t SlicedString::SlicedStringGet(int index) {
|
||||
}
|
||||
|
||||
|
||||
// Reads a block from the parent string.  *offset_ptr is translated into the
// parent's coordinates by adding this slice's offset for the duration of the
// call, and translated back afterwards.
const unibrow::byte* SlicedString::SlicedStringReadBlock(
    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
  const unsigned slice_start = this->offset();
  *offset_ptr += slice_start;
  String* backing = String::cast(parent());
  const unibrow::byte* result =
      String::ReadBlock(backing, buffer, offset_ptr, chars);
  *offset_ptr -= slice_start;
  return result;
}
|
||||
|
||||
|
||||
// Fills |buffer| from the parent string.  *offset_ptr is translated into the
// parent's coordinates by adding this slice's offset for the duration of the
// call, and translated back afterwards.
void SlicedString::SlicedStringReadBlockIntoBuffer(
    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
  const unsigned slice_start = this->offset();
  *offset_ptr += slice_start;
  String* backing = String::cast(parent());
  String::ReadBlockIntoBuffer(backing, buffer, offset_ptr, chars);
  *offset_ptr -= slice_start;
}
|
||||
|
||||
template <typename sinkchar>
|
||||
void String::WriteToFlat(String* src,
|
||||
sinkchar* sink,
|
||||
|
@ -7313,18 +7313,6 @@ class String: public HeapObject {
|
||||
const uc16* GetTwoByteData();
|
||||
const uc16* GetTwoByteData(unsigned start);
|
||||
|
||||
// Support for StringInputBuffer
|
||||
static const unibrow::byte* ReadBlock(String* input,
|
||||
unibrow::byte* util_buffer,
|
||||
unsigned capacity,
|
||||
unsigned* remaining,
|
||||
unsigned* offset);
|
||||
static const unibrow::byte* ReadBlock(String** input,
|
||||
unibrow::byte* util_buffer,
|
||||
unsigned capacity,
|
||||
unsigned* remaining,
|
||||
unsigned* offset);
|
||||
|
||||
// Helper function for flattening strings.
|
||||
template <typename sinkchar>
|
||||
static void WriteToFlat(String* source,
|
||||
@ -7383,33 +7371,6 @@ class String: public HeapObject {
|
||||
int32_t type,
|
||||
unsigned length);
|
||||
|
||||
protected:
|
||||
class ReadBlockBuffer {
|
||||
public:
|
||||
ReadBlockBuffer(unibrow::byte* util_buffer_,
|
||||
unsigned cursor_,
|
||||
unsigned capacity_,
|
||||
unsigned remaining_) :
|
||||
util_buffer(util_buffer_),
|
||||
cursor(cursor_),
|
||||
capacity(capacity_),
|
||||
remaining(remaining_) {
|
||||
}
|
||||
unibrow::byte* util_buffer;
|
||||
unsigned cursor;
|
||||
unsigned capacity;
|
||||
unsigned remaining;
|
||||
};
|
||||
|
||||
static inline const unibrow::byte* ReadBlock(String* input,
|
||||
ReadBlockBuffer* buffer,
|
||||
unsigned* offset,
|
||||
unsigned max_chars);
|
||||
static void ReadBlockIntoBuffer(String* input,
|
||||
ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned max_chars);
|
||||
|
||||
private:
|
||||
// Try to flatten the top level ConsString that is hiding behind this
|
||||
// string. This is a no-op unless the string is a ConsString. Flatten
|
||||
@ -7485,14 +7446,6 @@ class SeqOneByteString: public SeqString {
|
||||
// Q.v. String::kMaxLength which is the maximal size of concatenated strings.
|
||||
static const int kMaxLength = (kMaxSize - kHeaderSize);
|
||||
|
||||
// Support for StringInputBuffer.
|
||||
inline void SeqOneByteStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
|
||||
unsigned* offset,
|
||||
unsigned chars);
|
||||
inline const unibrow::byte* SeqOneByteStringReadBlock(unsigned* remaining,
|
||||
unsigned* offset,
|
||||
unsigned chars);
|
||||
|
||||
DECLARE_VERIFIER(SeqOneByteString)
|
||||
|
||||
private:
|
||||
@ -7537,11 +7490,6 @@ class SeqTwoByteString: public SeqString {
|
||||
// Q.v. String::kMaxLength which is the maximal size of concatenated strings.
|
||||
static const int kMaxLength = (kMaxSize - kHeaderSize) / sizeof(uint16_t);
|
||||
|
||||
// Support for StringInputBuffer.
|
||||
inline void SeqTwoByteStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned chars);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(SeqTwoByteString);
|
||||
};
|
||||
@ -7584,14 +7532,6 @@ class ConsString: public String {
|
||||
static const int kSecondOffset = kFirstOffset + kPointerSize;
|
||||
static const int kSize = kSecondOffset + kPointerSize;
|
||||
|
||||
// Support for StringInputBuffer.
|
||||
inline const unibrow::byte* ConsStringReadBlock(ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned chars);
|
||||
inline void ConsStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned chars);
|
||||
|
||||
// Minimum length for a cons string.
|
||||
static const int kMinLength = 13;
|
||||
|
||||
@ -7636,13 +7576,6 @@ class SlicedString: public String {
|
||||
static const int kOffsetOffset = kParentOffset + kPointerSize;
|
||||
static const int kSize = kOffsetOffset + kPointerSize;
|
||||
|
||||
// Support for StringInputBuffer
|
||||
inline const unibrow::byte* SlicedStringReadBlock(ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned chars);
|
||||
inline void SlicedStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned chars);
|
||||
// Minimum length for a sliced string.
|
||||
static const int kMinLength = 13;
|
||||
|
||||
@ -7719,14 +7652,6 @@ class ExternalAsciiString: public ExternalString {
|
||||
template<typename StaticVisitor>
|
||||
inline void ExternalAsciiStringIterateBody();
|
||||
|
||||
// Support for StringInputBuffer.
|
||||
const unibrow::byte* ExternalAsciiStringReadBlock(unsigned* remaining,
|
||||
unsigned* offset,
|
||||
unsigned chars);
|
||||
inline void ExternalAsciiStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
|
||||
unsigned* offset,
|
||||
unsigned chars);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalAsciiString);
|
||||
};
|
||||
@ -7767,12 +7692,6 @@ class ExternalTwoByteString: public ExternalString {
|
||||
template<typename StaticVisitor>
|
||||
inline void ExternalTwoByteStringIterateBody();
|
||||
|
||||
|
||||
// Support for StringInputBuffer.
|
||||
void ExternalTwoByteStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
|
||||
unsigned* offset_ptr,
|
||||
unsigned chars);
|
||||
|
||||
private:
|
||||
DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalTwoByteString);
|
||||
};
|
||||
@ -7819,24 +7738,6 @@ class FlatStringReader : public Relocatable {
|
||||
};
|
||||
|
||||
|
||||
// Note that StringInputBuffers are not valid across a GC! To fix this
|
||||
// it would have to store a String Handle instead of a String* and
|
||||
// AsciiStringReadBlock would have to be modified to use memcpy.
|
||||
//
|
||||
// StringInputBuffer is able to traverse any string regardless of how
|
||||
// deeply nested a sequence of ConsStrings it is made of. However,
|
||||
// performance will be better if deep strings are flattened before they
|
||||
// are traversed. Since flattening requires memory allocation this is
|
||||
// not always desirable, however (esp. in debugging situations).
|
||||
class StringInputBuffer: public unibrow::InputBuffer<String, String*, 1024> {
|
||||
public:
|
||||
virtual void Seek(unsigned pos);
|
||||
inline StringInputBuffer(): unibrow::InputBuffer<String, String*, 1024>() {}
|
||||
explicit inline StringInputBuffer(String* backing):
|
||||
unibrow::InputBuffer<String, String*, 1024>(backing) {}
|
||||
};
|
||||
|
||||
|
||||
// A ConsStringOp that returns null.
|
||||
// Useful when the operation to apply on a ConsString
|
||||
// requires an expensive data structure.
|
||||
|
@ -430,10 +430,6 @@ class Scanner {
|
||||
// be empty).
|
||||
bool ScanRegExpFlags();
|
||||
|
||||
// Tells whether the buffer contains an identifier (no escapes).
|
||||
// Used for checking if a property name is an identifier.
|
||||
static bool IsIdentifier(unibrow::CharacterStream* buffer);
|
||||
|
||||
private:
|
||||
// The current and look-ahead token.
|
||||
struct TokenDesc {
|
||||
|
@ -137,109 +137,6 @@ unsigned Utf8::Length(uchar c, int previous) {
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes and returns the character at the cursor.  When that was the last
// character of the current block (remaining_ == 1) the cursor is rewound to
// 0 and the buffer is refilled; otherwise remaining_ is decremented.
uchar CharacterStream::GetNext() {
  const uchar decoded = DecodeCharacter(buffer_, &cursor_);
  if (remaining_ != 1) {
    remaining_--;
  } else {
    cursor_ = 0;
    FillBuffer();
  }
  ASSERT(BoundsCheck(cursor_));
  return decoded;
}
|
||||
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
#define IF_LITTLE(expr) expr
|
||||
#define IF_BIG(expr) ((void) 0)
|
||||
#elif __BYTE_ORDER == __BIG_ENDIAN
|
||||
#define IF_LITTLE(expr) ((void) 0)
|
||||
#define IF_BIG(expr) expr
|
||||
#else
|
||||
#warning Unknown byte ordering
|
||||
#endif
|
||||
|
||||
// Stores |c| as a single byte at buffer[offset] and advances |offset| by one.
// Returns false, leaving buffer and offset untouched, when offset has already
// reached |capacity|.
bool CharacterStream::EncodeAsciiCharacter(uchar c, byte* buffer,
    unsigned capacity, unsigned& offset) {
  if (offset < capacity) {
    buffer[offset] = c;
    ++offset;
    return true;
  }
  return false;
}
|
||||
|
||||
// Stores |c| as a full uchar-sized word at the first 4-byte-aligned position
// at or after |offset|, and advances |offset| past that word.  Returns false,
// leaving buffer and offset untouched, when the word would not fit within
// |capacity|.
//
// DecodeCharacter treats any byte above Utf8::kMaxOneByteChar at the cursor
// as the start of such a word, so a marker must sit at buffer[offset]:
//  - If offset is already aligned, the marker lives inside the word itself:
//    the low byte 0x80 on little-endian, the top bit on big-endian.
//  - Otherwise a separate 0x80 marker byte is written at buffer[offset] and
//    the word follows at the aligned position.
// NOTE(review): the word is written through a uchar*, which presumably relies
// on |buffer| itself being 4-byte aligned - confirm against callers.
bool CharacterStream::EncodeNonAsciiCharacter(uchar c, byte* buffer,
    unsigned capacity, unsigned& offset) {
  unsigned aligned = (offset + 0x3) & ~0x3;
  if ((aligned + sizeof(uchar)) > capacity)
    return false;
  if (offset == aligned) {
    IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = (c << 8) | 0x80);
    // Use an unsigned literal: shifting a signed 1 into the sign bit is not
    // portable.
    IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c | (1u << 31));
  } else {
    buffer[offset] = 0x80;
    IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = c << 8);
    IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c);
  }
  offset = aligned + sizeof(uchar);
  return true;
}
|
||||
|
||||
// Encodes |c| into |buffer| at |offset|, choosing the one-byte form for
// characters up to Utf8::kMaxOneByteChar and the word form for everything
// else.  Returns whatever the chosen encoder returns.
bool CharacterStream::EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
    unsigned& offset) {
  return (c <= Utf8::kMaxOneByteChar)
      ? EncodeAsciiCharacter(c, buffer, capacity, offset)
      : EncodeNonAsciiCharacter(c, buffer, capacity, offset);
}
|
||||
|
||||
// Decodes the character stored at buffer[*offset] and advances *offset past
// it.  A byte up to Utf8::kMaxOneByteChar is the character itself.  Any other
// byte marks a word-encoded character: the value is read from the first
// 4-byte-aligned position at or after *offset and the marker is stripped
// (shifted out on little-endian, top bit masked off on big-endian).
uchar CharacterStream::DecodeCharacter(const byte* buffer, unsigned* offset) {
  byte b = buffer[*offset];
  if (b <= Utf8::kMaxOneByteChar) {
    (*offset)++;
    return b;
  } else {
    unsigned aligned = (*offset + 0x3) & ~0x3;
    *offset = aligned + sizeof(uchar);
    IF_LITTLE(return *reinterpret_cast<const uchar*>(buffer + aligned) >> 8);
    // Use an unsigned literal: shifting a signed 1 into the sign bit is not
    // portable.
    IF_BIG(return *reinterpret_cast<const uchar*>(buffer + aligned) &
                  ~(1u << 31));
  }
}
|
||||
|
||||
#undef IF_LITTLE
|
||||
#undef IF_BIG
|
||||
|
||||
template <class R, class I, unsigned s>
|
||||
void InputBuffer<R, I, s>::FillBuffer() {
|
||||
buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_);
|
||||
}
|
||||
|
||||
template <class R, class I, unsigned s>
|
||||
void InputBuffer<R, I, s>::Rewind() {
|
||||
Reset(input_);
|
||||
}
|
||||
|
||||
template <class R, class I, unsigned s>
|
||||
void InputBuffer<R, I, s>::Reset(unsigned position, I input) {
|
||||
input_ = input;
|
||||
remaining_ = 0;
|
||||
cursor_ = 0;
|
||||
offset_ = position;
|
||||
buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_);
|
||||
}
|
||||
|
||||
template <class R, class I, unsigned s>
|
||||
void InputBuffer<R, I, s>::Reset(I input) {
|
||||
Reset(0, input);
|
||||
}
|
||||
|
||||
template <class R, class I, unsigned s>
|
||||
void InputBuffer<R, I, s>::Seek(unsigned position) {
|
||||
offset_ = position;
|
||||
buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_);
|
||||
}
|
||||
|
||||
Utf8DecoderBase::Utf8DecoderBase()
|
||||
: unbuffered_start_(NULL),
|
||||
utf16_length_(0),
|
||||
|
@ -277,33 +277,6 @@ uchar Utf8::CalculateValue(const byte* str,
|
||||
}
|
||||
|
||||
|
||||
unsigned CharacterStream::Length() {
|
||||
unsigned result = 0;
|
||||
while (has_more()) {
|
||||
result++;
|
||||
GetNext();
|
||||
}
|
||||
Rewind();
|
||||
return result;
|
||||
}
|
||||
|
||||
unsigned CharacterStream::Utf16Length() {
|
||||
unsigned result = 0;
|
||||
while (has_more()) {
|
||||
uchar c = GetNext();
|
||||
result += c > Utf16::kMaxNonSurrogateCharCode ? 2 : 1;
|
||||
}
|
||||
Rewind();
|
||||
return result;
|
||||
}
|
||||
|
||||
void CharacterStream::Seek(unsigned position) {
|
||||
Rewind();
|
||||
for (unsigned i = 0; i < position; i++) {
|
||||
GetNext();
|
||||
}
|
||||
}
|
||||
|
||||
void Utf8DecoderBase::Reset(uint16_t* buffer,
|
||||
unsigned buffer_length,
|
||||
const uint8_t* stream,
|
||||
|
@ -100,21 +100,6 @@ class UnicodeData {
|
||||
static const uchar kMaxCodePoint;
|
||||
};
|
||||
|
||||
// --- U t f 8 a n d 16 ---
|
||||
|
||||
// A simple pairing of a data value (typically a pointer) with a length.
// A default-constructed Buffer holds data 0 and length 0.
template <typename Data>
class Buffer {
 public:
  inline Buffer(Data data, unsigned length) : data_(data), length_(length) { }
  inline Buffer() : data_(0), length_(0) { }
  // Accessors are const so that they can be used on const Buffers.
  Data data() const { return data_; }
  unsigned length() const { return length_; }
 private:
  Data data_;
  unsigned length_;
};
|
||||
|
||||
|
||||
class Utf16 {
|
||||
public:
|
||||
static inline bool IsLeadSurrogate(int code) {
|
||||
@ -173,72 +158,6 @@ class Utf8 {
|
||||
unsigned* cursor);
|
||||
};
|
||||
|
||||
// --- C h a r a c t e r S t r e a m ---
|
||||
|
||||
class CharacterStream {
|
||||
public:
|
||||
inline uchar GetNext();
|
||||
inline bool has_more() { return remaining_ != 0; }
|
||||
// Note that default implementation is not efficient.
|
||||
virtual void Seek(unsigned);
|
||||
unsigned Length();
|
||||
unsigned Utf16Length();
|
||||
virtual ~CharacterStream() { }
|
||||
static inline bool EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
|
||||
unsigned& offset);
|
||||
static inline bool EncodeAsciiCharacter(uchar c, byte* buffer,
|
||||
unsigned capacity, unsigned& offset);
|
||||
static inline bool EncodeNonAsciiCharacter(uchar c, byte* buffer,
|
||||
unsigned capacity, unsigned& offset);
|
||||
static inline uchar DecodeCharacter(const byte* buffer, unsigned* offset);
|
||||
virtual void Rewind() = 0;
|
||||
|
||||
protected:
|
||||
virtual void FillBuffer() = 0;
|
||||
virtual bool BoundsCheck(unsigned offset) = 0;
|
||||
// The number of characters left in the current buffer
|
||||
unsigned remaining_;
|
||||
// The current offset within the buffer
|
||||
unsigned cursor_;
|
||||
// The buffer containing the decoded characters.
|
||||
const byte* buffer_;
|
||||
};
|
||||
|
||||
// --- I n p u t B u f f e r ---
|
||||
|
||||
/**
|
||||
* Provides efficient access to encoded characters in strings. It
|
||||
* does so by reading characters one block at a time, rather than one
|
||||
* character at a time, which gives string implementations an
|
||||
* opportunity to optimize the decoding.
|
||||
*/
|
||||
template <class Reader, class Input = Reader*, unsigned kSize = 256>
|
||||
class InputBuffer : public CharacterStream {
|
||||
public:
|
||||
virtual void Rewind();
|
||||
inline void Reset(Input input);
|
||||
void Seek(unsigned position);
|
||||
inline void Reset(unsigned position, Input input);
|
||||
protected:
|
||||
InputBuffer() { }
|
||||
explicit InputBuffer(Input input) { Reset(input); }
|
||||
virtual void FillBuffer();
|
||||
virtual bool BoundsCheck(unsigned offset) {
|
||||
return (buffer_ != util_buffer_) || (offset < kSize);
|
||||
}
|
||||
|
||||
// A custom offset that can be used by the string implementation to
|
||||
// mark progress within the encoded string.
|
||||
unsigned offset_;
|
||||
// The input string
|
||||
Input input_;
|
||||
// To avoid heap allocation, we keep an internal buffer to which
|
||||
// the encoded string can write its characters. The string
|
||||
// implementation is free to decide whether it wants to use this
|
||||
// buffer or not.
|
||||
byte util_buffer_[kSize];
|
||||
};
|
||||
|
||||
|
||||
class Utf8DecoderBase {
|
||||
public:
|
||||
|
@ -1,7 +1,7 @@
|
||||
// Copyright 2012 the V8 project authors. All rights reserved.
|
||||
|
||||
// Check that we can traverse very deep stacks of ConsStrings using
|
||||
// StringInputBuffer. Check that Get(int) works on very deep stacks
|
||||
// StringCharacterStream. Check that Get(int) works on very deep stacks
|
||||
// of ConsStrings. These operations may not be very fast, but they
|
||||
// should be possible without getting errors due to too deep recursion.
|
||||
|
||||
@ -514,23 +514,16 @@ static Handle<String> ConstructBalanced(
|
||||
}
|
||||
|
||||
|
||||
static StringInputBuffer buffer;
|
||||
static ConsStringIteratorOp cons_string_iterator_op_1;
|
||||
static ConsStringIteratorOp cons_string_iterator_op_2;
|
||||
|
||||
static void Traverse(Handle<String> s1, Handle<String> s2) {
|
||||
int i = 0;
|
||||
buffer.Reset(*s1);
|
||||
StringCharacterStream character_stream_1(*s1, &cons_string_iterator_op_1);
|
||||
StringCharacterStream character_stream_2(*s2, &cons_string_iterator_op_2);
|
||||
StringInputBuffer buffer2(*s2);
|
||||
while (buffer.has_more()) {
|
||||
CHECK(buffer2.has_more());
|
||||
CHECK(character_stream_1.HasMore());
|
||||
while (character_stream_1.HasMore()) {
|
||||
CHECK(character_stream_2.HasMore());
|
||||
uint16_t c = buffer.GetNext();
|
||||
CHECK_EQ(c, buffer2.GetNext());
|
||||
CHECK_EQ(c, character_stream_1.GetNext());
|
||||
uint16_t c = character_stream_1.GetNext();
|
||||
CHECK_EQ(c, character_stream_2.GetNext());
|
||||
i++;
|
||||
}
|
||||
@ -543,17 +536,11 @@ static void Traverse(Handle<String> s1, Handle<String> s2) {
|
||||
|
||||
static void TraverseFirst(Handle<String> s1, Handle<String> s2, int chars) {
|
||||
int i = 0;
|
||||
buffer.Reset(*s1);
|
||||
StringInputBuffer buffer2(*s2);
|
||||
StringCharacterStream character_stream_1(*s1, &cons_string_iterator_op_1);
|
||||
StringCharacterStream character_stream_2(*s2, &cons_string_iterator_op_2);
|
||||
while (buffer.has_more() && i < chars) {
|
||||
CHECK(buffer2.has_more());
|
||||
CHECK(character_stream_1.HasMore());
|
||||
while (character_stream_1.HasMore() && i < chars) {
|
||||
CHECK(character_stream_2.HasMore());
|
||||
uint16_t c = buffer.GetNext();
|
||||
CHECK_EQ(c, buffer2.GetNext());
|
||||
CHECK_EQ(c, character_stream_1.GetNext());
|
||||
uint16_t c = character_stream_1.GetNext();
|
||||
CHECK_EQ(c, character_stream_2.GetNext());
|
||||
i++;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user