Remove InputBuffer

R=yangguo@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/11727004
Patch from Dan Carney <dcarney@google.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13298 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2013-01-03 09:18:01 +00:00
parent a50259f75c
commit 04ccb975f4
7 changed files with 5 additions and 751 deletions

View File

@ -6693,252 +6693,6 @@ const uc16* SeqTwoByteString::SeqTwoByteStringGetData(unsigned start) {
}
// Encodes up to |max_chars| characters of this two-byte string, starting at
// character index |*offset_ptr|, into rbb->util_buffer.  Encoding stops early
// as soon as the encoder reports that a character does not fit.  On return
// |*offset_ptr| has been advanced past the characters consumed and
// rbb->remaining has grown by the same amount.
void SeqTwoByteString::SeqTwoByteStringReadBlockIntoBuffer(ReadBlockBuffer* rbb,
                                                           unsigned* offset_ptr,
                                                           unsigned max_chars) {
  const unsigned first = *offset_ptr;
  unsigned consumed = 0;
  for (; consumed < max_chars; consumed++) {
    // Fetch the character directly from the object's payload.
    uint16_t c = *reinterpret_cast<uint16_t*>(
        reinterpret_cast<char*>(this) -
            kHeapObjectTag + kHeaderSize + (first + consumed) * kShortSize);
    // rbb->cursor is an in/out argument: the encoder advances it on success.
    const bool stored = (c <= kMaxAsciiCharCode)
        ? unibrow::CharacterStream::EncodeAsciiCharacter(c,
                                                         rbb->util_buffer,
                                                         rbb->capacity,
                                                         rbb->cursor)
        : unibrow::CharacterStream::EncodeNonAsciiCharacter(c,
                                                            rbb->util_buffer,
                                                            rbb->capacity,
                                                            rbb->cursor);
    if (!stored) break;
  }
  *offset_ptr = first + consumed;
  rbb->remaining += consumed;
}
// Hands out a pointer straight into this string's character data at index
// |*offset_ptr|; nothing is copied.  |*remaining| is set to |max_chars| and
// |*offset_ptr| is advanced by |max_chars|.
const unibrow::byte* SeqOneByteString::SeqOneByteStringReadBlock(
    unsigned* remaining,
    unsigned* offset_ptr,
    unsigned max_chars) {
  const unibrow::byte* block_start =
      reinterpret_cast<unibrow::byte*>(this) -
          kHeapObjectTag + kHeaderSize + *offset_ptr * kCharSize;
  *remaining = max_chars;
  *offset_ptr += max_chars;
  return block_start;
}
// Reads a block of at most |max_chars| characters starting at |*offset_ptr|
// and returns a pointer to the encoded data; |*offset_ptr| is advanced past
// the characters that were read.
//
// This will iterate unless the block of string data spans two 'halves' of
// a ConsString, in which case it will recurse. Since the block of string
// data to be read has a maximum size this limits the maximum recursion
// depth to something sane. Since C++ does not have tail call recursion
// elimination, the iteration must be explicit. Since this is not an
// -IntoBuffer method it can delegate to one of the efficient
// *AsciiStringReadBlock routines.
const unibrow::byte* ConsString::ConsStringReadBlock(ReadBlockBuffer* rbb,
unsigned* offset_ptr,
unsigned max_chars) {
ConsString* current = this;
// |offset| is always relative to |current|; |offset_correction| accumulates
// the lengths of the left children skipped while descending to the right, so
// that the caller's offset can be expressed relative to |this| again.
unsigned offset = *offset_ptr;
int offset_correction = 0;
while (true) {
String* left = current->first();
unsigned left_length = (unsigned)left->length();
if (left_length > offset &&
(max_chars <= left_length - offset ||
(rbb->capacity <= left_length - offset &&
(max_chars = left_length - offset, true)))) { // comma operator!
// Left hand side only - iterate unless we have reached the bottom of
// the cons tree. The assignment on the left of the comma operator is
// in order to make use of the fact that the -IntoBuffer routines can
// produce at most 'capacity' characters. This enables us to postpone
// the point where we switch to the -IntoBuffer routines (below) in order
// to maximize the chances of delegating a big chunk of work to the
// efficient *AsciiStringReadBlock routines.
if (StringShape(left).IsCons()) {
current = ConsString::cast(left);
continue;
} else {
const unibrow::byte* answer =
String::ReadBlock(left, rbb, &offset, max_chars);
*offset_ptr = offset + offset_correction;
return answer;
}
} else if (left_length <= offset) {
// Right hand side only - iterate unless we have reached the bottom of
// the cons tree.
String* right = current->second();
// Rebase the offset onto the right child and remember how much was
// skipped.
offset -= left_length;
offset_correction += left_length;
if (StringShape(right).IsCons()) {
current = ConsString::cast(right);
continue;
} else {
const unibrow::byte* answer =
String::ReadBlock(right, rbb, &offset, max_chars);
*offset_ptr = offset + offset_correction;
return answer;
}
} else {
// The block to be read spans two sides of the ConsString, so we call the
// -IntoBuffer version, which will recurse. The -IntoBuffer methods
// are able to assemble data from several part strings because they use
// the util_buffer to store their data and never return direct pointers
// to their storage. We don't try to read more than the buffer capacity
// here or we can get too much recursion.
ASSERT(rbb->remaining == 0);
ASSERT(rbb->cursor == 0);
current->ConsStringReadBlockIntoBuffer(
rbb,
&offset,
max_chars > rbb->capacity ? rbb->capacity : max_chars);
*offset_ptr = offset + offset_correction;
return rbb->util_buffer;
}
}
}
// Hands out a pointer straight into the external character data at index
// |*offset_ptr|; nothing is copied.  |*remaining| is set to |max_chars| and
// |*offset_ptr| is advanced by |max_chars|.
const unibrow::byte* ExternalAsciiString::ExternalAsciiStringReadBlock(
    unsigned* remaining,
    unsigned* offset_ptr,
    unsigned max_chars) {
  const unsigned start = *offset_ptr;
  // GetChars() yields const char*; unibrow::byte differs only in signedness.
  const unibrow::byte* chars =
      reinterpret_cast<const unibrow::byte*>(GetChars());
  *remaining = max_chars;
  *offset_ptr += max_chars;
  return chars + start;
}
// Encodes up to |max_chars| characters of this external two-byte string,
// starting at character index |*offset_ptr|, into rbb->util_buffer.  Encoding
// stops early as soon as the encoder reports that a character does not fit.
// On return |*offset_ptr| has been advanced past the characters consumed and
// rbb->remaining has grown by the same amount.
void ExternalTwoByteString::ExternalTwoByteStringReadBlockIntoBuffer(
    ReadBlockBuffer* rbb,
    unsigned* offset_ptr,
    unsigned max_chars) {
  const unsigned first = *offset_ptr;
  const uint16_t* data = GetChars();
  unsigned consumed = 0;
  for (; consumed < max_chars; consumed++) {
    uint16_t c = data[first + consumed];
    // rbb->cursor is an in/out argument: the encoder advances it on success.
    const bool stored = (c <= kMaxAsciiCharCode)
        ? unibrow::CharacterStream::EncodeAsciiCharacter(c,
                                                         rbb->util_buffer,
                                                         rbb->capacity,
                                                         rbb->cursor)
        : unibrow::CharacterStream::EncodeNonAsciiCharacter(c,
                                                            rbb->util_buffer,
                                                            rbb->capacity,
                                                            rbb->cursor);
    if (!stored) break;
  }
  *offset_ptr = first + consumed;
  rbb->remaining += consumed;
}
// Copies up to |max_chars| characters, starting at index |*offset_ptr|, to
// the current cursor position of rbb->util_buffer.  The count is clamped to
// the space left in the buffer.  The buffer cursor, rbb->remaining and
// |*offset_ptr| are all advanced by the number of characters copied.
void SeqOneByteString::SeqOneByteStringReadBlockIntoBuffer(ReadBlockBuffer* rbb,
                                                           unsigned* offset_ptr,
                                                           unsigned max_chars) {
  const unsigned room = rbb->capacity - rbb->cursor;
  const unsigned count = (max_chars > room) ? room : max_chars;
  const char* source = reinterpret_cast<char*>(this) -
      kHeapObjectTag + kHeaderSize + *offset_ptr * kCharSize;
  memcpy(rbb->util_buffer + rbb->cursor, source, count);
  rbb->cursor += count;
  *offset_ptr += count;
  rbb->remaining += count;
}
// Copies up to |max_chars| characters of the external data, starting at index
// |*offset_ptr|, to the current cursor position of rbb->util_buffer.  The
// count is clamped to the space left in the buffer.  The buffer cursor,
// rbb->remaining and |*offset_ptr| are all advanced by the number copied.
void ExternalAsciiString::ExternalAsciiStringReadBlockIntoBuffer(
    ReadBlockBuffer* rbb,
    unsigned* offset_ptr,
    unsigned max_chars) {
  const unsigned room = rbb->capacity - rbb->cursor;
  const unsigned count = (max_chars > room) ? room : max_chars;
  memcpy(rbb->util_buffer + rbb->cursor, GetChars() + *offset_ptr, count);
  rbb->cursor += count;
  *offset_ptr += count;
  rbb->remaining += count;
}
// Dispatches on the representation of |input| and either copies a chunk of
// characters into rbb->util_buffer or returns a pointer to where they can be
// found in place.  The returned pointer is not necessarily valid across a GC
// (see AsciiStringReadBlock).  When |max_chars| is zero, rbb->remaining is
// cleared and NULL is returned.
const unibrow::byte* String::ReadBlock(String* input,
                                       ReadBlockBuffer* rbb,
                                       unsigned* offset_ptr,
                                       unsigned max_chars) {
  ASSERT(*offset_ptr <= static_cast<unsigned>(input->length()));
  if (max_chars == 0) {
    rbb->remaining = 0;
    return NULL;
  }
  switch (StringShape(input).representation_tag()) {
    case kSeqStringTag: {
      if (!input->IsOneByteRepresentation()) {
        // Two-byte data has to be encoded into the utility buffer.
        SeqTwoByteString::cast(input)->SeqTwoByteStringReadBlockIntoBuffer(
            rbb, offset_ptr, max_chars);
        return rbb->util_buffer;
      }
      // One-byte data can be handed out in place.
      return SeqOneByteString::cast(input)->SeqOneByteStringReadBlock(
          &rbb->remaining, offset_ptr, max_chars);
    }
    case kConsStringTag: {
      ConsString* cons = ConsString::cast(input);
      return cons->ConsStringReadBlock(rbb, offset_ptr, max_chars);
    }
    case kExternalStringTag: {
      if (!input->IsOneByteRepresentation()) {
        ExternalTwoByteString::cast(input)->
            ExternalTwoByteStringReadBlockIntoBuffer(
                rbb, offset_ptr, max_chars);
        return rbb->util_buffer;
      }
      return ExternalAsciiString::cast(input)->ExternalAsciiStringReadBlock(
          &rbb->remaining, offset_ptr, max_chars);
    }
    case kSlicedStringTag: {
      SlicedString* slice = SlicedString::cast(input);
      return slice->SlicedStringReadBlock(rbb, offset_ptr, max_chars);
    }
    default:
      break;
  }
  UNREACHABLE();
  return 0;
}
void Relocatable::PostGarbageCollectionProcessing() {
Isolate* isolate = Isolate::Current();
Relocatable* current = isolate->relocatable_top();
@ -7023,11 +6777,6 @@ void FlatStringReader::PostGarbageCollection() {
}
// Repositions the buffer at character offset |pos| of the current input by
// performing a full Reset() on it.
void StringInputBuffer::Seek(unsigned pos) {
Reset(pos, input_);
}
String* ConsStringIteratorOp::Operate(String* string,
unsigned* offset_out,
int32_t* type_out,
@ -7163,154 +6912,6 @@ String* ConsStringIteratorOp::NextLeaf(bool* blew_stack,
}
// Dispatches on the representation of |input| and copies a chunk of at most
// |max_chars| characters into the buffer described by |rbb|.  Unlike
// ReadBlock this always writes into the buffer, so it can be used with
// strings that have been glued together to form a ConsString and which must
// cooperate to fill up a buffer.  Does nothing when |max_chars| is zero.
void String::ReadBlockIntoBuffer(String* input,
                                 ReadBlockBuffer* rbb,
                                 unsigned* offset_ptr,
                                 unsigned max_chars) {
  ASSERT(*offset_ptr <= static_cast<unsigned>(input->length()));
  if (max_chars == 0) return;
  switch (StringShape(input).representation_tag()) {
    case kSeqStringTag: {
      if (!input->IsOneByteRepresentation()) {
        SeqTwoByteString::cast(input)->SeqTwoByteStringReadBlockIntoBuffer(
            rbb, offset_ptr, max_chars);
        return;
      }
      SeqOneByteString::cast(input)->SeqOneByteStringReadBlockIntoBuffer(
          rbb, offset_ptr, max_chars);
      return;
    }
    case kConsStringTag: {
      ConsString* cons = ConsString::cast(input);
      cons->ConsStringReadBlockIntoBuffer(rbb, offset_ptr, max_chars);
      return;
    }
    case kExternalStringTag: {
      if (!input->IsOneByteRepresentation()) {
        ExternalTwoByteString::cast(input)->
            ExternalTwoByteStringReadBlockIntoBuffer(
                rbb, offset_ptr, max_chars);
        return;
      }
      ExternalAsciiString::cast(input)->
          ExternalAsciiStringReadBlockIntoBuffer(rbb, offset_ptr, max_chars);
      return;
    }
    case kSlicedStringTag: {
      SlicedString* slice = SlicedString::cast(input);
      slice->SlicedStringReadBlockIntoBuffer(rbb, offset_ptr, max_chars);
      return;
    }
    default:
      break;
  }
  UNREACHABLE();
  return;
}
// Entry point used with a raw String*: reads everything from |*offset_ptr| to
// the end of |input| (as far as the representation-specific readers allow),
// stores the number of characters made available in |*remaining| and returns
// a pointer to the data.  The data may live in |util_buffer| or in the string
// itself.
const unibrow::byte* String::ReadBlock(String* input,
                                       unibrow::byte* util_buffer,
                                       unsigned capacity,
                                       unsigned* remaining,
                                       unsigned* offset_ptr) {
  const unsigned length = static_cast<unsigned>(input->length());
  ASSERT(*offset_ptr <= length);
  ReadBlockBuffer rbb(util_buffer, 0, capacity, 0);
  const unsigned chars_left = length - *offset_ptr;
  const unibrow::byte* block = ReadBlock(input, &rbb, offset_ptr, chars_left);
  ASSERT(rbb.remaining <= length);
  *remaining = rbb.remaining;
  return block;
}
// Entry point used with a String** location: wraps it in a Handle and always
// copies into |util_buffer|, reading at most |capacity| characters starting
// at |*offset_ptr|.  The number of characters made available is stored in
// |*remaining|.
const unibrow::byte* String::ReadBlock(String** raw_input,
                                       unibrow::byte* util_buffer,
                                       unsigned capacity,
                                       unsigned* remaining,
                                       unsigned* offset_ptr) {
  Handle<String> input(raw_input);
  const unsigned length = static_cast<unsigned>(input->length());
  ASSERT(*offset_ptr <= length);
  const unsigned chars_left = length - *offset_ptr;
  const unsigned chars = (chars_left > capacity) ? capacity : chars_left;
  ReadBlockBuffer rbb(util_buffer, 0, capacity, 0);
  ReadBlockIntoBuffer(*input, &rbb, offset_ptr, chars);
  ASSERT(rbb.remaining <= length);
  *remaining = rbb.remaining;
  return rbb.util_buffer;
}
// Copies at most |max_chars| characters starting at |*offset_ptr| into the
// buffer described by |rbb| and advances |*offset_ptr| past the characters
// that were read.
//
// This will iterate unless the block of string data spans two 'halves' of
// a ConsString, in which case it will recurse. Since the block of string
// data to be read has a maximum size this limits the maximum recursion
// depth to something sane. Since C++ does not have tail call recursion
// elimination, the iteration must be explicit.
void ConsString::ConsStringReadBlockIntoBuffer(ReadBlockBuffer* rbb,
unsigned* offset_ptr,
unsigned max_chars) {
ConsString* current = this;
// |offset| is always relative to |current|; |offset_correction| accumulates
// the lengths of the left children skipped while descending to the right, so
// that the caller's offset can be expressed relative to |this| again.
unsigned offset = *offset_ptr;
int offset_correction = 0;
while (true) {
String* left = current->first();
unsigned left_length = (unsigned)left->length();
if (left_length > offset &&
max_chars <= left_length - offset) {
// Left hand side only - iterate unless we have reached the bottom of
// the cons tree.
if (StringShape(left).IsCons()) {
current = ConsString::cast(left);
continue;
} else {
String::ReadBlockIntoBuffer(left, rbb, &offset, max_chars);
*offset_ptr = offset + offset_correction;
return;
}
} else if (left_length <= offset) {
// Right hand side only - iterate unless we have reached the bottom of
// the cons tree.
offset -= left_length;
offset_correction += left_length;
String* right = current->second();
if (StringShape(right).IsCons()) {
current = ConsString::cast(right);
continue;
} else {
String::ReadBlockIntoBuffer(right, rbb, &offset, max_chars);
*offset_ptr = offset + offset_correction;
return;
}
} else {
// The block to be read spans two sides of the ConsString, so we recurse.
// First recurse on the left.
// After this subtraction |max_chars| is the budget left for the right
// hand side.
max_chars -= left_length - offset;
String::ReadBlockIntoBuffer(left, rbb, &offset, left_length - offset);
// We may have reached the max or there may not have been enough space
// in the buffer for the characters in the left hand side.
if (offset == left_length) {
// Recurse on the right.
String* right = String::cast(current->second());
offset -= left_length;
offset_correction += left_length;
String::ReadBlockIntoBuffer(right, rbb, &offset, max_chars);
}
*offset_ptr = offset + offset_correction;
return;
}
}
}
uint16_t ConsString::ConsStringGet(int index) {
ASSERT(index >= 0 && index < this->length());
@ -7347,26 +6948,6 @@ uint16_t SlicedString::SlicedStringGet(int index) {
}
// Reads a block from the parent string.  The caller's offset is relative to
// the slice, so it is shifted by the slice's start offset for the duration of
// the read and shifted back afterwards.
const unibrow::byte* SlicedString::SlicedStringReadBlock(
    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
  const unsigned slice_start = this->offset();
  unsigned parent_offset = *offset_ptr + slice_start;
  const unibrow::byte* block = String::ReadBlock(String::cast(parent()),
                                                 buffer,
                                                 &parent_offset,
                                                 chars);
  *offset_ptr = parent_offset - slice_start;
  return block;
}
// Copies a block from the parent string into |buffer|.  The caller's offset
// is relative to the slice, so it is shifted by the slice's start offset for
// the duration of the read and shifted back afterwards.
void SlicedString::SlicedStringReadBlockIntoBuffer(
    ReadBlockBuffer* buffer, unsigned* offset_ptr, unsigned chars) {
  const unsigned slice_start = this->offset();
  unsigned parent_offset = *offset_ptr + slice_start;
  String::ReadBlockIntoBuffer(String::cast(parent()),
                              buffer,
                              &parent_offset,
                              chars);
  *offset_ptr = parent_offset - slice_start;
}
template <typename sinkchar>
void String::WriteToFlat(String* src,
sinkchar* sink,

View File

@ -7313,18 +7313,6 @@ class String: public HeapObject {
const uc16* GetTwoByteData();
const uc16* GetTwoByteData(unsigned start);
// Support for StringInputBuffer
// Reads a block of characters from |input| starting at |*offset|.  The
// returned pointer refers either to |util_buffer| (of size |capacity|) or
// directly to string data.  |*remaining| receives the number of characters
// made available and |*offset| is advanced past them.
static const unibrow::byte* ReadBlock(String* input,
unibrow::byte* util_buffer,
unsigned capacity,
unsigned* remaining,
unsigned* offset);
// As above, but takes a String** location and always copies into
// |util_buffer|, reading at most |capacity| characters.
static const unibrow::byte* ReadBlock(String** input,
unibrow::byte* util_buffer,
unsigned capacity,
unsigned* remaining,
unsigned* offset);
// Helper function for flattening strings.
template <typename sinkchar>
static void WriteToFlat(String* source,
@ -7383,33 +7371,6 @@ class String: public HeapObject {
int32_t type,
unsigned length);
protected:
// Bookkeeping shared by the ReadBlock* routines while they fill, or hand out
// pointers in place of, a caller-supplied utility buffer.
class ReadBlockBuffer {
 public:
  ReadBlockBuffer(unibrow::byte* buffer_start,
                  unsigned initial_cursor,
                  unsigned buffer_capacity,
                  unsigned initial_remaining)
      : util_buffer(buffer_start),
        cursor(initial_cursor),
        capacity(buffer_capacity),
        remaining(initial_remaining) {}

  // Start of the utility buffer that encoded characters are written to.
  unibrow::byte* util_buffer;
  // Index in util_buffer at which the next write takes place.
  unsigned cursor;
  // Size of util_buffer.
  unsigned capacity;
  // Number of characters made available by the reads so far.
  unsigned remaining;
};
// Dispatches on the representation of |input|; returns a pointer either into
// buffer->util_buffer or directly into string data, reading at most
// |max_chars| characters starting at |*offset|.
static inline const unibrow::byte* ReadBlock(String* input,
ReadBlockBuffer* buffer,
unsigned* offset,
unsigned max_chars);
// Dispatches on the representation of |input| and always copies into
// |buffer|, reading at most |max_chars| characters starting at |*offset_ptr|.
static void ReadBlockIntoBuffer(String* input,
ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned max_chars);
private:
// Try to flatten the top level ConsString that is hiding behind this
// string. This is a no-op unless the string is a ConsString. Flatten
@ -7485,14 +7446,6 @@ class SeqOneByteString: public SeqString {
// Q.v. String::kMaxLength which is the maximal size of concatenated strings.
static const int kMaxLength = (kMaxSize - kHeaderSize);
// Support for StringInputBuffer.
// Copies up to |chars| characters starting at |*offset| into |buffer|,
// clamped to the space left in the buffer; advances |*offset| and the buffer
// cursor by the number of characters copied.
inline void SeqOneByteStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
unsigned* offset,
unsigned chars);
// Returns a pointer directly into this string's character data at |*offset|
// without copying; sets |*remaining| to |chars| and advances |*offset| by
// |chars|.
inline const unibrow::byte* SeqOneByteStringReadBlock(unsigned* remaining,
unsigned* offset,
unsigned chars);
DECLARE_VERIFIER(SeqOneByteString)
private:
@ -7537,11 +7490,6 @@ class SeqTwoByteString: public SeqString {
// Q.v. String::kMaxLength which is the maximal size of concatenated strings.
static const int kMaxLength = (kMaxSize - kHeaderSize) / sizeof(uint16_t);
// Support for StringInputBuffer.
// Encodes up to |chars| characters starting at |*offset_ptr| into |buffer|,
// stopping early when a character no longer fits; advances |*offset_ptr| by
// the number of characters consumed.
inline void SeqTwoByteStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned chars);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(SeqTwoByteString);
};
@ -7584,14 +7532,6 @@ class ConsString: public String {
static const int kSecondOffset = kFirstOffset + kPointerSize;
static const int kSize = kSecondOffset + kPointerSize;
// Support for StringInputBuffer.
// Reads a block starting at |*offset_ptr|.  Delegates to a single leaf when
// the block lies within one side of the cons tree; otherwise assembles the
// data in |buffer| via ConsStringReadBlockIntoBuffer.
inline const unibrow::byte* ConsStringReadBlock(ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned chars);
// Copies up to |chars| characters starting at |*offset_ptr| into |buffer|,
// recursing when the block spans both sides of a cons node.
inline void ConsStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned chars);
// Minimum length for a cons string.
static const int kMinLength = 13;
@ -7636,13 +7576,6 @@ class SlicedString: public String {
static const int kOffsetOffset = kParentOffset + kPointerSize;
static const int kSize = kOffsetOffset + kPointerSize;
// Support for StringInputBuffer
// Both routines forward to the parent string, temporarily shifting
// |*offset_ptr| by this slice's start offset for the duration of the read.
inline const unibrow::byte* SlicedStringReadBlock(ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned chars);
inline void SlicedStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned chars);
// Minimum length for a sliced string.
static const int kMinLength = 13;
@ -7719,14 +7652,6 @@ class ExternalAsciiString: public ExternalString {
template<typename StaticVisitor>
inline void ExternalAsciiStringIterateBody();
// Support for StringInputBuffer.
// Returns a pointer directly into the external character data at |*offset|
// without copying; sets |*remaining| to |chars| and advances |*offset| by
// |chars|.
const unibrow::byte* ExternalAsciiStringReadBlock(unsigned* remaining,
unsigned* offset,
unsigned chars);
// Copies up to |chars| characters starting at |*offset| into |buffer|,
// clamped to the space left in the buffer; advances |*offset| and the buffer
// cursor by the number of characters copied.
inline void ExternalAsciiStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
unsigned* offset,
unsigned chars);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalAsciiString);
};
@ -7767,12 +7692,6 @@ class ExternalTwoByteString: public ExternalString {
template<typename StaticVisitor>
inline void ExternalTwoByteStringIterateBody();
// Support for StringInputBuffer.
// Encodes up to |chars| characters starting at |*offset_ptr| into |buffer|,
// stopping early when a character no longer fits; advances |*offset_ptr| by
// the number of characters consumed.
void ExternalTwoByteStringReadBlockIntoBuffer(ReadBlockBuffer* buffer,
unsigned* offset_ptr,
unsigned chars);
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalTwoByteString);
};
@ -7819,24 +7738,6 @@ class FlatStringReader : public Relocatable {
};
// A character stream over a heap String, implemented on top of
// unibrow::InputBuffer with a utility buffer of 1024 elements.
//
// Note that StringInputBuffers are not valid across a GC!  To fix this
// it would have to store a String Handle instead of a String* and
// AsciiStringReadBlock would have to be modified to use memcpy.
//
// StringInputBuffer is able to traverse any string regardless of how
// deeply nested a sequence of ConsStrings it is made of.  However,
// performance will be better if deep strings are flattened before they
// are traversed.  Since flattening requires memory allocation this is
// not always desirable, however (esp. in debugging situations).
class StringInputBuffer: public unibrow::InputBuffer<String, String*, 1024> {
 public:
  // Creates a buffer with no backing string attached yet.
  StringInputBuffer(): unibrow::InputBuffer<String, String*, 1024>() {}

  // Creates a buffer positioned at the start of |backing|.
  explicit StringInputBuffer(String* backing)
      : unibrow::InputBuffer<String, String*, 1024>(backing) {}

  // Repositions the buffer at character offset |pos| of the current input.
  virtual void Seek(unsigned pos);
};
// A ConsStringOp that returns null.
// Useful when the operation to apply on a ConsString
// requires an expensive data structure.

View File

@ -430,10 +430,6 @@ class Scanner {
// be empty).
bool ScanRegExpFlags();
// Tells whether the buffer contains an identifier (no escapes).
// Used for checking if a property name is an identifier.
static bool IsIdentifier(unibrow::CharacterStream* buffer);
private:
// The current and look-ahead token.
struct TokenDesc {

View File

@ -137,109 +137,6 @@ unsigned Utf8::Length(uchar c, int previous) {
}
}
// Decodes and returns the character at the cursor.  When that was the last
// character of the current block the cursor is rewound and the buffer is
// refilled; otherwise the count of remaining characters is decremented.
uchar CharacterStream::GetNext() {
  const uchar decoded = DecodeCharacter(buffer_, &cursor_);
  if (remaining_ != 1) {
    remaining_--;
  } else {
    cursor_ = 0;
    FillBuffer();
  }
  ASSERT(BoundsCheck(cursor_));
  return decoded;
}
// IF_LITTLE(expr) / IF_BIG(expr) expand to |expr| on hosts of the matching
// byte order and to a no-op otherwise, so that both variants of a statement
// can be written next to each other.
// NOTE(review): identifiers that are not defined evaluate to 0 inside #if, so
// on a toolchain that defines neither __BYTE_ORDER nor __LITTLE_ENDIAN the
// first branch is taken silently -- confirm these macros are always provided.
// In the #else branch neither helper macro is defined, so every use below
// fails to compile after the warning.
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IF_LITTLE(expr) expr
#define IF_BIG(expr) ((void) 0)
#elif __BYTE_ORDER == __BIG_ENDIAN
#define IF_LITTLE(expr) ((void) 0)
#define IF_BIG(expr) expr
#else
#warning Unknown byte ordering
#endif
// Stores |c| as a single byte at |offset| and advances |offset| by one.
// Returns false, leaving both |buffer| and |offset| untouched, when |offset|
// is not below |capacity|.
bool CharacterStream::EncodeAsciiCharacter(uchar c, byte* buffer,
                                           unsigned capacity,
                                           unsigned& offset) {
  const bool has_room = offset < capacity;
  if (has_room) {
    buffer[offset++] = c;
  }
  return has_room;
}
// Stores |c| as a whole uchar at the next 4-byte aligned position at or after
// |offset|, preceded by a 0x80 marker, and moves |offset| just past the
// stored uchar.  Returns false without writing anything when the aligned
// uchar would not fit within |capacity|.
bool CharacterStream::EncodeNonAsciiCharacter(uchar c, byte* buffer,
unsigned capacity, unsigned& offset) {
// Round |offset| up to a multiple of four.
unsigned aligned = (offset + 0x3) & ~0x3;
if ((aligned + sizeof(uchar)) > capacity)
return false;
if (offset == aligned) {
// No room for a separate marker byte: fold the marker into the stored
// word itself (0x80 in the low byte on little-endian hosts, the top bit on
// big-endian hosts).
IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = (c << 8) | 0x80);
IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c | (1 << 31));
} else {
// Marker byte at the current position, character word at the aligned one.
// The word uses the same layout as above minus the marker bits, so that
// DecodeCharacter can treat both cases alike.
buffer[offset] = 0x80;
IF_LITTLE(*reinterpret_cast<uchar*>(buffer + aligned) = c << 8);
IF_BIG(*reinterpret_cast<uchar*>(buffer + aligned) = c);
}
offset = aligned + sizeof(uchar);
return true;
}
// Encodes |c| at |offset|, choosing the single-byte form for characters up to
// Utf8::kMaxOneByteChar and the aligned multi-byte form for everything else.
// Returns whether the character fitted into |buffer|.
bool CharacterStream::EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
                                      unsigned& offset) {
  return (c > Utf8::kMaxOneByteChar)
      ? EncodeNonAsciiCharacter(c, buffer, capacity, offset)
      : EncodeAsciiCharacter(c, buffer, capacity, offset);
}
// Decodes the character at |*offset| and advances |*offset| past it.  This is
// the inverse of EncodeAsciiCharacter / EncodeNonAsciiCharacter.
uchar CharacterStream::DecodeCharacter(const byte* buffer, unsigned* offset) {
byte b = buffer[*offset];
if (b <= Utf8::kMaxOneByteChar) {
// Single-byte form: the byte is the character.
(*offset)++;
return b;
} else {
// Marker seen: the character is stored as a uchar at the next 4-byte
// aligned position at or after |*offset|.
unsigned aligned = (*offset + 0x3) & ~0x3;
*offset = aligned + sizeof(uchar);
// Strip the marker bits again (low byte on little-endian hosts, top bit on
// big-endian hosts).
IF_LITTLE(return *reinterpret_cast<const uchar*>(buffer + aligned) >> 8);
IF_BIG(return *reinterpret_cast<const uchar*>(buffer + aligned) &
~(1 << 31));
}
}
#undef IF_LITTLE
#undef IF_BIG
// Fetches the next block from the reader, continuing at offset_.  The reader
// decides whether buffer_ points at util_buffer_ or at its own storage, and
// updates remaining_ and offset_.
template <class R, class I, unsigned s>
void InputBuffer<R, I, s>::FillBuffer() {
buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_);
}
// Restarts the stream at the beginning of the current input.
template <class R, class I, unsigned s>
void InputBuffer<R, I, s>::Rewind() {
Reset(input_);
}
// Attaches the buffer to |input| and positions it at |position|: clears the
// per-block state (remaining_, cursor_) and then reads the first block.
template <class R, class I, unsigned s>
void InputBuffer<R, I, s>::Reset(unsigned position, I input) {
input_ = input;
remaining_ = 0;
cursor_ = 0;
offset_ = position;
buffer_ = R::ReadBlock(input_, util_buffer_, s, &remaining_, &offset_);
}
// Attaches the buffer to |input| and positions it at the start.
template <class R, class I, unsigned s>
void InputBuffer<R, I, s>::Reset(I input) {
Reset(0, input);
}
// Repositions the stream at |position| within the current input.
//
// This delegates to Reset() so that the per-block state (cursor_ and
// remaining_) is cleared before the buffer is refilled.  Refilling alone
// would leave cursor_ at its old value, and the next GetNext() would then
// decode from a stale position inside the freshly read block.
template <class R, class I, unsigned s>
void InputBuffer<R, I, s>::Seek(unsigned position) {
  Reset(position, input_);
}
Utf8DecoderBase::Utf8DecoderBase()
: unbuffered_start_(NULL),
utf16_length_(0),

View File

@ -277,33 +277,6 @@ uchar Utf8::CalculateValue(const byte* str,
}
// Counts the characters from the current position to the end of the stream
// by consuming them, then rewinds the stream.
unsigned CharacterStream::Length() {
  unsigned count = 0;
  for (; has_more(); GetNext()) {
    count++;
  }
  Rewind();
  return count;
}
unsigned CharacterStream::Utf16Length() {
unsigned result = 0;
while (has_more()) {
uchar c = GetNext();
result += c > Utf16::kMaxNonSurrogateCharCode ? 2 : 1;
}
Rewind();
return result;
}
// Default seek: rewinds and then skips |position| characters one at a time.
// Subclasses can override this with something cheaper.
void CharacterStream::Seek(unsigned position) {
  Rewind();
  for (unsigned left = position; left != 0; left--) {
    GetNext();
  }
}
void Utf8DecoderBase::Reset(uint16_t* buffer,
unsigned buffer_length,
const uint8_t* stream,

View File

@ -100,21 +100,6 @@ class UnicodeData {
static const uchar kMaxCodePoint;
};
// --- U t f 8 a n d 16 ---
// A trivial (data, length) pair.  |Data| is typically a pointer type; the
// buffer does not own or manage what it refers to.
template <typename Data>
class Buffer {
 public:
  // Creates a buffer referring to |length| elements at |data|.
  inline Buffer(Data data, unsigned length) : data_(data), length_(length) { }
  // Creates an empty buffer.
  inline Buffer() : data_(0), length_(0) { }
  // The accessors are const-qualified so that a const Buffer can be read.
  Data data() const { return data_; }
  unsigned length() const { return length_; }
 private:
  Data data_;
  unsigned length_;
};
class Utf16 {
public:
static inline bool IsLeadSurrogate(int code) {
@ -173,72 +158,6 @@ class Utf8 {
unsigned* cursor);
};
// --- C h a r a c t e r S t r e a m ---
// Abstract forward stream of characters that are decoded from an internal
// block buffer.  Subclasses supply the blocks via FillBuffer() and define how
// to start over via Rewind().
class CharacterStream {
public:
// Returns the next character and advances; refills the buffer when the
// current block is exhausted.
inline uchar GetNext();
inline bool has_more() { return remaining_ != 0; }
// Note that default implementation is not efficient.
virtual void Seek(unsigned);
// Both length queries consume the stream to its end and then rewind it.
unsigned Length();
unsigned Utf16Length();
virtual ~CharacterStream() { }
// Encoding helpers for the block buffer format.  |offset| is an in/out
// argument that is advanced on success; each returns false when the
// character does not fit within |capacity|.
static inline bool EncodeCharacter(uchar c, byte* buffer, unsigned capacity,
unsigned& offset);
static inline bool EncodeAsciiCharacter(uchar c, byte* buffer,
unsigned capacity, unsigned& offset);
static inline bool EncodeNonAsciiCharacter(uchar c, byte* buffer,
unsigned capacity, unsigned& offset);
// Decodes the character at |*offset| and advances |*offset| past it.
static inline uchar DecodeCharacter(const byte* buffer, unsigned* offset);
virtual void Rewind() = 0;
protected:
// Loads the next block into buffer_ and updates remaining_.
virtual void FillBuffer() = 0;
// Debug check that |offset| is a valid cursor for the current buffer.
virtual bool BoundsCheck(unsigned offset) = 0;
// The number of characters left in the current buffer
unsigned remaining_;
// The current offset within the buffer
unsigned cursor_;
// The buffer containing the decoded characters.
const byte* buffer_;
};
// --- I n p u t B u f f e r ---
/**
* Provides efficient access to encoded characters in strings. It
* does so by reading characters one block at a time, rather than one
* character at a time, which gives string implementations an
* opportunity to optimize the decoding.
*
* Reader must provide a static ReadBlock(input, util_buffer, capacity,
* &remaining, &offset) function; kSize is the size of the internal
* utility buffer.
*/
template <class Reader, class Input = Reader*, unsigned kSize = 256>
class InputBuffer : public CharacterStream {
public:
// Restarts at the beginning of the current input.
virtual void Rewind();
// Attaches to |input| and starts at its beginning.
inline void Reset(Input input);
// Repositions within the current input.
void Seek(unsigned position);
// Attaches to |input| and starts at |position|.
inline void Reset(unsigned position, Input input);
protected:
// NOTE(review): the default constructor leaves all members uninitialized;
// presumably callers are expected to call Reset() before use -- confirm.
InputBuffer() { }
explicit InputBuffer(Input input) { Reset(input); }
virtual void FillBuffer();
// When the reader handed out a pointer into its own storage nothing can be
// checked here; otherwise the cursor must stay inside util_buffer_.
virtual bool BoundsCheck(unsigned offset) {
return (buffer_ != util_buffer_) || (offset < kSize);
}
// A custom offset that can be used by the string implementation to
// mark progress within the encoded string.
unsigned offset_;
// The input string
Input input_;
// To avoid heap allocation, we keep an internal buffer to which
// the encoded string can write its characters. The string
// implementation is free to decide whether it wants to use this
// buffer or not.
byte util_buffer_[kSize];
};
class Utf8DecoderBase {
public:

View File

@ -1,7 +1,7 @@
// Copyright 2012 the V8 project authors. All rights reserved.
// Check that we can traverse very deep stacks of ConsStrings using
// StringInputBuffer. Check that Get(int) works on very deep stacks
// StringCharacterStream. Check that Get(int) works on very deep stacks
// of ConsStrings. These operations may not be very fast, but they
// should be possible without getting errors due to too deep recursion.
@ -514,23 +514,16 @@ static Handle<String> ConstructBalanced(
}
static StringInputBuffer buffer;
static ConsStringIteratorOp cons_string_iterator_op_1;
static ConsStringIteratorOp cons_string_iterator_op_2;
static void Traverse(Handle<String> s1, Handle<String> s2) {
int i = 0;
buffer.Reset(*s1);
StringCharacterStream character_stream_1(*s1, &cons_string_iterator_op_1);
StringCharacterStream character_stream_2(*s2, &cons_string_iterator_op_2);
StringInputBuffer buffer2(*s2);
while (buffer.has_more()) {
CHECK(buffer2.has_more());
CHECK(character_stream_1.HasMore());
while (character_stream_1.HasMore()) {
CHECK(character_stream_2.HasMore());
uint16_t c = buffer.GetNext();
CHECK_EQ(c, buffer2.GetNext());
CHECK_EQ(c, character_stream_1.GetNext());
uint16_t c = character_stream_1.GetNext();
CHECK_EQ(c, character_stream_2.GetNext());
i++;
}
@ -543,17 +536,11 @@ static void Traverse(Handle<String> s1, Handle<String> s2) {
static void TraverseFirst(Handle<String> s1, Handle<String> s2, int chars) {
int i = 0;
buffer.Reset(*s1);
StringInputBuffer buffer2(*s2);
StringCharacterStream character_stream_1(*s1, &cons_string_iterator_op_1);
StringCharacterStream character_stream_2(*s2, &cons_string_iterator_op_2);
while (buffer.has_more() && i < chars) {
CHECK(buffer2.has_more());
CHECK(character_stream_1.HasMore());
while (character_stream_1.HasMore() && i < chars) {
CHECK(character_stream_2.HasMore());
uint16_t c = buffer.GetNext();
CHECK_EQ(c, buffer2.GetNext());
CHECK_EQ(c, character_stream_1.GetNext());
uint16_t c = character_stream_1.GetNext();
CHECK_EQ(c, character_stream_2.GetNext());
i++;
}