Add StringBufferStream

Add a class StringBufferStream, which will replace
StringInputBuffer and SafeStringInputBuffer and does not require
an ASCII/two-byte encoding scheme.

R=yangguo@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/11428106
Patch from Dan Carney <dcarney@google.com>.

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13147 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
yangguo@chromium.org 2012-12-06 11:49:15 +00:00
parent 424a492dde
commit a2d0b05a11
3 changed files with 416 additions and 0 deletions

View File

@ -2512,6 +2512,77 @@ String* String::GetUnderlying() {
}
// Hands the characters of |string|, starting at |offset|, to |visitor|.
//
// Flat (sequential or external) strings are passed to
// visitor.VisitOneByteString() or visitor.VisitTwoByteString() depending on
// their encoding. Sliced strings are unwrapped to their parent. Cons strings
// are delegated to consOp.Operate(), which picks the leaf to visit and
// rewrites |offset|, |type| and |length| for that leaf; if it returns NULL
// nothing is visited.
//
// |type| must be the instance type of |string| and |length| its length.
template<class Visitor, class ConsOp>
void String::Visit(
    String* string,
    unsigned offset,
    Visitor& visitor,
    ConsOp& consOp,
    int32_t type,
    unsigned length) {
  ASSERT(length == static_cast<unsigned>(string->length()));
  ASSERT(offset <= length);
  // Offset into the backing characters. It starts out equal to |offset| and
  // grows by the slice offset whenever a sliced string is unwrapped.
  unsigned chars_offset = offset;
  for (;;) {
    ASSERT(type == string->map()->instance_type());
    switch (type & (kStringRepresentationMask | kStringEncodingMask)) {
      case kSeqStringTag | kOneByteStringTag: {
        const uint8_t* chars = reinterpret_cast<const uint8_t*>(
            SeqOneByteString::cast(string)->GetChars());
        visitor.VisitOneByteString(chars + chars_offset, length - offset);
        return;
      }
      case kSeqStringTag | kTwoByteStringTag: {
        const uint16_t* chars = reinterpret_cast<const uint16_t*>(
            SeqTwoByteString::cast(string)->GetChars());
        visitor.VisitTwoByteString(chars + chars_offset, length - offset);
        return;
      }
      case kExternalStringTag | kOneByteStringTag: {
        const uint8_t* chars = reinterpret_cast<const uint8_t*>(
            ExternalAsciiString::cast(string)->GetChars());
        visitor.VisitOneByteString(chars + chars_offset, length - offset);
        return;
      }
      case kExternalStringTag | kTwoByteStringTag: {
        const uint16_t* chars = reinterpret_cast<const uint16_t*>(
            ExternalTwoByteString::cast(string)->GetChars());
        visitor.VisitTwoByteString(chars + chars_offset, length - offset);
        return;
      }
      case kSlicedStringTag | kOneByteStringTag:
      case kSlicedStringTag | kTwoByteStringTag: {
        // Step into the parent; |length| and |offset| keep describing the
        // slice, only the position in the backing store moves.
        SlicedString* slice = SlicedString::cast(string);
        chars_offset += slice->offset();
        string = slice->parent();
        type = string->map()->instance_type();
        continue;
      }
      case kConsStringTag | kOneByteStringTag:
      case kConsStringTag | kTwoByteStringTag: {
        string = consOp.Operate(
            ConsString::cast(string), &offset, &type, &length);
        if (string == NULL) return;
        chars_offset = offset;
        ASSERT(length == static_cast<unsigned>(string->length()));
        continue;
      }
      default:
        UNREACHABLE();
        return;
    }
  }
}
uint16_t SeqOneByteString::SeqOneByteStringGet(int index) {
ASSERT(index >= 0 && index < length());
return READ_BYTE_FIELD(this, kHeaderSize + index * kCharSize);
@ -2690,6 +2761,146 @@ const uint16_t* ExternalTwoByteString::ExternalTwoByteStringGetData(
}
// Maps a logical stack depth onto a slot index by masking it with kDepthMask.
unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) {
  const unsigned slot = depth & kDepthMask;
  return slot;
}
// Returns the bit in trace_ that corresponds to the stack slot for |depth|.
uint32_t ConsStringIteratorOp::MaskForDepth(unsigned depth) {
  // Shift an unsigned one: the slot index may select the top bit of the
  // 32-bit word, and shifting a signed 1 into the sign bit is not portable.
  return static_cast<uint32_t>(1) << OffsetForDepth(depth);
}
// Sets the trace_ bit of the frame currently on top of the stack.
void ConsStringIteratorOp::SetRightDescent() {
  const uint32_t bit = MaskForDepth(depth_ - 1);
  trace_ = trace_ | bit;
}
// Clears the trace_ bit of the frame currently on top of the stack.
void ConsStringIteratorOp::ClearRightDescent() {
  const uint32_t bit = MaskForDepth(depth_ - 1);
  trace_ = trace_ & ~bit;
}
// Pushes |string| as a new frame; the slot index wraps around via kDepthMask.
void ConsStringIteratorOp::PushLeft(ConsString* string) {
  const unsigned slot = depth_ & kDepthMask;
  frames_[slot] = string;
  depth_++;
}
void ConsStringIteratorOp::PushRight(ConsString* string, int32_t type) {
// Inplace update
frames_[(depth_-1) & kDepthMask] = string;
if (depth_ != 1) return;
// Optimization: can replace root in this case.
root_ = string;
root_type_ = type;
root_length_ = string->length();
}
// Raises maximum_depth_ to the current depth if the stack has grown past it.
void ConsStringIteratorOp::AdjustMaximumDepth() {
  if (maximum_depth_ < depth_) {
    maximum_depth_ = depth_;
  }
}
// Drops the frame on top of the stack. The stack must not be empty.
void ConsStringIteratorOp::Pop() {
  ASSERT(depth_ > 0);
  ASSERT(depth_ <= maximum_depth_);
  --depth_;
}
// Prepares the operator for a fresh traversal: empties the frame stack and
// zeroes the count of consumed characters.
void ConsStringIteratorOp::Reset() {
  ResetStack();
  consumed_ = 0;
}
// Returns true while there are still frames on the stack.
bool ConsStringIteratorOp::HasMore() {
  return depth_ > 0;
}
// Empties the frame stack and forgets the deepest level reached so far.
void ConsStringIteratorOp::ResetStack() {
  maximum_depth_ = 0;
  depth_ = 0;
}
// Advances the traversal by one step and describes in |response| what
// should be visited next.
//
// Returns false when the traversal is complete. Otherwise returns true with
// |response| filled in: either with the next leaf (to be read from offset 0),
// or, when the frame stack has lost track of higher nodes, with the root
// string and the number of characters consumed so far, so that the caller
// restarts the search from the root.
bool ConsStringIteratorOp::ContinueOperation(ContinueResponse* response) {
  bool stack_blown;
  int32_t leaf_type;
  String* leaf = NextLeaf(&stack_blown, &leaf_type);
  if (leaf == NULL) {
    // No leaf and the stack is intact: traversal complete.
    if (!stack_blown) return false;
    // Restart the search from the root at the position reached so far.
    ResetStack();
    response->string_ = root_;
    response->offset_ = consumed_;
    response->length_ = root_length_;
    response->type_ = root_type_;
    return true;
  }
  // A leaf was found; it is consumed from its first character.
  unsigned leaf_length = leaf->length();
  consumed_ += leaf_length;
  response->string_ = leaf;
  response->offset_ = 0;
  response->length_ = leaf_length;
  response->type_ = leaf_type;
  return true;
}
// Returns the character at the current position and advances past it.
// No end-of-buffer check is made here.
uint16_t StringCharacterStream::GetNext() {
  ASSERT(buffer8_ != NULL);
  if (is_one_byte_) {
    return *buffer8_++;
  }
  return *buffer16_++;
}
// Creates a stream over |string| that starts at character |offset|. |op| is
// reset and then used to walk cons strings; the first buffer is set up
// immediately through String::Visit().
StringCharacterStream::StringCharacterStream(
    String* string, unsigned offset, ConsStringIteratorOp* op)
    : is_one_byte_(true),
      buffer8_(NULL),
      end_(NULL),
      op_(op) {
  op->Reset();
  const int32_t type = string->map()->instance_type();
  const unsigned length = string->length();
  String::Visit(string, offset, *this, *op, type, length);
}
// Returns true if GetNext() can deliver another character, fetching the next
// piece of the string from op_ when the current buffer is exhausted.
bool StringCharacterStream::HasMore() {
  // Keep fetching until a non-empty buffer turns up: a leaf handed out by the
  // operator may be empty (for instance an empty second half of a cons
  // string), and reporting "more" for it would let GetNext() read past the
  // end of the buffer.
  while (buffer8_ == end_) {
    if (!op_->HasMore()) return false;
    ConsStringIteratorOp::ContinueResponse response;
    // op_->HasMore() only says that frames remain on the stack. They may all
    // have been fully visited already (this happens when the stream started
    // inside a right-hand leaf), in which case ContinueOperation() reports
    // the end of the traversal. That is a normal end of stream, not an error.
    if (!op_->ContinueOperation(&response)) return false;
    String::Visit(response.string_,
        response.offset_, *this, *op_, response.type_, response.length_);
  }
  return true;
}
// Visitor callback: makes the |length| one-byte characters at |chars| the
// current buffer.
void StringCharacterStream::VisitOneByteString(
    const uint8_t* chars, unsigned length) {
  const uint8_t* limit = chars + length;
  buffer8_ = chars;
  end_ = limit;
  is_one_byte_ = true;
}
// Visitor callback: makes the |length| two-byte characters at |chars| the
// current buffer. The end marker is stored as a byte pointer so that it can
// be compared against the shared buffer pointer.
void StringCharacterStream::VisitTwoByteString(
    const uint16_t* chars, unsigned length) {
  const uint16_t* limit = chars + length;
  buffer16_ = chars;
  end_ = reinterpret_cast<const uint8_t*>(limit);
  is_one_byte_ = false;
}
void JSFunctionResultCache::MakeZeroSize() {
set_finger_index(kEntriesIndex);
set_size(kEntriesIndex);

View File

@ -7016,6 +7016,131 @@ void StringInputBuffer::Seek(unsigned pos) {
}
// Descends from |consString| to the leaf that contains a given character
// offset, recording the path on the frame stack so that the traversal can be
// continued afterwards.
//
// All three pointer arguments are in/out:
//   outerOffset - in: target offset into |consString|;
//                 out: offset of the target within the returned leaf.
//   typeOut     - in: instance type of |consString|;
//                 out: instance type of the returned leaf.
//   lengthOut   - in: length of |consString|;
//                 out: length of the returned leaf.
//
// |consString| and its type and length are remembered as the root for a
// possible restart. Returns the leaf, or NULL if no further leaf was found
// while skipping empty leaves.
String* ConsStringIteratorOp::Operate(ConsString* consString,
    unsigned* outerOffset, int32_t* typeOut, unsigned* lengthOut) {
  ASSERT(*lengthOut == static_cast<unsigned>(consString->length()));
  // Push the root string.
  PushLeft(consString);
  root_ = consString;
  root_type_ = *typeOut;
  root_length_ = *lengthOut;
  unsigned targetOffset = *outerOffset;
  // Offset, relative to the root, at which the current subtree starts.
  unsigned offset = 0;
  while (true) {
    // Loop until the string is found which contains the target offset.
    String* string = consString->first();
    unsigned length = string->length();
    int32_t type;
    if (targetOffset < offset + length) {
      // Target offset is in the left branch.
      // Mark the descent.
      ClearRightDescent();
      // Keep going if we're still in a ConsString.
      type = string->map()->instance_type();
      if ((type & kStringRepresentationMask) == kConsStringTag) {
        consString = ConsString::cast(string);
        PushLeft(consString);
        continue;
      }
    } else {
      // Descend right.
      // Update progress through the string.
      offset += length;
      // Keep going if we're still in a ConsString.
      string = consString->second();
      type = string->map()->instance_type();
      if ((type & kStringRepresentationMask) == kConsStringTag) {
        consString = ConsString::cast(string);
        PushRight(consString, type);
        continue;
      }
      // Mark the descent.
      SetRightDescent();
      // Need this to be updated for the current string.
      length = string->length();
      // Account for the possibility of an empty right leaf. Skipped leaves
      // are empty, so |offset| remains the start of whichever leaf is found.
      while (length == 0) {
        bool blewStack;
        // Need to adjust maximum depth for NextLeaf to work.
        AdjustMaximumDepth();
        string = NextLeaf(&blewStack, &type);
        if (string == NULL) {
          // Luckily, this case is impossible.
          ASSERT(!blewStack);
          return NULL;
        }
        length = string->length();
      }
    }
    // Tell the stack we're done descending.
    AdjustMaximumDepth();
    ASSERT(length != 0);
    // Adjust return values and exit.
    unsigned innerOffset = targetOffset - offset;
    // Record the position reached relative to the root, i.e. the end of the
    // leaf being returned. ContinueOperation() passes consumed_ back as an
    // offset into the root when it restarts the search, so this has to be an
    // absolute position even when the traversal started at a non-zero offset.
    consumed_ = offset + length;
    *outerOffset = innerOffset;
    *typeOut = type;
    *lengthOut = length;
    return string;
  }
  UNREACHABLE();
  return NULL;
}
// Moves on to the next leaf using the frames recorded on the stack.
//
// Returns the leaf and stores its instance type in |*typeOut|. Returns NULL
// with |*blewStack| set to false when the traversal is complete, and NULL
// with |*blewStack| set to true when the stack has wrapped around and the
// frames of higher nodes are no longer available. |*blewStack| is only
// written when NULL is returned.
String* ConsStringIteratorOp::NextLeaf(bool* blewStack, int32_t* typeOut) {
  for (;;) {
    // Tree traversal complete.
    if (depth_ == 0) {
      *blewStack = false;
      return NULL;
    }
    // We've lost track of higher nodes.
    if (maximum_depth_ - depth_ == kStackSize) {
      *blewStack = true;
      return NULL;
    }
    // A frame whose right child has already been handed out is finished.
    const bool right_already_read =
        (trace_ & MaskForDepth(depth_ - 1)) != 0;
    if (right_already_read) {
      Pop();
      continue;
    }
    // Go right.
    ConsString* node = frames_[OffsetForDepth(depth_ - 1)];
    String* child = node->second();
    int32_t child_type = child->map()->instance_type();
    const bool right_is_leaf =
        (child_type & kStringRepresentationMask) != kConsStringTag;
    if (right_is_leaf) {
      // No need to mark the descent: popping the frame puts the next call
      // in the correct place.
      Pop();
      *typeOut = child_type;
      return child;
    }
    // The right child is another cons string; it takes over this frame.
    node = ConsString::cast(child);
    PushRight(node, child_type);
    // From there, walk all the way down the left spine.
    for (;;) {
      // Update the marker for the frame on top of the stack.
      ClearRightDescent();
      child = node->first();
      child_type = child->map()->instance_type();
      if ((child_type & kStringRepresentationMask) != kConsStringTag) {
        AdjustMaximumDepth();
        *typeOut = child_type;
        return child;
      }
      node = ConsString::cast(child);
      PushLeft(node);
    }
  }
  UNREACHABLE();
  return NULL;
}
// This method determines the type of string involved and then copies
// a whole chunk of characters into a buffer. It can be used with strings
// that have been glued together to form a ConsString and which must cooperate

View File

@ -7513,6 +7513,14 @@ class String: public HeapObject {
return NonAsciiStart(chars, length) >= length;
}
// Hands the characters of |string|, starting at |offset|, to |visitor|.
// Flat strings are passed to visitor.VisitOneByteString() or
// visitor.VisitTwoByteString(); sliced strings are unwrapped to their parent;
// cons strings are delegated to consOp.Operate(), which selects the leaf to
// visit. |type| must be the instance type of |string| and |length| its
// length.
template<class Visitor, class ConsOp>
static inline void Visit(String* string,
unsigned offset,
Visitor& visitor,
ConsOp& consOp,
int32_t type,
unsigned length);
protected:
class ReadBlockBuffer {
public:
@ -7967,6 +7975,78 @@ class StringInputBuffer: public unibrow::InputBuffer<String, String*, 1024> {
};
// This maintains an off-stack representation of the stack frames required
// to traverse a ConsString, allowing an entirely iterative and restartable
// traversal of the entire string
// Note: this class is not GC-safe.
class ConsStringIteratorOp {
public:
struct ContinueResponse {
String* string_;
unsigned offset_;
unsigned length_;
int32_t type_;
};
inline ConsStringIteratorOp() {}
String* Operate(ConsString* consString, unsigned* outerOffset,
int32_t* typeOut, unsigned* lengthOut);
inline bool ContinueOperation(ContinueResponse* response);
inline void Reset();
inline bool HasMore();
private:
// TODO(dcarney): Templatize this out for different stack sizes.
static const unsigned kStackSize = 32;
// Use a mask instead of doing modulo operations for stack wrapping.
static const unsigned kDepthMask = kStackSize-1;
STATIC_ASSERT(IS_POWER_OF_TWO(kStackSize));
static inline unsigned OffsetForDepth(unsigned depth);
static inline uint32_t MaskForDepth(unsigned depth);
inline void ClearRightDescent();
inline void SetRightDescent();
inline void PushLeft(ConsString* string);
inline void PushRight(ConsString* string, int32_t type);
inline void AdjustMaximumDepth();
inline void Pop();
inline void ResetStack();
String* NextLeaf(bool* blewStack, int32_t* typeOut);
unsigned depth_;
unsigned maximum_depth_;
uint32_t trace_;
ConsString* frames_[kStackSize];
unsigned consumed_;
ConsString* root_;
int32_t root_type_;
unsigned root_length_;
DISALLOW_COPY_AND_ASSIGN(ConsStringIteratorOp);
};
// Reads a String one character at a time, starting at a given offset. The
// string is consumed one flat buffer at a time; cons strings are walked with
// the ConsStringIteratorOp supplied by the caller.
// Note: this class is not GC-safe.
class StringCharacterStream {
public:
// Sets up the stream over |string| starting at |offset|; |op| is reset and
// kept for walking cons strings.
inline StringCharacterStream(
String* string, unsigned offset, ConsStringIteratorOp* op);
// Returns the next character and advances. Does not itself check for the
// end of the current buffer.
inline uint16_t GetNext();
// Returns whether another character is available, fetching the next buffer
// from the operator when the current one is exhausted.
inline bool HasMore();
// NOTE(review): no definition of Reset is visible in this change — confirm
// that it is defined before any caller uses it.
inline void Reset(String* string, unsigned offset, ConsStringIteratorOp* op);
// Visitor callbacks invoked by String::Visit with the next flat buffer.
inline void VisitOneByteString(const uint8_t* chars, unsigned length);
inline void VisitTwoByteString(const uint16_t* chars, unsigned length);
private:
// Selects which member of the union below is the active read pointer.
bool is_one_byte_;
union {
const uint8_t* buffer8_;
const uint16_t* buffer16_;
};
// End of the current buffer, stored as a byte pointer for both encodings.
const uint8_t* end_;
// Operator used to continue through cons strings.
ConsStringIteratorOp* op_;
DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
};
template <typename T>
class VectorIterator {
public: