Add StringBufferStream
add a class StringBufferStream which will replace StringInputBuffer and SafeStringInputBuffer and requires no ascii/two byte encoding scheme R=yangguo@chromium.org BUG= Review URL: https://chromiumcodereview.appspot.com/11428106 Patch from Dan Carney <dcarney@google.com>. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13147 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
424a492dde
commit
a2d0b05a11
@ -2512,6 +2512,77 @@ String* String::GetUnderlying() {
|
||||
}
|
||||
|
||||
|
||||
template<class Visitor, class ConsOp>
|
||||
void String::Visit(
|
||||
String* string,
|
||||
unsigned offset,
|
||||
Visitor& visitor,
|
||||
ConsOp& consOp,
|
||||
int32_t type,
|
||||
unsigned length) {
|
||||
|
||||
ASSERT(length == static_cast<unsigned>(string->length()));
|
||||
ASSERT(offset <= length);
|
||||
|
||||
unsigned sliceOffset = offset;
|
||||
while (true) {
|
||||
ASSERT(type == string->map()->instance_type());
|
||||
|
||||
switch (type & (kStringRepresentationMask | kStringEncodingMask)) {
|
||||
case kSeqStringTag | kOneByteStringTag:
|
||||
visitor.VisitOneByteString(
|
||||
reinterpret_cast<const uint8_t*>(
|
||||
SeqOneByteString::cast(string)->GetChars()) + sliceOffset,
|
||||
length - offset);
|
||||
return;
|
||||
|
||||
case kSeqStringTag | kTwoByteStringTag:
|
||||
visitor.VisitTwoByteString(
|
||||
reinterpret_cast<const uint16_t*>(
|
||||
SeqTwoByteString::cast(string)->GetChars()) + sliceOffset,
|
||||
length - offset);
|
||||
return;
|
||||
|
||||
case kExternalStringTag | kOneByteStringTag:
|
||||
visitor.VisitOneByteString(
|
||||
reinterpret_cast<const uint8_t*>(
|
||||
ExternalAsciiString::cast(string)->GetChars()) + sliceOffset,
|
||||
length - offset);
|
||||
return;
|
||||
|
||||
case kExternalStringTag | kTwoByteStringTag:
|
||||
visitor.VisitTwoByteString(
|
||||
reinterpret_cast<const uint16_t*>(
|
||||
ExternalTwoByteString::cast(string)->GetChars()) + sliceOffset,
|
||||
length - offset);
|
||||
return;
|
||||
|
||||
case kSlicedStringTag | kOneByteStringTag:
|
||||
case kSlicedStringTag | kTwoByteStringTag: {
|
||||
SlicedString* slicedString = SlicedString::cast(string);
|
||||
sliceOffset += slicedString->offset();
|
||||
string = slicedString->parent();
|
||||
type = string->map()->instance_type();
|
||||
continue;
|
||||
}
|
||||
|
||||
case kConsStringTag | kOneByteStringTag:
|
||||
case kConsStringTag | kTwoByteStringTag:
|
||||
string = consOp.Operate(ConsString::cast(string), &offset, &type,
|
||||
&length);
|
||||
if (string == NULL) return;
|
||||
sliceOffset = offset;
|
||||
ASSERT(length == static_cast<unsigned>(string->length()));
|
||||
continue;
|
||||
|
||||
default:
|
||||
UNREACHABLE();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
uint16_t SeqOneByteString::SeqOneByteStringGet(int index) {
|
||||
ASSERT(index >= 0 && index < length());
|
||||
return READ_BYTE_FIELD(this, kHeaderSize + index * kCharSize);
|
||||
@ -2690,6 +2761,146 @@ const uint16_t* ExternalTwoByteString::ExternalTwoByteStringGetData(
|
||||
}
|
||||
|
||||
|
||||
unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) {
|
||||
return depth & kDepthMask;
|
||||
}
|
||||
|
||||
|
||||
// Returns the single-bit mask in trace_ that belongs to the frame slot used
// for |depth|.
uint32_t ConsStringIteratorOp::MaskForDepth(unsigned depth) {
  // Shift an unsigned one: the slot can be as large as kDepthMask, and
  // shifting a signed 1 into the sign bit is not portable.
  return static_cast<uint32_t>(1) << OffsetForDepth(depth);
}
|
||||
|
||||
|
||||
void ConsStringIteratorOp::SetRightDescent() {
|
||||
trace_ |= MaskForDepth(depth_ - 1);
|
||||
}
|
||||
|
||||
|
||||
void ConsStringIteratorOp::ClearRightDescent() {
|
||||
trace_ &= ~MaskForDepth(depth_ - 1);
|
||||
}
|
||||
|
||||
|
||||
// Pushes |string| as a new frame.  The frame stack is circular, so the slot is
// the current depth wrapped by kDepthMask; the depth itself keeps growing.
void ConsStringIteratorOp::PushLeft(ConsString* string) {
  const unsigned slot = depth_ & kDepthMask;
  frames_[slot] = string;
  ++depth_;
}
|
||||
|
||||
|
||||
// Replaces the topmost frame with |string| (the right child of the cons string
// that frame held) instead of pushing a new frame.
//
// The restart root (root_, root_type_, root_length_) is deliberately left
// untouched.  consumed_ counts characters from the start of the string that
// was handed to Operate, and ContinueOperation restarts at root_ with
// consumed_ as the offset; re-pointing root_ at this right subtree would make
// that offset overshoot by the length of everything to its left.
//
// |type| is kept for interface compatibility and is not used.
void ConsStringIteratorOp::PushRight(ConsString* string, int32_t type) {
  static_cast<void>(type);
  // In-place update of the topmost frame.
  frames_[(depth_ - 1) & kDepthMask] = string;
}
|
||||
|
||||
|
||||
void ConsStringIteratorOp::AdjustMaximumDepth() {
|
||||
if (depth_ > maximum_depth_) maximum_depth_ = depth_;
|
||||
}
|
||||
|
||||
|
||||
// Drops the topmost frame.  The stack must be non-empty and the depth must not
// exceed the recorded maximum.
void ConsStringIteratorOp::Pop() {
  ASSERT(depth_ > 0);
  ASSERT(depth_ <= maximum_depth_);
  depth_ -= 1;
}
|
||||
|
||||
|
||||
// Prepares the operation for a fresh traversal: empties the frame stack and
// zeroes the count of consumed characters.
void ConsStringIteratorOp::Reset() {
  ResetStack();
  consumed_ = 0;
}
|
||||
|
||||
|
||||
bool ConsStringIteratorOp::HasMore() {
|
||||
return depth_ != 0;
|
||||
}
|
||||
|
||||
|
||||
// Empties the frame stack by zeroing both the current and the maximum depth.
// consumed_ and the root fields are not touched.
void ConsStringIteratorOp::ResetStack() {
  maximum_depth_ = 0;
  depth_ = 0;
}
|
||||
|
||||
|
||||
// Advances the traversal and describes in |response| the string to visit next.
//
// Returns false when NextLeaf reports that the traversal is complete.
// Otherwise returns true with |response| filled in: either with the next leaf
// (offset 0), or - when NextLeaf reports that the frame stack was blown - with
// the root and the number of characters consumed so far, so that the caller
// can restart the search from there.
bool ConsStringIteratorOp::ContinueOperation(ContinueResponse* response) {
  bool stack_blown;
  int32_t leaf_type;
  String* const leaf = NextLeaf(&stack_blown, &leaf_type);

  if (leaf == NULL) {
    // No leaf: either we are finished ...
    if (!stack_blown) return false;
    // ... or the stack lost track of higher nodes and the search has to be
    // restarted from the root at the position reached so far.
    ResetStack();
    response->string_ = root_;
    response->offset_ = consumed_;
    response->length_ = root_length_;
    response->type_ = root_type_;
    return true;
  }

  // A leaf was found; it is handed out whole, so count all of it as consumed.
  const unsigned leaf_length = leaf->length();
  consumed_ += leaf_length;
  response->string_ = leaf;
  response->offset_ = 0;
  response->length_ = leaf_length;
  response->type_ = leaf_type;
  return true;
}
|
||||
|
||||
|
||||
// Returns the character at the read cursor and advances the cursor by one
// character of the current buffer's width.  Only asserts that a buffer has
// been set; it does not compare the cursor against end_.
uint16_t StringCharacterStream::GetNext() {
  ASSERT(buffer8_ != NULL);
  if (is_one_byte_) {
    return *buffer8_++;
  }
  return *buffer16_++;
}
|
||||
|
||||
|
||||
// Creates a stream over |string| that starts at character |offset|.  |op| is
// reset and then used by String::Visit for any cons string it encounters; the
// stream itself acts as the visitor and so receives its first buffer here.
StringCharacterStream::StringCharacterStream(
    String* string, unsigned offset, ConsStringIteratorOp* op)
  : is_one_byte_(true),
    buffer8_(NULL),
    end_(NULL),
    op_(op) {
  op->Reset();
  const int32_t type = string->map()->instance_type();
  const unsigned length = string->length();
  String::Visit(string, offset, *this, *op, type, length);
}
|
||||
|
||||
|
||||
bool StringCharacterStream::HasMore() {
|
||||
if (buffer8_ != end_) return true;
|
||||
if (!op_->HasMore()) return false;
|
||||
ConsStringIteratorOp::ContinueResponse response;
|
||||
// This has been checked above
|
||||
if (!op_->ContinueOperation(&response)) {
|
||||
UNREACHABLE();
|
||||
return false;
|
||||
}
|
||||
String::Visit(response.string_,
|
||||
response.offset_, *this, *op_, response.type_, response.length_);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void StringCharacterStream::VisitOneByteString(
|
||||
const uint8_t* chars, unsigned length) {
|
||||
is_one_byte_ = true;
|
||||
buffer8_ = chars;
|
||||
end_ = chars + length;
|
||||
}
|
||||
|
||||
|
||||
void StringCharacterStream::VisitTwoByteString(
|
||||
const uint16_t* chars, unsigned length) {
|
||||
is_one_byte_ = false;
|
||||
buffer16_ = chars;
|
||||
end_ = reinterpret_cast<const uint8_t*>(chars + length);
|
||||
}
|
||||
|
||||
|
||||
void JSFunctionResultCache::MakeZeroSize() {
|
||||
set_finger_index(kEntriesIndex);
|
||||
set_size(kEntriesIndex);
|
||||
|
125
src/objects.cc
125
src/objects.cc
@ -7016,6 +7016,131 @@ void StringInputBuffer::Seek(unsigned pos) {
|
||||
}
|
||||
|
||||
|
||||
// Descends from |consString| to the leaf that contains character
// |*outerOffset|, recording the path on the frame stack so that NextLeaf can
// continue from there.
//
// In:  |*outerOffset| is the target offset within |consString|; |*typeOut| and
//      |*lengthOut| are the instance type and length of |consString|.
// Out: on success the leaf is returned, |*outerOffset| is the target offset
//      relative to that leaf, and |*typeOut| / |*lengthOut| describe the leaf.
//      NULL is returned when only empty leaves remain.
//
// consumed_ is left at the absolute offset, within |consString|, of the end of
// the returned leaf, and root_ / root_type_ / root_length_ describe
// |consString|.  ContinueOperation uses exactly that pair to restart the
// search after the frame stack has been blown.
String* ConsStringIteratorOp::Operate(ConsString* consString,
    unsigned* outerOffset, int32_t* typeOut, unsigned* lengthOut) {
  ASSERT(*lengthOut == static_cast<unsigned>(consString->length()));
  // Remember the string the target offset is relative to.
  ConsString* const root = consString;
  const int32_t rootType = *typeOut;
  const unsigned rootLength = *lengthOut;
  // Push the root string.
  PushLeft(consString);
  root_ = root;
  root_type_ = rootType;
  root_length_ = rootLength;
  const unsigned targetOffset = *outerOffset;
  // Absolute offset, within the root, of the subtree currently examined.
  unsigned offset = 0;
  while (true) {
    // Loop until the string is found which contains the target offset.
    String* string = consString->first();
    unsigned length = string->length();
    int32_t type;
    if (targetOffset < offset + length) {
      // Target offset is in the left branch.
      // Mark the descent.
      ClearRightDescent();
      // Keep going if we're still in a ConsString.
      type = string->map()->instance_type();
      if ((type & kStringRepresentationMask) == kConsStringTag) {
        consString = ConsString::cast(string);
        PushLeft(consString);
        continue;
      }
    } else {
      // Descend right.
      // Update progress through the string.
      offset += length;
      // Keep going if we're still in a ConsString.
      string = consString->second();
      type = string->map()->instance_type();
      if ((type & kStringRepresentationMask) == kConsStringTag) {
        consString = ConsString::cast(string);
        PushRight(consString, type);
        continue;
      }
      // Mark the descent.
      SetRightDescent();
      // Need this to be updated for the current string.
      length = string->length();
      // Account for the possibility of an empty right leaf.  The leaves
      // skipped here are empty, so |offset| stays valid for the leaf found.
      while (length == 0) {
        bool blewStack;
        // Need to adjust maximum depth for NextLeaf to work.
        AdjustMaximumDepth();
        string = NextLeaf(&blewStack, &type);
        if (string == NULL) {
          // Luckily, this case is impossible.
          ASSERT(!blewStack);
          return NULL;
        }
        length = string->length();
      }
    }
    // Tell the stack we're done descending.
    AdjustMaximumDepth();
    ASSERT(length != 0);
    // Pin the restart root to the string |offset| is measured in, in case a
    // helper re-pointed root_ on the way down.
    root_ = root;
    root_type_ = rootType;
    root_length_ = rootLength;
    // The caller consumes the leaf up to its end, so the absolute position
    // reached is the leaf's start plus its length.  Accumulating only the
    // unread part of the leaf would lose the initial target offset.
    consumed_ = offset + length;
    // Adjust return values and exit.
    *outerOffset = targetOffset - offset;
    *typeOut = type;
    *lengthOut = length;
    return string;
  }
  UNREACHABLE();
  return NULL;
}
|
||||
|
||||
|
||||
// Moves the traversal to the next leaf (non-cons string) and returns it, with
// its instance type stored in |*typeOut|.
//
// Returns NULL when no leaf can be produced; |*blewStack| then tells why:
// false means the frame stack is empty and the traversal is complete, true
// means the circular frame stack has wrapped and the frames needed to continue
// are gone.  |*blewStack| is only written when NULL is returned.
String* ConsStringIteratorOp::NextLeaf(bool* blewStack, int32_t* typeOut) {
  for (;;) {
    // Nothing left on the stack: tree traversal complete.
    if (depth_ == 0) {
      *blewStack = false;
      return NULL;
    }
    // We've lost track of higher nodes.
    if (maximum_depth_ - depth_ == kStackSize) {
      *blewStack = true;
      return NULL;
    }
    // A frame whose right child was already taken is finished.
    if (trace_ & MaskForDepth(depth_ - 1)) {
      Pop();
      continue;
    }
    // Otherwise take the right child of the topmost frame.
    ConsString* node = frames_[OffsetForDepth(depth_ - 1)];
    String* child = node->second();
    int32_t child_type = child->map()->instance_type();
    if ((child_type & kStringRepresentationMask) != kConsStringTag) {
      // The right child is a leaf.  No need to mark the descent; popping the
      // frame leaves the stack in the right place for the next call.
      Pop();
      *typeOut = child_type;
      return child;
    }
    // The right child is another cons string: it takes over the frame (no
    // need to mark the descent), and we walk down its left spine.
    node = ConsString::cast(child);
    PushRight(node, child_type);
    for (;;) {
      // Going left from the topmost frame: update its marker.
      ClearRightDescent();
      child = node->first();
      child_type = child->map()->instance_type();
      if ((child_type & kStringRepresentationMask) != kConsStringTag) {
        AdjustMaximumDepth();
        *typeOut = child_type;
        return child;
      }
      node = ConsString::cast(child);
      PushLeft(node);
    }
  }
  UNREACHABLE();
  return NULL;
}
|
||||
|
||||
|
||||
// This method determines the type of string involved and then copies
|
||||
// a whole chunk of characters into a buffer. It can be used with strings
|
||||
// that have been glued together to form a ConsString and which must cooperate
|
||||
|
@ -7513,6 +7513,14 @@ class String: public HeapObject {
|
||||
return NonAsciiStart(chars, length) >= length;
|
||||
}
|
||||
|
||||
template<class Visitor, class ConsOp>
|
||||
static inline void Visit(String* string,
|
||||
unsigned offset,
|
||||
Visitor& visitor,
|
||||
ConsOp& consOp,
|
||||
int32_t type,
|
||||
unsigned length);
|
||||
|
||||
protected:
|
||||
class ReadBlockBuffer {
|
||||
public:
|
||||
@ -7967,6 +7975,78 @@ class StringInputBuffer: public unibrow::InputBuffer<String, String*, 1024> {
|
||||
};
|
||||
|
||||
|
||||
// This maintains an off-stack representation of the stack frames required
|
||||
// to traverse a ConsString, allowing an entirely iterative and restartable
|
||||
// traversal of the entire string
|
||||
// Note: this class is not GC-safe.
|
||||
class ConsStringIteratorOp {
|
||||
public:
|
||||
struct ContinueResponse {
|
||||
String* string_;
|
||||
unsigned offset_;
|
||||
unsigned length_;
|
||||
int32_t type_;
|
||||
};
|
||||
inline ConsStringIteratorOp() {}
|
||||
String* Operate(ConsString* consString, unsigned* outerOffset,
|
||||
int32_t* typeOut, unsigned* lengthOut);
|
||||
inline bool ContinueOperation(ContinueResponse* response);
|
||||
inline void Reset();
|
||||
inline bool HasMore();
|
||||
|
||||
private:
|
||||
// TODO(dcarney): Templatize this out for different stack sizes.
|
||||
static const unsigned kStackSize = 32;
|
||||
// Use a mask instead of doing modulo operations for stack wrapping.
|
||||
static const unsigned kDepthMask = kStackSize-1;
|
||||
STATIC_ASSERT(IS_POWER_OF_TWO(kStackSize));
|
||||
static inline unsigned OffsetForDepth(unsigned depth);
|
||||
static inline uint32_t MaskForDepth(unsigned depth);
|
||||
|
||||
inline void ClearRightDescent();
|
||||
inline void SetRightDescent();
|
||||
inline void PushLeft(ConsString* string);
|
||||
inline void PushRight(ConsString* string, int32_t type);
|
||||
inline void AdjustMaximumDepth();
|
||||
inline void Pop();
|
||||
inline void ResetStack();
|
||||
String* NextLeaf(bool* blewStack, int32_t* typeOut);
|
||||
|
||||
unsigned depth_;
|
||||
unsigned maximum_depth_;
|
||||
uint32_t trace_;
|
||||
ConsString* frames_[kStackSize];
|
||||
unsigned consumed_;
|
||||
ConsString* root_;
|
||||
int32_t root_type_;
|
||||
unsigned root_length_;
|
||||
DISALLOW_COPY_AND_ASSIGN(ConsStringIteratorOp);
|
||||
};
|
||||
|
||||
|
||||
// Note: this class is not GC-safe.
|
||||
class StringCharacterStream {
|
||||
public:
|
||||
inline StringCharacterStream(
|
||||
String* string, unsigned offset, ConsStringIteratorOp* op);
|
||||
inline uint16_t GetNext();
|
||||
inline bool HasMore();
|
||||
inline void Reset(String* string, unsigned offset, ConsStringIteratorOp* op);
|
||||
inline void VisitOneByteString(const uint8_t* chars, unsigned length);
|
||||
inline void VisitTwoByteString(const uint16_t* chars, unsigned length);
|
||||
|
||||
private:
|
||||
bool is_one_byte_;
|
||||
union {
|
||||
const uint8_t* buffer8_;
|
||||
const uint16_t* buffer16_;
|
||||
};
|
||||
const uint8_t* end_;
|
||||
ConsStringIteratorOp* op_;
|
||||
DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
class VectorIterator {
|
||||
public:
|
||||
|
Loading…
Reference in New Issue
Block a user