Streamline the scanner for external two byte string input.
Review URL: http://codereview.chromium.org/165403 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@2703 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
33fb11c12f
commit
1c70072f7f
@ -1046,7 +1046,7 @@ void ObjectTemplate::SetInternalFieldCount(int value) {
|
||||
|
||||
ScriptData* ScriptData::PreCompile(const char* input, int length) {
|
||||
unibrow::Utf8InputBuffer<> buf(input, length);
|
||||
return i::PreParse(&buf, NULL);
|
||||
return i::PreParse(i::Handle<i::String>(), &buf, NULL);
|
||||
}
|
||||
|
||||
|
||||
|
@ -266,7 +266,7 @@ Handle<JSFunction> Compiler::Compile(Handle<String> source,
|
||||
if (pre_data == NULL && source_length >= FLAG_min_preparse_length) {
|
||||
Access<SafeStringInputBuffer> buf(&safe_string_input_buffer);
|
||||
buf->Reset(source.location());
|
||||
pre_data = PreParse(buf.value(), extension);
|
||||
pre_data = PreParse(source, buf.value(), extension);
|
||||
}
|
||||
|
||||
// Create a script object describing the script to be compiled.
|
||||
|
@ -87,8 +87,10 @@ Handle<String> Factory::NewStringFromUtf8(Vector<const char> string,
|
||||
}
|
||||
|
||||
|
||||
Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string) {
|
||||
CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string), String);
|
||||
Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string,
|
||||
PretenureFlag pretenure) {
|
||||
CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string, pretenure),
|
||||
String);
|
||||
}
|
||||
|
||||
|
||||
|
@ -92,7 +92,8 @@ class Factory : public AllStatic {
|
||||
Vector<const char> str,
|
||||
PretenureFlag pretenure = NOT_TENURED);
|
||||
|
||||
static Handle<String> NewStringFromTwoByte(Vector<const uc16> str);
|
||||
static Handle<String> NewStringFromTwoByte(Vector<const uc16> str,
|
||||
PretenureFlag pretenure = NOT_TENURED);
|
||||
|
||||
// Allocates and partially initializes a TwoByte String. The characters of
|
||||
// the string are uninitialized. Currently used in regexp code only, where
|
||||
|
@ -97,7 +97,7 @@ class Parser {
|
||||
|
||||
// Pre-parse the program from the character stream; returns true on
|
||||
// success, false if a stack-overflow happened during parsing.
|
||||
bool PreParseProgram(unibrow::CharacterStream* stream);
|
||||
bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream);
|
||||
|
||||
void ReportMessage(const char* message, Vector<const char*> args);
|
||||
virtual void ReportMessageAt(Scanner::Location loc,
|
||||
@ -1167,13 +1167,14 @@ Parser::Parser(Handle<Script> script,
|
||||
}
|
||||
|
||||
|
||||
bool Parser::PreParseProgram(unibrow::CharacterStream* stream) {
|
||||
bool Parser::PreParseProgram(Handle<String> source,
|
||||
unibrow::CharacterStream* stream) {
|
||||
HistogramTimerScope timer(&Counters::pre_parse);
|
||||
StackGuard guard;
|
||||
AssertNoZoneAllocation assert_no_zone_allocation;
|
||||
AssertNoAllocation assert_no_allocation;
|
||||
NoHandleAllocation no_handle_allocation;
|
||||
scanner_.Init(Handle<String>(), stream, 0);
|
||||
scanner_.Init(source, stream, 0);
|
||||
ASSERT(target_stack_ == NULL);
|
||||
mode_ = PARSE_EAGERLY;
|
||||
DummyScope top_scope;
|
||||
@ -4593,7 +4594,8 @@ unsigned* ScriptDataImpl::Data() {
|
||||
}
|
||||
|
||||
|
||||
ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
|
||||
ScriptDataImpl* PreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
v8::Extension* extension) {
|
||||
Handle<Script> no_script;
|
||||
bool allow_natives_syntax =
|
||||
@ -4601,7 +4603,7 @@ ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
|
||||
FLAG_allow_natives_syntax ||
|
||||
Bootstrapper::IsActive();
|
||||
PreParser parser(no_script, allow_natives_syntax, extension);
|
||||
if (!parser.PreParseProgram(stream)) return NULL;
|
||||
if (!parser.PreParseProgram(source, stream)) return NULL;
|
||||
// The list owns the backing store so we need to clone the vector.
|
||||
// That way, the result will be exactly the right size rather than
|
||||
// the expected 50% too large.
|
||||
|
@ -143,7 +143,8 @@ FunctionLiteral* MakeAST(bool compile_in_global_context,
|
||||
ScriptDataImpl* pre_data);
|
||||
|
||||
|
||||
ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
|
||||
ScriptDataImpl* PreParse(Handle<String> source,
|
||||
unibrow::CharacterStream* stream,
|
||||
v8::Extension* extension);
|
||||
|
||||
|
||||
|
@ -92,18 +92,7 @@ void UTF8Buffer::AddCharSlow(uc32 c) {
|
||||
|
||||
|
||||
UTF16Buffer::UTF16Buffer()
|
||||
: pos_(0),
|
||||
pushback_buffer_(0),
|
||||
last_(0),
|
||||
stream_(NULL) { }
|
||||
|
||||
|
||||
void UTF16Buffer::Initialize(Handle<String> data,
|
||||
unibrow::CharacterStream* input) {
|
||||
data_ = data;
|
||||
pos_ = 0;
|
||||
stream_ = input;
|
||||
}
|
||||
: pos_(0), size_(0) { }
|
||||
|
||||
|
||||
Handle<String> UTF16Buffer::SubString(int start, int end) {
|
||||
@ -111,14 +100,27 @@ Handle<String> UTF16Buffer::SubString(int start, int end) {
|
||||
}
|
||||
|
||||
|
||||
void UTF16Buffer::PushBack(uc32 ch) {
|
||||
// CharacterStreamUTF16Buffer
|
||||
// Creates a buffer that is not yet bound to a character stream; Initialize()
// has to supply the stream before the buffer is read from.
CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
    : pushback_buffer_(0),
      last_(0),
      stream_(NULL) {
}
|
||||
|
||||
|
||||
// Binds this buffer to |input| as its character source, remembers |data| as
// the string handle, and rewinds the read position to the start.
void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
                                            unibrow::CharacterStream* input) {
  stream_ = input;
  data_ = data;
  pos_ = 0;
}
|
||||
|
||||
|
||||
// Un-reads one character: the character most recently handed out (last_) is
// parked on the pushback list, |ch| becomes the new "last" character, and the
// read position moves back by one.
void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
  --pos_;
  // last_ must be saved before it is overwritten with |ch|.
  pushback_buffer_.Add(last_);
  last_ = ch;
}
|
||||
|
||||
|
||||
uc32 UTF16Buffer::Advance() {
|
||||
uc32 CharacterStreamUTF16Buffer::Advance() {
|
||||
// NOTE: It is of importance to Persian / Farsi resources that we do
|
||||
// *not* strip format control characters in the scanner; see
|
||||
//
|
||||
@ -135,7 +137,7 @@ uc32 UTF16Buffer::Advance() {
|
||||
uc32 next = stream_->GetNext();
|
||||
return last_ = next;
|
||||
} else {
|
||||
// note: currently the following increment is necessary to avoid a
|
||||
// Note: currently the following increment is necessary to avoid a
|
||||
// test-parser problem!
|
||||
pos_++;
|
||||
return last_ = static_cast<uc32>(-1);
|
||||
@ -143,13 +145,53 @@ uc32 UTF16Buffer::Advance() {
|
||||
}
|
||||
|
||||
|
||||
void UTF16Buffer::SeekForward(int pos) {
|
||||
// Repositions both the buffer and the underlying stream at |pos|.
// Seeking is only expected while no pushed-back characters are pending,
// which the assertion below checks.
void CharacterStreamUTF16Buffer::SeekForward(int pos) {
  pos_ = pos;
  ASSERT(pushback_buffer()->is_empty());
  stream_->Seek(pos);
}
|
||||
|
||||
|
||||
// TwoByteStringUTF16Buffer
|
||||
// Creates a buffer with no character data attached; Initialize() supplies
// the external string to read from.
TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer()
    : raw_data_(NULL) {
}
|
||||
|
||||
|
||||
// Attaches the buffer to the external two-byte string |data|, which must not
// be a null handle. The character pointer and length are read from the
// string once here so that Advance() can index the raw data directly.
void TwoByteStringUTF16Buffer::Initialize(
    Handle<ExternalTwoByteString> data) {
  ASSERT(!data.is_null());

  // Cache direct access to the characters held by the external resource.
  raw_data_ = data->resource()->data();
  size_ = data->length();

  // Remember the string itself and start reading from the beginning.
  data_ = data;
  pos_ = 0;
}
|
||||
|
||||
|
||||
// Returns the character at the current position and steps past it. Once the
// position has reached size_, static_cast<uc32>(-1) is returned instead as
// the end-of-input marker.
uc32 TwoByteStringUTF16Buffer::Advance() {
  if (pos_ >= size_) {
    // Note: currently the following increment is necessary to avoid a
    // test-parser problem!
    pos_++;
    return static_cast<uc32>(-1);
  }
  return raw_data_[pos_++];
}
|
||||
|
||||
|
||||
// Un-reads one character by stepping the read position back by one.
//
// Nothing has to be stored: the characters stay available in raw_data_, so
// moving pos_ back is enough for the next Advance() to return the previous
// character again. |ch| is only used to sanity-check the caller.
void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {
  pos_--;
  // Advance() returns raw_data_[pos_++], so the character the scanner holds
  // as its current one always sits at pos_ - 1. After the decrement above,
  // raw_data_[pos_] is the character the scanner held *before* this call
  // (the one the next Advance() returns again), and the pushed-back |ch|,
  // which becomes the scanner's current character, is at pos_ - 1.
  // Comparing |ch| against raw_data_[pos_] would check the wrong character,
  // and pos_ must stay >= 1 for the corrected index to be valid.
  // NOTE(review): the offset 1 mirrors the scanner's single character of
  // lookahead (kCharacterLookaheadBufferSize) -- confirm it is 1.
  ASSERT(pos_ >= 1 && raw_data_[pos_ - 1] == ch);
}
|
||||
|
||||
|
||||
// Moves the read position to |pos|. The characters are indexed directly, so
// only the position needs updating. No range check is done here; Advance()
// returns its end-of-input marker for any position at or beyond size_.
void TwoByteStringUTF16Buffer::SeekForward(int pos) {
  pos_ = pos;
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Scanner
|
||||
|
||||
@ -161,7 +203,15 @@ Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {
|
||||
void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
|
||||
int position) {
|
||||
// Initialize the source buffer.
|
||||
source_.Initialize(source, stream);
|
||||
if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
|
||||
two_byte_string_buffer_.Initialize(
|
||||
Handle<ExternalTwoByteString>::cast(source));
|
||||
source_ = &two_byte_string_buffer_;
|
||||
} else {
|
||||
char_stream_buffer_.Initialize(source, stream);
|
||||
source_ = &char_stream_buffer_;
|
||||
}
|
||||
|
||||
position_ = position;
|
||||
|
||||
// Reset literals buffer
|
||||
@ -180,7 +230,7 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
|
||||
|
||||
|
||||
Handle<String> Scanner::SubString(int start, int end) {
|
||||
return source_.SubString(start - position_, end - position_);
|
||||
return source_->SubString(start - position_, end - position_);
|
||||
}
|
||||
|
||||
|
||||
@ -223,17 +273,6 @@ void Scanner::AddCharAdvance() {
|
||||
}
|
||||
|
||||
|
||||
void Scanner::Advance() {
|
||||
c0_ = source_.Advance();
|
||||
}
|
||||
|
||||
|
||||
void Scanner::PushBack(uc32 ch) {
|
||||
source_.PushBack(ch);
|
||||
c0_ = ch;
|
||||
}
|
||||
|
||||
|
||||
static inline bool IsByteOrderMark(uc32 c) {
|
||||
// The Unicode value U+FFFE is guaranteed never to be assigned as a
|
||||
// Unicode character; this implies that in a Unicode context the
|
||||
@ -583,7 +622,7 @@ void Scanner::Scan() {
|
||||
|
||||
|
||||
void Scanner::SeekForward(int pos) {
|
||||
source_.SeekForward(pos - 1);
|
||||
source_->SeekForward(pos - 1);
|
||||
Advance();
|
||||
Scan();
|
||||
}
|
||||
|
@ -73,24 +73,53 @@ class UTF8Buffer {
|
||||
class UTF16Buffer {
|
||||
public:
|
||||
UTF16Buffer();
|
||||
virtual ~UTF16Buffer() {}
|
||||
|
||||
virtual void PushBack(uc32 ch) = 0;
|
||||
// returns a value < 0 when the buffer end is reached
|
||||
virtual uc32 Advance() = 0;
|
||||
virtual void SeekForward(int pos) = 0;
|
||||
|
||||
void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
|
||||
void PushBack(uc32 ch);
|
||||
uc32 Advance(); // returns a value < 0 when the buffer end is reached
|
||||
uint16_t CharAt(int index);
|
||||
int pos() const { return pos_; }
|
||||
int size() const { return size_; }
|
||||
Handle<String> SubString(int start, int end);
|
||||
List<uc32>* pushback_buffer() { return &pushback_buffer_; }
|
||||
void SeekForward(int pos);
|
||||
|
||||
private:
|
||||
protected:
|
||||
Handle<String> data_;
|
||||
int pos_;
|
||||
int size_;
|
||||
};
|
||||
|
||||
|
||||
class CharacterStreamUTF16Buffer: public UTF16Buffer {
|
||||
public:
|
||||
CharacterStreamUTF16Buffer();
|
||||
virtual ~CharacterStreamUTF16Buffer() {}
|
||||
void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
|
||||
virtual void PushBack(uc32 ch);
|
||||
virtual uc32 Advance();
|
||||
virtual void SeekForward(int pos);
|
||||
|
||||
private:
|
||||
List<uc32> pushback_buffer_;
|
||||
uc32 last_;
|
||||
unibrow::CharacterStream* stream_;
|
||||
|
||||
List<uc32>* pushback_buffer() { return &pushback_buffer_; }
|
||||
};
|
||||
|
||||
|
||||
class TwoByteStringUTF16Buffer: public UTF16Buffer {
|
||||
public:
|
||||
TwoByteStringUTF16Buffer();
|
||||
virtual ~TwoByteStringUTF16Buffer() {}
|
||||
void Initialize(Handle<ExternalTwoByteString> data);
|
||||
virtual void PushBack(uc32 ch);
|
||||
virtual uc32 Advance();
|
||||
virtual void SeekForward(int pos);
|
||||
|
||||
private:
|
||||
const uint16_t* raw_data_;
|
||||
};
|
||||
|
||||
|
||||
@ -184,8 +213,11 @@ class Scanner {
|
||||
static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
|
||||
|
||||
private:
|
||||
CharacterStreamUTF16Buffer char_stream_buffer_;
|
||||
TwoByteStringUTF16Buffer two_byte_string_buffer_;
|
||||
|
||||
// Source.
|
||||
UTF16Buffer source_;
|
||||
UTF16Buffer* source_;
|
||||
int position_;
|
||||
|
||||
// Buffer to hold literal values (identifiers, strings, numbers)
|
||||
@ -219,8 +251,11 @@ class Scanner {
|
||||
void TerminateLiteral();
|
||||
|
||||
// Low-level scanning support.
|
||||
void Advance();
|
||||
void PushBack(uc32 ch);
|
||||
void Advance() { c0_ = source_->Advance(); }
|
||||
void PushBack(uc32 ch) {
|
||||
source_->PushBack(ch);
|
||||
c0_ = ch;
|
||||
}
|
||||
|
||||
bool SkipWhiteSpace();
|
||||
Token::Value SkipSingleLineComment();
|
||||
@ -243,7 +278,7 @@ class Scanner {
|
||||
|
||||
// Return the current source position.
|
||||
int source_pos() {
|
||||
return source_.pos() - kCharacterLookaheadBufferSize + position_;
|
||||
return source_->pos() - kCharacterLookaheadBufferSize + position_;
|
||||
}
|
||||
|
||||
// Decodes a unicode escape-sequence which is part of an identifier.
|
||||
|
Loading…
Reference in New Issue
Block a user