Add separate scanner only intended for preparsing.

Review URL: http://codereview.chromium.org/5063003

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5837 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2010-11-17 12:00:22 +00:00
parent af7825c05f
commit 9902e43293
5 changed files with 1283 additions and 142 deletions

View File

@ -37,6 +37,7 @@
#include "parser.h"
#include "platform.h"
#include "preparser.h"
#include "prescanner.h"
#include "runtime.h"
#include "scopeinfo.h"
#include "string-stream.h"
@ -4637,6 +4638,57 @@ int ScriptDataImpl::ReadNumber(byte** source) {
}
// Preparses the characters delivered by |stream| with the dedicated
// preparse scanner, logging into |recorder|.  |allow_lazy| is handed to the
// preparser unchanged.  On success the recorder's data is extracted and
// returned wrapped in a newly allocated ScriptDataImpl.  If
// PreParseProgram returns false, Top::StackOverflow() is called and NULL is
// returned.
static ScriptDataImpl* DoPreParse(UTF16Buffer* stream,
                                  bool allow_lazy,
                                  PartialParserRecorder* recorder) {
  typedef preparser::Scanner<UTF16Buffer, UTF8Buffer> PreScanner;
  typedef preparser::PreParser<PreScanner, PartialParserRecorder> PreParserType;

  PreScanner pre_scanner;
  pre_scanner.Initialize(stream);

  PreParserType pre_parser;
  if (pre_parser.PreParseProgram(&pre_scanner, recorder, allow_lazy)) {
    // The recorder hands over everything it accumulated as one contiguous
    // vector; from this point on we are responsible for disposing it.
    Vector<unsigned> extracted = recorder->ExtractData();
    return new ScriptDataImpl(extracted);
  }

  Top::StackOverflow();
  return NULL;
}
// Creates a UTF16Buffer matching the representation of the input and
// preparses it through the buffer-based DoPreParse overload.
//   - Null |source| handle: characters come from |stream|, up to
//     stream->Length().
//   - External ASCII or external two-byte |source|: an
//     ExternalStringUTF16Buffer over the string is used.
//   - Any other |source|: characters come from a SafeStringInputBuffer
//     reset to |source|; |stream| is not consulted in this case.
// |allow_lazy| and |recorder| are passed through unchanged.
static ScriptDataImpl* DoPreParse(Handle<String> source,
                                  unibrow::CharacterStream* stream,
                                  bool allow_lazy,
                                  PartialParserRecorder* recorder) {
  if (source.is_null()) {
    CharacterStreamUTF16Buffer stream_buffer;
    int stream_length = stream->Length();
    stream_buffer.Initialize(source, stream, 0, stream_length);
    return DoPreParse(&stream_buffer, allow_lazy, recorder);
  }

  if (source->IsExternalAsciiString()) {
    ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_buffer;
    int ascii_length = source->length();
    ascii_buffer.Initialize(
        Handle<ExternalAsciiString>::cast(source), 0, ascii_length);
    return DoPreParse(&ascii_buffer, allow_lazy, recorder);
  }

  if (source->IsExternalTwoByteString()) {
    ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> two_byte_buffer;
    int two_byte_length = source->length();
    two_byte_buffer.Initialize(
        Handle<ExternalTwoByteString>::cast(source), 0, two_byte_length);
    return DoPreParse(&two_byte_buffer, allow_lazy, recorder);
  }

  // Generic string: read it through a safe input buffer.  The UTF16 buffer
  // is declared before the input buffer it reads from, as before.
  CharacterStreamUTF16Buffer generic_buffer;
  SafeStringInputBuffer safe_input;
  safe_input.Reset(0, source.location());
  int source_length = source->length();
  generic_buffer.Initialize(source, &safe_input, 0, source_length);
  return DoPreParse(&generic_buffer, allow_lazy, recorder);
}
// Preparse, but only collect data that is immediately useful,
// even if the preparser data is only used once.
ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
@ -4649,19 +4701,9 @@ ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
// If we don't allow lazy compilation, the log data will be empty.
return NULL;
}
preparser::PreParser<Scanner, PartialParserRecorder> parser;
Scanner scanner;
scanner.Initialize(source, stream, JAVASCRIPT);
PartialParserRecorder recorder;
if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
Top::StackOverflow();
return NULL;
}
// Extract the accumulated data from the recorder as a single
// contiguous vector that we are responsible for disposing.
Vector<unsigned> store = recorder.ExtractData();
return new ScriptDataImpl(store);
return DoPreParse(source, stream, allow_lazy, &recorder);
}
@ -4669,19 +4711,9 @@ ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {
Handle<Script> no_script;
preparser::PreParser<Scanner, CompleteParserRecorder> parser;
Scanner scanner;
scanner.Initialize(source, stream, JAVASCRIPT);
bool allow_lazy = FLAG_lazy && (extension == NULL);
CompleteParserRecorder recorder;
if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
Top::StackOverflow();
return NULL;
}
// Extract the accumulated data from the recorder as a single
// contiguous vector that we are responsible for disposing.
Vector<unsigned> store = recorder.ExtractData();
return new ScriptDataImpl(store);
return DoPreParse(source, stream, allow_lazy, &recorder);
}

1098
src/prescanner.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -41,6 +41,25 @@
namespace v8 {
namespace internal {
// Interface through which the scanner reads characters from the input source.
// Concrete buffers implement PushBack, Advance and SeekForward; the base
// class only stores the current position (pos_) and the stop position (end_).
class UTF16Buffer {
public:
UTF16Buffer();
// Virtual destructor, since the class is used polymorphically.
virtual ~UTF16Buffer() {}
// Pushes the character |ch| back into the buffer.
virtual void PushBack(uc32 ch) = 0;
// Returns a value < 0 when the buffer end is reached.
virtual uc32 Advance() = 0;
// Repositions the buffer to |pos|.
virtual void SeekForward(int pos) = 0;
// Current position in the buffer.
int pos() const { return pos_; }
protected:
int pos_; // Current position in the buffer.
int end_; // Position where scanning should stop (EOF).
};
class ScannerConstants : AllStatic {
public:
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

View File

@ -38,7 +38,7 @@ namespace internal {
// ----------------------------------------------------------------------------
// UTF8Buffer
UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { }
UTF8Buffer::~UTF8Buffer() {}
@ -123,55 +123,6 @@ void CharacterStreamUTF16Buffer::SeekForward(int pos) {
}
// ExternalStringUTF16Buffer
// Constructs a buffer with no character data attached (raw_data_ is NULL);
// the data pointer is set later by Initialize().
template <typename StringType, typename CharType>
ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
: raw_data_(NULL) { }
// Attaches the buffer to the character data of the external string |data|.
// If |start_position| is positive the buffer seeks forward to it.  The stop
// position becomes |end_position|, except that Scanner::kNoEndPosition
// selects the full length of |data|.
template <typename StringType, typename CharType>
void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
    Handle<StringType> data,
    int start_position,
    int end_position) {
  ASSERT(!data.is_null());
  raw_data_ = data->resource()->data();
  ASSERT(end_position <= data->length());

  if (start_position > 0) SeekForward(start_position);

  if (end_position == Scanner::kNoEndPosition) {
    end_ = data->length();
  } else {
    end_ = end_position;
  }
}
// Returns the character at the current position and steps past it.  Once
// the stop position has been reached, returns -1 (as uc32) instead.
template <typename StringType, typename CharType>
uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
  if (pos_ >= end_) {
    // Note: the position is still incremented at the end of the input;
    // this is currently necessary to avoid a test-parser problem.
    pos_++;
    return static_cast<uc32>(-1);
  }
  return raw_data_[pos_++];
}
// Steps the current position back by one.  |ch| is only consulted by the
// ASSERTs, which check that the new position is at least
// Scanner::kCharacterLookaheadBufferSize and that |ch| equals the character
// stored that many positions before the new position.
template <typename StringType, typename CharType>
void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
pos_--;
ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
}
// Sets the current position to |pos|.  No check against end_ is made here.
template <typename StringType, typename CharType>
void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
pos_ = pos;
}
// ----------------------------------------------------------------------------
// Scanner::LiteralScope
@ -297,7 +248,7 @@ void Scanner::StartLiteral() {
}
void Scanner::AddChar(uc32 c) {
void Scanner::AddLiteralChar(uc32 c) {
literal_buffer_.AddChar(c);
}
@ -312,8 +263,8 @@ void Scanner::DropLiteral() {
}
void Scanner::AddCharAdvance() {
AddChar(c0_);
void Scanner::AddLiteralCharAdvance() {
AddLiteralChar(c0_);
Advance();
}
@ -525,29 +476,29 @@ Token::Value Scanner::ScanJsonString() {
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Token::ILLEGAL;
if (c0_ != '\\') {
AddCharAdvance();
AddLiteralCharAdvance();
} else {
Advance();
switch (c0_) {
case '"':
case '\\':
case '/':
AddChar(c0_);
AddLiteralChar(c0_);
break;
case 'b':
AddChar('\x08');
AddLiteralChar('\x08');
break;
case 'f':
AddChar('\x0c');
AddLiteralChar('\x0c');
break;
case 'n':
AddChar('\x0a');
AddLiteralChar('\x0a');
break;
case 'r':
AddChar('\x0d');
AddLiteralChar('\x0d');
break;
case 't':
AddChar('\x09');
AddLiteralChar('\x09');
break;
case 'u': {
uc32 value = 0;
@ -559,7 +510,7 @@ Token::Value Scanner::ScanJsonString() {
}
value = value * 16 + digit;
}
AddChar(value);
AddLiteralChar(value);
break;
}
default:
@ -579,31 +530,31 @@ Token::Value Scanner::ScanJsonString() {
Token::Value Scanner::ScanJsonNumber() {
LiteralScope literal(this);
if (c0_ == '-') AddCharAdvance();
if (c0_ == '-') AddLiteralCharAdvance();
if (c0_ == '0') {
AddCharAdvance();
AddLiteralCharAdvance();
// Prefix zero is only allowed if it's the only digit before
// a decimal point or exponent.
if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
} else {
if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
do {
AddCharAdvance();
AddLiteralCharAdvance();
} while (c0_ >= '0' && c0_ <= '9');
}
if (c0_ == '.') {
AddCharAdvance();
AddLiteralCharAdvance();
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
do {
AddCharAdvance();
AddLiteralCharAdvance();
} while (c0_ >= '0' && c0_ <= '9');
}
if (AsciiAlphaToLower(c0_) == 'e') {
AddCharAdvance();
if (c0_ == '-' || c0_ == '+') AddCharAdvance();
AddLiteralCharAdvance();
if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
do {
AddCharAdvance();
AddLiteralCharAdvance();
} while (c0_ >= '0' && c0_ <= '9');
}
literal.Complete();
@ -958,7 +909,7 @@ void Scanner::ScanEscape() {
// According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
// should be illegal, but they are commonly handled
// as non-escaped characters by JS VMs.
AddChar(c);
AddLiteralChar(c);
}
@ -975,7 +926,7 @@ Token::Value Scanner::ScanString() {
if (c0_ < 0) return Token::ILLEGAL;
ScanEscape();
} else {
AddChar(c);
AddLiteralChar(c);
}
}
if (c0_ != quote) return Token::ILLEGAL;
@ -1006,7 +957,7 @@ Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {
// Scans a (possibly empty) run of decimal digits, adding each one to the current literal.
void Scanner::ScanDecimalDigits() {
while (IsDecimalDigit(c0_))
AddCharAdvance();
AddLiteralCharAdvance();
}
@ -1018,25 +969,25 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
LiteralScope literal(this);
if (seen_period) {
// we have already seen a decimal point of the float
AddChar('.');
AddLiteralChar('.');
ScanDecimalDigits(); // we know we have at least one digit
} else {
// if the first character is '0' we must check for octals and hex
if (c0_ == '0') {
AddCharAdvance();
AddLiteralCharAdvance();
// either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
if (c0_ == 'x' || c0_ == 'X') {
// hex number
kind = HEX;
AddCharAdvance();
AddLiteralCharAdvance();
if (!IsHexDigit(c0_)) {
// we must have at least one hex digit after 'x'/'X'
return Token::ILLEGAL;
}
while (IsHexDigit(c0_)) {
AddCharAdvance();
AddLiteralCharAdvance();
}
} else if ('0' <= c0_ && c0_ <= '7') {
// (possible) octal number
@ -1047,7 +998,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
break;
}
if (c0_ < '0' || '7' < c0_) break;
AddCharAdvance();
AddLiteralCharAdvance();
}
}
}
@ -1056,7 +1007,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
if (kind == DECIMAL) {
ScanDecimalDigits(); // optional
if (c0_ == '.') {
AddCharAdvance();
AddLiteralCharAdvance();
ScanDecimalDigits(); // optional
}
}
@ -1067,9 +1018,9 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
// scan exponent
AddCharAdvance();
AddLiteralCharAdvance();
if (c0_ == '+' || c0_ == '-')
AddCharAdvance();
AddLiteralCharAdvance();
if (!IsDecimalDigit(c0_)) {
// we must have at least one decimal digit after 'e'/'E'
return Token::ILLEGAL;
@ -1113,10 +1064,10 @@ Token::Value Scanner::ScanIdentifier() {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier start characters.
if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
AddChar(c);
AddLiteralChar(c);
keyword_match.Fail();
} else {
AddChar(c0_);
AddLiteralChar(c0_);
keyword_match.AddChar(c0_);
Advance();
}
@ -1127,10 +1078,10 @@ Token::Value Scanner::ScanIdentifier() {
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier part characters.
if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
AddChar(c);
AddLiteralChar(c);
keyword_match.Fail();
} else {
AddChar(c0_);
AddLiteralChar(c0_);
keyword_match.AddChar(c0_);
Advance();
}
@ -1156,18 +1107,18 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
// constructor.
LiteralScope literal(this);
if (seen_equal)
AddChar('=');
AddLiteralChar('=');
while (c0_ != '/' || in_character_class) {
if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
if (c0_ == '\\') { // escaped character
AddCharAdvance();
AddLiteralCharAdvance();
if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
AddCharAdvance();
AddLiteralCharAdvance();
} else { // unescaped character
if (c0_ == '[') in_character_class = true;
if (c0_ == ']') in_character_class = false;
AddCharAdvance();
AddLiteralCharAdvance();
}
}
Advance(); // consume '/'
@ -1186,11 +1137,11 @@ bool Scanner::ScanRegExpFlags() {
if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
// We allow any escaped character, unlike the restriction on
// IdentifierPart when it is used to build an IdentifierName.
AddChar(c);
AddLiteralChar(c);
continue;
}
}
AddCharAdvance();
AddLiteralCharAdvance();
}
literal.Complete();

View File

@ -42,25 +42,35 @@ class UTF8Buffer {
~UTF8Buffer();
inline void AddChar(uc32 c) {
if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
buffer_.Add(static_cast<char>(c));
} else {
AddCharSlow(c);
if (recording_) {
if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
buffer_.Add(static_cast<char>(c));
} else {
AddCharSlow(c);
}
}
}
void StartLiteral() {
buffer_.StartSequence();
recording_ = true;
}
Vector<const char> EndLiteral() {
buffer_.Add(kEndMarker);
Vector<char> sequence = buffer_.EndSequence();
return Vector<const char>(sequence.start(), sequence.length());
if (recording_) {
recording_ = false;
buffer_.Add(kEndMarker);
Vector<char> sequence = buffer_.EndSequence();
return Vector<const char>(sequence.start(), sequence.length());
}
return Vector<const char>();
}
void DropLiteral() {
buffer_.DropSequence();
if (recording_) {
recording_ = false;
buffer_.DropSequence();
}
}
void Reset() {
@ -79,30 +89,11 @@ class UTF8Buffer {
private:
static const int kInitialCapacity = 256;
SequenceCollector<char, 4> buffer_;
bool recording_;
void AddCharSlow(uc32 c);
};
// Interface through which the scanner reads characters from the input source.
// Concrete buffers implement PushBack, Advance and SeekForward; the base
// class only stores the current position (pos_) and the stop position (end_).
class UTF16Buffer {
public:
UTF16Buffer();
// Virtual destructor, since the class is used polymorphically.
virtual ~UTF16Buffer() {}
// Pushes the character |ch| back into the buffer.
virtual void PushBack(uc32 ch) = 0;
// Returns a value < 0 when the buffer end is reached.
virtual uc32 Advance() = 0;
// Repositions the buffer to |pos|.
virtual void SeekForward(int pos) = 0;
// Current position in the buffer.
int pos() const { return pos_; }
protected:
int pos_; // Current position in the buffer.
int end_; // Position where scanning should stop (EOF).
};
// UTF16 buffer to read characters from a character stream.
class CharacterStreamUTF16Buffer: public UTF16Buffer {
public:
@ -273,8 +264,8 @@ class Scanner {
// Literal buffer support
inline void StartLiteral();
inline void AddChar(uc32 ch);
inline void AddCharAdvance();
inline void AddLiteralChar(uc32 ch);
inline void AddLiteralCharAdvance();
inline void TerminateLiteral();
// Stops scanning of a literal, e.g., due to an encountered error.
inline void DropLiteral();
@ -389,6 +380,56 @@ class Scanner {
uc32 c0_;
};
// ExternalStringUTF16Buffer
// Constructs a buffer with no character data attached (raw_data_ is NULL);
// the data pointer is set later by Initialize().
template <typename StringType, typename CharType>
ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
: raw_data_(NULL) { }
// Attaches the buffer to the character data of the external string |data|.
// If |start_position| is positive the buffer seeks forward to it.  The stop
// position becomes |end_position|, except that Scanner::kNoEndPosition
// selects the full length of |data|.
template <typename StringType, typename CharType>
void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
    Handle<StringType> data,
    int start_position,
    int end_position) {
  ASSERT(!data.is_null());
  raw_data_ = data->resource()->data();
  ASSERT(end_position <= data->length());

  if (start_position > 0) SeekForward(start_position);

  if (end_position == Scanner::kNoEndPosition) {
    end_ = data->length();
  } else {
    end_ = end_position;
  }
}
// Returns the character at the current position and steps past it.  Once
// the stop position has been reached, returns -1 (as uc32) instead.
template <typename StringType, typename CharType>
uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
  if (pos_ >= end_) {
    // Note: the position is still incremented at the end of the input;
    // this is currently necessary to avoid a test-parser problem.
    pos_++;
    return static_cast<uc32>(-1);
  }
  return raw_data_[pos_++];
}
// Steps the current position back by one.  |ch| is only consulted by the
// ASSERTs, which check that the new position is at least
// Scanner::kCharacterLookaheadBufferSize and that |ch| equals the character
// stored that many positions before the new position.
template <typename StringType, typename CharType>
void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
pos_--;
ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
}
// Sets the current position to |pos|.  No check against end_ is made here.
template <typename StringType, typename CharType>
void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
pos_ = pos;
}
} } // namespace v8::internal
#endif // V8_SCANNER_H_