Add separate scanner only intended for preparsing.
Review URL: http://codereview.chromium.org/5063003 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5837 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
parent
af7825c05f
commit
9902e43293
@ -37,6 +37,7 @@
|
|||||||
#include "parser.h"
|
#include "parser.h"
|
||||||
#include "platform.h"
|
#include "platform.h"
|
||||||
#include "preparser.h"
|
#include "preparser.h"
|
||||||
|
#include "prescanner.h"
|
||||||
#include "runtime.h"
|
#include "runtime.h"
|
||||||
#include "scopeinfo.h"
|
#include "scopeinfo.h"
|
||||||
#include "string-stream.h"
|
#include "string-stream.h"
|
||||||
@ -4637,6 +4638,57 @@ int ScriptDataImpl::ReadNumber(byte** source) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static ScriptDataImpl* DoPreParse(UTF16Buffer* stream,
|
||||||
|
bool allow_lazy,
|
||||||
|
PartialParserRecorder* recorder) {
|
||||||
|
typedef preparser::Scanner<UTF16Buffer, UTF8Buffer> PreScanner;
|
||||||
|
PreScanner scanner;
|
||||||
|
scanner.Initialize(stream);
|
||||||
|
preparser::PreParser<PreScanner, PartialParserRecorder> preparser;
|
||||||
|
if (!preparser.PreParseProgram(&scanner, recorder, allow_lazy)) {
|
||||||
|
Top::StackOverflow();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract the accumulated data from the recorder as a single
|
||||||
|
// contiguous vector that we are responsible for disposing.
|
||||||
|
Vector<unsigned> store = recorder->ExtractData();
|
||||||
|
return new ScriptDataImpl(store);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Create a UTF16Buffer for the preparser to use as input,
|
||||||
|
// and preparse the source.
|
||||||
|
static ScriptDataImpl* DoPreParse(Handle<String> source,
|
||||||
|
unibrow::CharacterStream* stream,
|
||||||
|
bool allow_lazy,
|
||||||
|
PartialParserRecorder* recorder) {
|
||||||
|
if (source.is_null()) {
|
||||||
|
CharacterStreamUTF16Buffer buffer;
|
||||||
|
int length = stream->Length();
|
||||||
|
buffer.Initialize(source, stream, 0, length);
|
||||||
|
return DoPreParse(&buffer, allow_lazy, recorder);
|
||||||
|
} else if (source->IsExternalAsciiString()) {
|
||||||
|
ExternalStringUTF16Buffer<ExternalAsciiString, char> buffer;
|
||||||
|
int length = source->length();
|
||||||
|
buffer.Initialize(Handle<ExternalAsciiString>::cast(source), 0, length);
|
||||||
|
return DoPreParse(&buffer, allow_lazy, recorder);
|
||||||
|
} else if (source->IsExternalTwoByteString()) {
|
||||||
|
ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t> buffer;
|
||||||
|
int length = source->length();
|
||||||
|
buffer.Initialize(Handle<ExternalTwoByteString>::cast(source), 0, length);
|
||||||
|
return DoPreParse(&buffer, allow_lazy, recorder);
|
||||||
|
} else {
|
||||||
|
CharacterStreamUTF16Buffer buffer;
|
||||||
|
SafeStringInputBuffer input;
|
||||||
|
input.Reset(0, source.location());
|
||||||
|
int length = source->length();
|
||||||
|
buffer.Initialize(source, &input, 0, length);
|
||||||
|
return DoPreParse(&buffer, allow_lazy, recorder);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Preparse, but only collect data that is immediately useful,
|
// Preparse, but only collect data that is immediately useful,
|
||||||
// even if the preparser data is only used once.
|
// even if the preparser data is only used once.
|
||||||
ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
|
ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
|
||||||
@ -4649,19 +4701,9 @@ ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
|
|||||||
// If we don't allow lazy compilation, the log data will be empty.
|
// If we don't allow lazy compilation, the log data will be empty.
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
preparser::PreParser<Scanner, PartialParserRecorder> parser;
|
|
||||||
Scanner scanner;
|
|
||||||
scanner.Initialize(source, stream, JAVASCRIPT);
|
|
||||||
PartialParserRecorder recorder;
|
PartialParserRecorder recorder;
|
||||||
if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
|
|
||||||
Top::StackOverflow();
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract the accumulated data from the recorder as a single
|
return DoPreParse(source, stream, allow_lazy, &recorder);
|
||||||
// contiguous vector that we are responsible for disposing.
|
|
||||||
Vector<unsigned> store = recorder.ExtractData();
|
|
||||||
return new ScriptDataImpl(store);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -4669,19 +4711,9 @@ ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
|
|||||||
unibrow::CharacterStream* stream,
|
unibrow::CharacterStream* stream,
|
||||||
v8::Extension* extension) {
|
v8::Extension* extension) {
|
||||||
Handle<Script> no_script;
|
Handle<Script> no_script;
|
||||||
preparser::PreParser<Scanner, CompleteParserRecorder> parser;
|
|
||||||
Scanner scanner;
|
|
||||||
scanner.Initialize(source, stream, JAVASCRIPT);
|
|
||||||
bool allow_lazy = FLAG_lazy && (extension == NULL);
|
bool allow_lazy = FLAG_lazy && (extension == NULL);
|
||||||
CompleteParserRecorder recorder;
|
CompleteParserRecorder recorder;
|
||||||
if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
|
return DoPreParse(source, stream, allow_lazy, &recorder);
|
||||||
Top::StackOverflow();
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
// Extract the accumulated data from the recorder as a single
|
|
||||||
// contiguous vector that we are responsible for disposing.
|
|
||||||
Vector<unsigned> store = recorder.ExtractData();
|
|
||||||
return new ScriptDataImpl(store);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
1098
src/prescanner.h
Normal file
1098
src/prescanner.h
Normal file
File diff suppressed because it is too large
Load Diff
@ -41,6 +41,25 @@
|
|||||||
namespace v8 {
|
namespace v8 {
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
|
// Interface through which the scanner reads characters from the input source.
|
||||||
|
class UTF16Buffer {
|
||||||
|
public:
|
||||||
|
UTF16Buffer();
|
||||||
|
virtual ~UTF16Buffer() {}
|
||||||
|
|
||||||
|
virtual void PushBack(uc32 ch) = 0;
|
||||||
|
// Returns a value < 0 when the buffer end is reached.
|
||||||
|
virtual uc32 Advance() = 0;
|
||||||
|
virtual void SeekForward(int pos) = 0;
|
||||||
|
|
||||||
|
int pos() const { return pos_; }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
int pos_; // Current position in the buffer.
|
||||||
|
int end_; // Position where scanning should stop (EOF).
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class ScannerConstants : AllStatic {
|
class ScannerConstants : AllStatic {
|
||||||
public:
|
public:
|
||||||
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
|
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
|
||||||
|
131
src/scanner.cc
131
src/scanner.cc
@ -38,7 +38,7 @@ namespace internal {
|
|||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// UTF8Buffer
|
// UTF8Buffer
|
||||||
|
|
||||||
UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
|
UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { }
|
||||||
|
|
||||||
|
|
||||||
UTF8Buffer::~UTF8Buffer() {}
|
UTF8Buffer::~UTF8Buffer() {}
|
||||||
@ -123,55 +123,6 @@ void CharacterStreamUTF16Buffer::SeekForward(int pos) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ExternalStringUTF16Buffer
|
|
||||||
template <typename StringType, typename CharType>
|
|
||||||
ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
|
|
||||||
: raw_data_(NULL) { }
|
|
||||||
|
|
||||||
|
|
||||||
template <typename StringType, typename CharType>
|
|
||||||
void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
|
|
||||||
Handle<StringType> data,
|
|
||||||
int start_position,
|
|
||||||
int end_position) {
|
|
||||||
ASSERT(!data.is_null());
|
|
||||||
raw_data_ = data->resource()->data();
|
|
||||||
|
|
||||||
ASSERT(end_position <= data->length());
|
|
||||||
if (start_position > 0) {
|
|
||||||
SeekForward(start_position);
|
|
||||||
}
|
|
||||||
end_ =
|
|
||||||
end_position != Scanner::kNoEndPosition ? end_position : data->length();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename StringType, typename CharType>
|
|
||||||
uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
|
|
||||||
if (pos_ < end_) {
|
|
||||||
return raw_data_[pos_++];
|
|
||||||
} else {
|
|
||||||
// note: currently the following increment is necessary to avoid a
|
|
||||||
// test-parser problem!
|
|
||||||
pos_++;
|
|
||||||
return static_cast<uc32>(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename StringType, typename CharType>
|
|
||||||
void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
|
|
||||||
pos_--;
|
|
||||||
ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
|
|
||||||
ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename StringType, typename CharType>
|
|
||||||
void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
|
|
||||||
pos_ = pos;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------------
|
// ----------------------------------------------------------------------------
|
||||||
// Scanner::LiteralScope
|
// Scanner::LiteralScope
|
||||||
|
|
||||||
@ -297,7 +248,7 @@ void Scanner::StartLiteral() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Scanner::AddChar(uc32 c) {
|
void Scanner::AddLiteralChar(uc32 c) {
|
||||||
literal_buffer_.AddChar(c);
|
literal_buffer_.AddChar(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -312,8 +263,8 @@ void Scanner::DropLiteral() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Scanner::AddCharAdvance() {
|
void Scanner::AddLiteralCharAdvance() {
|
||||||
AddChar(c0_);
|
AddLiteralChar(c0_);
|
||||||
Advance();
|
Advance();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -525,29 +476,29 @@ Token::Value Scanner::ScanJsonString() {
|
|||||||
// Check for control character (0x00-0x1f) or unterminated string (<0).
|
// Check for control character (0x00-0x1f) or unterminated string (<0).
|
||||||
if (c0_ < 0x20) return Token::ILLEGAL;
|
if (c0_ < 0x20) return Token::ILLEGAL;
|
||||||
if (c0_ != '\\') {
|
if (c0_ != '\\') {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
} else {
|
} else {
|
||||||
Advance();
|
Advance();
|
||||||
switch (c0_) {
|
switch (c0_) {
|
||||||
case '"':
|
case '"':
|
||||||
case '\\':
|
case '\\':
|
||||||
case '/':
|
case '/':
|
||||||
AddChar(c0_);
|
AddLiteralChar(c0_);
|
||||||
break;
|
break;
|
||||||
case 'b':
|
case 'b':
|
||||||
AddChar('\x08');
|
AddLiteralChar('\x08');
|
||||||
break;
|
break;
|
||||||
case 'f':
|
case 'f':
|
||||||
AddChar('\x0c');
|
AddLiteralChar('\x0c');
|
||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
AddChar('\x0a');
|
AddLiteralChar('\x0a');
|
||||||
break;
|
break;
|
||||||
case 'r':
|
case 'r':
|
||||||
AddChar('\x0d');
|
AddLiteralChar('\x0d');
|
||||||
break;
|
break;
|
||||||
case 't':
|
case 't':
|
||||||
AddChar('\x09');
|
AddLiteralChar('\x09');
|
||||||
break;
|
break;
|
||||||
case 'u': {
|
case 'u': {
|
||||||
uc32 value = 0;
|
uc32 value = 0;
|
||||||
@ -559,7 +510,7 @@ Token::Value Scanner::ScanJsonString() {
|
|||||||
}
|
}
|
||||||
value = value * 16 + digit;
|
value = value * 16 + digit;
|
||||||
}
|
}
|
||||||
AddChar(value);
|
AddLiteralChar(value);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@ -579,31 +530,31 @@ Token::Value Scanner::ScanJsonString() {
|
|||||||
|
|
||||||
Token::Value Scanner::ScanJsonNumber() {
|
Token::Value Scanner::ScanJsonNumber() {
|
||||||
LiteralScope literal(this);
|
LiteralScope literal(this);
|
||||||
if (c0_ == '-') AddCharAdvance();
|
if (c0_ == '-') AddLiteralCharAdvance();
|
||||||
if (c0_ == '0') {
|
if (c0_ == '0') {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
// Prefix zero is only allowed if it's the only digit before
|
// Prefix zero is only allowed if it's the only digit before
|
||||||
// a decimal point or exponent.
|
// a decimal point or exponent.
|
||||||
if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
|
if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
|
||||||
} else {
|
} else {
|
||||||
if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
|
if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
|
||||||
do {
|
do {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
} while (c0_ >= '0' && c0_ <= '9');
|
} while (c0_ >= '0' && c0_ <= '9');
|
||||||
}
|
}
|
||||||
if (c0_ == '.') {
|
if (c0_ == '.') {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
|
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
|
||||||
do {
|
do {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
} while (c0_ >= '0' && c0_ <= '9');
|
} while (c0_ >= '0' && c0_ <= '9');
|
||||||
}
|
}
|
||||||
if (AsciiAlphaToLower(c0_) == 'e') {
|
if (AsciiAlphaToLower(c0_) == 'e') {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
if (c0_ == '-' || c0_ == '+') AddCharAdvance();
|
if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();
|
||||||
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
|
if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
|
||||||
do {
|
do {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
} while (c0_ >= '0' && c0_ <= '9');
|
} while (c0_ >= '0' && c0_ <= '9');
|
||||||
}
|
}
|
||||||
literal.Complete();
|
literal.Complete();
|
||||||
@ -958,7 +909,7 @@ void Scanner::ScanEscape() {
|
|||||||
// According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
|
// According to ECMA-262, 3rd, 7.8.4 (p 18ff) these
|
||||||
// should be illegal, but they are commonly handled
|
// should be illegal, but they are commonly handled
|
||||||
// as non-escaped characters by JS VMs.
|
// as non-escaped characters by JS VMs.
|
||||||
AddChar(c);
|
AddLiteralChar(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -975,7 +926,7 @@ Token::Value Scanner::ScanString() {
|
|||||||
if (c0_ < 0) return Token::ILLEGAL;
|
if (c0_ < 0) return Token::ILLEGAL;
|
||||||
ScanEscape();
|
ScanEscape();
|
||||||
} else {
|
} else {
|
||||||
AddChar(c);
|
AddLiteralChar(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (c0_ != quote) return Token::ILLEGAL;
|
if (c0_ != quote) return Token::ILLEGAL;
|
||||||
@ -1006,7 +957,7 @@ Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) {
|
|||||||
// Scans a run of decimal digits, adding each one to the current literal.
|
// Scans a run of decimal digits, adding each one to the current literal.
|
||||||
void Scanner::ScanDecimalDigits() {
|
void Scanner::ScanDecimalDigits() {
|
||||||
while (IsDecimalDigit(c0_))
|
while (IsDecimalDigit(c0_))
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1018,25 +969,25 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
|
|||||||
LiteralScope literal(this);
|
LiteralScope literal(this);
|
||||||
if (seen_period) {
|
if (seen_period) {
|
||||||
// we have already seen a decimal point of the float
|
// we have already seen a decimal point of the float
|
||||||
AddChar('.');
|
AddLiteralChar('.');
|
||||||
ScanDecimalDigits(); // we know we have at least one digit
|
ScanDecimalDigits(); // we know we have at least one digit
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// if the first character is '0' we must check for octals and hex
|
// if the first character is '0' we must check for octals and hex
|
||||||
if (c0_ == '0') {
|
if (c0_ == '0') {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
|
|
||||||
// either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
|
// either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number
|
||||||
if (c0_ == 'x' || c0_ == 'X') {
|
if (c0_ == 'x' || c0_ == 'X') {
|
||||||
// hex number
|
// hex number
|
||||||
kind = HEX;
|
kind = HEX;
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
if (!IsHexDigit(c0_)) {
|
if (!IsHexDigit(c0_)) {
|
||||||
// we must have at least one hex digit after 'x'/'X'
|
// we must have at least one hex digit after 'x'/'X'
|
||||||
return Token::ILLEGAL;
|
return Token::ILLEGAL;
|
||||||
}
|
}
|
||||||
while (IsHexDigit(c0_)) {
|
while (IsHexDigit(c0_)) {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
}
|
}
|
||||||
} else if ('0' <= c0_ && c0_ <= '7') {
|
} else if ('0' <= c0_ && c0_ <= '7') {
|
||||||
// (possible) octal number
|
// (possible) octal number
|
||||||
@ -1047,7 +998,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (c0_ < '0' || '7' < c0_) break;
|
if (c0_ < '0' || '7' < c0_) break;
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1056,7 +1007,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
|
|||||||
if (kind == DECIMAL) {
|
if (kind == DECIMAL) {
|
||||||
ScanDecimalDigits(); // optional
|
ScanDecimalDigits(); // optional
|
||||||
if (c0_ == '.') {
|
if (c0_ == '.') {
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
ScanDecimalDigits(); // optional
|
ScanDecimalDigits(); // optional
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1067,9 +1018,9 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
|
|||||||
ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
|
ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number
|
||||||
if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
|
if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed
|
||||||
// scan exponent
|
// scan exponent
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
if (c0_ == '+' || c0_ == '-')
|
if (c0_ == '+' || c0_ == '-')
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
if (!IsDecimalDigit(c0_)) {
|
if (!IsDecimalDigit(c0_)) {
|
||||||
// we must have at least one decimal digit after 'e'/'E'
|
// we must have at least one decimal digit after 'e'/'E'
|
||||||
return Token::ILLEGAL;
|
return Token::ILLEGAL;
|
||||||
@ -1113,10 +1064,10 @@ Token::Value Scanner::ScanIdentifier() {
|
|||||||
uc32 c = ScanIdentifierUnicodeEscape();
|
uc32 c = ScanIdentifierUnicodeEscape();
|
||||||
// Only allow legal identifier start characters.
|
// Only allow legal identifier start characters.
|
||||||
if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
|
if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
|
||||||
AddChar(c);
|
AddLiteralChar(c);
|
||||||
keyword_match.Fail();
|
keyword_match.Fail();
|
||||||
} else {
|
} else {
|
||||||
AddChar(c0_);
|
AddLiteralChar(c0_);
|
||||||
keyword_match.AddChar(c0_);
|
keyword_match.AddChar(c0_);
|
||||||
Advance();
|
Advance();
|
||||||
}
|
}
|
||||||
@ -1127,10 +1078,10 @@ Token::Value Scanner::ScanIdentifier() {
|
|||||||
uc32 c = ScanIdentifierUnicodeEscape();
|
uc32 c = ScanIdentifierUnicodeEscape();
|
||||||
// Only allow legal identifier part characters.
|
// Only allow legal identifier part characters.
|
||||||
if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
|
if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
|
||||||
AddChar(c);
|
AddLiteralChar(c);
|
||||||
keyword_match.Fail();
|
keyword_match.Fail();
|
||||||
} else {
|
} else {
|
||||||
AddChar(c0_);
|
AddLiteralChar(c0_);
|
||||||
keyword_match.AddChar(c0_);
|
keyword_match.AddChar(c0_);
|
||||||
Advance();
|
Advance();
|
||||||
}
|
}
|
||||||
@ -1156,18 +1107,18 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
|
|||||||
// constructor.
|
// constructor.
|
||||||
LiteralScope literal(this);
|
LiteralScope literal(this);
|
||||||
if (seen_equal)
|
if (seen_equal)
|
||||||
AddChar('=');
|
AddLiteralChar('=');
|
||||||
|
|
||||||
while (c0_ != '/' || in_character_class) {
|
while (c0_ != '/' || in_character_class) {
|
||||||
if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
|
if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
|
||||||
if (c0_ == '\\') { // escaped character
|
if (c0_ == '\\') { // escaped character
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
|
if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
} else { // unescaped character
|
} else { // unescaped character
|
||||||
if (c0_ == '[') in_character_class = true;
|
if (c0_ == '[') in_character_class = true;
|
||||||
if (c0_ == ']') in_character_class = false;
|
if (c0_ == ']') in_character_class = false;
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Advance(); // consume '/'
|
Advance(); // consume '/'
|
||||||
@ -1186,11 +1137,11 @@ bool Scanner::ScanRegExpFlags() {
|
|||||||
if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
|
if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
|
||||||
// We allow any escaped character, unlike the restriction on
|
// We allow any escaped character, unlike the restriction on
|
||||||
// IdentifierPart when it is used to build an IdentifierName.
|
// IdentifierPart when it is used to build an IdentifierName.
|
||||||
AddChar(c);
|
AddLiteralChar(c);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
AddCharAdvance();
|
AddLiteralCharAdvance();
|
||||||
}
|
}
|
||||||
literal.Complete();
|
literal.Complete();
|
||||||
|
|
||||||
|
101
src/scanner.h
101
src/scanner.h
@ -42,25 +42,35 @@ class UTF8Buffer {
|
|||||||
~UTF8Buffer();
|
~UTF8Buffer();
|
||||||
|
|
||||||
inline void AddChar(uc32 c) {
|
inline void AddChar(uc32 c) {
|
||||||
if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
|
if (recording_) {
|
||||||
buffer_.Add(static_cast<char>(c));
|
if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
|
||||||
} else {
|
buffer_.Add(static_cast<char>(c));
|
||||||
AddCharSlow(c);
|
} else {
|
||||||
|
AddCharSlow(c);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void StartLiteral() {
|
void StartLiteral() {
|
||||||
buffer_.StartSequence();
|
buffer_.StartSequence();
|
||||||
|
recording_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Vector<const char> EndLiteral() {
|
Vector<const char> EndLiteral() {
|
||||||
buffer_.Add(kEndMarker);
|
if (recording_) {
|
||||||
Vector<char> sequence = buffer_.EndSequence();
|
recording_ = false;
|
||||||
return Vector<const char>(sequence.start(), sequence.length());
|
buffer_.Add(kEndMarker);
|
||||||
|
Vector<char> sequence = buffer_.EndSequence();
|
||||||
|
return Vector<const char>(sequence.start(), sequence.length());
|
||||||
|
}
|
||||||
|
return Vector<const char>();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DropLiteral() {
|
void DropLiteral() {
|
||||||
buffer_.DropSequence();
|
if (recording_) {
|
||||||
|
recording_ = false;
|
||||||
|
buffer_.DropSequence();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Reset() {
|
void Reset() {
|
||||||
@ -79,30 +89,11 @@ class UTF8Buffer {
|
|||||||
private:
|
private:
|
||||||
static const int kInitialCapacity = 256;
|
static const int kInitialCapacity = 256;
|
||||||
SequenceCollector<char, 4> buffer_;
|
SequenceCollector<char, 4> buffer_;
|
||||||
|
bool recording_;
|
||||||
void AddCharSlow(uc32 c);
|
void AddCharSlow(uc32 c);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// Interface through which the scanner reads characters from the input source.
|
|
||||||
class UTF16Buffer {
|
|
||||||
public:
|
|
||||||
UTF16Buffer();
|
|
||||||
virtual ~UTF16Buffer() {}
|
|
||||||
|
|
||||||
virtual void PushBack(uc32 ch) = 0;
|
|
||||||
// Returns a value < 0 when the buffer end is reached.
|
|
||||||
virtual uc32 Advance() = 0;
|
|
||||||
virtual void SeekForward(int pos) = 0;
|
|
||||||
|
|
||||||
int pos() const { return pos_; }
|
|
||||||
|
|
||||||
protected:
|
|
||||||
int pos_; // Current position in the buffer.
|
|
||||||
int end_; // Position where scanning should stop (EOF).
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
// UTF16 buffer to read characters from a character stream.
|
// UTF16 buffer to read characters from a character stream.
|
||||||
class CharacterStreamUTF16Buffer: public UTF16Buffer {
|
class CharacterStreamUTF16Buffer: public UTF16Buffer {
|
||||||
public:
|
public:
|
||||||
@ -273,8 +264,8 @@ class Scanner {
|
|||||||
|
|
||||||
// Literal buffer support
|
// Literal buffer support
|
||||||
inline void StartLiteral();
|
inline void StartLiteral();
|
||||||
inline void AddChar(uc32 ch);
|
inline void AddLiteralChar(uc32 ch);
|
||||||
inline void AddCharAdvance();
|
inline void AddLiteralCharAdvance();
|
||||||
inline void TerminateLiteral();
|
inline void TerminateLiteral();
|
||||||
// Stops scanning of a literal, e.g., due to an encountered error.
|
// Stops scanning of a literal, e.g., due to an encountered error.
|
||||||
inline void DropLiteral();
|
inline void DropLiteral();
|
||||||
@ -389,6 +380,56 @@ class Scanner {
|
|||||||
uc32 c0_;
|
uc32 c0_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// ExternalStringUTF16Buffer
|
||||||
|
template <typename StringType, typename CharType>
|
||||||
|
ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
|
||||||
|
: raw_data_(NULL) { }
|
||||||
|
|
||||||
|
|
||||||
|
template <typename StringType, typename CharType>
|
||||||
|
void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
|
||||||
|
Handle<StringType> data,
|
||||||
|
int start_position,
|
||||||
|
int end_position) {
|
||||||
|
ASSERT(!data.is_null());
|
||||||
|
raw_data_ = data->resource()->data();
|
||||||
|
|
||||||
|
ASSERT(end_position <= data->length());
|
||||||
|
if (start_position > 0) {
|
||||||
|
SeekForward(start_position);
|
||||||
|
}
|
||||||
|
end_ =
|
||||||
|
end_position != Scanner::kNoEndPosition ? end_position : data->length();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename StringType, typename CharType>
|
||||||
|
uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
|
||||||
|
if (pos_ < end_) {
|
||||||
|
return raw_data_[pos_++];
|
||||||
|
} else {
|
||||||
|
// note: currently the following increment is necessary to avoid a
|
||||||
|
// test-parser problem!
|
||||||
|
pos_++;
|
||||||
|
return static_cast<uc32>(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename StringType, typename CharType>
|
||||||
|
void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
|
||||||
|
pos_--;
|
||||||
|
ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
|
||||||
|
ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename StringType, typename CharType>
|
||||||
|
void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
|
||||||
|
pos_ = pos;
|
||||||
|
}
|
||||||
|
|
||||||
} } // namespace v8::internal
|
} } // namespace v8::internal
|
||||||
|
|
||||||
#endif // V8_SCANNER_H_
|
#endif // V8_SCANNER_H_
|
||||||
|
Loading…
Reference in New Issue
Block a user