Ensure that scanner state is correctly reset when an error is encountered.

Add a scope object so that leaving literal scanning prematurely cleans up
after itself.
Also reset the literal buffer when a scanner is reinitialized with new
source code.

Review URL: http://codereview.chromium.org/3137037

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5327 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2010-08-24 12:29:50 +00:00
parent 46c0c0f44f
commit 23d0abb040
3 changed files with 102 additions and 33 deletions

View File

@ -319,6 +319,26 @@ void KeywordMatcher::Step(uc32 input) {
} }
// ----------------------------------------------------------------------------
// Scanner::LiteralScope
// Opens a literal on the given scanner. The scope starts out "not complete",
// so unless Complete() is called the destructor will drop the literal.
Scanner::LiteralScope::LiteralScope(Scanner* self)
    : scanner_(self), complete_(false) {
  // scanner_ was just initialized from self, so this is the same call target.
  scanner_->StartLiteral();
}
// Leaving the scope without a prior Complete() call discards the literal
// that was being collected; a completed scope does nothing here.
Scanner::LiteralScope::~LiteralScope() {
  if (complete_) return;
  scanner_->DropLiteral();
}
// Finishes the literal successfully: terminates it on the scanner and marks
// this scope as complete so that the destructor will not drop it.
void Scanner::LiteralScope::Complete() {
  scanner_->TerminateLiteral();
  // Remember the successful termination for the destructor.
  complete_ = true;
}
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Scanner // Scanner
@ -386,8 +406,10 @@ void Scanner::Init(Handle<String> source,
// Set c0_ (one character ahead) // Set c0_ (one character ahead)
ASSERT(kCharacterLookaheadBufferSize == 1); ASSERT(kCharacterLookaheadBufferSize == 1);
Advance(); Advance();
// Initialise current_ to not refer to a literal. // Initialize current_ to not refer to a literal.
current_.literal_chars = Vector<const char>(); current_.literal_chars = Vector<const char>();
// Reset literal buffer.
literal_buffer_.Reset();
// Skip initial whitespace allowing HTML comment ends just like // Skip initial whitespace allowing HTML comment ends just like
// after a newline and scan first token. // after a newline and scan first token.
@ -423,11 +445,17 @@ void Scanner::AddChar(uc32 c) {
literal_buffer_.AddChar(c); literal_buffer_.AddChar(c);
} }
void Scanner::TerminateLiteral() { void Scanner::TerminateLiteral() {
next_.literal_chars = literal_buffer_.EndLiteral(); next_.literal_chars = literal_buffer_.EndLiteral();
} }
// Abandons the literal currently being collected by forwarding to the
// literal buffer's DropLiteral().
void Scanner::DropLiteral() { literal_buffer_.DropLiteral(); }
void Scanner::AddCharAdvance() { void Scanner::AddCharAdvance() {
AddChar(c0_); AddChar(c0_);
Advance(); Advance();
@ -636,7 +664,7 @@ void Scanner::ScanJson() {
Token::Value Scanner::ScanJsonString() { Token::Value Scanner::ScanJsonString() {
ASSERT_EQ('"', c0_); ASSERT_EQ('"', c0_);
Advance(); Advance();
StartLiteral(); LiteralScope literal(this);
while (c0_ != '"' && c0_ > 0) { while (c0_ != '"' && c0_ > 0) {
// Check for control character (0x00-0x1f) or unterminated string (<0). // Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Token::ILLEGAL; if (c0_ < 0x20) return Token::ILLEGAL;
@ -670,7 +698,9 @@ Token::Value Scanner::ScanJsonString() {
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
Advance(); Advance();
int digit = HexValue(c0_); int digit = HexValue(c0_);
if (digit < 0) return Token::ILLEGAL; if (digit < 0) {
return Token::ILLEGAL;
}
value = value * 16 + digit; value = value * 16 + digit;
} }
AddChar(value); AddChar(value);
@ -685,14 +715,14 @@ Token::Value Scanner::ScanJsonString() {
if (c0_ != '"') { if (c0_ != '"') {
return Token::ILLEGAL; return Token::ILLEGAL;
} }
TerminateLiteral(); literal.Complete();
Advance(); Advance();
return Token::STRING; return Token::STRING;
} }
Token::Value Scanner::ScanJsonNumber() { Token::Value Scanner::ScanJsonNumber() {
StartLiteral(); LiteralScope literal(this);
if (c0_ == '-') AddCharAdvance(); if (c0_ == '-') AddCharAdvance();
if (c0_ == '0') { if (c0_ == '0') {
AddCharAdvance(); AddCharAdvance();
@ -720,21 +750,21 @@ Token::Value Scanner::ScanJsonNumber() {
AddCharAdvance(); AddCharAdvance();
} while (c0_ >= '0' && c0_ <= '9'); } while (c0_ >= '0' && c0_ <= '9');
} }
TerminateLiteral(); literal.Complete();
return Token::NUMBER; return Token::NUMBER;
} }
Token::Value Scanner::ScanJsonIdentifier(const char* text, Token::Value Scanner::ScanJsonIdentifier(const char* text,
Token::Value token) { Token::Value token) {
StartLiteral(); LiteralScope literal(this);
while (*text != '\0') { while (*text != '\0') {
if (c0_ != *text) return Token::ILLEGAL; if (c0_ != *text) return Token::ILLEGAL;
Advance(); Advance();
text++; text++;
} }
if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
TerminateLiteral(); literal.Complete();
return token; return token;
} }
@ -1077,7 +1107,7 @@ Token::Value Scanner::ScanString() {
uc32 quote = c0_; uc32 quote = c0_;
Advance(); // consume quote Advance(); // consume quote
StartLiteral(); LiteralScope literal(this);
while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
uc32 c = c0_; uc32 c = c0_;
Advance(); Advance();
@ -1088,10 +1118,8 @@ Token::Value Scanner::ScanString() {
AddChar(c); AddChar(c);
} }
} }
if (c0_ != quote) { if (c0_ != quote) return Token::ILLEGAL;
return Token::ILLEGAL; literal.Complete();
}
TerminateLiteral();
Advance(); // consume quote Advance(); // consume quote
return Token::STRING; return Token::STRING;
@ -1127,7 +1155,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
StartLiteral(); LiteralScope literal(this);
if (seen_period) { if (seen_period) {
// we have already seen a decimal point of the float // we have already seen a decimal point of the float
AddChar('.'); AddChar('.');
@ -1143,12 +1171,13 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
// hex number // hex number
kind = HEX; kind = HEX;
AddCharAdvance(); AddCharAdvance();
if (!IsHexDigit(c0_)) if (!IsHexDigit(c0_)) {
// we must have at least one hex digit after 'x'/'X' // we must have at least one hex digit after 'x'/'X'
return Token::ILLEGAL; return Token::ILLEGAL;
while (IsHexDigit(c0_)) }
while (IsHexDigit(c0_)) {
AddCharAdvance(); AddCharAdvance();
}
} else if ('0' <= c0_ && c0_ <= '7') { } else if ('0' <= c0_ && c0_ <= '7') {
// (possible) octal number // (possible) octal number
kind = OCTAL; kind = OCTAL;
@ -1181,12 +1210,12 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
AddCharAdvance(); AddCharAdvance();
if (c0_ == '+' || c0_ == '-') if (c0_ == '+' || c0_ == '-')
AddCharAdvance(); AddCharAdvance();
if (!IsDecimalDigit(c0_)) if (!IsDecimalDigit(c0_)) {
// we must have at least one decimal digit after 'e'/'E' // we must have at least one decimal digit after 'e'/'E'
return Token::ILLEGAL; return Token::ILLEGAL;
}
ScanDecimalDigits(); ScanDecimalDigits();
} }
TerminateLiteral();
// The source character immediately following a numeric literal must // The source character immediately following a numeric literal must
// not be an identifier start or a decimal digit; see ECMA-262 // not be an identifier start or a decimal digit; see ECMA-262
@ -1195,6 +1224,8 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
return Token::ILLEGAL; return Token::ILLEGAL;
literal.Complete();
return Token::NUMBER; return Token::NUMBER;
} }
@ -1214,7 +1245,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
Token::Value Scanner::ScanIdentifier() { Token::Value Scanner::ScanIdentifier() {
ASSERT(kIsIdentifierStart.get(c0_)); ASSERT(kIsIdentifierStart.get(c0_));
StartLiteral(); LiteralScope literal(this);
KeywordMatcher keyword_match; KeywordMatcher keyword_match;
// Scan identifier start character. // Scan identifier start character.
@ -1244,7 +1275,7 @@ Token::Value Scanner::ScanIdentifier() {
Advance(); Advance();
} }
} }
TerminateLiteral(); literal.Complete();
return keyword_match.token(); return keyword_match.token();
} }
@ -1274,36 +1305,32 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp // the scanner should pass uninterpreted bodies to the RegExp
// constructor. // constructor.
StartLiteral(); LiteralScope literal(this);
if (seen_equal) if (seen_equal)
AddChar('='); AddChar('=');
while (c0_ != '/' || in_character_class) { while (c0_ != '/' || in_character_class) {
if (kIsLineTerminator.get(c0_) || c0_ < 0) if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
return false;
if (c0_ == '\\') { // escaped character if (c0_ == '\\') { // escaped character
AddCharAdvance(); AddCharAdvance();
if (kIsLineTerminator.get(c0_) || c0_ < 0) if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
return false;
AddCharAdvance(); AddCharAdvance();
} else { // unescaped character } else { // unescaped character
if (c0_ == '[') if (c0_ == '[') in_character_class = true;
in_character_class = true; if (c0_ == ']') in_character_class = false;
if (c0_ == ']')
in_character_class = false;
AddCharAdvance(); AddCharAdvance();
} }
} }
Advance(); // consume '/' Advance(); // consume '/'
TerminateLiteral(); literal.Complete();
return true; return true;
} }
bool Scanner::ScanRegExpFlags() { bool Scanner::ScanRegExpFlags() {
// Scan regular expression flags. // Scan regular expression flags.
StartLiteral(); LiteralScope literal(this);
while (kIsIdentifierPart.get(c0_)) { while (kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') { if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape(); uc32 c = ScanIdentifierUnicodeEscape();
@ -1316,7 +1343,7 @@ bool Scanner::ScanRegExpFlags() {
} }
AddCharAdvance(); AddCharAdvance();
} }
TerminateLiteral(); literal.Complete();
next_.location.end_pos = source_pos() - 1; next_.location.end_pos = source_pos() - 1;
return true; return true;

View File

@ -58,6 +58,14 @@ class UTF8Buffer {
return Vector<const char>(sequence.start(), sequence.length()); return Vector<const char>(sequence.start(), sequence.length());
} }
// Discards the literal in progress by dropping the current sequence of the
// underlying buffer.
void DropLiteral() { buffer_.DropSequence(); }
// Resets the underlying buffer (delegates to buffer_.Reset()).
void Reset() { buffer_.Reset(); }
// The end marker added after a parsed literal. // The end marker added after a parsed literal.
// Using zero allows the usage of strlen and similar functions on // Using zero allows the usage of strlen and similar functions on
// identifiers and numbers (but not strings, since they may contain zero // identifiers and numbers (but not strings, since they may contain zero
@ -262,6 +270,17 @@ class Scanner {
public: public:
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
// Scope object tied to a literal being scanned by a Scanner. Call Complete()
// once the literal has been scanned successfully; if the scope is destroyed
// without that call, the scanner is asked to drop the literal instead.
class LiteralScope {
public:
// Binds the scope to |self| and begins a literal on it.
explicit LiteralScope(Scanner* self);
// Drops the literal on the scanner unless Complete() was called.
~LiteralScope();
// Terminates the literal on the scanner and marks the scope as complete.
void Complete();
private:
// The scanner whose literal this scope guards.
Scanner* scanner_;
// True once Complete() has been called.
bool complete_;
};
// Construction // Construction
explicit Scanner(ParserMode parse_mode); explicit Scanner(ParserMode parse_mode);
@ -382,6 +401,8 @@ class Scanner {
inline void AddChar(uc32 ch); inline void AddChar(uc32 ch);
inline void AddCharAdvance(); inline void AddCharAdvance();
inline void TerminateLiteral(); inline void TerminateLiteral();
// Stops scanning of a literal, e.g., due to an encountered error.
inline void DropLiteral();
// Low-level scanning support. // Low-level scanning support.
void Advance() { c0_ = source_->Advance(); } void Advance() { c0_ = source_->Advance(); }

View File

@ -560,6 +560,15 @@ class Collector {
return Vector<T>(new_store, total_length); return Vector<T>(new_store, total_length);
} }
// Empties the collector: every chunk held in chunks_ is disposed (last one
// first), the chunk list is rewound to length zero, and the write index is
// set back to the start.
virtual void Reset() {
  int remaining = chunks_.length();
  while (remaining > 0) {
    remaining--;
    chunks_.at(remaining).Dispose();
  }
  chunks_.Rewind(0);
  index_ = 0;
}
protected: protected:
static const int kMinCapacity = 16; static const int kMinCapacity = 16;
List<Vector<T> > chunks_; List<Vector<T> > chunks_;
@ -632,6 +641,18 @@ class SequenceCollector : public Collector<T> {
this->index_ - sequence_start); this->index_ - sequence_start);
} }
// Abandons the sequence that is currently being added: the write index is
// moved back to where the sequence began, and the collector is marked as
// having no open sequence. Must only be called while a sequence is open.
void DropSequence() {
  ASSERT(sequence_start_ != kNoSequence);
  const int restart_index = sequence_start_;
  sequence_start_ = kNoSequence;
  this->index_ = restart_index;
}
virtual void Reset() {
sequence_start_ = kNoSequence;
this->Collector<T>::Reset();
}
private: private:
static const int kNoSequence = -1; static const int kNoSequence = -1;
int sequence_start_; int sequence_start_;