From c1efaa01fe399aba87748bdd33f3b300c0609e42 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Tue, 10 Nov 2009 10:23:23 +0000 Subject: [PATCH] Reapply 3246 and 3247 after fixing valgrind warning. Review URL: http://codereview.chromium.org/386001 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@3257 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/scanner.cc | 30 +++++++++++++++--------------- src/scanner.h | 43 +++++++++++++++++++++++++++++++------------ 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 23b8aff9c4..0d3b789f9b 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -49,17 +49,11 @@ StaticResource Scanner::utf8_decoder_; // ---------------------------------------------------------------------------- // UTF8Buffer -UTF8Buffer::UTF8Buffer() { - static const int kInitialCapacity = 1 * KB; - data_ = NewArray(kInitialCapacity); - limit_ = ComputeLimit(data_, kInitialCapacity); - Reset(); - ASSERT(Capacity() == kInitialCapacity && pos() == 0); -} +UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } UTF8Buffer::~UTF8Buffer() { - DeleteArray(data_); + if (data_ != NULL) DeleteArray(data_); } @@ -69,7 +63,7 @@ void UTF8Buffer::AddCharSlow(uc32 c) { int old_capacity = Capacity(); int old_position = pos(); int new_capacity = - Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit); + Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit); char* new_data = NewArray(new_capacity); memcpy(new_data, data_, old_position); DeleteArray(data_); @@ -346,12 +340,11 @@ void Scanner::Init(Handle source, unibrow::CharacterStream* stream, position_ = position; - // Reset literals buffer - literals_.Reset(); - // Set c0_ (one character ahead) ASSERT(kCharacterLookaheadBufferSize == 1); Advance(); + // Initializer current_ to not refer to a literal buffer. + current_.literal_buffer = NULL; // Skip initial whitespace allowing HTML comment ends just like // after a newline and scan first token. @@ -384,17 +377,23 @@ Token::Value Scanner::Next() { void Scanner::StartLiteral() { - next_.literal_pos = literals_.pos(); + // Use the first buffer unless it's currently in use by the current_ token. + // In most cases we won't have two literals/identifiers in a row, so + // the second buffer won't be used very often and is unlikely to grow much. + UTF8Buffer* free_buffer = + (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ + : &literal_buffer_2_; + next_.literal_buffer = free_buffer; + free_buffer->Reset(); } void Scanner::AddChar(uc32 c) { - literals_.AddChar(c); + next_.literal_buffer->AddChar(c); } void Scanner::TerminateLiteral() { - next_.literal_end = literals_.pos(); AddChar(0); } @@ -514,6 +513,7 @@ Token::Value Scanner::ScanHtmlComment() { void Scanner::Scan() { + next_.literal_buffer = NULL; Token::Value token; has_line_terminator_before_next_ = false; do { diff --git a/src/scanner.h b/src/scanner.h index 201803da5e..dc903cd6af 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -41,6 +41,7 @@ class UTF8Buffer { ~UTF8Buffer(); void AddChar(uc32 c) { + ASSERT_NOT_NULL(data_); if (cursor_ <= limit_ && static_cast(c) <= unibrow::Utf8::kMaxOneByteChar) { *cursor_++ = static_cast(c); @@ -49,16 +50,29 @@ class UTF8Buffer { } } - void Reset() { cursor_ = data_; } - int pos() const { return cursor_ - data_; } + void Reset() { + if (data_ == NULL) { + data_ = NewArray(kInitialCapacity); + limit_ = ComputeLimit(data_, kInitialCapacity); + } + cursor_ = data_; + } + + int pos() const { + ASSERT_NOT_NULL(data_); + return cursor_ - data_; + } + char* data() const { return data_; } private: + static const int kInitialCapacity = 256; char* data_; char* cursor_; char* limit_; int Capacity() const { + ASSERT_NOT_NULL(data_); return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; } @@ -278,26 +292,30 @@ class Scanner { // token returned by Next()). The string is 0-terminated and in // UTF-8 format; they may contain 0-characters. Literal strings are // collected for identifiers, strings, and numbers. + // These functions only give the correct result if the literal + // was scanned between calls to StartLiteral() and TerminateLiteral(). const char* literal_string() const { - return &literals_.data()[current_.literal_pos]; + return current_.literal_buffer->data(); } int literal_length() const { - return current_.literal_end - current_.literal_pos; - } - - Vector next_literal() const { - return Vector(next_literal_string(), next_literal_length()); + // Excluding terminal '\0' added by TerminateLiteral(). + return current_.literal_buffer->pos() - 1; } // Returns the literal string for the next token (the token that // would be returned if Next() were called). const char* next_literal_string() const { - return &literals_.data()[next_.literal_pos]; + return next_.literal_buffer->data(); } // Returns the length of the next token (that would be returned if // Next() were called). int next_literal_length() const { - return next_.literal_end - next_.literal_pos; + return next_.literal_buffer->pos() - 1; + } + + Vector next_literal() const { + return Vector(next_literal_string(), + next_literal_length()); } // Scans the input as a regular expression pattern, previous @@ -339,7 +357,8 @@ class Scanner { // Buffer to hold literal values (identifiers, strings, numbers) // using 0-terminated UTF-8 encoding. - UTF8Buffer literals_; + UTF8Buffer literal_buffer_1_; + UTF8Buffer literal_buffer_2_; bool stack_overflow_; static StaticResource utf8_decoder_; @@ -351,7 +370,7 @@ class Scanner { struct TokenDesc { Token::Value token; Location location; - int literal_pos, literal_end; + UTF8Buffer* literal_buffer; }; TokenDesc current_; // desc for current token (as returned by Next())