Rename scanner.* to scanner-character-streams.*. and scanner-base.* to scanner.*

R=lrn@chromium.org Signed-off-by: Thiago Farina <tfarina@chromium.org> Review URL: http://codereview.chromium.org/7739020 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@9195 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
2011-09-08 13:06:44 +00:00 · 2011-09-08 13:06:44 +00:00 · 689f3cb314
commit 689f3cb314
parent 17d3f54b09
24 changed files with 2012 additions and 2006 deletions
--- a/src/SConscript
+++ b/src/SConscript
@ -111,8 +111,8 @@ SOURCES = {
    runtime.cc
    runtime-profiler.cc
    safepoint-table.cc
-    scanner-base.cc
    scanner.cc
+    scanner-character-streams.cc
    scopeinfo.cc
    scopes.cc
    serialize.cc
@ -245,7 +245,7 @@ PREPARSER_SOURCES = {
    preparse-data.cc
    preparser.cc
    preparser-api.cc
-    scanner-base.cc
+    scanner.cc
    strtod.cc
    token.cc
    unicode.cc
--- a/src/api.cc
+++ b/src/api.cc
@ -44,6 +44,7 @@
 #include "platform.h"
 #include "profile-generator-inl.h"
 #include "runtime-profiler.h"
+#include "scanner-character-streams.h"
 #include "serialize.h"
 #include "snapshot.h"
 #include "v8threads.h"
--- a/src/compiler.cc
+++ b/src/compiler.cc
@ -41,6 +41,7 @@
 #include "parser.h"
 #include "rewriter.h"
 #include "runtime-profiler.h"
+#include "scanner-character-streams.h"
 #include "scopeinfo.h"
 #include "scopes.h"
 #include "vm-state-inl.h"
--- a/src/conversions-inl.h
+++ b/src/conversions-inl.h
@ -38,9 +38,10 @@
 // Extra POSIX/ANSI functions for Win32/MSVC.

 #include "conversions.h"
-#include "strtod.h"
-#include "platform.h"
 #include "double.h"
+#include "platform.h"
+#include "scanner.h"
+#include "strtod.h"

 namespace v8 {
 namespace internal {
--- a/src/conversions.cc
+++ b/src/conversions.cc
@ -31,7 +31,6 @@

 #include "conversions-inl.h"
 #include "dtoa.h"
-#include "scanner-base.h"
 #include "strtod.h"
 #include "utils.h"

--- a/src/conversions.h
+++ b/src/conversions.h
@ -30,11 +30,13 @@

 #include <limits>

-#include "scanner-base.h"
+#include "utils.h"

 namespace v8 {
 namespace internal {

+class UnicodeCache;
+
 // Maximum number of significant digits in decimal representation.
 // The longest possible double in decimal representation is
 // (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074
--- a/src/dateparser.h
+++ b/src/dateparser.h
@ -30,7 +30,6 @@

 #include "allocation.h"
 #include "char-predicates-inl.h"
-#include "scanner-base.h"

 namespace v8 {
 namespace internal {
--- a/src/heap.cc
+++ b/src/heap.cc
@ -41,7 +41,6 @@
 #include "natives.h"
 #include "objects-visiting.h"
 #include "runtime-profiler.h"
-#include "scanner-base.h"
 #include "scopeinfo.h"
 #include "snapshot.h"
 #include "v8threads.h"
--- a/src/isolate.cc
+++ b/src/isolate.cc
@ -43,7 +43,6 @@
 #include "messages.h"
 #include "regexp-stack.h"
 #include "runtime-profiler.h"
-#include "scanner.h"
 #include "scopeinfo.h"
 #include "serialize.h"
 #include "simulator.h"
--- a/src/objects.cc
+++ b/src/objects.cc
@ -41,7 +41,6 @@
 #include "objects-visiting.h"
 #include "macro-assembler.h"
 #include "safepoint-table.h"
-#include "scanner-base.h"
 #include "string-stream.h"
 #include "utils.h"
 #include "vm-state-inl.h"
--- a/src/parser.cc
+++ b/src/parser.cc
@ -39,6 +39,7 @@
 #include "platform.h"
 #include "preparser.h"
 #include "runtime.h"
+#include "scanner-character-streams.h"
 #include "scopeinfo.h"
 #include "string-stream.h"

--- a/src/parser.h
+++ b/src/parser.h
@ -30,10 +30,9 @@

 #include "allocation.h"
 #include "ast.h"
-#include "scanner.h"
-#include "scopes.h"
 #include "preparse-data-format.h"
 #include "preparse-data.h"
+#include "scopes.h"

 namespace v8 {
 namespace internal {
--- a/src/preparser-api.cc
+++ b/src/preparser-api.cc
@ -38,7 +38,6 @@
 #include "utils.h"
 #include "list.h"
 #include "hashmap.h"
-#include "scanner-base.h"
 #include "preparse-data-format.h"
 #include "preparse-data.h"
 #include "preparser.h"
--- a/src/preparser.cc
+++ b/src/preparser.cc
@ -28,21 +28,19 @@
 #include <math.h>

 #include "../include/v8stdint.h"
-#include "unicode.h"
-#include "globals.h"
-#include "checks.h"
-#include "allocation.h"
-#include "utils.h"
-#include "list.h"
-#include "conversions.h"
-#include "hashmap.h"

-#include "scanner-base.h"
+#include "allocation.h"
+#include "checks.h"
+#include "conversions.h"
+#include "conversions-inl.h"
+#include "globals.h"
+#include "hashmap.h"
+#include "list.h"
 #include "preparse-data-format.h"
 #include "preparse-data.h"
 #include "preparser.h"
-
-#include "conversions-inl.h"
+#include "unicode.h"
+#include "utils.h"

 namespace v8 {

--- a/src/preparser.h
+++ b/src/preparser.h
@ -28,7 +28,15 @@
 #ifndef V8_PREPARSER_H
 #define V8_PREPARSER_H

+#include "token.h"
+#include "scanner.h"
+
 namespace v8 {
+
+namespace internal {
+class UnicodeCache;
+}
+
 namespace preparser {

 typedef uint8_t byte;
@ -106,7 +114,7 @@ class PreParser {
    kPreParseSuccess
  };

-  ~PreParser() { }
+  ~PreParser() {}

  // Pre-parse the program from the character stream; returns true on
  // success (even if parsing failed, the pre-parse data successfully
--- a/src/scanner-base.cc
+++ b/src/scanner-base.cc
--- a/src/scanner-base.h
+++ b/src/scanner-base.h
@ -1,564 +0,0 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// Features shared by parsing and pre-parsing scanners.
-
-#ifndef V8_SCANNER_BASE_H_
-#define V8_SCANNER_BASE_H_
-
-#include "allocation.h"
-#include "char-predicates.h"
-#include "checks.h"
-#include "globals.h"
-#include "token.h"
-#include "unicode-inl.h"
-#include "utils.h"
-
-namespace v8 {
-namespace internal {
-
-// Returns the value (0 .. 15) of a hexadecimal character c.
-// If c is not a legal hexadecimal character, returns a value < 0.
-inline int HexValue(uc32 c) {
-  c -= '0';
-  if (static_cast<unsigned>(c) <= 9) return c;
-  c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
-  if (static_cast<unsigned>(c) <= 5) return c + 10;
-  return -1;
-}
-
-
-// ---------------------------------------------------------------------
-// Buffered stream of characters, using an internal UC16 buffer.
-
-class UC16CharacterStream {
- public:
-  UC16CharacterStream() : pos_(0) { }
-  virtual ~UC16CharacterStream() { }
-
-  // Returns and advances past the next UC16 character in the input
-  // stream. If there are no more characters, it returns a negative
-  // value.
-  inline uc32 Advance() {
-    if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
-      pos_++;
-      return static_cast<uc32>(*(buffer_cursor_++));
-    }
-    // Note: currently the following increment is necessary to avoid a
-    // parser problem! The scanner treats the final kEndOfInput as
-    // a character with a position, and does math relative to that
-    // position.
-    pos_++;
-
-    return kEndOfInput;
-  }
-
-  // Return the current position in the character stream.
-  // Starts at zero.
-  inline unsigned pos() const { return pos_; }
-
-  // Skips forward past the next character_count UC16 characters
-  // in the input, or until the end of input if that comes sooner.
-  // Returns the number of characters actually skipped. If less
-  // than character_count,
-  inline unsigned SeekForward(unsigned character_count) {
-    unsigned buffered_chars =
-        static_cast<unsigned>(buffer_end_ - buffer_cursor_);
-    if (character_count <= buffered_chars) {
-      buffer_cursor_ += character_count;
-      pos_ += character_count;
-      return character_count;
-    }
-    return SlowSeekForward(character_count);
-  }
-
-  // Pushes back the most recently read UC16 character (or negative
-  // value if at end of input), i.e., the value returned by the most recent
-  // call to Advance.
-  // Must not be used right after calling SeekForward.
-  virtual void PushBack(int32_t character) = 0;
-
- protected:
-  static const uc32 kEndOfInput = -1;
-
-  // Ensures that the buffer_cursor_ points to the character at
-  // position pos_ of the input, if possible. If the position
-  // is at or after the end of the input, return false. If there
-  // are more characters available, return true.
-  virtual bool ReadBlock() = 0;
-  virtual unsigned SlowSeekForward(unsigned character_count) = 0;
-
-  const uc16* buffer_cursor_;
-  const uc16* buffer_end_;
-  unsigned pos_;
-};
-
-
-class UnicodeCache {
-// ---------------------------------------------------------------------
-// Caching predicates used by scanners.
- public:
-  UnicodeCache() {}
-  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
-
-  StaticResource<Utf8Decoder>* utf8_decoder() {
-    return &utf8_decoder_;
-  }
-
-  bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); }
-  bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
-  bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
-  bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
-
- private:
-
-  unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
-  unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
-  unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
-  unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
-  StaticResource<Utf8Decoder> utf8_decoder_;
-
-  DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
-};
-
-
-// ----------------------------------------------------------------------------
-// LiteralBuffer -  Collector of chars of literals.
-
-class LiteralBuffer {
- public:
-  LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
-
-  ~LiteralBuffer() {
-    if (backing_store_.length() > 0) {
-      backing_store_.Dispose();
-    }
-  }
-
-  inline void AddChar(uc16 character) {
-    if (position_ >= backing_store_.length()) ExpandBuffer();
-    if (is_ascii_) {
-      if (character < kMaxAsciiCharCodeU) {
-        backing_store_[position_] = static_cast<byte>(character);
-        position_ += kASCIISize;
-        return;
-      }
-      ConvertToUC16();
-    }
-    *reinterpret_cast<uc16*>(&backing_store_[position_]) = character;
-    position_ += kUC16Size;
-  }
-
-  bool is_ascii() { return is_ascii_; }
-
-  Vector<const uc16> uc16_literal() {
-    ASSERT(!is_ascii_);
-    ASSERT((position_ & 0x1) == 0);
-    return Vector<const uc16>(
-        reinterpret_cast<const uc16*>(backing_store_.start()),
-        position_ >> 1);
-  }
-
-  Vector<const char> ascii_literal() {
-    ASSERT(is_ascii_);
-    return Vector<const char>(
-        reinterpret_cast<const char*>(backing_store_.start()),
-        position_);
-  }
-
-  int length() {
-    return is_ascii_ ? position_ : (position_ >> 1);
-  }
-
-  void Reset() {
-    position_ = 0;
-    is_ascii_ = true;
-  }
- private:
-  static const int kInitialCapacity = 16;
-  static const int kGrowthFactory = 4;
-  static const int kMinConversionSlack = 256;
-  static const int kMaxGrowth = 1 * MB;
-  inline int NewCapacity(int min_capacity) {
-    int capacity = Max(min_capacity, backing_store_.length());
-    int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
-    return new_capacity;
-  }
-
-  void ExpandBuffer() {
-    Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
-    memcpy(new_store.start(), backing_store_.start(), position_);
-    backing_store_.Dispose();
-    backing_store_ = new_store;
-  }
-
-  void ConvertToUC16() {
-    ASSERT(is_ascii_);
-    Vector<byte> new_store;
-    int new_content_size = position_ * kUC16Size;
-    if (new_content_size >= backing_store_.length()) {
-      // Ensure room for all currently read characters as UC16 as well
-      // as the character about to be stored.
-      new_store = Vector<byte>::New(NewCapacity(new_content_size));
-    } else {
-      new_store = backing_store_;
-    }
-    char* src = reinterpret_cast<char*>(backing_store_.start());
-    uc16* dst = reinterpret_cast<uc16*>(new_store.start());
-    for (int i = position_ - 1; i >= 0; i--) {
-      dst[i] = src[i];
-    }
-    if (new_store.start() != backing_store_.start()) {
-      backing_store_.Dispose();
-      backing_store_ = new_store;
-    }
-    position_ = new_content_size;
-    is_ascii_ = false;
-  }
-
-  bool is_ascii_;
-  int position_;
-  Vector<byte> backing_store_;
-
-  DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
-};
-
-
-// ----------------------------------------------------------------------------
-// Scanner base-class.
-
-// Generic functionality used by both JSON and JavaScript scanners.
-class Scanner {
- public:
-  // -1 is outside of the range of any real source code.
-  static const int kNoOctalLocation = -1;
-
-  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
-
-  class LiteralScope {
-   public:
-    explicit LiteralScope(Scanner* self);
-    ~LiteralScope();
-    void Complete();
-
-   private:
-    Scanner* scanner_;
-    bool complete_;
-  };
-
-  explicit Scanner(UnicodeCache* scanner_contants);
-
-  // Returns the current token again.
-  Token::Value current_token() { return current_.token; }
-
-  // One token look-ahead (past the token returned by Next()).
-  Token::Value peek() const { return next_.token; }
-
-  struct Location {
-    Location(int b, int e) : beg_pos(b), end_pos(e) { }
-    Location() : beg_pos(0), end_pos(0) { }
-
-    bool IsValid() const {
-      return beg_pos >= 0 && end_pos >= beg_pos;
-    }
-
-    static Location invalid() { return Location(-1, -1); }
-
-    int beg_pos;
-    int end_pos;
-  };
-
-  // Returns the location information for the current token
-  // (the token returned by Next()).
-  Location location() const { return current_.location; }
-  Location peek_location() const { return next_.location; }
-
-  // Returns the literal string, if any, for the current token (the
-  // token returned by Next()). The string is 0-terminated and in
-  // UTF-8 format; they may contain 0-characters. Literal strings are
-  // collected for identifiers, strings, and numbers.
-  // These functions only give the correct result if the literal
-  // was scanned between calls to StartLiteral() and TerminateLiteral().
-  bool is_literal_ascii() {
-    ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->is_ascii();
-  }
-  Vector<const char> literal_ascii_string() {
-    ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->ascii_literal();
-  }
-  Vector<const uc16> literal_uc16_string() {
-    ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->uc16_literal();
-  }
-  int literal_length() const {
-    ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->length();
-  }
-
-  bool literal_contains_escapes() const {
-    Location location = current_.location;
-    int source_length = (location.end_pos - location.beg_pos);
-    if (current_.token == Token::STRING) {
-      // Subtract delimiters.
-      source_length -= 2;
-    }
-    return current_.literal_chars->length() != source_length;
-  }
-
-  // Returns the literal string for the next token (the token that
-  // would be returned if Next() were called).
-  bool is_next_literal_ascii() {
-    ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->is_ascii();
-  }
-  Vector<const char> next_literal_ascii_string() {
-    ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->ascii_literal();
-  }
-  Vector<const uc16> next_literal_uc16_string() {
-    ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->uc16_literal();
-  }
-  int next_literal_length() const {
-    ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->length();
-  }
-
-  UnicodeCache* unicode_cache() { return unicode_cache_; }
-
-  static const int kCharacterLookaheadBufferSize = 1;
-
- protected:
-  // The current and look-ahead token.
-  struct TokenDesc {
-    Token::Value token;
-    Location location;
-    LiteralBuffer* literal_chars;
-  };
-
-  // Call this after setting source_ to the input.
-  void Init() {
-    // Set c0_ (one character ahead)
-    STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
-    Advance();
-    // Initialize current_ to not refer to a literal.
-    current_.literal_chars = NULL;
-  }
-
-  // Literal buffer support
-  inline void StartLiteral() {
-    LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
-            &literal_buffer2_ : &literal_buffer1_;
-    free_buffer->Reset();
-    next_.literal_chars = free_buffer;
-  }
-
-  inline void AddLiteralChar(uc32 c) {
-    ASSERT_NOT_NULL(next_.literal_chars);
-    next_.literal_chars->AddChar(c);
-  }
-
-  // Complete scanning of a literal.
-  inline void TerminateLiteral() {
-    // Does nothing in the current implementation.
-  }
-
-  // Stops scanning of a literal and drop the collected characters,
-  // e.g., due to an encountered error.
-  inline void DropLiteral() {
-    next_.literal_chars = NULL;
-  }
-
-  inline void AddLiteralCharAdvance() {
-    AddLiteralChar(c0_);
-    Advance();
-  }
-
-  // Low-level scanning support.
-  void Advance() { c0_ = source_->Advance(); }
-  void PushBack(uc32 ch) {
-    source_->PushBack(c0_);
-    c0_ = ch;
-  }
-
-  inline Token::Value Select(Token::Value tok) {
-    Advance();
-    return tok;
-  }
-
-  inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
-    Advance();
-    if (c0_ == next) {
-      Advance();
-      return then;
-    } else {
-      return else_;
-    }
-  }
-
-  uc32 ScanHexNumber(int expected_length);
-
-  // Return the current source position.
-  int source_pos() {
-    return source_->pos() - kCharacterLookaheadBufferSize;
-  }
-
-  UnicodeCache* unicode_cache_;
-
-  // Buffers collecting literal strings, numbers, etc.
-  LiteralBuffer literal_buffer1_;
-  LiteralBuffer literal_buffer2_;
-
-  TokenDesc current_;  // desc for current token (as returned by Next())
-  TokenDesc next_;     // desc for next token (one token look-ahead)
-
-  // Input stream. Must be initialized to an UC16CharacterStream.
-  UC16CharacterStream* source_;
-
-  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
-  uc32 c0_;
-};
-
-// ----------------------------------------------------------------------------
-// JavaScriptScanner - base logic for JavaScript scanning.
-
-class JavaScriptScanner : public Scanner {
- public:
-  // A LiteralScope that disables recording of some types of JavaScript
-  // literals. If the scanner is configured to not record the specific
-  // type of literal, the scope will not call StartLiteral.
-  class LiteralScope {
-   public:
-    explicit LiteralScope(JavaScriptScanner* self)
-        : scanner_(self), complete_(false) {
-      scanner_->StartLiteral();
-    }
-     ~LiteralScope() {
-       if (!complete_) scanner_->DropLiteral();
-     }
-    void Complete() {
-      scanner_->TerminateLiteral();
-      complete_ = true;
-    }
-
-   private:
-    JavaScriptScanner* scanner_;
-    bool complete_;
-  };
-
-  explicit JavaScriptScanner(UnicodeCache* scanner_contants);
-
-  void Initialize(UC16CharacterStream* source);
-
-  // Returns the next token.
-  Token::Value Next();
-
-  // Returns true if there was a line terminator before the peek'ed token,
-  // possibly inside a multi-line comment.
-  bool HasAnyLineTerminatorBeforeNext() const {
-    return has_line_terminator_before_next_ ||
-           has_multiline_comment_before_next_;
-  }
-
-  // Scans the input as a regular expression pattern, previous
-  // character(s) must be /(=). Returns true if a pattern is scanned.
-  bool ScanRegExpPattern(bool seen_equal);
-  // Returns true if regexp flags are scanned (always since flags can
-  // be empty).
-  bool ScanRegExpFlags();
-
-  // Tells whether the buffer contains an identifier (no escapes).
-  // Used for checking if a property name is an identifier.
-  static bool IsIdentifier(unibrow::CharacterStream* buffer);
-
-  // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
-  uc32 ScanOctalEscape(uc32 c, int length);
-
-  // Returns the location of the last seen octal literal
-  Location octal_position() const { return octal_pos_; }
-  void clear_octal_position() { octal_pos_ = Location::invalid(); }
-
-  // Seek forward to the given position.  This operation does not
-  // work in general, for instance when there are pushed back
-  // characters, but works for seeking forward until simple delimiter
-  // tokens, which is what it is used for.
-  void SeekForward(int pos);
-
-  bool HarmonyBlockScoping() const {
-    return harmony_block_scoping_;
-  }
-  void SetHarmonyBlockScoping(bool block_scoping) {
-    harmony_block_scoping_ = block_scoping;
-  }
-
-
- protected:
-  bool SkipWhiteSpace();
-  Token::Value SkipSingleLineComment();
-  Token::Value SkipMultiLineComment();
-
-  // Scans a single JavaScript token.
-  void Scan();
-
-  void ScanDecimalDigits();
-  Token::Value ScanNumber(bool seen_period);
-  Token::Value ScanIdentifierOrKeyword();
-  Token::Value ScanIdentifierSuffix(LiteralScope* literal);
-
-  void ScanEscape();
-  Token::Value ScanString();
-
-  // Scans a possible HTML comment -- begins with '<!'.
-  Token::Value ScanHtmlComment();
-
-  // Decodes a unicode escape-sequence which is part of an identifier.
-  // If the escape sequence cannot be decoded the result is kBadChar.
-  uc32 ScanIdentifierUnicodeEscape();
-  // Recognizes a uniocde escape-sequence and adds its characters,
-  // uninterpreted, to the current literal. Used for parsing RegExp
-  // flags.
-  bool ScanLiteralUnicodeEscape();
-
-  // Start position of the octal literal last scanned.
-  Location octal_pos_;
-
-  // Whether there is a line terminator whitespace character after
-  // the current token, and  before the next. Does not count newlines
-  // inside multiline comments.
-  bool has_line_terminator_before_next_;
-  // Whether there is a multi-line comment that contains a
-  // line-terminator after the current token, and before the next.
-  bool has_multiline_comment_before_next_;
-  // Whether we scan 'let' as a keyword for harmony block scoped
-  // let bindings.
-  bool harmony_block_scoping_;
-};
-
-} }  // namespace v8::internal
-
-#endif  // V8_SCANNER_BASE_H_
--- a/src/scanner-character-streams.cc
+++ b/src/scanner-character-streams.cc
@ -0,0 +1,328 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "v8.h"
+
+#include "scanner-character-streams.h"
+
+#include "ast.h"
+#include "handles.h"
+#include "unicode-inl.h"
+
+namespace v8 {
+namespace internal {
+
+// ----------------------------------------------------------------------------
+// BufferedUC16CharacterStreams
+
+BufferedUC16CharacterStream::BufferedUC16CharacterStream()
+    : UC16CharacterStream(),
+      pushback_limit_(NULL) {
+  // Initialize buffer as being empty. First read will fill the buffer.
+  buffer_cursor_ = buffer_;
+  buffer_end_ = buffer_;
+}
+
+BufferedUC16CharacterStream::~BufferedUC16CharacterStream() { }
+
+void BufferedUC16CharacterStream::PushBack(uc32 character) {
+  if (character == kEndOfInput) {
+    pos_--;
+    return;
+  }
+  if (pushback_limit_ == NULL && buffer_cursor_ > buffer_) {
+    // buffer_ is writable, buffer_cursor_ is const pointer.
+    buffer_[--buffer_cursor_ - buffer_] = static_cast<uc16>(character);
+    pos_--;
+    return;
+  }
+  SlowPushBack(static_cast<uc16>(character));
+}
+
+
+void BufferedUC16CharacterStream::SlowPushBack(uc16 character) {
+  // In pushback mode, the end of the buffer contains pushback,
+  // and the start of the buffer (from buffer start to pushback_limit_)
+  // contains valid data that comes just after the pushback.
+  // We NULL the pushback_limit_ if pushing all the way back to the
+  // start of the buffer.
+
+  if (pushback_limit_ == NULL) {
+    // Enter pushback mode.
+    pushback_limit_ = buffer_end_;
+    buffer_end_ = buffer_ + kBufferSize;
+    buffer_cursor_ = buffer_end_;
+  }
+  // Ensure that there is room for at least one pushback.
+  ASSERT(buffer_cursor_ > buffer_);
+  ASSERT(pos_ > 0);
+  buffer_[--buffer_cursor_ - buffer_] = character;
+  if (buffer_cursor_ == buffer_) {
+    pushback_limit_ = NULL;
+  } else if (buffer_cursor_ < pushback_limit_) {
+    pushback_limit_ = buffer_cursor_;
+  }
+  pos_--;
+}
+
+
+bool BufferedUC16CharacterStream::ReadBlock() {
+  buffer_cursor_ = buffer_;
+  if (pushback_limit_ != NULL) {
+    // Leave pushback mode.
+    buffer_end_ = pushback_limit_;
+    pushback_limit_ = NULL;
+    // If there were any valid characters left at the
+    // start of the buffer, use those.
+    if (buffer_cursor_ < buffer_end_) return true;
+    // Otherwise read a new block.
+  }
+  unsigned length = FillBuffer(pos_, kBufferSize);
+  buffer_end_ = buffer_ + length;
+  return length > 0;
+}
+
+
+unsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) {
+  // Leave pushback mode (i.e., ignore that there might be valid data
+  // in the buffer before the pushback_limit_ point).
+  pushback_limit_ = NULL;
+  return BufferSeekForward(delta);
+}
+
+// ----------------------------------------------------------------------------
+// GenericStringUC16CharacterStream
+
+
+GenericStringUC16CharacterStream::GenericStringUC16CharacterStream(
+    Handle<String> data,
+    unsigned start_position,
+    unsigned end_position)
+    : string_(data),
+      length_(end_position) {
+  ASSERT(end_position >= start_position);
+  buffer_cursor_ = buffer_;
+  buffer_end_ = buffer_;
+  pos_ = start_position;
+}
+
+
+GenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { }
+
+
+unsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) {
+  unsigned old_pos = pos_;
+  pos_ = Min(pos_ + delta, length_);
+  ReadBlock();
+  return pos_ - old_pos;
+}
+
+
+unsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos,
+                                                      unsigned length) {
+  if (from_pos >= length_) return 0;
+  if (from_pos + length > length_) {
+    length = length_ - from_pos;
+  }
+  String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length);
+  return length;
+}
+
+
+// ----------------------------------------------------------------------------
+// Utf8ToUC16CharacterStream
+Utf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data,
+                                                     unsigned length)
+    : BufferedUC16CharacterStream(),
+      raw_data_(data),
+      raw_data_length_(length),
+      raw_data_pos_(0),
+      raw_character_position_(0) {
+  ReadBlock();
+}
+
+
+Utf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() { }
+
+
+unsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) {
+  unsigned old_pos = pos_;
+  unsigned target_pos = pos_ + delta;
+  SetRawPosition(target_pos);
+  pos_ = raw_character_position_;
+  ReadBlock();
+  return pos_ - old_pos;
+}
+
+
+unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position,
+                                               unsigned length) {
+  static const unibrow::uchar kMaxUC16Character = 0xffff;
+  SetRawPosition(char_position);
+  if (raw_character_position_ != char_position) {
+    // char_position was not a valid position in the stream (hit the end
+    // while spooling to it).
+    return 0u;
+  }
+  unsigned i = 0;
+  while (i < length) {
+    if (raw_data_pos_ == raw_data_length_) break;
+    unibrow::uchar c = raw_data_[raw_data_pos_];
+    if (c <= unibrow::Utf8::kMaxOneByteChar) {
+      raw_data_pos_++;
+    } else {
+      c =  unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
+                                         raw_data_length_ - raw_data_pos_,
+                                         &raw_data_pos_);
+      // Don't allow characters outside of the BMP.
+      if (c > kMaxUC16Character) {
+        c = unibrow::Utf8::kBadChar;
+      }
+    }
+    buffer_[i++] = static_cast<uc16>(c);
+  }
+  raw_character_position_ = char_position + i;
+  return i;
+}
+
+
+static const byte kUtf8MultiByteMask = 0xC0;
+static const byte kUtf8MultiByteCharStart = 0xC0;
+static const byte kUtf8MultiByteCharFollower = 0x80;
+
+
+#ifdef DEBUG
+static bool IsUtf8MultiCharacterStart(byte first_byte) {
+  return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart;
+}
+#endif
+
+
+static bool IsUtf8MultiCharacterFollower(byte later_byte) {
+  return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower;
+}
+
+
+// Move the cursor back to point at the preceding UTF-8 character start
+// in the buffer.
+static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {
+  byte character = buffer[--*cursor];
+  if (character > unibrow::Utf8::kMaxOneByteChar) {
+    ASSERT(IsUtf8MultiCharacterFollower(character));
+    // Last byte of a multi-byte character encoding. Step backwards until
+    // pointing to the first byte of the encoding, recognized by having the
+    // top two bits set.
+    while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { }
+    ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor]));
+  }
+}
+
+
+// Move the cursor forward to point at the next following UTF-8 character start
+// in the buffer.
+static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
+  byte character = buffer[(*cursor)++];
+  if (character > unibrow::Utf8::kMaxOneByteChar) {
+    // First character of a multi-byte character encoding.
+    // The number of most-significant one-bits determines the length of the
+    // encoding:
+    //  110..... - (0xCx, 0xDx) one additional byte (minimum).
+    //  1110.... - (0xEx) two additional bytes.
+    //  11110... - (0xFx) three additional bytes (maximum).
+    ASSERT(IsUtf8MultiCharacterStart(character));
+    // Additional bytes is:
+    // 1 if value in range 0xC0 .. 0xDF.
+    // 2 if value in range 0xE0 .. 0xEF.
+    // 3 if value in range 0xF0 .. 0xF7.
+    // Encode that in a single value.
+    unsigned additional_bytes =
+        ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;
+    *cursor += additional_bytes;
+    ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));
+  }
+}
+
+
+void Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) {
+  if (raw_character_position_ > target_position) {
+    // Spool backwards in utf8 buffer.
+    do {
+      Utf8CharacterBack(raw_data_, &raw_data_pos_);
+      raw_character_position_--;
+    } while (raw_character_position_ > target_position);
+    return;
+  }
+  // Spool forwards in the utf8 buffer.
+  while (raw_character_position_ < target_position) {
+    if (raw_data_pos_ == raw_data_length_) return;
+    Utf8CharacterForward(raw_data_, &raw_data_pos_);
+    raw_character_position_++;
+  }
+}
+
+
+// ----------------------------------------------------------------------------
+// ExternalTwoByteStringUC16CharacterStream
+
+ExternalTwoByteStringUC16CharacterStream::
+    ~ExternalTwoByteStringUC16CharacterStream() { }
+
+
+ExternalTwoByteStringUC16CharacterStream
+    ::ExternalTwoByteStringUC16CharacterStream(
+        Handle<ExternalTwoByteString> data,
+        int start_position,
+        int end_position)
+    : UC16CharacterStream(),
+      source_(data),
+      raw_data_(data->GetTwoByteData(start_position)) {
+  buffer_cursor_ = raw_data_,
+  buffer_end_ = raw_data_ + (end_position - start_position);
+  pos_ = start_position;
+}
+
+
+// ----------------------------------------------------------------------------
+// Scanner::LiteralScope
+
+Scanner::LiteralScope::LiteralScope(Scanner* self)
+    : scanner_(self), complete_(false) {
+  self->StartLiteral();
+}
+
+
+Scanner::LiteralScope::~LiteralScope() {
+  if (!complete_) scanner_->DropLiteral();
+}
+
+
+void Scanner::LiteralScope::Complete() {
+  scanner_->TerminateLiteral();
+  complete_ = true;
+}
+
+} }  // namespace v8::internal
--- a/src/scanner-character-streams.h
+++ b/src/scanner-character-streams.h
@ -0,0 +1,129 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+//       notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+//       copyright notice, this list of conditions and the following
+//       disclaimer in the documentation and/or other materials provided
+//       with the distribution.
+//     * Neither the name of Google Inc. nor the names of its
+//       contributors may be used to endorse or promote products derived
+//       from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_SCANNER_CHARACTER_STREAMS_H_
+#define V8_SCANNER_CHARACTER_STREAMS_H_
+
+#include "scanner.h"
+
+namespace v8 {
+namespace internal {
+
+// A buffered character stream based on a random access character
+// source (ReadBlock can be called with pos_ pointing to any position,
+// even positions before the current).
+class BufferedUC16CharacterStream: public UC16CharacterStream {
+ public:
+  BufferedUC16CharacterStream();
+  virtual ~BufferedUC16CharacterStream();
+
+  virtual void PushBack(uc32 character);
+
+ protected:
+  static const unsigned kBufferSize = 512;
+  static const unsigned kPushBackStepSize = 16;
+
+  virtual unsigned SlowSeekForward(unsigned delta);
+  virtual bool ReadBlock();
+  virtual void SlowPushBack(uc16 character);
+
+  virtual unsigned BufferSeekForward(unsigned delta) = 0;
+  virtual unsigned FillBuffer(unsigned position, unsigned length) = 0;
+
+  const uc16* pushback_limit_;
+  uc16 buffer_[kBufferSize];
+};
+
+
+// Generic string stream.
+class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream {
+ public:
+  GenericStringUC16CharacterStream(Handle<String> data,
+                                   unsigned start_position,
+                                   unsigned end_position);
+  virtual ~GenericStringUC16CharacterStream();
+
+ protected:
+  virtual unsigned BufferSeekForward(unsigned delta);
+  virtual unsigned FillBuffer(unsigned position, unsigned length);
+
+  Handle<String> string_;
+  unsigned start_position_;
+  unsigned length_;
+};
+
+
+// UC16 stream based on a literal UTF-8 string.
+class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream {
+ public:
+  Utf8ToUC16CharacterStream(const byte* data, unsigned length);
+  virtual ~Utf8ToUC16CharacterStream();
+
+ protected:
+  virtual unsigned BufferSeekForward(unsigned delta);
+  virtual unsigned FillBuffer(unsigned char_position, unsigned length);
+  void SetRawPosition(unsigned char_position);
+
+  const byte* raw_data_;
+  unsigned raw_data_length_;  // Measured in bytes, not characters.
+  unsigned raw_data_pos_;
+  // The character position of the character at raw_data[raw_data_pos_].
+  // Not necessarily the same as pos_.
+  unsigned raw_character_position_;
+};
+
+
+// UTF16 buffer to read characters from an external string.
+class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
+ public:
+  ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data,
+                                           int start_position,
+                                           int end_position);
+  virtual ~ExternalTwoByteStringUC16CharacterStream();
+
+  virtual void PushBack(uc32 character) {
+    ASSERT(buffer_cursor_ > raw_data_);
+    buffer_cursor_--;
+    pos_--;
+  }
+
+ protected:
+  virtual unsigned SlowSeekForward(unsigned delta) {
+    // Fast case always handles seeking.
+    return 0;
+  }
+  virtual bool ReadBlock() {
+    // Entire string is read at start.
+    return false;
+  }
+  Handle<ExternalTwoByteString> source_;
+  const uc16* raw_data_;  // Pointer to the actual array of characters.
+};
+
+} }  // namespace v8::internal
+
+#endif  // V8_SCANNER_CHARACTER_STREAMS_H_
--- a/src/scanner.cc
+++ b/src/scanner.cc
--- a/src/scanner.h
+++ b/src/scanner.h
@ -25,103 +25,538 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+// Features shared by parsing and pre-parsing scanners.
+
 #ifndef V8_SCANNER_H_
 #define V8_SCANNER_H_

-#include "scanner-base.h"
+#include "allocation.h"
+#include "char-predicates.h"
+#include "checks.h"
+#include "globals.h"
+#include "token.h"
+#include "unicode-inl.h"
+#include "utils.h"

 namespace v8 {
 namespace internal {

-// A buffered character stream based on a random access character
-// source (ReadBlock can be called with pos_ pointing to any position,
-// even positions before the current).
-class BufferedUC16CharacterStream: public UC16CharacterStream {
- public:
-  BufferedUC16CharacterStream();
-  virtual ~BufferedUC16CharacterStream();
+// Returns the value (0 .. 15) of a hexadecimal character c.
+// If c is not a legal hexadecimal character, returns a value < 0.
+inline int HexValue(uc32 c) {
+  c -= '0';
+  if (static_cast<unsigned>(c) <= 9) return c;
+  c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
+  if (static_cast<unsigned>(c) <= 5) return c + 10;
+  return -1;
+}

-  virtual void PushBack(uc32 character);
+
+// ---------------------------------------------------------------------
+// Buffered stream of characters, using an internal UC16 buffer.
+
+class UC16CharacterStream {
+ public:
+  UC16CharacterStream() : pos_(0) { }
+  virtual ~UC16CharacterStream() { }
+
+  // Returns and advances past the next UC16 character in the input
+  // stream. If there are no more characters, it returns a negative
+  // value.
+  inline uc32 Advance() {
+    if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
+      pos_++;
+      return static_cast<uc32>(*(buffer_cursor_++));
+    }
+    // Note: currently the following increment is necessary to avoid a
+    // parser problem! The scanner treats the final kEndOfInput as
+    // a character with a position, and does math relative to that
+    // position.
+    pos_++;
+
+    return kEndOfInput;
+  }
+
+  // Return the current position in the character stream.
+  // Starts at zero.
+  inline unsigned pos() const { return pos_; }
+
+  // Skips forward past the next character_count UC16 characters
+  // in the input, or until the end of input if that comes sooner.
+  // Returns the number of characters actually skipped. If less
+  // than character_count,
+  inline unsigned SeekForward(unsigned character_count) {
+    unsigned buffered_chars =
+        static_cast<unsigned>(buffer_end_ - buffer_cursor_);
+    if (character_count <= buffered_chars) {
+      buffer_cursor_ += character_count;
+      pos_ += character_count;
+      return character_count;
+    }
+    return SlowSeekForward(character_count);
+  }
+
+  // Pushes back the most recently read UC16 character (or negative
+  // value if at end of input), i.e., the value returned by the most recent
+  // call to Advance.
+  // Must not be used right after calling SeekForward.
+  virtual void PushBack(int32_t character) = 0;

 protected:
-  static const unsigned kBufferSize = 512;
-  static const unsigned kPushBackStepSize = 16;
+  static const uc32 kEndOfInput = -1;

-  virtual unsigned SlowSeekForward(unsigned delta);
-  virtual bool ReadBlock();
-  virtual void SlowPushBack(uc16 character);
+  // Ensures that the buffer_cursor_ points to the character at
+  // position pos_ of the input, if possible. If the position
+  // is at or after the end of the input, return false. If there
+  // are more characters available, return true.
+  virtual bool ReadBlock() = 0;
+  virtual unsigned SlowSeekForward(unsigned character_count) = 0;

-  virtual unsigned BufferSeekForward(unsigned delta) = 0;
-  virtual unsigned FillBuffer(unsigned position, unsigned length) = 0;
-
-  const uc16* pushback_limit_;
-  uc16 buffer_[kBufferSize];
+  const uc16* buffer_cursor_;
+  const uc16* buffer_end_;
+  unsigned pos_;
 };


-// Generic string stream.
-class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream {
+class UnicodeCache {
+// ---------------------------------------------------------------------
+// Caching predicates used by scanners.
 public:
-  GenericStringUC16CharacterStream(Handle<String> data,
-                                   unsigned start_position,
-                                   unsigned end_position);
-  virtual ~GenericStringUC16CharacterStream();
+  UnicodeCache() {}
+  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

- protected:
-  virtual unsigned BufferSeekForward(unsigned delta);
-  virtual unsigned FillBuffer(unsigned position, unsigned length);
+  StaticResource<Utf8Decoder>* utf8_decoder() {
+    return &utf8_decoder_;
+  }

-  Handle<String> string_;
-  unsigned start_position_;
-  unsigned length_;
+  bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); }
+  bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); }
+  bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); }
+  bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); }
+
+ private:
+
+  unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
+  unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
+  unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
+  unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
+  StaticResource<Utf8Decoder> utf8_decoder_;
+
+  DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
 };


-// UC16 stream based on a literal UTF-8 string.
-class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream {
+// ----------------------------------------------------------------------------
+// LiteralBuffer -  Collector of chars of literals.
+
+class LiteralBuffer {
 public:
-  Utf8ToUC16CharacterStream(const byte* data, unsigned length);
-  virtual ~Utf8ToUC16CharacterStream();
+  LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }

- protected:
-  virtual unsigned BufferSeekForward(unsigned delta);
-  virtual unsigned FillBuffer(unsigned char_position, unsigned length);
-  void SetRawPosition(unsigned char_position);
+  ~LiteralBuffer() {
+    if (backing_store_.length() > 0) {
+      backing_store_.Dispose();
+    }
+  }

-  const byte* raw_data_;
-  unsigned raw_data_length_;  // Measured in bytes, not characters.
-  unsigned raw_data_pos_;
-  // The character position of the character at raw_data[raw_data_pos_].
-  // Not necessarily the same as pos_.
-  unsigned raw_character_position_;
+  inline void AddChar(uc16 character) {
+    if (position_ >= backing_store_.length()) ExpandBuffer();
+    if (is_ascii_) {
+      if (character < kMaxAsciiCharCodeU) {
+        backing_store_[position_] = static_cast<byte>(character);
+        position_ += kASCIISize;
+        return;
+      }
+      ConvertToUC16();
+    }
+    *reinterpret_cast<uc16*>(&backing_store_[position_]) = character;
+    position_ += kUC16Size;
+  }
+
+  bool is_ascii() { return is_ascii_; }
+
+  Vector<const uc16> uc16_literal() {
+    ASSERT(!is_ascii_);
+    ASSERT((position_ & 0x1) == 0);
+    return Vector<const uc16>(
+        reinterpret_cast<const uc16*>(backing_store_.start()),
+        position_ >> 1);
+  }
+
+  Vector<const char> ascii_literal() {
+    ASSERT(is_ascii_);
+    return Vector<const char>(
+        reinterpret_cast<const char*>(backing_store_.start()),
+        position_);
+  }
+
+  int length() {
+    return is_ascii_ ? position_ : (position_ >> 1);
+  }
+
+  void Reset() {
+    position_ = 0;
+    is_ascii_ = true;
+  }
+ private:
+  static const int kInitialCapacity = 16;
+  static const int kGrowthFactory = 4;
+  static const int kMinConversionSlack = 256;
+  static const int kMaxGrowth = 1 * MB;
+  inline int NewCapacity(int min_capacity) {
+    int capacity = Max(min_capacity, backing_store_.length());
+    int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
+    return new_capacity;
+  }
+
+  void ExpandBuffer() {
+    Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
+    memcpy(new_store.start(), backing_store_.start(), position_);
+    backing_store_.Dispose();
+    backing_store_ = new_store;
+  }
+
+  void ConvertToUC16() {
+    ASSERT(is_ascii_);
+    Vector<byte> new_store;
+    int new_content_size = position_ * kUC16Size;
+    if (new_content_size >= backing_store_.length()) {
+      // Ensure room for all currently read characters as UC16 as well
+      // as the character about to be stored.
+      new_store = Vector<byte>::New(NewCapacity(new_content_size));
+    } else {
+      new_store = backing_store_;
+    }
+    char* src = reinterpret_cast<char*>(backing_store_.start());
+    uc16* dst = reinterpret_cast<uc16*>(new_store.start());
+    for (int i = position_ - 1; i >= 0; i--) {
+      dst[i] = src[i];
+    }
+    if (new_store.start() != backing_store_.start()) {
+      backing_store_.Dispose();
+      backing_store_ = new_store;
+    }
+    position_ = new_content_size;
+    is_ascii_ = false;
+  }
+
+  bool is_ascii_;
+  int position_;
+  Vector<byte> backing_store_;
+
+  DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
 };


-// UTF16 buffer to read characters from an external string.
-class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
- public:
-  ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data,
-                                           int start_position,
-                                           int end_position);
-  virtual ~ExternalTwoByteStringUC16CharacterStream();
+// ----------------------------------------------------------------------------
+// Scanner base-class.

-  virtual void PushBack(uc32 character) {
-    ASSERT(buffer_cursor_ > raw_data_);
-    buffer_cursor_--;
-    pos_--;
+// Generic functionality used by both JSON and JavaScript scanners.
+class Scanner {
+ public:
+  // -1 is outside of the range of any real source code.
+  static const int kNoOctalLocation = -1;
+
+  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
+
+  class LiteralScope {
+   public:
+    explicit LiteralScope(Scanner* self);
+    ~LiteralScope();
+    void Complete();
+
+   private:
+    Scanner* scanner_;
+    bool complete_;
+  };
+
+  explicit Scanner(UnicodeCache* scanner_contants);
+
+  // Returns the current token again.
+  Token::Value current_token() { return current_.token; }
+
+  // One token look-ahead (past the token returned by Next()).
+  Token::Value peek() const { return next_.token; }
+
+  struct Location {
+    Location(int b, int e) : beg_pos(b), end_pos(e) { }
+    Location() : beg_pos(0), end_pos(0) { }
+
+    bool IsValid() const {
+      return beg_pos >= 0 && end_pos >= beg_pos;
+    }
+
+    static Location invalid() { return Location(-1, -1); }
+
+    int beg_pos;
+    int end_pos;
+  };
+
+  // Returns the location information for the current token
+  // (the token returned by Next()).
+  Location location() const { return current_.location; }
+  Location peek_location() const { return next_.location; }
+
+  // Returns the literal string, if any, for the current token (the
+  // token returned by Next()). The string is 0-terminated and in
+  // UTF-8 format; they may contain 0-characters. Literal strings are
+  // collected for identifiers, strings, and numbers.
+  // These functions only give the correct result if the literal
+  // was scanned between calls to StartLiteral() and TerminateLiteral().
+  bool is_literal_ascii() {
+    ASSERT_NOT_NULL(current_.literal_chars);
+    return current_.literal_chars->is_ascii();
  }
+  Vector<const char> literal_ascii_string() {
+    ASSERT_NOT_NULL(current_.literal_chars);
+    return current_.literal_chars->ascii_literal();
+  }
+  Vector<const uc16> literal_uc16_string() {
+    ASSERT_NOT_NULL(current_.literal_chars);
+    return current_.literal_chars->uc16_literal();
+  }
+  int literal_length() const {
+    ASSERT_NOT_NULL(current_.literal_chars);
+    return current_.literal_chars->length();
+  }
+
+  bool literal_contains_escapes() const {
+    Location location = current_.location;
+    int source_length = (location.end_pos - location.beg_pos);
+    if (current_.token == Token::STRING) {
+      // Subtract delimiters.
+      source_length -= 2;
+    }
+    return current_.literal_chars->length() != source_length;
+  }
+
+  // Returns the literal string for the next token (the token that
+  // would be returned if Next() were called).
+  bool is_next_literal_ascii() {
+    ASSERT_NOT_NULL(next_.literal_chars);
+    return next_.literal_chars->is_ascii();
+  }
+  Vector<const char> next_literal_ascii_string() {
+    ASSERT_NOT_NULL(next_.literal_chars);
+    return next_.literal_chars->ascii_literal();
+  }
+  Vector<const uc16> next_literal_uc16_string() {
+    ASSERT_NOT_NULL(next_.literal_chars);
+    return next_.literal_chars->uc16_literal();
+  }
+  int next_literal_length() const {
+    ASSERT_NOT_NULL(next_.literal_chars);
+    return next_.literal_chars->length();
+  }
+
+  UnicodeCache* unicode_cache() { return unicode_cache_; }
+
+  static const int kCharacterLookaheadBufferSize = 1;

 protected:
-  virtual unsigned SlowSeekForward(unsigned delta) {
-    // Fast case always handles seeking.
-    return 0;
+  // The current and look-ahead token.
+  struct TokenDesc {
+    Token::Value token;
+    Location location;
+    LiteralBuffer* literal_chars;
+  };
+
+  // Call this after setting source_ to the input.
+  void Init() {
+    // Set c0_ (one character ahead)
+    STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
+    Advance();
+    // Initialize current_ to not refer to a literal.
+    current_.literal_chars = NULL;
  }
-  virtual bool ReadBlock() {
-    // Entire string is read at start.
-    return false;
+
+  // Literal buffer support
+  inline void StartLiteral() {
+    LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
+            &literal_buffer2_ : &literal_buffer1_;
+    free_buffer->Reset();
+    next_.literal_chars = free_buffer;
  }
-  Handle<ExternalTwoByteString> source_;
-  const uc16* raw_data_;  // Pointer to the actual array of characters.
+
+  inline void AddLiteralChar(uc32 c) {
+    ASSERT_NOT_NULL(next_.literal_chars);
+    next_.literal_chars->AddChar(c);
+  }
+
+  // Complete scanning of a literal.
+  inline void TerminateLiteral() {
+    // Does nothing in the current implementation.
+  }
+
+  // Stops scanning of a literal and drop the collected characters,
+  // e.g., due to an encountered error.
+  inline void DropLiteral() {
+    next_.literal_chars = NULL;
+  }
+
+  inline void AddLiteralCharAdvance() {
+    AddLiteralChar(c0_);
+    Advance();
+  }
+
+  // Low-level scanning support.
+  void Advance() { c0_ = source_->Advance(); }
+  void PushBack(uc32 ch) {
+    source_->PushBack(c0_);
+    c0_ = ch;
+  }
+
+  inline Token::Value Select(Token::Value tok) {
+    Advance();
+    return tok;
+  }
+
+  inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
+    Advance();
+    if (c0_ == next) {
+      Advance();
+      return then;
+    } else {
+      return else_;
+    }
+  }
+
+  uc32 ScanHexNumber(int expected_length);
+
+  // Return the current source position.
+  int source_pos() {
+    return source_->pos() - kCharacterLookaheadBufferSize;
+  }
+
+  UnicodeCache* unicode_cache_;
+
+  // Buffers collecting literal strings, numbers, etc.
+  LiteralBuffer literal_buffer1_;
+  LiteralBuffer literal_buffer2_;
+
+  TokenDesc current_;  // desc for current token (as returned by Next())
+  TokenDesc next_;     // desc for next token (one token look-ahead)
+
+  // Input stream. Must be initialized to an UC16CharacterStream.
+  UC16CharacterStream* source_;
+
+  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
+  uc32 c0_;
+};
+
+// ----------------------------------------------------------------------------
+// JavaScriptScanner - base logic for JavaScript scanning.
+
+class JavaScriptScanner : public Scanner {
+ public:
+  // A LiteralScope that disables recording of some types of JavaScript
+  // literals. If the scanner is configured to not record the specific
+  // type of literal, the scope will not call StartLiteral.
+  class LiteralScope {
+   public:
+    explicit LiteralScope(JavaScriptScanner* self)
+        : scanner_(self), complete_(false) {
+      scanner_->StartLiteral();
+    }
+     ~LiteralScope() {
+       if (!complete_) scanner_->DropLiteral();
+     }
+    void Complete() {
+      scanner_->TerminateLiteral();
+      complete_ = true;
+    }
+
+   private:
+    JavaScriptScanner* scanner_;
+    bool complete_;
+  };
+
+  explicit JavaScriptScanner(UnicodeCache* scanner_contants);
+
+  void Initialize(UC16CharacterStream* source);
+
+  // Returns the next token.
+  Token::Value Next();
+
+  // Returns true if there was a line terminator before the peek'ed token,
+  // possibly inside a multi-line comment.
+  bool HasAnyLineTerminatorBeforeNext() const {
+    return has_line_terminator_before_next_ ||
+           has_multiline_comment_before_next_;
+  }
+
+  // Scans the input as a regular expression pattern, previous
+  // character(s) must be /(=). Returns true if a pattern is scanned.
+  bool ScanRegExpPattern(bool seen_equal);
+  // Returns true if regexp flags are scanned (always since flags can
+  // be empty).
+  bool ScanRegExpFlags();
+
+  // Tells whether the buffer contains an identifier (no escapes).
+  // Used for checking if a property name is an identifier.
+  static bool IsIdentifier(unibrow::CharacterStream* buffer);
+
+  // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
+  uc32 ScanOctalEscape(uc32 c, int length);
+
+  // Returns the location of the last seen octal literal
+  Location octal_position() const { return octal_pos_; }
+  void clear_octal_position() { octal_pos_ = Location::invalid(); }
+
+  // Seek forward to the given position.  This operation does not
+  // work in general, for instance when there are pushed back
+  // characters, but works for seeking forward until simple delimiter
+  // tokens, which is what it is used for.
+  void SeekForward(int pos);
+
+  bool HarmonyBlockScoping() const {
+    return harmony_block_scoping_;
+  }
+  void SetHarmonyBlockScoping(bool block_scoping) {
+    harmony_block_scoping_ = block_scoping;
+  }
+
+
+ protected:
+  bool SkipWhiteSpace();
+  Token::Value SkipSingleLineComment();
+  Token::Value SkipMultiLineComment();
+
+  // Scans a single JavaScript token.
+  void Scan();
+
+  void ScanDecimalDigits();
+  Token::Value ScanNumber(bool seen_period);
+  Token::Value ScanIdentifierOrKeyword();
+  Token::Value ScanIdentifierSuffix(LiteralScope* literal);
+
+  void ScanEscape();
+  Token::Value ScanString();
+
+  // Scans a possible HTML comment -- begins with '<!'.
+  Token::Value ScanHtmlComment();
+
+  // Decodes a unicode escape-sequence which is part of an identifier.
+  // If the escape sequence cannot be decoded the result is kBadChar.
+  uc32 ScanIdentifierUnicodeEscape();
+  // Recognizes a uniocde escape-sequence and adds its characters,
+  // uninterpreted, to the current literal. Used for parsing RegExp
+  // flags.
+  bool ScanLiteralUnicodeEscape();
+
+  // Start position of the octal literal last scanned.
+  Location octal_pos_;
+
+  // Whether there is a line terminator whitespace character after
+  // the current token, and  before the next. Does not count newlines
+  // inside multiline comments.
+  bool has_line_terminator_before_next_;
+  // Whether there is a multi-line comment that contains a
+  // line-terminator after the current token, and before the next.
+  bool has_multiline_comment_before_next_;
+  // Whether we scan 'let' as a keyword for harmony block scoped
+  // let bindings.
+  bool harmony_block_scoping_;
 };

 } }  // namespace v8::internal
--- a/src/v8conversions.cc
+++ b/src/v8conversions.cc
@ -34,7 +34,6 @@
 #include "v8conversions.h"
 #include "dtoa.h"
 #include "factory.h"
-#include "scanner-base.h"
 #include "strtod.h"

 namespace v8 {
--- a/test/cctest/test-parsing.cc
+++ b/test/cctest/test-parsing.cc
@ -31,14 +31,14 @@

 #include "v8.h"

-#include "isolate.h"
-#include "token.h"
-#include "scanner.h"
-#include "parser.h"
-#include "utils.h"
-#include "execution.h"
-#include "preparser.h"
 #include "cctest.h"
+#include "execution.h"
+#include "isolate.h"
+#include "parser.h"
+#include "preparser.h"
+#include "scanner-character-streams.h"
+#include "token.h"
+#include "utils.h"

 TEST(ScanKeywords) {
  struct KeywordToken {
--- a/tools/gyp/v8.gyp
+++ b/tools/gyp/v8.gyp
@ -407,10 +407,10 @@
            '../../src/runtime-profiler.h',
            '../../src/safepoint-table.cc',
            '../../src/safepoint-table.h',
-            '../../src/scanner-base.cc',
-            '../../src/scanner-base.h',
            '../../src/scanner.cc',
            '../../src/scanner.h',
+            '../../src/scanner-character-streams.cc',
+            '../../src/scanner-character-streams.h',
            '../../src/scopeinfo.cc',
            '../../src/scopeinfo.h',
            '../../src/scopes.cc',
@ -825,8 +825,8 @@
            '../../src/preparser.cc',
            '../../src/preparser.h',
            '../../src/preparser-api.cc',
-            '../../src/scanner-base.cc',
-            '../../src/scanner-base.h',
+            '../../src/scanner.cc',
+            '../../src/scanner.h',
            '../../src/strtod.cc',
            '../../src/strtod.h',
            '../../src/token.cc',