v8/src/scanner-base.h

// Copyright 2010 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Features shared by parsing and pre-parsing scanners.

#ifndef V8_SCANNER_BASE_H_
#define V8_SCANNER_BASE_H_

#include "globals.h"
#include "checks.h"
#include "allocation.h"
#include "token.h"
#include "unicode-inl.h"
#include "char-predicates.h"
#include "utils.h"

namespace v8 {
namespace internal {

// Static data and helpers shared by the scanners: a UTF-8 decoder resource
// and a set of character-class predicates. Everything here is static; the
// definitions of the static members and of IsIdentifier() are not in this
// header.
class ScannerConstants : AllStatic {
 public:
  // UTF-8 input buffer type used for decoding (template argument 1024).
  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

  // Returns a pointer to the single static decoder resource.
  static StaticResource<Utf8Decoder>* utf8_decoder() {
    return &utf8_decoder_;
  }

  // Character-class predicates, one per class named in the template argument.
  static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
  static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
  static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
  static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;

  // NOTE(review): presumably reports whether the characters read from
  // |buffer| form a valid identifier; the definition is not visible here,
  // so confirm against the implementation file.
  static bool IsIdentifier(unibrow::CharacterStream* buffer);

 private:
  // Backing storage for utf8_decoder().
  static StaticResource<Utf8Decoder> utf8_decoder_;
};


class KeywordMatcher {
//  Incrementally recognize keywords.
//
//  Characters are fed one at a time through AddChar(); token() reports the
//  token for the characters added so far (Token::IDENTIFIER unless a keyword
//  has been matched).
//
//  Recognized keywords:
//      break case catch const* continue debugger* default delete do else
//      finally false for function if in instanceof native* new null
//      return switch this throw true try typeof var void while with
//
//  *: Actually "future reserved keywords". These are the only ones we
//     recognize, the remaining are allowed as identifiers.
//     In ES5 strict mode, we should disallow all reserved keywords.
 public:
  // Starts in the INITIAL state with no characters consumed; until a keyword
  // is matched the reported token is Token::IDENTIFIER.
  KeywordMatcher()
      : state_(INITIAL),
        token_(Token::IDENTIFIER),
        keyword_(NULL),
        counter_(0),
        keyword_token_(Token::ILLEGAL) {}

  // Token for the characters added so far. Read-only, so it is const and
  // can be called through a const reference.
  Token::Value token() const { return token_; }

  // Feeds one more character to the matcher. Once the matcher is in the
  // UNMATCHABLE state the character is ignored.
  inline void AddChar(unibrow::uchar input) {
    if (state_ != UNMATCHABLE) {
      Step(input);
    }
  }

  // Gives up on matching a keyword: the token becomes Token::IDENTIFIER and
  // every later AddChar() call is a no-op.
  void Fail() {
    token_ = Token::IDENTIFIER;
    state_ = UNMATCHABLE;
  }

 private:
  // Matcher states.
  // NOTE(review): the letter-named states appear to name the keyword prefix
  // consumed so far (e.g. CO after "co"); the transitions live in Step(),
  // which is not defined in this header -- confirm there.
  enum State {
    UNMATCHABLE,
    INITIAL,
    KEYWORD_PREFIX,
    KEYWORD_MATCHED,
    C,
    CA,
    CO,
    CON,
    D,
    DE,
    F,
    I,
    IN,
    N,
    T,
    TH,
    TR,
    V,
    W
  };

  // One entry of the first-character state map: a keyword string, a state
  // and a token value.
  // NOTE(review): how Step() interprets each field per entry is not visible
  // in this header.
  struct FirstState {
    const char* keyword;
    State state;
    Token::Value token;
  };

  // Range of possible first characters of a keyword.
  static const unsigned int kFirstCharRangeMin = 'b';
  static const unsigned int kFirstCharRangeMax = 'w';
  static const unsigned int kFirstCharRangeLength =
      kFirstCharRangeMax - kFirstCharRangeMin + 1;
  // State map for first keyword character range (one entry per character in
  // [kFirstCharRangeMin, kFirstCharRangeMax]).
  static FirstState first_states_[kFirstCharRangeLength];

  // If input equals keyword's character at position, continue matching keyword
  // from that position: enter KEYWORD_PREFIX, remember the keyword, the index
  // of the next character to compare (position + 1) and the token to report
  // on a full match. Returns true on a match; otherwise leaves the matcher
  // untouched and returns false. The caller must ensure that position is a
  // valid index into keyword; no bounds check is done here.
  inline bool MatchKeywordStart(unibrow::uchar input,
                                const char* keyword,
                                int position,
                                Token::Value token_if_match) {
    if (input == static_cast<unibrow::uchar>(keyword[position])) {
      state_ = KEYWORD_PREFIX;
      this->keyword_ = keyword;
      this->counter_ = position + 1;
      this->keyword_token_ = token_if_match;
      return true;
    }
    return false;
  }

  // If input equals match character, transition to new state and return true.
  // Otherwise the state is left unchanged and false is returned. The current
  // token is not modified.
  inline bool MatchState(unibrow::uchar input, char match, State new_state) {
    if (input == static_cast<unibrow::uchar>(match)) {
      state_ = new_state;
      return true;
    }
    return false;
  }

  // If input equals match character, transition to new state, make
  // keyword_token the current token and return true. Otherwise nothing is
  // changed and false is returned.
  inline bool MatchKeyword(unibrow::uchar input,
                           char match,
                           State new_state,
                           Token::Value keyword_token) {
    if (input != static_cast<unibrow::uchar>(match)) {
      return false;
    }
    state_ = new_state;
    token_ = keyword_token;
    return true;
  }

  // Advances the matcher by one input character. Defined outside this header.
  void Step(unibrow::uchar input);

  // Current state.
  State state_;
  // Token for currently added characters.
  Token::Value token_;

  // Matching a specific keyword string (there is only one possible valid
  // keyword with the current prefix).
  const char* keyword_;
  // Index into keyword_ of the next character to compare.
  int counter_;
  // Token to report if keyword_ is matched completely.
  Token::Value keyword_token_;
};


} }  // namespace v8::internal

#endif  // V8_SCANNER_BASE_H_
Move part of scanner.* into scanner-base.* for reuse in preparser scanner. Make checks.h not depend on flags.h or global.h (or anything else except include/v8stdint.h). Only checks.cc has the dependencies (so another implementation of checks.cc can be provided by the preparser). Now files depending on checks.h (using ASSERT macros) can include it directly without depending on all of v8. Review URL: http://codereview.chromium.org/4576001 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5775 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-05 13:33:40 +00:00			`// Copyright 2010 the V8 project authors. All rights reserved.`
			`// Redistribution and use in source and binary forms, with or without`
			`// modification, are permitted provided that the following conditions are`
			`// met:`
			`//`
			`// * Redistributions of source code must retain the above copyright`
			`// notice, this list of conditions and the following disclaimer.`
			`// * Redistributions in binary form must reproduce the above`
			`// copyright notice, this list of conditions and the following`
			`// disclaimer in the documentation and/or other materials provided`
			`// with the distribution.`
			`// * Neither the name of Google Inc. nor the names of its`
			`// contributors may be used to endorse or promote products derived`
			`// from this software without specific prior written permission.`
			`//`
			`// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`

			`// Features shared by parsing and pre-parsing scanners.`

			`#ifndef V8_SCANNER_BASE_H_`
			`#define V8_SCANNER_BASE_H_`

Move static scanner fields to scanner-base.h Review URL: http://codereview.chromium.org/5026005 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5828 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-16 08:01:45 +00:00			`#include "globals.h"`
			`#include "checks.h"`
			`#include "allocation.h"`
Move part of scanner.* into scanner-base.* for reuse in preparser scanner. Make checks.h not depend on flags.h or global.h (or anything else except include/v8stdint.h). Only checks.cc has the dependencies (so another implementation of checks.cc can be provided by the preparser). Now files depending on checks.h (using ASSERT macros) can include it directly without depending on all of v8. Review URL: http://codereview.chromium.org/4576001 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5775 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-05 13:33:40 +00:00			`#include "token.h"`
Move static scanner fields to scanner-base.h Review URL: http://codereview.chromium.org/5026005 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5828 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-16 08:01:45 +00:00			`#include "unicode-inl.h"`
			`#include "char-predicates.h"`
			`#include "utils.h"`
Move part of scanner.* into scanner-base.* for reuse in preparser scanner. Make checks.h not depend on flags.h or global.h (or anything else except include/v8stdint.h). Only checks.cc has the dependencies (so another implementation of checks.cc can be provided by the preparser). Now files depending on checks.h (using ASSERT macros) can include it directly without depending on all of v8. Review URL: http://codereview.chromium.org/4576001 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5775 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-05 13:33:40 +00:00
			`namespace v8 {`
			`namespace internal {`

Move static scanner fields to scanner-base.h Review URL: http://codereview.chromium.org/5026005 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5828 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-16 08:01:45 +00:00			`class ScannerConstants : AllStatic {`
			`public:`
			`typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;`

			`static StaticResource<Utf8Decoder>* utf8_decoder() {`
			`return &utf8_decoder_;`
			`}`

			`static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;`
			`static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;`
			`static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;`
			`static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;`

			`static bool IsIdentifier(unibrow::CharacterStream* buffer);`

			`private:`
			`static StaticResource<Utf8Decoder> utf8_decoder_;`
			`};`


Move part of scanner.* into scanner-base.* for reuse in preparser scanner. Make checks.h not depend on flags.h or global.h (or anything else except include/v8stdint.h). Only checks.cc has the dependencies (so another implementation of checks.cc can be provided by the preparser). Now files depending on checks.h (using ASSERT macros) can include it directly without depending on all of v8. Review URL: http://codereview.chromium.org/4576001 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5775 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-05 13:33:40 +00:00			`class KeywordMatcher {`
			`// Incrementally recognize keywords.`
			`//`
			`// Recognized keywords:`
			`// break case catch const* continue debugger* default delete do else`
			`// finally false for function if in instanceof native* new null`
			`// return switch this throw true try typeof var void while with`
			`//`
			`// *: Actually "future reserved keywords". These are the only ones we`
Move static scanner fields to scanner-base.h Review URL: http://codereview.chromium.org/5026005 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5828 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-16 08:01:45 +00:00			`// recognize, the remaining are allowed as identifiers.`
			`// In ES5 strict mode, we should disallow all reserved keywords.`
Move part of scanner.* into scanner-base.* for reuse in preparser scanner. Make checks.h not depend on flags.h or global.h (or anything else except include/v8stdint.h). Only checks.cc has the dependencies (so another implementation of checks.cc can be provided by the preparser). Now files depending on checks.h (using ASSERT macros) can include it directly without depending on all of v8. Review URL: http://codereview.chromium.org/4576001 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5775 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 2010-11-05 13:33:40 +00:00			`public:`
			`KeywordMatcher()`
			`: state_(INITIAL),`
			`token_(Token::IDENTIFIER),`
			`keyword_(NULL),`
			`counter_(0),`
			`keyword_token_(Token::ILLEGAL) {}`

			`Token::Value token() { return token_; }`

			`inline void AddChar(unibrow::uchar input) {`
			`if (state_ != UNMATCHABLE) {`
			`Step(input);`
			`}`
			`}`

			`void Fail() {`
			`token_ = Token::IDENTIFIER;`
			`state_ = UNMATCHABLE;`
			`}`

			`private:`
			`enum State {`
			`UNMATCHABLE,`
			`INITIAL,`
			`KEYWORD_PREFIX,`
			`KEYWORD_MATCHED,`
			`C,`
			`CA,`
			`CO,`
			`CON,`
			`D,`
			`DE,`
			`F,`
			`I,`
			`IN,`
			`N,`
			`T,`
			`TH,`
			`TR,`
			`V,`
			`W`
			`};`

			`struct FirstState {`
			`const char* keyword;`
			`State state;`
			`Token::Value token;`
			`};`

			`// Range of possible first characters of a keyword.`
			`static const unsigned int kFirstCharRangeMin = 'b';`
			`static const unsigned int kFirstCharRangeMax = 'w';`
			`static const unsigned int kFirstCharRangeLength =`
			`kFirstCharRangeMax - kFirstCharRangeMin + 1;`
			`// State map for first keyword character range.`
			`static FirstState first_states_[kFirstCharRangeLength];`

			`// If input equals keyword's character at position, continue matching keyword`
			`// from that position.`
			`inline bool MatchKeywordStart(unibrow::uchar input,`
			`const char* keyword,`
			`int position,`
			`Token::Value token_if_match) {`
			`if (input == static_cast<unibrow::uchar>(keyword[position])) {`
			`state_ = KEYWORD_PREFIX;`
			`this->keyword_ = keyword;`
			`this->counter_ = position + 1;`
			`this->keyword_token_ = token_if_match;`
			`return true;`
			`}`
			`return false;`
			`}`

			`// If input equals match character, transition to new state and return true.`
			`inline bool MatchState(unibrow::uchar input, char match, State new_state) {`
			`if (input == static_cast<unibrow::uchar>(match)) {`
			`state_ = new_state;`
			`return true;`
			`}`
			`return false;`
			`}`

			`inline bool MatchKeyword(unibrow::uchar input,`
			`char match,`
			`State new_state,`
			`Token::Value keyword_token) {`
			`if (input != static_cast<unibrow::uchar>(match)) {`
			`return false;`
			`}`
			`state_ = new_state;`
			`token_ = keyword_token;`
			`return true;`
			`}`

			`void Step(unibrow::uchar input);`

			`// Current state.`
			`State state_;`
			`// Token for currently added characters.`
			`Token::Value token_;`

			`// Matching a specific keyword string (there is only one possible valid`
			`// keyword with the current prefix).`
			`const char* keyword_;`
			`int counter_;`
			`Token::Value keyword_token_;`
			`};`


			`} } // namespace v8::internal`

			`#endif // V8_SCANNER_BASE_H_`