v8/src/preparser.h

// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef V8_PREPARSER_H
#define V8_PREPARSER_H

#include "hashmap.h"
#include "token.h"
#include "scanner.h"

namespace v8 {
namespace internal {

// Common base class shared between parser and pre-parser.
class ParserBase {
 public:
  ParserBase(Scanner* scanner, uintptr_t stack_limit)
      : scanner_(scanner),
        stack_limit_(stack_limit),
        stack_overflow_(false),
        allow_lazy_(false),
        allow_natives_syntax_(false),
        allow_generators_(false),
        allow_for_of_(false) { }
  // TODO(mstarzinger): Only virtual until message reporting has been unified.
  virtual ~ParserBase() { }

  // Getters that indicate whether certain syntactical constructs are
  // allowed to be parsed by this instance of the parser.
  bool allow_lazy() const { return allow_lazy_; }
  bool allow_natives_syntax() const { return allow_natives_syntax_; }
  bool allow_generators() const { return allow_generators_; }
  bool allow_for_of() const { return allow_for_of_; }
  bool allow_modules() const { return scanner()->HarmonyModules(); }
  bool allow_harmony_scoping() const { return scanner()->HarmonyScoping(); }
  bool allow_harmony_numeric_literals() const {
    return scanner()->HarmonyNumericLiterals();
  }

  // Setters that determine whether certain syntactical constructs are
  // allowed to be parsed by this instance of the parser.
  void set_allow_lazy(bool allow) { allow_lazy_ = allow; }
  void set_allow_natives_syntax(bool allow) { allow_natives_syntax_ = allow; }
  void set_allow_generators(bool allow) { allow_generators_ = allow; }
  void set_allow_for_of(bool allow) { allow_for_of_ = allow; }
  void set_allow_modules(bool allow) { scanner()->SetHarmonyModules(allow); }
  void set_allow_harmony_scoping(bool allow) {
    scanner()->SetHarmonyScoping(allow);
  }
  void set_allow_harmony_numeric_literals(bool allow) {
    scanner()->SetHarmonyNumericLiterals(allow);
  }

 protected:
  enum AllowEvalOrArgumentsAsIdentifier {
    kAllowEvalOrArguments,
    kDontAllowEvalOrArguments
  };

  Scanner* scanner() const { return scanner_; }
  int position() { return scanner_->location().beg_pos; }
  int peek_position() { return scanner_->peek_location().beg_pos; }
  bool stack_overflow() const { return stack_overflow_; }
  void set_stack_overflow() { stack_overflow_ = true; }

  INLINE(Token::Value peek()) {
    if (stack_overflow_) return Token::ILLEGAL;
    return scanner()->peek();
  }

  INLINE(Token::Value Next()) {
    if (stack_overflow_) return Token::ILLEGAL;
    {
      int marker;
      if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) {
        // Any further calls to Next or peek will return the illegal token.
        // The current call must return the next token, which might already
        // have been peek'ed.
        stack_overflow_ = true;
      }
    }
    return scanner()->Next();
  }

  void Consume(Token::Value token) {
    Token::Value next = Next();
    USE(next);
    USE(token);
    ASSERT(next == token);
  }

  bool Check(Token::Value token) {
    Token::Value next = peek();
    if (next == token) {
      Consume(next);
      return true;
    }
    return false;
  }

  void Expect(Token::Value token, bool* ok) {
    Token::Value next = Next();
    if (next != token) {
      ReportUnexpectedToken(next);
      *ok = false;
    }
  }

  bool peek_any_identifier();
  void ExpectSemicolon(bool* ok);
  bool CheckContextualKeyword(Vector<const char> keyword);
  void ExpectContextualKeyword(Vector<const char> keyword, bool* ok);

  // Strict mode octal literal validation.
  void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok);

  // Determine precedence of given token.
  static int Precedence(Token::Value token, bool accept_IN);

  // Report syntax errors.
  virtual void ReportUnexpectedToken(Token::Value token) = 0;
  virtual void ReportMessageAt(Scanner::Location loc, const char* type) = 0;

  // Used to detect duplicates in object literals. Each of the values
  // kGetterProperty, kSetterProperty and kValueProperty represents
  // a type of object literal property. When parsing a property, its
  // type value is stored in the DuplicateFinder for the property name.
  // Values are chosen so that having intersection bits means the there is
  // an incompatibility.
  // I.e., you can add a getter to a property that already has a setter, since
  // kGetterProperty and kSetterProperty doesn't intersect, but not if it
  // already has a getter or a value. Adding the getter to an existing
  // setter will store the value (kGetterProperty | kSetterProperty), which
  // is incompatible with adding any further properties.
  enum PropertyKind {
    kNone = 0,
    // Bit patterns representing different object literal property types.
    kGetterProperty = 1,
    kSetterProperty = 2,
    kValueProperty = 7,
    // Helper constants.
    kValueFlag = 4
  };

  // Validation per ECMA 262 - 11.1.5 "Object Initialiser".
  class ObjectLiteralChecker {
   public:
    ObjectLiteralChecker(ParserBase* parser, LanguageMode mode)
        : parser_(parser),
          finder_(scanner()->unicode_cache()),
          language_mode_(mode) { }

    void CheckProperty(Token::Value property, PropertyKind type, bool* ok);

   private:
    ParserBase* parser() const { return parser_; }
    Scanner* scanner() const { return parser_->scanner(); }

    // Checks the type of conflict based on values coming from PropertyType.
    bool HasConflict(PropertyKind type1, PropertyKind type2) {
      return (type1 & type2) != 0;
    }
    bool IsDataDataConflict(PropertyKind type1, PropertyKind type2) {
      return ((type1 & type2) & kValueFlag) != 0;
    }
    bool IsDataAccessorConflict(PropertyKind type1, PropertyKind type2) {
      return ((type1 ^ type2) & kValueFlag) != 0;
    }
    bool IsAccessorAccessorConflict(PropertyKind type1, PropertyKind type2) {
      return ((type1 | type2) & kValueFlag) == 0;
    }

    ParserBase* parser_;
    DuplicateFinder finder_;
    LanguageMode language_mode_;
  };

 private:
  Scanner* scanner_;
  uintptr_t stack_limit_;
  bool stack_overflow_;

  bool allow_lazy_;
  bool allow_natives_syntax_;
  bool allow_generators_;
  bool allow_for_of_;
};


// Preparsing checks a JavaScript program and emits preparse-data that helps
// a later parsing to be faster.
// See preparse-data-format.h for the data format.

// The PreParser checks that the syntax follows the grammar for JavaScript,
// and collects some information about the program along the way.
// The grammar check is only performed in order to understand the program
// sufficiently to deduce some information about it, that can be used
// to speed up later parsing. Finding errors is not the goal of pre-parsing,
// rather it is to speed up properly written and correct programs.
// That means that contextual checks (like a label being declared where
// it is used) are generally omitted.
class PreParser : public ParserBase {
 public:
  enum PreParseResult {
    kPreParseStackOverflow,
    kPreParseSuccess
  };

  PreParser(Scanner* scanner,
            ParserRecorder* log,
            uintptr_t stack_limit)
      : ParserBase(scanner, stack_limit),
        log_(log),
        scope_(NULL),
        strict_mode_violation_location_(Scanner::Location::invalid()),
        strict_mode_violation_type_(NULL),
        parenthesized_function_(false) { }

  ~PreParser() {}

  // Pre-parse the program from the character stream; returns true on
  // success (even if parsing failed, the pre-parse data successfully
  // captured the syntax error), and false if a stack-overflow happened
  // during parsing.
  PreParseResult PreParseProgram() {
    Scope top_scope(&scope_, kTopLevelScope);
    bool ok = true;
    int start_position = scanner()->peek_location().beg_pos;
    ParseSourceElements(Token::EOS, &ok);
    if (stack_overflow()) return kPreParseStackOverflow;
    if (!ok) {
      ReportUnexpectedToken(scanner()->current_token());
    } else if (!scope_->is_classic_mode()) {
      CheckOctalLiteral(start_position, scanner()->location().end_pos, &ok);
    }
    return kPreParseSuccess;
  }

  // Parses a single function literal, from the opening parentheses before
  // parameters to the closing brace after the body.
  // Returns a FunctionEntry describing the body of the function in enough
  // detail that it can be lazily compiled.
  // The scanner is expected to have matched the "function" or "function*"
  // keyword and parameters, and have consumed the initial '{'.
  // At return, unless an error occurred, the scanner is positioned before the
  // the final '}'.
  PreParseResult PreParseLazyFunction(LanguageMode mode,
                                      bool is_generator,
                                      ParserRecorder* log);

 private:
  // These types form an algebra over syntactic categories that is just
  // rich enough to let us recognize and propagate the constructs that
  // are either being counted in the preparser data, or is important
  // to throw the correct syntax error exceptions.

  enum ScopeType {
    kTopLevelScope,
    kFunctionScope
  };

  enum VariableDeclarationContext {
    kSourceElement,
    kStatement,
    kForStatement
  };

  // If a list of variable declarations includes any initializers.
  enum VariableDeclarationProperties {
    kHasInitializers,
    kHasNoInitializers
  };

  class Expression;

  class Identifier {
   public:
    static Identifier Default() {
      return Identifier(kUnknownIdentifier);
    }
    static Identifier Eval()  {
      return Identifier(kEvalIdentifier);
    }
    static Identifier Arguments()  {
      return Identifier(kArgumentsIdentifier);
    }
    static Identifier FutureReserved()  {
      return Identifier(kFutureReservedIdentifier);
    }
    static Identifier FutureStrictReserved()  {
      return Identifier(kFutureStrictReservedIdentifier);
    }
    static Identifier Yield()  {
      return Identifier(kYieldIdentifier);
    }
    bool IsEval() { return type_ == kEvalIdentifier; }
    bool IsArguments() { return type_ == kArgumentsIdentifier; }
    bool IsEvalOrArguments() { return type_ >= kEvalIdentifier; }
    bool IsYield() { return type_ == kYieldIdentifier; }
    bool IsFutureReserved() { return type_ == kFutureReservedIdentifier; }
    bool IsFutureStrictReserved() {
      return type_ == kFutureStrictReservedIdentifier;
    }
    bool IsValidStrictVariable() { return type_ == kUnknownIdentifier; }

   private:
    enum Type {
      kUnknownIdentifier,
      kFutureReservedIdentifier,
      kFutureStrictReservedIdentifier,
      kYieldIdentifier,
      kEvalIdentifier,
      kArgumentsIdentifier
    };
    explicit Identifier(Type type) : type_(type) { }
    Type type_;

    friend class Expression;
  };

  // Bits 0 and 1 are used to identify the type of expression:
  // If bit 0 is set, it's an identifier.
  // if bit 1 is set, it's a string literal.
  // If neither is set, it's no particular type, and both set isn't
  // use yet.
  // Bit 2 is used to mark the expression as being parenthesized,
  // so "(foo)" isn't recognized as a pure identifier (and possible label).
  class Expression {
   public:
    static Expression Default() {
      return Expression(kUnknownExpression);
    }

    static Expression FromIdentifier(Identifier id) {
      return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift));
    }

    static Expression StringLiteral() {
      return Expression(kUnknownStringLiteral);
    }

    static Expression UseStrictStringLiteral() {
      return Expression(kUseStrictString);
    }

    static Expression This() {
      return Expression(kThisExpression);
    }

    static Expression ThisProperty() {
      return Expression(kThisPropertyExpression);
    }

    static Expression StrictFunction() {
      return Expression(kStrictFunctionExpression);
    }

    bool IsIdentifier() {
      return (code_ & kIdentifierFlag) != 0;
    }

    // Only works corretly if it is actually an identifier expression.
    PreParser::Identifier AsIdentifier() {
      return PreParser::Identifier(
          static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift));
    }

    bool IsParenthesized() {
      // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized.
      return (code_ & 7) > 4;
    }

    bool IsRawIdentifier() {
      return !IsParenthesized() && IsIdentifier();
    }

    bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; }

    bool IsRawStringLiteral() {
      return !IsParenthesized() && IsStringLiteral();
    }

    bool IsUseStrictLiteral() {
      return (code_ & kStringLiteralMask) == kUseStrictString;
    }

    bool IsThis() {
      return code_ == kThisExpression;
    }

    bool IsThisProperty() {
      return code_ == kThisPropertyExpression;
    }

    bool IsStrictFunction() {
      return code_ == kStrictFunctionExpression;
    }

    Expression Parenthesize() {
      int type = code_ & 3;
      if (type != 0) {
        // Identifiers and string literals can be parenthesized.
        // They no longer work as labels or directive prologues,
        // but are still recognized in other contexts.
        return Expression(code_ | kParenthesizedExpressionFlag);
      }
      // For other types of expressions, it's not important to remember
      // the parentheses.
      return *this;
    }

   private:
    // First two/three bits are used as flags.
    // Bit 0 and 1 represent identifiers or strings literals, and are
    // mutually exclusive, but can both be absent.
    // If bit 0 or 1 are set, bit 2 marks that the expression has
    // been wrapped in parentheses (a string literal can no longer
    // be a directive prologue, and an identifier can no longer be
    // a label.
    enum  {
      kUnknownExpression = 0,
      // Identifiers
      kIdentifierFlag = 1,  // Used to detect labels.
      kIdentifierShift = 3,

      kStringLiteralFlag = 2,  // Used to detect directive prologue.
      kUnknownStringLiteral = kStringLiteralFlag,
      kUseStrictString = kStringLiteralFlag | 8,
      kStringLiteralMask = kUseStrictString,

      // Only if identifier or string literal.
      kParenthesizedExpressionFlag = 4,

      // Below here applies if neither identifier nor string literal.
      kThisExpression = 4,
      kThisPropertyExpression = 8,
      kStrictFunctionExpression = 12
    };

    explicit Expression(int expression_code) : code_(expression_code) { }

    int code_;
  };

  class Statement {
   public:
    static Statement Default() {
      return Statement(kUnknownStatement);
    }

    static Statement FunctionDeclaration() {
      return Statement(kFunctionDeclaration);
    }

    // Creates expression statement from expression.
    // Preserves being an unparenthesized string literal, possibly
    // "use strict".
    static Statement ExpressionStatement(Expression expression) {
      if (!expression.IsParenthesized()) {
        if (expression.IsUseStrictLiteral()) {
          return Statement(kUseStrictExpressionStatement);
        }
        if (expression.IsStringLiteral()) {
          return Statement(kStringLiteralExpressionStatement);
        }
      }
      return Default();
    }

    bool IsStringLiteral() {
      return code_ == kStringLiteralExpressionStatement;
    }

    bool IsUseStrictLiteral() {
      return code_ == kUseStrictExpressionStatement;
    }

    bool IsFunctionDeclaration() {
      return code_ == kFunctionDeclaration;
    }

   private:
    enum Type {
      kUnknownStatement,
      kStringLiteralExpressionStatement,
      kUseStrictExpressionStatement,
      kFunctionDeclaration
    };

    explicit Statement(Type code) : code_(code) {}
    Type code_;
  };

  enum SourceElements {
    kUnknownSourceElements
  };

  typedef int Arguments;

  class Scope {
   public:
    Scope(Scope** variable, ScopeType type)
        : variable_(variable),
          prev_(*variable),
          type_(type),
          materialized_literal_count_(0),
          expected_properties_(0),
          with_nesting_count_(0),
          language_mode_(
              (prev_ != NULL) ? prev_->language_mode() : CLASSIC_MODE),
          is_generator_(false) {
      *variable = this;
    }
    ~Scope() { *variable_ = prev_; }
    void NextMaterializedLiteralIndex() { materialized_literal_count_++; }
    void AddProperty() { expected_properties_++; }
    ScopeType type() { return type_; }
    int expected_properties() { return expected_properties_; }
    int materialized_literal_count() { return materialized_literal_count_; }
    bool IsInsideWith() { return with_nesting_count_ != 0; }
    bool is_generator() { return is_generator_; }
    void set_is_generator(bool is_generator) { is_generator_ = is_generator; }
    bool is_classic_mode() {
      return language_mode_ == CLASSIC_MODE;
    }
    LanguageMode language_mode() {
      return language_mode_;
    }
    void set_language_mode(LanguageMode language_mode) {
      language_mode_ = language_mode;
    }

    class InsideWith {
     public:
      explicit InsideWith(Scope* scope) : scope_(scope) {
        scope->with_nesting_count_++;
      }

      ~InsideWith() { scope_->with_nesting_count_--; }

     private:
      Scope* scope_;
      DISALLOW_COPY_AND_ASSIGN(InsideWith);
    };

   private:
    Scope** const variable_;
    Scope* const prev_;
    const ScopeType type_;
    int materialized_literal_count_;
    int expected_properties_;
    int with_nesting_count_;
    LanguageMode language_mode_;
    bool is_generator_;
  };

  // Report syntax error
  void ReportUnexpectedToken(Token::Value token);
  void ReportMessageAt(Scanner::Location location, const char* type) {
    ReportMessageAt(location, type, NULL);
  }
  void ReportMessageAt(Scanner::Location location,
                       const char* type,
                       const char* name_opt) {
    log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt);
  }
  void ReportMessageAt(int start_pos,
                       int end_pos,
                       const char* type,
                       const char* name_opt) {
    log_->LogMessage(start_pos, end_pos, type, name_opt);
  }

  // All ParseXXX functions take as the last argument an *ok parameter
  // which is set to false if parsing failed; it is unchanged otherwise.
  // By making the 'exception handling' explicit, we are forced to check
  // for failure at the call sites.
  Statement ParseSourceElement(bool* ok);
  SourceElements ParseSourceElements(int end_token, bool* ok);
  Statement ParseStatement(bool* ok);
  Statement ParseFunctionDeclaration(bool* ok);
  Statement ParseBlock(bool* ok);
  Statement ParseVariableStatement(VariableDeclarationContext var_context,
                                   bool* ok);
  Statement ParseVariableDeclarations(VariableDeclarationContext var_context,
                                      VariableDeclarationProperties* decl_props,
                                      int* num_decl,
                                      bool* ok);
  Statement ParseExpressionOrLabelledStatement(bool* ok);
  Statement ParseIfStatement(bool* ok);
  Statement ParseContinueStatement(bool* ok);
  Statement ParseBreakStatement(bool* ok);
  Statement ParseReturnStatement(bool* ok);
  Statement ParseWithStatement(bool* ok);
  Statement ParseSwitchStatement(bool* ok);
  Statement ParseDoWhileStatement(bool* ok);
  Statement ParseWhileStatement(bool* ok);
  Statement ParseForStatement(bool* ok);
  Statement ParseThrowStatement(bool* ok);
  Statement ParseTryStatement(bool* ok);
  Statement ParseDebuggerStatement(bool* ok);

  Expression ParseExpression(bool accept_IN, bool* ok);
  Expression ParseAssignmentExpression(bool accept_IN, bool* ok);
  Expression ParseYieldExpression(bool* ok);
  Expression ParseConditionalExpression(bool accept_IN, bool* ok);
  Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
  Expression ParseUnaryExpression(bool* ok);
  Expression ParsePostfixExpression(bool* ok);
  Expression ParseLeftHandSideExpression(bool* ok);
  Expression ParseNewExpression(bool* ok);
  Expression ParseMemberExpression(bool* ok);
  Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok);
  Expression ParsePrimaryExpression(bool* ok);
  Expression ParseArrayLiteral(bool* ok);
  Expression ParseObjectLiteral(bool* ok);
  Expression ParseRegExpLiteral(bool seen_equal, bool* ok);
  Expression ParseV8Intrinsic(bool* ok);

  Arguments ParseArguments(bool* ok);
  Expression ParseFunctionLiteral(bool is_generator, bool* ok);
  void ParseLazyFunctionLiteralBody(bool* ok);

  Identifier ParseIdentifier(AllowEvalOrArgumentsAsIdentifier, bool* ok);
  Identifier ParseIdentifierName(bool* ok);
  Identifier ParseIdentifierNameOrGetOrSet(bool* is_get,
                                           bool* is_set,
                                           bool* ok);

  // Logs the currently parsed literal as a symbol in the preparser data.
  void LogSymbol();
  // Log the currently parsed identifier.
  Identifier GetIdentifierSymbol();
  // Log the currently parsed string literal.
  Expression GetStringSymbol();

  void set_language_mode(LanguageMode language_mode) {
    scope_->set_language_mode(language_mode);
  }

  bool is_classic_mode() {
    return scope_->language_mode() == CLASSIC_MODE;
  }

  bool is_extended_mode() {
    return scope_->language_mode() == EXTENDED_MODE;
  }

  LanguageMode language_mode() { return scope_->language_mode(); }

  bool CheckInOrOf(bool accept_OF);

  void SetStrictModeViolation(Scanner::Location,
                              const char* type,
                              bool* ok);

  void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok);

  void StrictModeIdentifierViolation(Scanner::Location,
                                     Identifier identifier,
                                     bool* ok);

  ParserRecorder* log_;
  Scope* scope_;
  Scanner::Location strict_mode_violation_location_;
  const char* strict_mode_violation_type_;
  bool parenthesized_function_;
};

} }  // v8::internal

#endif  // V8_PREPARSER_H