// Copyright 2012 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef V8_PARSER_H_ #define V8_PARSER_H_ #include "allocation.h" #include "ast.h" #include "preparse-data-format.h" #include "preparse-data.h" #include "scopes.h" #include "preparser.h" namespace v8 { namespace internal { class CompilationInfo; class FuncNameInferrer; class ParserLog; class PositionStack; class Target; template class ZoneListWrapper; class FunctionEntry BASE_EMBEDDED { public: enum { kStartPositionIndex, kEndPositionIndex, kLiteralCountIndex, kPropertyCountIndex, kLanguageModeIndex, kSize }; explicit FunctionEntry(Vector backing) : backing_(backing) { } FunctionEntry() : backing_() { } int start_pos() { return backing_[kStartPositionIndex]; } int end_pos() { return backing_[kEndPositionIndex]; } int literal_count() { return backing_[kLiteralCountIndex]; } int property_count() { return backing_[kPropertyCountIndex]; } LanguageMode language_mode() { ASSERT(backing_[kLanguageModeIndex] == CLASSIC_MODE || backing_[kLanguageModeIndex] == STRICT_MODE || backing_[kLanguageModeIndex] == EXTENDED_MODE); return static_cast(backing_[kLanguageModeIndex]); } bool is_valid() { return !backing_.is_empty(); } private: Vector backing_; }; class ScriptDataImpl : public ScriptData { public: explicit ScriptDataImpl(Vector store) : store_(store), owns_store_(true) { } // Create an empty ScriptDataImpl that is guaranteed to not satisfy // a SanityCheck. ScriptDataImpl() : owns_store_(false) { } virtual ~ScriptDataImpl(); virtual int Length(); virtual const char* Data(); virtual bool HasError(); void Initialize(); void ReadNextSymbolPosition(); FunctionEntry GetFunctionEntry(int start); int GetSymbolIdentifier(); bool SanityCheck(); Scanner::Location MessageLocation(); const char* BuildMessage(); Vector BuildArgs(); int symbol_count() { return (store_.length() > PreparseDataConstants::kHeaderSize) ? store_[PreparseDataConstants::kSymbolCountOffset] : 0; } // The following functions should only be called if SanityCheck has // returned true. bool has_error() { return store_[PreparseDataConstants::kHasErrorOffset]; } unsigned magic() { return store_[PreparseDataConstants::kMagicOffset]; } unsigned version() { return store_[PreparseDataConstants::kVersionOffset]; } private: Vector store_; unsigned char* symbol_data_; unsigned char* symbol_data_end_; int function_index_; bool owns_store_; unsigned Read(int position); unsigned* ReadAddress(int position); // Reads a number from the current symbols int ReadNumber(byte** source); ScriptDataImpl(const char* backing_store, int length) : store_(reinterpret_cast(const_cast(backing_store)), length / static_cast(sizeof(unsigned))), owns_store_(false) { ASSERT_EQ(0, static_cast( reinterpret_cast(backing_store) % sizeof(unsigned))); } // Read strings written by ParserRecorder::WriteString. static const char* ReadString(unsigned* start, int* chars); friend class ScriptData; }; class PreParserApi { public: // Pre-parse a character stream and return full preparse data. // // This interface is here instead of in preparser.h because it instantiates a // preparser recorder object that is suited to the parser's purposes. Also, // the preparser doesn't know about ScriptDataImpl. static ScriptDataImpl* PreParse(Isolate* isolate, Utf16CharacterStream* source); }; // ---------------------------------------------------------------------------- // REGEXP PARSING // A BufferedZoneList is an automatically growing list, just like (and backed // by) a ZoneList, that is optimized for the case of adding and removing // a single element. The last element added is stored outside the backing list, // and if no more than one element is ever added, the ZoneList isn't even // allocated. // Elements must not be NULL pointers. template class BufferedZoneList { public: BufferedZoneList() : list_(NULL), last_(NULL) {} // Adds element at end of list. This element is buffered and can // be read using last() or removed using RemoveLast until a new Add or until // RemoveLast or GetList has been called. void Add(T* value, Zone* zone) { if (last_ != NULL) { if (list_ == NULL) { list_ = new(zone) ZoneList(initial_size, zone); } list_->Add(last_, zone); } last_ = value; } T* last() { ASSERT(last_ != NULL); return last_; } T* RemoveLast() { ASSERT(last_ != NULL); T* result = last_; if ((list_ != NULL) && (list_->length() > 0)) last_ = list_->RemoveLast(); else last_ = NULL; return result; } T* Get(int i) { ASSERT((0 <= i) && (i < length())); if (list_ == NULL) { ASSERT_EQ(0, i); return last_; } else { if (i == list_->length()) { ASSERT(last_ != NULL); return last_; } else { return list_->at(i); } } } void Clear() { list_ = NULL; last_ = NULL; } int length() { int length = (list_ == NULL) ? 0 : list_->length(); return length + ((last_ == NULL) ? 0 : 1); } ZoneList* GetList(Zone* zone) { if (list_ == NULL) { list_ = new(zone) ZoneList(initial_size, zone); } if (last_ != NULL) { list_->Add(last_, zone); last_ = NULL; } return list_; } private: ZoneList* list_; T* last_; }; // Accumulates RegExp atoms and assertions into lists of terms and alternatives. class RegExpBuilder: public ZoneObject { public: explicit RegExpBuilder(Zone* zone); void AddCharacter(uc16 character); // "Adds" an empty expression. Does nothing except consume a // following quantifier void AddEmpty(); void AddAtom(RegExpTree* tree); void AddAssertion(RegExpTree* tree); void NewAlternative(); // '|' void AddQuantifierToAtom( int min, int max, RegExpQuantifier::QuantifierType type); RegExpTree* ToRegExp(); private: void FlushCharacters(); void FlushText(); void FlushTerms(); Zone* zone() const { return zone_; } Zone* zone_; bool pending_empty_; ZoneList* characters_; BufferedZoneList terms_; BufferedZoneList text_; BufferedZoneList alternatives_; #ifdef DEBUG enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_; #define LAST(x) last_added_ = x; #else #define LAST(x) #endif }; class RegExpParser BASE_EMBEDDED { public: RegExpParser(FlatStringReader* in, Handle* error, bool multiline_mode, Zone* zone); static bool ParseRegExp(FlatStringReader* input, bool multiline, RegExpCompileData* result, Zone* zone); RegExpTree* ParsePattern(); RegExpTree* ParseDisjunction(); RegExpTree* ParseGroup(); RegExpTree* ParseCharacterClass(); // Parses a {...,...} quantifier and stores the range in the given // out parameters. bool ParseIntervalQuantifier(int* min_out, int* max_out); // Parses and returns a single escaped character. The character // must not be 'b' or 'B' since they are usually handle specially. uc32 ParseClassCharacterEscape(); // Checks whether the following is a length-digit hexadecimal number, // and sets the value if it is. bool ParseHexEscape(int length, uc32* value); uc32 ParseOctalLiteral(); // Tries to parse the input as a back reference. If successful it // stores the result in the output parameter and returns true. If // it fails it will push back the characters read so the same characters // can be reparsed. bool ParseBackReferenceIndex(int* index_out); CharacterRange ParseClassAtom(uc16* char_class); RegExpTree* ReportError(Vector message); void Advance(); void Advance(int dist); void Reset(int pos); // Reports whether the pattern might be used as a literal search string. // Only use if the result of the parse is a single atom node. bool simple(); bool contains_anchor() { return contains_anchor_; } void set_contains_anchor() { contains_anchor_ = true; } int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } int position() { return next_pos_ - 1; } bool failed() { return failed_; } static const int kMaxCaptures = 1 << 16; static const uc32 kEndMarker = (1 << 21); private: enum SubexpressionType { INITIAL, CAPTURE, // All positive values represent captures. POSITIVE_LOOKAHEAD, NEGATIVE_LOOKAHEAD, GROUPING }; class RegExpParserState : public ZoneObject { public: RegExpParserState(RegExpParserState* previous_state, SubexpressionType group_type, int disjunction_capture_index, Zone* zone) : previous_state_(previous_state), builder_(new(zone) RegExpBuilder(zone)), group_type_(group_type), disjunction_capture_index_(disjunction_capture_index) {} // Parser state of containing expression, if any. RegExpParserState* previous_state() { return previous_state_; } bool IsSubexpression() { return previous_state_ != NULL; } // RegExpBuilder building this regexp's AST. RegExpBuilder* builder() { return builder_; } // Type of regexp being parsed (parenthesized group or entire regexp). SubexpressionType group_type() { return group_type_; } // Index in captures array of first capture in this sub-expression, if any. // Also the capture index of this sub-expression itself, if group_type // is CAPTURE. int capture_index() { return disjunction_capture_index_; } private: // Linked list implementation of stack of states. RegExpParserState* previous_state_; // Builder for the stored disjunction. RegExpBuilder* builder_; // Stored disjunction type (capture, look-ahead or grouping), if any. SubexpressionType group_type_; // Stored disjunction's capture index (if any). int disjunction_capture_index_; }; Isolate* isolate() { return isolate_; } Zone* zone() const { return zone_; } uc32 current() { return current_; } bool has_more() { return has_more_; } bool has_next() { return next_pos_ < in()->length(); } uc32 Next(); FlatStringReader* in() { return in_; } void ScanForCaptures(); Isolate* isolate_; Zone* zone_; Handle* error_; ZoneList* captures_; FlatStringReader* in_; uc32 current_; int next_pos_; // The capture count is only valid after we have scanned for captures. int capture_count_; bool has_more_; bool multiline_; bool simple_; bool contains_anchor_; bool is_scanned_for_captures_; bool failed_; }; // ---------------------------------------------------------------------------- // JAVASCRIPT PARSING class Parser; class SingletonLogger; class ParserTraits { public: typedef Parser* ParserType; // Return types for traversing functions. typedef Handle IdentifierType; explicit ParserTraits(Parser* parser) : parser_(parser) {} // Helper functions for recursive descent. bool is_classic_mode() const; bool is_generator() const; bool IsEvalOrArguments(Handle identifier) const; // Reporting errors. void ReportMessageAt(Scanner::Location source_location, const char* message, Vector args); void ReportMessage(const char* message, Vector > args); void ReportMessageAt(Scanner::Location source_location, const char* message, Vector > args); // Identifiers: static IdentifierType EmptyIdentifier() { return Handle(); } IdentifierType GetSymbol(); private: Parser* parser_; }; class Parser : public ParserBase { public: explicit Parser(CompilationInfo* info); ~Parser() { delete reusable_preparser_; reusable_preparser_ = NULL; } // Parses the source code represented by the compilation info and sets its // function literal. Returns false (and deallocates any allocated AST // nodes) if parsing failed. static bool Parse(CompilationInfo* info, bool allow_lazy = false) { Parser parser(info); parser.set_allow_lazy(allow_lazy); return parser.Parse(); } bool Parse(); private: friend class ParserTraits; static const int kMaxNumFunctionLocals = 131071; // 2^17-1 enum Mode { PARSE_LAZILY, PARSE_EAGERLY }; enum VariableDeclarationContext { kModuleElement, kBlockElement, kStatement, kForStatement }; // If a list of variable declarations includes any initializers. enum VariableDeclarationProperties { kHasInitializers, kHasNoInitializers }; class BlockState; class FunctionState BASE_EMBEDDED { public: FunctionState(Parser* parser, Scope* scope); ~FunctionState(); int NextMaterializedLiteralIndex() { return next_materialized_literal_index_++; } int materialized_literal_count() { return next_materialized_literal_index_ - JSFunction::kLiteralsPrefixSize; } int NextHandlerIndex() { return next_handler_index_++; } int handler_count() { return next_handler_index_; } void AddProperty() { expected_property_count_++; } int expected_property_count() { return expected_property_count_; } void set_generator_object_variable(Variable *variable) { ASSERT(variable != NULL); ASSERT(!is_generator()); generator_object_variable_ = variable; } Variable* generator_object_variable() const { return generator_object_variable_; } bool is_generator() const { return generator_object_variable_ != NULL; } AstNodeFactory* factory() { return &factory_; } private: // Used to assign an index to each literal that needs materialization in // the function. Includes regexp literals, and boilerplate for object and // array literals. int next_materialized_literal_index_; // Used to assign a per-function index to try and catch handlers. int next_handler_index_; // Properties count estimation. int expected_property_count_; // For generators, the variable that holds the generator object. This // variable is used by yield expressions and return statements. NULL // indicates that this function is not a generator. Variable* generator_object_variable_; Parser* parser_; FunctionState* outer_function_state_; Scope* outer_scope_; int saved_ast_node_id_; AstNodeFactory factory_; }; class ParsingModeScope BASE_EMBEDDED { public: ParsingModeScope(Parser* parser, Mode mode) : parser_(parser), old_mode_(parser->mode()) { parser_->mode_ = mode; } ~ParsingModeScope() { parser_->mode_ = old_mode_; } private: Parser* parser_; Mode old_mode_; }; // Returns NULL if parsing failed. FunctionLiteral* ParseProgram(); FunctionLiteral* ParseLazy(); FunctionLiteral* ParseLazy(Utf16CharacterStream* source); Isolate* isolate() { return isolate_; } Zone* zone() const { return zone_; } CompilationInfo* info() const { return info_; } // Called by ParseProgram after setting up the scanner. FunctionLiteral* DoParseProgram(CompilationInfo* info, Handle source); // Report syntax error void ReportInvalidPreparseData(Handle name, bool* ok); void set_pre_parse_data(ScriptDataImpl *data) { pre_parse_data_ = data; symbol_cache_.Initialize(data ? data->symbol_count() : 0, zone()); } bool inside_with() const { return top_scope_->inside_with(); } Scanner& scanner() { return scanner_; } Mode mode() const { return mode_; } ScriptDataImpl* pre_parse_data() const { return pre_parse_data_; } bool is_extended_mode() { ASSERT(top_scope_ != NULL); return top_scope_->is_extended_mode(); } Scope* DeclarationScope(VariableMode mode) { return IsLexicalVariableMode(mode) ? top_scope_ : top_scope_->DeclarationScope(); } // All ParseXXX functions take as the last argument an *ok parameter // which is set to false if parsing failed; it is unchanged otherwise. // By making the 'exception handling' explicit, we are forced to check // for failure at the call sites. void* ParseSourceElements(ZoneList* processor, int end_token, bool is_eval, bool is_global, bool* ok); Statement* ParseModuleElement(ZoneStringList* labels, bool* ok); Statement* ParseModuleDeclaration(ZoneStringList* names, bool* ok); Module* ParseModule(bool* ok); Module* ParseModuleLiteral(bool* ok); Module* ParseModulePath(bool* ok); Module* ParseModuleVariable(bool* ok); Module* ParseModuleUrl(bool* ok); Module* ParseModuleSpecifier(bool* ok); Block* ParseImportDeclaration(bool* ok); Statement* ParseExportDeclaration(bool* ok); Statement* ParseBlockElement(ZoneStringList* labels, bool* ok); Statement* ParseStatement(ZoneStringList* labels, bool* ok); Statement* ParseFunctionDeclaration(ZoneStringList* names, bool* ok); Statement* ParseNativeDeclaration(bool* ok); Block* ParseBlock(ZoneStringList* labels, bool* ok); Block* ParseVariableStatement(VariableDeclarationContext var_context, ZoneStringList* names, bool* ok); Block* ParseVariableDeclarations(VariableDeclarationContext var_context, VariableDeclarationProperties* decl_props, ZoneStringList* names, Handle* out, bool* ok); Statement* ParseExpressionOrLabelledStatement(ZoneStringList* labels, bool* ok); IfStatement* ParseIfStatement(ZoneStringList* labels, bool* ok); Statement* ParseContinueStatement(bool* ok); Statement* ParseBreakStatement(ZoneStringList* labels, bool* ok); Statement* ParseReturnStatement(bool* ok); Statement* ParseWithStatement(ZoneStringList* labels, bool* ok); CaseClause* ParseCaseClause(bool* default_seen_ptr, bool* ok); SwitchStatement* ParseSwitchStatement(ZoneStringList* labels, bool* ok); DoWhileStatement* ParseDoWhileStatement(ZoneStringList* labels, bool* ok); WhileStatement* ParseWhileStatement(ZoneStringList* labels, bool* ok); Statement* ParseForStatement(ZoneStringList* labels, bool* ok); Statement* ParseThrowStatement(bool* ok); Expression* MakeCatchContext(Handle id, VariableProxy* value); TryStatement* ParseTryStatement(bool* ok); DebuggerStatement* ParseDebuggerStatement(bool* ok); // Support for hamony block scoped bindings. Block* ParseScopedBlock(ZoneStringList* labels, bool* ok); Expression* ParseExpression(bool accept_IN, bool* ok); Expression* ParseAssignmentExpression(bool accept_IN, bool* ok); Expression* ParseYieldExpression(bool* ok); Expression* ParseConditionalExpression(bool accept_IN, bool* ok); Expression* ParseBinaryExpression(int prec, bool accept_IN, bool* ok); Expression* ParseUnaryExpression(bool* ok); Expression* ParsePostfixExpression(bool* ok); Expression* ParseLeftHandSideExpression(bool* ok); Expression* ParseNewExpression(bool* ok); Expression* ParseMemberExpression(bool* ok); Expression* ParseNewPrefix(PositionStack* stack, bool* ok); Expression* ParseMemberWithNewPrefixesExpression(PositionStack* stack, bool* ok); Expression* ParsePrimaryExpression(bool* ok); Expression* ParseArrayLiteral(bool* ok); Expression* ParseObjectLiteral(bool* ok); Expression* ParseRegExpLiteral(bool seen_equal, bool* ok); // Initialize the components of a for-in / for-of statement. void InitializeForEachStatement(ForEachStatement* stmt, Expression* each, Expression* subject, Statement* body); ZoneList* ParseArguments(bool* ok); FunctionLiteral* ParseFunctionLiteral( Handle name, Scanner::Location function_name_location, bool name_is_strict_reserved, bool is_generator, int function_token_position, FunctionLiteral::FunctionType type, bool* ok); // Magical syntax support. Expression* ParseV8Intrinsic(bool* ok); bool CheckInOrOf(bool accept_OF, ForEachStatement::VisitMode* visit_mode); Handle LiteralString(PretenureFlag tenured) { if (scanner().is_literal_ascii()) { return isolate_->factory()->NewStringFromAscii( scanner().literal_ascii_string(), tenured); } else { return isolate_->factory()->NewStringFromTwoByte( scanner().literal_utf16_string(), tenured); } } Handle NextLiteralString(PretenureFlag tenured) { if (scanner().is_next_literal_ascii()) { return isolate_->factory()->NewStringFromAscii( scanner().next_literal_ascii_string(), tenured); } else { return isolate_->factory()->NewStringFromTwoByte( scanner().next_literal_utf16_string(), tenured); } } // Get odd-ball literals. Literal* GetLiteralUndefined(int position); Literal* GetLiteralTheHole(int position); // Determine if the expression is a variable proxy and mark it as being used // in an assignment or with a increment/decrement operator. This is currently // used on for the statically checking assignments to harmony const bindings. void MarkAsLValue(Expression* expression); // Strict mode validation of LValue expressions void CheckStrictModeLValue(Expression* expression, bool* ok); // For harmony block scoping mode: Check if the scope has conflicting var/let // declarations from different scopes. It covers for example // // function f() { { { var x; } let x; } } // function g() { { var x; let x; } } // // The var declarations are hoisted to the function scope, but originate from // a scope where the name has also been let bound or the var declaration is // hoisted over such a scope. void CheckConflictingVarDeclarations(Scope* scope, bool* ok); // Parser support VariableProxy* NewUnresolved(Handle name, VariableMode mode, Interface* interface); void Declare(Declaration* declaration, bool resolve, bool* ok); bool TargetStackContainsLabel(Handle label); BreakableStatement* LookupBreakTarget(Handle label, bool* ok); IterationStatement* LookupContinueTarget(Handle label, bool* ok); void RegisterTargetUse(Label* target, Target* stop); // Factory methods. Scope* NewScope(Scope* parent, ScopeType type); Handle LookupSymbol(int symbol_id); Handle LookupCachedSymbol(int symbol_id); // Generate AST node that throw a ReferenceError with the given type. Expression* NewThrowReferenceError(Handle type); // Generate AST node that throw a SyntaxError with the given // type. The first argument may be null (in the handle sense) in // which case no arguments are passed to the constructor. Expression* NewThrowSyntaxError(Handle type, Handle first); // Generate AST node that throw a TypeError with the given // type. Both arguments must be non-null (in the handle sense). Expression* NewThrowTypeError(Handle type, Handle first, Handle second); // Generic AST generator for throwing errors from compiled code. Expression* NewThrowError(Handle constructor, Handle type, Vector< Handle > arguments); PreParser::PreParseResult LazyParseFunctionLiteral( SingletonLogger* logger); AstNodeFactory* factory() { return current_function_state_->factory(); } Isolate* isolate_; ZoneList > symbol_cache_; Handle