// Copyright 2010 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "v8.h"

#include "api.h"
#include "ast.h"
#include "bootstrapper.h"
#include "codegen.h"
#include "compiler.h"
#include "func-name-inferrer.h"
#include "messages.h"
#include "parser.h"
#include "platform.h"
#include "preparser.h"
#include "prescanner.h"
#include "runtime.h"
#include "scopeinfo.h"
#include "string-stream.h"

#include "ast-inl.h"
#include "jump-target-inl.h"

namespace v8 {
namespace internal {

// PositionStack is used for on-stack allocation of token positions for
// new expressions. Please look at ParseNewExpression.
//
// The stack is an intrusive singly linked list: each Element links itself
// in on construction and records the element that was on top before it.
class PositionStack {
 public:
  explicit PositionStack(bool* ok) : top_(NULL), ok_(ok) {}
  // Unless parsing failed (*ok is false), every pushed position must have
  // been popped again by the time the stack goes away.
  ~PositionStack() { ASSERT(!*ok_ || is_empty()); }

  class Element {
   public:
    // Pushes |value| by making this element the new top of |stack|.
    Element(PositionStack* stack, int value) {
      previous_ = stack->top();
      value_ = value;
      stack->set_top(this);
    }

   private:
    Element* previous() { return previous_; }
    int value() { return value_; }
    friend class PositionStack;
    Element* previous_;
    int value_;
  };

  bool is_empty() { return top_ == NULL; }

  // Removes the top element and returns its value. The stack must not be
  // empty.
  int pop() {
    ASSERT(!is_empty());
    int result = top_->value();
    top_ = top_->previous();
    return result;
  }

 private:
  Element* top() { return top_; }
  void set_top(Element* value) { top_ = value; }
  Element* top_;
  bool* ok_;
};


RegExpBuilder::RegExpBuilder()
  : pending_empty_(false),
    characters_(NULL),
    terms_(),
    alternatives_()
#ifdef DEBUG
  , last_added_(ADD_NONE)
#endif
  {}


// Converts the pending run of characters (if any) into a single RegExpAtom
// appended to text_, and clears the pending-empty flag.
void RegExpBuilder::FlushCharacters() {
  pending_empty_ = false;
  if (characters_ != NULL) {
    RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());
    characters_ = NULL;
    text_.Add(atom);
    LAST(ADD_ATOM);
  }
}


// Flushes pending characters and then moves the accumulated text elements
// into terms_: a single element is added as is, several elements are merged
// into one RegExpText.
void RegExpBuilder::FlushText() {
  FlushCharacters();
  int num_text = text_.length();
  if (num_text == 0) {
    return;
  } else if (num_text == 1) {
    terms_.Add(text_.last());
  } else {
    RegExpText* text = new RegExpText();
    for (int i = 0; i < num_text; i++)
      text_.Get(i)->AppendToText(text);
    terms_.Add(text);
  }
  text_.Clear();
}


// Appends |c| to the pending character run, allocating the run lazily.
void RegExpBuilder::AddCharacter(uc16 c) {
  pending_empty_ = false;
  if (characters_ == NULL) {
    characters_ = new ZoneList<uc16>(4);
  }
  characters_->Add(c);
  LAST(ADD_CHAR);
}


// Records that the last thing added was empty, so that a following
// quantifier is dropped (see AddQuantifierToAtom).
void RegExpBuilder::AddEmpty() {
  pending_empty_ = true;
}


// Adds |term| to the current alternative. Empty terms are only recorded via
// AddEmpty(); text elements are collected in text_, everything else goes
// straight to terms_ after the pending text has been flushed.
void RegExpBuilder::AddAtom(RegExpTree* term) {
  if (term->IsEmpty()) {
    AddEmpty();
    return;
  }
  if (term->IsTextElement()) {
    FlushCharacters();
    text_.Add(term);
  } else {
    FlushText();
    terms_.Add(term);
  }
  LAST(ADD_ATOM);
}


void RegExpBuilder::AddAssertion(RegExpTree* assert) {
  FlushText();
  terms_.Add(assert);
  LAST(ADD_ASSERT);
}


void RegExpBuilder::NewAlternative() {
  FlushTerms();
}


// Closes the current alternative: the collected terms become one entry in
// alternatives_ (RegExpEmpty for no terms, the term itself for one term,
// a RegExpAlternative otherwise) and terms_ is cleared.
void RegExpBuilder::FlushTerms() {
  FlushText();
  int num_terms = terms_.length();
  RegExpTree* alternative;
  if (num_terms == 0) {
    alternative = RegExpEmpty::GetInstance();
  } else if (num_terms == 1) {
    alternative = terms_.last();
  } else {
    alternative = new RegExpAlternative(terms_.GetList());
  }
  alternatives_.Add(alternative);
  terms_.Clear();
  LAST(ADD_NONE);
}


// Finishes the last alternative and returns the resulting tree: the single
// alternative if there is only one, otherwise a RegExpDisjunction of all.
RegExpTree* RegExpBuilder::ToRegExp() {
  FlushTerms();
  int num_alternatives = alternatives_.length();
  if (num_alternatives == 0) {
    return RegExpEmpty::GetInstance();
  }
  if (num_alternatives == 1) {
    return alternatives_.last();
  }
  return new RegExpDisjunction(alternatives_.GetList());
}


// Wraps the most recently added atom in a RegExpQuantifier with the given
// bounds and type. The atom is taken, in order of preference, from the
// pending characters (only the last character is quantified), from text_,
// or from terms_. Must only be called immediately after adding an atom or
// a character.
void RegExpBuilder::AddQuantifierToAtom(int min,
                                        int max,
                                        RegExpQuantifier::Type type) {
  if (pending_empty_) {
    // Quantifying an empty atom yields nothing.
    pending_empty_ = false;
    return;
  }
  RegExpTree* atom;
  if (characters_ != NULL) {
    ASSERT(last_added_ == ADD_CHAR);
    // Last atom was character.
    Vector<const uc16> char_vector = characters_->ToConstVector();
    int num_chars = char_vector.length();
    if (num_chars > 1) {
      // Everything but the last character stays an unquantified atom.
      Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
      text_.Add(new RegExpAtom(prefix));
      char_vector = char_vector.SubVector(num_chars - 1, num_chars);
    }
    characters_ = NULL;
    atom = new RegExpAtom(char_vector);
    FlushText();
  } else if (text_.length() > 0) {
    ASSERT(last_added_ == ADD_ATOM);
    atom = text_.RemoveLast();
    FlushText();
  } else if (terms_.length() > 0) {
    ASSERT(last_added_ == ADD_ATOM);
    atom = terms_.RemoveLast();
    if (atom->max_match() == 0) {
      // Guaranteed to only match an empty string.
      LAST(ADD_TERM);
      if (min == 0) {
        return;
      }
      terms_.Add(atom);
      return;
    }
  } else {
    // Only call immediately after adding an atom or character!
    UNREACHABLE();
    return;
  }
  terms_.Add(new RegExpQuantifier(min, max, type, atom));
  LAST(ADD_TERM);
}


// A temporary scope stores information during parsing, just like
// a plain scope.  However, temporary scopes are not kept around
// after parsing or referenced by syntax trees so they can be stack-
// allocated and hence used by the pre-parser.
class TemporaryScope BASE_EMBEDDED { public: explicit TemporaryScope(TemporaryScope** variable); ~TemporaryScope(); int NextMaterializedLiteralIndex() { int next_index = materialized_literal_count_ + JSFunction::kLiteralsPrefixSize; materialized_literal_count_++; return next_index; } int materialized_literal_count() { return materialized_literal_count_; } void SetThisPropertyAssignmentInfo( bool only_simple_this_property_assignments, Handle this_property_assignments) { only_simple_this_property_assignments_ = only_simple_this_property_assignments; this_property_assignments_ = this_property_assignments; } bool only_simple_this_property_assignments() { return only_simple_this_property_assignments_; } Handle this_property_assignments() { return this_property_assignments_; } void AddProperty() { expected_property_count_++; } int expected_property_count() { return expected_property_count_; } void AddLoop() { loop_count_++; } bool ContainsLoops() const { return loop_count_ > 0; } private: // Captures the number of literals that need materialization in the // function. Includes regexp literals, and boilerplate for object // and array literals. int materialized_literal_count_; // Properties count estimation. int expected_property_count_; // Keeps track of assignments to properties of this. Used for // optimizing constructors. bool only_simple_this_property_assignments_; Handle this_property_assignments_; // Captures the number of loops inside the scope. 
int loop_count_; // Bookkeeping TemporaryScope** variable_; TemporaryScope* parent_; }; TemporaryScope::TemporaryScope(TemporaryScope** variable) : materialized_literal_count_(0), expected_property_count_(0), only_simple_this_property_assignments_(false), this_property_assignments_(Factory::empty_fixed_array()), loop_count_(0), variable_(variable), parent_(*variable) { *variable = this; } TemporaryScope::~TemporaryScope() { *variable_ = parent_; } Handle Parser::LookupSymbol(int symbol_id, Vector string) { // Length of symbol cache is the number of identified symbols. // If we are larger than that, or negative, it's not a cached symbol. // This might also happen if there is no preparser symbol data, even // if there is some preparser data. if (static_cast(symbol_id) >= static_cast(symbol_cache_.length())) { return Factory::LookupSymbol(string); } return LookupCachedSymbol(symbol_id, string); } Handle Parser::LookupCachedSymbol(int symbol_id, Vector string) { // Make sure the cache is large enough to hold the symbol identifier. if (symbol_cache_.length() <= symbol_id) { // Increase length to index + 1. 
symbol_cache_.AddBlock(Handle::null(), symbol_id + 1 - symbol_cache_.length()); } Handle result = symbol_cache_.at(symbol_id); if (result.is_null()) { result = Factory::LookupSymbol(string); symbol_cache_.at(symbol_id) = result; return result; } Counters::total_preparse_symbols_skipped.Increment(); return result; } Vector PartialParserRecorder::ExtractData() { int function_size = function_store_.size(); int total_size = ScriptDataImpl::kHeaderSize + function_size; Vector data = Vector::New(total_size); preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size; preamble_[ScriptDataImpl::kSymbolCountOffset] = 0; memcpy(data.start(), preamble_, sizeof(preamble_)); int symbol_start = ScriptDataImpl::kHeaderSize + function_size; if (function_size > 0) { function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize, symbol_start)); } return data; } void CompleteParserRecorder::LogSymbol(int start, Vector literal) { if (!is_recording_) return; int hash = vector_hash(literal); HashMap::Entry* entry = symbol_table_.Lookup(&literal, hash, true); int id = static_cast(reinterpret_cast(entry->value)); if (id == 0) { // Put (symbol_id_ + 1) into entry and increment it. id = ++symbol_id_; entry->value = reinterpret_cast(id); Vector > symbol = symbol_entries_.AddBlock(1, literal); entry->key = &symbol[0]; } WriteNumber(id - 1); } Vector CompleteParserRecorder::ExtractData() { int function_size = function_store_.size(); // Add terminator to symbols, then pad to unsigned size. 
int symbol_size = symbol_store_.size(); int padding = sizeof(unsigned) - (symbol_size % sizeof(unsigned)); symbol_store_.AddBlock(padding, ScriptDataImpl::kNumberTerminator); symbol_size += padding; int total_size = ScriptDataImpl::kHeaderSize + function_size + (symbol_size / sizeof(unsigned)); Vector data = Vector::New(total_size); preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size; preamble_[ScriptDataImpl::kSymbolCountOffset] = symbol_id_; memcpy(data.start(), preamble_, sizeof(preamble_)); int symbol_start = ScriptDataImpl::kHeaderSize + function_size; if (function_size > 0) { function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize, symbol_start)); } if (!has_error()) { symbol_store_.WriteTo( Vector::cast(data.SubVector(symbol_start, total_size))); } return data; } FunctionEntry ScriptDataImpl::GetFunctionEntry(int start) { // The current pre-data entry must be a FunctionEntry with the given // start position. if ((function_index_ + FunctionEntry::kSize <= store_.length()) && (static_cast(store_[function_index_]) == start)) { int index = function_index_; function_index_ += FunctionEntry::kSize; return FunctionEntry(store_.SubVector(index, index + FunctionEntry::kSize)); } return FunctionEntry(); } int ScriptDataImpl::GetSymbolIdentifier() { return ReadNumber(&symbol_data_); } bool ScriptDataImpl::SanityCheck() { // Check that the header data is valid and doesn't specify // point to positions outside the store. if (store_.length() < ScriptDataImpl::kHeaderSize) return false; if (magic() != ScriptDataImpl::kMagicNumber) return false; if (version() != ScriptDataImpl::kCurrentVersion) return false; if (has_error()) { // Extra sane sanity check for error message encoding. 
if (store_.length() <= kHeaderSize + kMessageTextPos) return false; if (Read(kMessageStartPos) > Read(kMessageEndPos)) return false; unsigned arg_count = Read(kMessageArgCountPos); int pos = kMessageTextPos; for (unsigned int i = 0; i <= arg_count; i++) { if (store_.length() <= kHeaderSize + pos) return false; int length = static_cast(Read(pos)); if (length < 0) return false; pos += 1 + length; } if (store_.length() < kHeaderSize + pos) return false; return true; } // Check that the space allocated for function entries is sane. int functions_size = static_cast(store_[ScriptDataImpl::kFunctionsSizeOffset]); if (functions_size < 0) return false; if (functions_size % FunctionEntry::kSize != 0) return false; // Check that the count of symbols is non-negative. int symbol_count = static_cast(store_[ScriptDataImpl::kSymbolCountOffset]); if (symbol_count < 0) return false; // Check that the total size has room for header and function entries. int minimum_size = ScriptDataImpl::kHeaderSize + functions_size; if (store_.length() < minimum_size) return false; return true; } PartialParserRecorder::PartialParserRecorder() : function_store_(0), is_recording_(true), pause_count_(0) { preamble_[ScriptDataImpl::kMagicOffset] = ScriptDataImpl::kMagicNumber; preamble_[ScriptDataImpl::kVersionOffset] = ScriptDataImpl::kCurrentVersion; preamble_[ScriptDataImpl::kHasErrorOffset] = false; preamble_[ScriptDataImpl::kFunctionsSizeOffset] = 0; preamble_[ScriptDataImpl::kSymbolCountOffset] = 0; preamble_[ScriptDataImpl::kSizeOffset] = 0; ASSERT_EQ(6, ScriptDataImpl::kHeaderSize); #ifdef DEBUG prev_start_ = -1; #endif } CompleteParserRecorder::CompleteParserRecorder() : PartialParserRecorder(), symbol_store_(0), symbol_entries_(0), symbol_table_(vector_compare), symbol_id_(0) { } void PartialParserRecorder::WriteString(Vector str) { function_store_.Add(str.length()); for (int i = 0; i < str.length(); i++) { function_store_.Add(str[i]); } } void CompleteParserRecorder::WriteNumber(int number) { 
ASSERT(number >= 0); int mask = (1 << 28) - 1; for (int i = 28; i > 0; i -= 7) { if (number > mask) { symbol_store_.Add(static_cast(number >> i) | 0x80u); number &= mask; } mask >>= 7; } symbol_store_.Add(static_cast(number)); } const char* ScriptDataImpl::ReadString(unsigned* start, int* chars) { int length = start[0]; char* result = NewArray(length + 1); for (int i = 0; i < length; i++) { result[i] = start[i + 1]; } result[length] = '\0'; if (chars != NULL) *chars = length; return result; } void PartialParserRecorder::LogMessage(Scanner::Location loc, const char* message, Vector args) { if (has_error()) return; preamble_[ScriptDataImpl::kHasErrorOffset] = true; function_store_.Reset(); STATIC_ASSERT(ScriptDataImpl::kMessageStartPos == 0); function_store_.Add(loc.beg_pos); STATIC_ASSERT(ScriptDataImpl::kMessageEndPos == 1); function_store_.Add(loc.end_pos); STATIC_ASSERT(ScriptDataImpl::kMessageArgCountPos == 2); function_store_.Add(args.length()); STATIC_ASSERT(ScriptDataImpl::kMessageTextPos == 3); WriteString(CStrVector(message)); for (int i = 0; i < args.length(); i++) { WriteString(CStrVector(args[i])); } is_recording_ = false; } Scanner::Location ScriptDataImpl::MessageLocation() { int beg_pos = Read(kMessageStartPos); int end_pos = Read(kMessageEndPos); return Scanner::Location(beg_pos, end_pos); } const char* ScriptDataImpl::BuildMessage() { unsigned* start = ReadAddress(kMessageTextPos); return ReadString(start, NULL); } Vector ScriptDataImpl::BuildArgs() { int arg_count = Read(kMessageArgCountPos); const char** array = NewArray(arg_count); // Position after text found by skipping past length field and // length field content words. 
int pos = kMessageTextPos + 1 + Read(kMessageTextPos); for (int i = 0; i < arg_count; i++) { int count = 0; array[i] = ReadString(ReadAddress(pos), &count); pos += count + 1; } return Vector(array, arg_count); } unsigned ScriptDataImpl::Read(int position) { return store_[ScriptDataImpl::kHeaderSize + position]; } unsigned* ScriptDataImpl::ReadAddress(int position) { return &store_[ScriptDataImpl::kHeaderSize + position]; } Scope* Parser::NewScope(Scope* parent, Scope::Type type, bool inside_with) { Scope* result = new Scope(parent, type); result->Initialize(inside_with); return result; } // ---------------------------------------------------------------------------- // Target is a support class to facilitate manipulation of the // Parser's target_stack_ (the stack of potential 'break' and // 'continue' statement targets). Upon construction, a new target is // added; it is removed upon destruction. class Target BASE_EMBEDDED { public: Target(Target** variable, AstNode* node) : variable_(variable), node_(node), previous_(*variable) { *variable = this; } ~Target() { *variable_ = previous_; } Target* previous() { return previous_; } AstNode* node() { return node_; } private: Target** variable_; AstNode* node_; Target* previous_; }; class TargetScope BASE_EMBEDDED { public: explicit TargetScope(Target** variable) : variable_(variable), previous_(*variable) { *variable = NULL; } ~TargetScope() { *variable_ = previous_; } private: Target** variable_; Target* previous_; }; // ---------------------------------------------------------------------------- // LexicalScope is a support class to facilitate manipulation of the // Parser's scope stack. The constructor sets the parser's top scope // to the incoming scope, and the destructor resets it. 
class LexicalScope BASE_EMBEDDED { public: LexicalScope(Scope** scope_variable, int* with_nesting_level_variable, Scope* scope) : scope_variable_(scope_variable), with_nesting_level_variable_(with_nesting_level_variable), prev_scope_(*scope_variable), prev_level_(*with_nesting_level_variable) { *scope_variable = scope; *with_nesting_level_variable = 0; } ~LexicalScope() { (*scope_variable_)->Leave(); *scope_variable_ = prev_scope_; *with_nesting_level_variable_ = prev_level_; } private: Scope** scope_variable_; int* with_nesting_level_variable_; Scope* prev_scope_; int prev_level_; }; // ---------------------------------------------------------------------------- // The CHECK_OK macro is a convenient macro to enforce error // handling for functions that may fail (by returning !*ok). // // CAUTION: This macro appends extra statements after a call, // thus it must never be used where only a single statement // is correct (e.g. an if statement branch w/o braces)! #define CHECK_OK ok); \ if (!*ok) return NULL; \ ((void)0 #define DUMMY ) // to make indentation work #undef DUMMY #define CHECK_FAILED /**/); \ if (failed_) return NULL; \ ((void)0 #define DUMMY ) // to make indentation work #undef DUMMY // ---------------------------------------------------------------------------- // Implementation of Parser Parser::Parser(Handle