Stand-alone parser template.

Uses existing Scanner and ParserLog.
Generates same preparse-data as existing preparser.

Review URL: http://codereview.chromium.org/4112012

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5750 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2010-11-02 07:21:37 +00:00
parent ebcd03b46a
commit 42b6151247
4 changed files with 1700 additions and 217 deletions

View File

@ -36,6 +36,7 @@
#include "messages.h"
#include "parser.h"
#include "platform.h"
#include "preparser.h"
#include "runtime.h"
#include "scopeinfo.h"
#include "scopes.h"
@ -390,27 +391,6 @@ class ParserFactory BASE_EMBEDDED {
};
class ParserLog BASE_EMBEDDED {
public:
virtual ~ParserLog() { }
// Records the occurrence of a function.
virtual FunctionEntry LogFunction(int start) { return FunctionEntry(); }
virtual void LogSymbol(int start, Vector<const char> symbol) {}
virtual void LogError() { }
// Return the current position in the function entry log.
virtual int function_position() { return 0; }
virtual int symbol_position() { return 0; }
virtual int symbol_ids() { return 0; }
virtual void PauseRecording() {}
virtual void ResumeRecording() {}
virtual Vector<unsigned> ExtractData() {
return Vector<unsigned>();
};
};
class ConditionalLogPauseScope {
public:
ConditionalLogPauseScope(bool pause, ParserLog* log)
@ -484,141 +464,65 @@ class AstBuildingParserFactory : public ParserFactory {
};
// Record only functions.
class PartialParserRecorder: public ParserLog {
public:
PartialParserRecorder();
virtual FunctionEntry LogFunction(int start);
virtual int function_position() { return function_store_.size(); }
virtual void LogError() { }
virtual void LogMessage(Scanner::Location loc,
const char* message,
Vector<const char*> args);
virtual Vector<unsigned> ExtractData() {
int function_size = function_store_.size();
int total_size = ScriptDataImpl::kHeaderSize + function_size;
Vector<unsigned> data = Vector<unsigned>::New(total_size);
preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size;
preamble_[ScriptDataImpl::kSymbolCountOffset] = 0;
memcpy(data.start(), preamble_, sizeof(preamble_));
int symbol_start = ScriptDataImpl::kHeaderSize + function_size;
if (function_size > 0) {
function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize,
symbol_start));
}
return data;
Vector<unsigned> PartialParserRecorder::ExtractData() {
int function_size = function_store_.size();
int total_size = ScriptDataImpl::kHeaderSize + function_size;
Vector<unsigned> data = Vector<unsigned>::New(total_size);
preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size;
preamble_[ScriptDataImpl::kSymbolCountOffset] = 0;
memcpy(data.start(), preamble_, sizeof(preamble_));
int symbol_start = ScriptDataImpl::kHeaderSize + function_size;
if (function_size > 0) {
function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize,
symbol_start));
}
return data;
}
virtual void PauseRecording() {
pause_count_++;
is_recording_ = false;
void CompleteParserRecorder::LogSymbol(int start, Vector<const char> literal) {
if (!is_recording_) return;
int hash = vector_hash(literal);
HashMap::Entry* entry = symbol_table_.Lookup(&literal, hash, true);
int id = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
if (id == 0) {
// Put (symbol_id_ + 1) into entry and increment it.
id = ++symbol_id_;
entry->value = reinterpret_cast<void*>(id);
Vector<Vector<const char> > symbol = symbol_entries_.AddBlock(1, literal);
entry->key = &symbol[0];
}
WriteNumber(id - 1);
}
virtual void ResumeRecording() {
ASSERT(pause_count_ > 0);
if (--pause_count_ == 0) is_recording_ = !has_error();
Vector<unsigned> CompleteParserRecorder::ExtractData() {
int function_size = function_store_.size();
// Add terminator to symbols, then pad to unsigned size.
int symbol_size = symbol_store_.size();
int padding = sizeof(unsigned) - (symbol_size % sizeof(unsigned));
symbol_store_.AddBlock(padding, ScriptDataImpl::kNumberTerminator);
symbol_size += padding;
int total_size = ScriptDataImpl::kHeaderSize + function_size
+ (symbol_size / sizeof(unsigned));
Vector<unsigned> data = Vector<unsigned>::New(total_size);
preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size;
preamble_[ScriptDataImpl::kSymbolCountOffset] = symbol_id_;
memcpy(data.start(), preamble_, sizeof(preamble_));
int symbol_start = ScriptDataImpl::kHeaderSize + function_size;
if (function_size > 0) {
function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize,
symbol_start));
}
protected:
bool has_error() {
return static_cast<bool>(preamble_[ScriptDataImpl::kHasErrorOffset]);
if (!has_error()) {
symbol_store_.WriteTo(
Vector<byte>::cast(data.SubVector(symbol_start, total_size)));
}
bool is_recording() {
return is_recording_;
}
void WriteString(Vector<const char> str);
Collector<unsigned> function_store_;
unsigned preamble_[ScriptDataImpl::kHeaderSize];
bool is_recording_;
int pause_count_;
#ifdef DEBUG
int prev_start;
#endif
};
return data;
}
// Record both functions and symbols.
class CompleteParserRecorder: public PartialParserRecorder {
public:
CompleteParserRecorder();
virtual void LogSymbol(int start, Vector<const char> literal) {
if (!is_recording_) return;
int hash = vector_hash(literal);
HashMap::Entry* entry = symbol_table_.Lookup(&literal, hash, true);
int id = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
if (id == 0) {
// Put (symbol_id_ + 1) into entry and increment it.
id = ++symbol_id_;
entry->value = reinterpret_cast<void*>(id);
Vector<Vector<const char> > symbol = symbol_entries_.AddBlock(1, literal);
entry->key = &symbol[0];
}
WriteNumber(id - 1);
}
virtual Vector<unsigned> ExtractData() {
int function_size = function_store_.size();
// Add terminator to symbols, then pad to unsigned size.
int symbol_size = symbol_store_.size();
int padding = sizeof(unsigned) - (symbol_size % sizeof(unsigned));
symbol_store_.AddBlock(padding, ScriptDataImpl::kNumberTerminator);
symbol_size += padding;
int total_size = ScriptDataImpl::kHeaderSize + function_size
+ (symbol_size / sizeof(unsigned));
Vector<unsigned> data = Vector<unsigned>::New(total_size);
preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size;
preamble_[ScriptDataImpl::kSymbolCountOffset] = symbol_id_;
memcpy(data.start(), preamble_, sizeof(preamble_));
int symbol_start = ScriptDataImpl::kHeaderSize + function_size;
if (function_size > 0) {
function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize,
symbol_start));
}
if (!has_error()) {
symbol_store_.WriteTo(
Vector<byte>::cast(data.SubVector(symbol_start, total_size)));
}
return data;
}
virtual int symbol_position() { return symbol_store_.size(); }
virtual int symbol_ids() { return symbol_id_; }
private:
static int vector_hash(Vector<const char> string) {
int hash = 0;
for (int i = 0; i < string.length(); i++) {
int c = string[i];
hash += c;
hash += (hash << 10);
hash ^= (hash >> 6);
}
return hash;
}
static bool vector_compare(void* a, void* b) {
Vector<const char>* string1 = reinterpret_cast<Vector<const char>* >(a);
Vector<const char>* string2 = reinterpret_cast<Vector<const char>* >(b);
int length = string1->length();
if (string2->length() != length) return false;
return memcmp(string1->start(), string2->start(), length) == 0;
}
// Write a non-negative number to the symbol store.
void WriteNumber(int number);
Collector<byte> symbol_store_;
Collector<Vector<const char> > symbol_entries_;
HashMap symbol_table_;
int symbol_id_;
};
FunctionEntry ScriptDataImpl::GetFunctionEntry(int start) {
@ -691,7 +595,7 @@ PartialParserRecorder::PartialParserRecorder()
preamble_[ScriptDataImpl::kSizeOffset] = 0;
ASSERT_EQ(6, ScriptDataImpl::kHeaderSize);
#ifdef DEBUG
prev_start = -1;
prev_start_ = -1;
#endif
}
@ -742,8 +646,8 @@ const char* ScriptDataImpl::ReadString(unsigned* start, int* chars) {
void PartialParserRecorder::LogMessage(Scanner::Location loc,
const char* message,
Vector<const char*> args) {
const char* message,
Vector<const char*> args) {
if (has_error()) return;
preamble_[ScriptDataImpl::kHasErrorOffset] = true;
function_store_.Reset();
@ -800,18 +704,6 @@ unsigned* ScriptDataImpl::ReadAddress(int position) {
}
FunctionEntry PartialParserRecorder::LogFunction(int start) {
#ifdef DEBUG
ASSERT(start > prev_start);
prev_start = start;
#endif
if (!is_recording_) return FunctionEntry();
FunctionEntry result(function_store_.AddBlock(FunctionEntry::kSize, 0));
result.set_start_pos(start);
return result;
}
class AstBuildingParser : public Parser {
public:
AstBuildingParser(Handle<Script> script, bool allow_natives_syntax,
@ -1036,26 +928,6 @@ Parser::Parser(Handle<Script> script,
}
bool Parser::PreParseProgram(Handle<String> source,
unibrow::CharacterStream* stream) {
HistogramTimerScope timer(&Counters::pre_parse);
AssertNoZoneAllocation assert_no_zone_allocation;
AssertNoAllocation assert_no_allocation;
NoHandleAllocation no_handle_allocation;
scanner_.Initialize(source, stream, JAVASCRIPT);
ASSERT(target_stack_ == NULL);
mode_ = FLAG_lazy ? PARSE_LAZILY : PARSE_EAGERLY;
if (allow_natives_syntax_ || extension_ != NULL) mode_ = PARSE_EAGERLY;
DummyScope top_scope;
LexicalScope scope(&this->top_scope_, &this->with_nesting_level_, &top_scope);
TemporaryScope temp_scope(&this->temp_scope_);
ZoneListWrapper<Statement> processor;
bool ok = true;
ParseSourceElements(&processor, Token::EOS, &ok);
return !scanner().stack_overflow();
}
FunctionLiteral* Parser::ParseProgram(Handle<String> source,
bool in_global_context) {
CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
@ -1740,7 +1612,9 @@ Statement* Parser::ParseNativeDeclaration(bool* ok) {
while (!done) {
ParseIdentifier(CHECK_OK);
done = (peek() == Token::RPAREN);
if (!done) Expect(Token::COMMA, CHECK_OK);
if (!done) {
Expect(Token::COMMA, CHECK_OK);
}
}
Expect(Token::RPAREN, CHECK_OK);
Expect(Token::SEMICOLON, CHECK_OK);
@ -3720,7 +3594,6 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
Expect(Token::RBRACE, CHECK_OK);
} else {
FunctionEntry entry;
if (is_lazily_compiled) entry = log()->LogFunction(function_block_pos);
{
ConditionalLogPauseScope pause_if(is_lazily_compiled, log());
ParseSourceElements(&body, Token::RBRACE, CHECK_OK);
@ -3733,12 +3606,11 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
Expect(Token::RBRACE, CHECK_OK);
end_pos = scanner_.location().end_pos;
if (entry.is_valid()) {
ASSERT(is_lazily_compiled);
if (is_pre_parsing_ && is_lazily_compiled) {
ASSERT(is_pre_parsing_);
entry.set_end_pos(end_pos);
entry.set_literal_count(materialized_literal_count);
entry.set_property_count(expected_property_count);
log()->LogFunction(function_block_pos, end_pos,
materialized_literal_count,
expected_property_count);
}
}
@ -5003,23 +4875,6 @@ bool ScriptDataImpl::HasError() {
}
// Preparse, but only collect data that is immediately useful,
// even if the preparser data is only used once.
ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {
Handle<Script> no_script;
bool allow_natives_syntax =
FLAG_allow_natives_syntax || Bootstrapper::IsActive();
PartialPreParser parser(no_script, allow_natives_syntax, extension);
if (!parser.PreParseProgram(source, stream)) return NULL;
// Extract the accumulated data from the recorder as a single
// contiguous vector that we are responsible for disposing.
Vector<unsigned> store = parser.recorder()->ExtractData();
return new ScriptDataImpl(store);
}
void ScriptDataImpl::Initialize() {
// Prepares state for use.
if (store_.length() >= kHeaderSize) {
@ -5063,17 +4918,45 @@ int ScriptDataImpl::ReadNumber(byte** source) {
}
// Preparse, but only collect data that is immediately useful,
// even if the preparser data is only used once.
ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {
Handle<Script> no_script;
preparser::PreParser<Scanner, PartialParserRecorder> parser;
Scanner scanner;
scanner.Initialize(source, stream, JAVASCRIPT);
bool allow_lazy = FLAG_lazy && (extension == NULL);
PartialParserRecorder recorder;
if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
Top::StackOverflow();
return NULL;
}
// Extract the accumulated data from the recorder as a single
// contiguous vector that we are responsible for disposing.
Vector<unsigned> store = recorder.ExtractData();
return new ScriptDataImpl(store);
}
ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {
Handle<Script> no_script;
bool allow_natives_syntax =
FLAG_allow_natives_syntax || Bootstrapper::IsActive();
CompletePreParser parser(no_script, allow_natives_syntax, extension);
if (!parser.PreParseProgram(source, stream)) return NULL;
preparser::PreParser<Scanner, CompleteParserRecorder> parser;
Scanner scanner;
scanner.Initialize(source, stream, JAVASCRIPT);
bool allow_lazy = FLAG_lazy && (extension == NULL);
CompleteParserRecorder recorder;
if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
Top::StackOverflow();
return NULL;
}
// Extract the accumulated data from the recorder as a single
// contiguous vector that we are responsible for disposing.
Vector<unsigned> store = parser.recorder()->ExtractData();
Vector<unsigned> store = recorder.ExtractData();
return new ScriptDataImpl(store);
}

View File

@ -177,6 +177,152 @@ class ScriptDataImpl : public ScriptData {
};
class ParserLog BASE_EMBEDDED {
public:
virtual ~ParserLog() { }
// Records the occurrence of a function.
virtual void LogFunction(int start, int end, int literals, int properties) {}
// Records the occurrence of a symbol in the source. The vector holds the
// UTF-8 encoded symbol content.
virtual void LogSymbol(int start, Vector<const char> symbol) {}
// Records the occurrence of a symbol in the source. The symbol pointer
// points to the UTF-8 encoded symbol content.
virtual void LogSymbol(int start, const char* symbol, int length) {}
// Return the current position in the function entry log.
virtual int function_position() { return 0; }
// Return the current position in the symbol entry log.
// Notice: Functions and symbols are currently logged separately.
virtual int symbol_position() { return 0; }
// Return the number of distinct symbols logged.
virtual int symbol_ids() { return 0; }
// Pauses recording. The Log-functions above will do nothing during pausing.
// Pauses can be nested.
virtual void PauseRecording() {}
// Ends a recording pause.
virtual void ResumeRecording() {}
// Extracts a representation of the logged data that can be used by
// ScriptData.
virtual Vector<unsigned> ExtractData() {
return Vector<unsigned>();
};
};
// Record only functions.
class PartialParserRecorder: public ParserLog {
public:
PartialParserRecorder();
virtual void LogFunction(int start, int end, int literals, int properties) {
function_store_.Add(start);
function_store_.Add(end);
function_store_.Add(literals);
function_store_.Add(properties);
}
// Logs an error message and marks the log as containing an error.
// Further logging will be ignored, and ExtractData will return a vector
// representing the error only.
void LogMessage(int start,
int end,
const char* message,
const char* argument_opt) {
Scanner::Location location(start, end);
Vector<const char*> arguments;
if (argument_opt != NULL) {
arguments = Vector<const char*>(&argument_opt, 1);
}
this->LogMessage(location, message, arguments);
}
virtual int function_position() { return function_store_.size(); }
virtual void LogMessage(Scanner::Location loc,
const char* message,
Vector<const char*> args);
virtual Vector<unsigned> ExtractData();
virtual void PauseRecording() {
pause_count_++;
is_recording_ = false;
}
virtual void ResumeRecording() {
ASSERT(pause_count_ > 0);
if (--pause_count_ == 0) is_recording_ = !has_error();
}
protected:
bool has_error() {
return static_cast<bool>(preamble_[ScriptDataImpl::kHasErrorOffset]);
}
bool is_recording() {
return is_recording_;
}
void WriteString(Vector<const char> str);
Collector<unsigned> function_store_;
unsigned preamble_[ScriptDataImpl::kHeaderSize];
bool is_recording_;
int pause_count_;
#ifdef DEBUG
int prev_start_;
#endif
};
// Record both functions and symbols.
class CompleteParserRecorder: public PartialParserRecorder {
public:
CompleteParserRecorder();
virtual void LogSymbol(int start, Vector<const char> literal);
virtual void LogSymbol(int start, const char* symbol, int length) {
LogSymbol(start, Vector<const char>(symbol, length));
}
virtual Vector<unsigned> ExtractData();
virtual int symbol_position() { return symbol_store_.size(); }
virtual int symbol_ids() { return symbol_id_; }
private:
static int vector_hash(Vector<const char> string) {
int hash = 0;
for (int i = 0; i < string.length(); i++) {
int c = string[i];
hash += c;
hash += (hash << 10);
hash ^= (hash >> 6);
}
return hash;
}
static bool vector_compare(void* a, void* b) {
Vector<const char>* string1 = reinterpret_cast<Vector<const char>* >(a);
Vector<const char>* string2 = reinterpret_cast<Vector<const char>* >(b);
int length = string1->length();
if (string2->length() != length) return false;
return memcmp(string1->start(), string2->start(), length) == 0;
}
// Write a non-negative number to the symbol store.
void WriteNumber(int number);
Collector<byte> symbol_store_;
Collector<Vector<const char> > symbol_entries_;
HashMap symbol_table_;
int symbol_id_;
};
class ParserApi {
public:
// Parses the source code represented by the compilation info and sets its
@ -434,10 +580,6 @@ class Parser {
ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data);
virtual ~Parser() { }
// Pre-parse the program from the character stream; returns true on
// success, false if a stack-overflow happened during parsing.
bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream);
void ReportMessage(const char* message, Vector<const char*> args);
virtual void ReportMessageAt(Scanner::Location loc,
const char* message,

1428
src/preparser.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -26,6 +26,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
#include <stdio.h>
#include "v8.h"
@ -34,7 +35,8 @@
#include "parser.h"
#include "utils.h"
#include "execution.h"
#include "scanner.h"
#include "preparser.h"
#include "cctest.h"
namespace i = ::v8::internal;
@ -239,3 +241,31 @@ TEST(Preparsing) {
i::Vector<const char*> args = pre_impl->BuildArgs();
CHECK_GT(strlen(message), 0);
}
TEST(StandAlonePreParser) {
int marker;
i::StackGuard::SetStackLimit(
reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
const char* programs[] = {
"{label: 42}",
"var x = 42;",
"function foo(x, y) { return x + y; }",
"native function foo(); return %ArgleBargle(glop);",
NULL
};
for (int i = 0; programs[i]; i++) {
const char* program = programs[i];
unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
i::CompleteParserRecorder log;
i::Scanner scanner;
scanner.Initialize(i::Handle<i::String>::null(), &stream, i::JAVASCRIPT);
i::preparser::PreParser<i::Scanner, i::CompleteParserRecorder> preparser;
bool result = preparser.PreParseProgram(&scanner, &log, true);
CHECK(result);
i::ScriptDataImpl data(log.ExtractData());
CHECK(!data.has_error());
}
}