Made predata smaller by storing symbol data in variable length base-128.

Remove position from symbol data - they must come in the correct order anyway.

Review URL: http://codereview.chromium.org/3384003

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5458 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
lrn@chromium.org 2010-09-15 10:54:35 +00:00
parent 62ec3292de
commit cb514b72db
3 changed files with 125 additions and 69 deletions

View File

@ -996,21 +996,23 @@ class CompleteParserRecorder: public PartialParserRecorder {
int id = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
if (id == 0) {
// Put (symbol_id_ + 1) into entry and increment it.
symbol_id_++;
entry->value = reinterpret_cast<void*>(symbol_id_);
id = ++symbol_id_;
entry->value = reinterpret_cast<void*>(id);
Vector<Vector<const char> > symbol = symbol_entries_.AddBlock(1, literal);
entry->key = &symbol[0];
} else {
// Log a reuse of an earlier seen symbol.
symbol_store_.Add(start);
symbol_store_.Add(id - 1);
}
symbol_store_.Add(id - 1);
}
virtual Vector<unsigned> ExtractData() {
int function_size = function_store_.size();
// Add terminator to symbols, then pad to unsigned size.
int symbol_size = symbol_store_.size();
int total_size = ScriptDataImpl::kHeaderSize + function_size + symbol_size;
int padding = sizeof(unsigned) - (symbol_size % sizeof(unsigned));
symbol_store_.AddBlock(padding, ScriptDataImpl::kNumberTerminator);
symbol_size += padding;
int total_size = ScriptDataImpl::kHeaderSize + function_size
+ (symbol_size / sizeof(unsigned));
Vector<unsigned> data = Vector<unsigned>::New(total_size);
preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size;
preamble_[ScriptDataImpl::kSymbolCountOffset] = symbol_id_;
@ -1020,8 +1022,9 @@ class CompleteParserRecorder: public PartialParserRecorder {
function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize,
symbol_start));
}
if (symbol_size > 0) {
symbol_store_.WriteTo(data.SubVector(symbol_start, total_size));
if (!has_error()) {
symbol_store_.WriteTo(
Vector<byte>::cast(data.SubVector(symbol_start, total_size)));
}
return data;
}
@ -1029,12 +1032,7 @@ class CompleteParserRecorder: public PartialParserRecorder {
virtual int symbol_position() { return symbol_store_.size(); }
virtual int symbol_ids() { return symbol_id_; }
private:
Collector<unsigned> symbol_store_;
Collector<Vector<const char> > symbol_entries_;
HashMap symbol_table_;
int symbol_id_;
static int vector_hash(Vector<const char> string) {
static int vector_hash(Vector<const char> string) {
int hash = 0;
for (int i = 0; i < string.length(); i++) {
int c = string[i];
@ -1052,6 +1050,14 @@ class CompleteParserRecorder: public PartialParserRecorder {
if (string2->length() != length) return false;
return memcmp(string1->start(), string2->start(), length) == 0;
}
// Write a non-negative number to the symbol store.
void WriteNumber(int number);
Collector<byte> symbol_store_;
Collector<Vector<const char> > symbol_entries_;
HashMap symbol_table_;
int symbol_id_;
};
@ -1076,18 +1082,11 @@ FunctionEntry ScriptDataImpl::GetFunctionEntry(int start) {
}
int ScriptDataImpl::GetSymbolIdentifier(int start) {
int next = symbol_index_ + 2;
if (next <= store_.length()
&& static_cast<int>(store_[symbol_index_]) == start) {
symbol_index_ = next;
return store_[next - 1];
}
return symbol_id_++;
int ScriptDataImpl::GetSymbolIdentifier() {
return ReadNumber(&symbol_data_);
}
bool ScriptDataImpl::SanityCheck() {
// Check that the header data is valid and doesn't
// point to positions outside the store.
@ -1118,7 +1117,7 @@ bool ScriptDataImpl::SanityCheck() {
int symbol_count =
static_cast<int>(store_[ScriptDataImpl::kSymbolCountOffset]);
if (symbol_count < 0) return false;
// Check that the total size has room both function entries.
// Check that the total size has room for header and function entries.
int minimum_size =
ScriptDataImpl::kHeaderSize + functions_size;
if (store_.length() < minimum_size) return false;
@ -1158,6 +1157,22 @@ void PartialParserRecorder::WriteString(Vector<const char> str) {
}
// Appends |number| to the symbol store as a variable-length base-128 value,
// most significant digit first. Every byte except the last has its high bit
// (0x80) set to signal that more digits follow.
// The first digit written is the most significant non-zero one, so an encoded
// number never starts with the byte 0x80, which the reader treats as the
// end-of-stream marker.
void CompleteParserRecorder::WriteNumber(int number) {
  ASSERT(number >= 0);
  // Locate the most significant non-zero 7-bit digit. A non-negative 31-bit
  // value needs at most five digits (shifts 28, 21, 14, 7 and 0).
  int shift = 28;
  while (shift > 0 && (number >> shift) == 0) {
    shift -= 7;
  }
  // Emit every digit from there down, including zero digits in the middle.
  // Skipping an interior zero digit (e.g. for 1 << 14) would make the reader
  // reconstruct a smaller number.
  for (; shift > 0; shift -= 7) {
    symbol_store_.Add(static_cast<byte>(((number >> shift) & 0x7f) | 0x80));
  }
  // Final digit: high bit clear marks the end of this number.
  symbol_store_.Add(static_cast<byte>(number & 0x7f));
}
const char* ScriptDataImpl::ReadString(unsigned* start, int* chars) {
int length = start[0];
char* result = NewArray<char>(length + 1);
@ -1206,7 +1221,8 @@ const char* ScriptDataImpl::BuildMessage() {
Vector<const char*> ScriptDataImpl::BuildArgs() {
int arg_count = Read(kMessageArgCountPos);
const char** array = NewArray<const char*>(arg_count);
// Position after the string starting at position 3.
// Position after text found by skipping past length field and
// length field content words.
int pos = kMessageTextPos + 1 + Read(kMessageTextPos);
for (int i = 0; i < arg_count; i++) {
int count = 0;
@ -1287,7 +1303,8 @@ class CompletePreParser : public PreParser {
public:
CompletePreParser(Handle<Script> script, bool allow_natives_syntax,
v8::Extension* extension)
: PreParser(script, allow_natives_syntax, extension, &recorder_) { }
: PreParser(script, allow_natives_syntax, extension, &recorder_),
recorder_() { }
virtual PartialParserRecorder* recorder() { return &recorder_; }
private:
CompleteParserRecorder recorder_;
@ -1298,7 +1315,8 @@ class PartialPreParser : public PreParser {
public:
PartialPreParser(Handle<Script> script, bool allow_natives_syntax,
v8::Extension* extension)
: PreParser(script, allow_natives_syntax, extension, &recorder_) { }
: PreParser(script, allow_natives_syntax, extension, &recorder_),
recorder_() { }
virtual PartialParserRecorder* recorder() { return &recorder_; }
private:
PartialParserRecorder recorder_;
@ -1639,17 +1657,12 @@ void Parser::ReportMessage(const char* type, Vector<const char*> args) {
Handle<String> Parser::GetSymbol(bool* ok) {
if (pre_data() != NULL) {
int symbol_id =
pre_data()->GetSymbolIdentifier(scanner_.location().beg_pos);
if (symbol_id < 0) {
ReportInvalidPreparseData(Factory::empty_symbol(), ok);
return Handle<String>::null();
}
return factory()->LookupSymbol(symbol_id, scanner_.literal());
}
log()->LogSymbol(scanner_.location().beg_pos, scanner_.literal());
return factory()->LookupSymbol(-1, scanner_.literal());
int symbol_id = -1;
if (pre_data() != NULL) {
symbol_id = pre_data()->GetSymbolIdentifier();
}
return factory()->LookupSymbol(symbol_id, scanner_.literal());
}
@ -4176,8 +4189,7 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
Counters::total_preparse_skipped.Increment(end_pos - function_block_pos);
scanner_.SeekForward(end_pos);
pre_data()->Skip(entry.predata_function_skip(),
entry.predata_symbol_skip(),
entry.symbol_id_skip());
entry.predata_symbol_skip());
materialized_literal_count = entry.literal_count();
expected_property_count = entry.property_count();
only_simple_this_property_assignments = false;
@ -4191,7 +4203,6 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
FunctionEntry entry = log()->LogFunction(function_block_pos);
int predata_function_position_before = log()->function_position();
int predata_symbol_position_before = log()->symbol_position();
int symbol_ids_before = log()->symbol_ids();
ParseSourceElements(&body, Token::RBRACE, CHECK_OK);
materialized_literal_count = temp_scope.materialized_literal_count();
expected_property_count = temp_scope.expected_property_count();
@ -4209,8 +4220,6 @@ FunctionLiteral* Parser::ParseFunctionLiteral(Handle<String> var_name,
log()->function_position() - predata_function_position_before);
entry.set_predata_symbol_skip(
log()->symbol_position() - predata_symbol_position_before);
entry.set_symbol_id_skip(
log()->symbol_ids() - symbol_ids_before);
}
}
@ -5482,6 +5491,47 @@ ScriptDataImpl* PartialPreParse(Handle<String> source,
}
void ScriptDataImpl::Initialize() {
if (store_.length() >= kHeaderSize) {
int symbol_data_offset = kHeaderSize + store_[kFunctionsSizeOffset];
if (store_.length() > symbol_data_offset) {
symbol_data_ = reinterpret_cast<byte*>(&store_[symbol_data_offset]);
} else {
// Partial preparse causes no symbol information.
symbol_data_ = reinterpret_cast<byte*>(&store_[0] + store_.length());
}
symbol_data_end_ = reinterpret_cast<byte*>(&store_[0] + store_.length());
}
}
// Reads one number, encoded in base 128 with the most significant digit
// first, starting at *source. The most significant bit of a byte marks that
// more digits follow.
// If the first byte is 0x80 (kNumberTerminator), it would normally
// represent a leading zero. Since that is useless, and therefore won't
// appear as the first digit of any actual value, it is used to
// mark the end of the input stream.
// On success *source is advanced past the number and the value is returned.
// Returns -1, leaving *source unchanged, at the end of the stream or when
// the encoding is truncated or too long to fit in a non-negative int.
int ScriptDataImpl::ReadNumber(byte** source) {
  byte* cursor = *source;
  if (cursor >= symbol_data_end_) return -1;
  byte digit = *cursor++;
  if (digit == kNumberTerminator) {
    // End of stream marker.
    return -1;
  }
  int result = digit & 0x7f;
  while ((digit & 0x80u) != 0) {
    if (cursor >= symbol_data_end_) return -1;
    // Shifting a value of 2^24 or more left by 7 bits would overflow a
    // 32-bit int; no valid encoding gets here, so treat it as bad data.
    if (result >= (1 << 24)) return -1;
    digit = *cursor++;
    result = (result << 7) | (digit & 0x7f);
  }
  *source = cursor;
  return result;
}
ScriptDataImpl* PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {

View File

@ -82,15 +82,9 @@ class FunctionEntry BASE_EMBEDDED {
backing_[kPredataSymbolSkipOffset] = value;
}
int symbol_id_skip() { return backing_[kSymbolIdSkipOffset]; }
void set_symbol_id_skip(int value) {
backing_[kSymbolIdSkipOffset] = value;
}
bool is_valid() { return backing_.length() > 0; }
static const int kSize = 7;
static const int kSize = 6;
private:
Vector<unsigned> backing_;
@ -100,7 +94,6 @@ class FunctionEntry BASE_EMBEDDED {
static const int kPropertyCountOffset = 3;
static const int kPredataFunctionSkipOffset = 4;
static const int kPredataSymbolSkipOffset = 5;
static const int kSymbolIdSkipOffset = 6;
};
@ -109,18 +102,10 @@ class ScriptDataImpl : public ScriptData {
explicit ScriptDataImpl(Vector<unsigned> store)
: store_(store),
function_index_(kHeaderSize),
symbol_id_(0),
owns_store_(true) {
Initialize();
}
void Initialize() {
if (store_.length() >= kHeaderSize) {
// Otherwise we won't satisfy the SanityCheck.
symbol_index_ = kHeaderSize + store_[kFunctionsSizeOffset];
}
}
// Create an empty ScriptDataImpl that is guaranteed to not satisfy
// a SanityCheck.
ScriptDataImpl() : store_(Vector<unsigned>()), owns_store_(false) { }
@ -130,8 +115,11 @@ class ScriptDataImpl : public ScriptData {
virtual const char* Data();
virtual bool HasError();
void Initialize();
void ReadNextSymbolPosition();
FunctionEntry GetFunctionEntry(int start);
int GetSymbolIdentifier(int start);
int GetSymbolIdentifier();
void SkipFunctionEntry(int start);
bool SanityCheck();
@ -149,19 +137,27 @@ class ScriptDataImpl : public ScriptData {
unsigned version() { return store_[kVersionOffset]; }
// Skip forward in the preparser data by the given number
// of unsigned ints.
virtual void Skip(int function_entries, int symbol_entries, int symbol_ids) {
// of unsigned ints of function entries and the given number of bytes of
// symbol id encoding.
void Skip(int function_entries, int symbol_entries) {
ASSERT(function_entries >= 0);
ASSERT(function_entries
<= (static_cast<int>(store_[kFunctionsSizeOffset])
- (function_index_ - kHeaderSize)));
function_index_ += function_entries;
symbol_index_ += symbol_entries;
symbol_id_ += symbol_ids;
ASSERT(symbol_entries >= 0);
ASSERT(symbol_entries <= symbol_data_end_ - symbol_data_);
unsigned max_function_skip = store_[kFunctionsSizeOffset] -
static_cast<unsigned>(function_index_ - kHeaderSize);
function_index_ +=
Min(static_cast<unsigned>(function_entries), max_function_skip);
symbol_data_ +=
Min(static_cast<unsigned>(symbol_entries),
static_cast<unsigned>(symbol_data_end_ - symbol_data_));
}
static const unsigned kMagicNumber = 0xBadDead;
static const unsigned kCurrentVersion = 2;
static const unsigned kCurrentVersion = 3;
static const int kMagicOffset = 0;
static const int kVersionOffset = 1;
@ -171,26 +167,30 @@ class ScriptDataImpl : public ScriptData {
static const int kSizeOffset = 5;
static const int kHeaderSize = 6;
// If encoding a message, the following positions are fixed.
static const int kMessageStartPos = 0;
static const int kMessageEndPos = 1;
static const int kMessageArgCountPos = 2;
static const int kMessageTextPos = 3;
static const byte kNumberTerminator = 0x80u;
private:
Vector<unsigned> store_;
unsigned char* symbol_data_;
unsigned char* symbol_data_end_;
int function_index_;
int symbol_index_;
int symbol_id_;
bool owns_store_;
unsigned Read(int position);
unsigned* ReadAddress(int position);
// Reads a base-128 encoded number from *source, advancing *source past it.
// Returns -1 if there is no complete number left to read.
int ReadNumber(byte** source);
ScriptDataImpl(const char* backing_store, int length)
: store_(reinterpret_cast<unsigned*>(const_cast<char*>(backing_store)),
length / sizeof(unsigned)),
function_index_(kHeaderSize),
symbol_id_(0),
owns_store_(false) {
ASSERT_EQ(0, reinterpret_cast<intptr_t>(backing_store) % sizeof(unsigned));
Initialize();

View File

@ -391,6 +391,12 @@ class Vector {
// Factory method for creating empty vectors.
static Vector<T> empty() { return Vector<T>(NULL, 0); }
// Reinterprets the memory of a Vector<S> as a Vector<T>. The element count
// is rescaled by sizeof(S) / sizeof(T), rounding down to a whole number of
// T elements.
template<typename S>
static Vector<T> cast(Vector<S> input) {
  T* retyped_start = reinterpret_cast<T*>(input.start());
  return Vector<T>(retyped_start,
                   input.length() * sizeof(S) / sizeof(T));
}
protected:
void set_start(T* start) { start_ = start; }