Unify and fix checkers for duplicate object literal properties.

R=ulan@chromium.org
TEST=preparser/duplicate-property

Review URL: https://codereview.chromium.org/25755002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@17114 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
mstarzinger@chromium.org 2013-10-04 16:58:01 +00:00
parent 3c8e87d840
commit 067a266426
8 changed files with 316 additions and 362 deletions

View File

@ -31,6 +31,7 @@
#include "conversions-inl.h"
#include "dtoa.h"
#include "list-inl.h"
#include "strtod.h"
#include "utils.h"

View File

@ -3771,84 +3771,6 @@ Handle<Object> Parser::GetBoilerplateValue(Expression* expression) {
}
// Validation per 11.1.5 Object Initialiser.
// Tracks, per property key, which kinds of property (getter, setter, data)
// have been seen so far in one object literal, so that CheckProperty can
// reject illegal combinations.
class ObjectLiteralPropertyChecker {
public:
// |parser| is used to report errors; |language_mode| decides whether
// duplicate data properties are an error (see CheckProperty).
ObjectLiteralPropertyChecker(Parser* parser, LanguageMode language_mode) :
props_(Literal::Match),
parser_(parser),
language_mode_(language_mode) {
}
// Records |property| and reports an error at |loc| if it conflicts with a
// previously recorded property of the same key. Sets |*ok| accordingly.
void CheckProperty(
ObjectLiteral::Property* property,
Scanner::Location loc,
bool* ok);
private:
// Bit flags stored per key; kAccessor is the union of both accessor bits.
enum PropertyKind {
kGetAccessor = 0x01,
kSetAccessor = 0x02,
kAccessor = kGetAccessor | kSetAccessor,
kData = 0x04
};
// Maps the AST property kind to one of the flags above; everything that is
// neither GETTER nor SETTER is treated as a data property.
static intptr_t GetPropertyKind(ObjectLiteral::Property* property) {
switch (property->kind()) {
case ObjectLiteral::Property::GETTER:
return kGetAccessor;
case ObjectLiteral::Property::SETTER:
return kSetAccessor;
default:
return kData;
}
}
// Key literal -> accumulated PropertyKind bits (stored in the entry value).
HashMap props_;
Parser* parser_;
LanguageMode language_mode_;
};
// Records |property| in the per-literal table and validates it against the
// kinds already seen for the same key. On a conflict an error is reported at
// |loc|, |*ok| is set to false and the table is left untouched; otherwise the
// new kind is merged into the entry and |*ok| is set to true.
void ObjectLiteralPropertyChecker::CheckProperty(
    ObjectLiteral::Property* property,
    Scanner::Location loc,
    bool* ok) {
  ASSERT(property != NULL);
  Literal* key = property->key();
  HashMap::Entry* slot = props_.Lookup(key, key->Hash(), true);
  const intptr_t seen = reinterpret_cast<intptr_t>(slot->value);
  const intptr_t incoming = GetPropertyKind(property);

  // Pick the first applicable diagnostic; the order of the checks matters.
  const char* message = NULL;
  if (language_mode_ != CLASSIC_MODE && (incoming & seen & kData) != 0) {
    // Duplicate data properties are illegal in strict or extended mode.
    message = "strict_duplicate_property";
  } else if (((incoming & kData) && (seen & kAccessor)) ||
             ((seen & kData) && (incoming & kAccessor))) {
    // Data property conflicting with an accessor.
    message = "accessor_data_property";
  } else if ((incoming & seen & kAccessor) != 0) {
    // Two accessors of the same type conflicting.
    message = "accessor_get_set";
  }

  if (message != NULL) {
    parser_->ReportMessageAt(loc, message, Vector<const char*>::empty());
    *ok = false;
    return;
  }

  // No conflict: remember the new kind for this key.
  slot->value = reinterpret_cast<void*>(seen | incoming);
  *ok = true;
}
void Parser::BuildObjectLiteralConstantProperties(
ZoneList<ObjectLiteral::Property*>* properties,
Handle<FixedArray> constant_properties,
@ -3921,12 +3843,17 @@ void Parser::BuildObjectLiteralConstantProperties(
}
// Force instantiation of the ObjectLiteralChecker<Parser>::CheckProperty template member.
template void ObjectLiteralChecker<Parser>::CheckProperty(
Token::Value property, PropertyKind type, bool* ok);
ObjectLiteral::Property* Parser::ParseObjectLiteralGetSet(bool is_getter,
bool* ok) {
// Special handling of getter and setter syntax:
// { ... , get foo() { ... }, ... , set foo(v) { ... v ... } , ... }
// We have already read the "get" or "set" keyword.
Token::Value next = Next();
// We have already read the "get" or "set" keyword and the name.
Token::Value next = scanner().current_token();
bool is_keyword = Token::IsKeyword(next);
if (next == Token::IDENTIFIER || next == Token::NUMBER ||
next == Token::FUTURE_RESERVED_WORD ||
@ -3968,7 +3895,8 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
int number_of_boilerplate_properties = 0;
bool has_function = false;
ObjectLiteralPropertyChecker checker(this, top_scope_->language_mode());
ObjectLiteralChecker<Parser> checker(this, &scanner_,
top_scope_->language_mode());
Expect(Token::LBRACE, CHECK_OK);
@ -3978,9 +3906,6 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
Literal* key = NULL;
Token::Value next = peek();
// Location of the property name token
Scanner::Location loc = scanner().peek_location();
switch (next) {
case Token::FUTURE_RESERVED_WORD:
case Token::FUTURE_STRICT_RESERVED_WORD:
@ -3992,15 +3917,14 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
if (fni_ != NULL) fni_->PushLiteralName(id);
if ((is_getter || is_setter) && peek() != Token::COLON) {
// Update loc to point to the identifier
loc = scanner().peek_location();
// Validate the property.
PropertyKind type = is_getter ? kGetterProperty : kSetterProperty;
checker.CheckProperty(Next(), type, CHECK_OK);
ObjectLiteral::Property* property =
ParseObjectLiteralGetSet(is_getter, CHECK_OK);
if (IsBoilerplateProperty(property)) {
number_of_boilerplate_properties++;
}
// Validate the property.
checker.CheckProperty(property, loc, CHECK_OK);
properties->Add(property, zone());
if (peek() != Token::RBRACE) Expect(Token::COMMA, CHECK_OK);
@ -4051,6 +3975,9 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
}
}
// Validate the property
checker.CheckProperty(next, kValueProperty, CHECK_OK);
Expect(Token::COLON, CHECK_OK);
Expression* value = ParseAssignmentExpression(true, CHECK_OK);
@ -4068,8 +3995,6 @@ Expression* Parser::ParseObjectLiteral(bool* ok) {
// Count CONSTANT or COMPUTED properties to maintain the enumeration order.
if (IsBoilerplateProperty(property)) number_of_boilerplate_properties++;
// Validate the property
checker.CheckProperty(property, loc, CHECK_OK);
properties->Add(property, zone());
// TODO(1240767): Consider allowing trailing comma.

View File

@ -463,7 +463,7 @@ class Parser BASE_EMBEDDED {
void ReportMessageAt(Scanner::Location loc,
const char* message,
Vector<const char*> args);
Vector<const char*> args = Vector<const char*>::empty());
void ReportMessageAt(Scanner::Location loc,
const char* message,
Vector<Handle<String> > args);
@ -879,6 +879,7 @@ class Parser BASE_EMBEDDED {
CompilationInfo* info_;
friend class BlockState;
friend class FunctionState;
friend class ObjectLiteralChecker<Parser>;
};

View File

@ -1231,39 +1231,6 @@ PreParser::Expression PreParser::ParseArrayLiteral(bool* ok) {
return Expression::Default();
}
// Registers the property name currently held by the scanner in |finder| with
// kind |type| and reports an error if it conflicts with what was recorded for
// the same name before. |*ok| is set to false only when an error is reported;
// duplicate data properties are tolerated in classic mode.
void PreParser::CheckDuplicate(DuplicateFinder* finder,
                               i::Token::Value property,
                               int type,
                               bool* ok) {
  int previous;
  if (property == i::Token::NUMBER) {
    previous = finder->AddNumber(scanner_->literal_ascii_string(), type);
  } else if (scanner_->is_literal_ascii()) {
    previous = finder->AddAsciiSymbol(scanner_->literal_ascii_string(), type);
  } else {
    previous = finder->AddUtf16Symbol(scanner_->literal_utf16_string(), type);
  }
  if (!HasConflict(previous, type)) return;

  const char* message;
  if (IsDataDataConflict(previous, type)) {
    // Both are data properties.
    if (is_classic_mode()) return;
    message = "strict_duplicate_property";
  } else if (IsDataAccessorConflict(previous, type)) {
    // Both a data and an accessor property with the same name.
    message = "accessor_data_property";
  } else {
    ASSERT(IsAccessorAccessorConflict(previous, type));
    // Both accessors of the same type.
    message = "accessor_get_set";
  }
  ReportMessageAt(scanner_->location(), message, NULL);
  *ok = false;
}
PreParser::Expression PreParser::ParseObjectLiteral(bool* ok) {
// ObjectLiteral ::
@ -1272,8 +1239,9 @@ PreParser::Expression PreParser::ParseObjectLiteral(bool* ok) {
// | (('get' | 'set') (IdentifierName | String | Number) FunctionLiteral)
// )*[','] '}'
i::ObjectLiteralChecker<PreParser> checker(this, scanner_, language_mode());
Expect(i::Token::LBRACE, CHECK_OK);
DuplicateFinder duplicate_finder(scanner_->unicode_cache());
while (peek() != i::Token::RBRACE) {
i::Token::Value next = peek();
switch (next) {
@ -1298,30 +1266,31 @@ PreParser::Expression PreParser::ParseObjectLiteral(bool* ok) {
if (!is_keyword) {
LogSymbol();
}
PropertyType type = is_getter ? kGetterProperty : kSetterProperty;
CheckDuplicate(&duplicate_finder, name, type, CHECK_OK);
i::PropertyKind type = is_getter ? i::kGetterProperty
: i::kSetterProperty;
checker.CheckProperty(name, type, CHECK_OK);
ParseFunctionLiteral(false, CHECK_OK);
if (peek() != i::Token::RBRACE) {
Expect(i::Token::COMMA, CHECK_OK);
}
continue; // restart the while
}
CheckDuplicate(&duplicate_finder, next, kValueProperty, CHECK_OK);
checker.CheckProperty(next, i::kValueProperty, CHECK_OK);
break;
}
case i::Token::STRING:
Consume(next);
CheckDuplicate(&duplicate_finder, next, kValueProperty, CHECK_OK);
checker.CheckProperty(next, i::kValueProperty, CHECK_OK);
GetStringSymbol();
break;
case i::Token::NUMBER:
Consume(next);
CheckDuplicate(&duplicate_finder, next, kValueProperty, CHECK_OK);
checker.CheckProperty(next, i::kValueProperty, CHECK_OK);
break;
default:
if (i::Token::IsKeyword(next)) {
Consume(next);
CheckDuplicate(&duplicate_finder, next, kValueProperty, CHECK_OK);
checker.CheckProperty(next, i::kValueProperty, CHECK_OK);
} else {
// Unexpected token.
*ok = false;
@ -1402,7 +1371,7 @@ PreParser::Expression PreParser::ParseFunctionLiteral(bool is_generator,
Expect(i::Token::LPAREN, CHECK_OK);
int start_position = scanner_->location().beg_pos;
bool done = (peek() == i::Token::RPAREN);
DuplicateFinder duplicate_finder(scanner_->unicode_cache());
i::DuplicateFinder duplicate_finder(scanner_->unicode_cache());
while (!done) {
Identifier id = ParseIdentifier(CHECK_OK);
if (!id.IsValidStrictVariable()) {
@ -1694,139 +1663,4 @@ bool PreParser::peek_any_identifier() {
next == i::Token::YIELD;
}
// Records |key| as an ASCII symbol: the characters are viewed as raw bytes and
// forwarded to AddSymbol with is_ascii == true. Returns AddSymbol's result,
// i.e. the value stored for this key before |value| was merged in.
int DuplicateFinder::AddAsciiSymbol(i::Vector<const char> key, int value) {
return AddSymbol(i::Vector<const byte>::cast(key), true, value);
}
// Records |key| as a UTF-16 symbol: the code units are viewed as raw bytes and
// forwarded to AddSymbol with is_ascii == false. Returns AddSymbol's result,
// i.e. the value stored for this key before |value| was merged in.
int DuplicateFinder::AddUtf16Symbol(i::Vector<const uint16_t> key, int value) {
return AddSymbol(i::Vector<const byte>::cast(key), false, value);
}
// Looks up (inserting if absent) the entry for |key| and ORs |value| into the
// bits stored there. Returns the bits that were stored before this call
// (0 for a key seen for the first time).
int DuplicateFinder::AddSymbol(i::Vector<const byte> key,
                               bool is_ascii,
                               int value) {
  const uint32_t key_hash = Hash(key, is_ascii);
  // The map keeps pointers to keys, so the key is copied into the backing
  // store first.
  byte* stored_key = BackupKey(key, is_ascii);
  i::HashMap::Entry* slot = map_.Lookup(stored_key, key_hash, true);

  const intptr_t previous_bits = reinterpret_cast<intptr_t>(slot->value);
  const int previous = static_cast<int>(previous_bits);
  const intptr_t merged = static_cast<intptr_t>(value | previous);
  slot->value = reinterpret_cast<void*>(merged);
  return previous;
}
// Adds a number literal under its canonical string form, so that different
// spellings of the same number map to the same key. Returns the value stored
// for that key before |value| was merged in.
int DuplicateFinder::AddNumber(i::Vector<const char> key, int value) {
  ASSERT(key.length() > 0);
  // Fast path: the literal text is already the canonical key.
  if (IsNumberCanonical(key)) return AddAsciiSymbol(key, value);

  const int flags = i::ALLOW_HEX | i::ALLOW_OCTAL | i::ALLOW_IMPLICIT_OCTAL |
      i::ALLOW_BINARY;
  const double parsed = StringToDouble(unicode_constants_, key, flags, 0.0);

  // Non-finite results are keyed as "Infinity"; everything else is printed
  // into number_buffer_.
  const char* canonical = "Infinity";
  int canonical_length = 8;  // strlen("Infinity");
  if (std::isfinite(parsed)) {
    canonical = DoubleToCString(parsed,
                                i::Vector<char>(number_buffer_, kBufferSize));
    canonical_length = i::StrLength(canonical);
  }

  const byte* canonical_bytes = reinterpret_cast<const byte*>(canonical);
  return AddSymbol(i::Vector<const byte>(canonical_bytes, canonical_length),
                   true, value);
}
// Returns true only if the text of the number literal |number| is certainly
// identical to the string ToString(ToNumber(literal)) would produce, so that
// it can be used as a property key without conversion.
//
// This is a conservative approximation: returning false merely sends the
// caller down the slow conversion path, whereas a wrong "true" makes
// equivalent keys look different. Accepted are literals of at most 15
// characters that consist of
//   - an integer part with no leading zeroes (or exactly "0"),
//   - optionally followed by '.' and a fraction that does not end in '0'.
// Rejected in addition:
//   - empty input (defensive; callers are expected to pass a non-empty key),
//   - literals without integer digits such as ".5" (canonical form "0.5"),
//   - "0.000000d..." with more than five zeroes after the point, because
//     values below 1e-6 are printed in exponent notation (e.g. "1e-7").
bool DuplicateFinder::IsNumberCanonical(i::Vector<const char> number) {
  int pos = 0;
  int length = number.length();
  if (length == 0 || length > 15) return false;
  bool zero_integer_part = (number[pos] == '0');
  if (zero_integer_part) {
    pos++;
  } else {
    while (pos < length &&
           static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;
    // No integer digits at all: the canonical form has a leading "0".
    if (pos == 0) return false;
  }
  if (length == pos) return true;
  if (number[pos] != '.') return false;
  pos++;
  if (zero_integer_part) {
    // Too many leading fraction zeroes switch ToString to exponent notation.
    int leading_zeros = 0;
    while (pos + leading_zeros < length &&
           number[pos + leading_zeros] == '0') {
      leading_zeros++;
    }
    if (leading_zeros > 5) return false;
  }
  // An empty fraction ("1.") or one ending in '0' ("1.50") is not canonical.
  bool invalid_last_digit = true;
  while (pos < length) {
    byte digit = number[pos] - '0';
    if (digit > '9' - '0') return false;
    invalid_last_digit = (digit == 0);
    pos++;
  }
  return !invalid_last_digit;
}
// Primitive hash over the bytes of |key|, almost identical to the one used
// for strings, except that it is seeded with the key length and ASCII-ness.
uint32_t DuplicateFinder::Hash(i::Vector<const byte> key, bool is_ascii) {
  const int length = key.length();
  uint32_t hash = (length << 1) | (is_ascii ? 1 : 0);
  int index = 0;
  while (index < length) {
    const uint32_t current = key[index];
    hash = (hash + current) * 1025;
    hash ^= (hash >> 6);
    index++;
  }
  return hash;
}
// Key equality predicate for the hash map. Both arguments point at keys in
// the encoding written by BackupKey: a variable-length header followed by the
// key bytes.
bool DuplicateFinder::Match(void* first, void* second) {
// Decode lengths.
// Length + ASCII-bit is encoded as base 128, most significant heptet first,
// with the 8th bit (0x80) being non-zero while there are more heptets.
// The value encodes the number of bytes following, and whether the original
// was ASCII.
byte* s1 = reinterpret_cast<byte*>(first);
byte* s2 = reinterpret_cast<byte*>(second);
uint32_t length_ascii_field = 0;
byte c1;
do {
c1 = *s1;
// Headers must be byte-identical, i.e. same length and same ASCII flag.
if (c1 != *s2) return false;
length_ascii_field = (length_ascii_field << 7) | (c1 & 0x7f);
s1++;
s2++;
} while ((c1 & 0x80) != 0);
// Drop the ASCII flag (lowest bit) to obtain the payload length in bytes.
int length = static_cast<int>(length_ascii_field >> 1);
return memcmp(s1, s2, length) == 0;
}
// Copies |bytes| into the backing store, prefixed by a header that encodes
// (length << 1 | is_ascii) in base 128, most significant heptet first.
// Returns a pointer to the start of the header.
byte* DuplicateFinder::BackupKey(i::Vector<const byte> bytes,
                                 bool is_ascii) {
  const uint32_t ascii_length = (bytes.length() << 1) | (is_ascii ? 1 : 0);
  backing_store_.StartSequence();
  // Emit every heptet above the lowest one that is needed to represent
  // ascii_length, each tagged with the continuation bit 0x80.
  for (int shift = 28; shift >= 7; shift -= 7) {
    if (ascii_length >= (1u << shift)) {
      backing_store_.Add(static_cast<byte>((ascii_length >> shift) | 0x80u));
    }
  }
  // The last, least significant, heptet carries no continuation bit.
  backing_store_.Add(static_cast<byte>(ascii_length & 0x7f));

  backing_store_.AddBlock(bytes);
  return backing_store_.EndSequence().start();
}
} } // v8::preparser

View File

@ -35,9 +35,98 @@
namespace v8 {
namespace internal {
class UnicodeCache;
// Used to detect duplicates in object literals. Each of the values
// kGetterProperty, kSetterProperty and kValueProperty represents
// a type of object literal property. When parsing a property, its
// type value is stored in the DuplicateFinder for the property name.
// Values are chosen so that having intersecting bits means that there is
// an incompatibility.
// I.e., you can add a getter to a property that already has a setter, since
// kGetterProperty and kSetterProperty do not intersect, but not if it
// already has a getter or a value. Adding the getter to an existing
// setter will store the value (kGetterProperty | kSetterProperty), which
// is incompatible with adding any further properties.
enum PropertyKind {
kNone = 0,
// Bit patterns representing different object literal property types.
kGetterProperty = 1,
kSetterProperty = 2,
// Includes both accessor bits plus kValueFlag, so a value property
// intersects with every other kind.
kValueProperty = 7,
// Helper constants.
// Set only in kValueProperty; used to tell data from accessor properties.
kValueFlag = 4
};
// Validation per 11.1.5 Object Initialiser.
// Shared between parsers: P is the parser type used for error reporting
// (CheckProperty calls P::ReportMessageAt with a location and a message).
template<typename P>
class ObjectLiteralChecker {
public:
// |scanner| supplies the literal text and location of the property name
// being checked, as well as the unicode cache for the duplicate finder.
ObjectLiteralChecker(P* parser, Scanner* scanner, LanguageMode mode)
: parser_(parser),
scanner_(scanner),
finder_(scanner->unicode_cache()),
language_mode_(mode) { }
// Records the property name currently held by the scanner with kind |type|
// and reports an error (setting |*ok| to false) on an illegal duplicate.
void CheckProperty(Token::Value property, PropertyKind type, bool* ok);
private:
// Checks the type of conflict based on values coming from PropertyKind.
// True if the two kinds share any bit, i.e. cannot coexist.
bool HasConflict(PropertyKind type1, PropertyKind type2) {
return (type1 & type2) != 0;
}
// True if both kinds carry kValueFlag (two data properties).
bool IsDataDataConflict(PropertyKind type1, PropertyKind type2) {
return ((type1 & type2) & kValueFlag) != 0;
}
// True if exactly one of the kinds carries kValueFlag.
bool IsDataAccessorConflict(PropertyKind type1, PropertyKind type2) {
return ((type1 ^ type2) & kValueFlag) != 0;
}
// True if neither kind carries kValueFlag (both are accessors).
bool IsAccessorAccessorConflict(PropertyKind type1, PropertyKind type2) {
return ((type1 | type2) & kValueFlag) == 0;
}
P* parser_;
Scanner* scanner_;
DuplicateFinder finder_;
LanguageMode language_mode_;
};
// Registers the property name currently held by the scanner with kind |type|
// and validates it against the kinds recorded earlier for the same name.
// |*ok| is set to false only when an error is reported; duplicate data
// properties are tolerated in classic mode.
template<typename P>
void ObjectLiteralChecker<P>::CheckProperty(Token::Value property,
                                            PropertyKind type,
                                            bool* ok) {
  int previous_bits;
  if (property == Token::NUMBER) {
    previous_bits = finder_.AddNumber(scanner_->literal_ascii_string(), type);
  } else if (scanner_->is_literal_ascii()) {
    previous_bits =
        finder_.AddAsciiSymbol(scanner_->literal_ascii_string(), type);
  } else {
    previous_bits =
        finder_.AddUtf16Symbol(scanner_->literal_utf16_string(), type);
  }
  const PropertyKind previous = static_cast<PropertyKind>(previous_bits);
  if (!HasConflict(previous, type)) return;

  const char* message;
  if (IsDataDataConflict(previous, type)) {
    // Both are data properties.
    if (language_mode_ == CLASSIC_MODE) return;
    message = "strict_duplicate_property";
  } else if (IsDataAccessorConflict(previous, type)) {
    // Both a data and an accessor property with the same name.
    message = "accessor_data_property";
  } else {
    ASSERT(IsAccessorAccessorConflict(previous, type));
    // Both accessors of the same type.
    message = "accessor_get_set";
  }
  parser_->ReportMessageAt(scanner_->location(), message);
  *ok = false;
}
} // v8::internal
namespace preparser {
typedef uint8_t byte;
@ -57,53 +146,6 @@ typedef uint8_t byte;
namespace i = v8::internal;
// Discovers duplicate symbols: each Add* call merges a value into the entry
// for a key and returns the value that was stored for that key before.
class DuplicateFinder {
public:
explicit DuplicateFinder(i::UnicodeCache* constants)
: unicode_constants_(constants),
backing_store_(16),
map_(&Match) { }
int AddAsciiSymbol(i::Vector<const char> key, int value);
int AddUtf16Symbol(i::Vector<const uint16_t> key, int value);
// Add a number literal by converting it (if necessary)
// to the string that ToString(ToNumber(literal)) would generate,
// and then adding that string with AddAsciiSymbol.
// This string is the actual value used as key in an object literal,
// and the one that must be different from the other keys.
int AddNumber(i::Vector<const char> key, int value);
private:
int AddSymbol(i::Vector<const byte> key, bool is_ascii, int value);
// Backs up the key and its length in the backing store.
// The backup is stored with a base-128 encoding of the
// length (plus a bit saying whether the string is ASCII),
// followed by the bytes of the key.
byte* BackupKey(i::Vector<const byte> key, bool is_ascii);
// Compare two encoded keys (both pointing into the backing store)
// for having the same base-128 encoded lengths and ASCII-ness,
// and then having the same 'length' bytes following.
static bool Match(void* first, void* second);
// Creates a hash from a sequence of bytes.
static uint32_t Hash(i::Vector<const byte> key, bool is_ascii);
// Checks whether a string containing a JS number is in its canonical
// form.
static bool IsNumberCanonical(i::Vector<const char> key);
// Size of buffer. Sufficient for using it to call DoubleToCString
// from conversions.h.
static const int kBufferSize = 100;
i::UnicodeCache* unicode_constants_;
// Backing store used to store strings used as hashmap keys.
i::SequenceCollector<unsigned char> backing_store_;
i::HashMap map_;
// Buffer used for string->number->canonical string conversions.
char number_buffer_[kBufferSize];
};
class PreParser {
public:
enum PreParseResult {
@ -183,45 +225,6 @@ class PreParser {
i::ParserRecorder* log);
private:
// Used to detect duplicates in object literals. Each of the values
// kGetterProperty, kSetterProperty and kValueProperty represents
// a type of object literal property. When parsing a property, its
// type value is stored in the DuplicateFinder for the property name.
// Values are chosen so that having intersecting bits means that there is
// an incompatibility.
// I.e., you can add a getter to a property that already has a setter, since
// kGetterProperty and kSetterProperty do not intersect, but not if it
// already has a getter or a value. Adding the getter to an existing
// setter will store the value (kGetterProperty | kSetterProperty), which
// is incompatible with adding any further properties.
enum PropertyType {
kNone = 0,
// Bit patterns representing different object literal property types.
kGetterProperty = 1,
kSetterProperty = 2,
// Includes both accessor bits plus kValueFlag, so a value property
// intersects with every other kind.
kValueProperty = 7,
// Helper constants.
// Set only in kValueProperty; used to tell data from accessor properties.
kValueFlag = 4
};
// Checks the type of conflict based on values coming from PropertyType.
// True if the two kinds share any bit, i.e. cannot coexist.
bool HasConflict(int type1, int type2) { return (type1 & type2) != 0; }
// True if both kinds carry kValueFlag (two data properties).
bool IsDataDataConflict(int type1, int type2) {
return ((type1 & type2) & kValueFlag) != 0;
}
// True if exactly one of the kinds carries kValueFlag.
bool IsDataAccessorConflict(int type1, int type2) {
return ((type1 ^ type2) & kValueFlag) != 0;
}
// True if neither kind carries kValueFlag (both are accessors).
bool IsAccessorAccessorConflict(int type1, int type2) {
return ((type1 | type2) & kValueFlag) == 0;
}
// Adds the current property name to |finder| with kind |type| and reports an
// error (setting |*ok| to false) if it is an illegal duplicate.
void CheckDuplicate(DuplicateFinder* finder,
i::Token::Value property,
int type,
bool* ok);
// These types form an algebra over syntactic categories that is just
// rich enough to let us recognize and propagate the constructs that
// are either being counted in the preparser data, or is important
@ -531,7 +534,7 @@ class PreParser {
void ReportUnexpectedToken(i::Token::Value token);
void ReportMessageAt(i::Scanner::Location location,
const char* type,
const char* name_opt) {
const char* name_opt = NULL) {
log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt);
}
void ReportMessageAt(int start_pos,
@ -686,7 +689,10 @@ class PreParser {
bool allow_generators_;
bool allow_for_of_;
bool parenthesized_function_;
friend class i::ObjectLiteralChecker<PreParser>;
};
} } // v8::preparser
#endif // V8_PREPARSER_H

View File

@ -27,10 +27,14 @@
// Features shared by parsing and pre-parsing scanners.
#include <cmath>
#include "scanner.h"
#include "../include/v8stdint.h"
#include "char-predicates-inl.h"
#include "conversions-inl.h"
#include "list-inl.h"
namespace v8 {
namespace internal {
@ -1108,4 +1112,140 @@ bool Scanner::ScanRegExpFlags() {
return true;
}
// Records |key| as an ASCII symbol: the characters are viewed as raw bytes and
// forwarded to AddSymbol with is_ascii == true. Returns AddSymbol's result,
// i.e. the value stored for this key before |value| was merged in.
int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) {
return AddSymbol(Vector<const byte>::cast(key), true, value);
}
// Records |key| as a UTF-16 symbol: the code units are viewed as raw bytes and
// forwarded to AddSymbol with is_ascii == false. Returns AddSymbol's result,
// i.e. the value stored for this key before |value| was merged in.
int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) {
return AddSymbol(Vector<const byte>::cast(key), false, value);
}
// Looks up (inserting if absent) the entry for |key| and ORs |value| into the
// bits stored there. Returns the bits that were stored before this call
// (0 for a key seen for the first time).
int DuplicateFinder::AddSymbol(Vector<const byte> key,
                               bool is_ascii,
                               int value) {
  const uint32_t key_hash = Hash(key, is_ascii);
  // The map keeps pointers to keys, so the key is copied into the backing
  // store first.
  byte* stored_key = BackupKey(key, is_ascii);
  HashMap::Entry* slot = map_.Lookup(stored_key, key_hash, true);

  const intptr_t previous_bits = reinterpret_cast<intptr_t>(slot->value);
  const int previous = static_cast<int>(previous_bits);
  const intptr_t merged = static_cast<intptr_t>(value | previous);
  slot->value = reinterpret_cast<void*>(merged);
  return previous;
}
// Adds a number literal under its canonical string form, so that different
// spellings of the same number map to the same key. Returns the value stored
// for that key before |value| was merged in.
int DuplicateFinder::AddNumber(Vector<const char> key, int value) {
  ASSERT(key.length() > 0);
  // Fast path: the literal text is already the canonical key.
  if (IsNumberCanonical(key)) return AddAsciiSymbol(key, value);

  const int flags =
      ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
  const double parsed = StringToDouble(unicode_constants_, key, flags, 0.0);

  // Non-finite results are keyed as "Infinity"; everything else is printed
  // into number_buffer_.
  const char* canonical = "Infinity";
  int canonical_length = 8;  // strlen("Infinity");
  if (std::isfinite(parsed)) {
    canonical = DoubleToCString(parsed,
                                Vector<char>(number_buffer_, kBufferSize));
    canonical_length = StrLength(canonical);
  }

  const byte* canonical_bytes = reinterpret_cast<const byte*>(canonical);
  return AddSymbol(Vector<const byte>(canonical_bytes, canonical_length),
                   true, value);
}
// Returns true only if the text of the number literal |number| is certainly
// identical to the string ToString(ToNumber(literal)) would produce, so that
// it can be used as a property key without conversion.
//
// This is a conservative approximation: returning false merely sends the
// caller down the slow conversion path, whereas a wrong "true" makes
// equivalent keys look different. Accepted are literals of at most 15
// characters that consist of
//   - an integer part with no leading zeroes (or exactly "0"),
//   - optionally followed by '.' and a fraction that does not end in '0'.
// Rejected in addition:
//   - empty input (defensive; callers are expected to pass a non-empty key),
//   - literals without integer digits such as ".5" (canonical form "0.5"),
//   - "0.000000d..." with more than five zeroes after the point, because
//     values below 1e-6 are printed in exponent notation (e.g. "1e-7").
bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) {
  int pos = 0;
  int length = number.length();
  if (length == 0 || length > 15) return false;
  bool zero_integer_part = (number[pos] == '0');
  if (zero_integer_part) {
    pos++;
  } else {
    while (pos < length &&
           static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;
    // No integer digits at all: the canonical form has a leading "0".
    if (pos == 0) return false;
  }
  if (length == pos) return true;
  if (number[pos] != '.') return false;
  pos++;
  if (zero_integer_part) {
    // Too many leading fraction zeroes switch ToString to exponent notation.
    int leading_zeros = 0;
    while (pos + leading_zeros < length &&
           number[pos + leading_zeros] == '0') {
      leading_zeros++;
    }
    if (leading_zeros > 5) return false;
  }
  // An empty fraction ("1.") or one ending in '0' ("1.50") is not canonical.
  bool invalid_last_digit = true;
  while (pos < length) {
    byte digit = number[pos] - '0';
    if (digit > '9' - '0') return false;
    invalid_last_digit = (digit == 0);
    pos++;
  }
  return !invalid_last_digit;
}
// Primitive hash over the bytes of |key|, almost identical to the one used
// for strings, except that it is seeded with the key length and ASCII-ness.
uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_ascii) {
  const int length = key.length();
  uint32_t hash = (length << 1) | (is_ascii ? 1 : 0);
  int index = 0;
  while (index < length) {
    const uint32_t current = key[index];
    hash = (hash + current) * 1025;
    hash ^= (hash >> 6);
    index++;
  }
  return hash;
}
// Key equality predicate for the hash map. Both arguments point at keys in
// the encoding written by BackupKey: a variable-length header followed by the
// key bytes.
bool DuplicateFinder::Match(void* first, void* second) {
// Decode lengths.
// Length + ASCII-bit is encoded as base 128, most significant heptet first,
// with the 8th bit (0x80) being non-zero while there are more heptets.
// The value encodes the number of bytes following, and whether the original
// was ASCII.
byte* s1 = reinterpret_cast<byte*>(first);
byte* s2 = reinterpret_cast<byte*>(second);
uint32_t length_ascii_field = 0;
byte c1;
do {
c1 = *s1;
// Headers must be byte-identical, i.e. same length and same ASCII flag.
if (c1 != *s2) return false;
length_ascii_field = (length_ascii_field << 7) | (c1 & 0x7f);
s1++;
s2++;
} while ((c1 & 0x80) != 0);
// Drop the ASCII flag (lowest bit) to obtain the payload length in bytes.
int length = static_cast<int>(length_ascii_field >> 1);
return memcmp(s1, s2, length) == 0;
}
// Copies |bytes| into the backing store, prefixed by a header that encodes
// (length << 1 | is_ascii) in base 128, most significant heptet first.
// Returns a pointer to the start of the header.
byte* DuplicateFinder::BackupKey(Vector<const byte> bytes,
                                 bool is_ascii) {
  const uint32_t ascii_length = (bytes.length() << 1) | (is_ascii ? 1 : 0);
  backing_store_.StartSequence();
  // Emit every heptet above the lowest one that is needed to represent
  // ascii_length, each tagged with the continuation bit 0x80.
  for (int shift = 28; shift >= 7; shift -= 7) {
    if (ascii_length >= (1u << shift)) {
      backing_store_.Add(static_cast<byte>((ascii_length >> shift) | 0x80u));
    }
  }
  // The last, least significant, heptet carries no continuation bit.
  backing_store_.Add(static_cast<byte>(ascii_length & 0x7f));

  backing_store_.AddBlock(bytes);
  return backing_store_.EndSequence().start();
}
} } // namespace v8::internal

View File

@ -34,6 +34,8 @@
#include "char-predicates.h"
#include "checks.h"
#include "globals.h"
#include "hashmap.h"
#include "list.h"
#include "token.h"
#include "unicode-inl.h"
#include "utils.h"
@ -121,9 +123,10 @@ class Utf16CharacterStream {
};
class UnicodeCache {
// ---------------------------------------------------------------------
// Caching predicates used by scanners.
class UnicodeCache {
public:
UnicodeCache() {}
typedef unibrow::Utf8Decoder<512> Utf8Decoder;
@ -148,6 +151,56 @@ class UnicodeCache {
};
// ---------------------------------------------------------------------
// DuplicateFinder discovers duplicate symbols: each Add* call merges a value
// into the entry for a key and returns the value that was stored for that
// key before.
class DuplicateFinder {
public:
explicit DuplicateFinder(UnicodeCache* constants)
: unicode_constants_(constants),
backing_store_(16),
map_(&Match) { }
int AddAsciiSymbol(Vector<const char> key, int value);
int AddUtf16Symbol(Vector<const uint16_t> key, int value);
// Add a number literal by converting it (if necessary)
// to the string that ToString(ToNumber(literal)) would generate,
// and then adding that string with AddAsciiSymbol.
// This string is the actual value used as key in an object literal,
// and the one that must be different from the other keys.
int AddNumber(Vector<const char> key, int value);
private:
int AddSymbol(Vector<const byte> key, bool is_ascii, int value);
// Backs up the key and its length in the backing store.
// The backup is stored with a base-128 encoding of the
// length (plus a bit saying whether the string is ASCII),
// followed by the bytes of the key.
byte* BackupKey(Vector<const byte> key, bool is_ascii);
// Compare two encoded keys (both pointing into the backing store)
// for having the same base-128 encoded lengths and ASCII-ness,
// and then having the same 'length' bytes following.
static bool Match(void* first, void* second);
// Creates a hash from a sequence of bytes.
static uint32_t Hash(Vector<const byte> key, bool is_ascii);
// Checks whether a string containing a JS number is in its canonical
// form.
static bool IsNumberCanonical(Vector<const char> key);
// Size of buffer. Sufficient for using it to call DoubleToCString
// from conversions.h.
static const int kBufferSize = 100;
UnicodeCache* unicode_constants_;
// Backing store used to store strings used as hashmap keys.
SequenceCollector<unsigned char> backing_store_;
HashMap map_;
// Buffer used for string->number->canonical string conversions.
char number_buffer_[kBufferSize];
};
// ----------------------------------------------------------------------------
// LiteralBuffer - Collector of chars of literals.

View File

@ -27,12 +27,6 @@
[
[ALWAYS, {
# TODO(mstarzinger): Uhm, this is kind of embarrassing, but our parser
# does not catch some syntax errors with duplicate properties in object
# literals that our preparser actually caught. I will fix this glitch in a
# follow-up change.
'duplicate-property/*': [SKIP],
# TODO(mstarzinger): This script parses but throws a TypeError when run.
'non-alphanum': [FAIL],