Separate DuplicateFinder from Scanner.
DuplicateFinder isn't actually used by the Scanner, except for one convenience function which we should probably remove, also. BUG= Review-Url: https://codereview.chromium.org/2281443002 Cr-Commit-Position: refs/heads/master@{#38904}
This commit is contained in:
parent
b550c07734
commit
5b9b44d1f6
2
BUILD.gn
2
BUILD.gn
@ -1443,6 +1443,8 @@ v8_source_set("v8_base") {
|
||||
"src/objects.h",
|
||||
"src/ostreams.cc",
|
||||
"src/ostreams.h",
|
||||
"src/parsing/duplicate-finder.cc",
|
||||
"src/parsing/duplicate-finder.h",
|
||||
"src/parsing/expression-classifier.h",
|
||||
"src/parsing/func-name-inferrer.cc",
|
||||
"src/parsing/func-name-inferrer.h",
|
||||
|
@ -6,7 +6,7 @@
|
||||
#define V8_COLLECTOR_H_
|
||||
|
||||
#include "src/checks.h"
|
||||
#include "src/list.h"
|
||||
#include "src/list-inl.h"
|
||||
#include "src/vector.h"
|
||||
|
||||
namespace v8 {
|
||||
|
145
src/parsing/duplicate-finder.cc
Normal file
145
src/parsing/duplicate-finder.cc
Normal file
@ -0,0 +1,145 @@
|
||||
// Copyright 2011 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/parsing/duplicate-finder.h"
|
||||
|
||||
#include "src/conversions.h"
|
||||
#include "src/unicode-cache.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
// Adds a key made of one-byte characters. Simply forwards to AddSymbol with
// the one-byte flag set and returns AddSymbol's result.
int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
  const bool is_one_byte = true;
  return AddSymbol(key, is_one_byte, value);
}
|
||||
|
||||
// Adds a key made of two-byte characters. The characters are handed to
// AddSymbol as a byte vector (via Vector::cast) with the one-byte flag
// cleared, so the flag takes part in both hashing and matching.
int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
  Vector<const uint8_t> raw_bytes = Vector<const uint8_t>::cast(key);
  const bool is_one_byte = false;
  return AddSymbol(raw_bytes, is_one_byte, value);
}
|
||||
|
||||
// Records |key| in the map and merges |value| into whatever was stored for it.
//
// The key is always copied into the backing store first (BackupKey); that copy
// is what the map entry is looked up / inserted with. The int kept in the
// entry's value slot is OR-ed with |value|.
//
// Returns the value stored for the key before this call.
// NOTE(review): for a newly inserted entry this is presumably 0 (null value
// slot) - depends on base::HashMap::LookupOrInsert, confirm there.
int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, bool is_one_byte,
                               int value) {
  const uint32_t key_hash = Hash(key, is_one_byte);
  byte* stored_key = BackupKey(key, is_one_byte);
  base::HashMap::Entry* slot = map_.LookupOrInsert(stored_key, key_hash);

  // The map stores void*; the int payload is round-tripped through intptr_t.
  const intptr_t previous_bits = reinterpret_cast<intptr_t>(slot->value);
  const int previous = static_cast<int>(previous_bits);
  const intptr_t merged_bits = static_cast<intptr_t>(value | previous);
  slot->value = reinterpret_cast<void*>(merged_bits);
  return previous;
}
|
||||
|
||||
// Adds a number literal under its canonical string form.
//
// Literals that IsNumberCanonical accepts are added verbatim. Everything else
// is parsed with StringToDouble (hex, octal, implicit octal and binary
// allowed) and converted back to a string with DoubleToCString, using
// number_buffer_ as scratch space; non-finite results are added as
// "Infinity". The resulting string is added as a one-byte symbol.
//
// Returns the result of the underlying AddSymbol call.
int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
  DCHECK(key.length() > 0);
  // Fast path: the literal already is its own canonical form.
  if (IsNumberCanonical(key)) return AddOneByteSymbol(key, value);

  const int parse_flags =
      ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
  const double parsed =
      StringToDouble(unicode_constants_, key, parse_flags, 0.0);

  // Default to the non-finite spelling; overwritten for finite values.
  const char* canonical = "Infinity";
  int canonical_length = 8;  // strlen("Infinity");
  if (std::isfinite(parsed)) {
    canonical =
        DoubleToCString(parsed, Vector<char>(number_buffer_, kBufferSize));
    canonical_length = StrLength(canonical);
  }

  const byte* canonical_bytes = reinterpret_cast<const byte*>(canonical);
  return AddSymbol(Vector<const byte>(canonical_bytes, canonical_length), true,
                   value);
}
|
||||
|
||||
// Returns true only if |number| is certainly already in the form that
// ToString(ToNumber(literal)) would produce, so that it can be used as a key
// without conversion. This is a conservative approximation: returning false
// is always safe (the caller then canonicalizes via double conversion).
//
// Accepted shape: at most 15 characters, a non-empty integer part without
// leading zeroes (a single "0" is fine), optionally followed by '.' and a
// non-empty fraction that does not end in '0'.
//
// Rejected in addition to the obvious non-decimal spellings:
//  - empty input (previously read number[0] out of bounds);
//  - an empty integer part such as ".5", whose canonical form is "0.5";
//  - values below 1e-6 such as "0.0000001", which Number::toString renders in
//    exponent notation ("1e-7").
bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
  int pos = 0;
  const int length = number.length();
  if (length == 0 || length > 15) return false;

  const bool integer_part_is_zero = (number[pos] == '0');
  if (integer_part_is_zero) {
    pos++;
  } else {
    while (pos < length &&
           static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) {
      pos++;
    }
    // No integer digits at all (e.g. ".5"): not canonical.
    if (pos == 0) return false;
  }
  if (length == pos) return true;
  if (number[pos] != '.') return false;
  pos++;

  // Starts out true so that an empty fraction ("1.") is rejected.
  bool invalid_last_digit = true;
  bool seen_nonzero_digit = false;
  int leading_fraction_zeros = 0;
  while (pos < length) {
    uint8_t digit = number[pos] - '0';
    if (digit > '9' - '0') return false;
    if (digit != 0) {
      seen_nonzero_digit = true;
    } else if (!seen_nonzero_digit) {
      leading_fraction_zeros++;
    }
    invalid_last_digit = (digit == 0);
    pos++;
  }
  if (invalid_last_digit) return false;

  // "0." followed by six or more zeroes denotes a value < 1e-6, for which the
  // canonical string uses exponent notation.
  if (integer_part_is_zero && leading_fraction_zeros >= 6) return false;
  return true;
}
|
||||
|
||||
// Computes a simple multiplicative/xor-shift hash over the bytes of |key|.
// The initial state is (length << 1) | is_one_byte, so keys that differ only
// in length or representation start from different seeds.
uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
  const int byte_count = key.length();
  uint32_t state = byte_count << 1;
  if (is_one_byte) state |= 1;
  for (int index = 0; index < byte_count; index++) {
    state += key[index];
    state *= 1025;
    state ^= state >> 6;
  }
  return state;
}
|
||||
|
||||
bool DuplicateFinder::Match(void* first, void* second) {
|
||||
// Decode lengths.
|
||||
// Length + representation is encoded as base 128, most significant heptet
|
||||
// first, with a 8th bit being non-zero while there are more heptets.
|
||||
// The value encodes the number of bytes following, and whether the original
|
||||
// was Latin1.
|
||||
byte* s1 = reinterpret_cast<byte*>(first);
|
||||
byte* s2 = reinterpret_cast<byte*>(second);
|
||||
uint32_t length_one_byte_field = 0;
|
||||
byte c1;
|
||||
do {
|
||||
c1 = *s1;
|
||||
if (c1 != *s2) return false;
|
||||
length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
|
||||
s1++;
|
||||
s2++;
|
||||
} while ((c1 & 0x80) != 0);
|
||||
int length = static_cast<int>(length_one_byte_field >> 1);
|
||||
return memcmp(s1, s2, length) == 0;
|
||||
}
|
||||
|
||||
// Copies |bytes| into the backing store as one sequence and returns a pointer
// to the start of that sequence.
//
// Layout: a header holding (byte count << 1) | one-byte flag, written as
// base-128 digits, most significant first, with bit 0x80 set on every header
// byte except the last; then the key bytes themselves. Leading zero heptets
// are not emitted. This is the format Match() decodes.
byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
                                 bool is_one_byte) {
  const uint32_t header = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
  backing_store_.StartSequence();

  // Continuation heptets: emit the one at |shift| only if the header is large
  // enough to need it (which implies all lower ones are emitted as well).
  for (int shift = 28; shift >= 7; shift -= 7) {
    if (header >= (1u << shift)) {
      backing_store_.Add(static_cast<uint8_t>((header >> shift) | 0x80u));
    }
  }
  // Final, least significant heptet without the continuation bit.
  backing_store_.Add(static_cast<uint8_t>(header & 0x7f));

  backing_store_.AddBlock(bytes);
  return backing_store_.EndSequence().start();
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
64
src/parsing/duplicate-finder.h
Normal file
64
src/parsing/duplicate-finder.h
Normal file
@ -0,0 +1,64 @@
|
||||
// Copyright 2011 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_PARSING_DUPLICATE_FINDER_H_
|
||||
#define V8_PARSING_DUPLICATE_FINDER_H_
|
||||
|
||||
#include "src/base/hashmap.h"
|
||||
#include "src/collector.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
class UnicodeCache;
|
||||
|
||||
// DuplicateFinder discovers duplicate symbols.
|
||||
class DuplicateFinder {
|
||||
public:
|
||||
explicit DuplicateFinder(UnicodeCache* constants)
|
||||
: unicode_constants_(constants), backing_store_(16), map_(&Match) {}
|
||||
|
||||
int AddOneByteSymbol(Vector<const uint8_t> key, int value);
|
||||
int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
|
||||
// Add a a number literal by converting it (if necessary)
|
||||
// to the string that ToString(ToNumber(literal)) would generate.
|
||||
// and then adding that string with AddOneByteSymbol.
|
||||
// This string is the actual value used as key in an object literal,
|
||||
// and the one that must be different from the other keys.
|
||||
int AddNumber(Vector<const uint8_t> key, int value);
|
||||
|
||||
private:
|
||||
int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
|
||||
// Backs up the key and its length in the backing store.
|
||||
// The backup is stored with a base 127 encoding of the
|
||||
// length (plus a bit saying whether the string is one byte),
|
||||
// followed by the bytes of the key.
|
||||
uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
|
||||
|
||||
// Compare two encoded keys (both pointing into the backing store)
|
||||
// for having the same base-127 encoded lengths and representation.
|
||||
// and then having the same 'length' bytes following.
|
||||
static bool Match(void* first, void* second);
|
||||
// Creates a hash from a sequence of bytes.
|
||||
static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
|
||||
// Checks whether a string containing a JS number is its canonical
|
||||
// form.
|
||||
static bool IsNumberCanonical(Vector<const uint8_t> key);
|
||||
|
||||
// Size of buffer. Sufficient for using it to call DoubleToCString in
|
||||
// from conversions.h.
|
||||
static const int kBufferSize = 100;
|
||||
|
||||
UnicodeCache* unicode_constants_;
|
||||
// Backing store used to store strings used as hashmap keys.
|
||||
SequenceCollector<unsigned char> backing_store_;
|
||||
base::HashMap map_;
|
||||
// Buffer used for string->number->canonical string conversions.
|
||||
char number_buffer_[kBufferSize];
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
||||
#endif // V8_PARSING_DUPLICATE_FINDER_H_
|
@ -12,6 +12,8 @@
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
class DuplicateFinder;
|
||||
|
||||
#define ERROR_CODES(T) \
|
||||
T(ExpressionProduction, 0) \
|
||||
T(FormalParameterInitializerProduction, 1) \
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "src/base/platform/platform.h"
|
||||
#include "src/char-predicates-inl.h"
|
||||
#include "src/messages.h"
|
||||
#include "src/parsing/duplicate-finder.h"
|
||||
#include "src/parsing/parameter-initializer-rewriter.h"
|
||||
#include "src/parsing/parse-info.h"
|
||||
#include "src/parsing/rewriter.h"
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "src/conversions.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/list.h"
|
||||
#include "src/parsing/duplicate-finder.h"
|
||||
#include "src/parsing/parser-base.h"
|
||||
#include "src/parsing/preparse-data-format.h"
|
||||
#include "src/parsing/preparse-data.h"
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "src/char-predicates-inl.h"
|
||||
#include "src/conversions-inl.h"
|
||||
#include "src/list-inl.h"
|
||||
#include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
@ -1573,6 +1574,8 @@ bool Scanner::ContainsDot() {
|
||||
|
||||
|
||||
int Scanner::FindSymbol(DuplicateFinder* finder, int value) {
|
||||
// TODO(vogelheim): Move this logic into the calling class; this can be fully
|
||||
// implemented using the public interface.
|
||||
if (is_literal_one_byte()) {
|
||||
return finder->AddOneByteSymbol(literal_one_byte_string(), value);
|
||||
}
|
||||
@ -1632,144 +1635,6 @@ void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) {
|
||||
}
|
||||
|
||||
|
||||
// Adds a key made of one-byte characters. Simply forwards to AddSymbol with
// the one-byte flag set and returns AddSymbol's result.
int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
  const bool is_one_byte = true;
  return AddSymbol(key, is_one_byte, value);
}
|
||||
|
||||
|
||||
// Adds a key made of two-byte characters. The characters are handed to
// AddSymbol as a byte vector (via Vector::cast) with the one-byte flag
// cleared, so the flag takes part in both hashing and matching.
int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
  Vector<const uint8_t> raw_bytes = Vector<const uint8_t>::cast(key);
  const bool is_one_byte = false;
  return AddSymbol(raw_bytes, is_one_byte, value);
}
|
||||
|
||||
|
||||
// Records |key| in the map and merges |value| into whatever was stored for it.
//
// The key is always copied into the backing store first (BackupKey); that copy
// is what the map entry is looked up / inserted with. The int kept in the
// entry's value slot is OR-ed with |value|.
//
// Returns the value stored for the key before this call.
// NOTE(review): for a newly inserted entry this is presumably 0 (null value
// slot) - depends on base::HashMap::LookupOrInsert, confirm there.
int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,
                               bool is_one_byte,
                               int value) {
  const uint32_t key_hash = Hash(key, is_one_byte);
  byte* stored_key = BackupKey(key, is_one_byte);
  base::HashMap::Entry* slot = map_.LookupOrInsert(stored_key, key_hash);

  // The map stores void*; the int payload is round-tripped through intptr_t.
  const intptr_t previous_bits = reinterpret_cast<intptr_t>(slot->value);
  const int previous = static_cast<int>(previous_bits);
  const intptr_t merged_bits = static_cast<intptr_t>(value | previous);
  slot->value = reinterpret_cast<void*>(merged_bits);
  return previous;
}
|
||||
|
||||
|
||||
// Adds a number literal under its canonical string form.
//
// Literals that IsNumberCanonical accepts are added verbatim. Everything else
// is parsed with StringToDouble (hex, octal, implicit octal and binary
// allowed) and converted back to a string with DoubleToCString, using
// number_buffer_ as scratch space; non-finite results are added as
// "Infinity". The resulting string is added as a one-byte symbol.
//
// Returns the result of the underlying AddSymbol call.
int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
  DCHECK(key.length() > 0);
  // Fast path: the literal already is its own canonical form.
  if (IsNumberCanonical(key)) return AddOneByteSymbol(key, value);

  const int parse_flags =
      ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
  const double parsed =
      StringToDouble(unicode_constants_, key, parse_flags, 0.0);

  // Default to the non-finite spelling; overwritten for finite values.
  const char* canonical = "Infinity";
  int canonical_length = 8;  // strlen("Infinity");
  if (std::isfinite(parsed)) {
    canonical =
        DoubleToCString(parsed, Vector<char>(number_buffer_, kBufferSize));
    canonical_length = StrLength(canonical);
  }

  const byte* canonical_bytes = reinterpret_cast<const byte*>(canonical);
  return AddSymbol(Vector<const byte>(canonical_bytes, canonical_length), true,
                   value);
}
|
||||
|
||||
|
||||
// Returns true only if |number| is certainly already in the form that
// ToString(ToNumber(literal)) would produce, so that it can be used as a key
// without conversion. This is a conservative approximation: returning false
// is always safe (the caller then canonicalizes via double conversion).
//
// Accepted shape: at most 15 characters, a non-empty integer part without
// leading zeroes (a single "0" is fine), optionally followed by '.' and a
// non-empty fraction that does not end in '0'.
//
// Rejected in addition to the obvious non-decimal spellings:
//  - empty input (previously read number[0] out of bounds);
//  - an empty integer part such as ".5", whose canonical form is "0.5";
//  - values below 1e-6 such as "0.0000001", which Number::toString renders in
//    exponent notation ("1e-7").
bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
  int pos = 0;
  const int length = number.length();
  if (length == 0 || length > 15) return false;

  const bool integer_part_is_zero = (number[pos] == '0');
  if (integer_part_is_zero) {
    pos++;
  } else {
    while (pos < length &&
           static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) {
      pos++;
    }
    // No integer digits at all (e.g. ".5"): not canonical.
    if (pos == 0) return false;
  }
  if (length == pos) return true;
  if (number[pos] != '.') return false;
  pos++;

  // Starts out true so that an empty fraction ("1.") is rejected.
  bool invalid_last_digit = true;
  bool seen_nonzero_digit = false;
  int leading_fraction_zeros = 0;
  while (pos < length) {
    uint8_t digit = number[pos] - '0';
    if (digit > '9' - '0') return false;
    if (digit != 0) {
      seen_nonzero_digit = true;
    } else if (!seen_nonzero_digit) {
      leading_fraction_zeros++;
    }
    invalid_last_digit = (digit == 0);
    pos++;
  }
  if (invalid_last_digit) return false;

  // "0." followed by six or more zeroes denotes a value < 1e-6, for which the
  // canonical string uses exponent notation.
  if (integer_part_is_zero && leading_fraction_zeros >= 6) return false;
  return true;
}
|
||||
|
||||
|
||||
// Computes a simple multiplicative/xor-shift hash over the bytes of |key|.
// The initial state is (length << 1) | is_one_byte, so keys that differ only
// in length or representation start from different seeds.
uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
  const int byte_count = key.length();
  uint32_t state = byte_count << 1;
  if (is_one_byte) state |= 1;
  for (int index = 0; index < byte_count; index++) {
    state += key[index];
    state *= 1025;
    state ^= state >> 6;
  }
  return state;
}
|
||||
|
||||
|
||||
bool DuplicateFinder::Match(void* first, void* second) {
|
||||
// Decode lengths.
|
||||
// Length + representation is encoded as base 128, most significant heptet
|
||||
// first, with a 8th bit being non-zero while there are more heptets.
|
||||
// The value encodes the number of bytes following, and whether the original
|
||||
// was Latin1.
|
||||
byte* s1 = reinterpret_cast<byte*>(first);
|
||||
byte* s2 = reinterpret_cast<byte*>(second);
|
||||
uint32_t length_one_byte_field = 0;
|
||||
byte c1;
|
||||
do {
|
||||
c1 = *s1;
|
||||
if (c1 != *s2) return false;
|
||||
length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
|
||||
s1++;
|
||||
s2++;
|
||||
} while ((c1 & 0x80) != 0);
|
||||
int length = static_cast<int>(length_one_byte_field >> 1);
|
||||
return memcmp(s1, s2, length) == 0;
|
||||
}
|
||||
|
||||
|
||||
// Copies |bytes| into the backing store as one sequence and returns a pointer
// to the start of that sequence.
//
// Layout: a header holding (byte count << 1) | one-byte flag, written as
// base-128 digits, most significant first, with bit 0x80 set on every header
// byte except the last; then the key bytes themselves. Leading zero heptets
// are not emitted. This is the format Match() decodes.
byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
                                 bool is_one_byte) {
  const uint32_t header = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
  backing_store_.StartSequence();

  // Continuation heptets: emit the one at |shift| only if the header is large
  // enough to need it (which implies all lower ones are emitted as well).
  for (int shift = 28; shift >= 7; shift -= 7) {
    if (header >= (1u << shift)) {
      backing_store_.Add(static_cast<uint8_t>((header >> shift) | 0x80u));
    }
  }
  // Final, least significant heptet without the continuation bit.
  backing_store_.Add(static_cast<uint8_t>(header & 0x7f));

  backing_store_.AddBlock(bytes);
  return backing_store_.EndSequence().start();
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
|
@ -8,12 +8,9 @@
|
||||
#define V8_PARSING_SCANNER_H_
|
||||
|
||||
#include "src/allocation.h"
|
||||
#include "src/base/hashmap.h"
|
||||
#include "src/base/logging.h"
|
||||
#include "src/char-predicates.h"
|
||||
#include "src/collector.h"
|
||||
#include "src/globals.h"
|
||||
#include "src/list.h"
|
||||
#include "src/messages.h"
|
||||
#include "src/parsing/token.h"
|
||||
#include "src/unicode-decoder.h"
|
||||
@ -25,6 +22,7 @@ namespace internal {
|
||||
|
||||
class AstRawString;
|
||||
class AstValueFactory;
|
||||
class DuplicateFinder;
|
||||
class ParserRecorder;
|
||||
class UnicodeCache;
|
||||
|
||||
@ -99,56 +97,6 @@ class Utf16CharacterStream {
|
||||
};
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// DuplicateFinder discovers duplicate symbols.
|
||||
|
||||
class DuplicateFinder {
|
||||
public:
|
||||
explicit DuplicateFinder(UnicodeCache* constants)
|
||||
: unicode_constants_(constants),
|
||||
backing_store_(16),
|
||||
map_(&Match) { }
|
||||
|
||||
int AddOneByteSymbol(Vector<const uint8_t> key, int value);
|
||||
int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
|
||||
// Add a a number literal by converting it (if necessary)
|
||||
// to the string that ToString(ToNumber(literal)) would generate.
|
||||
// and then adding that string with AddOneByteSymbol.
|
||||
// This string is the actual value used as key in an object literal,
|
||||
// and the one that must be different from the other keys.
|
||||
int AddNumber(Vector<const uint8_t> key, int value);
|
||||
|
||||
private:
|
||||
int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
|
||||
// Backs up the key and its length in the backing store.
|
||||
// The backup is stored with a base 127 encoding of the
|
||||
// length (plus a bit saying whether the string is one byte),
|
||||
// followed by the bytes of the key.
|
||||
uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
|
||||
|
||||
// Compare two encoded keys (both pointing into the backing store)
|
||||
// for having the same base-127 encoded lengths and representation.
|
||||
// and then having the same 'length' bytes following.
|
||||
static bool Match(void* first, void* second);
|
||||
// Creates a hash from a sequence of bytes.
|
||||
static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
|
||||
// Checks whether a string containing a JS number is its canonical
|
||||
// form.
|
||||
static bool IsNumberCanonical(Vector<const uint8_t> key);
|
||||
|
||||
// Size of buffer. Sufficient for using it to call DoubleToCString in
|
||||
// from conversions.h.
|
||||
static const int kBufferSize = 100;
|
||||
|
||||
UnicodeCache* unicode_constants_;
|
||||
// Backing store used to store strings used as hashmap keys.
|
||||
SequenceCollector<unsigned char> backing_store_;
|
||||
base::HashMap map_;
|
||||
// Buffer used for string->number->canonical string conversions.
|
||||
char number_buffer_[kBufferSize];
|
||||
};
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// JavaScript Scanner.
|
||||
|
||||
|
@ -1039,6 +1039,8 @@
|
||||
'objects.h',
|
||||
'ostreams.cc',
|
||||
'ostreams.h',
|
||||
'parsing/duplicate-finder.cc',
|
||||
'parsing/duplicate-finder.h',
|
||||
'parsing/expression-classifier.h',
|
||||
'parsing/func-name-inferrer.cc',
|
||||
'parsing/func-name-inferrer.h',
|
||||
|
Loading…
Reference in New Issue
Block a user