Separate DuplicateFinder from Scanner.

DuplicateFinder isn't actually used by the Scanner, except for one convenience function (Scanner::FindSymbol), which we should probably remove as well. BUG= Review-Url: https://codereview.chromium.org/2281443002 Cr-Commit-Position: refs/heads/master@{#38904}
2016-08-25 04:58:07 -07:00 · 2016-08-25 04:58:07 -07:00 · 5b9b44d1f6
commit 5b9b44d1f6
parent b550c07734
10 changed files with 222 additions and 192 deletions
--- a/BUILD.gn
+++ b/BUILD.gn
@ -1443,6 +1443,8 @@ v8_source_set("v8_base") {
    "src/objects.h",
    "src/ostreams.cc",
    "src/ostreams.h",
+    "src/parsing/duplicate-finder.cc",
+    "src/parsing/duplicate-finder.h",
    "src/parsing/expression-classifier.h",
    "src/parsing/func-name-inferrer.cc",
    "src/parsing/func-name-inferrer.h",
--- a/src/collector.h
+++ b/src/collector.h
@ -6,7 +6,7 @@
 #define V8_COLLECTOR_H_

 #include "src/checks.h"
-#include "src/list.h"
+#include "src/list-inl.h"
 #include "src/vector.h"

 namespace v8 {
--- a/src/parsing/duplicate-finder.cc
+++ b/src/parsing/duplicate-finder.cc
@ -0,0 +1,145 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/parsing/duplicate-finder.h"
+
+#include "src/conversions.h"
+#include "src/unicode-cache.h"
+
+namespace v8 {
+namespace internal {
+
+int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
+  return AddSymbol(key, true, value);
+}
+
+int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
+  return AddSymbol(Vector<const uint8_t>::cast(key), false, value);
+}
+
+int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, bool is_one_byte,
+                               int value) {
+  uint32_t hash = Hash(key, is_one_byte);
+  byte* encoding = BackupKey(key, is_one_byte);
+  base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash);
+  int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
+  entry->value =
+      reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));
+  return old_value;
+}
+
+int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
+  DCHECK(key.length() > 0);
+  // Quick check for already being in canonical form.
+  if (IsNumberCanonical(key)) {
+    return AddOneByteSymbol(key, value);
+  }
+
+  int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
+  double double_value = StringToDouble(unicode_constants_, key, flags, 0.0);
+  int length;
+  const char* string;
+  if (!std::isfinite(double_value)) {
+    string = "Infinity";
+    length = 8;  // strlen("Infinity");
+  } else {
+    string = DoubleToCString(double_value,
+                             Vector<char>(number_buffer_, kBufferSize));
+    length = StrLength(string);
+  }
+  return AddSymbol(
+      Vector<const byte>(reinterpret_cast<const byte*>(string), length), true,
+      value);
+}
+
+bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
+  // Test for a safe approximation of number literals that are already
+  // in canonical form: max 15 characters, no leading zeros, except an
+  // integer part that is a single zero, and no trailing zeros below
+  // the decimal point.
+  int pos = 0;
+  int length = number.length();
+  if (number.length() > 15) return false;
+  if (number[pos] == '0') {
+    pos++;
+  } else {
+    while (pos < length &&
+           static_cast<unsigned>(number[pos] - '0') <= ('9' - '0'))
+      pos++;
+  }
+  if (length == pos) return true;
+  if (number[pos] != '.') return false;
+  pos++;
+  bool invalid_last_digit = true;
+  while (pos < length) {
+    uint8_t digit = number[pos] - '0';
+    if (digit > '9' - '0') return false;
+    invalid_last_digit = (digit == 0);
+    pos++;
+  }
+  return !invalid_last_digit;
+}
+
+uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
+  // Primitive hash function, almost identical to the one used
+  // for strings (except that it's seeded by the length and representation).
+  int length = key.length();
+  uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0);
+  for (int i = 0; i < length; i++) {
+    uint32_t c = key[i];
+    hash = (hash + c) * 1025;
+    hash ^= (hash >> 6);
+  }
+  return hash;
+}
+
+bool DuplicateFinder::Match(void* first, void* second) {
+  // Decode lengths.
+  // Length + representation is encoded as base 128, most significant heptet
+  // first, with the 8th bit (0x80) set while there are more heptets.
+  // The value encodes the number of bytes following, and whether the original
+  // was Latin1.
+  byte* s1 = reinterpret_cast<byte*>(first);
+  byte* s2 = reinterpret_cast<byte*>(second);
+  uint32_t length_one_byte_field = 0;
+  byte c1;
+  do {
+    c1 = *s1;
+    if (c1 != *s2) return false;
+    length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
+    s1++;
+    s2++;
+  } while ((c1 & 0x80) != 0);
+  int length = static_cast<int>(length_one_byte_field >> 1);
+  return memcmp(s1, s2, length) == 0;
+}
+
+byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
+                                 bool is_one_byte) {
+  uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
+  backing_store_.StartSequence();
+  // Emit one_byte_length as base-128 encoded number, with the 8th bit (0x80)
+  // set on the byte of every heptet except the last, least significant, one.
+  if (one_byte_length >= (1 << 7)) {
+    if (one_byte_length >= (1 << 14)) {
+      if (one_byte_length >= (1 << 21)) {
+        if (one_byte_length >= (1 << 28)) {
+          backing_store_.Add(
+              static_cast<uint8_t>((one_byte_length >> 28) | 0x80));
+        }
+        backing_store_.Add(
+            static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));
+      }
+      backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));
+    }
+    backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
+  }
+  backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
+
+  backing_store_.AddBlock(bytes);
+  return backing_store_.EndSequence().start();
+}
+
+}  // namespace internal
+}  // namespace v8
--- a/src/parsing/duplicate-finder.h
+++ b/src/parsing/duplicate-finder.h
@ -0,0 +1,64 @@
+// Copyright 2011 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_PARSING_DUPLICATE_FINDER_H_
+#define V8_PARSING_DUPLICATE_FINDER_H_
+
+#include "src/base/hashmap.h"
+#include "src/collector.h"
+
+namespace v8 {
+namespace internal {
+
+class UnicodeCache;
+
+// DuplicateFinder discovers duplicate symbols.
+class DuplicateFinder {
+ public:
+  explicit DuplicateFinder(UnicodeCache* constants)
+      : unicode_constants_(constants), backing_store_(16), map_(&Match) {}
+
+  int AddOneByteSymbol(Vector<const uint8_t> key, int value);
+  int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
+  // Add a number literal by converting it (if necessary)
+  // to the string that ToString(ToNumber(literal)) would generate,
+  // and then adding that string with AddOneByteSymbol.
+  // This string is the actual value used as key in an object literal,
+  // and the one that must be different from the other keys.
+  int AddNumber(Vector<const uint8_t> key, int value);
+
+ private:
+  int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
+  // Backs up the key and its length in the backing store.
+  // The backup is stored with a base-128 encoding of the
+  // length (plus a bit saying whether the string is one byte),
+  // followed by the bytes of the key.
+  uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
+
+  // Compare two encoded keys (both pointing into the backing store)
+  // for having the same base-128 encoded lengths and representation,
+  // and then having the same 'length' bytes following.
+  static bool Match(void* first, void* second);
+  // Creates a hash from a sequence of bytes.
+  static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
+  // Checks whether a string containing a JS number is in its canonical
+  // form.
+  static bool IsNumberCanonical(Vector<const uint8_t> key);
+
+  // Size of buffer. Sufficient for using it to call DoubleToCString
+  // from conversions.h.
+  static const int kBufferSize = 100;
+
+  UnicodeCache* unicode_constants_;
+  // Backing store used to store strings used as hashmap keys.
+  SequenceCollector<unsigned char> backing_store_;
+  base::HashMap map_;
+  // Buffer used for string->number->canonical string conversions.
+  char number_buffer_[kBufferSize];
+};
+
+}  // namespace internal
+}  // namespace v8
+
+#endif  // V8_PARSING_DUPLICATE_FINDER_H_
--- a/src/parsing/expression-classifier.h
+++ b/src/parsing/expression-classifier.h
@ -12,6 +12,8 @@
 namespace v8 {
 namespace internal {

+class DuplicateFinder;
+
 #define ERROR_CODES(T)                       \
  T(ExpressionProduction, 0)                 \
  T(FormalParameterInitializerProduction, 1) \
--- a/src/parsing/parser.cc
+++ b/src/parsing/parser.cc
@ -15,6 +15,7 @@
 #include "src/base/platform/platform.h"
 #include "src/char-predicates-inl.h"
 #include "src/messages.h"
+#include "src/parsing/duplicate-finder.h"
 #include "src/parsing/parameter-initializer-rewriter.h"
 #include "src/parsing/parse-info.h"
 #include "src/parsing/rewriter.h"
--- a/src/parsing/preparser.cc
+++ b/src/parsing/preparser.cc
@ -10,6 +10,7 @@
 #include "src/conversions.h"
 #include "src/globals.h"
 #include "src/list.h"
+#include "src/parsing/duplicate-finder.h"
 #include "src/parsing/parser-base.h"
 #include "src/parsing/preparse-data-format.h"
 #include "src/parsing/preparse-data.h"
--- a/src/parsing/scanner.cc
+++ b/src/parsing/scanner.cc
@ -14,6 +14,7 @@
 #include "src/char-predicates-inl.h"
 #include "src/conversions-inl.h"
 #include "src/list-inl.h"
+#include "src/parsing/duplicate-finder.h"  // For Scanner::FindSymbol

 namespace v8 {
 namespace internal {
@ -1573,6 +1574,8 @@ bool Scanner::ContainsDot() {


 int Scanner::FindSymbol(DuplicateFinder* finder, int value) {
+  // TODO(vogelheim): Move this logic into the calling class; this can be fully
+  //                  implemented using the public interface.
  if (is_literal_one_byte()) {
    return finder->AddOneByteSymbol(literal_one_byte_string(), value);
  }
@ -1632,144 +1635,6 @@ void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) {
 }


-int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
-  return AddSymbol(key, true, value);
-}
-
-
-int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
-  return AddSymbol(Vector<const uint8_t>::cast(key), false, value);
-}
-
-
-int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,
-                               bool is_one_byte,
-                               int value) {
-  uint32_t hash = Hash(key, is_one_byte);
-  byte* encoding = BackupKey(key, is_one_byte);
-  base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash);
-  int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
-  entry->value =
-    reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));
-  return old_value;
-}
-
-
-int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
-  DCHECK(key.length() > 0);
-  // Quick check for already being in canonical form.
-  if (IsNumberCanonical(key)) {
-    return AddOneByteSymbol(key, value);
-  }
-
-  int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
-  double double_value = StringToDouble(
-      unicode_constants_, key, flags, 0.0);
-  int length;
-  const char* string;
-  if (!std::isfinite(double_value)) {
-    string = "Infinity";
-    length = 8;  // strlen("Infinity");
-  } else {
-    string = DoubleToCString(double_value,
-                             Vector<char>(number_buffer_, kBufferSize));
-    length = StrLength(string);
-  }
-  return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string),
-                                      length), true, value);
-}
-
-
-bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
-  // Test for a safe approximation of number literals that are already
-  // in canonical form: max 15 digits, no leading zeroes, except an
-  // integer part that is a single zero, and no trailing zeros below
-  // the decimal point.
-  int pos = 0;
-  int length = number.length();
-  if (number.length() > 15) return false;
-  if (number[pos] == '0') {
-    pos++;
-  } else {
-    while (pos < length &&
-           static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;
-  }
-  if (length == pos) return true;
-  if (number[pos] != '.') return false;
-  pos++;
-  bool invalid_last_digit = true;
-  while (pos < length) {
-    uint8_t digit = number[pos] - '0';
-    if (digit > '9' - '0') return false;
-    invalid_last_digit = (digit == 0);
-    pos++;
-  }
-  return !invalid_last_digit;
-}
-
-
-uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
-  // Primitive hash function, almost identical to the one used
-  // for strings (except that it's seeded by the length and representation).
-  int length = key.length();
-  uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0);
-  for (int i = 0; i < length; i++) {
-    uint32_t c = key[i];
-    hash = (hash + c) * 1025;
-    hash ^= (hash >> 6);
-  }
-  return hash;
-}
-
-
-bool DuplicateFinder::Match(void* first, void* second) {
-  // Decode lengths.
-  // Length + representation is encoded as base 128, most significant heptet
-  // first, with a 8th bit being non-zero while there are more heptets.
-  // The value encodes the number of bytes following, and whether the original
-  // was Latin1.
-  byte* s1 = reinterpret_cast<byte*>(first);
-  byte* s2 = reinterpret_cast<byte*>(second);
-  uint32_t length_one_byte_field = 0;
-  byte c1;
-  do {
-    c1 = *s1;
-    if (c1 != *s2) return false;
-    length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
-    s1++;
-    s2++;
-  } while ((c1 & 0x80) != 0);
-  int length = static_cast<int>(length_one_byte_field >> 1);
-  return memcmp(s1, s2, length) == 0;
-}
-
-
-byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
-                                 bool is_one_byte) {
-  uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
-  backing_store_.StartSequence();
-  // Emit one_byte_length as base-128 encoded number, with the 7th bit set
-  // on the byte of every heptet except the last, least significant, one.
-  if (one_byte_length >= (1 << 7)) {
-    if (one_byte_length >= (1 << 14)) {
-      if (one_byte_length >= (1 << 21)) {
-        if (one_byte_length >= (1 << 28)) {
-          backing_store_.Add(
-              static_cast<uint8_t>((one_byte_length >> 28) | 0x80));
-        }
-        backing_store_.Add(
-            static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));
-      }
-      backing_store_.Add(
-          static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));
-    }
-    backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
-  }
-  backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
-
-  backing_store_.AddBlock(bytes);
-  return backing_store_.EndSequence().start();
-}

 }  // namespace internal
 }  // namespace v8
--- a/src/parsing/scanner.h
+++ b/src/parsing/scanner.h
@ -8,12 +8,9 @@
 #define V8_PARSING_SCANNER_H_

 #include "src/allocation.h"
-#include "src/base/hashmap.h"
 #include "src/base/logging.h"
 #include "src/char-predicates.h"
-#include "src/collector.h"
 #include "src/globals.h"
-#include "src/list.h"
 #include "src/messages.h"
 #include "src/parsing/token.h"
 #include "src/unicode-decoder.h"
@ -25,6 +22,7 @@ namespace internal {

 class AstRawString;
 class AstValueFactory;
+class DuplicateFinder;
 class ParserRecorder;
 class UnicodeCache;

@ -99,56 +97,6 @@ class Utf16CharacterStream {
 };


-// ---------------------------------------------------------------------
-// DuplicateFinder discovers duplicate symbols.
-
-class DuplicateFinder {
- public:
-  explicit DuplicateFinder(UnicodeCache* constants)
-      : unicode_constants_(constants),
-        backing_store_(16),
-        map_(&Match) { }
-
-  int AddOneByteSymbol(Vector<const uint8_t> key, int value);
-  int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
-  // Add a a number literal by converting it (if necessary)
-  // to the string that ToString(ToNumber(literal)) would generate.
-  // and then adding that string with AddOneByteSymbol.
-  // This string is the actual value used as key in an object literal,
-  // and the one that must be different from the other keys.
-  int AddNumber(Vector<const uint8_t> key, int value);
-
- private:
-  int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
-  // Backs up the key and its length in the backing store.
-  // The backup is stored with a base 127 encoding of the
-  // length (plus a bit saying whether the string is one byte),
-  // followed by the bytes of the key.
-  uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
-
-  // Compare two encoded keys (both pointing into the backing store)
-  // for having the same base-127 encoded lengths and representation.
-  // and then having the same 'length' bytes following.
-  static bool Match(void* first, void* second);
-  // Creates a hash from a sequence of bytes.
-  static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
-  // Checks whether a string containing a JS number is its canonical
-  // form.
-  static bool IsNumberCanonical(Vector<const uint8_t> key);
-
-  // Size of buffer. Sufficient for using it to call DoubleToCString in
-  // from conversions.h.
-  static const int kBufferSize = 100;
-
-  UnicodeCache* unicode_constants_;
-  // Backing store used to store strings used as hashmap keys.
-  SequenceCollector<unsigned char> backing_store_;
-  base::HashMap map_;
-  // Buffer used for string->number->canonical string conversions.
-  char number_buffer_[kBufferSize];
-};
-
-
 // ----------------------------------------------------------------------------
 // JavaScript Scanner.

--- a/src/v8.gyp
+++ b/src/v8.gyp
@ -1039,6 +1039,8 @@
        'objects.h',
        'ostreams.cc',
        'ostreams.h',
+        'parsing/duplicate-finder.cc',
+        'parsing/duplicate-finder.h',
        'parsing/expression-classifier.h',
        'parsing/func-name-inferrer.cc',
        'parsing/func-name-inferrer.h',