Convert scanner buffers to use standard character types.

R=marja@chromium.org

BUG=

Review URL: https://codereview.chromium.org/198583003

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19883 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
This commit is contained in:
dcarney@chromium.org 2014-03-13 09:15:14 +00:00
parent ee6b885d25
commit 619d812a02
4 changed files with 71 additions and 64 deletions

View File

@ -3512,8 +3512,8 @@ class SingletonLogger : public ParserRecorder {
}; };
// Logs a symbol creation of a literal or identifier. // Logs a symbol creation of a literal or identifier.
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { } virtual void LogOneByteSymbol(int start, Vector<const uint8_t> literal) { }
virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { } virtual void LogTwoByteSymbol(int start, Vector<const uint16_t> literal) { }
// Logs an error message and marks the log as containing an error. // Logs an error message and marks the log as containing an error.
// Further logging will be ignored, and ExtractData will return a vector // Further logging will be ignored, and ExtractData will return a vector

View File

@ -52,8 +52,8 @@ class ParserRecorder {
StrictMode strict_mode) = 0; StrictMode strict_mode) = 0;
// Logs a symbol creation of a literal or identifier. // Logs a symbol creation of a literal or identifier.
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { } virtual void LogOneByteSymbol(int start, Vector<const uint8_t> literal) = 0;
virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { } virtual void LogTwoByteSymbol(int start, Vector<const uint16_t> literal) = 0;
// Logs an error message and marks the log as containing an error. // Logs an error message and marks the log as containing an error.
// Further logging will be ignored, and ExtractData will return a vector // Further logging will be ignored, and ExtractData will return a vector
@ -148,8 +148,8 @@ class FunctionLoggingParserRecorder : public ParserRecorder {
class PartialParserRecorder : public FunctionLoggingParserRecorder { class PartialParserRecorder : public FunctionLoggingParserRecorder {
public: public:
PartialParserRecorder() : FunctionLoggingParserRecorder() { } PartialParserRecorder() : FunctionLoggingParserRecorder() { }
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { } virtual void LogOneByteSymbol(int start, Vector<const uint8_t> literal) { }
virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { } virtual void LogTwoByteSymbol(int start, Vector<const uint16_t> literal) { }
virtual ~PartialParserRecorder() { } virtual ~PartialParserRecorder() { }
virtual Vector<unsigned> ExtractData(); virtual Vector<unsigned> ExtractData();
virtual int symbol_position() { return 0; } virtual int symbol_position() { return 0; }
@ -165,13 +165,13 @@ class CompleteParserRecorder: public FunctionLoggingParserRecorder {
CompleteParserRecorder(); CompleteParserRecorder();
virtual ~CompleteParserRecorder() { } virtual ~CompleteParserRecorder() { }
virtual void LogAsciiSymbol(int start, Vector<const char> literal) { virtual void LogOneByteSymbol(int start, Vector<const uint8_t> literal) {
if (!is_recording_) return; if (!is_recording_) return;
int hash = vector_hash(literal); int hash = vector_hash(literal);
LogSymbol(start, hash, true, Vector<const byte>::cast(literal)); LogSymbol(start, hash, true, literal);
} }
virtual void LogUtf16Symbol(int start, Vector<const uc16> literal) { virtual void LogTwoByteSymbol(int start, Vector<const uint16_t> literal) {
if (!is_recording_) return; if (!is_recording_) return;
int hash = vector_hash(literal); int hash = vector_hash(literal);
LogSymbol(start, hash, false, Vector<const byte>::cast(literal)); LogSymbol(start, hash, false, Vector<const byte>::cast(literal));

View File

@ -909,7 +909,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
KEYWORD("yield", Token::YIELD) KEYWORD("yield", Token::YIELD)
static Token::Value KeywordOrIdentifierToken(const char* input, static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
int input_length, int input_length,
bool harmony_scoping, bool harmony_scoping,
bool harmony_modules) { bool harmony_modules) {
@ -985,7 +985,7 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {
literal.Complete(); literal.Complete();
if (next_.literal_chars->is_one_byte()) { if (next_.literal_chars->is_one_byte()) {
Vector<const char> chars = next_.literal_chars->one_byte_literal(); Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
return KeywordOrIdentifierToken(chars.start(), return KeywordOrIdentifierToken(chars.start(),
chars.length(), chars.length(),
harmony_scoping_, harmony_scoping_,
@ -1123,7 +1123,7 @@ Handle<String> Scanner::AllocateNextLiteralString(Isolate* isolate,
Vector<const uint8_t>::cast(next_literal_one_byte_string()), tenured); Vector<const uint8_t>::cast(next_literal_one_byte_string()), tenured);
} else { } else {
return isolate->factory()->NewStringFromTwoByte( return isolate->factory()->NewStringFromTwoByte(
next_literal_utf16_string(), tenured); next_literal_two_byte_string(), tenured);
} }
} }
@ -1131,10 +1131,10 @@ Handle<String> Scanner::AllocateNextLiteralString(Isolate* isolate,
Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) { Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) {
if (is_literal_one_byte()) { if (is_literal_one_byte()) {
return isolate->factory()->InternalizeOneByteString( return isolate->factory()->InternalizeOneByteString(
Vector<const uint8_t>::cast(literal_one_byte_string())); literal_one_byte_string());
} else { } else {
return isolate->factory()->InternalizeTwoByteString( return isolate->factory()->InternalizeTwoByteString(
literal_utf16_string()); literal_two_byte_string());
} }
} }
@ -1142,7 +1142,7 @@ Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) {
double Scanner::DoubleValue() { double Scanner::DoubleValue() {
ASSERT(is_literal_one_byte()); ASSERT(is_literal_one_byte());
return StringToDouble( return StringToDouble(
unicode_cache_, literal_one_byte_string(), unicode_cache_, Vector<const char>::cast(literal_one_byte_string()),
ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
} }
@ -1154,32 +1154,32 @@ int Scanner::FindNumber(DuplicateFinder* finder, int value) {
int Scanner::FindSymbol(DuplicateFinder* finder, int value) { int Scanner::FindSymbol(DuplicateFinder* finder, int value) {
if (is_literal_one_byte()) { if (is_literal_one_byte()) {
return finder->AddAsciiSymbol(literal_one_byte_string(), value); return finder->AddOneByteSymbol(literal_one_byte_string(), value);
} }
return finder->AddUtf16Symbol(literal_utf16_string(), value); return finder->AddTwoByteSymbol(literal_two_byte_string(), value);
} }
void Scanner::LogSymbol(ParserRecorder* log, int position) { void Scanner::LogSymbol(ParserRecorder* log, int position) {
if (is_literal_one_byte()) { if (is_literal_one_byte()) {
log->LogAsciiSymbol(position, literal_one_byte_string()); log->LogOneByteSymbol(position, literal_one_byte_string());
} else { } else {
log->LogUtf16Symbol(position, literal_utf16_string()); log->LogTwoByteSymbol(position, literal_two_byte_string());
} }
} }
int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) { int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {
return AddSymbol(Vector<const byte>::cast(key), true, value); return AddSymbol(key, true, value);
} }
int DuplicateFinder::AddUtf16Symbol(Vector<const uint16_t> key, int value) { int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {
return AddSymbol(Vector<const byte>::cast(key), false, value); return AddSymbol(Vector<const uint8_t>::cast(key), false, value);
} }
int DuplicateFinder::AddSymbol(Vector<const byte> key, int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,
bool is_one_byte, bool is_one_byte,
int value) { int value) {
uint32_t hash = Hash(key, is_one_byte); uint32_t hash = Hash(key, is_one_byte);
@ -1192,15 +1192,16 @@ int DuplicateFinder::AddSymbol(Vector<const byte> key,
} }
int DuplicateFinder::AddNumber(Vector<const char> key, int value) { int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {
ASSERT(key.length() > 0); ASSERT(key.length() > 0);
// Quick check for already being in canonical form. // Quick check for already being in canonical form.
if (IsNumberCanonical(key)) { if (IsNumberCanonical(key)) {
return AddAsciiSymbol(key, value); return AddOneByteSymbol(key, value);
} }
int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;
double double_value = StringToDouble(unicode_constants_, key, flags, 0.0); double double_value = StringToDouble(
unicode_constants_, Vector<const char>::cast(key), flags, 0.0);
int length; int length;
const char* string; const char* string;
if (!std::isfinite(double_value)) { if (!std::isfinite(double_value)) {
@ -1216,7 +1217,7 @@ int DuplicateFinder::AddNumber(Vector<const char> key, int value) {
} }
bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) { bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {
// Test for a safe approximation of number literals that are already // Test for a safe approximation of number literals that are already
// in canonical form: max 15 digits, no leading zeroes, except an // in canonical form: max 15 digits, no leading zeroes, except an
// integer part that is a single zero, and no trailing zeros below // integer part that is a single zero, and no trailing zeros below
@ -1235,7 +1236,7 @@ bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) {
pos++; pos++;
bool invalid_last_digit = true; bool invalid_last_digit = true;
while (pos < length) { while (pos < length) {
byte digit = number[pos] - '0'; uint8_t digit = number[pos] - '0';
if (digit > '9' - '0') return false; if (digit > '9' - '0') return false;
invalid_last_digit = (digit == 0); invalid_last_digit = (digit == 0);
pos++; pos++;
@ -1244,7 +1245,7 @@ bool DuplicateFinder::IsNumberCanonical(Vector<const char> number) {
} }
uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_one_byte) { uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
// Primitive hash function, almost identical to the one used // Primitive hash function, almost identical to the one used
// for strings (except that it's seeded by the length and ASCII-ness). // for strings (except that it's seeded by the length and ASCII-ness).
int length = key.length(); int length = key.length();
@ -1280,7 +1281,7 @@ bool DuplicateFinder::Match(void* first, void* second) {
} }
byte* DuplicateFinder::BackupKey(Vector<const byte> bytes, byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
bool is_one_byte) { bool is_one_byte) {
uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0); uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
backing_store_.StartSequence(); backing_store_.StartSequence();
@ -1290,15 +1291,18 @@ byte* DuplicateFinder::BackupKey(Vector<const byte> bytes,
if (one_byte_length >= (1 << 14)) { if (one_byte_length >= (1 << 14)) {
if (one_byte_length >= (1 << 21)) { if (one_byte_length >= (1 << 21)) {
if (one_byte_length >= (1 << 28)) { if (one_byte_length >= (1 << 28)) {
backing_store_.Add(static_cast<byte>((one_byte_length >> 28) | 0x80)); backing_store_.Add(
static_cast<uint8_t>((one_byte_length >> 28) | 0x80));
} }
backing_store_.Add(static_cast<byte>((one_byte_length >> 21) | 0x80u)); backing_store_.Add(
static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));
} }
backing_store_.Add(static_cast<byte>((one_byte_length >> 14) | 0x80u)); backing_store_.Add(
static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));
} }
backing_store_.Add(static_cast<byte>((one_byte_length >> 7) | 0x80u)); backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
} }
backing_store_.Add(static_cast<byte>(one_byte_length & 0x7f)); backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
backing_store_.AddBlock(bytes); backing_store_.AddBlock(bytes);
return backing_store_.EndSequence().start(); return backing_store_.EndSequence().start();

View File

@ -120,8 +120,8 @@ class Utf16CharacterStream {
virtual bool ReadBlock() = 0; virtual bool ReadBlock() = 0;
virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0; virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
const uc16* buffer_cursor_; const uint16_t* buffer_cursor_;
const uc16* buffer_end_; const uint16_t* buffer_end_;
unsigned pos_; unsigned pos_;
}; };
@ -169,32 +169,32 @@ class DuplicateFinder {
backing_store_(16), backing_store_(16),
map_(&Match) { } map_(&Match) { }
int AddAsciiSymbol(Vector<const char> key, int value); int AddOneByteSymbol(Vector<const uint8_t> key, int value);
int AddUtf16Symbol(Vector<const uint16_t> key, int value); int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
// Add a number literal by converting it (if necessary) // Add a number literal by converting it (if necessary)
// to the string that ToString(ToNumber(literal)) would generate. // to the string that ToString(ToNumber(literal)) would generate.
// and then adding that string with AddAsciiSymbol. // and then adding that string with AddAsciiSymbol.
// This string is the actual value used as key in an object literal, // This string is the actual value used as key in an object literal,
// and the one that must be different from the other keys. // and the one that must be different from the other keys.
int AddNumber(Vector<const char> key, int value); int AddNumber(Vector<const uint8_t> key, int value);
private: private:
int AddSymbol(Vector<const byte> key, bool is_one_byte, int value); int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
// Backs up the key and its length in the backing store. // Backs up the key and its length in the backing store.
// The backup is stored with a base 127 encoding of the // The backup is stored with a base 127 encoding of the
// length (plus a bit saying whether the string is ASCII), // length (plus a bit saying whether the string is one byte),
// followed by the bytes of the key. // followed by the bytes of the key.
byte* BackupKey(Vector<const byte> key, bool is_one_byte); uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
// Compare two encoded keys (both pointing into the backing store) // Compare two encoded keys (both pointing into the backing store)
// for having the same base-127 encoded lengths and ASCII-ness, // for having the same base-127 encoded lengths and ASCII-ness,
// and then having the same 'length' bytes following. // and then having the same 'length' bytes following.
static bool Match(void* first, void* second); static bool Match(void* first, void* second);
// Creates a hash from a sequence of bytes. // Creates a hash from a sequence of bytes.
static uint32_t Hash(Vector<const byte> key, bool is_one_byte); static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
// Checks whether a string containing a JS number is its canonical // Checks whether a string containing a JS number is its canonical
// form. // form.
static bool IsNumberCanonical(Vector<const char> key); static bool IsNumberCanonical(Vector<const uint8_t> key);
// Size of buffer. Sufficient for using it to call DoubleToCString in // Size of buffer. Sufficient for using it to call DoubleToCString in
// from conversions.h. // from conversions.h.
@ -230,10 +230,10 @@ class LiteralBuffer {
position_ += kOneByteSize; position_ += kOneByteSize;
return; return;
} }
ConvertToUtf16(); ConvertToTwoByte();
} }
ASSERT(code_unit < 0x10000u); ASSERT(code_unit < 0x10000u);
*reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit; *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
position_ += kUC16Size; position_ += kUC16Size;
} }
@ -244,18 +244,18 @@ class LiteralBuffer {
(memcmp(keyword.start(), backing_store_.start(), position_) == 0); (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
} }
Vector<const uc16> utf16_literal() { Vector<const uint16_t> two_byte_literal() {
ASSERT(!is_one_byte_); ASSERT(!is_one_byte_);
ASSERT((position_ & 0x1) == 0); ASSERT((position_ & 0x1) == 0);
return Vector<const uc16>( return Vector<const uint16_t>(
reinterpret_cast<const uc16*>(backing_store_.start()), reinterpret_cast<const uint16_t*>(backing_store_.start()),
position_ >> 1); position_ >> 1);
} }
Vector<const char> one_byte_literal() { Vector<const uint8_t> one_byte_literal() {
ASSERT(is_one_byte_); ASSERT(is_one_byte_);
return Vector<const char>( return Vector<const uint8_t>(
reinterpret_cast<const char*>(backing_store_.start()), reinterpret_cast<const uint8_t*>(backing_store_.start()),
position_); position_);
} }
@ -286,7 +286,7 @@ class LiteralBuffer {
backing_store_ = new_store; backing_store_ = new_store;
} }
void ConvertToUtf16() { void ConvertToTwoByte() {
ASSERT(is_one_byte_); ASSERT(is_one_byte_);
Vector<byte> new_store; Vector<byte> new_store;
int new_content_size = position_ * kUC16Size; int new_content_size = position_ * kUC16Size;
@ -298,7 +298,7 @@ class LiteralBuffer {
new_store = backing_store_; new_store = backing_store_;
} }
uint8_t* src = backing_store_.start(); uint8_t* src = backing_store_.start();
uc16* dst = reinterpret_cast<uc16*>(new_store.start()); uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
for (int i = position_ - 1; i >= 0; i--) { for (int i = position_ - 1; i >= 0; i--) {
dst[i] = src[i]; dst[i] = src[i];
} }
@ -408,7 +408,9 @@ class Scanner {
if (is_literal_one_byte() && if (is_literal_one_byte() &&
literal_length() == length && literal_length() == length &&
!literal_contains_escapes()) { !literal_contains_escapes()) {
return !strncmp(literal_one_byte_string().start(), data, length); const char* token =
reinterpret_cast<const char*>(literal_one_byte_string().start());
return !strncmp(token, data, length);
} }
return false; return false;
} }
@ -416,7 +418,8 @@ class Scanner {
if (is_literal_one_byte() && if (is_literal_one_byte() &&
literal_length() == 3 && literal_length() == 3 &&
!literal_contains_escapes()) { !literal_contains_escapes()) {
const char* token = literal_one_byte_string().start(); const char* token =
reinterpret_cast<const char*>(literal_one_byte_string().start());
*is_get = strncmp(token, "get", 3) == 0; *is_get = strncmp(token, "get", 3) == 0;
*is_set = !*is_get && strncmp(token, "set", 3) == 0; *is_set = !*is_get && strncmp(token, "set", 3) == 0;
} }
@ -551,13 +554,13 @@ class Scanner {
// numbers. // numbers.
// These functions only give the correct result if the literal // These functions only give the correct result if the literal
// was scanned between calls to StartLiteral() and TerminateLiteral(). // was scanned between calls to StartLiteral() and TerminateLiteral().
Vector<const char> literal_one_byte_string() { Vector<const uint8_t> literal_one_byte_string() {
ASSERT_NOT_NULL(current_.literal_chars); ASSERT_NOT_NULL(current_.literal_chars);
return current_.literal_chars->one_byte_literal(); return current_.literal_chars->one_byte_literal();
} }
Vector<const uc16> literal_utf16_string() { Vector<const uint16_t> literal_two_byte_string() {
ASSERT_NOT_NULL(current_.literal_chars); ASSERT_NOT_NULL(current_.literal_chars);
return current_.literal_chars->utf16_literal(); return current_.literal_chars->two_byte_literal();
} }
bool is_literal_one_byte() { bool is_literal_one_byte() {
ASSERT_NOT_NULL(current_.literal_chars); ASSERT_NOT_NULL(current_.literal_chars);
@ -569,13 +572,13 @@ class Scanner {
} }
// Returns the literal string for the next token (the token that // Returns the literal string for the next token (the token that
// would be returned if Next() were called). // would be returned if Next() were called).
Vector<const char> next_literal_one_byte_string() { Vector<const uint8_t> next_literal_one_byte_string() {
ASSERT_NOT_NULL(next_.literal_chars); ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->one_byte_literal(); return next_.literal_chars->one_byte_literal();
} }
Vector<const uc16> next_literal_utf16_string() { Vector<const uint16_t> next_literal_two_byte_string() {
ASSERT_NOT_NULL(next_.literal_chars); ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->utf16_literal(); return next_.literal_chars->two_byte_literal();
} }
bool is_next_literal_one_byte() { bool is_next_literal_one_byte() {
ASSERT_NOT_NULL(next_.literal_chars); ASSERT_NOT_NULL(next_.literal_chars);