[wasm][asm.js] Adding custom asm.js lexer.
Adding a custom lexer for asm.js parsing. It takes advantage of a number of asm.js properties to simplify things: * Assumes 'use asm' is the only string. * Does not handle unicode for now (tools don't emit it). * Combines global + local string table with lexer. R=marja@chromium.org,vogelheim@chromium.org,kschimpf@chromium.org BUG=v8:4203 BUG=v8:6090 Review-Url: https://codereview.chromium.org/2751693002 Cr-Commit-Position: refs/heads/master@{#43874}
This commit is contained in:
parent
18c77ce51b
commit
4c3217e132
3
BUILD.gn
3
BUILD.gn
@ -968,6 +968,9 @@ v8_source_set("v8_base") {
|
||||
"src/arguments.h",
|
||||
"src/asmjs/asm-js.cc",
|
||||
"src/asmjs/asm-js.h",
|
||||
"src/asmjs/asm-names.h",
|
||||
"src/asmjs/asm-scanner.cc",
|
||||
"src/asmjs/asm-scanner.h",
|
||||
"src/asmjs/asm-typer.cc",
|
||||
"src/asmjs/asm-typer.h",
|
||||
"src/asmjs/asm-types.cc",
|
||||
|
110
src/asmjs/asm-names.h
Normal file
110
src/asmjs/asm-names.h
Normal file
@ -0,0 +1,110 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_ASMJS_ASM_NAMES_H_
|
||||
#define V8_ASMJS_ASM_NAMES_H_
|
||||
|
||||
// X-macro over the names in the stdlib Math value list.
// V(<name>)
#define STDLIB_MATH_VALUE_LIST(V) \
  V(E)                            \
  V(LN10)                         \
  V(LN2)                          \
  V(LOG2E)                        \
  V(LOG10E)                       \
  V(PI)                           \
  V(SQRT1_2)                      \
  V(SQRT2)
|
||||
|
||||
// X-macro over stdlib.Math functions that carry a wasm opcode and an asm.js
// type per entry.
// V(stdlib.Math.<name>, Name, wasm-opcode, asm-js-type)
#define STDLIB_MATH_FUNCTION_MONOMORPHIC_LIST(V) \
  V(acos, Acos, kExprF64Acos, dq2d)              \
  V(asin, Asin, kExprF64Asin, dq2d)              \
  V(atan, Atan, kExprF64Atan, dq2d)              \
  V(cos, Cos, kExprF64Cos, dq2d)                 \
  V(sin, Sin, kExprF64Sin, dq2d)                 \
  V(tan, Tan, kExprF64Tan, dq2d)                 \
  V(exp, Exp, kExprF64Exp, dq2d)                 \
  V(log, Log, kExprF64Log, dq2d)                 \
  V(atan2, Atan2, kExprF64Atan2, dqdq2d)         \
  V(pow, Pow, kExprF64Pow, dqdq2d)               \
  V(imul, Imul, kExprI32Mul, ii2s)               \
  V(clz32, Clz32, kExprI32Clz, i2s)
|
||||
|
||||
// X-macro over the stdlib.Math functions typed as "ceil_like"; the third
// field is a placeholder that is not used.
// V(stdlib.Math.<name>, Name, unused, asm-js-type)
#define STDLIB_MATH_FUNCTION_CEIL_LIKE_LIST(V) \
  V(ceil, Ceil, x, ceil_like)                  \
  V(floor, Floor, x, ceil_like)                \
  V(sqrt, Sqrt, x, ceil_like)
|
||||
|
||||
// X-macro over every stdlib.Math function: the four entries listed here,
// followed by the monomorphic and the ceil-like lists.
// V(stdlib.Math.<name>, Name, unused, asm-js-type)
#define STDLIB_MATH_FUNCTION_LIST(V)       \
  V(min, Min, x, minmax)                   \
  V(max, Max, x, minmax)                   \
  V(abs, Abs, x, abs)                      \
  V(fround, Fround, x, fround)             \
  STDLIB_MATH_FUNCTION_MONOMORPHIC_LIST(V) \
  STDLIB_MATH_FUNCTION_CEIL_LIKE_LIST(V)
|
||||
|
||||
// X-macro over the typed-array constructors available on stdlib.
// V(stdlib.<name>, wasm-load-type, wasm-store-type, wasm-type)
#define STDLIB_ARRAY_TYPE_LIST(V)    \
  V(Int8Array, Mem8S, Mem8, I32)     \
  V(Uint8Array, Mem8U, Mem8, I32)    \
  V(Int16Array, Mem16S, Mem16, I32)  \
  V(Uint16Array, Mem16U, Mem16, I32) \
  V(Int32Array, Mem, Mem, I32)       \
  V(Uint32Array, Mem, Mem, I32)      \
  V(Float32Array, Mem, Mem, F32)     \
  V(Float64Array, Mem, Mem, F64)
|
||||
|
||||
// X-macro over the remaining stdlib names that are not Math members or
// typed-array constructors.
// V(<name>)
#define STDLIB_OTHER_LIST(V) \
  V(Infinity)                \
  V(NaN)                     \
  V(Math)
|
||||
|
||||
// X-macro over the keywords (and keyword-like names) the scanner knows.
// V(<keyword>)
// clang-format off (for return)
#define KEYWORD_NAME_LIST(V) \
  V(arguments)               \
  V(break)                   \
  V(case)                    \
  V(const)                   \
  V(continue)                \
  V(default)                 \
  V(do)                      \
  V(else)                    \
  V(eval)                    \
  V(for)                     \
  V(function)                \
  V(if)                      \
  V(new)                     \
  V(return )                 \
  V(switch)                  \
  V(var)                     \
  V(while)
// clang-format on
|
||||
|
||||
// X-macro over tokens that are spelled with more than one character.
// V(token-string, token-name)
#define LONG_SYMBOL_NAME_LIST(V) \
  V("<=", LE)                    \
  V(">=", GE)                    \
  V("==", EQ)                    \
  V("!=", NE)                    \
  V("<<", SHL)                   \
  V(">>", SAR)                   \
  V(">>>", SHR)                  \
  V("'use asm'", UseAsm)
|
||||
|
||||
// X-macro over single characters that are tokens by themselves.
// V(<character literal>)
// clang-format off
#define SIMPLE_SINGLE_TOKEN_LIST(V)                          \
  V('+') V('-') V('*') V('%') V('~') V('^') V('&') V('|')    \
  V('(') V(')') V('[') V(']') V('{') V('}') V(':') V(';')    \
  V(',') V('?')
// clang-format on
|
||||
|
||||
// X-macro over tokens with fixed numeric values that do not correspond to
// any literal spelling in the source. The string is the token's printable
// name.
// V(name, value, string-name)
#define SPECIAL_TOKEN_LIST(V)             \
  V(kUninitialized, 0, "{uninitialized}") \
  V(kEndOfInput, -1, "{end of input}")    \
  V(kParseError, -2, "{parse error}")     \
  V(kUnsigned, -3, "{unsigned value}")    \
  V(kDouble, -4, "{double value}")
|
||||
|
||||
#endif
|
413
src/asmjs/asm-scanner.cc
Normal file
413
src/asmjs/asm-scanner.cc
Normal file
@ -0,0 +1,413 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/asmjs/asm-scanner.h"
|
||||
|
||||
#include "src/conversions.h"
|
||||
#include "src/flags.h"
|
||||
#include "src/parsing/scanner.h"
|
||||
#include "src/unicode-cache.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
namespace {
// Cap number of identifiers to ensure we can assign both global and
// local ones a token id in the range of an int32_t.
// (The unnamed namespace already gives internal linkage, so no `static`.)
constexpr int kMaxIdentifierCount = 0xf000000;
}  // namespace
|
||||
|
||||
// Puts every field into its initial state and seeds the name tables: stdlib
// members are registered in |property_names_| and keywords in
// |global_names_|, each mapped to its fixed kToken_<name> id.
AsmJsScanner::AsmJsScanner()
    : token_(kUninitialized),
      preceding_token_(kUninitialized),
      next_token_(kUninitialized),
      rewind_(false),
      in_local_scope_(false),
      global_count_(0),
      double_value_(0.0),
      unsigned_value_(0),
      preceded_by_newline_(false) {
  // Lists whose entries have four fields; only the name is needed here.
#define ASM_ADD_PROPERTY4(name, _unused1, _unused2, _unused3) \
  property_names_[#name] = kToken_##name;
  STDLIB_MATH_FUNCTION_LIST(ASM_ADD_PROPERTY4)
  STDLIB_ARRAY_TYPE_LIST(ASM_ADD_PROPERTY4)
#undef ASM_ADD_PROPERTY4

  // Lists whose entries are a bare name.
#define ASM_ADD_PROPERTY(name) property_names_[#name] = kToken_##name;
  STDLIB_MATH_VALUE_LIST(ASM_ADD_PROPERTY)
  STDLIB_OTHER_LIST(ASM_ADD_PROPERTY)
#undef ASM_ADD_PROPERTY

  // Keywords live in the global table.
#define ASM_ADD_KEYWORD(name) global_names_[#name] = kToken_##name;
  KEYWORD_NAME_LIST(ASM_ADD_KEYWORD)
#undef ASM_ADD_KEYWORD
}
|
||||
|
||||
// Takes ownership of |stream| and immediately scans the first token from it,
// so Token() is meaningful as soon as this returns.
void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) {
  stream_ = std::move(stream);
  // Prime the scanner with the first token.
  Next();
}
|
||||
|
||||
void AsmJsScanner::Next() {
|
||||
if (rewind_) {
|
||||
preceding_token_ = token_;
|
||||
token_ = next_token_;
|
||||
next_token_ = kUninitialized;
|
||||
rewind_ = false;
|
||||
return;
|
||||
}
|
||||
|
||||
if (token_ == kEndOfInput || token_ == kParseError) {
|
||||
return;
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
if (FLAG_trace_asm_scanner) {
|
||||
if (Token() == kDouble) {
|
||||
PrintF("%lf ", AsDouble());
|
||||
} else if (Token() == kUnsigned) {
|
||||
PrintF("%" PRIu64 " ", AsUnsigned());
|
||||
} else {
|
||||
std::string name = Name(Token());
|
||||
PrintF("%s ", name.c_str());
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
preceded_by_newline_ = false;
|
||||
preceding_token_ = token_;
|
||||
for (;;) {
|
||||
uc32 ch = stream_->Advance();
|
||||
switch (ch) {
|
||||
case ' ':
|
||||
case '\t':
|
||||
case '\r':
|
||||
// Ignore whitespace.
|
||||
break;
|
||||
|
||||
case '\n':
|
||||
// Track when we've passed a newline for optional semicolon support,
|
||||
// but keep scanning.
|
||||
preceded_by_newline_ = true;
|
||||
break;
|
||||
|
||||
case kEndOfInput:
|
||||
token_ = kEndOfInput;
|
||||
return;
|
||||
|
||||
case '\'':
|
||||
case '"':
|
||||
ConsumeString(ch);
|
||||
return;
|
||||
|
||||
case '/':
|
||||
ch = stream_->Advance();
|
||||
if (ch == '/') {
|
||||
ConsumeCPPComment();
|
||||
} else if (ch == '*') {
|
||||
if (!ConsumeCComment()) {
|
||||
token_ = kParseError;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
stream_->Back();
|
||||
token_ = '/';
|
||||
return;
|
||||
}
|
||||
// Breaks out of switch, but loops again (i.e. the case when we parsed
|
||||
// a comment, but need to continue to look for the next token).
|
||||
break;
|
||||
|
||||
case '<':
|
||||
case '>':
|
||||
case '=':
|
||||
case '!':
|
||||
ConsumeCompareOrShift(ch);
|
||||
return;
|
||||
|
||||
#define V(single_char_token) case single_char_token:
|
||||
SIMPLE_SINGLE_TOKEN_LIST(V)
|
||||
#undef V
|
||||
// Use fixed token IDs for ASCII.
|
||||
token_ = ch;
|
||||
return;
|
||||
|
||||
default:
|
||||
if (IsIdentifierStart(ch)) {
|
||||
ConsumeIdentifier(ch);
|
||||
} else if (IsNumberStart(ch)) {
|
||||
ConsumeNumber(ch);
|
||||
} else {
|
||||
// TODO(bradnelson): Support unicode (probably via UnicodeCache).
|
||||
token_ = kParseError;
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Steps back by exactly one token. The current token is stashed so that the
// following Next() can replay it without touching the stream. Only a single
// level of rewind is supported.
void AsmJsScanner::Rewind() {
  DCHECK(!rewind_);
  rewind_ = true;

  // Shift the token window one position backwards.
  next_token_ = token_;
  token_ = preceding_token_;
  preceding_token_ = kUninitialized;

  // Newline and identifier-string information belongs to the token we just
  // left, so it is dropped.
  identifier_string_.clear();
  preceded_by_newline_ = false;
}
|
||||
|
||||
// Forgets every local identifier registered so far.
void AsmJsScanner::ResetLocals() {
  local_names_.clear();
}
|
||||
|
||||
#if DEBUG
// Only used for debugging.
// Maps |token| back to a printable name: printable ASCII tokens map to
// themselves, identifiers and keywords are found by a linear search of the
// three name tables, and the remaining fixed tokens come from the symbol and
// special-token lists. Slow by design.
std::string AsmJsScanner::Name(token_t token) const {
  if (token >= 32 && token < 127) {
    return std::string(1, static_cast<char>(token));
  }
  for (const auto& entry : local_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : global_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  for (const auto& entry : property_names_) {
    if (entry.second == token) {
      return entry.first;
    }
  }
  switch (token) {
#define V(rawname, name) \
  case kToken_##name:    \
    return rawname;
    LONG_SYMBOL_NAME_LIST(V)
#undef V
#define V(name, value, string_name) \
  case name:                        \
    return string_name;
    SPECIAL_TOKEN_LIST(V)
// Do not let the helper macro leak into the rest of the translation unit.
#undef V
    default:
      break;
  }
  UNREACHABLE();
  return "{unreachable}";
}
#endif
|
||||
|
||||
int AsmJsScanner::GetPosition() const {
|
||||
DCHECK(!rewind_);
|
||||
return static_cast<int>(stream_->pos());
|
||||
}
|
||||
|
||||
// Moves the stream to |pos|, discards all token history (including a pending
// rewind) and scans the token found at the new position.
void AsmJsScanner::Seek(int pos) {
  stream_->Seek(pos);

  // Nothing scanned before the seek is valid any more.
  rewind_ = false;
  next_token_ = kUninitialized;
  token_ = kUninitialized;
  preceding_token_ = kUninitialized;

  Next();
}
|
||||
|
||||
// Reads an identifier whose first character is |ch| and turns it into a
// token. Names following a '.' are resolved against the property table;
// other names are resolved against the local table and, outside local scope,
// the global table. An unknown name is given a fresh token id and recorded
// in the table that matches the current context.
void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
  // Collect characters for as long as they belong to the identifier.
  identifier_string_.clear();
  for (; IsIdentifierPart(ch); ch = stream_->Advance()) {
    identifier_string_.push_back(static_cast<char>(ch));
  }
  // The character that ended the identifier belongs to the next token.
  stream_->Back();

  // Property access: only the property table is consulted.
  if (preceding_token_ == '.') {
    const auto known = property_names_.find(identifier_string_);
    if (known != property_names_.end()) {
      token_ = known->second;
      return;
    }
    // New property names draw their ids from the global id space.
    CHECK(global_count_ < kMaxIdentifierCount);
    token_ = kGlobalsStart + global_count_++;
    property_names_[identifier_string_] = token_;
    return;
  }

  // Locals are always visible.
  const auto local = local_names_.find(identifier_string_);
  if (local != local_names_.end()) {
    token_ = local->second;
    return;
  }

  if (in_local_scope_) {
    // Unknown name in local scope: local ids count downwards from
    // kLocalsStart.
    CHECK(local_names_.size() < kMaxIdentifierCount);
    token_ = kLocalsStart - static_cast<token_t>(local_names_.size());
    local_names_[identifier_string_] = token_;
    return;
  }

  // Global scope: known globals (including keywords) win, otherwise allocate
  // the next global id.
  const auto global = global_names_.find(identifier_string_);
  if (global != global_names_.end()) {
    token_ = global->second;
    return;
  }
  CHECK(global_count_ < kMaxIdentifierCount);
  token_ = kGlobalsStart + global_count_++;
  global_names_[identifier_string_] = token_;
}
|
||||
|
||||
// Reads a numeric literal (or a lone '.') whose first character is |ch|.
// On success the token is kDouble when the literal contains a dot and
// kUnsigned otherwise, with the value available through AsDouble() /
// AsUnsigned(). A '.' that does not start a valid number is returned as the
// '.' token. Anything else that fails to parse, and any dot-less literal
// that does not fit in 32 unsigned bits, is a parse error.
void AsmJsScanner::ConsumeNumber(uc32 ch) {
  std::string number(1, static_cast<char>(ch));
  bool has_dot = ch == '.';
  for (;;) {
    ch = stream_->Advance();
    // Deliberately permissive filter: decimal and hex digits (which also
    // covers the 'b' binary prefix and 'e'/'E' exponents), the 'o'/'x'
    // prefixes and '.'. StringToDouble below does the real validation.
    const bool is_number_char = (ch >= '0' && ch <= '9') ||
                                (ch >= 'a' && ch <= 'f') ||
                                (ch >= 'A' && ch <= 'F') || ch == '.' ||
                                ch == 'o' || ch == 'x';
    // A sign is only part of the number directly after an exponent marker.
    const bool is_exponent_sign =
        (ch == '-' || ch == '+') &&
        (number.back() == 'e' || number.back() == 'E');
    if (!is_number_char && !is_exponent_sign) {
      break;
    }
    // TODO(bradnelson): Test weird cases ending in -.
    if (ch == '.') {
      has_dot = true;
    }
    number.push_back(static_cast<char>(ch));
  }
  // The character that ended the number belongs to the next token.
  stream_->Back();
  // Special case the most common number.
  if (number.size() == 1 && number[0] == '0') {
    unsigned_value_ = 0;
    token_ = kUnsigned;
    return;
  }
  // Pick out dot.
  if (number.size() == 1 && number[0] == '.') {
    token_ = '.';
    return;
  }
  // Decode numbers.
  UnicodeCache cache;
  double_value_ = StringToDouble(
      &cache,
      Vector<uint8_t>(
          const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(number.data())),
          static_cast<int>(number.size())),
      ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL);
  if (std::isnan(double_value_)) {
    // Check if string to number conversion didn't consume all the characters.
    // This happens if the character filter let through something invalid
    // like: 0123ef for example.
    // TODO(bradnelson): Check if this happens often enough to be a perf
    // problem.
    if (number[0] == '.') {
      // Give back everything after the dot and report just the '.' token.
      for (size_t k = 1; k < number.size(); ++k) {
        stream_->Back();
      }
      token_ = '.';
      return;
    }
    // Anything else that doesn't parse is an error.
    token_ = kParseError;
    return;
  }
  if (has_dot) {
    token_ = kDouble;
    return;
  }
  // Converting a double that is out of range for uint32_t is undefined
  // behaviour, so literals above 2^32 - 1 are rejected instead of converted.
  if (double_value_ > static_cast<double>(0xFFFFFFFFu)) {
    token_ = kParseError;
    return;
  }
  unsigned_value_ = static_cast<uint32_t>(double_value_);
  token_ = kUnsigned;
}
|
||||
|
||||
bool AsmJsScanner::ConsumeCComment() {
|
||||
for (;;) {
|
||||
uc32 ch = stream_->Advance();
|
||||
while (ch == '*') {
|
||||
ch = stream_->Advance();
|
||||
if (ch == '/') {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if (ch == kEndOfInput) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void AsmJsScanner::ConsumeCPPComment() {
|
||||
for (;;) {
|
||||
uc32 ch = stream_->Advance();
|
||||
if (ch == '\n' || ch == kEndOfInput) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reads a string literal whose opening quote |quote| has already been
// consumed. The only accepted contents are `use asm`, closed by the same
// quote character; everything else is a parse error.
void AsmJsScanner::ConsumeString(uc32 quote) {
  // Only string allowed is 'use asm' / "use asm".
  const char kDirective[] = "use asm";
  const size_t kDirectiveLength = sizeof(kDirective) - 1;
  for (size_t i = 0; i < kDirectiveLength; ++i) {
    if (stream_->Advance() != kDirective[i]) {
      token_ = kParseError;
      return;
    }
  }
  // The literal must end with the quote that opened it.
  const bool properly_closed = (stream_->Advance() == quote);
  if (!properly_closed) {
    token_ = kParseError;
    return;
  }
  token_ = kToken_UseAsm;
}
|
||||
|
||||
// Scans a token that starts with |ch|, one of '<', '>', '=' or '!'. Produces
// the two- or three-character comparison / shift token when the following
// characters form one, and the single character |ch| otherwise.
void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
  const uc32 second = stream_->Advance();

  // "<=", ">=", "==" and "!=".
  if (second == '=') {
    if (ch == '<') {
      token_ = kToken_LE;
    } else if (ch == '>') {
      token_ = kToken_GE;
    } else if (ch == '=') {
      token_ = kToken_EQ;
    } else if (ch == '!') {
      token_ = kToken_NE;
    } else {
      UNREACHABLE();
    }
    return;
  }

  // "<<".
  if (ch == '<' && second == '<') {
    token_ = kToken_SHL;
    return;
  }

  // ">>" or ">>>", decided by one more character of lookahead.
  if (ch == '>' && second == '>') {
    if (stream_->Advance() == '>') {
      token_ = kToken_SHR;
      return;
    }
    token_ = kToken_SAR;
    stream_->Back();
    return;
  }

  // Just the single character; the lookahead belongs to the next token.
  stream_->Back();
  token_ = ch;
}
|
||||
|
||||
// True if |ch| may begin an identifier: an ASCII letter, '_' or '$'.
bool AsmJsScanner::IsIdentifierStart(uc32 ch) {
  const bool is_upper = (ch >= 'A' && ch <= 'Z');
  const bool is_lower = (ch >= 'a' && ch <= 'z');
  const bool is_symbol = (ch == '_' || ch == '$');
  return is_upper || is_lower || is_symbol;
}
|
||||
|
||||
// True if |ch| may appear inside an identifier: anything that may start one,
// plus the decimal digits.
bool AsmJsScanner::IsIdentifierPart(uc32 ch) {
  if (ch >= '0' && ch <= '9') {
    return true;
  }
  return IsIdentifierStart(ch);
}
|
||||
|
||||
// True if |ch| may begin a numeric literal: a decimal digit or '.'.
bool AsmJsScanner::IsNumberStart(uc32 ch) {
  const bool is_digit = (ch >= '0' && ch <= '9');
  return is_digit || ch == '.';
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
158
src/asmjs/asm-scanner.h
Normal file
158
src/asmjs/asm-scanner.h
Normal file
@ -0,0 +1,158 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef V8_ASMJS_ASM_SCANNER_H_
|
||||
#define V8_ASMJS_ASM_SCANNER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "src/asmjs/asm-names.h"
|
||||
#include "src/base/logging.h"
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
class Utf16CharacterStream;
|
||||
|
||||
// A custom scanner to extract the token stream needed to parse valid
|
||||
// asm.js: http://asmjs.org/spec/latest/
|
||||
// This scanner intentionally avoids the portion of JavaScript lexing
|
||||
// that are not required to determine if code is valid asm.js code.
|
||||
// * Strings are disallowed except for 'use asm'.
|
||||
// * Only the subset of keywords needed to check asm.js invariants are
|
||||
// included.
|
||||
// * Identifiers are accumulated into local + global string tables
|
||||
// (for performance).
|
||||
class V8_EXPORT_PRIVATE AsmJsScanner {
|
||||
public:
|
||||
typedef int32_t token_t;
|
||||
|
||||
AsmJsScanner();
|
||||
// Pick the stream to parse (must be called before anything else).
|
||||
void SetStream(std::unique_ptr<Utf16CharacterStream> stream);
|
||||
|
||||
// Get current token.
|
||||
token_t Token() const { return token_; }
|
||||
// Advance to the next token.
|
||||
void Next();
|
||||
// Back up by one token.
|
||||
void Rewind();
|
||||
// Get raw string for current identifier.
|
||||
const std::string& GetIdentifierString() const {
|
||||
// Identifier strings don't work after a rewind.
|
||||
DCHECK(!rewind_);
|
||||
return identifier_string_;
|
||||
}
|
||||
// Check if we just passed a newline.
|
||||
bool IsPrecededByNewline() const {
|
||||
// Newline tracking doesn't work if you back up.
|
||||
DCHECK(!rewind_);
|
||||
return preceded_by_newline_;
|
||||
}
|
||||
|
||||
#if DEBUG
|
||||
// Debug only method to go from a token back to its name.
|
||||
// Slow, only use for debugging.
|
||||
std::string Name(token_t token) const;
|
||||
#endif
|
||||
|
||||
// Get current position (to use with Seek).
|
||||
int GetPosition() const;
|
||||
// Restores old position (token after that position).
|
||||
void Seek(int pos);
|
||||
|
||||
// Select whether identifiers are resolved in global or local scope,
|
||||
// and which scope new identifiers are added to.
|
||||
void EnterLocalScope() { in_local_scope_ = true; }
|
||||
void EnterGlobalScope() { in_local_scope_ = false; }
|
||||
// Drop all current local identifiers.
|
||||
void ResetLocals();
|
||||
|
||||
// Methods to check if a token is an identifier and which scope.
|
||||
bool IsLocal() const { return IsLocal(Token()); }
|
||||
bool IsGlobal() const { return IsGlobal(Token()); }
|
||||
static bool IsLocal(token_t token) { return token <= kLocalsStart; }
|
||||
static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
|
||||
// Methods to find the index position of an identifier (count starting from
|
||||
// 0 for each scope separately).
|
||||
static size_t LocalIndex(token_t token) {
|
||||
DCHECK(IsLocal(token));
|
||||
return -(token - kLocalsStart);
|
||||
}
|
||||
static size_t GlobalIndex(token_t token) {
|
||||
DCHECK(IsGlobal(token));
|
||||
return token - kGlobalsStart;
|
||||
}
|
||||
|
||||
// Methods to check if the current token is an asm.js "number" (contains a
|
||||
// dot) or an "unsigned" (a number without a dot).
|
||||
bool IsUnsigned() const { return Token() == kUnsigned; }
|
||||
uint64_t AsUnsigned() const { return unsigned_value_; }
|
||||
bool IsDouble() const { return Token() == kDouble; }
|
||||
double AsDouble() const { return double_value_; }
|
||||
|
||||
// clang-format off
|
||||
enum {
|
||||
// [-10000 .. -10000-kMaxIdentifierCount) :: Local identifiers
|
||||
// [-10000 .. -1) :: Builtin tokens like keywords
|
||||
// (also includes some special
|
||||
// ones like end of input)
|
||||
// 0 .. 255 :: Single char tokens
|
||||
// 256 .. 256+kMaxIdentifierCount :: Global identifiers
|
||||
kLocalsStart = -10000,
|
||||
#define V(name, _junk1, _junk2, _junk3) kToken_##name,
|
||||
STDLIB_MATH_FUNCTION_LIST(V)
|
||||
STDLIB_ARRAY_TYPE_LIST(V)
|
||||
#undef V
|
||||
#define V(name) kToken_##name,
|
||||
STDLIB_OTHER_LIST(V)
|
||||
STDLIB_MATH_VALUE_LIST(V)
|
||||
KEYWORD_NAME_LIST(V)
|
||||
#undef V
|
||||
#define V(rawname, name) kToken_##name,
|
||||
LONG_SYMBOL_NAME_LIST(V)
|
||||
#undef V
|
||||
#define V(name, value, string_name) name = value,
|
||||
SPECIAL_TOKEN_LIST(V)
|
||||
#undef V
|
||||
kGlobalsStart = 256,
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
private:
|
||||
std::unique_ptr<Utf16CharacterStream> stream_;
|
||||
token_t token_;
|
||||
token_t preceding_token_;
|
||||
token_t next_token_;
|
||||
bool rewind_;
|
||||
std::string identifier_string_;
|
||||
bool in_local_scope_;
|
||||
std::unordered_map<std::string, token_t> local_names_;
|
||||
std::unordered_map<std::string, token_t> global_names_;
|
||||
std::unordered_map<std::string, token_t> property_names_;
|
||||
int global_count_;
|
||||
double double_value_;
|
||||
uint64_t unsigned_value_;
|
||||
bool preceded_by_newline_;
|
||||
|
||||
// Consume multiple characters.
|
||||
void ConsumeIdentifier(uc32 ch);
|
||||
void ConsumeNumber(uc32 ch);
|
||||
bool ConsumeCComment();
|
||||
void ConsumeCPPComment();
|
||||
void ConsumeString(uc32 quote);
|
||||
void ConsumeCompareOrShift(uc32 ch);
|
||||
|
||||
// Classify character categories.
|
||||
bool IsIdentifierStart(uc32 ch);
|
||||
bool IsIdentifierPart(uc32 ch);
|
||||
bool IsNumberStart(uc32 ch);
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
||||
#endif
|
@ -551,6 +551,8 @@ DEFINE_BOOL(validate_asm, false, "validate asm.js modules before compiling")
|
||||
DEFINE_BOOL(suppress_asm_messages, false,
|
||||
"don't emit asm.js related messages (for golden file testing)")
|
||||
DEFINE_BOOL(trace_asm_time, false, "log asm.js timing info to the console")
|
||||
DEFINE_BOOL(trace_asm_scanner, false,
|
||||
"log tokens encountered by asm.js scanner")
|
||||
|
||||
DEFINE_BOOL(dump_wasm_module, false, "dump WASM module bytes")
|
||||
DEFINE_STRING(dump_wasm_module_path, NULL, "directory to dump wasm modules to")
|
||||
|
@ -6,6 +6,7 @@
|
||||
#define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
|
||||
|
||||
#include "include/v8.h" // for v8::ScriptCompiler
|
||||
#include "src/globals.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
@ -16,7 +17,7 @@ class Utf16CharacterStream;
|
||||
class RuntimeCallStats;
|
||||
class String;
|
||||
|
||||
class ScannerStream {
|
||||
class V8_EXPORT_PRIVATE ScannerStream {
|
||||
public:
|
||||
static Utf16CharacterStream* For(Handle<String> data);
|
||||
static Utf16CharacterStream* For(Handle<String> data, int start_pos,
|
||||
|
@ -1165,6 +1165,18 @@ Token::Value Scanner::ScanTemplateContinuation() {
|
||||
return ScanTemplateSpan();
|
||||
}
|
||||
|
||||
// Returns the internalized source URL, or a default-constructed handle when
// no source URL was recorded.
Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
  if (source_url_.length() <= 0) {
    return Handle<String>();
  }
  return source_url_.Internalize(isolate);
}
|
||||
|
||||
// Returns the internalized source-mapping URL, or a default-constructed
// handle when no source-mapping URL was recorded.
Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
  if (source_mapping_url_.length() <= 0) {
    return Handle<String>();
  }
  return source_mapping_url_.Internalize(isolate);
}
|
||||
|
||||
void Scanner::ScanDecimalDigits() {
|
||||
while (IsDecimalDigit(c0_))
|
||||
|
@ -330,18 +330,8 @@ class Scanner {
|
||||
Token::Value ScanTemplateStart();
|
||||
Token::Value ScanTemplateContinuation();
|
||||
|
||||
Handle<String> SourceUrl(Isolate* isolate) const {
|
||||
Handle<String> tmp;
|
||||
if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate);
|
||||
return tmp;
|
||||
}
|
||||
|
||||
Handle<String> SourceMappingUrl(Isolate* isolate) const {
|
||||
Handle<String> tmp;
|
||||
if (source_mapping_url_.length() > 0)
|
||||
tmp = source_mapping_url_.Internalize(isolate);
|
||||
return tmp;
|
||||
}
|
||||
Handle<String> SourceUrl(Isolate* isolate) const;
|
||||
Handle<String> SourceMappingUrl(Isolate* isolate) const;
|
||||
|
||||
bool FoundHtmlComment() const { return found_html_comment_; }
|
||||
|
||||
|
@ -417,6 +417,9 @@
|
||||
'arguments.h',
|
||||
'asmjs/asm-js.cc',
|
||||
'asmjs/asm-js.h',
|
||||
'asmjs/asm-names.h',
|
||||
'asmjs/asm-scanner.cc',
|
||||
'asmjs/asm-scanner.h',
|
||||
'asmjs/asm-typer.cc',
|
||||
'asmjs/asm-typer.h',
|
||||
'asmjs/asm-types.cc',
|
||||
|
@ -15,6 +15,7 @@ v8_executable("unittests") {
|
||||
"api/isolate-unittest.cc",
|
||||
"api/remote-object-unittest.cc",
|
||||
"api/v8-object-unittest.cc",
|
||||
"asmjs/asm-scanner-unittest.cc",
|
||||
"base/atomic-utils-unittest.cc",
|
||||
"base/bits-unittest.cc",
|
||||
"base/cpu-unittest.cc",
|
||||
|
290
test/unittests/asmjs/asm-scanner-unittest.cc
Normal file
290
test/unittests/asmjs/asm-scanner-unittest.cc
Normal file
@ -0,0 +1,290 @@
|
||||
// Copyright 2017 the V8 project authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "src/asmjs/asm-scanner.h"
|
||||
#include "src/objects.h"
|
||||
#include "src/parsing/scanner-character-streams.h"
|
||||
#include "src/parsing/scanner.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
|
||||
namespace v8 {
|
||||
namespace internal {
|
||||
|
||||
#define TOK(t) AsmJsScanner::kToken_##t
|
||||
|
||||
class AsmJsScannerTest : public ::testing::Test {
|
||||
protected:
|
||||
void SetupSource(const char* source) {
|
||||
scanner.SetStream(ScannerStream::ForTesting(source));
|
||||
}
|
||||
|
||||
void Skip(AsmJsScanner::token_t t) {
|
||||
CHECK_EQ(t, scanner.Token());
|
||||
scanner.Next();
|
||||
}
|
||||
|
||||
void SkipGlobal() {
|
||||
CHECK(scanner.IsGlobal());
|
||||
scanner.Next();
|
||||
}
|
||||
|
||||
void SkipLocal() {
|
||||
CHECK(scanner.IsLocal());
|
||||
scanner.Next();
|
||||
}
|
||||
|
||||
void CheckForEnd() { CHECK(scanner.Token() == AsmJsScanner::kEndOfInput); }
|
||||
|
||||
void CheckForParseError() {
|
||||
CHECK(scanner.Token() == AsmJsScanner::kParseError);
|
||||
}
|
||||
|
||||
AsmJsScanner scanner;
|
||||
};
|
||||
|
||||
// Scans a minimal function declaration token by token.
TEST_F(AsmJsScannerTest, SimpleFunction) {
  SetupSource("function foo() { return; }");
  Skip(TOK(function));
  // CHECK_EQ rather than DCHECK_EQ so the identifier string is verified in
  // release builds too, like in the other tests of this file.
  CHECK_EQ("foo", scanner.GetIdentifierString());
  SkipGlobal();
  Skip('(');
  Skip(')');
  Skip('{');
  // clang-format off
  Skip(TOK(return));
  // clang-format on
  Skip(';');
  Skip('}');
  CheckForEnd();
}
|
||||
|
||||
// Every keyword must come back as its dedicated token, in source order.
TEST_F(AsmJsScannerTest, JSKeywords) {
  SetupSource(
      "arguments break case const continue\n"
      "default do else eval for function\n"
      "if new return switch var while\n");
  // clang-format off
  const AsmJsScanner::token_t kExpected[] = {
      TOK(arguments), TOK(break),    TOK(case),   TOK(const),
      TOK(continue),  TOK(default),  TOK(do),     TOK(else),
      TOK(eval),      TOK(for),      TOK(function), TOK(if),
      TOK(new),       TOK(return),   TOK(switch), TOK(var),
      TOK(while),
  };
  // clang-format on
  for (AsmJsScanner::token_t expected : kExpected) {
    Skip(expected);
  }
  CheckForEnd();
}
|
||||
|
||||
// Operators separated by whitespace.
TEST_F(AsmJsScannerTest, JSOperatorsSpread) {
  SetupSource(
      "+ - * / % & | ^ ~ << >> >>>\n"
      "< > <= >= == !=\n");
  const AsmJsScanner::token_t kExpected[] = {
      '+',      '-',      '*',      '/',      '%',     '&',
      '|',      '^',      '~',      TOK(SHL), TOK(SAR), TOK(SHR),
      '<',      '>',      TOK(LE),  TOK(GE),  TOK(EQ), TOK(NE),
  };
  for (AsmJsScanner::token_t expected : kExpected) {
    Skip(expected);
  }
  CheckForEnd();
}
|
||||
|
||||
// Operators packed together with as little whitespace as possible.
TEST_F(AsmJsScannerTest, JSOperatorsTight) {
  SetupSource(
      "+-*/%&|^~<<>> >>>\n"
      "<><=>= ==!=\n");
  const AsmJsScanner::token_t kExpected[] = {
      '+',      '-',      '*',      '/',      '%',     '&',
      '|',      '^',      '~',      TOK(SHL), TOK(SAR), TOK(SHR),
      '<',      '>',      TOK(LE),  TOK(GE),  TOK(EQ), TOK(NE),
  };
  for (AsmJsScanner::token_t expected : kExpected) {
    Skip(expected);
  }
  CheckForEnd();
}
|
||||
|
||||
// The directive is recognised with either quote style.
TEST_F(AsmJsScannerTest, UsesOfAsm) {
  SetupSource("'use asm' \"use asm\"\n");
  for (int i = 0; i < 2; ++i) {
    Skip(TOK(UseAsm));
  }
  CheckForEnd();
}
|
||||
|
||||
// Without selecting a scope, identifiers are global and every occurrence of
// the same name yields the same token.
TEST_F(AsmJsScannerTest, DefaultGlobalScope) {
  SetupSource("var x = x + x;");
  Skip(TOK(var));
  CHECK_EQ("x", scanner.GetIdentifierString());
  const AsmJsScanner::token_t x_token = scanner.Token();
  SkipGlobal();
  const AsmJsScanner::token_t kRest[] = {'=', x_token, '+', x_token, ';'};
  for (AsmJsScanner::token_t expected : kRest) {
    Skip(expected);
  }
  CheckForEnd();
}
|
||||
|
||||
// Same as the default case, but with the global scope selected explicitly.
TEST_F(AsmJsScannerTest, GlobalScope) {
  SetupSource("var x = x + x;");
  scanner.EnterGlobalScope();
  Skip(TOK(var));
  CHECK_EQ("x", scanner.GetIdentifierString());
  const AsmJsScanner::token_t x_token = scanner.Token();
  SkipGlobal();
  const AsmJsScanner::token_t kRest[] = {'=', x_token, '+', x_token, ';'};
  for (AsmJsScanner::token_t expected : kRest) {
    Skip(expected);
  }
  CheckForEnd();
}
|
||||
|
||||
// In local scope a new identifier becomes a local, and later occurrences of
// the name resolve to that same token.
TEST_F(AsmJsScannerTest, LocalScope) {
  SetupSource("var x = x + x;");
  scanner.EnterLocalScope();
  Skip(TOK(var));
  CHECK_EQ("x", scanner.GetIdentifierString());
  const AsmJsScanner::token_t x_token = scanner.Token();
  SkipLocal();
  const AsmJsScanner::token_t kRest[] = {'=', x_token, '+', x_token, ';'};
  for (AsmJsScanner::token_t expected : kRest) {
    Skip(expected);
  }
  CheckForEnd();
}
|
||||
|
||||
// Literals without a dot are unsigned, literals with a dot are doubles.
TEST_F(AsmJsScannerTest, Numbers) {
  SetupSource("1 1.2 0x1f 1.e3");

  // "1"
  CHECK(scanner.IsUnsigned());
  CHECK_EQ(1, scanner.AsUnsigned());
  scanner.Next();

  // "1.2"
  CHECK(scanner.IsDouble());
  CHECK_EQ(1.2, scanner.AsDouble());
  scanner.Next();

  // "0x1f"
  CHECK(scanner.IsUnsigned());
  CHECK_EQ(31, scanner.AsUnsigned());
  scanner.Next();

  // "1.e3"
  CHECK(scanner.IsDouble());
  CHECK_EQ(1.0e3, scanner.AsDouble());
  scanner.Next();

  CheckForEnd();
}
|
||||
|
||||
// A dot followed by something that is not a valid number scans as a lone '.'
// and the remainder is then a parse error.
TEST_F(AsmJsScannerTest, BadNumber) {
  SetupSource(".123fe");
  Skip('.');
  CheckForParseError();
}
|
||||
|
||||
// After stepping past a token, a rewind must bring that same token back.
TEST_F(AsmJsScannerTest, Rewind1) {
  SetupSource("+ - * /");
  const AsmJsScanner::token_t kOperators[] = {'+', '-', '*', '/'};
  for (AsmJsScanner::token_t op : kOperators) {
    Skip(op);
    scanner.Rewind();
    Skip(op);
  }
  CheckForEnd();
}
|
||||
|
||||
// Line comments hide the rest of their line; block comments hide only their
// own contents.
TEST_F(AsmJsScannerTest, Comments) {
  SetupSource(
      "var // This is a test /* */ eval\n"
      "var /* test *** test */ eval\n"
      "function /* this */ ^");
  const AsmJsScanner::token_t kExpected[] = {
      TOK(var), TOK(var), TOK(eval), TOK(function), '^',
  };
  for (AsmJsScanner::token_t expected : kExpected) {
    Skip(expected);
  }
  CheckForEnd();
}
|
||||
|
||||
// A block comment that is never closed is a parse error.
TEST_F(AsmJsScannerTest, TrailingCComment) {
  SetupSource("var /* test\n");
  Skip(TOK(var));
  CheckForParseError();
}
|
||||
|
||||
// Seeking to a saved position discards any pending rewind and resumes with
// the token that follows that position.
TEST_F(AsmJsScannerTest, Seeking) {
  SetupSource("var eval do arguments function break\n");
  Skip(TOK(var));
  const int saved_position = scanner.GetPosition();

  // Walk forward, rewinding along the way.
  Skip(TOK(eval));
  Skip(TOK(do));
  Skip(TOK(arguments));
  scanner.Rewind();
  Skip(TOK(arguments));
  scanner.Rewind();

  // Jump back and scan the remainder again.
  scanner.Seek(saved_position);
  const AsmJsScanner::token_t kAfterSeek[] = {
      TOK(do), TOK(arguments), TOK(function), TOK(break),
  };
  for (AsmJsScanner::token_t expected : kAfterSeek) {
    Skip(expected);
  }
  CheckForEnd();
}
|
||||
|
||||
// The token that follows a line break must report that it was preceded by a
// newline.
TEST_F(AsmJsScannerTest, Newlines) {
  SetupSource(
      "var x = 1\n"
      "var y = 2\n");

  // First statement: "var x = 1".
  Skip(TOK(var));
  scanner.Next();  // x
  Skip('=');
  scanner.Next();  // 1
  CHECK(scanner.IsPrecededByNewline());

  // Second statement: "var y = 2".
  Skip(TOK(var));
  scanner.Next();  // y
  Skip('=');
  scanner.Next();  // 2
  CHECK(scanner.IsPrecededByNewline());

  CheckForEnd();
}
|
||||
|
||||
} // namespace internal
|
||||
} // namespace v8
|
@ -13,6 +13,7 @@
|
||||
'api/isolate-unittest.cc',
|
||||
'api/remote-object-unittest.cc',
|
||||
'api/v8-object-unittest.cc',
|
||||
'asmjs/asm-scanner-unittest.cc',
|
||||
'base/atomic-utils-unittest.cc',
|
||||
'base/bits-unittest.cc',
|
||||
'base/cpu-unittest.cc',
|
||||
|
Loading…
Reference in New Issue
Block a user