diff --git a/BUILD.gn b/BUILD.gn
index ffc15f9c56..568ab50f34 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -968,6 +968,9 @@ v8_source_set("v8_base") {
     "src/arguments.h",
     "src/asmjs/asm-js.cc",
     "src/asmjs/asm-js.h",
+    "src/asmjs/asm-names.h",
+    "src/asmjs/asm-scanner.cc",
+    "src/asmjs/asm-scanner.h",
     "src/asmjs/asm-typer.cc",
     "src/asmjs/asm-typer.h",
     "src/asmjs/asm-types.cc",
diff --git a/src/asmjs/asm-names.h b/src/asmjs/asm-names.h
new file mode 100644
index 0000000000..1cc151dc4c
--- /dev/null
+++ b/src/asmjs/asm-names.h
@@ -0,0 +1,110 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_ASMJS_ASM_NAMES_H_
+#define V8_ASMJS_ASM_NAMES_H_
+
+#define STDLIB_MATH_VALUE_LIST(V) \
+  V(E)                            \
+  V(LN10)                         \
+  V(LN2)                          \
+  V(LOG2E)                        \
+  V(LOG10E)                       \
+  V(PI)                           \
+  V(SQRT1_2)                      \
+  V(SQRT2)
+
+// V(stdlib.Math.<name>, Name, wasm-opcode, asm-js-type)
+#define STDLIB_MATH_FUNCTION_MONOMORPHIC_LIST(V) \
+  V(acos, Acos, kExprF64Acos, dq2d)              \
+  V(asin, Asin, kExprF64Asin, dq2d)              \
+  V(atan, Atan, kExprF64Atan, dq2d)              \
+  V(cos, Cos, kExprF64Cos, dq2d)                 \
+  V(sin, Sin, kExprF64Sin, dq2d)                 \
+  V(tan, Tan, kExprF64Tan, dq2d)                 \
+  V(exp, Exp, kExprF64Exp, dq2d)                 \
+  V(log, Log, kExprF64Log, dq2d)                 \
+  V(atan2, Atan2, kExprF64Atan2, dqdq2d)         \
+  V(pow, Pow, kExprF64Pow, dqdq2d)               \
+  V(imul, Imul, kExprI32Mul, ii2s)               \
+  V(clz32, Clz32, kExprI32Clz, i2s)
+
+// V(stdlib.Math.<name>, Name, unused, asm-js-type)
+#define STDLIB_MATH_FUNCTION_CEIL_LIKE_LIST(V) \
+  V(ceil, Ceil, x, ceil_like)                  \
+  V(floor, Floor, x, ceil_like)                \
+  V(sqrt, Sqrt, x, ceil_like)
+
+// V(stdlib.Math.<name>, Name, unused, asm-js-type)
+#define STDLIB_MATH_FUNCTION_LIST(V)       \
+  V(min, Min, x, minmax)                   \
+  V(max, Max, x, minmax)                   \
+  V(abs, Abs, x, abs)                      \
+  V(fround, Fround, x, fround)             \
+  STDLIB_MATH_FUNCTION_MONOMORPHIC_LIST(V) \
+  STDLIB_MATH_FUNCTION_CEIL_LIKE_LIST(V)
+
+// V(stdlib.<name>, wasm-load-type, wasm-store-type, wasm-type)
+#define STDLIB_ARRAY_TYPE_LIST(V)    \
+  V(Int8Array, Mem8S, Mem8, I32)     \
+  V(Uint8Array, Mem8U, Mem8, I32)    \
+  V(Int16Array, Mem16S, Mem16, I32)  \
+  V(Uint16Array, Mem16U, Mem16, I32) \
+  V(Int32Array, Mem, Mem, I32)       \
+  V(Uint32Array, Mem, Mem, I32)      \
+  V(Float32Array, Mem, Mem, F32)     \
+  V(Float64Array, Mem, Mem, F64)
+
+#define STDLIB_OTHER_LIST(V) \
+  V(Infinity)                \
+  V(NaN)                     \
+  V(Math)
+
+// clang-format off (for return)
+#define KEYWORD_NAME_LIST(V) \
+  V(arguments)               \
+  V(break)                   \
+  V(case)                    \
+  V(const)                   \
+  V(continue)                \
+  V(default)                 \
+  V(do)                      \
+  V(else)                    \
+  V(eval)                    \
+  V(for)                     \
+  V(function)                \
+  V(if)                      \
+  V(new)                     \
+  V(return )                 \
+  V(switch)                  \
+  V(var)                     \
+  V(while)
+// clang-format on
+
+// V(token-string, token-name)
+#define LONG_SYMBOL_NAME_LIST(V) \
+  V("<=", LE)                    \
+  V(">=", GE)                    \
+  V("==", EQ)                    \
+  V("!=", NE)                    \
+  V("<<", SHL)                   \
+  V(">>", SAR)                   \
+  V(">>>", SHR)                  \
+  V("'use asm'", UseAsm)
+
+// clang-format off
+#define SIMPLE_SINGLE_TOKEN_LIST(V)                                     \
+  V('+') V('-') V('*') V('%') V('~') V('^') V('&') V('|') V('(') V(')') \
+  V('[') V(']') V('{') V('}') V(':') V(';') V(',') V('?')
+// clang-format on
+
+// V(name, value, string-name)
+#define SPECIAL_TOKEN_LIST(V)            \
+  V(kUninitialized, 0, "{uninitalized}") \
+  V(kEndOfInput, -1, "{end of input}")   \
+  V(kParseError, -2, "{parse error}")    \
+  V(kUnsigned, -3, "{unsigned value}")   \
+  V(kDouble, -4, "{double value}")
+
+#endif  // V8_ASMJS_ASM_NAMES_H_
diff --git a/src/asmjs/asm-scanner.cc b/src/asmjs/asm-scanner.cc
new file mode 100644
index 0000000000..949b44a65f
--- /dev/null
+++ b/src/asmjs/asm-scanner.cc
@@ -0,0 +1,454 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/asmjs/asm-scanner.h"
+
+#include "src/conversions.h"
+#include "src/flags.h"
+#include "src/parsing/scanner.h"
+#include "src/unicode-cache.h"
+
+namespace v8 {
+namespace internal {
+
+namespace {
+// Cap number of identifiers to ensure we can assign both global and
+// local ones a token id in the range of an int32_t.
+static const int kMaxIdentifierCount = 0xf000000;
+}  // namespace
+
+// Pre-populates the name tables: stdlib members go into property_names_
+// (consulted right after a '.'), keywords go into global_names_.
+AsmJsScanner::AsmJsScanner()
+    : token_(kUninitialized),
+      preceding_token_(kUninitialized),
+      next_token_(kUninitialized),
+      rewind_(false),
+      in_local_scope_(false),
+      global_count_(0),
+      double_value_(0.0),
+      unsigned_value_(0),
+      preceded_by_newline_(false) {
+#define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
+  STDLIB_MATH_FUNCTION_LIST(V)
+  STDLIB_ARRAY_TYPE_LIST(V)
+#undef V
+#define V(name) property_names_[#name] = kToken_##name;
+  STDLIB_MATH_VALUE_LIST(V)
+  STDLIB_OTHER_LIST(V)
+#undef V
+#define V(name) global_names_[#name] = kToken_##name;
+  KEYWORD_NAME_LIST(V)
+#undef V
+}
+
+// Takes ownership of the stream and scans the first token.
+void AsmJsScanner::SetStream(std::unique_ptr<Utf16CharacterStream> stream) {
+  stream_ = std::move(stream);
+  Next();
+}
+
+// Scans the next token into token_, skipping whitespace and comments. Once
+// the token is kEndOfInput or kParseError, further calls leave it unchanged.
+void AsmJsScanner::Next() {
+  if (rewind_) {
+    preceding_token_ = token_;
+    token_ = next_token_;
+    next_token_ = kUninitialized;
+    rewind_ = false;
+    return;
+  }
+
+  if (token_ == kEndOfInput || token_ == kParseError) {
+    return;
+  }
+
+#if DEBUG
+  if (FLAG_trace_asm_scanner) {
+    if (Token() == kDouble) {
+      PrintF("%lf ", AsDouble());
+    } else if (Token() == kUnsigned) {
+      PrintF("%" PRIu64 " ", AsUnsigned());
+    } else {
+      std::string name = Name(Token());
+      PrintF("%s ", name.c_str());
+    }
+  }
+#endif
+
+  preceded_by_newline_ = false;
+  preceding_token_ = token_;
+  for (;;) {
+    uc32 ch = stream_->Advance();
+    switch (ch) {
+      case ' ':
+      case '\t':
+      case '\r':
+        // Ignore whitespace.
+        break;
+
+      case '\n':
+        // Track when we've passed a newline for optional semicolon support,
+        // but keep scanning.
+        preceded_by_newline_ = true;
+        break;
+
+      case kEndOfInput:
+        token_ = kEndOfInput;
+        return;
+
+      case '\'':
+      case '"':
+        ConsumeString(ch);
+        return;
+
+      case '/':
+        ch = stream_->Advance();
+        if (ch == '/') {
+          ConsumeCPPComment();
+        } else if (ch == '*') {
+          if (!ConsumeCComment()) {
+            token_ = kParseError;
+            return;
+          }
+        } else {
+          stream_->Back();
+          token_ = '/';
+          return;
+        }
+        // Breaks out of switch, but loops again (i.e. the case when we parsed
+        // a comment, but need to continue to look for the next token).
+        break;
+
+      case '<':
+      case '>':
+      case '=':
+      case '!':
+        ConsumeCompareOrShift(ch);
+        return;
+
+#define V(single_char_token) case single_char_token:
+        SIMPLE_SINGLE_TOKEN_LIST(V)
+#undef V
+        // Use fixed token IDs for ASCII.
+        token_ = ch;
+        return;
+
+      default:
+        if (IsIdentifierStart(ch)) {
+          ConsumeIdentifier(ch);
+        } else if (IsNumberStart(ch)) {
+          ConsumeNumber(ch);
+        } else {
+          // TODO(bradnelson): Support unicode (probably via UnicodeCache).
+          token_ = kParseError;
+        }
+        return;
+    }
+  }
+}
+
+// Steps back to the preceding token. Only a single step is supported.
+void AsmJsScanner::Rewind() {
+  DCHECK(!rewind_);
+  next_token_ = token_;
+  token_ = preceding_token_;
+  preceding_token_ = kUninitialized;
+  rewind_ = true;
+  preceded_by_newline_ = false;
+  identifier_string_.clear();
+}
+
+void AsmJsScanner::ResetLocals() { local_names_.clear(); }
+
+#if DEBUG
+// Only used for debugging.
+std::string AsmJsScanner::Name(token_t token) const {
+  if (token >= 32 && token < 127) {
+    return std::string(1, static_cast<char>(token));
+  }
+  for (auto& i : local_names_) {
+    if (i.second == token) {
+      return i.first;
+    }
+  }
+  for (auto& i : global_names_) {
+    if (i.second == token) {
+      return i.first;
+    }
+  }
+  for (auto& i : property_names_) {
+    if (i.second == token) {
+      return i.first;
+    }
+  }
+  switch (token) {
+#define V(rawname, name) \
+  case kToken_##name:    \
+    return rawname;
+    LONG_SYMBOL_NAME_LIST(V)
+#undef V
+#define V(name, value, string_name) \
+  case name:                        \
+    return string_name;
+    SPECIAL_TOKEN_LIST(V)
+#undef V
+    default:
+      break;
+  }
+  UNREACHABLE();
+  return "{unreachable}";
+}
+#endif
+
+int AsmJsScanner::GetPosition() const {
+  DCHECK(!rewind_);
+  return static_cast<int>(stream_->pos());
+}
+
+// Moves the stream to pos, discards the token state and scans the next token.
+void AsmJsScanner::Seek(int pos) {
+  stream_->Seek(pos);
+  preceding_token_ = kUninitialized;
+  token_ = kUninitialized;
+  next_token_ = kUninitialized;
+  rewind_ = false;
+  Next();
+}
+
+// Scans the identifier starting with ch and maps it to a token. Unknown names
+// are given a fresh id in the table matching the current context.
+void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
+  // Consume characters while still part of the identifier.
+  identifier_string_.clear();
+  while (IsIdentifierPart(ch)) {
+    identifier_string_ += ch;
+    ch = stream_->Advance();
+  }
+  // Go back one for next time.
+  stream_->Back();
+
+  // Decode what the identifier means.
+  if (preceding_token_ == '.') {
+    auto i = property_names_.find(identifier_string_);
+    if (i != property_names_.end()) {
+      token_ = i->second;
+      return;
+    }
+  } else {
+    {
+      auto i = local_names_.find(identifier_string_);
+      if (i != local_names_.end()) {
+        token_ = i->second;
+        return;
+      }
+    }
+    if (!in_local_scope_) {
+      auto i = global_names_.find(identifier_string_);
+      if (i != global_names_.end()) {
+        token_ = i->second;
+        return;
+      }
+    }
+  }
+  if (preceding_token_ == '.') {
+    CHECK(global_count_ < kMaxIdentifierCount);
+    token_ = kGlobalsStart + global_count_++;
+    property_names_[identifier_string_] = token_;
+  } else if (in_local_scope_) {
+    CHECK(local_names_.size() < kMaxIdentifierCount);
+    token_ = kLocalsStart - static_cast<token_t>(local_names_.size());
+    local_names_[identifier_string_] = token_;
+  } else {
+    CHECK(global_count_ < kMaxIdentifierCount);
+    token_ = kGlobalsStart + global_count_++;
+    global_names_[identifier_string_] = token_;
+  }
+}
+
+// Scans a numeric literal (or a lone '.') whose first character is ch. Sets
+// kDouble if the literal contains a dot, kUnsigned otherwise.
+void AsmJsScanner::ConsumeNumber(uc32 ch) {
+  std::string number;
+  number = ch;
+  bool has_dot = ch == '.';
+  // Literals with a radix marker (b/o/x) have no exponent, so a sign after
+  // 'e'/'E' starts a new token there (0x1e+1 is 0x1e, '+', 1).
+  bool has_prefix = false;
+  for (;;) {
+    ch = stream_->Advance();
+    if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
+        (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
+        ch == 'x' ||
+        ((ch == '-' || ch == '+') && !has_prefix &&
+         (number[number.size() - 1] == 'e' ||
+          number[number.size() - 1] == 'E'))) {
+      // TODO(bradnelson): Test weird cases ending in -.
+      if (ch == '.') {
+        has_dot = true;
+      }
+      if (ch == 'b' || ch == 'o' || ch == 'x') {
+        has_prefix = true;
+      }
+      number.push_back(ch);
+    } else {
+      break;
+    }
+  }
+  stream_->Back();
+  // Special case the most common number.
+  if (number.size() == 1 && number[0] == '0') {
+    unsigned_value_ = 0;
+    token_ = kUnsigned;
+    return;
+  }
+  // Pick out dot.
+  if (number.size() == 1 && number[0] == '.') {
+    token_ = '.';
+    return;
+  }
+  // Decode numbers.
+  UnicodeCache cache;
+  double_value_ = StringToDouble(
+      &cache,
+      Vector<uint8_t>(
+          const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(number.data())),
+          static_cast<int>(number.size())),
+      ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL);
+  if (std::isnan(double_value_)) {
+    // Check if string to number conversion didn't consume all the characters.
+    // This happens if the character filter let through something invalid
+    // like: 0123ef for example.
+    // TODO(bradnelson): Check if this happens often enough to be a perf
+    // problem.
+    if (number[0] == '.') {
+      for (size_t k = 1; k < number.size(); ++k) {
+        stream_->Back();
+      }
+      token_ = '.';
+      return;
+    }
+    // Anything else that doesn't parse is an error.
+    token_ = kParseError;
+    return;
+  }
+  if (has_dot) {
+    token_ = kDouble;
+  } else {
+    // Converting a double outside the uint64_t range (2^64 and above,
+    // including infinity) is undefined behavior, so reject such literals.
+    if (double_value_ >= 18446744073709551616.0) {
+      token_ = kParseError;
+      return;
+    }
+    unsigned_value_ = static_cast<uint64_t>(double_value_);
+    token_ = kUnsigned;
+  }
+}
+
+// Skips the rest of a /* ... */ comment. Returns false if the input ends
+// before the closing delimiter.
+bool AsmJsScanner::ConsumeCComment() {
+  for (;;) {
+    uc32 ch = stream_->Advance();
+    while (ch == '*') {
+      ch = stream_->Advance();
+      if (ch == '/') {
+        return true;
+      }
+    }
+    // A comment that spans lines counts as a newline for optional
+    // semicolon support.
+    if (ch == '\n') {
+      preceded_by_newline_ = true;
+    }
+    if (ch == kEndOfInput) {
+      return false;
+    }
+  }
+}
+
+// Skips the rest of a // comment, including the newline that ends it.
+void AsmJsScanner::ConsumeCPPComment() {
+  for (;;) {
+    uc32 ch = stream_->Advance();
+    if (ch == '\n') {
+      // The newline ending the comment still counts for optional semicolon
+      // support.
+      preceded_by_newline_ = true;
+      return;
+    }
+    if (ch == kEndOfInput) {
+      return;
+    }
+  }
+}
+
+// Matches the rest of a string literal opened by quote against "use asm".
+void AsmJsScanner::ConsumeString(uc32 quote) {
+  // Only string allowed is 'use asm' / "use asm".
+  const char* expected = "use asm";
+  for (; *expected != '\0'; ++expected) {
+    if (stream_->Advance() != *expected) {
+      token_ = kParseError;
+      return;
+    }
+  }
+  if (stream_->Advance() != quote) {
+    token_ = kParseError;
+    return;
+  }
+  token_ = kToken_UseAsm;
+}
+
+// Scans a comparison, shift or single-character operator starting with ch.
+void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
+  uc32 next_ch = stream_->Advance();
+  if (next_ch == '=') {
+    switch (ch) {
+      case '<':
+        token_ = kToken_LE;
+        break;
+      case '>':
+        token_ = kToken_GE;
+        break;
+      case '=':
+        token_ = kToken_EQ;
+        break;
+      case '!':
+        token_ = kToken_NE;
+        break;
+      default:
+        UNREACHABLE();
+    }
+  } else if (ch == '<' && next_ch == '<') {
+    token_ = kToken_SHL;
+  } else if (ch == '>' && next_ch == '>') {
+    if (stream_->Advance() == '>') {
+      token_ = kToken_SHR;
+    } else {
+      token_ = kToken_SAR;
+      stream_->Back();
+    }
+  } else {
+    stream_->Back();
+    token_ = ch;
+  }
+}
+
+bool AsmJsScanner::IsIdentifierStart(uc32 ch) {
+  return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' ||
+         ch == '$';
+}
+
+bool AsmJsScanner::IsIdentifierPart(uc32 ch) {
+  return IsIdentifierStart(ch) || (ch >= '0' && ch <= '9');
+}
+
+bool AsmJsScanner::IsNumberStart(uc32 ch) {
+  return ch == '.' || (ch >= '0' && ch <= '9');
+}
+
+}  // namespace internal
+}  // namespace v8
diff --git a/src/asmjs/asm-scanner.h b/src/asmjs/asm-scanner.h
new file mode 100644
index 0000000000..1e4b9f3d3f
--- /dev/null
+++ b/src/asmjs/asm-scanner.h
@@ -0,0 +1,158 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_ASMJS_ASM_SCANNER_H_
+#define V8_ASMJS_ASM_SCANNER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include "src/asmjs/asm-names.h"
+#include "src/base/logging.h"
+#include "src/globals.h"
+
+namespace v8 {
+namespace internal {
+
+class Utf16CharacterStream;
+
+// A custom scanner to extract the token stream needed to parse valid
+// asm.js: http://asmjs.org/spec/latest/
+// This scanner intentionally avoids the portions of JavaScript lexing
+// that are not required to determine if code is valid asm.js code.
+// * Strings are disallowed except for 'use asm'.
+// * Only the subset of keywords needed to check asm.js invariants is
+//   included.
+// * Identifiers are accumulated into local + global string tables
+//   (for performance).
+class V8_EXPORT_PRIVATE AsmJsScanner {
+ public:
+  typedef int32_t token_t;
+
+  AsmJsScanner();
+  // Pick the stream to parse (must be called before anything else).
+  void SetStream(std::unique_ptr<Utf16CharacterStream> stream);
+
+  // Get current token.
+  token_t Token() const { return token_; }
+  // Advance to the next token.
+  void Next();
+  // Back up by one token.
+  void Rewind();
+  // Get raw string for current identifier.
+  const std::string& GetIdentifierString() const {
+    // Identifier strings don't work after a rewind.
+    DCHECK(!rewind_);
+    return identifier_string_;
+  }
+  // Check if we just passed a newline.
+  bool IsPrecededByNewline() const {
+    // Newline tracking doesn't work if you back up.
+    DCHECK(!rewind_);
+    return preceded_by_newline_;
+  }
+
+#if DEBUG
+  // Debug only method to go from a token back to its name.
+  // Slow, only use for debugging.
+  std::string Name(token_t token) const;
+#endif
+
+  // Get current position (to use with Seek).
+  int GetPosition() const;
+  // Restores old position (token after that position).
+  void Seek(int pos);
+
+  // Select whether identifiers are resolved in global or local scope,
+  // and which scope new identifiers are added to.
+  void EnterLocalScope() { in_local_scope_ = true; }
+  void EnterGlobalScope() { in_local_scope_ = false; }
+  // Drop all current local identifiers.
+  void ResetLocals();
+
+  // Methods to check if a token is an identifier and which scope.
+  bool IsLocal() const { return IsLocal(Token()); }
+  bool IsGlobal() const { return IsGlobal(Token()); }
+  static bool IsLocal(token_t token) { return token <= kLocalsStart; }
+  static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
+  // Methods to find the index position of an identifier (count starting from
+  // 0 for each scope separately).
+  static size_t LocalIndex(token_t token) {
+    DCHECK(IsLocal(token));
+    return -(token - kLocalsStart);
+  }
+  static size_t GlobalIndex(token_t token) {
+    DCHECK(IsGlobal(token));
+    return token - kGlobalsStart;
+  }
+
+  // Methods to check if the current token is an asm.js "number" (contains a
+  // dot) or an "unsigned" (a number without a dot).
+  bool IsUnsigned() const { return Token() == kUnsigned; }
+  uint64_t AsUnsigned() const { return unsigned_value_; }
+  bool IsDouble() const { return Token() == kDouble; }
+  double AsDouble() const { return double_value_; }
+
+  // clang-format off
+  enum {
+    // [-10000 .. -10000-kMaxIdentifierCount) :: Local identifiers
+    // (-10000 .. -1]                         :: Builtin tokens like keywords
+    //                                           (also includes some special
+    //                                            ones like end of input)
+    // 0        .. 255                        :: Single char tokens
+    // 256      .. 256+kMaxIdentifierCount    :: Global identifiers
+    kLocalsStart = -10000,
+#define V(name, _junk1, _junk2, _junk3) kToken_##name,
+    STDLIB_MATH_FUNCTION_LIST(V)
+    STDLIB_ARRAY_TYPE_LIST(V)
+#undef V
+#define V(name) kToken_##name,
+    STDLIB_OTHER_LIST(V)
+    STDLIB_MATH_VALUE_LIST(V)
+    KEYWORD_NAME_LIST(V)
+#undef V
+#define V(rawname, name) kToken_##name,
+    LONG_SYMBOL_NAME_LIST(V)
+#undef V
+#define V(name, value, string_name) name = value,
+    SPECIAL_TOKEN_LIST(V)
+#undef V
+    kGlobalsStart = 256,
+  };
+  // clang-format on
+
+ private:
+  std::unique_ptr<Utf16CharacterStream> stream_;
+  token_t token_;
+  token_t preceding_token_;
+  token_t next_token_;
+  bool rewind_;
+  std::string identifier_string_;
+  bool in_local_scope_;
+  std::unordered_map<std::string, token_t> local_names_;
+  std::unordered_map<std::string, token_t> global_names_;
+  std::unordered_map<std::string, token_t> property_names_;
+  int global_count_;
+  double double_value_;
+  uint64_t unsigned_value_;
+  bool preceded_by_newline_;
+
+  // Consume multiple characters.
+  void ConsumeIdentifier(uc32 ch);
+  void ConsumeNumber(uc32 ch);
+  bool ConsumeCComment();
+  void ConsumeCPPComment();
+  void ConsumeString(uc32 quote);
+  void ConsumeCompareOrShift(uc32 ch);
+
+  // Classify character categories.
+  bool IsIdentifierStart(uc32 ch);
+  bool IsIdentifierPart(uc32 ch);
+  bool IsNumberStart(uc32 ch);
+};
+
+}  // namespace internal
+}  // namespace v8
+#endif  // V8_ASMJS_ASM_SCANNER_H_
diff --git a/src/flag-definitions.h b/src/flag-definitions.h
index 9c07f97c2a..17cd8f6e37 100644
--- a/src/flag-definitions.h
+++ b/src/flag-definitions.h
@@ -551,6 +551,8 @@ DEFINE_BOOL(validate_asm, false, "validate asm.js modules before compiling")
 DEFINE_BOOL(suppress_asm_messages, false,
             "don't emit asm.js related messages (for golden file testing)")
 DEFINE_BOOL(trace_asm_time, false, "log asm.js timing info to the console")
+DEFINE_BOOL(trace_asm_scanner, false,
+            "log tokens encountered by asm.js scanner")
 DEFINE_BOOL(dump_wasm_module, false, "dump WASM module bytes")
 DEFINE_STRING(dump_wasm_module_path, NULL, "directory to dump wasm modules to")
 
diff --git a/src/parsing/scanner-character-streams.h b/src/parsing/scanner-character-streams.h
index 291765cee4..9f7d2bd5fb 100644
--- a/src/parsing/scanner-character-streams.h
+++ b/src/parsing/scanner-character-streams.h
@@ -6,6 +6,7 @@
 #define V8_PARSING_SCANNER_CHARACTER_STREAMS_H_
 
 #include "include/v8.h"  // for v8::ScriptCompiler
+#include "src/globals.h"
 
 namespace v8 {
 namespace internal {
@@ -16,7 +17,7 @@ class Utf16CharacterStream;
 class RuntimeCallStats;
 class String;
 
-class ScannerStream {
+class V8_EXPORT_PRIVATE ScannerStream {
  public:
   static Utf16CharacterStream* For(Handle<String> data);
   static Utf16CharacterStream* For(Handle<String> data, int start_pos,
diff --git a/src/parsing/scanner.cc b/src/parsing/scanner.cc
index ece66f2a6d..507570c962 100644
--- a/src/parsing/scanner.cc
+++ b/src/parsing/scanner.cc
@@ -1165,6 +1165,18 @@ Token::Value Scanner::ScanTemplateContinuation() {
   return ScanTemplateSpan();
 }
 
+Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
+  Handle<String> tmp;
+  if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate);
+  return tmp;
+}
+
+Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
+  Handle<String> tmp;
+  if (source_mapping_url_.length() > 0)
+    tmp = source_mapping_url_.Internalize(isolate);
+  return tmp;
+}
 
 void Scanner::ScanDecimalDigits() {
   while (IsDecimalDigit(c0_))
diff --git a/src/parsing/scanner.h b/src/parsing/scanner.h
index 5c8e8233fb..417bd9de1a 100644
--- a/src/parsing/scanner.h
+++ b/src/parsing/scanner.h
@@ -330,18 +330,8 @@ class Scanner {
   Token::Value ScanTemplateStart();
   Token::Value ScanTemplateContinuation();
 
-  Handle<String> SourceUrl(Isolate* isolate) const {
-    Handle<String> tmp;
-    if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate);
-    return tmp;
-  }
-
-  Handle<String> SourceMappingUrl(Isolate* isolate) const {
-    Handle<String> tmp;
-    if (source_mapping_url_.length() > 0)
-      tmp = source_mapping_url_.Internalize(isolate);
-    return tmp;
-  }
+  Handle<String> SourceUrl(Isolate* isolate) const;
+  Handle<String> SourceMappingUrl(Isolate* isolate) const;
 
   bool FoundHtmlComment() const { return found_html_comment_; }
 
diff --git a/src/v8.gyp b/src/v8.gyp
index 80b5c24ce1..f2d335f73f 100644
--- a/src/v8.gyp
+++ b/src/v8.gyp
@@ -417,6 +417,9 @@
         'arguments.h',
         'asmjs/asm-js.cc',
         'asmjs/asm-js.h',
+        'asmjs/asm-names.h',
+        'asmjs/asm-scanner.cc',
+        'asmjs/asm-scanner.h',
         'asmjs/asm-typer.cc',
         'asmjs/asm-typer.h',
         'asmjs/asm-types.cc',
diff --git a/test/unittests/BUILD.gn b/test/unittests/BUILD.gn
index 06477ed45f..7c6117e565 100644
--- a/test/unittests/BUILD.gn
+++ b/test/unittests/BUILD.gn
@@ -15,6 +15,7 @@ v8_executable("unittests") {
     "api/isolate-unittest.cc",
     "api/remote-object-unittest.cc",
     "api/v8-object-unittest.cc",
+    "asmjs/asm-scanner-unittest.cc",
     "base/atomic-utils-unittest.cc",
     "base/bits-unittest.cc",
     "base/cpu-unittest.cc",
diff --git a/test/unittests/asmjs/asm-scanner-unittest.cc b/test/unittests/asmjs/asm-scanner-unittest.cc
new file mode 100644
index 0000000000..9924244ec7
--- /dev/null
+++ b/test/unittests/asmjs/asm-scanner-unittest.cc
@@ -0,0 +1,290 @@
+// Copyright 2017 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/asmjs/asm-scanner.h"
+#include "src/objects.h"
+#include "src/parsing/scanner-character-streams.h"
+#include "src/parsing/scanner.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace v8 {
+namespace internal {
+
+#define TOK(t) AsmJsScanner::kToken_##t
+
+class AsmJsScannerTest : public ::testing::Test {
+ protected:
+  void SetupSource(const char* source) {
+    scanner.SetStream(ScannerStream::ForTesting(source));
+  }
+
+  void Skip(AsmJsScanner::token_t t) {
+    CHECK_EQ(t, scanner.Token());
+    scanner.Next();
+  }
+
+  void SkipGlobal() {
+    CHECK(scanner.IsGlobal());
+    scanner.Next();
+  }
+
+  void SkipLocal() {
+    CHECK(scanner.IsLocal());
+    scanner.Next();
+  }
+
+  void CheckForEnd() { CHECK(scanner.Token() == AsmJsScanner::kEndOfInput); }
+
+  void CheckForParseError() {
+    CHECK(scanner.Token() == AsmJsScanner::kParseError);
+  }
+
+  AsmJsScanner scanner;
+};
+
+TEST_F(AsmJsScannerTest, SimpleFunction) {
+  SetupSource("function foo() { return; }");
+  Skip(TOK(function));
+  CHECK_EQ("foo", scanner.GetIdentifierString());
+  SkipGlobal();
+  Skip('(');
+  Skip(')');
+  Skip('{');
+  // clang-format off
+  Skip(TOK(return));
+  // clang-format on
+  Skip(';');
+  Skip('}');
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, JSKeywords) {
+  SetupSource(
+      "arguments break case const continue\n"
+      "default do else eval for function\n"
+      "if new return switch var while\n");
+  Skip(TOK(arguments));
+  Skip(TOK(break));
+  Skip(TOK(case));
+  Skip(TOK(const));
+  Skip(TOK(continue));
+  Skip(TOK(default));
+  Skip(TOK(do));
+  Skip(TOK(else));
+  Skip(TOK(eval));
+  Skip(TOK(for));
+  Skip(TOK(function));
+  Skip(TOK(if));
+  Skip(TOK(new));
+  // clang-format off
+  Skip(TOK(return));
+  // clang-format on
+  Skip(TOK(switch));
+  Skip(TOK(var));
+  Skip(TOK(while));
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, JSOperatorsSpread) {
+  SetupSource(
+      "+ - * / % & | ^ ~ << >> >>>\n"
+      "< > <= >= == !=\n");
+  Skip('+');
+  Skip('-');
+  Skip('*');
+  Skip('/');
+  Skip('%');
+  Skip('&');
+  Skip('|');
+  Skip('^');
+  Skip('~');
+  Skip(TOK(SHL));
+  Skip(TOK(SAR));
+  Skip(TOK(SHR));
+  Skip('<');
+  Skip('>');
+  Skip(TOK(LE));
+  Skip(TOK(GE));
+  Skip(TOK(EQ));
+  Skip(TOK(NE));
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, JSOperatorsTight) {
+  SetupSource(
+      "+-*/%&|^~<<>> >>>\n"
+      "<><=>= ==!=\n");
+  Skip('+');
+  Skip('-');
+  Skip('*');
+  Skip('/');
+  Skip('%');
+  Skip('&');
+  Skip('|');
+  Skip('^');
+  Skip('~');
+  Skip(TOK(SHL));
+  Skip(TOK(SAR));
+  Skip(TOK(SHR));
+  Skip('<');
+  Skip('>');
+  Skip(TOK(LE));
+  Skip(TOK(GE));
+  Skip(TOK(EQ));
+  Skip(TOK(NE));
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, UsesOfAsm) {
+  SetupSource("'use asm' \"use asm\"\n");
+  Skip(TOK(UseAsm));
+  Skip(TOK(UseAsm));
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, DefaultGlobalScope) {
+  SetupSource("var x = x + x;");
+  Skip(TOK(var));
+  CHECK_EQ("x", scanner.GetIdentifierString());
+  AsmJsScanner::token_t x = scanner.Token();
+  SkipGlobal();
+  Skip('=');
+  Skip(x);
+  Skip('+');
+  Skip(x);
+  Skip(';');
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, GlobalScope) {
+  SetupSource("var x = x + x;");
+  scanner.EnterGlobalScope();
+  Skip(TOK(var));
+  CHECK_EQ("x", scanner.GetIdentifierString());
+  AsmJsScanner::token_t x = scanner.Token();
+  SkipGlobal();
+  Skip('=');
+  Skip(x);
+  Skip('+');
+  Skip(x);
+  Skip(';');
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, LocalScope) {
+  SetupSource("var x = x + x;");
+  scanner.EnterLocalScope();
+  Skip(TOK(var));
+  CHECK_EQ("x", scanner.GetIdentifierString());
+  AsmJsScanner::token_t x = scanner.Token();
+  SkipLocal();
+  Skip('=');
+  Skip(x);
+  Skip('+');
+  Skip(x);
+  Skip(';');
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, Numbers) {
+  SetupSource("1 1.2 0x1f 1.e3");
+
+  CHECK(scanner.IsUnsigned());
+  CHECK_EQ(1, scanner.AsUnsigned());
+  scanner.Next();
+
+  CHECK(scanner.IsDouble());
+  CHECK_EQ(1.2, scanner.AsDouble());
+  scanner.Next();
+
+  CHECK(scanner.IsUnsigned());
+  CHECK_EQ(31, scanner.AsUnsigned());
+  scanner.Next();
+
+  CHECK(scanner.IsDouble());
+  CHECK_EQ(1.0e3, scanner.AsDouble());
+  scanner.Next();
+
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, BadNumber) {
+  SetupSource(".123fe");
+  Skip('.');
+  CheckForParseError();
+}
+
+TEST_F(AsmJsScannerTest, Rewind1) {
+  SetupSource("+ - * /");
+  Skip('+');
+  scanner.Rewind();
+  Skip('+');
+  Skip('-');
+  scanner.Rewind();
+  Skip('-');
+  Skip('*');
+  scanner.Rewind();
+  Skip('*');
+  Skip('/');
+  scanner.Rewind();
+  Skip('/');
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, Comments) {
+  SetupSource(
+      "var // This is a test /* */ eval\n"
+      "var /* test *** test */ eval\n"
+      "function /* this */ ^");
+  Skip(TOK(var));
+  Skip(TOK(var));
+  Skip(TOK(eval));
+  Skip(TOK(function));
+  Skip('^');
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, TrailingCComment) {
+  SetupSource("var /* test\n");
+  Skip(TOK(var));
+  CheckForParseError();
+}
+
+TEST_F(AsmJsScannerTest, Seeking) {
+  SetupSource("var eval do arguments function break\n");
+  Skip(TOK(var));
+  int old_pos = scanner.GetPosition();
+  Skip(TOK(eval));
+  Skip(TOK(do));
+  Skip(TOK(arguments));
+  scanner.Rewind();
+  Skip(TOK(arguments));
+  scanner.Rewind();
+  scanner.Seek(old_pos);
+  Skip(TOK(do));
+  Skip(TOK(arguments));
+  Skip(TOK(function));
+  Skip(TOK(break));
+  CheckForEnd();
+}
+
+TEST_F(AsmJsScannerTest, Newlines) {
+  SetupSource(
+      "var x = 1\n"
+      "var y = 2\n");
+  Skip(TOK(var));
+  scanner.Next();
+  Skip('=');
+  scanner.Next();
+  CHECK(scanner.IsPrecededByNewline());
+  Skip(TOK(var));
+  scanner.Next();
+  Skip('=');
+  scanner.Next();
+  CHECK(scanner.IsPrecededByNewline());
+  CheckForEnd();
+}
+
+}  // namespace internal
+}  // namespace v8
diff --git a/test/unittests/unittests.gyp b/test/unittests/unittests.gyp
index 79ab8bb272..34340f9eb5 100644
--- a/test/unittests/unittests.gyp
+++ b/test/unittests/unittests.gyp
@@ -13,6 +13,7 @@
       'api/isolate-unittest.cc',
       'api/remote-object-unittest.cc',
       'api/v8-object-unittest.cc',
+      'asmjs/asm-scanner-unittest.cc',
       'base/atomic-utils-unittest.cc',
       'base/bits-unittest.cc',
       'base/cpu-unittest.cc',