[scanner] Drop lonely byte support as it's unused by blink anyway.

The embedder should ultimately be responsible for handling this, since they
give us a copy of the data anyway. They can easily make sure that the chunks we
get do not have lonely bytes.

Cq-Include-Trybots: luci.chromium.try:linux_chromium_rel_ng
Change-Id: Ie862107bbbdd00c4d904fbb457a206c2fd52e5d0
Reviewed-on: https://chromium-review.googlesource.com/1127044
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54262}
This commit is contained in:
Toon Verwaest 2018-07-05 15:09:58 +02:00 committed by Commit Bot
parent bfeb78a763
commit c7ad1ddd44
3 changed files with 43 additions and 122 deletions

View File

@ -1480,6 +1480,10 @@ class V8_EXPORT ScriptCompiler {
* more than two data chunks. The embedder can avoid this problem by always
* returning at least 2 bytes of data.
*
* When streaming UTF-16 data, V8 does not handle characters split between
* two data chunks. The embedder has to make sure that chunks have an even
* length.
*
* If the embedder wants to cancel the streaming, they should make the next
* GetMoreData call return 0. V8 will interpret it as end of data (and most
* probably, parsing will fail). The streaming task will return as soon as

View File

@ -38,9 +38,8 @@ struct Range {
const Char* end;
size_t length() { return static_cast<size_t>(end - start); }
bool empty() const { return start == end; }
bool unaligned_start() const {
return reinterpret_cast<intptr_t>(start) % 2 == 1;
return reinterpret_cast<intptr_t>(start) % sizeof(Char) == 1;
}
};
@ -95,89 +94,47 @@ class ChunkedStream {
Range<Char> GetDataAt(size_t pos) {
Chunk chunk = FindChunk(pos);
size_t buffer_end = chunk.length();
size_t buffer_end = chunk.length;
size_t buffer_pos = Min(buffer_end, pos - chunk.position);
return {&chunk.data()[buffer_pos], &chunk.data()[buffer_end]};
return {&chunk.data[buffer_pos], &chunk.data[buffer_end]};
}
~ChunkedStream() {
for (size_t i = 0; i < chunks_.size(); i++) {
delete[] chunks_[i].raw_data;
delete[] chunks_[i].data;
}
}
static const bool kCanAccessHeap = false;
private:
// A single chunk of Chars. There may be lonely bytes at the start and end
// in case sizeof(Char) > 1. They just need to be ignored since additional
// chunks are added by FetchChunk that contain the full character.
// TODO(verwaest): Make sure that those characters are added by blink instead
// so we can get rid of this complexity here.
struct Chunk {
// A raw chunk of Chars possibly including a lonely start and/or a lonely
// end byte.
const uint8_t* const raw_data;
// The logical position of data() (possibly skipping a lonely start byte).
const Char* const data;
// The logical position of data.
const size_t position;
// The length of the raw_data.
const size_t raw_length : sizeof(size_t) * 8 - 1;
// Tells us whether the first byte of raw_data is a lonely start byte and
// should be skipped because it's combined with a lonely end byte from the
// previous chunk.
const bool lonely_start : 1;
size_t end_position() const { return position + length(); }
// The chunk includes a lonely end byte if the chunk is 2-byte but has an
// uneven number of chars (possibly ignoring a lonely start byte that is
// merged with the lonely end byte of the previous chunk).
bool lonely_end() const {
return (raw_length - lonely_start) % sizeof(Char) == 1;
}
uint8_t lonely_end_byte() const {
DCHECK(lonely_end());
return raw_data[raw_length - 1];
}
size_t length() const {
return (raw_length - lonely_start) >> (sizeof(Char) - 1);
}
bool has_chars() const { return raw_length - lonely_start > 0; }
const Char* data() const {
return reinterpret_cast<const Char*>(raw_data + lonely_start);
}
const size_t length;
size_t end_position() const { return position + length; }
};
Chunk FindChunk(size_t position) {
if (chunks_.empty()) FetchFirstChunk();
if (chunks_.empty()) FetchChunk(size_t{0});
// Walk forwards while the position is in front of the current chunk..
if (chunks_.back().position <= position) {
while (position >= chunks_.back().end_position() &&
chunks_.back().has_chars()) {
FetchChunk();
}
// Return if the final chunk's starting position is before the position.
if (chunks_.back().position <= position) return chunks_.back();
// Otherwise walk backwards to find the intermediate chunk added to
// support lonely bytes.
// TODO(verwaest): Remove once we don't need to support lonely bytes here
// anymore.
// Walk forwards while the position is in front of the current chunk.
while (position >= chunks_.back().end_position() &&
chunks_.back().length > 0) {
FetchChunk(chunks_.back().end_position());
}
// Walk backwards.
for (auto reverse_it = chunks_.rbegin() + 1; reverse_it != chunks_.rend();
for (auto reverse_it = chunks_.rbegin(); reverse_it != chunks_.rend();
++reverse_it) {
if (reverse_it->position <= position) return *reverse_it;
}
UNREACHABLE();
}
void FetchFirstChunk() {
void FetchChunk(size_t position) {
const uint8_t* data = nullptr;
size_t length;
{
@ -185,35 +142,10 @@ class ChunkedStream {
RuntimeCallCounterId::kGetMoreDataCallback);
length = source_->GetMoreData(&data);
}
chunks_.push_back({data, 0, length, false});
}
void FetchChunk() {
DCHECK(!chunks_.empty());
const uint8_t* data = nullptr;
size_t length;
{
RuntimeCallTimerScope scope(stats_,
RuntimeCallCounterId::kGetMoreDataCallback);
length = source_->GetMoreData(&data);
}
const Chunk& last_chunk = chunks_.back();
bool lonely_start = last_chunk.lonely_end();
DCHECK(last_chunk.has_chars());
size_t position = last_chunk.end_position();
if (lonely_start) {
uint8_t* intermediate = NewArray<uint8_t>(2);
intermediate[0] = last_chunk.lonely_end_byte();
intermediate[1] = length == 0 ? 0 : data[0];
chunks_.push_back({intermediate, position, 2, false});
position += 1;
}
chunks_.push_back({data, position, length, lonely_start});
// Incoming data has to be aligned to Char size.
DCHECK_EQ(0, length % sizeof(Char));
chunks_.push_back(
{reinterpret_cast<const Char*>(data), position, length / sizeof(Char)});
}
std::vector<struct Chunk> chunks_;
@ -240,7 +172,7 @@ class BufferedCharacterStream : public Utf16CharacterStream {
buffer_cursor_ = buffer_start_;
Range<Char> range = byte_stream_.GetDataAt(position);
if (range.empty()) {
if (range.length() == 0) {
buffer_end_ = buffer_start_;
return false;
}
@ -261,10 +193,8 @@ class BufferedCharacterStream : public Utf16CharacterStream {
ByteStream<Char> byte_stream_;
};
// Provides a (partially) unbuffered utf-16 view on the bytes from the
// underlying ByteStream. It is only partially unbuffered when running on MIPS
// due to lonely start bytes making chunks unaligned. In that case, unaligned
// chars in a chunk (due to lonely start) are locally buffered.
// Provides an unbuffered utf-16 view on the bytes from the underlying
// ByteStream.
template <template <typename T> class ByteStream>
class UnbufferedCharacterStream : public Utf16CharacterStream {
public:
@ -282,20 +212,9 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
buffer_start_ = range.start;
buffer_end_ = range.end;
buffer_cursor_ = buffer_start_;
if (range.empty()) return false;
if (range.length() == 0) return false;
// TODO(verwaest): Make sure that this cannot happen by dealing with lonely
// bytes on the blink side.
#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
// Buffer anyway in case the chunk is unaligned due to a lonely start.
if (range.unaligned_start()) {
size_t length = Min(kBufferSize, range.length());
i::CopyCharsUnsigned(buffer_, buffer_start_, length);
buffer_start_ = &buffer_[0];
buffer_cursor_ = buffer_start_;
buffer_end_ = &buffer_[length];
}
#endif
DCHECK(!range.unaligned_start());
DCHECK_LE(buffer_start_, buffer_end_);
return true;
}
@ -303,10 +222,6 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
bool can_access_heap() override { return false; }
private:
#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
static const size_t kBufferSize = 512;
uc16 buffer_[kBufferSize];
#endif
ByteStream<uint16_t> byte_stream_;
};

View File

@ -28,12 +28,14 @@ class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
chunks += strlen(chunks) + 1;
} while (chunks_.back().len > 0);
}
ChunkSource(const uint8_t* data, size_t len, bool extra_chunky)
ChunkSource(const uint8_t* data, size_t char_size, size_t len,
bool extra_chunky)
: current_(0) {
// If extra_chunky, we'll use increasingly large chunk sizes.
// If not, we'll have a single chunk of full length.
size_t chunk_size = extra_chunky ? 1 : len;
for (size_t i = 0; i < len; i += chunk_size, chunk_size++) {
// If extra_chunky, we'll use increasingly large chunk sizes. If not, we'll
// have a single chunk of full length. Make sure that chunks are always
// aligned to char-size though.
size_t chunk_size = extra_chunky ? char_size : len;
for (size_t i = 0; i < len; i += chunk_size, chunk_size += char_size) {
chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
}
chunks_.push_back({nullptr, 0});
@ -371,7 +373,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
const uint8_t* data = one_byte_vector.begin();
const uint8_t* data_end = one_byte_vector.end();
ChunkSource single_chunk(data, data_end - data, false);
ChunkSource single_chunk(data, 1, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
i::ScannerStream::For(&single_chunk,
v8::ScriptCompiler::StreamedSource::ONE_BYTE,
@ -379,7 +381,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
length, start, end);
ChunkSource many_chunks(data, data_end - data, true);
ChunkSource many_chunks(data, 1, data_end - data, true);
one_byte_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE, nullptr));
TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
@ -390,14 +392,14 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
{
const uint8_t* data = one_byte_vector.begin();
const uint8_t* data_end = one_byte_vector.end();
ChunkSource chunks(data, data_end - data, false);
ChunkSource chunks(data, 1, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
i::ScannerStream::For(&chunks, v8::ScriptCompiler::StreamedSource::UTF8,
nullptr));
TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
start, end);
ChunkSource many_chunks(data, data_end - data, true);
ChunkSource many_chunks(data, 1, data_end - data, true);
utf8_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
@ -410,14 +412,14 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
const uint8_t* data_end =
reinterpret_cast<const uint8_t*>(two_byte_vector.end());
ChunkSource chunks(data, data_end - data, false);
ChunkSource chunks(data, 2, data_end - data, false);
std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
i::ScannerStream::For(
&chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
length, start, end);
ChunkSource many_chunks(data, data_end - data, true);
ChunkSource many_chunks(data, 2, data_end - data, true);
two_byte_streaming_stream.reset(i::ScannerStream::For(
&many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
@ -459,7 +461,7 @@ TEST(Regress651333) {
// Read len bytes from bytes, and compare against the expected unicode
// characters. Expect kBadChar ( == Unicode replacement char == code point
// 65533) instead of the incorrectly coded Latin1 char.
ChunkSource chunks(bytes, len, false);
ChunkSource chunks(bytes, 1, len, false);
std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
&chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
for (size_t i = 0; i < len; i++) {