[scanner] Drop lonely byte support as it's unused by blink anyway.
The embedder should ultimately be responsible for handling this since they anyway give us a copy of the data. They can easily make sure that the chunks we get do not have lonely bytes.

Cq-Include-Trybots: luci.chromium.try:linux_chromium_rel_ng
Change-Id: Ie862107bbbdd00c4d904fbb457a206c2fd52e5d0
Reviewed-on: https://chromium-review.googlesource.com/1127044
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54262}
This commit is contained in:
parent
bfeb78a763
commit
c7ad1ddd44
@ -1480,6 +1480,10 @@ class V8_EXPORT ScriptCompiler {
|
||||
* more than two data chunks. The embedder can avoid this problem by always
|
||||
* returning at least 2 bytes of data.
|
||||
*
|
||||
* When streaming UTF-16 data, V8 does not handle characters split between
|
||||
* two data chunks. The embedder has to make sure that chunks have an even
|
||||
* length.
|
||||
*
|
||||
* If the embedder wants to cancel the streaming, they should make the next
|
||||
* GetMoreData call return 0. V8 will interpret it as end of data (and most
|
||||
* probably, parsing will fail). The streaming task will return as soon as
|
||||
|
@ -38,9 +38,8 @@ struct Range {
|
||||
const Char* end;
|
||||
|
||||
size_t length() { return static_cast<size_t>(end - start); }
|
||||
bool empty() const { return start == end; }
|
||||
bool unaligned_start() const {
|
||||
return reinterpret_cast<intptr_t>(start) % 2 == 1;
|
||||
return reinterpret_cast<intptr_t>(start) % sizeof(Char) == 1;
|
||||
}
|
||||
};
|
||||
|
||||
@ -95,89 +94,47 @@ class ChunkedStream {
|
||||
|
||||
Range<Char> GetDataAt(size_t pos) {
|
||||
Chunk chunk = FindChunk(pos);
|
||||
size_t buffer_end = chunk.length();
|
||||
size_t buffer_end = chunk.length;
|
||||
size_t buffer_pos = Min(buffer_end, pos - chunk.position);
|
||||
return {&chunk.data()[buffer_pos], &chunk.data()[buffer_end]};
|
||||
return {&chunk.data[buffer_pos], &chunk.data[buffer_end]};
|
||||
}
|
||||
|
||||
~ChunkedStream() {
|
||||
for (size_t i = 0; i < chunks_.size(); i++) {
|
||||
delete[] chunks_[i].raw_data;
|
||||
delete[] chunks_[i].data;
|
||||
}
|
||||
}
|
||||
|
||||
static const bool kCanAccessHeap = false;
|
||||
|
||||
private:
|
||||
// A single chunk of Chars. There may be lonely bytes at the start and end
|
||||
// in case sizeof(Char) > 1. They just need to be ignored since additional
|
||||
// chunks are added by FetchChunk that contain the full character.
|
||||
// TODO(verwaest): Make sure that those characters are added by blink instead
|
||||
// so we can get rid of this complexity here.
|
||||
struct Chunk {
|
||||
// A raw chunk of Chars possibly including a lonely start and/or a lonely
|
||||
// end byte.
|
||||
const uint8_t* const raw_data;
|
||||
// The logical position of data() (possibly skipping a lonely start byte).
|
||||
const Char* const data;
|
||||
// The logical position of data.
|
||||
const size_t position;
|
||||
// The length of the raw_data.
|
||||
const size_t raw_length : sizeof(size_t) * 8 - 1;
|
||||
// Tells us whether the first byte of raw_data is a lonely start byte and
|
||||
// should be skipped because it's combined with a lonely end byte from the
|
||||
// previous chunk.
|
||||
const bool lonely_start : 1;
|
||||
|
||||
size_t end_position() const { return position + length(); }
|
||||
|
||||
// The chunk includes a lonely end byte if the chunk is 2-byte but has an
|
||||
// uneven number of chars (possibly ignoring a lonely start byte that is
|
||||
// merged with the lonely end byte of the previous chunk).
|
||||
bool lonely_end() const {
|
||||
return (raw_length - lonely_start) % sizeof(Char) == 1;
|
||||
}
|
||||
|
||||
uint8_t lonely_end_byte() const {
|
||||
DCHECK(lonely_end());
|
||||
return raw_data[raw_length - 1];
|
||||
}
|
||||
|
||||
size_t length() const {
|
||||
return (raw_length - lonely_start) >> (sizeof(Char) - 1);
|
||||
}
|
||||
|
||||
bool has_chars() const { return raw_length - lonely_start > 0; }
|
||||
|
||||
const Char* data() const {
|
||||
return reinterpret_cast<const Char*>(raw_data + lonely_start);
|
||||
}
|
||||
const size_t length;
|
||||
size_t end_position() const { return position + length; }
|
||||
};
|
||||
|
||||
Chunk FindChunk(size_t position) {
|
||||
if (chunks_.empty()) FetchFirstChunk();
|
||||
if (chunks_.empty()) FetchChunk(size_t{0});
|
||||
|
||||
// Walk forwards while the position is in front of the current chunk.
|
||||
if (chunks_.back().position <= position) {
|
||||
while (position >= chunks_.back().end_position() &&
|
||||
chunks_.back().has_chars()) {
|
||||
FetchChunk();
|
||||
}
|
||||
// Return if the final chunk's starting position is before the position.
|
||||
if (chunks_.back().position <= position) return chunks_.back();
|
||||
// Otherwise walk backwards to find the intermediate chunk added to
|
||||
// support lonely bytes.
|
||||
// TODO(verwaest): Remove once we don't need to support lonely bytes here
|
||||
// anymore.
|
||||
// Walk forwards while the position is in front of the current chunk.
|
||||
while (position >= chunks_.back().end_position() &&
|
||||
chunks_.back().length > 0) {
|
||||
FetchChunk(chunks_.back().end_position());
|
||||
}
|
||||
|
||||
// Walk backwards.
|
||||
for (auto reverse_it = chunks_.rbegin() + 1; reverse_it != chunks_.rend();
|
||||
for (auto reverse_it = chunks_.rbegin(); reverse_it != chunks_.rend();
|
||||
++reverse_it) {
|
||||
if (reverse_it->position <= position) return *reverse_it;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void FetchFirstChunk() {
|
||||
void FetchChunk(size_t position) {
|
||||
const uint8_t* data = nullptr;
|
||||
size_t length;
|
||||
{
|
||||
@ -185,35 +142,10 @@ class ChunkedStream {
|
||||
RuntimeCallCounterId::kGetMoreDataCallback);
|
||||
length = source_->GetMoreData(&data);
|
||||
}
|
||||
chunks_.push_back({data, 0, length, false});
|
||||
}
|
||||
|
||||
void FetchChunk() {
|
||||
DCHECK(!chunks_.empty());
|
||||
|
||||
const uint8_t* data = nullptr;
|
||||
size_t length;
|
||||
{
|
||||
RuntimeCallTimerScope scope(stats_,
|
||||
RuntimeCallCounterId::kGetMoreDataCallback);
|
||||
length = source_->GetMoreData(&data);
|
||||
}
|
||||
|
||||
const Chunk& last_chunk = chunks_.back();
|
||||
bool lonely_start = last_chunk.lonely_end();
|
||||
DCHECK(last_chunk.has_chars());
|
||||
|
||||
size_t position = last_chunk.end_position();
|
||||
|
||||
if (lonely_start) {
|
||||
uint8_t* intermediate = NewArray<uint8_t>(2);
|
||||
intermediate[0] = last_chunk.lonely_end_byte();
|
||||
intermediate[1] = length == 0 ? 0 : data[0];
|
||||
chunks_.push_back({intermediate, position, 2, false});
|
||||
position += 1;
|
||||
}
|
||||
|
||||
chunks_.push_back({data, position, length, lonely_start});
|
||||
// Incoming data has to be aligned to Char size.
|
||||
DCHECK_EQ(0, length % sizeof(Char));
|
||||
chunks_.push_back(
|
||||
{reinterpret_cast<const Char*>(data), position, length / sizeof(Char)});
|
||||
}
|
||||
|
||||
std::vector<struct Chunk> chunks_;
|
||||
@ -240,7 +172,7 @@ class BufferedCharacterStream : public Utf16CharacterStream {
|
||||
buffer_cursor_ = buffer_start_;
|
||||
|
||||
Range<Char> range = byte_stream_.GetDataAt(position);
|
||||
if (range.empty()) {
|
||||
if (range.length() == 0) {
|
||||
buffer_end_ = buffer_start_;
|
||||
return false;
|
||||
}
|
||||
@ -261,10 +193,8 @@ class BufferedCharacterStream : public Utf16CharacterStream {
|
||||
ByteStream<Char> byte_stream_;
|
||||
};
|
||||
|
||||
// Provides a (partially) unbuffered utf-16 view on the bytes from the
|
||||
// underlying ByteStream. It is only partially unbuffered when running on MIPS
|
||||
// due to lonely start bytes making chunks unaligned. In that case, unaligned
|
||||
// chars in a chunk (due to lonely start) are locally buffered.
|
||||
// Provides an unbuffered utf-16 view on the bytes from the underlying
|
||||
// ByteStream.
|
||||
template <template <typename T> class ByteStream>
|
||||
class UnbufferedCharacterStream : public Utf16CharacterStream {
|
||||
public:
|
||||
@ -282,20 +212,9 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
|
||||
buffer_start_ = range.start;
|
||||
buffer_end_ = range.end;
|
||||
buffer_cursor_ = buffer_start_;
|
||||
if (range.empty()) return false;
|
||||
if (range.length() == 0) return false;
|
||||
|
||||
// TODO(verwaest): Make sure that this cannot happen by dealing with lonely
|
||||
// bytes on the blink side.
|
||||
#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
|
||||
// Buffer anyway in case the chunk is unaligned due to a lonely start.
|
||||
if (range.unaligned_start()) {
|
||||
size_t length = Min(kBufferSize, range.length());
|
||||
i::CopyCharsUnsigned(buffer_, buffer_start_, length);
|
||||
buffer_start_ = &buffer_[0];
|
||||
buffer_cursor_ = buffer_start_;
|
||||
buffer_end_ = &buffer_[length];
|
||||
}
|
||||
#endif
|
||||
DCHECK(!range.unaligned_start());
|
||||
DCHECK_LE(buffer_start_, buffer_end_);
|
||||
return true;
|
||||
}
|
||||
@ -303,10 +222,6 @@ class UnbufferedCharacterStream : public Utf16CharacterStream {
|
||||
bool can_access_heap() override { return false; }
|
||||
|
||||
private:
|
||||
#if V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
|
||||
static const size_t kBufferSize = 512;
|
||||
uc16 buffer_[kBufferSize];
|
||||
#endif
|
||||
ByteStream<uint16_t> byte_stream_;
|
||||
};
|
||||
|
||||
|
@ -28,12 +28,14 @@ class ChunkSource : public v8::ScriptCompiler::ExternalSourceStream {
|
||||
chunks += strlen(chunks) + 1;
|
||||
} while (chunks_.back().len > 0);
|
||||
}
|
||||
ChunkSource(const uint8_t* data, size_t len, bool extra_chunky)
|
||||
ChunkSource(const uint8_t* data, size_t char_size, size_t len,
|
||||
bool extra_chunky)
|
||||
: current_(0) {
|
||||
// If extra_chunky, we'll use increasingly large chunk sizes.
|
||||
// If not, we'll have a single chunk of full length.
|
||||
size_t chunk_size = extra_chunky ? 1 : len;
|
||||
for (size_t i = 0; i < len; i += chunk_size, chunk_size++) {
|
||||
// If extra_chunky, we'll use increasingly large chunk sizes. If not, we'll
|
||||
// have a single chunk of full length. Make sure that chunks are always
|
||||
// aligned to char-size though.
|
||||
size_t chunk_size = extra_chunky ? char_size : len;
|
||||
for (size_t i = 0; i < len; i += chunk_size, chunk_size += char_size) {
|
||||
chunks_.push_back({data + i, i::Min(chunk_size, len - i)});
|
||||
}
|
||||
chunks_.push_back({nullptr, 0});
|
||||
@ -371,7 +373,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
|
||||
const uint8_t* data = one_byte_vector.begin();
|
||||
const uint8_t* data_end = one_byte_vector.end();
|
||||
|
||||
ChunkSource single_chunk(data, data_end - data, false);
|
||||
ChunkSource single_chunk(data, 1, data_end - data, false);
|
||||
std::unique_ptr<i::Utf16CharacterStream> one_byte_streaming_stream(
|
||||
i::ScannerStream::For(&single_chunk,
|
||||
v8::ScriptCompiler::StreamedSource::ONE_BYTE,
|
||||
@ -379,7 +381,7 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
|
||||
TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
|
||||
length, start, end);
|
||||
|
||||
ChunkSource many_chunks(data, data_end - data, true);
|
||||
ChunkSource many_chunks(data, 1, data_end - data, true);
|
||||
one_byte_streaming_stream.reset(i::ScannerStream::For(
|
||||
&many_chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE, nullptr));
|
||||
TestCharacterStream(one_byte_source, one_byte_streaming_stream.get(),
|
||||
@ -390,14 +392,14 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
|
||||
{
|
||||
const uint8_t* data = one_byte_vector.begin();
|
||||
const uint8_t* data_end = one_byte_vector.end();
|
||||
ChunkSource chunks(data, data_end - data, false);
|
||||
ChunkSource chunks(data, 1, data_end - data, false);
|
||||
std::unique_ptr<i::Utf16CharacterStream> utf8_streaming_stream(
|
||||
i::ScannerStream::For(&chunks, v8::ScriptCompiler::StreamedSource::UTF8,
|
||||
nullptr));
|
||||
TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
|
||||
start, end);
|
||||
|
||||
ChunkSource many_chunks(data, data_end - data, true);
|
||||
ChunkSource many_chunks(data, 1, data_end - data, true);
|
||||
utf8_streaming_stream.reset(i::ScannerStream::For(
|
||||
&many_chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
|
||||
TestCharacterStream(one_byte_source, utf8_streaming_stream.get(), length,
|
||||
@ -410,14 +412,14 @@ void TestCharacterStreams(const char* one_byte_source, unsigned length,
|
||||
reinterpret_cast<const uint8_t*>(two_byte_vector.begin());
|
||||
const uint8_t* data_end =
|
||||
reinterpret_cast<const uint8_t*>(two_byte_vector.end());
|
||||
ChunkSource chunks(data, data_end - data, false);
|
||||
ChunkSource chunks(data, 2, data_end - data, false);
|
||||
std::unique_ptr<i::Utf16CharacterStream> two_byte_streaming_stream(
|
||||
i::ScannerStream::For(
|
||||
&chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
|
||||
TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
|
||||
length, start, end);
|
||||
|
||||
ChunkSource many_chunks(data, data_end - data, true);
|
||||
ChunkSource many_chunks(data, 2, data_end - data, true);
|
||||
two_byte_streaming_stream.reset(i::ScannerStream::For(
|
||||
&many_chunks, v8::ScriptCompiler::StreamedSource::TWO_BYTE, nullptr));
|
||||
TestCharacterStream(one_byte_source, two_byte_streaming_stream.get(),
|
||||
@ -459,7 +461,7 @@ TEST(Regress651333) {
|
||||
// Read len bytes from bytes, and compare against the expected unicode
|
||||
// characters. Expect kBadChar ( == Unicode replacement char == code point
|
||||
// 65533) instead of the incorrectly coded Latin1 char.
|
||||
ChunkSource chunks(bytes, len, false);
|
||||
ChunkSource chunks(bytes, 1, len, false);
|
||||
std::unique_ptr<i::Utf16CharacterStream> stream(i::ScannerStream::For(
|
||||
&chunks, v8::ScriptCompiler::StreamedSource::UTF8, nullptr));
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
|
Loading…
Reference in New Issue
Block a user