[scanner] Separate ascii-in-utf8 length computation from decoding the chars
This way we walk the input string twice, but we reduce the number of branches per ASCII char in the long-ASCII-sequence case from 2 per char to ~ 1 + 2 / sizeof(intptr). Let's land and see what the bots say.

Change-Id: I574971c7df896237f3382be634a9bedc920fc827
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1649356
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62046}
This commit is contained in:
parent
faaf4a8ab3
commit
a64ccef757
@@ -607,13 +607,10 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
|
||||
size_t max_buffer = max_buffer_end - output_cursor;
|
||||
int max_length = static_cast<int>(Min(remaining, max_buffer));
|
||||
DCHECK_EQ(state, unibrow::Utf8::State::kAccept);
|
||||
const uint8_t* read_end = cursor + max_length;
|
||||
for (; cursor < read_end; cursor++) {
|
||||
uint8_t c = *cursor;
|
||||
DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
|
||||
if (c > unibrow::Utf8::kMaxOneByteChar) break;
|
||||
*(output_cursor++) = c;
|
||||
}
|
||||
int ascii_length = NonAsciiStart(cursor, max_length);
|
||||
CopyChars(output_cursor, cursor, ascii_length);
|
||||
cursor += ascii_length;
|
||||
output_cursor += ascii_length;
|
||||
}
|
||||
|
||||
current_.pos.bytes = chunk.start.bytes + (cursor - chunk.data);
|
||||
|
Loading…
Reference in New Issue
Block a user