diff --git a/BUILD.gn b/BUILD.gn
index 855a1c6a5d..b27a323094 100644
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -2052,6 +2052,7 @@ v8_source_set("v8_base") {
     "src/string-stream.h",
     "src/strtod.cc",
     "src/strtod.h",
+    "src/third_party/utf8-decoder/utf8-decoder.h",
     "src/tracing/trace-event.cc",
     "src/tracing/trace-event.h",
     "src/tracing/traced-value.cc",
diff --git a/src/parsing/scanner-character-streams.cc b/src/parsing/scanner-character-streams.cc
index 8f584ff715..20aa5c9f8e 100644
--- a/src/parsing/scanner-character-streams.cc
+++ b/src/parsing/scanner-character-streams.cc
@@ -203,7 +203,7 @@ class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
   Utf8ExternalStreamingStream(
       ScriptCompiler::ExternalSourceStream* source_stream,
       RuntimeCallStats* stats)
-      : current_({0, {0, 0, unibrow::Utf8::Utf8IncrementalBuffer(0)}}),
+      : current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}),
         source_stream_(source_stream),
         stats_(stats) {}
   ~Utf8ExternalStreamingStream() override {
@@ -223,7 +223,8 @@ class Utf8ExternalStreamingStream : public BufferedUtf16CharacterStream {
   struct StreamPosition {
     size_t bytes;
     size_t chars;
-    unibrow::Utf8::Utf8IncrementalBuffer incomplete_char;
+    uint32_t incomplete_char;    // Bits of a partially decoded character.
+    unibrow::Utf8::State state;  // Utf8 decoder state at this position.
   };
 
   // Position contains a StreamPosition and the index of the chunk the position
@@ -268,25 +269,25 @@ bool Utf8ExternalStreamingStream::SkipToPosition(size_t position) {
   const Chunk& chunk = chunks_[current_.chunk_no];
   DCHECK(current_.pos.bytes >= chunk.start.bytes);
 
-  unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
-      chunk.start.incomplete_char;
+  unibrow::Utf8::State state = chunk.start.state;
+  uint32_t incomplete_char = chunk.start.incomplete_char;
   size_t it = current_.pos.bytes - chunk.start.bytes;
   size_t chars = chunk.start.chars;
   while (it < chunk.length && chars < position) {
-    unibrow::uchar t =
-        unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
+    unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
+        chunk.data[it], &it, &state, &incomplete_char);
     if (t == kUtf8Bom && current_.pos.chars == 0) {
       // BOM detected at beginning of the stream. Don't copy it.
     } else if (t != unibrow::Utf8::kIncomplete) {
       chars++;
       if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
     }
-    it++;
   }
 
   current_.pos.bytes += it;
   current_.pos.chars = chars;
   current_.pos.incomplete_char = incomplete_char;
+  current_.pos.state = state;
   current_.chunk_no += (it == chunk.length);
 
   return current_.pos.chars == position;
@@ -304,31 +305,33 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
   uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
   DCHECK_EQ(cursor, buffer_end_);
 
+  unibrow::Utf8::State state = current_.pos.state;
+  uint32_t incomplete_char = current_.pos.incomplete_char;
+
   // If the current chunk is the last (empty) chunk we'll have to process
   // any left-over, partial characters.
   if (chunk.length == 0) {
-    unibrow::uchar t =
-        unibrow::Utf8::ValueOfIncrementalFinish(&current_.pos.incomplete_char);
+    unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
     if (t != unibrow::Utf8::kBufferEmpty) {
-      DCHECK_LT(t, unibrow::Utf16::kMaxNonSurrogateCharCode);
+      DCHECK_EQ(t, unibrow::Utf8::kBadChar);
       *cursor = static_cast<uc16>(t);
       buffer_end_++;
       current_.pos.chars++;
+      current_.pos.incomplete_char = 0;
+      current_.pos.state = state;
     }
     return;
   }
 
-  unibrow::Utf8::Utf8IncrementalBuffer incomplete_char =
-      current_.pos.incomplete_char;
-  size_t it;
-  for (it = current_.pos.bytes - chunk.start.bytes;
-       it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize; it++) {
-    unibrow::uchar t =
-        unibrow::Utf8::ValueOfIncremental(chunk.data[it], &incomplete_char);
-    if (t == unibrow::Utf8::kIncomplete) continue;
+  size_t it = current_.pos.bytes - chunk.start.bytes;
+  while (it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize) {
+    unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
+        chunk.data[it], &it, &state, &incomplete_char);
     if (V8_LIKELY(t < kUtf8Bom)) {
       *(cursor++) = static_cast<uc16>(t);  // The by most frequent case.
-    } else if (t == kUtf8Bom && current_.pos.bytes + it == 2) {
+    } else if (t == unibrow::Utf8::kIncomplete) {
+      continue;
+    } else if (t == kUtf8Bom && current_.pos.bytes + it == 3) {
       // BOM detected at beginning of the stream. Don't copy it.
     } else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
       *(cursor++) = static_cast<uc16>(t);
@@ -341,6 +344,7 @@ void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
   current_.pos.bytes = chunk.start.bytes + it;
   current_.pos.chars += (cursor - buffer_end_);
   current_.pos.incomplete_char = incomplete_char;
+  current_.pos.state = state;
   current_.chunk_no += (it == chunk.length);
 
   buffer_end_ = cursor;
@@ -396,16 +400,15 @@ void Utf8ExternalStreamingStream::SearchPosition(size_t position) {
     //  checking whether the # bytes in a chunk are equal to the # chars, and if
     //  so avoid the expensive SkipToPosition.)
     bool ascii_only_chunk =
-        chunks_[chunk_no].start.incomplete_char ==
-            unibrow::Utf8::Utf8IncrementalBuffer(0) &&
+        chunks_[chunk_no].start.incomplete_char == 0 &&
         (chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) ==
             (chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
     if (ascii_only_chunk) {
       size_t skip = position - chunks_[chunk_no].start.chars;
       current_ = {chunk_no,
                   {chunks_[chunk_no].start.bytes + skip,
-                   chunks_[chunk_no].start.chars + skip,
-                   unibrow::Utf8::Utf8IncrementalBuffer(0)}};
+                   chunks_[chunk_no].start.chars + skip, 0,
+                   unibrow::Utf8::State::kAccept}};
     } else {
       current_ = {chunk_no, chunks_[chunk_no].start};
       SkipToPosition(position);
diff --git a/src/third_party/utf8-decoder/LICENSE b/src/third_party/utf8-decoder/LICENSE
new file mode 100644
index 0000000000..b59bef2fb6
--- /dev/null
+++ b/src/third_party/utf8-decoder/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/third_party/utf8-decoder/README.v8 b/src/third_party/utf8-decoder/README.v8
new file mode 100644
index 0000000000..e1e13ce53f
--- /dev/null
+++ b/src/third_party/utf8-decoder/README.v8
@@ -0,0 +1,18 @@
+Name: DFA UTF-8 Decoder
+Short Name: utf8-decoder
+URL: http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+Version: 0
+License: MIT
+License File: NOT_SHIPPED
+Security Critical: no
+
+Description:
+Decodes UTF-8 bytes using a fast and simple deterministic finite automaton.
+
+Local modifications:
+- Rejection state has been mapped to row 0 (instead of row 1) of the DFA,
+  saving some 50 bytes and making the table easier to reason about.
+- The transitions have been remapped to represent both a state transition and a
+  bit mask for the incoming byte.
+- The caller must now zero out the code point buffer after successful or
+  unsuccessful state transitions.
diff --git a/src/third_party/utf8-decoder/utf8-decoder.h b/src/third_party/utf8-decoder/utf8-decoder.h
new file mode 100644
index 0000000000..5668e5ad9e
--- /dev/null
+++ b/src/third_party/utf8-decoder/utf8-decoder.h
@@ -0,0 +1,78 @@
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+// The remapped transition table is justified at
+// https://docs.google.com/spreadsheets/d/1AZcQwuEL93HmNCljJWUwFMGqf7JAQ0puawZaUgP0E14
+
+#include <stdint.h>
+
+#ifndef __UTF8_DFA_DECODER_H
+#define __UTF8_DFA_DECODER_H
+
+namespace Utf8DfaDecoder {
+
+enum State : uint8_t {  // Each value is the offset of its row in `states`.
+  kReject = 0,            // Invalid input; every transition stays here.
+  kAccept = 12,           // Between characters; start state.
+  kTwoByte = 24,          // 1 more byte expected; next: 80-BF.
+  kThreeByte = 36,        // 2 more bytes expected; next: 80-BF.
+  kThreeByteLowMid = 48,  // Entered on ED; next: 80-9F.
+  kFourByte = 60,         // 3 more bytes expected; next: 80-BF.
+  kFourByteLow = 72,      // Entered on F4; next: 80-8F.
+  kThreeByteHigh = 84,    // Entered on E0; next: A0-BF.
+  kFourByteMidHigh = 96,  // Entered on F0; next: 90-BF.
+};
+
+static inline void Decode(uint8_t byte, State* state, uint32_t* buffer) {
+  // This first table maps each input byte to its transition type (0-11).
+  static constexpr uint8_t transitions[] = {
+      0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 00-0F
+      0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 10-1F
+      0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 20-2F
+      0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 30-3F
+      0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 40-4F
+      0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 50-5F
+      0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 60-6F
+      0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 70-7F
+      1,  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 80-8F
+      2,  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // 90-9F
+      3,  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // A0-AF
+      3,  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  // B0-BF
+      9,  9, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // C0-CF
+      4,  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,  // D0-DF
+      10, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 5, 5,  // E0-EF
+      11, 7, 7, 7, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,  // F0-FF
+  };
+
+  // This second table maps (state + transition type) to the next state.
+  //  00-7F
+  //  |   80-8F
+  //  |   |   90-9F
+  //  |   |   |   A0-BF
+  //  |   |   |   |   C2-DF
+  //  |   |   |   |   |   E1-EC, EE, EF
+  //  |   |   |   |   |   |   ED
+  //  |   |   |   |   |   |   |   F1-F3
+  //  |   |   |   |   |   |   |   |   F4
+  //  |   |   |   |   |   |   |   |   |   C0, C1, F5-FF
+  //  |   |   |   |   |   |   |   |   |   |  E0
+  //  |   |   |   |   |   |   |   |   |   |  |   F0
+  static constexpr uint8_t states[] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0,   // REJECT = 0
+      12, 0,  0,  0,  24, 36, 48, 60, 72, 0, 84, 96,  // ACCEPT = 12
+      0,  12, 12, 12, 0,  0,  0,  0,  0,  0, 0,  0,   // 2-byte = 24
+      0,  24, 24, 24, 0,  0,  0,  0,  0,  0, 0,  0,   // 3-byte = 36
+      0,  24, 24, 0,  0,  0,  0,  0,  0,  0, 0,  0,   // 3-byte low/mid = 48
+      0,  36, 36, 36, 0,  0,  0,  0,  0,  0, 0,  0,   // 4-byte = 60
+      0,  36, 0,  0,  0,  0,  0,  0,  0,  0, 0,  0,   // 4-byte low = 72
+      0,  0,  0,  24, 0,  0,  0,  0,  0,  0, 0,  0,   // 3-byte high = 84
+      0,  0,  36, 36, 0,  0,  0,  0,  0,  0, 0,  0,   // 4-byte mid/high = 96
+  };
+
+  DCHECK_NE(*state, State::kReject);  // Caller must not step from kReject.
+  uint8_t type = transitions[byte];   // Column index into `states`.
+  *state = static_cast<State>(states[*state + type]);  // Row offset + column.
+  *buffer = (*buffer << 6) | (byte & (0x7F >> (type >> 1)));  // Add payload.
+}
+
+}  // namespace Utf8DfaDecoder
+
+#endif /* __UTF8_DFA_DECODER_H */
diff --git a/src/unicode-inl.h b/src/unicode-inl.h
index ebebfaa1bd..7c0386ce52 100644
--- a/src/unicode-inl.h
+++ b/src/unicode-inl.h
@@ -113,8 +113,8 @@ unsigned Utf8::Encode(char* str,
 uchar Utf8::ValueOf(const byte* bytes, size_t length, size_t* cursor) {
   if (length <= 0) return kBadChar;
   byte first = bytes[0];
-  // Characters between 0000 and 0007F are encoded as a single character
-  if (first <= kMaxOneByteChar) {
+  // Characters between 0000 and 007F are encoded as a single character
+  if (V8_LIKELY(first <= kMaxOneByteChar)) {
     *cursor += 1;
     return first;
   }
diff --git a/src/unicode.cc b/src/unicode.cc
index 082334f230..4d7896ec37 100644
--- a/src/unicode.cc
+++ b/src/unicode.cc
@@ -193,306 +193,91 @@ static int LookupMapping(const int32_t* table,
   }
 }
 
-static inline uint8_t NonASCIISequenceLength(byte first) {
-  // clang-format off
-  static const uint8_t lengths[256] = {
-      // The first 128 entries correspond to ASCII characters.
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 00 - 0f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 10 - 1f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 20 - 2f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 30 - 3f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 40 - 4f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 50 - 5f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 60 - 6f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 70 - 7f */
-      // The following 64 entries correspond to continuation bytes.
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 80 - 8f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* 90 - 9f */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* a0 - af */
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* b0 - bf */
-      // The next are two invalid overlong encodings and 30 two-byte sequences.
-      0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* c0-c1 + c2-cf */
-      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  /* d0-df */
-      // 16 three-byte sequences.
-      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,  /* e0-ef */
-      // 5 four-byte sequences, followed by sequences that could only encode
-      // code points outside of the Unicode range.
-      4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; /* f0-f4 + f5-ff */
-  // clang-format on
-  return lengths[first];
-}
-
-
-static inline bool IsContinuationCharacter(byte chr) {
-  return chr >= 0x80 && chr <= 0xBF;
-}
-
 // This method decodes an UTF-8 value according to RFC 3629 and
 // https://encoding.spec.whatwg.org/#utf-8-decoder .
 uchar Utf8::CalculateValue(const byte* str, size_t max_length, size_t* cursor) {
+  DCHECK_GT(max_length, 0);  // str[0] is read unconditionally below.
   DCHECK_GT(str[0], kMaxOneByteChar);
 
-  size_t length = NonASCIISequenceLength(str[0]);
+  State state = State::kAccept;
+  Utf8IncrementalBuffer buffer = 0;
+  uchar t;
 
-  // Check continuation characters.
-  size_t max_count = std::min(length, max_length);
-  size_t count = 1;
-  while (count < max_count && IsContinuationCharacter(str[count])) {
-    count++;
-  }
+  size_t i = 0;  // Bytes consumed so far; updated by ValueOfIncremental.
+  do {
+    t = ValueOfIncremental(str[i], &i, &state, &buffer);
+  } while (i < max_length && t == kIncomplete);  // Stop at first result.
 
-  if (length >= 3 && count < 2) {
-    // Not enough continuation bytes to check overlong sequences.
-    *cursor += 1;
-    return kBadChar;
-  }
-
-  // Check overly long sequences & other conditions.
-  if (length == 3) {
-    if (str[0] == 0xE0 && (str[1] < 0xA0 || str[1] > 0xBF)) {
-      // Overlong three-byte sequence? The first byte generates a kBadChar.
-      *cursor += 1;
-      return kBadChar;
-    } else if (str[0] == 0xED && (str[1] < 0x80 || str[1] > 0x9F)) {
-      // High and low surrogate halves? The first byte generates a kBadChar.
-      *cursor += 1;
-      return kBadChar;
-    }
-  } else if (length == 4) {
-    if (str[0] == 0xF0 && (str[1] < 0x90 || str[1] > 0xBF)) {
-      // Overlong four-byte sequence. The first byte generates a kBadChar.
-      *cursor += 1;
-      return kBadChar;
-    } else if (str[0] == 0xF4 && (str[1] < 0x80 || str[1] > 0x8F)) {
-      // Code points outside of the Unicode range. The first byte generates a
-      // kBadChar.
-      *cursor += 1;
-      return kBadChar;
-    }
-  }
-
-  *cursor += count;
-
-  if (count != length) {
-    // Not enough continuation characters.
-    return kBadChar;
-  }
-
-  // All errors have been handled, so we only have to assemble the result.
-  switch (length) {
-    case 2:
-      return ((str[0] << 6) + str[1]) - 0x00003080;
-    case 3:
-      return ((str[0] << 12) + (str[1] << 6) + str[2]) - 0x000E2080;
-    case 4:
-      return ((str[0] << 18) + (str[1] << 12) + (str[2] << 6) + str[3]) -
-             0x03C82080;
-  }
-
-  UNREACHABLE();
+  *cursor += i;
+  return (state == State::kAccept) ? t : kBadChar;  // Truncated -> kBadChar.
 }
 
-/*
-Overlong sequence detection: Since Blink's TextCodecUTF8 rejects multi-byte
-characters which could be expressed with less bytes, we must too.
-
-Each continuation byte (10xxxxxx) carries 6 bits of payload. The lead bytes of
-1, 2, 3 and 4-byte characters are 0xxxxxxx, 110xxxxx, 1110xxxx and 11110xxx, and
-carry 7, 5, 4, and 3 bits of payload, respectively.
-
-Thus, a two-byte character can contain 11 bits of payload, a three-byte
-character 16, and a four-byte character 21.
-
-If we encounter a two-byte character which contains 7 bits or less, a three-byte
-character which contains 11 bits or less, or a four-byte character which
-contains 16 bits or less, we reject the character and generate a kBadChar for
-each of the bytes. This is because Blink handles overlong sequences by rejecting
-the first byte of the character (returning kBadChar); thus the rest are lonely
-continuation bytes and generate a kBadChar each.
-*/
-
-uchar Utf8::ValueOfIncremental(byte next, Utf8IncrementalBuffer* buffer) {
+// Feeds one byte to the incremental decoder and returns a code point, kBadChar
+// or kIncomplete. Advances *cursor unless the byte must be reprocessed. Call
+// ValueOfIncrementalFinish at end of stream to flush an incomplete sequence.
+uchar Utf8::ValueOfIncremental(byte next, size_t* cursor, State* state,
+                               Utf8IncrementalBuffer* buffer) {
   DCHECK_NOT_NULL(buffer);
+  State old_state = *state;  // Needed below to tell lead from continuation.
+  *cursor += 1;              // Consume the byte; undone below on reprocess.
 
-  // The common case: 1-byte Utf8 (and no incomplete char in the buffer)
-  if (V8_LIKELY(next <= kMaxOneByteChar && *buffer == 0)) {
+  if (V8_LIKELY(next <= kMaxOneByteChar && old_state == State::kAccept)) {
+    DCHECK_EQ(0u, *buffer);
     return static_cast<uchar>(next);
   }
 
-  if (*buffer == 0) {
-    // We're at the start of a new character.
-    uint32_t kind = NonASCIISequenceLength(next);
-    CHECK_LE(kind, 4);
-    if (kind >= 2) {
-      // Start of 2..4 byte character, and no buffer.
+  // Otherwise this is the lead byte of a 2/3/4-byte sequence, a continuation
+  // byte within one, or an invalid byte; let the DFA decide.
+  Utf8DfaDecoder::Decode(next, state, buffer);
 
-      // The mask for the lower bits depends on the kind, and is
-      // 0x1F, 0x0F, 0x07 for kinds 2, 3, 4 respectively. We can get that
-      // with one shift.
-      uint8_t mask = 0x7F >> kind;
-
-      // Store the kind in the top nibble, and kind - 1 (i.e., remaining bytes)
-      // in 2nd nibble, and the value  in the bottom three. The 2nd nibble is
-      // intended as a counter about how many bytes are still needed.
-      uint32_t character_info = kind << 28 | (kind - 1) << 24;
-      DCHECK_EQ(character_info & mask, 0);
-      *buffer = character_info | (next & mask);
-      return kIncomplete;
-    } else {
-      // No buffer, and not the start of a 1-byte char (handled at the
-      // beginning), and not the start of a 2..4 byte char (or the start of an
-      // overlong / invalid sequence)? Bad char.
+  switch (*state) {
+    case State::kAccept: {
+      uchar t = *buffer;  // A complete code point has been assembled.
       *buffer = 0;
-      return kBadChar;
-    }
-  } else if (*buffer <= 0xFF) {
-    // We have one unprocessed byte left (from the last else case in this if
-    // statement).
-    uchar previous = *buffer;
-    *buffer = 0;
-    uchar t = ValueOfIncremental(previous, buffer);
-    if (t == kIncomplete) {
-      // If we have an incomplete character, process both the previous and the
-      // next byte at once.
-      return ValueOfIncremental(next, buffer);
-    } else {
-      // Otherwise, process the previous byte and save the next byte for next
-      // time.
-      DCHECK_EQ(0u, *buffer);
-      *buffer = next;
       return t;
     }
-  } else if (IsContinuationCharacter(next)) {
-    // We're inside of a character, as described by buffer.
 
-    // How many bytes (excluding this one) do we still expect?
-    uint8_t bytes_expected = *buffer >> 28;
-    uint8_t bytes_left = (*buffer >> 24) & 0x0F;
-
-    // Two-byte overlong sequence detection is handled by
-    // NonASCIISequenceLength, so we don't need to check anything here.
-    if (bytes_expected == 3 && bytes_left == 2) {
-      // Check that there are at least 12 bytes of payload.
-      uint8_t lead_payload = *buffer & (0x7F >> bytes_expected);
-      DCHECK_LE(lead_payload, 0xF);
-      if (lead_payload == 0 && next < 0xA0) {
-        // 0xA0 = 0b10100000 (payload: 100000). Overlong sequence: 0 bits from
-        // the first byte, at most 5 from the second byte, and at most 6 from
-        // the third -> in total at most 11.
-
-        *buffer = next;
-        return kBadChar;
-      } else if (lead_payload == 0xD && next > 0x9F) {
-        // The resulting code point would be on a range which is reserved for
-        // UTF-16 surrogate halves.
-        *buffer = next;
-        return kBadChar;
-      }
-    } else if (bytes_expected == 4 && bytes_left == 3) {
-      // Check that there are at least 17 bytes of payload.
-      uint8_t lead_payload = *buffer & (0x7F >> bytes_expected);
-
-      // If the lead byte was bigger than 0xF4 (payload: 4), it's not a start of
-      // any valid character, and this is detected by NonASCIISequenceLength.
-      DCHECK_LE(lead_payload, 0x4);
-      if (lead_payload == 0 && next < 0x90) {
-        // 0x90 = 10010000 (payload 10000). Overlong sequence: 0 bits from the
-        // first byte, at most 4 from the second byte, at most 12 from the third
-        // and fourth bytes -> in total at most 16.
-        *buffer = next;
-        return kBadChar;
-      } else if (lead_payload == 4 && next > 0x8F) {
-        // Invalid code point; value greater than 0b100001111000000000000
-        // (0x10FFFF).
-        *buffer = next;
-        return kBadChar;
-      }
-    }
-
-    bytes_left--;
-    // Update the value.
-    uint32_t value = ((*buffer & 0xFFFFFF) << 6) | (next & 0x3F);
-    if (bytes_left) {
-      *buffer = (bytes_expected << 28 | bytes_left << 24 | value);
-      return kIncomplete;
-    } else {
-#ifdef DEBUG
-      // Check that overlong sequences were already detected.
-      bool sequence_was_too_long = (bytes_expected == 2 && value < 0x80) ||
-                                   (bytes_expected == 3 && value < 0x800) ||
-                                   (bytes_expected == 4 && value < 0x8000);
-      DCHECK(!sequence_was_too_long);
-#endif
+    case State::kReject:
+      *state = State::kAccept;  // Reset so decoding can resume.
       *buffer = 0;
-      return value;
-    }
-  } else {
-    // Within a character, but not a continuation character? Then the
-    // previous char was a bad char. But we need to save the current
-    // one.
-    *buffer = next;
-    return kBadChar;
+
+      // If we hit a bad byte, we need to determine if we were trying to start
+      // a sequence or continue one. If we were trying to start a sequence,
+      // that means it's just an invalid lead byte and we need to continue to
+      // the next (which we already did above). If we were already in a
+      // sequence, we need to reprocess this same byte after resetting to the
+      // initial state.
+      if (old_state != State::kAccept) {
+        // We were trying to continue a sequence, so let's reprocess this byte
+        // next time.
+        *cursor -= 1;
+      }
+      return kBadChar;
+
+    default:
+      return kIncomplete;  // Mid-sequence; more bytes are required.
   }
 }
 
-uchar Utf8::ValueOfIncrementalFinish(Utf8IncrementalBuffer* buffer) {
-  DCHECK_NOT_NULL(buffer);
-  if (*buffer == 0) {
+// Finishes incremental decoding: returns kBufferEmpty if no sequence is
+// pending, otherwise resets *state to kAccept and returns kBadChar.
+uchar Utf8::ValueOfIncrementalFinish(State* state) {
+  if (*state == State::kAccept) {
     return kBufferEmpty;
   } else {
-    // Process left-over chars. An incomplete char at the end maps to kBadChar.
-    uchar t = ValueOfIncremental(0, buffer);
-    return (t == kIncomplete) ? kBadChar : t;
+    DCHECK_GT(*state, State::kAccept);  // Must be a mid-sequence state.
+    *state = State::kAccept;
+    return kBadChar;
   }
 }
 
 bool Utf8::ValidateEncoding(const byte* bytes, size_t length) {
-  const byte* cursor = bytes;
-  const byte* end = bytes + length;
-
-  while (cursor < end) {
-    // Skip over single-byte values.
-    if (*cursor <= kMaxOneByteChar) {
-      ++cursor;
-      continue;
-    }
-
-    // Get the length the the character.
-    size_t seq_length = NonASCIISequenceLength(*cursor);
-    // For some invalid characters NonASCIISequenceLength returns 0.
-    if (seq_length == 0) return false;
-
-    const byte* char_end = cursor + seq_length;
-
-    // Return false if we do not have enough bytes for the character.
-    if (char_end > end) return false;
-
-    // Check if the bytes of the character are continuation bytes.
-    for (const byte* i = cursor + 1; i < char_end; ++i) {
-      if (!IsContinuationCharacter(*i)) return false;
-    }
-
-    // Check overly long sequences & other conditions.
-    if (seq_length == 3) {
-      if (cursor[0] == 0xE0 && (cursor[1] < 0xA0 || cursor[1] > 0xBF)) {
-        // Overlong three-byte sequence?
-        return false;
-      } else if (cursor[0] == 0xED && (cursor[1] < 0x80 || cursor[1] > 0x9F)) {
-        // High and low surrogate halves?
-        return false;
-      }
-    } else if (seq_length == 4) {
-      if (cursor[0] == 0xF0 && (cursor[1] < 0x90 || cursor[1] > 0xBF)) {
-        // Overlong four-byte sequence.
-        return false;
-      } else if (cursor[0] == 0xF4 && (cursor[1] < 0x80 || cursor[1] > 0x8F)) {
-        // Code points outside of the Unicode range.
-        return false;
-      }
-    }
-    cursor = char_end;
+  State state = State::kAccept;
+  Utf8IncrementalBuffer throw_away = 0;  // Decoded bits are never read here.
+  for (size_t i = 0; i < length && state != State::kReject; i++) {
+    Utf8DfaDecoder::Decode(bytes[i], &state, &throw_away);
   }
-  return true;
+  return state == State::kAccept;  // False if rejected or cut mid-sequence.
 }
 
 // Uppercase:            point.category == 'Lu'
diff --git a/src/unicode.h b/src/unicode.h
index 04d58f3650..c6ce9a8eb2 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -7,6 +7,7 @@
 
 #include <sys/types.h>
 #include "src/globals.h"
+#include "src/third_party/utf8-decoder/utf8-decoder.h"
 #include "src/utils.h"
 /**
  * \file
@@ -129,6 +130,8 @@ class Utf16 {
 
 class V8_EXPORT_PRIVATE Utf8 {
  public:
+  using State = Utf8DfaDecoder::State;
+
   static inline uchar Length(uchar chr, int previous);
   static inline unsigned EncodeOneByte(char* out, uint8_t c);
   static inline unsigned Encode(char* out,
@@ -158,9 +161,9 @@ class V8_EXPORT_PRIVATE Utf8 {
   static inline uchar ValueOf(const byte* str, size_t length, size_t* cursor);
 
   typedef uint32_t Utf8IncrementalBuffer;
-  static uchar ValueOfIncremental(byte next_byte,
+  static uchar ValueOfIncremental(byte next_byte, size_t* cursor, State* state,
                                   Utf8IncrementalBuffer* buffer);
-  static uchar ValueOfIncrementalFinish(Utf8IncrementalBuffer* buffer);
+  static uchar ValueOfIncrementalFinish(State* state);
 
   // Excludes non-characters from the set of valid code points.
   static inline bool IsValidCharacter(uchar c);
diff --git a/src/v8.gyp b/src/v8.gyp
index 5a2461868d..753732d19b 100644
--- a/src/v8.gyp
+++ b/src/v8.gyp
@@ -1406,6 +1406,7 @@
         'strtod.h',
         'ic/stub-cache.cc',
         'ic/stub-cache.h',
+        'third_party/utf8-decoder/utf8-decoder.h',
         'tracing/trace-event.cc',
         'tracing/trace-event.h',
         'tracing/traced-value.cc',
diff --git a/test/cctest/unicode-helpers.h b/test/cctest/unicode-helpers.h
index 891424a1cb..ca75fb65d7 100644
--- a/test/cctest/unicode-helpers.h
+++ b/test/cctest/unicode-helpers.h
@@ -19,12 +19,16 @@ static int Ucs2CharLength(unibrow::uchar c) {
 
 static int Utf8LengthHelper(const char* s) {
   unibrow::Utf8::Utf8IncrementalBuffer buffer(unibrow::Utf8::kBufferEmpty);
+  unibrow::Utf8::State state = unibrow::Utf8::State::kAccept;
+
   int length = 0;
-  for (; *s != '\0'; s++) {
-    unibrow::uchar tmp = unibrow::Utf8::ValueOfIncremental(*s, &buffer);
+  size_t i = 0;  // Index into s; updated by ValueOfIncremental.
+  while (s[i] != '\0') {
+    unibrow::uchar tmp =
+        unibrow::Utf8::ValueOfIncremental(s[i], &i, &state, &buffer);
     length += Ucs2CharLength(tmp);
   }
-  unibrow::uchar tmp = unibrow::Utf8::ValueOfIncrementalFinish(&buffer);
+  unibrow::uchar tmp = unibrow::Utf8::ValueOfIncrementalFinish(&state);
   length += Ucs2CharLength(tmp);
   return length;
 }
diff --git a/test/unittests/unicode-unittest.cc b/test/unittests/unicode-unittest.cc
index 06e47aedb1..e5ccaca7b1 100644
--- a/test/unittests/unicode-unittest.cc
+++ b/test/unittests/unicode-unittest.cc
@@ -37,13 +37,15 @@ void DecodeNormally(const std::vector<byte>& bytes,
 void DecodeIncrementally(const std::vector<byte>& bytes,
                          std::vector<unibrow::uchar>* output) {
   unibrow::Utf8::Utf8IncrementalBuffer buffer = 0;
-  for (auto b : bytes) {
-    unibrow::uchar result = unibrow::Utf8::ValueOfIncremental(b, &buffer);
+  unibrow::Utf8::State state = unibrow::Utf8::State::kAccept;
+  for (size_t i = 0; i < bytes.size();) {
+    unibrow::uchar result =
+        unibrow::Utf8::ValueOfIncremental(bytes[i], &i, &state, &buffer);
     if (result != unibrow::Utf8::kIncomplete) {
       output->push_back(result);
     }
   }
-  unibrow::uchar result = unibrow::Utf8::ValueOfIncrementalFinish(&buffer);
+  unibrow::uchar result = unibrow::Utf8::ValueOfIncrementalFinish(&state);
   if (result != unibrow::Utf8::kBufferEmpty) {
     output->push_back(result);
   }