[DevTools] Roll inspector_protocol.

Fixes edge cases for parsing / serializing codepoint 0xffff in JSON with UTF16.

New Rev: 0213a8545f6362cd1cd5091cedf29747736552e8

Change-Id: I48b174cf1bd9263ace002996094f7143a1248766
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2020502
Auto-Submit: Johannes Henkel <johannes@chromium.org>
Reviewed-by: Dmitry Gozman <dgozman@chromium.org>
Commit-Queue: Dmitry Gozman <dgozman@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65985}
2020-01-24 12:22:33 -08:00 · 2020-01-24 12:22:33 -08:00 · ab1b0ed8d5
commit ab1b0ed8d5
parent cfb2d89a92
3 changed files with 71 additions and 3 deletions
--- a/third_party/inspector_protocol/README.v8
+++ b/third_party/inspector_protocol/README.v8
@@ -2,7 +2,7 @@ Name: inspector protocol
 Short Name: inspector_protocol
 URL: https://chromium.googlesource.com/deps/inspector_protocol/
 Version: 0
-Revision: a84e91f6696a0b76e1a73286c9c2765154de9889
+Revision: 0213a8545f6362cd1cd5091cedf29747736552e8
 License: BSD
 License File: LICENSE
 Security Critical: no
--- a/third_party/inspector_protocol/crdtp/json.cc
+++ b/third_party/inspector_protocol/crdtp/json.cc
@@ -249,7 +249,7 @@ class JSONEncoder : public ParserHandler {
        // So, now we transcode to UTF16,
        // using the math described at https://en.wikipedia.org/wiki/UTF-16,
        // for either one or two 16 bit characters.
-        if (codepoint < 0xffff) {
+        if (codepoint <= 0xffff) {
          Emit("\\u");
          PrintHex(static_cast<uint16_t>(codepoint), out_);
          continue;
@@ -755,7 +755,7 @@ class JsonParser {
        // So, now we transcode to UTF16,
        // using the math described at https://en.wikipedia.org/wiki/UTF-16,
        // for either one or two 16 bit characters.
-        if (codepoint < 0xffff) {
+        if (codepoint <= 0xffff) {
          output->push_back(codepoint);
          continue;
        }
--- a/third_party/inspector_protocol/crdtp/json_test.cc
+++ b/third_party/inspector_protocol/crdtp/json_test.cc
@@ -81,6 +81,43 @@ TEST(JsonEncoder, NotAContinuationByte) {
  EXPECT_EQ("\"Hello\"", out);  // "Hello" shows we restarted at 'H'.
 }

+TEST(JsonEncoder, EscapesLoneHighSurrogates) {
+  // This tests that the JSON encoder escapes lone high surrogates, i.e.
+  // invalid code points in the range from 0xD800 to 0xDBFF. In
+  // unescaped form, these cannot be represented in well-formed UTF-8 or
+  // UTF-16.
+  std::vector<uint16_t> chars = {'a', 0xd800, 'b', 0xdada, 'c', 0xdbff, 'd'};
+  std::string out;
+  Status status;
+  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
+  writer->HandleString16(span<uint16_t>(chars.data(), chars.size()));
+  EXPECT_EQ("\"a\\ud800b\\udadac\\udbffd\"", out);
+}
+
+TEST(JsonEncoder, EscapesLoneLowSurrogates) {
+  // This tests that the JSON encoder escapes lone low surrogates, i.e.
+  // invalid code points in the range from 0xDC00 to 0xDFFF. In
+  // unescaped form, these cannot be represented in well-formed UTF-8 or
+  // UTF-16.
+  std::vector<uint16_t> chars = {'a', 0xdc00, 'b', 0xdede, 'c', 0xdfff, 'd'};
+  std::string out;
+  Status status;
+  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
+  writer->HandleString16(span<uint16_t>(chars.data(), chars.size()));
+  EXPECT_EQ("\"a\\udc00b\\udedec\\udfffd\"", out);
+}
+
+TEST(JsonEncoder, EscapesFFFF) {
+  // This tests that the JSON encoder will escape the UTF16 input 0xffff as
+  // \uffff; useful to check this since it's an edge case.
+  std::vector<uint16_t> chars = {'a', 'b', 'c', 0xffff, 'd'};
+  std::string out;
+  Status status;
+  std::unique_ptr<ParserHandler> writer = NewJSONEncoder(&out, &status);
+  writer->HandleString16(span<uint16_t>(chars.data(), chars.size()));
+  EXPECT_EQ("\"abc\\uffffd\"", out);
+}
+
 TEST(JsonEncoder, IncompleteUtf8Sequence) {
  std::string out;
  Status status;
@@ -248,6 +285,7 @@ class Log : public ParserHandler {
  }

  void HandleString16(span<uint16_t> chars) override {
+    raw_log_string16_.emplace_back(chars.begin(), chars.end());
    log_ << "string16: " << UTF16ToUTF8(chars) << "\n";
  }

@@ -271,10 +309,15 @@ class Log : public ParserHandler {

  std::string str() const { return status_.ok() ? log_.str() : ""; }

+  std::vector<std::vector<uint16_t>> raw_log_string16() const {
+    return raw_log_string16_;
+  }
+
  Status status() const { return status_; }

 private:
  std::ostringstream log_;
+  std::vector<std::vector<uint16_t>> raw_log_string16_;
  Status status_;
 };

@@ -395,6 +438,31 @@ TEST_F(JsonParserTest, Unicode_ParseUtf16) {
      log_.str());
 }

+TEST_F(JsonParserTest, Unicode_ParseUtf16_SingleEscapeUpToFFFF) {
+  // 0xFFFF is the max codepoint that can be represented as a single \u escape.
+  // One way to write this is \uffff, another way is to encode it as a 3 byte
+  // UTF-8 sequence (0xef 0xbf 0xbf). Both are equivalent.
+
+  // Example with both ways of encoding code point 0xFFFF in a JSON string.
+  std::string json = "{\"escape\": \"\xef\xbf\xbf or \\uffff\"}";
+  ParseJSON(SpanFrom(json), &log_);
+  EXPECT_TRUE(log_.status().ok());
+
+  // Shows both inputs result in equivalent output once converted to UTF-8.
+  EXPECT_EQ(
+      "map begin\n"
+      "string16: escape\n"
+      "string16: \xEF\xBF\xBF or \xEF\xBF\xBF\n"
+      "map end\n",
+      log_.str());
+
+  // Make an even stronger assertion: the parser represents code point 0xFFFF
+  // as a single UTF-16 code unit, for both input encodings.
+  ASSERT_EQ(2u, log_.raw_log_string16().size());
+  std::vector<uint16_t> expected = {0xffff, ' ', 'o', 'r', ' ', 0xffff};
+  EXPECT_EQ(expected, log_.raw_log_string16()[1]);
+}
+
 TEST_F(JsonParserTest, Unicode_ParseUtf8) {
  // Used below:
  // гласность - example for 2 byte utf8, Russian word "glasnost"