// Copyright 2019 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
#define V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <vector>

namespace v8_inspector_protocol_encoding {
// This library is designed to be portable. The only allowed dependencies
// are the C/C++ standard libraries, up to C++11. We support both 32 bit
// and 64 bit architectures.
//
// Types used below:
// uint8_t: a byte, e.g. for raw bytes or UTF8 characters
// uint16_t: two bytes, e.g. for UTF16 characters
// For input parameters:
//   span<uint8_t>: pointer to bytes and length
//   span<uint16_t>: pointer to UTF16 chars and length
// For output parameters:
//   std::vector<uint8_t> - Owned segment of bytes / utf8 characters and length.
//   std::string - Same, for compatibility, even though char is signed.

// =============================================================================
// span - sequence of bytes
// =============================================================================

// This template is similar to std::span, which will be included in C++20.
// A non-owning, read-only view of |size| consecutive elements of type T
// starting at |data|. The span never copies or frees the elements; the
// caller must keep the underlying storage alive while the span is in use.
template <typename T>
class span {
 public:
  using index_type = size_t;

  // Creates an empty span (data() == nullptr, size() == 0).
  span() : data_(nullptr), size_(0) {}
  span(const T* data, index_type size) : data_(data), size_(size) {}

  const T* data() const { return data_; }

  const T* begin() const { return data_; }
  const T* end() const { return data_ + size_; }

  // No bounds checking is performed; |idx| must be < size().
  const T& operator[](index_type idx) const { return data_[idx]; }

  // Returns the |count| elements starting at |offset|. No bounds checking
  // is performed; the caller must ensure offset + count <= size().
  span<T> subspan(index_type offset, index_type count) const {
    return span(data_ + offset, count);
  }

  // Returns everything from |offset| to the end. No bounds checking is
  // performed; the caller must ensure offset <= size(), otherwise the
  // unsigned subtraction wraps around.
  span<T> subspan(index_type offset) const {
    return span(data_ + offset, size_ - offset);
  }

  bool empty() const { return size_ == 0; }

  // Number of elements / number of bytes covered by this span.
  index_type size() const { return size_; }
  index_type size_bytes() const { return size_ * sizeof(T); }

 private:
  const T* data_;
  index_type size_;
};

// Views the elements of |v|. The span is invalidated by anything that
// invalidates v.data().
template <typename T>
span<T> SpanFrom(const std::vector<T>& v) {
  return span<T>(v.data(), v.size());
}

// Views a character array as bytes, dropping its last element (the
// terminating NUL when |str| is a string literal).
template <size_t N>
span<uint8_t> SpanFrom(const char (&str)[N]) {
  return span<uint8_t>(reinterpret_cast<const uint8_t*>(str), N - 1);
}

// Views a NUL terminated C string as bytes (excluding the terminator).
// A null pointer yields an empty span.
inline span<uint8_t> SpanFrom(const char* str) {
  return str ? span<uint8_t>(reinterpret_cast<const uint8_t*>(str),
                             std::strlen(str))
             : span<uint8_t>();
}

// Views the characters of |v| as bytes.
inline span<uint8_t> SpanFrom(const std::string& v) {
  return span<uint8_t>(reinterpret_cast<const uint8_t*>(v.data()), v.size());
}

// Less than / equality comparison functions for sorting / searching for byte
// spans. These are similar to absl::string_view's < and == operators.

// Lexicographic byte-wise ordering; if one span is a prefix of the other,
// the shorter one is the smaller one.
inline bool SpanLessThan(span<uint8_t> x, span<uint8_t> y) noexcept {
  auto min_size = std::min(x.size(), y.size());
  // memcmp is skipped for empty input since data() may be nullptr then.
  const int r = min_size == 0 ? 0 : std::memcmp(x.data(), y.data(), min_size);
  return (r < 0) || (r == 0 && x.size() < y.size());
}

// True iff both spans have the same length and the same bytes.
inline bool SpanEquals(span<uint8_t> x, span<uint8_t> y) noexcept {
  auto len = x.size();
  if (len != y.size())
    return false;
  // Identical pointers and empty spans are equal without looking at the
  // bytes; this also avoids passing nullptr to memcmp.
  return x.data() == y.data() || len == 0 ||
         std::memcmp(x.data(), y.data(), len) == 0;
}

// =============================================================================
// Status and Error codes
// =============================================================================
enum class Error {
  OK = 0,
  // JSON parsing errors - json_parser.{h,cc}.
  JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
  JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
  JSON_PARSER_NO_INPUT = 0x03,
  JSON_PARSER_INVALID_TOKEN = 0x04,
  JSON_PARSER_INVALID_NUMBER = 0x05,
  JSON_PARSER_INVALID_STRING = 0x06,
  JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
  JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
  JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
  JSON_PARSER_COLON_EXPECTED = 0x0a,
  JSON_PARSER_UNEXPECTED_MAP_END = 0x0b,
  JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c,
  JSON_PARSER_VALUE_EXPECTED = 0x0d,

  // CBOR errors.
  CBOR_INVALID_INT32 = 0x0e,
  CBOR_INVALID_DOUBLE = 0x0f,
  CBOR_INVALID_ENVELOPE = 0x10,
  CBOR_ENVELOPE_CONTENTS_LENGTH_MISMATCH = 0x11,
  CBOR_MAP_OR_ARRAY_EXPECTED_IN_ENVELOPE = 0x12,
  CBOR_INVALID_STRING8 = 0x13,
  CBOR_INVALID_STRING16 = 0x14,
  CBOR_INVALID_BINARY = 0x15,
  CBOR_UNSUPPORTED_VALUE = 0x16,
  CBOR_NO_INPUT = 0x17,
  CBOR_INVALID_START_BYTE = 0x18,
  CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x19,
  CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x1a,
  CBOR_UNEXPECTED_EOF_IN_MAP = 0x1b,
  CBOR_INVALID_MAP_KEY = 0x1c,
  CBOR_STACK_LIMIT_EXCEEDED = 0x1d,
  CBOR_TRAILING_JUNK = 0x1e,
  CBOR_MAP_START_EXPECTED = 0x1f,
  CBOR_MAP_STOP_EXPECTED = 0x20,
  CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x21,
};

// A status value with position that can be copied. The default status
// is OK. Usually, error status values should come with a valid position.
struct Status {
  // Sentinel for "no position": the largest value representable in size_t.
  static constexpr size_t npos() { return std::numeric_limits<size_t>::max(); }

  bool ok() const { return error == Error::OK; }

  Error error = Error::OK;
  size_t pos = npos();
  Status(Error error, size_t pos) : error(error), pos(pos) {}
  Status() = default;

  // Returns a 7 bit US-ASCII string, either "OK" or an error message
  // that includes the position.
  std::string ToASCIIString() const;

 private:
  std::string ToASCIIString(const char* msg) const;
};

// Handler interface for parser events emitted by a streaming parser.
// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder,
// json::ParseJSON.
class StreamingParserHandler { public: virtual ~StreamingParserHandler() = default; virtual void HandleMapBegin() = 0; virtual void HandleMapEnd() = 0; virtual void HandleArrayBegin() = 0; virtual void HandleArrayEnd() = 0; virtual void HandleString8(span chars) = 0; virtual void HandleString16(span chars) = 0; virtual void HandleBinary(span bytes) = 0; virtual void HandleDouble(double value) = 0; virtual void HandleInt32(int32_t value) = 0; virtual void HandleBool(bool value) = 0; virtual void HandleNull() = 0; // The parser may send one error even after other events have already // been received. Client code is reponsible to then discard the // already processed events. // |error| must be an eror, as in, |error.is_ok()| can't be true. virtual void HandleError(Status error) = 0; }; namespace cbor { // The binary encoding for the inspector protocol follows the CBOR specification // (RFC 7049). Additional constraints: // - Only indefinite length maps and arrays are supported. // - Maps and arrays are wrapped with an envelope, that is, a // CBOR tag with value 24 followed by a byte string specifying // the byte length of the enclosed map / array. The byte string // must use a 32 bit wide length. // - At the top level, a message must be an indefinite length map // wrapped by an envelope. // - Maximal size for messages is 2^32 (4 GB). // - For scalars, we support only the int32_t range, encoded as // UNSIGNED/NEGATIVE (major types 0 / 1). // - UTF16 strings, including with unbalanced surrogate pairs, are encoded // as CBOR BYTE_STRING (major type 2). For such strings, the number of // bytes encoded must be even. // - UTF8 strings (major type 3) are supported. // - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never // as UTF16 strings. // - Arbitrary byte arrays, in the inspector protocol called 'binary', // are encoded as BYTE_STRING (major type 2), prefixed with a byte // indicating base64 when rendered as JSON. 
// ============================================================================= // Detecting CBOR content // ============================================================================= // The first byte for an envelope, which we use for wrapping dictionaries // and arrays; and the byte that indicates a byte string with 32 bit length. // These two bytes start an envelope, and thereby also any CBOR message // produced or consumed by this protocol. See also |EnvelopeEncoder| below. uint8_t InitialByteForEnvelope(); uint8_t InitialByteFor32BitLengthByteString(); // Checks whether |msg| is a cbor message. bool IsCBORMessage(span msg); // ============================================================================= // Encoding individual CBOR items // ============================================================================= // Some constants for CBOR tokens that only take a single byte on the wire. uint8_t EncodeTrue(); uint8_t EncodeFalse(); uint8_t EncodeNull(); uint8_t EncodeIndefiniteLengthArrayStart(); uint8_t EncodeIndefiniteLengthMapStart(); uint8_t EncodeStop(); // Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE| // (major type 1) iff < 0. void EncodeInt32(int32_t value, std::vector* out); void EncodeInt32(int32_t value, std::string* out); // Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16 // character in |in| is emitted with most significant byte first, // appending to |out|. void EncodeString16(span in, std::vector* out); void EncodeString16(span in, std::string* out); // Encodes a UTF8 string |in| as STRING (major type 3). void EncodeString8(span in, std::vector* out); void EncodeString8(span in, std::string* out); // Encodes the given |latin1| string as STRING8. // If any non-ASCII character is present, it will be represented // as a 2 byte UTF8 sequence. 
void EncodeFromLatin1(span latin1, std::vector* out); void EncodeFromLatin1(span latin1, std::string* out); // Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII. // Otherwise, encodes as STRING16. void EncodeFromUTF16(span utf16, std::vector* out); void EncodeFromUTF16(span utf16, std::string* out); // Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with // definitive length, prefixed with tag 22 indicating expected conversion to // base64 (see RFC 7049, Table 3 and Section 2.4.4.2). void EncodeBinary(span in, std::vector* out); void EncodeBinary(span in, std::string* out); // Encodes / decodes a double as Major type 7 (SIMPLE_VALUE), // with additional info = 27, followed by 8 bytes in big endian. void EncodeDouble(double value, std::vector* out); void EncodeDouble(double value, std::string* out); // ============================================================================= // cbor::EnvelopeEncoder - for wrapping submessages // ============================================================================= // An envelope indicates the byte length of a wrapped item. // We use this for maps and array, which allows the decoder // to skip such (nested) values whole sale. // It's implemented as a CBOR tag (major type 6) with additional // info = 24, followed by a byte string with a 32 bit length value; // so the maximal structure that we can wrap is 2^32 bits long. // See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1 class EnvelopeEncoder { public: // Emits the envelope start bytes and records the position for the // byte size in |byte_size_pos_|. Also emits empty bytes for the // byte sisze so that encoding can continue. void EncodeStart(std::vector* out); void EncodeStart(std::string* out); // This records the current size in |out| at position byte_size_pos_. // Returns true iff successful. 
bool EncodeStop(std::vector* out); bool EncodeStop(std::string* out); private: size_t byte_size_pos_ = 0; }; // ============================================================================= // cbor::NewCBOREncoder - for encoding from a streaming parser // ============================================================================= // This can be used to convert to CBOR, by passing the return value to a parser // that drives it. The handler will encode into |out|, and iff an error occurs // it will set |status| to an error and clear |out|. Otherwise, |status.ok()| // will be |true|. std::unique_ptr NewCBOREncoder( std::vector* out, Status* status); std::unique_ptr NewCBOREncoder(std::string* out, Status* status); // ============================================================================= // cbor::CBORTokenizer - for parsing individual CBOR items // ============================================================================= // Tags for the tokens within a CBOR message that CBORTokenizer understands. // Note that this is not the same terminology as the CBOR spec (RFC 7049), // but rather, our adaptation. For instance, we lump unsigned and signed // major type into INT32 here (and disallow values outside the int32_t range). enum class CBORTokenTag { // Encountered an error in the structure of the message. Consult // status() for details. ERROR_VALUE, // Booleans and NULL. TRUE_VALUE, FALSE_VALUE, NULL_VALUE, // An int32_t (signed 32 bit integer). INT32, // A double (64 bit floating point). DOUBLE, // A UTF8 string. STRING8, // A UTF16 string. STRING16, // A binary string. BINARY, // Starts an indefinite length map; after the map start we expect // alternating keys and values, followed by STOP. MAP_START, // Starts an indefinite length array; after the array start we // expect values, followed by STOP. ARRAY_START, // Ends a map or an array. STOP, // An envelope indicator, wrapping a map or array. 
// Internally this carries the byte length of the wrapped // map or array. While CBORTokenizer::Next() will read / skip the entire // envelope, CBORTokenizer::EnterEnvelope() reads the tokens // inside of it. ENVELOPE, // We've reached the end there is nothing else to read. DONE, }; // The major types from RFC 7049 Section 2.1. enum class MajorType { UNSIGNED = 0, NEGATIVE = 1, BYTE_STRING = 2, STRING = 3, ARRAY = 4, MAP = 5, TAG = 6, SIMPLE_VALUE = 7 }; // CBORTokenizer segments a CBOR message, presenting the tokens therein as // numbers, strings, etc. This is not a complete CBOR parser, but makes it much // easier to implement one (e.g. ParseCBOR, above). It can also be used to parse // messages partially. class CBORTokenizer { public: explicit CBORTokenizer(span bytes); ~CBORTokenizer(); // Identifies the current token that we're looking at, // or ERROR_VALUE (in which ase ::Status() has details) // or DONE (if we're past the last token). CBORTokenTag TokenTag() const; // Advances to the next token. void Next(); // Can only be called if TokenTag() == CBORTokenTag::ENVELOPE. // While Next() would skip past the entire envelope / what it's // wrapping, EnterEnvelope positions the cursor inside of the envelope, // letting the client explore the nested structure. void EnterEnvelope(); // If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes // the error more precisely; otherwise it'll be set to Error::OK. // In either case, Status().pos is the current position. struct Status Status() const; // The following methods retrieve the token values. They can only // be called if TokenTag() matches. // To be called only if ::TokenTag() == CBORTokenTag::INT32. int32_t GetInt32() const; // To be called only if ::TokenTag() == CBORTokenTag::DOUBLE. double GetDouble() const; // To be called only if ::TokenTag() == CBORTokenTag::STRING8. span GetString8() const; // Wire representation for STRING16 is low byte first (little endian). 
// To be called only if ::TokenTag() == CBORTokenTag::STRING16. span GetString16WireRep() const; // To be called only if ::TokenTag() == CBORTokenTag::BINARY. span GetBinary() const; // To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE. span GetEnvelopeContents() const; private: void ReadNextToken(bool enter_envelope); void SetToken(CBORTokenTag token, size_t token_byte_length); void SetError(Error error); span bytes_; CBORTokenTag token_tag_; struct Status status_; size_t token_byte_length_; MajorType token_start_type_; uint64_t token_start_internal_value_; }; // ============================================================================= // cbor::ParseCBOR - for receiving streaming parser events for CBOR messages // ============================================================================= // Parses a CBOR encoded message from |bytes|, sending events to // |out|. If an error occurs, sends |out->HandleError|, and parsing stops. // The client is responsible for discarding the already received information in // that case. void ParseCBOR(span bytes, StreamingParserHandler* out); // ============================================================================= // cbor::AppendString8EntryToMap - for limited in-place editing of messages // ============================================================================= // Modifies the |cbor| message by appending a new key/value entry at the end // of the map. Patches up the envelope size; Status.ok() iff successful. // If not successful, |cbor| may be corrupted after this call. Status AppendString8EntryToCBORMap(span string8_key, span string8_value, std::vector* cbor); Status AppendString8EntryToCBORMap(span string8_key, span string8_value, std::string* cbor); namespace internals { // Exposed only for writing tests. 
size_t ReadTokenStart(span bytes, cbor::MajorType* type, uint64_t* value); void WriteTokenStart(cbor::MajorType type, uint64_t value, std::vector* encoded); void WriteTokenStart(cbor::MajorType type, uint64_t value, std::string* encoded); } // namespace internals } // namespace cbor namespace json { // Client code must provide an instance. Implementation should delegate // to whatever is appropriate. class Platform { public: virtual ~Platform() = default; // Parses |str| into |result|. Returns false iff there are // leftover characters or parsing errors. virtual bool StrToD(const char* str, double* result) const = 0; // Prints |value| in a format suitable for JSON. virtual std::unique_ptr DToStr(double value) const = 0; }; // ============================================================================= // json::NewJSONEncoder - for encoding streaming parser events as JSON // ============================================================================= // Returns a handler object which will write ascii characters to |out|. // |status->ok()| will be false iff the handler routine HandleError() is called. // In that case, we'll stop emitting output. // Except for calling the HandleError routine at any time, the client // code must call the Handle* methods in an order in which they'd occur // in valid JSON; otherwise we may crash (the code uses assert). 
std::unique_ptr NewJSONEncoder( const Platform* platform, std::vector* out, Status* status); std::unique_ptr NewJSONEncoder(const Platform* platform, std::string* out, Status* status); // ============================================================================= // json::ParseJSON - for receiving streaming parser events for JSON // ============================================================================= void ParseJSON(const Platform& platform, span chars, StreamingParserHandler* handler); void ParseJSON(const Platform& platform, span chars, StreamingParserHandler* handler); // ============================================================================= // json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding // ============================================================================= Status ConvertCBORToJSON(const Platform& platform, span cbor, std::string* json); Status ConvertCBORToJSON(const Platform& platform, span cbor, std::vector* json); Status ConvertJSONToCBOR(const Platform& platform, span json, std::vector* cbor); Status ConvertJSONToCBOR(const Platform& platform, span json, std::vector* cbor); Status ConvertJSONToCBOR(const Platform& platform, span json, std::string* cbor); Status ConvertJSONToCBOR(const Platform& platform, span json, std::string* cbor); } // namespace json } // namespace v8_inspector_protocol_encoding #endif // V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_