// Copyright (c) 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Contains utils for reading, writing and debug printing bit streams.

#ifndef LIBSPIRV_UTIL_BIT_STREAM_H_
#define LIBSPIRV_UTIL_BIT_STREAM_H_

#include <algorithm>
#include <bitset>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

namespace spvtools {
namespace utils {

// Returns rounded down log2(val). log2(0) is considered 0.
size_t Log2U64(uint64_t val);

// Terminology:
// Bits - usually used for a uint64 word, first bit is the lowest.
// Stream - std::string of '0' and '1', read left-to-right,
//          i.e. first bit is at the front and not at the end as in
//          std::bitset::to_string().
// Bitset - std::bitset corresponding to uint64 bits and to reverse(stream).

// Converts number of bits to a respective number of chunks of size N.
// For example NumBitsToNumWords<8> returns how many bytes are needed to store
// |num_bits|.
template <size_t N>
inline size_t NumBitsToNumWords(size_t num_bits) {
  return (num_bits + (N - 1)) / N;
}

// Returns value of the same type as |in|, where all but the first |num_bits|
// are set to zero.
// |num_bits| must be no greater than the bit width of |T|.
template <typename T>
inline T GetLowerBits(T in, size_t num_bits) {
  constexpr size_t kTypeBits = sizeof(T) * 8;
  assert(num_bits <= kTypeBits);
  // Shifting by the full width of the type is undefined, so the "keep
  // everything" case is handled separately.
  if (num_bits == kTypeBits) return in;
  const T mask = static_cast<T>((T(1) << num_bits) - T(1));
  return static_cast<T>(in & mask);
}

// Encodes signed integer as unsigned in zigzag order:
//  0 -> 0
// -1 -> 1
//  1 -> 2
// -2 -> 3
//  2 -> 4
// Motivation: -1 is 0xFF...FF which doesn't work very well with
// WriteVariableWidth which prefers to have as many 0 bits as possible.
inline uint64_t EncodeZigZag(int64_t val) {
  // The shift is done on the unsigned representation: left-shifting a negative
  // signed value is undefined behavior before C++20.
  const uint64_t sign_mask = val < 0 ? ~uint64_t(0) : uint64_t(0);
  return (static_cast<uint64_t>(val) << 1) ^ sign_mask;
}

// Decodes signed integer encoded with EncodeZigZag.
inline int64_t DecodeZigZag(uint64_t val) {
  // |val| >> 1 is at most 2^63 - 1, so it always fits into int64_t and the
  // negative branch cannot overflow.
  const int64_t half = static_cast<int64_t>(val >> 1);
  if (val & 1) {
    // Negative.
    // 1 -> -1
    // 3 -> -2
    // 5 -> -3
    return -half - 1;
  }
  // Non-negative.
  // 0 -> 0
  // 2 -> 1
  // 4 -> 2
  return half;
}

// Encodes signed integer as unsigned. This is a generalized version of
// EncodeZigZag, designed to favor small positive numbers.
// Values are transformed in blocks of 2^|block_exponent|.
// If |block_exponent| is zero, then this degenerates into normal EncodeZigZag.
// Example when |block_exponent| is 1 (return value is the index):
// 0, 1, -1, -2, 2, 3, -3, -4, 4, 5, -5, -6, 6, 7, -7, -8
// Example when |block_exponent| is 2:
// 0, 1, 2, 3, -1, -2, -3, -4, 4, 5, 6, 7, -5, -6, -7, -8
// |block_exponent| must be less than 64.
inline uint64_t EncodeZigZag(int64_t val, size_t block_exponent) {
  assert(block_exponent < 64);
  const bool is_negative = val < 0;
  // For negative values ~val == -val - 1, but unlike the arithmetic form it
  // does not overflow for the most negative int64_t.
  const uint64_t uval = static_cast<uint64_t>(is_negative ? ~val : val);
  const uint64_t block_num =
      ((uval >> block_exponent) << 1) + (is_negative ? 1u : 0u);
  const uint64_t pos = GetLowerBits(uval, block_exponent);
  return (block_num << block_exponent) + pos;
}

// Decodes signed integer encoded with EncodeZigZag. |block_exponent| must be
// the same.
inline int64_t DecodeZigZag(uint64_t val, size_t block_exponent) {
  assert(block_exponent < 64);
  const uint64_t block_num = val >> block_exponent;
  const uint64_t pos = GetLowerBits(val, block_exponent);
  // The magnitude is strictly below 2^63: the block index loses its lowest
  // (sign) bit before being shifted back, and |pos| only fills the
  // |block_exponent| bits that the shift left empty.
  const int64_t magnitude =
      static_cast<int64_t>(((block_num >> 1) << block_exponent) + pos);
  // Odd blocks hold negative values, even blocks hold non-negative values.
  return (block_num & 1) ? -magnitude - 1 : magnitude;
}

// Converts |buffer| to a stream of '0' and '1'.
// Every word contributes exactly 8 * sizeof(T) characters, lowest bit first.
template <typename T>
std::string BufferToStream(const std::vector<T>& buffer) {
  constexpr size_t kWordBits = sizeof(T) * 8;
  std::string stream;
  stream.reserve(buffer.size() * kWordBits);
  for (const T& word : buffer) {
    const std::string word_bits = std::bitset<kWordBits>(word).to_string();
    // Strings generated by std::bitset::to_string are read right to left.
    // Appending them back to front yields the left-to-right order.
    stream.append(word_bits.rbegin(), word_bits.rend());
  }
  return stream;
}

// Converts a left-to-right input string of '0' and '1' to a buffer of |T|
// words. If the length of |str| is not a multiple of the word size, the
// remaining bits are stored in the low bits of one extra trailing word.
template <typename T>
std::vector<T> StreamToBuffer(std::string str) {
  constexpr size_t kWordBits = sizeof(T) * 8;
  // The input string is left-to-right, the input argument of std::bitset needs
  // to be right-to-left. Instead of reversing tokens, reverse the entire
  // string and iterate tokens from end to begin.
  std::reverse(str.begin(), str.end());
  const size_t length = str.length();

  std::vector<T> buffer;
  buffer.reserve(NumBitsToNumWords<kWordBits>(length));
  // |end| is the one-past-the-end position of the current word inside the
  // reversed string; the loop stops once less than a full word is left.
  for (size_t end = length; end >= kWordBits; end -= kWordBits) {
    buffer.push_back(static_cast<T>(
        std::bitset<kWordBits>(str, end - kWordBits, kWordBits).to_ullong()));
  }
  const size_t suffix_length = length % kWordBits;
  if (suffix_length != 0) {
    buffer.push_back(static_cast<T>(
        std::bitset<kWordBits>(str, 0, suffix_length).to_ullong()));
  }
  return buffer;
}

// Adds '0' chars at the end of the string until the size is a multiple of N.
template <size_t N>
inline std::string PadToWord(std::string&& str) {
  const size_t tail_length = str.size() % N;
  if (tail_length != 0) str.append(N - tail_length, '0');
  // |str| is a named rvalue reference, i.e. an lvalue here; the explicit move
  // is required to avoid copying it into the return value.
  return std::move(str);
}

// Adds '0' chars at the end of the string until the size is a multiple of N.
template <size_t N>
inline std::string PadToWord(const std::string& str) {
  return PadToWord<N>(std::string(str));
}

// Converts a left-to-right stream of bits to std::bitset.
template <size_t N>
inline std::bitset<N> StreamToBitset(std::string str) {
  std::reverse(str.begin(), str.end());
  return std::bitset<N>(str);
}

// Converts first |num_bits| of std::bitset to a left-to-right stream of bits.
// |num_bits| must be no greater than N.
template <size_t N>
inline std::string BitsetToStream(const std::bitset<N>& bits,
                                  size_t num_bits = N) {
  // to_string() puts the highest bit first, so the lowest |num_bits| bits are
  // the last |num_bits| characters.
  std::string str = bits.to_string().substr(N - num_bits);
  std::reverse(str.begin(), str.end());
  return str;
}

// Converts a left-to-right stream of bits to uint64.
inline uint64_t StreamToBits(std::string str) {
  std::reverse(str.begin(), str.end());
  return std::bitset<64>(str).to_ullong();
}

// Converts first |num_bits| stored in uint64 to a left-to-right stream of bits.
inline std::string BitsToStream(uint64_t bits, size_t num_bits = 64) {
  const std::bitset<64> bitset(bits);
  return BitsetToStream(bitset, num_bits);
}

// Base class for writing sequences of bits.
class BitWriterInterface {
 public:
  BitWriterInterface() = default;
  virtual ~BitWriterInterface() = default;

  // Writes lower |num_bits| in |bits| to the stream.
  // |num_bits| must be no greater than 64.
  virtual void WriteBits(uint64_t bits, size_t num_bits) = 0;

  // Writes left-to-right string of '0' and '1' to stream.
  // String length must be no greater than 64.
  // Note: "01" will be written as 0x2, not 0x1. The string doesn't represent
  // numbers but a stream of bits in the order they come from encoder.
  virtual void WriteStream(const std::string& bits) {
    assert(bits.length() <= 64);
    WriteBits(StreamToBits(bits), bits.length());
  }

  // Writes lower |num_bits| in |bits| to the stream.
  // |num_bits| must be no greater than 64.
  template <size_t N>
  void WriteBitset(const std::bitset<N>& bits, size_t num_bits = N) {
    WriteBits(bits.to_ullong(), num_bits);
  }

  // Writes bits from value of type |T| to the stream. No encoding is done.
  // Always writes 8 * sizeof(T) bits.
  template <typename T>
  void WriteUnencoded(T val) {
    // sizeof() counts bytes: anything larger than the 8-byte staging word
    // would make the memcpy below write past the end of |bits|.
    static_assert(sizeof(T) <= sizeof(uint64_t), "Type size too large");
    uint64_t bits = 0;
    // Copies the object representation of |val| into the lowest-addressed
    // bytes of |bits| (the least significant ones on little-endian hosts).
    std::memcpy(&bits, &val, sizeof(T));
    WriteBits(bits, sizeof(T) * 8);
  }

  // Writes |val| in chunks of size |chunk_length| followed by a signal bit:
  // 0 - no more chunks to follow
  // 1 - more chunks to follow
  // for example 255 is encoded into 1111 1 1111 0 for chunk length 4.
  // The last chunk can be truncated and signal bit omitted, if the entire
  // payload (for example 16 bit for uint16_t) has already been written.
  void WriteVariableWidthU64(uint64_t val, size_t chunk_length);
  void WriteVariableWidthU32(uint32_t val, size_t chunk_length);
  void WriteVariableWidthU16(uint16_t val, size_t chunk_length);
  void WriteVariableWidthU8(uint8_t val, size_t chunk_length);
  void WriteVariableWidthS64(int64_t val, size_t chunk_length,
                             size_t zigzag_exponent);
  void WriteVariableWidthS32(int32_t val, size_t chunk_length,
                             size_t zigzag_exponent);
  void WriteVariableWidthS16(int16_t val, size_t chunk_length,
                             size_t zigzag_exponent);
  void WriteVariableWidthS8(int8_t val, size_t chunk_length,
                            size_t zigzag_exponent);

  // Writes |val| using fixed bit width. Bit width is determined by |max_val|:
  // max_val 0 -> bit width 1
  // max_val 1 -> bit width 1
  // max_val 2 -> bit width 2
  // max_val 3 -> bit width 2
  // max_val 4 -> bit width 3
  // max_val 5 -> bit width 3
  // max_val 8 -> bit width 4
  // max_val n -> bit width 1 + floor(log2(n))
  // |val| needs to be <= |max_val|.
  void WriteFixedWidth(uint64_t val, uint64_t max_val);

  // Returns number of bits written.
  virtual size_t GetNumBits() const = 0;

  // Provides direct access to the buffer data if implemented.
  virtual const uint8_t* GetData() const { return nullptr; }

  // Returns buffer size in bytes.
  size_t GetDataSizeBytes() const {
    return NumBitsToNumWords<8>(GetNumBits());
  }

  // Generates and returns byte array containing written bits.
  virtual std::vector<uint8_t> GetDataCopy() const = 0;

  BitWriterInterface(const BitWriterInterface&) = delete;
  BitWriterInterface& operator=(const BitWriterInterface&) = delete;
};

// This class is an implementation of BitWriterInterface, using
// std::vector<uint64_t> to store written bits.
class BitWriterWord64 : public BitWriterInterface {
 public:
  explicit BitWriterWord64(size_t reserve_bits = 64);

  void WriteBits(uint64_t bits, size_t num_bits) override;

  size_t GetNumBits() const override { return end_; }

  // Exposes the 64-bit word storage as raw bytes.
  const uint8_t* GetData() const override {
    return reinterpret_cast<const uint8_t*>(buffer_.data());
  }

  // Returns a copy of the first GetDataSizeBytes() bytes of the storage.
  std::vector<uint8_t> GetDataCopy() const override {
    return std::vector<uint8_t>(GetData(), GetData() + GetDataSizeBytes());
  }

  // Returns written stream as std::string, padded with zeroes so that the
  // length is a multiple of 64.
  std::string GetStreamPadded64() const { return BufferToStream(buffer_); }

  // Sets callback to emit bit sequences after every write.
  void SetCallback(std::function<void(const std::string&)> callback) {
    callback_ = std::move(callback);
  }

 protected:
  // Sends string generated from arguments to callback_ if defined.
  void EmitSequence(uint64_t bits, size_t num_bits) const {
    if (callback_) callback_(BitsToStream(bits, num_bits));
  }

 private:
  std::vector<uint64_t> buffer_;
  // Total number of bits written so far. Named 'end' as analogy to std::end().
  size_t end_;

  // If not null, the writer will use the callback to emit the written bit
  // sequence as a string of '0' and '1'.
  std::function<void(const std::string&)> callback_;
};

// Base class for reading sequences of bits.
class BitReaderInterface {
 public:
  BitReaderInterface() = default;
  virtual ~BitReaderInterface() = default;

  // Reads |num_bits| from the stream, stores them in |bits|.
  // Returns number of read bits. |num_bits| must be no greater than 64.
  virtual size_t ReadBits(uint64_t* bits, size_t num_bits) = 0;

  // Reads |num_bits| from the stream, stores them in |bits|.
  // Returns number of read bits. |num_bits| must be no greater than 64.
  // |bits| is left untouched if nothing could be read.
  template <size_t N>
  size_t ReadBitset(std::bitset<N>* bits, size_t num_bits = N) {
    uint64_t val = 0;
    const size_t num_read = ReadBits(&val, num_bits);
    if (num_read) {
      *bits = std::bitset<N>(val);
    }
    return num_read;
  }

  // Reads |num_bits| from the stream, returns string in left-to-right order.
  // The length of the returned string may be less than |num_bits| if end was
  // reached.
  std::string ReadStream(size_t num_bits) {
    uint64_t bits = 0;
    const size_t num_read = ReadBits(&bits, num_bits);
    return BitsToStream(bits, num_read);
  }

  // Reads 8 * sizeof(T) bits and stores them in |val|.
  // Returns false, leaving |val| untouched, if fewer bits were available.
  template <typename T>
  bool ReadUnencoded(T* val) {
    // sizeof() counts bytes: anything larger than the 8-byte staging word
    // would make the memcpy below read past the end of |bits|.
    static_assert(sizeof(T) <= sizeof(uint64_t), "Type size too large");
    uint64_t bits = 0;
    const size_t num_read = ReadBits(&bits, sizeof(T) * 8);
    if (num_read != sizeof(T) * 8) return false;
    std::memcpy(val, &bits, sizeof(T));
    return true;
  }

  // Returns number of bits already read.
  virtual size_t GetNumReadBits() const = 0;

  // These two functions define 'hard' and 'soft' EOF.
  //
  // Returns true if the end of the buffer was reached.
  virtual bool ReachedEnd() const = 0;
  // Returns true if we reached the end of the buffer or are nearing it and only
  // zero bits are left to read. Implementations of this function are allowed to
  // commit a "false negative" error if the end of the buffer was not reached,
  // i.e. it can return false even if indeed only zeroes are left.
  // It is assumed that the consumer expects that
  // the buffer stream ends with padding zeroes, and would accept this as a
  // 'soft' EOF. Implementations of this class do not necessarily need to
  // implement this, default behavior can simply delegate to ReachedEnd().
  virtual bool OnlyZeroesLeft() const { return ReachedEnd(); }

  // Reads value encoded with WriteVariableWidthXXX (see BitWriterInterface).
  // Reader and writer must use the same |chunk_length| and variable type.
  // Returns true on success, false if the bit stream ends prematurely.
  bool ReadVariableWidthU64(uint64_t* val, size_t chunk_length);
  bool ReadVariableWidthU32(uint32_t* val, size_t chunk_length);
  bool ReadVariableWidthU16(uint16_t* val, size_t chunk_length);
  bool ReadVariableWidthU8(uint8_t* val, size_t chunk_length);
  bool ReadVariableWidthS64(int64_t* val, size_t chunk_length,
                            size_t zigzag_exponent);
  bool ReadVariableWidthS32(int32_t* val, size_t chunk_length,
                            size_t zigzag_exponent);
  bool ReadVariableWidthS16(int16_t* val, size_t chunk_length,
                            size_t zigzag_exponent);
  bool ReadVariableWidthS8(int8_t* val, size_t chunk_length,
                           size_t zigzag_exponent);

  // Reads value written by WriteFixedWidth (|max_val| needs to be the same).
  // Returns true on success, false if the bit stream ends prematurely.
  bool ReadFixedWidth(uint64_t* val, uint64_t max_val);

  BitReaderInterface(const BitReaderInterface&) = delete;
  BitReaderInterface& operator=(const BitReaderInterface&) = delete;
};

// This class is an implementation of BitReaderInterface which accepts both
// uint8_t and uint64_t buffers as input. uint64_t buffers are consumed and
// owned. uint8_t buffers are copied.
class BitReaderWord64 : public BitReaderInterface {
 public:
  // Consumes and owns the buffer.
  explicit BitReaderWord64(std::vector<uint64_t>&& buffer);

  // Copies the buffer and casts it to uint64.
  // Consuming the original buffer and casting it to uint64 is difficult,
  // as it would potentially cause data misalignment and poor performance.
  explicit BitReaderWord64(const std::vector<uint8_t>& buffer);
  BitReaderWord64(const void* buffer, size_t num_bytes);

  size_t ReadBits(uint64_t* bits, size_t num_bits) override;

  size_t GetNumReadBits() const override { return pos_; }

  bool ReachedEnd() const override;
  bool OnlyZeroesLeft() const override;

  BitReaderWord64() = delete;

  // Sets callback to emit bit sequences after every read.
  void SetCallback(std::function<void(const std::string&)> callback) {
    callback_ = std::move(callback);
  }

 protected:
  // Sends string generated from arguments to callback_ if defined.
  void EmitSequence(uint64_t bits, size_t num_bits) const {
    if (callback_) callback_(BitsToStream(bits, num_bits));
  }

 private:
  const std::vector<uint64_t> buffer_;
  size_t pos_;

  // If not null, the reader will use the callback to emit the read bit
  // sequence as a string of '0' and '1'.
  std::function<void(const std::string&)> callback_;
};

}  // namespace utils
}  // namespace spvtools

#endif  // LIBSPIRV_UTIL_BIT_STREAM_H_