mirror of
https://github.com/KhronosGroup/SPIRV-Tools
synced 2025-01-16 19:14:14 +00:00
b36acbec0e
Includes: - Multi-sequence move-to-front - Coding by id descriptor - Statistical coding of non-id words - Joint coding of opcode and num_operands Removed explicit form Huffman codec constructor - The standard use case for it is to be constructed from initializer list. Using serialization for Huffman codecs
411 lines
12 KiB
C++
411 lines
12 KiB
C++
// Copyright (c) 2017 Google Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// Contains utils for reading, writing and debug printing bit streams.
|
|
|
|
#ifndef LIBSPIRV_UTIL_HUFFMAN_CODEC_H_
|
|
#define LIBSPIRV_UTIL_HUFFMAN_CODEC_H_
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <functional>
|
|
#include <queue>
|
|
#include <iomanip>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <ostream>
|
|
#include <sstream>
|
|
#include <stack>
|
|
#include <tuple>
|
|
#include <unordered_map>
|
|
#include <vector>
|
|
|
|
namespace spvutils {
|
|
|
|
// Used to generate and apply a Huffman coding scheme.
|
|
// |Val| is the type of variable being encoded (for example a string or a
|
|
// literal).
|
|
template <class Val>
|
|
class HuffmanCodec {
|
|
public:
|
|
// Huffman tree node.
|
|
struct Node {
|
|
Node() {}
|
|
|
|
// Creates Node from serialization leaving weight and id undefined.
|
|
Node(const Val& in_value, uint32_t in_left, uint32_t in_right)
|
|
: value(in_value), left(in_left), right(in_right) {}
|
|
|
|
Val value = Val();
|
|
uint32_t weight = 0;
|
|
// Ids are issued sequentially starting from 1. Ids are used as an ordering
|
|
// tie-breaker, to make sure that the ordering (and resulting coding scheme)
|
|
// are consistent accross multiple platforms.
|
|
uint32_t id = 0;
|
|
// Handles of children.
|
|
uint32_t left = 0;
|
|
uint32_t right = 0;
|
|
};
|
|
|
|
// Creates Huffman codec from a histogramm.
|
|
// Histogramm counts must not be zero.
|
|
explicit HuffmanCodec(const std::map<Val, uint32_t>& hist) {
|
|
if (hist.empty()) return;
|
|
|
|
// Heuristic estimate.
|
|
nodes_.reserve(3 * hist.size());
|
|
|
|
// Create NIL.
|
|
CreateNode();
|
|
|
|
// The queue is sorted in ascending order by weight (or by node id if
|
|
// weights are equal).
|
|
std::vector<uint32_t> queue_vector;
|
|
queue_vector.reserve(hist.size());
|
|
std::priority_queue<uint32_t, std::vector<uint32_t>,
|
|
std::function<bool(uint32_t, uint32_t)>>
|
|
queue(std::bind(&HuffmanCodec::LeftIsBigger, this,
|
|
std::placeholders::_1, std::placeholders::_2),
|
|
std::move(queue_vector));
|
|
|
|
// Put all leaves in the queue.
|
|
for (const auto& pair : hist) {
|
|
const uint32_t node = CreateNode();
|
|
MutableValueOf(node) = pair.first;
|
|
MutableWeightOf(node) = pair.second;
|
|
assert(WeightOf(node));
|
|
queue.push(node);
|
|
}
|
|
|
|
// Form the tree by combining two subtrees with the least weight,
|
|
// and pushing the root of the new tree in the queue.
|
|
while (true) {
|
|
// We push a node at the end of each iteration, so the queue is never
|
|
// supposed to be empty at this point, unless there are no leaves, but
|
|
// that case was already handled.
|
|
assert(!queue.empty());
|
|
const uint32_t right = queue.top();
|
|
queue.pop();
|
|
|
|
// If the queue is empty at this point, then the last node is
|
|
// the root of the complete Huffman tree.
|
|
if (queue.empty()) {
|
|
root_ = right;
|
|
break;
|
|
}
|
|
|
|
const uint32_t left = queue.top();
|
|
queue.pop();
|
|
|
|
// Combine left and right into a new tree and push it into the queue.
|
|
const uint32_t parent = CreateNode();
|
|
MutableWeightOf(parent) = WeightOf(right) + WeightOf(left);
|
|
MutableLeftOf(parent) = left;
|
|
MutableRightOf(parent) = right;
|
|
queue.push(parent);
|
|
}
|
|
|
|
// Traverse the tree and form encoding table.
|
|
CreateEncodingTable();
|
|
}
|
|
|
|
// Creates Huffman codec from saved tree structure.
|
|
// |nodes| is the list of nodes of the tree, nodes[0] being NIL.
|
|
// |root_handle| is the index of the root node.
|
|
HuffmanCodec(uint32_t root_handle, std::vector<Node>&& nodes) {
|
|
nodes_ = std::move(nodes);
|
|
assert(!nodes_.empty());
|
|
assert(root_handle > 0 && root_handle < nodes_.size());
|
|
assert(!LeftOf(0) && !RightOf(0));
|
|
|
|
root_ = root_handle;
|
|
|
|
// Traverse the tree and form encoding table.
|
|
CreateEncodingTable();
|
|
}
|
|
|
|
// Serializes the codec in the following text format:
|
|
// (<root_handle>, {
|
|
// {0, 0, 0},
|
|
// {val1, left1, right1},
|
|
// {val2, left2, right2},
|
|
// ...
|
|
// })
|
|
std::string SerializeToText(int indent_num_whitespaces) const {
|
|
const bool value_is_text = std::is_same<Val, std::string>::value;
|
|
|
|
const std::string indent1 = std::string(indent_num_whitespaces, ' ');
|
|
const std::string indent2 = std::string(indent_num_whitespaces + 2, ' ');
|
|
|
|
std::stringstream code;
|
|
code << "(" << root_ << ", {\n";
|
|
|
|
for (const Node& node : nodes_) {
|
|
code << indent2 << "{";
|
|
if (value_is_text)
|
|
code << "\"";
|
|
code << node.value;
|
|
if (value_is_text)
|
|
code << "\"";
|
|
code << ", " << node.left << ", " << node.right << "},\n";
|
|
}
|
|
|
|
code << indent1 << "})";
|
|
|
|
return code.str();
|
|
}
|
|
|
|
// Prints the Huffman tree in the following format:
|
|
// w------w------'x'
|
|
// w------'y'
|
|
// Where w stands for the weight of the node.
|
|
// Right tree branches appear above left branches. Taking the right path
|
|
// adds 1 to the code, taking the left adds 0.
|
|
void PrintTree(std::ostream& out) const {
|
|
PrintTreeInternal(out, root_, 0);
|
|
}
|
|
|
|
// Traverses the tree and prints the Huffman table: value, code
|
|
// and optionally node weight for every leaf.
|
|
void PrintTable(std::ostream& out, bool print_weights = true) {
|
|
std::queue<std::pair<uint32_t, std::string>> queue;
|
|
queue.emplace(root_, "");
|
|
|
|
while (!queue.empty()) {
|
|
const uint32_t node = queue.front().first;
|
|
const std::string code = queue.front().second;
|
|
queue.pop();
|
|
if (!RightOf(node) && !LeftOf(node)) {
|
|
out << ValueOf(node);
|
|
if (print_weights)
|
|
out << " " << WeightOf(node);
|
|
out << " " << code << std::endl;
|
|
} else {
|
|
if (LeftOf(node))
|
|
queue.emplace(LeftOf(node), code + "0");
|
|
|
|
if (RightOf(node))
|
|
queue.emplace(RightOf(node), code + "1");
|
|
}
|
|
}
|
|
}
|
|
|
|
// Returns the Huffman table. The table was built at at construction time,
|
|
// this function just returns a const reference.
|
|
const std::unordered_map<Val, std::pair<uint64_t, size_t>>&
|
|
GetEncodingTable() const {
|
|
return encoding_table_;
|
|
}
|
|
|
|
// Encodes |val| and stores its Huffman code in the lower |num_bits| of
|
|
// |bits|. Returns false of |val| is not in the Huffman table.
|
|
bool Encode(const Val& val, uint64_t* bits, size_t* num_bits) const {
|
|
auto it = encoding_table_.find(val);
|
|
if (it == encoding_table_.end())
|
|
return false;
|
|
*bits = it->second.first;
|
|
*num_bits = it->second.second;
|
|
return true;
|
|
}
|
|
|
|
// Reads bits one-by-one using callback |read_bit| until a match is found.
|
|
// Matching value is stored in |val|. Returns false if |read_bit| terminates
|
|
// before a code was mathced.
|
|
// |read_bit| has type bool func(bool* bit). When called, the next bit is
|
|
// stored in |bit|. |read_bit| returns false if the stream terminates
|
|
// prematurely.
|
|
bool DecodeFromStream(
|
|
const std::function<bool(bool*)>& read_bit, Val* val) const {
|
|
uint32_t node = root_;
|
|
while (true) {
|
|
assert(node);
|
|
|
|
if (!RightOf(node) && !LeftOf(node)) {
|
|
*val = ValueOf(node);
|
|
return true;
|
|
}
|
|
|
|
bool go_right;
|
|
if (!read_bit(&go_right))
|
|
return false;
|
|
|
|
if (go_right)
|
|
node = RightOf(node);
|
|
else
|
|
node = LeftOf(node);
|
|
}
|
|
|
|
assert (0);
|
|
return false;
|
|
}
|
|
|
|
private:
|
|
// Returns value of the node referenced by |handle|.
|
|
Val ValueOf(uint32_t node) const {
|
|
return nodes_.at(node).value;
|
|
}
|
|
|
|
// Returns left child of |node|.
|
|
uint32_t LeftOf(uint32_t node) const {
|
|
return nodes_.at(node).left;
|
|
}
|
|
|
|
// Returns right child of |node|.
|
|
uint32_t RightOf(uint32_t node) const {
|
|
return nodes_.at(node).right;
|
|
}
|
|
|
|
// Returns weight of |node|.
|
|
uint32_t WeightOf(uint32_t node) const {
|
|
return nodes_.at(node).weight;
|
|
}
|
|
|
|
// Returns id of |node|.
|
|
uint32_t IdOf(uint32_t node) const {
|
|
return nodes_.at(node).id;
|
|
}
|
|
|
|
// Returns mutable reference to value of |node|.
|
|
Val& MutableValueOf(uint32_t node) {
|
|
assert(node);
|
|
return nodes_.at(node).value;
|
|
}
|
|
|
|
// Returns mutable reference to handle of left child of |node|.
|
|
uint32_t& MutableLeftOf(uint32_t node) {
|
|
assert(node);
|
|
return nodes_.at(node).left;
|
|
}
|
|
|
|
// Returns mutable reference to handle of right child of |node|.
|
|
uint32_t& MutableRightOf(uint32_t node) {
|
|
assert(node);
|
|
return nodes_.at(node).right;
|
|
}
|
|
|
|
// Returns mutable reference to weight of |node|.
|
|
uint32_t& MutableWeightOf(uint32_t node) {
|
|
return nodes_.at(node).weight;
|
|
}
|
|
|
|
// Returns mutable reference to id of |node|.
|
|
uint32_t& MutableIdOf(uint32_t node) {
|
|
return nodes_.at(node).id;
|
|
}
|
|
|
|
// Returns true if |left| has bigger weight than |right|. Node ids are
|
|
// used as tie-breaker.
|
|
bool LeftIsBigger(uint32_t left, uint32_t right) const {
|
|
if (WeightOf(left) == WeightOf(right)) {
|
|
assert (IdOf(left) != IdOf(right));
|
|
return IdOf(left) > IdOf(right);
|
|
}
|
|
return WeightOf(left) > WeightOf(right);
|
|
}
|
|
|
|
// Prints subtree (helper function used by PrintTree).
|
|
void PrintTreeInternal(std::ostream& out, uint32_t node, size_t depth) const {
|
|
if (!node)
|
|
return;
|
|
|
|
const size_t kTextFieldWidth = 7;
|
|
|
|
if (!RightOf(node) && !LeftOf(node)) {
|
|
out << ValueOf(node) << std::endl;
|
|
} else {
|
|
if (RightOf(node)) {
|
|
std::stringstream label;
|
|
label << std::setfill('-') << std::left << std::setw(kTextFieldWidth)
|
|
<< WeightOf(RightOf(node));
|
|
out << label.str();
|
|
PrintTreeInternal(out, RightOf(node), depth + 1);
|
|
}
|
|
|
|
if (LeftOf(node)) {
|
|
out << std::string(depth * kTextFieldWidth, ' ');
|
|
std::stringstream label;
|
|
label << std::setfill('-') << std::left << std::setw(kTextFieldWidth)
|
|
<< WeightOf(LeftOf(node));
|
|
out << label.str();
|
|
PrintTreeInternal(out, LeftOf(node), depth + 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Traverses the Huffman tree and saves paths to the leaves as bit
|
|
// sequences to encoding_table_.
|
|
void CreateEncodingTable() {
|
|
struct Context {
|
|
Context(uint32_t in_node, uint64_t in_bits, size_t in_depth)
|
|
: node(in_node), bits(in_bits), depth(in_depth) {}
|
|
uint32_t node;
|
|
// Huffman tree depth cannot exceed 64 as histogramm counts are expected
|
|
// to be positive and limited by numeric_limits<uint32_t>::max().
|
|
// For practical applications tree depth would be much smaller than 64.
|
|
uint64_t bits;
|
|
size_t depth;
|
|
};
|
|
|
|
std::queue<Context> queue;
|
|
queue.emplace(root_, 0, 0);
|
|
|
|
while (!queue.empty()) {
|
|
const Context& context = queue.front();
|
|
const uint32_t node = context.node;
|
|
const uint64_t bits = context.bits;
|
|
const size_t depth = context.depth;
|
|
queue.pop();
|
|
|
|
if (!RightOf(node) && !LeftOf(node)) {
|
|
auto insertion_result = encoding_table_.emplace(
|
|
ValueOf(node), std::pair<uint64_t, size_t>(bits, depth));
|
|
assert(insertion_result.second);
|
|
(void)insertion_result;
|
|
} else {
|
|
if (LeftOf(node))
|
|
queue.emplace(LeftOf(node), bits, depth + 1);
|
|
|
|
if (RightOf(node))
|
|
queue.emplace(RightOf(node), bits | (1ULL << depth), depth + 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Creates new Huffman tree node and stores it in the deleter array.
|
|
uint32_t CreateNode() {
|
|
const uint32_t handle = static_cast<uint32_t>(nodes_.size());
|
|
nodes_.emplace_back(Node());
|
|
nodes_.back().id = next_node_id_++;
|
|
return handle;
|
|
}
|
|
|
|
// Huffman tree root handle.
|
|
uint32_t root_ = 0;
|
|
|
|
// Huffman tree deleter.
|
|
std::vector<Node> nodes_;
|
|
|
|
// Encoding table value -> {bits, num_bits}.
|
|
// Huffman codes are expected to never exceed 64 bit length (this is in fact
|
|
// impossible if frequencies are stored as uint32_t).
|
|
std::unordered_map<Val, std::pair<uint64_t, size_t>> encoding_table_;
|
|
|
|
// Next node id issued by CreateNode();
|
|
uint32_t next_node_id_ = 1;
|
|
};
|
|
|
|
} // namespace spvutils
|
|
|
|
#endif // LIBSPIRV_UTIL_HUFFMAN_CODEC_H_
|