[subset] Implement Kahn's algo for topological sorting instead of BFS.

This commit is contained in:
Garret Rieger 2020-10-30 10:29:51 -07:00
parent 00f393dc3f
commit f4c78cc7dd
2 changed files with 124 additions and 33 deletions

View File

@ -35,10 +35,13 @@
struct graph_t
{
// TODO(garretrieger): add an error tracking system similar to what serialize_context_t
// does.
/*
* A topological sorting of an object graph. Ordered
* in reverse serialization order (first object in the
* serialization is at the end of the graph). This matches
* serialization is at the end of the list). This matches
* the 'packed' object stack used internally in the
* serializer
*/
@ -92,14 +95,11 @@ struct graph_t
}
/*
* Generates a new topological sorting of graph using BFS.
* Generates a new topological sorting of graph using Kahn's
* algorithm: https://en.wikipedia.org/wiki/Topological_sorting#Algorithms
*/
void sort_bfs ()
void sort_kahn ()
{
// BFS doesn't always produce a topological sort so this is just
// for testing re-ordering capabilities for now.
// Will need to use a more advanced topological sorting algorithm
if (objects_.length <= 1) {
// Graph of 1 or less doesn't need sorting.
return;
@ -108,25 +108,28 @@ struct graph_t
hb_vector_t<unsigned> queue;
hb_vector_t<hb_serialize_context_t::object_t> sorted_graph;
hb_map_t id_map;
hb_map_t edge_count;
incoming_edge_count (&edge_count);
// Object graphs are in reverse order, the first object is at the end
// of the vector.
// of the vector. Since the graph is topologically sorted it's safe to
// assume the first object has no incoming edges.
queue.push (objects_.length - 1);
int new_id = objects_.length - 1;
hb_set_t visited;
while (queue.length)
{
unsigned next_id = queue[0];
queue.remove(0);
visited.add(next_id);
hb_serialize_context_t::object_t& next = objects_[next_id];
sorted_graph.push (next);
id_map.set (next_id, new_id--);
for (const auto& link : next.links) {
if (!visited.has (link.objidx))
// TODO(garretrieger): sort children from smallest to largest
edge_count.set (link.objidx, edge_count.get (link.objidx) - 1);
if (!edge_count.get (link.objidx))
queue.push (link.objidx);
}
}
@ -138,19 +141,7 @@ struct graph_t
assert (false);
}
// Apply objidx remapping.
// TODO(garretrieger): extract this to a helper.
for (unsigned i = 0; i < sorted_graph.length; i++)
{
for (unsigned j = 0; j < sorted_graph[i].links.length; j++)
{
auto& link = sorted_graph[i].links[j];
if (!id_map.has (link.objidx))
// TODO(garretrieger): handle this.
assert (false);
link.objidx = id_map.get (link.objidx);
}
}
remap_obj_indices (id_map, &sorted_graph);
sorted_graph.as_array ().reverse ();
objects_ = sorted_graph;
@ -163,13 +154,53 @@ struct graph_t
bool will_overflow()
{
// TODO(garretrieger): implement me.
// Check for offsets that exceed their width or are negative if
// using a non-signed link.
// - Check for offsets that exceed their width or;
// - are negative if using a non-signed link.
return false;
}
private:
/*
* Updates all objidx's in all links using the provided mapping.
*/
void remap_obj_indices (const hb_map_t& id_map,
hb_vector_t<hb_serialize_context_t::object_t>* sorted_graph)
{
for (unsigned i = 0; i < sorted_graph->length; i++)
{
for (unsigned j = 0; j < (*sorted_graph)[i].links.length; j++)
{
auto& link = (*sorted_graph)[i].links[j];
if (!id_map.has (link.objidx))
// TODO(garretrieger): handle this.
assert (false);
link.objidx = id_map.get (link.objidx);
}
}
}
/*
* Creates a map from objid to # of incoming edges.
*/
void incoming_edge_count (hb_map_t* out)
{
for (unsigned i = 0; i < objects_.length; i++)
{
if (!out->has (i))
out->set (i, 0);
for (const auto& l : objects_[i].links)
{
unsigned id = l.objidx;
if (out->has (id))
out->set (id, out->get (id) + 1);
else
out->set (id, 1);
}
}
}
template <typename O> void
serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link,
char* head,
@ -220,9 +251,10 @@ inline void
hb_resolve_overflows (const hb_vector_t<hb_serialize_context_t::object_t *>& packed,
hb_serialize_context_t* c) {
graph_t sorted_graph (packed);
sorted_graph.sort_bfs ();
sorted_graph.sort_kahn ();
if (sorted_graph.will_overflow ()) {
// TODO(garretrieger): additional offset resolution strategies
// TODO(garretrieger): try additional offset resolution strategies
// - Dijkstra sort of weighted graph.
// - Promotion to extension lookups.
// - Table duplication.
// - Table splitting.

View File

@ -71,7 +71,7 @@ populate_serializer_simple (hb_serialize_context_t* c)
}
static void
populate_serializer_complex (hb_serialize_context_t* c)
populate_serializer_complex_1 (hb_serialize_context_t* c)
{
c->start_serialize<char> ();
@ -90,15 +90,41 @@ populate_serializer_complex (hb_serialize_context_t* c)
c->end_serialize();
}
static void test_sort_bfs ()
static void
populate_serializer_complex_2 (hb_serialize_context_t* c)
{
c->start_serialize<char> ();
unsigned obj_5 = add_object ("mno", 3, c);
unsigned obj_4 = add_object ("jkl", 3, c);
start_object ("ghi", 3, c);
add_offset (obj_4, c);
unsigned obj_3 = c->pop_pack (false);
start_object ("def", 3, c);
add_offset (obj_3, c);
unsigned obj_2 = c->pop_pack (false);
start_object ("abc", 3, c);
add_offset (obj_2, c);
add_offset (obj_4, c);
add_offset (obj_5, c);
c->pop_pack ();
c->end_serialize();
}
static void test_sort_kahn_1 ()
{
size_t buffer_size = 100;
void* buffer = malloc (buffer_size);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_complex (&c);
populate_serializer_complex_1 (&c);
graph_t graph (c.object_graph ());
graph.sort_bfs ();
graph.sort_kahn ();
assert(strncmp (graph.objects_[3].head, "abc", 3) == 0);
assert(graph.objects_[3].links.length == 2);
@ -116,6 +142,38 @@ static void test_sort_bfs ()
assert(graph.objects_[0].links.length == 0);
}
static void test_sort_kahn_2 ()
{
size_t buffer_size = 100;
void* buffer = malloc (buffer_size);
hb_serialize_context_t c (buffer, buffer_size);
populate_serializer_complex_2 (&c);
graph_t graph (c.object_graph ());
graph.sort_kahn ();
assert(strncmp (graph.objects_[4].head, "abc", 3) == 0);
assert(graph.objects_[4].links.length == 3);
assert(graph.objects_[4].links[0].objidx == 3);
assert(graph.objects_[4].links[1].objidx == 0);
assert(graph.objects_[4].links[2].objidx == 2);
assert(strncmp (graph.objects_[3].head, "def", 3) == 0);
assert(graph.objects_[3].links.length == 1);
assert(graph.objects_[3].links[0].objidx == 1);
assert(strncmp (graph.objects_[2].head, "mno", 3) == 0);
assert(graph.objects_[2].links.length == 0);
assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0);
assert(graph.objects_[1].links.length == 1);
assert(graph.objects_[1].links[0].objidx == 0);
assert(strncmp (graph.objects_[0].head, "jkl", 3) == 0);
assert(graph.objects_[0].links.length == 0);
}
static void
test_serialize ()
{
@ -142,5 +200,6 @@ int
main (int argc, char **argv)
{
test_serialize ();
test_sort_bfs ();
test_sort_kahn_1 ();
test_sort_kahn_2 ();
}