From f4c78cc7dd11c83aa3f3a3516e75f4fe689aff19 Mon Sep 17 00:00:00 2001 From: Garret Rieger Date: Fri, 30 Oct 2020 10:29:51 -0700 Subject: [PATCH] [subset] Implement Kahn's algo for topological sorting instead of BFS. --- src/hb-repacker.hh | 88 ++++++++++++++++++++++++++++++-------------- src/test-repacker.cc | 69 +++++++++++++++++++++++++++++++--- 2 files changed, 124 insertions(+), 33 deletions(-) diff --git a/src/hb-repacker.hh b/src/hb-repacker.hh index 2be1433f6..9d8fa2db7 100644 --- a/src/hb-repacker.hh +++ b/src/hb-repacker.hh @@ -35,10 +35,13 @@ struct graph_t { + // TODO(garretrieger): add an error tracking system similar to what serialize_context_t + // does. + /* * A topological sorting of an object graph. Ordered * in reverse serialization order (first object in the - * serialization is at the end of the graph). This matches + * serialization is at the end of the list). This matches * the 'packed' object stack used internally in the * serializer */ @@ -92,14 +95,11 @@ struct graph_t } /* - * Generates a new topological sorting of graph using BFS. + * Generates a new topological sorting of graph using Kahn's + * algorithm: https://en.wikipedia.org/wiki/Topological_sorting#Algorithms */ - void sort_bfs () + void sort_kahn () { - // BFS doesn't always produce a topological sort so this is just - // for testing re-ordering capabilities for now. - // Will need to use a more advanced topological sorting algorithm - if (objects_.length <= 1) { // Graph of 1 or less doesn't need sorting. return; @@ -108,25 +108,28 @@ struct graph_t hb_vector_t queue; hb_vector_t sorted_graph; hb_map_t id_map; + hb_map_t edge_count; + incoming_edge_count (&edge_count); // Object graphs are in reverse order, the first object is at the end - // of the vector. + // of the vector. Since the graph is topologically sorted it's safe to + // assume the first object has no incoming edges. queue.push (objects_.length - 1); int new_id = objects_.length - 1; - hb_set_t visited; while (queue.length) { unsigned next_id = queue[0]; queue.remove(0); - visited.add(next_id); hb_serialize_context_t::object_t& next = objects_[next_id]; sorted_graph.push (next); id_map.set (next_id, new_id--); for (const auto& link : next.links) { - if (!visited.has (link.objidx)) + // TODO(garretrieger): sort children from smallest to largest + edge_count.set (link.objidx, edge_count.get (link.objidx) - 1); + if (!edge_count.get (link.objidx)) queue.push (link.objidx); } } @@ -138,19 +141,7 @@ struct graph_t assert (false); } - // Apply objidx remapping. - // TODO(garretrieger): extract this to a helper. - for (unsigned i = 0; i < sorted_graph.length; i++) - { - for (unsigned j = 0; j < sorted_graph[i].links.length; j++) - { - auto& link = sorted_graph[i].links[j]; - if (!id_map.has (link.objidx)) - // TODO(garretrieger): handle this. - assert (false); - link.objidx = id_map.get (link.objidx); - } - } + remap_obj_indices (id_map, &sorted_graph); sorted_graph.as_array ().reverse (); objects_ = sorted_graph; @@ -163,13 +154,53 @@ struct graph_t bool will_overflow() { // TODO(garretrieger): implement me. - // Check for offsets that exceed their width or are negative if - // using a non-signed link. + // - Check for offsets that exceed their width or; + // - are negative if using a non-signed link. return false; } private: + /* + * Updates all objidx's in all links using the provided mapping. + */ + void remap_obj_indices (const hb_map_t& id_map, + hb_vector_t* sorted_graph) + { + for (unsigned i = 0; i < sorted_graph->length; i++) + { + for (unsigned j = 0; j < (*sorted_graph)[i].links.length; j++) + { + auto& link = (*sorted_graph)[i].links[j]; + if (!id_map.has (link.objidx)) + // TODO(garretrieger): handle this. + assert (false); + link.objidx = id_map.get (link.objidx); + } + } + } + + /* + * Creates a map from objid to # of incoming edges. + */ + void incoming_edge_count (hb_map_t* out) + { + for (unsigned i = 0; i < objects_.length; i++) + { + if (!out->has (i)) + out->set (i, 0); + + for (const auto& l : objects_[i].links) + { + unsigned id = l.objidx; + if (out->has (id)) + out->set (id, out->get (id) + 1); + else + out->set (id, 1); + } + } + } + template void serialize_link_of_type (const hb_serialize_context_t::object_t::link_t& link, char* head, @@ -220,9 +251,10 @@ inline void hb_resolve_overflows (const hb_vector_t& packed, hb_serialize_context_t* c) { graph_t sorted_graph (packed); - sorted_graph.sort_bfs (); + sorted_graph.sort_kahn (); if (sorted_graph.will_overflow ()) { - // TODO(garretrieger): additional offset resolution strategies + // TODO(garretrieger): try additional offset resolution strategies + // - Dijkstra sort of weighted graph. // - Promotion to extension lookups. // - Table duplication. // - Table splitting. diff --git a/src/test-repacker.cc b/src/test-repacker.cc index c181eb02e..4b287b62a 100644 --- a/src/test-repacker.cc +++ b/src/test-repacker.cc @@ -71,7 +71,7 @@ populate_serializer_simple (hb_serialize_context_t* c) } static void -populate_serializer_complex (hb_serialize_context_t* c) +populate_serializer_complex_1 (hb_serialize_context_t* c) { c->start_serialize (); @@ -90,15 +90,41 @@ populate_serializer_complex (hb_serialize_context_t* c) c->end_serialize(); } -static void test_sort_bfs () +static void +populate_serializer_complex_2 (hb_serialize_context_t* c) +{ + c->start_serialize (); + + unsigned obj_5 = add_object ("mno", 3, c); + + unsigned obj_4 = add_object ("jkl", 3, c); + + start_object ("ghi", 3, c); + add_offset (obj_4, c); + unsigned obj_3 = c->pop_pack (false); + + start_object ("def", 3, c); + add_offset (obj_3, c); + unsigned obj_2 = c->pop_pack (false); + + start_object ("abc", 3, c); + add_offset (obj_2, c); + add_offset (obj_4, c); + add_offset (obj_5, c); + c->pop_pack (); + + c->end_serialize(); +} + +static void test_sort_kahn_1 () { size_t buffer_size = 100; void* buffer = malloc (buffer_size); hb_serialize_context_t c (buffer, buffer_size); - populate_serializer_complex (&c); + populate_serializer_complex_1 (&c); graph_t graph (c.object_graph ()); - graph.sort_bfs (); + graph.sort_kahn (); assert(strncmp (graph.objects_[3].head, "abc", 3) == 0); assert(graph.objects_[3].links.length == 2); @@ -116,6 +142,38 @@ static void test_sort_bfs () assert(graph.objects_[0].links.length == 0); } +static void test_sort_kahn_2 () +{ + size_t buffer_size = 100; + void* buffer = malloc (buffer_size); + hb_serialize_context_t c (buffer, buffer_size); + populate_serializer_complex_2 (&c); + + graph_t graph (c.object_graph ()); + graph.sort_kahn (); + + + assert(strncmp (graph.objects_[4].head, "abc", 3) == 0); + assert(graph.objects_[4].links.length == 3); + assert(graph.objects_[4].links[0].objidx == 3); + assert(graph.objects_[4].links[1].objidx == 0); + assert(graph.objects_[4].links[2].objidx == 2); + + assert(strncmp (graph.objects_[3].head, "def", 3) == 0); + assert(graph.objects_[3].links.length == 1); + assert(graph.objects_[3].links[0].objidx == 1); + + assert(strncmp (graph.objects_[2].head, "mno", 3) == 0); + assert(graph.objects_[2].links.length == 0); + + assert(strncmp (graph.objects_[1].head, "ghi", 3) == 0); + assert(graph.objects_[1].links.length == 1); + assert(graph.objects_[1].links[0].objidx == 0); + + assert(strncmp (graph.objects_[0].head, "jkl", 3) == 0); + assert(graph.objects_[0].links.length == 0); +} + static void test_serialize () { @@ -142,5 +200,6 @@ int main (int argc, char **argv) { test_serialize (); - test_sort_bfs (); + test_sort_kahn_1 (); + test_sort_kahn_2 (); }