Tagged PDFs should output an IDTree.

The PDF spec requires that tagged PDFs have a unique
alphanumeric ID for each structure element node, and an
IDTree structure that maps IDs to an indirect reference
to the PDF node containing that element.

This patch adds support for the IDTree. It also changes the
ID we output for each node from a name to a string, and
zero-pads it so that lexicographic order matches numeric
order, which lets us easily output the IDs in sorted order.

PDFs output by Skia now pass CommonLook's
Integrity Validator; these ID issues were flagged
as errors previously.

Bug: chromium:607777
Bug: skia:8148
Change-Id: I9b367fe90aa018c21f34abd186e83c53cdde34cc
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/291689
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Dominic Mazzoni <dmazzoni@chromium.org>
This commit is contained in:
Dominic Mazzoni 2020-05-29 12:11:53 -07:00 committed by Skia Commit-Bot
parent b437d7b5e4
commit 1dbc3b5339
2 changed files with 67 additions and 9 deletions

View File

@ -131,6 +131,17 @@ void SkPDF::AttributeList::appendStringArray(
}
struct SkPDFTagNode {
// Every structure element node must carry a unique alphanumeric ID, and
// those IDs have to be written out sorted in lexicographic order.
// This helper turns one of our integer node IDs into such an ID string:
// the digits are zero-padded to a width of eight so that, for
// non-negative IDs of up to eight digits, lexicographic order of the
// strings matches numeric order of the IDs.
static SkString nodeIdToString(int nodeId) {
    SkString paddedId;
    paddedId.printf("node%08d", nodeId);
    return paddedId;
}
SkPDFTagNode* fChildren = nullptr;
size_t fChildCount = 0;
struct MarkedContentInfo {
@ -250,10 +261,9 @@ static bool can_discard(SkPDFTagNode* node) {
return true;
}
SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
SkPDFTagNode* node,
SkPDFDocument* doc) {
SkPDFIndirectReference SkPDFTagTree::PrepareTagTreeToEmit(SkPDFIndirectReference parent,
SkPDFTagNode* node,
SkPDFDocument* doc) {
SkPDFIndirectReference ref = doc->reserveRef();
std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
SkPDFTagNode* children = node->fChildren;
@ -261,7 +271,7 @@ SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
for (size_t i = 0; i < childCount; ++i) {
SkPDFTagNode* child = &children[i];
if (!(can_discard(child))) {
kids->appendRef(prepare_tag_tree_to_emit(ref, child, doc));
kids->appendRef(PrepareTagTreeToEmit(ref, child, doc));
}
}
for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
@ -290,13 +300,18 @@ SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
}
dict.insertRef("P", parent);
dict.insertObject("K", std::move(kids));
SkString idString;
idString.printf("%d", node->fNodeId);
dict.insertName("ID", idString.c_str());
if (node->fAttributes) {
dict.insertObject("A", std::move(node->fAttributes));
}
// Each node has a unique ID that also needs to be referenced
// in a separate IDTree node, along with the lowest and highest
// unique ID string.
SkString idString = SkPDFTagNode::nodeIdToString(node->fNodeId);
dict.insertString("ID", idString.c_str());
IDTreeEntry idTreeEntry = {node->fNodeId, ref};
fIdTreeEntries.push_back(idTreeEntry);
return doc->emit(dict, ref);
}
@ -327,7 +342,7 @@ SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
// Build the StructTreeRoot.
SkPDFDict structTreeRoot("StructTreeRoot");
structTreeRoot.insertRef("K", prepare_tag_tree_to_emit(ref, fRoot, doc));
structTreeRoot.insertRef("K", PrepareTagTreeToEmit(ref, fRoot, doc));
structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));
// Build the parent tree, which is a mapping from the marked
@ -348,5 +363,37 @@ SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
}
parentTree.insertObject("Nums", std::move(parentTreeNums));
structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
// Build the IDTree, a mapping from every unique ID string to
// a reference to its corresponding structure element node.
if (!fIdTreeEntries.empty()) {
std::sort(fIdTreeEntries.begin(), fIdTreeEntries.end(),
[](const IDTreeEntry& a, const IDTreeEntry& b) {
return a.nodeId < b.nodeId;
});
SkPDFDict idTree;
SkPDFDict idTreeLeaf;
auto limits = SkPDFMakeArray();
SkString lowestNodeIdString = SkPDFTagNode::nodeIdToString(
fIdTreeEntries.begin()->nodeId);
limits->appendString(lowestNodeIdString);
SkString highestNodeIdString = SkPDFTagNode::nodeIdToString(
fIdTreeEntries.rbegin()->nodeId);
limits->appendString(highestNodeIdString);
idTreeLeaf.insertObject("Limits", std::move(limits));
auto names = SkPDFMakeArray();
for (const IDTreeEntry& entry : fIdTreeEntries) {
SkString idString = SkPDFTagNode::nodeIdToString(entry.nodeId);
names->appendString(idString);
names->appendRef(entry.ref);
}
idTreeLeaf.insertObject("Names", std::move(names));
auto idTreeKids = SkPDFMakeArray();
idTreeKids->appendRef(doc->emit(idTreeLeaf));
idTree.insertObject("Kids", std::move(idTreeKids));
structTreeRoot.insertRef("IDTree", doc->emit(idTree));
}
return doc->emit(structTreeRoot, ref);
}

View File

@ -28,15 +28,26 @@ public:
SkPDFIndirectReference makeStructTreeRoot(SkPDFDocument* doc);
private:
// An entry in a map from a node ID to an indirect reference to its
// corresponding structure element node. Entries are collected while the
// tag tree is emitted and sorted by nodeId when the IDTree is built.
struct IDTreeEntry {
// Integer ID of the structure element node (taken from its fNodeId).
int nodeId;
// Indirect reference to the PDF object emitted for that node.
SkPDFIndirectReference ref;
};
static void Copy(SkPDF::StructureElementNode& node,
SkPDFTagNode* dst,
SkArenaAlloc* arena,
SkTHashMap<int, SkPDFTagNode*>* nodeMap);
SkPDFIndirectReference PrepareTagTreeToEmit(SkPDFIndirectReference parent,
SkPDFTagNode* node,
SkPDFDocument* doc);
SkArenaAlloc fArena;
SkTHashMap<int, SkPDFTagNode*> fNodeMap;
SkPDFTagNode* fRoot = nullptr;
SkTArray<SkTArray<SkPDFTagNode*>> fMarksPerPage;
std::vector<IDTreeEntry> fIdTreeEntries;
SkPDFTagTree(const SkPDFTagTree&) = delete;
SkPDFTagTree& operator=(const SkPDFTagTree&) = delete;