Re-land: Support adding attributes to PDF document structure nodes.

Originally landed: https://skia-review.googlesource.com/c/skia/+/268878
Reverted: https://skia-review.googlesource.com/c/skia/+/271858

The issue was with compilation when PDF support is disabled. See
the diff between patchsets 1 and 2.

This is an important part of writing a tagged PDF. Many of the nodes
in the document structure tree need additional attributes, just like
in HTML.

This change aims to add support for a few useful attributes, not to
be comprehensive.

Bug: chromium:1039816
Change-Id: I15f8b6c41d4fdaa4b6e21775ab6d26ec57eb0f5d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/271916
Commit-Queue: Dominic Mazzoni <dmazzoni@chromium.org>
Reviewed-by: Mike Reed <reed@google.com>
This commit is contained in:
Dominic Mazzoni 2020-02-19 10:59:29 -08:00 committed by Skia Commit-Bot
parent d2d4c5e8c5
commit 7dfb46e7f3
7 changed files with 269 additions and 14 deletions

View File

@ -23,6 +23,9 @@ Milestone 82
related calls is treated as a request to do no color correction at decode related calls is treated as a request to do no color correction at decode
time. time.
* Add new APIs to add attributes to document structure nodes when
creating a tagged PDF.
* Remove CGFontRef parameter from SkCreateTypefaceFromCTFont. * Remove CGFontRef parameter from SkCreateTypefaceFromCTFont.
Use CTFontManagerCreateFontDescriptorFromData instead of Use CTFontManagerCreateFontDescriptorFromData instead of
CGFontCreateWithDataProvider to create CTFonts to avoid memory use issues. CGFontCreateWithDataProvider to create CTFonts to avoid memory use issues.

View File

@ -169,6 +169,7 @@ tests_sources = [
"$_tests/PDFMetadataAttributeTest.cpp", "$_tests/PDFMetadataAttributeTest.cpp",
"$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp", "$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp",
"$_tests/PDFPrimitivesTest.cpp", "$_tests/PDFPrimitivesTest.cpp",
"$_tests/PDFTaggedTableTest.cpp",
"$_tests/PDFTaggedTest.cpp", "$_tests/PDFTaggedTest.cpp",
"$_tests/PackBitsTest.cpp", "$_tests/PackBitsTest.cpp",
"$_tests/PackedConfigsTextureTest.cpp", "$_tests/PackedConfigsTextureTest.cpp",

View File

@ -5,15 +5,21 @@
#include "include/core/SkDocument.h" #include "include/core/SkDocument.h"
#include <vector>
#include "include/core/SkColor.h"
#include "include/core/SkMilestone.h" #include "include/core/SkMilestone.h"
#include "include/core/SkScalar.h" #include "include/core/SkScalar.h"
#include "include/core/SkString.h" #include "include/core/SkString.h"
#include "include/core/SkTime.h" #include "include/core/SkTime.h"
#include "include/private/SkNoncopyable.h"
#define SKPDF_STRING(X) SKPDF_STRING_IMPL(X) #define SKPDF_STRING(X) SKPDF_STRING_IMPL(X)
#define SKPDF_STRING_IMPL(X) #X #define SKPDF_STRING_IMPL(X) #X
class SkExecutor; class SkExecutor;
class SkPDFArray;
class SkPDFTagTree;
namespace SkPDF { namespace SkPDF {
@ -71,16 +77,42 @@ enum class DocumentStructureType {
kForm, //!< Form control (not like an HTML FORM element) kForm, //!< Form control (not like an HTML FORM element)
}; };
/** Attributes for nodes in the PDF tree.

Each append*() call adds one attribute entry (an owner plus a single
name/value pair) to this list. The class derives from SkNoncopyable.

NOTE(review): fAttrs is a std::unique_ptr, which needs <memory>; confirm that
header is included here, directly or transitively.
*/
class SK_API AttributeList : SkNoncopyable {
public:
AttributeList();
~AttributeList();
// Each attribute must have an owner (e.g. "Layout", "List", "Table", etc)
// and an attribute name (e.g. "BBox", "RowSpan", etc.) from PDF32000_2008 14.8.5,
// and then a value of the proper type according to the spec.

// Appends an attribute whose value is an integer.
void appendInt(const char* owner, const char* name, int value);
// Appends an attribute whose value is a floating-point number.
void appendFloat(const char* owner, const char* name, float value);
// Appends an attribute whose value is given as a C string. The PDF backend
// in this change stores the value with insertName(), i.e. as a PDF name.
void appendString(const char* owner, const char* name, const char* value);
// Appends an attribute whose value is an array of floating-point numbers.
void appendFloatArray(const char* owner,
const char* name,
const std::vector<float>& value);
// Appends an attribute whose value is an array of strings. The PDF backend
// in this change stores each element with appendName(), i.e. as a PDF name.
void appendStringArray(const char* owner,
const char* name,
const std::vector<SkString>& value);
private:
// SkPDFTagTree::Copy moves fAttrs out of this list when it builds its own tree.
friend class ::SkPDFTagTree;
// Attribute entries appended so far; stays null until the first append*() call.
std::unique_ptr<SkPDFArray> fAttrs;
};
/** A node in a PDF structure tree, giving a semantic representation /** A node in a PDF structure tree, giving a semantic representation
of the content. Each node ID is associated with content of the content. Each node ID is associated with content
by passing the SkCanvas and node ID to SkPDF::SetNodeId() when drawing. by passing the SkCanvas and node ID to SkPDF::SetNodeId() when drawing.
NodeIDs should be unique within each tree. NodeIDs should be unique within each tree.
*/ */
struct StructureElementNode { struct StructureElementNode {
const StructureElementNode* fChildren = nullptr; StructureElementNode* fChildren = nullptr;
size_t fChildCount; size_t fChildCount = 0;
int fNodeId; int fNodeId = 0;
DocumentStructureType fType; DocumentStructureType fType = DocumentStructureType::kNonStruct;
AttributeList fAttributes;
}; };
/** Optional metadata to be passed into the PDF factory function. /** Optional metadata to be passed into the PDF factory function.
@ -149,7 +181,7 @@ struct Metadata {
a semantic representation of the content. The caller a semantic representation of the content. The caller
should retain ownership. should retain ownership.
*/ */
const StructureElementNode* fStructureElementTreeRoot = nullptr; StructureElementNode* fStructureElementTreeRoot = nullptr;
/** Executor to handle threaded work within PDF Backend. If this is nullptr, /** Executor to handle threaded work within PDF Backend. If this is nullptr,
then all work will be done serially on the main thread. To have worker then all work will be done serially on the main thread. To have worker

View File

@ -8,8 +8,14 @@
#include "include/core/SkCanvas.h" #include "include/core/SkCanvas.h"
#include "include/docs/SkPDFDocument.h" #include "include/docs/SkPDFDocument.h"
// Empty stand-in definition of SkPDFArray. The public header only forward-declares
// the class but holds a std::unique_ptr<SkPDFArray> inside SkPDF::AttributeList,
// so a complete type is presumably needed here for the defaulted destructor below.
class SkPDFArray {};
sk_sp<SkDocument> SkPDF::MakeDocument(SkWStream*, const SkPDF::Metadata&) { return nullptr; } sk_sp<SkDocument> SkPDF::MakeDocument(SkWStream*, const SkPDF::Metadata&) { return nullptr; }
void SkPDF::SetNodeId(SkCanvas* c, int n) { void SkPDF::SetNodeId(SkCanvas* c, int n) {
c->drawAnnotation({0, 0, 0, 0}, "PDF_Node_Key", SkData::MakeWithCopy(&n, sizeof(n)).get()); c->drawAnnotation({0, 0, 0, 0}, "PDF_Node_Key", SkData::MakeWithCopy(&n, sizeof(n)).get());
} }
// Defaulted special members for the stub build in this file, where
// SkPDF::MakeDocument() always returns nullptr.
// NOTE(review): the append*() methods declared on AttributeList have no
// definition in the visible part of this file; code that calls them would
// presumably fail to link against this stub. Confirm this is intended.
SkPDF::AttributeList::AttributeList() = default;
SkPDF::AttributeList::~AttributeList() = default;

View File

@ -66,6 +66,70 @@ static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
SK_ABORT("bad tag"); SK_ABORT("bad tag");
} }
// Defined out of line: the public header only forward-declares SkPDFArray, while
// AttributeList owns one through a std::unique_ptr<SkPDFArray> member (fAttrs).
SkPDF::AttributeList::AttributeList() = default;
SkPDF::AttributeList::~AttributeList() = default;
// Appends one attribute dictionary of the form { /O /<owner>, /<name> <value> }
// to the list, where the value is written as a PDF integer.
void SkPDF::AttributeList::appendInt(const char* owner, const char* name, int value) {
    // The backing array is created lazily on the first append.
    if (fAttrs == nullptr) {
        fAttrs = SkPDFMakeArray();
    }

    auto entry = SkPDFMakeDict();
    entry->insertName("O", owner);
    entry->insertInt(name, value);
    fAttrs->appendObject(std::move(entry));
}
// Appends one attribute dictionary of the form { /O /<owner>, /<name> <value> }
// to the list, where the value is written as a PDF scalar.
void SkPDF::AttributeList::appendFloat(const char* owner, const char* name, float value) {
    // The backing array is created lazily on the first append.
    if (fAttrs == nullptr) {
        fAttrs = SkPDFMakeArray();
    }

    auto entry = SkPDFMakeDict();
    entry->insertName("O", owner);
    entry->insertScalar(name, value);
    fAttrs->appendObject(std::move(entry));
}
// Appends one attribute dictionary of the form { /O /<owner>, /<name> /<value> }
// to the list. Note that the value is stored with insertName(), i.e. as a PDF
// name object rather than a PDF string object.
void SkPDF::AttributeList::appendString(const char* owner,
                                        const char* name,
                                        const char* value) {
    // The backing array is created lazily on the first append.
    if (fAttrs == nullptr) {
        fAttrs = SkPDFMakeArray();
    }

    auto entry = SkPDFMakeDict();
    entry->insertName("O", owner);
    entry->insertName(name, value);
    fAttrs->appendObject(std::move(entry));
}
// Appends one attribute dictionary of the form { /O /<owner>, /<name> [v0 v1 ...] }
// to the list, where every element of `value` is written as a PDF scalar.
void SkPDF::AttributeList::appendFloatArray(const char* owner,
                                            const char* name,
                                            const std::vector<float>& value) {
    // The backing array is created lazily on the first append.
    if (fAttrs == nullptr) {
        fAttrs = SkPDFMakeArray();
    }

    auto entry = SkPDFMakeDict();
    entry->insertName("O", owner);

    auto scalars = SkPDFMakeArray();
    for (const float scalar : value) {
        scalars->appendScalar(scalar);
    }
    entry->insertObject(name, std::move(scalars));

    fAttrs->appendObject(std::move(entry));
}
void SkPDF::AttributeList::appendStringArray(
const char* owner,
const char* name,
const std::vector<SkString>& value) {
if (!fAttrs)
fAttrs = SkPDFMakeArray();
std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
attrDict->insertName("O", owner);
std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
for (SkString element : value) {
pdfArray->appendName(element);
}
attrDict->insertObject(name, std::move(pdfArray));
fAttrs->appendObject(std::move(attrDict));
}
struct SkPDFTagNode { struct SkPDFTagNode {
SkPDFTagNode* fChildren = nullptr; SkPDFTagNode* fChildren = nullptr;
size_t fChildCount = 0; size_t fChildCount = 0;
@ -82,16 +146,18 @@ struct SkPDFTagNode {
kYes, kYes,
kNo, kNo,
} fCanDiscard = kUnknown; } fCanDiscard = kUnknown;
std::unique_ptr<SkPDFArray> fAttributes;
}; };
SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {} SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
SkPDFTagTree::~SkPDFTagTree() = default; SkPDFTagTree::~SkPDFTagTree() = default;
static void copy(const SkPDF::StructureElementNode& node, // static
SkPDFTagNode* dst, void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
SkArenaAlloc* arena, SkPDFTagNode* dst,
SkTHashMap<int, SkPDFTagNode*>* nodeMap) { SkArenaAlloc* arena,
SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
nodeMap->set(node.fNodeId, dst); nodeMap->set(node.fNodeId, dst);
size_t childCount = node.fChildCount; size_t childCount = node.fChildCount;
SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount); SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
@ -100,14 +166,15 @@ static void copy(const SkPDF::StructureElementNode& node,
dst->fType = node.fType; dst->fType = node.fType;
dst->fChildren = children; dst->fChildren = children;
for (size_t i = 0; i < childCount; ++i) { for (size_t i = 0; i < childCount; ++i) {
copy(node.fChildren[i], &children[i], arena, nodeMap); Copy(node.fChildren[i], &children[i], arena, nodeMap);
} }
dst->fAttributes = std::move(node.fAttributes.fAttrs);
} }
void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) { void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {
if (node) { if (node) {
fRoot = fArena.make<SkPDFTagNode>(); fRoot = fArena.make<SkPDFTagNode>();
copy(*node, fRoot, &fArena, &fNodeMap); Copy(*node, fRoot, &fArena, &fNodeMap);
} }
} }
@ -184,6 +251,13 @@ SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
dict.insertName("S", tag_name_from_type(node->fType)); dict.insertName("S", tag_name_from_type(node->fType));
dict.insertRef("P", parent); dict.insertRef("P", parent);
dict.insertObject("K", std::move(kids)); dict.insertObject("K", std::move(kids));
SkString idString;
idString.printf("%d", node->fNodeId);
dict.insertName("ID", idString.c_str());
if (node->fAttributes) {
dict.insertObject("A", std::move(node->fAttributes));
}
return doc->emit(dict, ref); return doc->emit(dict, ref);
} }
@ -223,4 +297,3 @@ SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
structTreeRoot.insertRef("ParentTree", doc->emit(parentTree)); structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
return doc->emit(structTreeRoot, ref); return doc->emit(structTreeRoot, ref);
} }

View File

@ -21,12 +21,17 @@ class SkPDFTagTree {
public: public:
SkPDFTagTree(); SkPDFTagTree();
~SkPDFTagTree(); ~SkPDFTagTree();
void init(const SkPDF::StructureElementNode*); void init(SkPDF::StructureElementNode*);
void reset(); void reset();
int getMarkIdForNodeId(int nodeId, unsigned pageIndex); int getMarkIdForNodeId(int nodeId, unsigned pageIndex);
SkPDFIndirectReference makeStructTreeRoot(SkPDFDocument* doc); SkPDFIndirectReference makeStructTreeRoot(SkPDFDocument* doc);
private: private:
static void Copy(SkPDF::StructureElementNode& node,
SkPDFTagNode* dst,
SkArenaAlloc* arena,
SkTHashMap<int, SkPDFTagNode*>* nodeMap);
SkArenaAlloc fArena; SkArenaAlloc fArena;
SkTHashMap<int, SkPDFTagNode*> fNodeMap; SkTHashMap<int, SkPDFTagNode*> fNodeMap;
SkPDFTagNode* fRoot = nullptr; SkPDFTagNode* fRoot = nullptr;

View File

@ -0,0 +1,135 @@
/*
* Copyright 2020 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "tests/Test.h"
#include "include/core/SkCanvas.h"
#include "include/core/SkFont.h"
#include "include/core/SkStream.h"
#include "include/docs/SkPDFDocument.h"
using PDFTag = SkPDF::StructureElementNode;
// Test building a tagged PDF containing a table. The structure tree is
// Document -> { H1, Table -> 5 x TR -> 4 x (TH | TD | NonStruct) } and a few
// nodes carry structure attributes (BBox, RowSpan, ColSpan, Scope).
// The test only exercises the code path; it makes no assertions on the output.
// Add this to args.gn to output the PDF to a file:
//   extra_cflags = [ "-DSK_PDF_TEST_TAGS_OUTPUT_PATH=\"/tmp/table.pdf\"" ]
DEF_TEST(SkPDF_tagged_table, r) {
    // Pass this test's own name so the skip message identifies the right test.
    REQUIRE_PDF_DOCUMENT(SkPDF_tagged_table, r);
#ifdef SK_PDF_TEST_TAGS_OUTPUT_PATH
    SkFILEWStream outputStream(SK_PDF_TEST_TAGS_OUTPUT_PATH);
#else
    SkDynamicMemoryWStream outputStream;
#endif

    SkSize pageSize = SkSize::Make(612, 792);  // U.S. Letter

    SkPDF::Metadata metadata;
    metadata.fTitle = "Example Tagged Table PDF";
    metadata.fCreator = "Skia";
    SkTime::DateTime now;
    SkTime::GetDateTime(&now);
    metadata.fCreation = now;
    metadata.fModified = now;

    constexpr int kRootChildCount = 2;
    constexpr int kRowCount = 5;
    constexpr int kColCount = 4;
    // Row-major cell text; nullptr marks a cell that is covered by a
    // neighbouring cell's row/column span and therefore draws nothing.
    const char* cellData[kRowCount * kColCount] = {
        "Car",                  "Engine",   "City MPG", "Highway MPG",
        "Mitsubishi Mirage ES", "Gas",      "28",       "47",
        "Toyota Prius Three",   "Hybrid",   "43",       "59",
        "Nissan Leaf SL",       "Electric", "N/A",      nullptr,
        "Tesla Model 3",        nullptr,    "N/A",      nullptr
    };

    // The document tag.
    PDFTag root;
    root.fNodeId = 1;
    root.fType = SkPDF::DocumentStructureType::kDocument;
    root.fChildCount = kRootChildCount;
    PDFTag rootChildren[kRootChildCount];

    // Heading.
    PDFTag& h1 = rootChildren[0];
    h1.fNodeId = 2;
    h1.fType = SkPDF::DocumentStructureType::kH1;
    h1.fChildCount = 0;

    // Table.
    PDFTag& table = rootChildren[1];
    table.fNodeId = 3;
    table.fType = SkPDF::DocumentStructureType::kTable;
    // Keep the child count tied to the size of the rows array below.
    table.fChildCount = kRowCount;
    table.fAttributes.appendFloatArray("Layout", "BBox", {72, 72, 360, 360});

    // Node ids: rows use 4 .. 4 + kRowCount - 1, cells use 10 + cellIndex.
    PDFTag rows[kRowCount];
    PDFTag all_cells[kRowCount * kColCount];
    for (int rowIndex = 0; rowIndex < kRowCount; rowIndex++) {
        PDFTag& row = rows[rowIndex];
        row.fNodeId = 4 + rowIndex;
        row.fType = SkPDF::DocumentStructureType::kTR;
        row.fChildCount = kColCount;
        PDFTag* cells = &all_cells[rowIndex * kColCount];

        for (int colIndex = 0; colIndex < kColCount; colIndex++) {
            int cellIndex = rowIndex * kColCount + colIndex;
            PDFTag& cell = cells[colIndex];
            cell.fNodeId = 10 + cellIndex;
            if (!cellData[cellIndex]) {
                cell.fType = SkPDF::DocumentStructureType::kNonStruct;
            } else if (rowIndex == 0 || colIndex == 0) {
                // First row and first column are header cells.
                cell.fType = SkPDF::DocumentStructureType::kTH;
            } else {
                cell.fType = SkPDF::DocumentStructureType::kTD;
            }
            cell.fChildCount = 0;

            if (cellIndex == 13) {
                // "Electric" (row 3, col 1) also covers the empty cell below it.
                cell.fAttributes.appendInt("Table", "RowSpan", 2);
            } else if (cellIndex == 14 || cellIndex == 18) {
                // Both "N/A" cells (rows 3 and 4, col 2) cover the empty cell to their right.
                cell.fAttributes.appendInt("Table", "ColSpan", 2);
            } else if (cell.fType == SkPDF::DocumentStructureType::kTH) {
                cell.fAttributes.appendString(
                    "Table", "Scope", rowIndex == 0 ? "Column" : "Row");
            }
        }
        row.fChildren = cells;
    }
    table.fChildren = rows;
    root.fChildren = rootChildren;

    metadata.fStructureElementTreeRoot = &root;
    sk_sp<SkDocument> document = SkPDF::MakeDocument(
        &outputStream, metadata);

    SkPaint paint;
    paint.setColor(SK_ColorBLACK);

    SkCanvas* canvas =
            document->beginPage(pageSize.width(),
                                pageSize.height());
    // Associate the heading text with the H1 node (id 2).
    SkPDF::SetNodeId(canvas, 2);
    SkFont font(nullptr, 36);
    canvas->drawString("Tagged PDF Table", 72, 72, font, paint);

    font.setSize(14);
    for (int rowIndex = 0; rowIndex < kRowCount; rowIndex++) {
        for (int colIndex = 0; colIndex < kColCount; colIndex++) {
            int cellIndex = rowIndex * kColCount + colIndex;
            const char* str = cellData[cellIndex];
            if (!str) {
                continue;
            }

            // The first column is wider than the others by 72 units.
            int x = 72 + colIndex * 108 + (colIndex > 0 ? 72 : 0);
            int y = 144 + rowIndex * 48;
            // Must match the node id assigned to this cell in the tree above.
            SkPDF::SetNodeId(canvas, 10 + cellIndex);
            canvas->drawString(str, x, y, font, paint);
        }
    }

    document->endPage();
    document->close();
    outputStream.flush();
}