Support adding attributes to PDF document structure nodes.

This is an important part of writing a tagged PDF. Many of the nodes
in the document structure tree need additional attributes, just like
in HTML.

This change aims to add support for a few useful attributes, not to
be comprehensive.

Bug: chromium:1039816

Change-Id: I64a6b36b0b4ec42fd27ae4ad702afce95c95af5d
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/268878
Commit-Queue: Dominic Mazzoni <dmazzoni@chromium.org>
Commit-Queue: Mike Reed <reed@google.com>
Auto-Submit: Dominic Mazzoni <dmazzoni@chromium.org>
Reviewed-by: Mike Reed <reed@google.com>
Reviewed-by: Derek Sollenberger <djsollen@google.com>
This commit is contained in:
Dominic Mazzoni 2020-02-18 09:48:36 -08:00 committed by Skia Commit-Bot
parent 2e941d00c8
commit 80474156d1
6 changed files with 263 additions and 14 deletions

View File

@ -23,6 +23,9 @@ Milestone 82
related calls is treated as a request to do no color correction at decode
time.
* Add new APIs to add attributes to document structure nodes when
creating a tagged PDF.
* Remove CGFontRef parameter from SkCreateTypefaceFromCTFont.
Use CTFontManagerCreateFontDescriptorFromData instead of
CGFontCreateWithDataProvider to create CTFonts to avoid memory use issues.

View File

@ -169,6 +169,7 @@ tests_sources = [
"$_tests/PDFMetadataAttributeTest.cpp",
"$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp",
"$_tests/PDFPrimitivesTest.cpp",
"$_tests/PDFTaggedTableTest.cpp",
"$_tests/PDFTaggedTest.cpp",
"$_tests/PackBitsTest.cpp",
"$_tests/PackedConfigsTextureTest.cpp",

View File

@ -5,15 +5,21 @@
#include "include/core/SkDocument.h"
#include <vector>
#include "include/core/SkColor.h"
#include "include/core/SkMilestone.h"
#include "include/core/SkScalar.h"
#include "include/core/SkString.h"
#include "include/core/SkTime.h"
#include "include/private/SkNoncopyable.h"
#define SKPDF_STRING(X) SKPDF_STRING_IMPL(X)
#define SKPDF_STRING_IMPL(X) #X
class SkExecutor;
class SkPDFArray;
class SkPDFTagTree;
namespace SkPDF {
@ -71,16 +77,42 @@ enum class DocumentStructureType {
kForm, //!< Form control (not like an HTML FORM element)
};
/** Attributes for nodes in the PDF tree.

    An AttributeList accumulates attribute entries for one
    StructureElementNode. Each append*() call adds one entry made of an
    owner (stored under the key "O") and a single name/value pair.

    Derives from SkNoncopyable. The backing array (fAttrs) is not created
    until the first append*() call, so a list that was never appended to
    holds a null fAttrs. SkPDFTagTree is a friend and moves fAttrs out of
    the node when it copies the structure tree, so the list is consumed by
    that step.
*/
class SK_API AttributeList : SkNoncopyable {
public:
AttributeList();
~AttributeList();
// Each attribute must have an owner (e.g. "Layout", "List", "Table", etc)
// and an attribute name (e.g. "BBox", "RowSpan", etc.) from PDF32000_2008 14.8.5,
// and then a value of the proper type according to the spec.
//
// Every call below appends a separate entry; entries are kept in call order.

// Appends an entry whose value is an integer.
void appendInt(const char* owner, const char* name, int value);
// Appends an entry whose value is a scalar (floating-point) number.
void appendFloat(const char* owner, const char* name, float value);
// Appends an entry whose value is given as text.
// NOTE(review): the implementation stores the value with insertName(), i.e.
// presumably as a PDF name object rather than a PDF string -- confirm that
// this matches the attribute being written.
void appendString(const char* owner, const char* name, const char* value);
// Appends an entry whose value is an array of scalar numbers, in order.
void appendFloatArray(const char* owner,
const char* name,
const std::vector<float>& value);
// Appends an entry whose value is an array built from the given strings,
// in order. Each element is added with appendName().
void appendStringArray(const char* owner,
const char* name,
const std::vector<SkString>& value);
private:
// Gives the tag tree access to fAttrs so it can take ownership of it.
friend class ::SkPDFTagTree;
// Array of attribute dictionaries; null until the first append*() call.
std::unique_ptr<SkPDFArray> fAttrs;
};
/** A node in a PDF structure tree, giving a semantic representation
of the content. Each node ID is associated with content
by passing the SkCanvas and node ID to SkPDF::SetNodeId() when drawing.
NodeIDs should be unique within each tree.
*/
struct StructureElementNode {
const StructureElementNode* fChildren = nullptr;
size_t fChildCount;
int fNodeId;
DocumentStructureType fType;
StructureElementNode* fChildren = nullptr;
size_t fChildCount = 0;
int fNodeId = 0;
DocumentStructureType fType = DocumentStructureType::kNonStruct;
AttributeList fAttributes;
};
/** Optional metadata to be passed into the PDF factory function.
@ -149,7 +181,7 @@ struct Metadata {
a semantic representation of the content. The caller
should retain ownership.
*/
const StructureElementNode* fStructureElementTreeRoot = nullptr;
StructureElementNode* fStructureElementTreeRoot = nullptr;
/** Executor to handle threaded work within PDF Backend. If this is nullptr,
then all work will be done serially on the main thread. To have worker

View File

@ -66,6 +66,70 @@ static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
SK_ABORT("bad tag");
}
// The constructor and destructor are defaulted here rather than in the
// header: the header only forward-declares SkPDFArray, while AttributeList
// owns a std::unique_ptr<SkPDFArray>, whose destruction needs the complete
// type (presumably visible in this file -- the includes are not shown here).
SkPDF::AttributeList::AttributeList() = default;
SkPDF::AttributeList::~AttributeList() = default;
// Appends one integer-valued attribute to this list.
//
// owner: attribute owner, written under the key "O".
// name:  attribute key.
// value: integer stored under `name`.
void SkPDF::AttributeList::appendInt(
        const char* owner, const char* name, int value) {
    // The backing array is created by the first append call.
    if (fAttrs == nullptr) {
        fAttrs = SkPDFMakeArray();
    }

    auto entry = SkPDFMakeDict();
    entry->insertName("O", owner);
    entry->insertInt(name, value);

    fAttrs->appendObject(std::move(entry));
}
// Appends one scalar-valued attribute to this list.
//
// owner: attribute owner, written under the key "O".
// name:  attribute key.
// value: number stored under `name` via insertScalar().
void SkPDF::AttributeList::appendFloat(
        const char* owner, const char* name, float value) {
    // The backing array is created by the first append call.
    if (fAttrs == nullptr) {
        fAttrs = SkPDFMakeArray();
    }

    auto entry = SkPDFMakeDict();
    entry->insertName("O", owner);
    entry->insertScalar(name, value);

    fAttrs->appendObject(std::move(entry));
}
// Appends one text-valued attribute to this list.
//
// owner: attribute owner, written under the key "O".
// name:  attribute key.
// value: text stored under `name`.
//
// NOTE(review): the value is written with insertName(), the same call used
// for the owner, so it is presumably emitted as a PDF name object rather
// than a PDF string -- confirm this is what callers expect.
void SkPDF::AttributeList::appendString(
        const char* owner, const char* name, const char* value) {
    // The backing array is created by the first append call.
    if (fAttrs == nullptr) {
        fAttrs = SkPDFMakeArray();
    }

    auto entry = SkPDFMakeDict();
    entry->insertName("O", owner);
    entry->insertName(name, value);

    fAttrs->appendObject(std::move(entry));
}
// Appends one attribute whose value is an array of scalars.
//
// owner: attribute owner, written under the key "O".
// name:  attribute key.
// value: numbers copied, in order, into a new array stored under `name`.
//        An empty vector produces an empty array.
void SkPDF::AttributeList::appendFloatArray(
        const char* owner, const char* name, const std::vector<float>& value) {
    // The backing array is created by the first append call.
    if (fAttrs == nullptr) {
        fAttrs = SkPDFMakeArray();
    }

    auto entry = SkPDFMakeDict();
    entry->insertName("O", owner);

    auto scalars = SkPDFMakeArray();
    for (const float scalar : value) {
        scalars->appendScalar(scalar);
    }
    entry->insertObject(name, std::move(scalars));

    fAttrs->appendObject(std::move(entry));
}
void SkPDF::AttributeList::appendStringArray(
const char* owner,
const char* name,
const std::vector<SkString>& value) {
if (!fAttrs)
fAttrs = SkPDFMakeArray();
std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
attrDict->insertName("O", owner);
std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
for (SkString element : value) {
pdfArray->appendName(element);
}
attrDict->insertObject(name, std::move(pdfArray));
fAttrs->appendObject(std::move(attrDict));
}
struct SkPDFTagNode {
SkPDFTagNode* fChildren = nullptr;
size_t fChildCount = 0;
@ -82,13 +146,15 @@ struct SkPDFTagNode {
kYes,
kNo,
} fCanDiscard = kUnknown;
std::unique_ptr<SkPDFArray> fAttributes;
};
// Constructs an empty tag tree. The arena is constructed with a size
// argument equal to four SkPDFTagNode objects.
// NOTE(review): the exact meaning of SkArenaAlloc's constructor argument is
// not visible here -- presumably an initial block size; confirm.
SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
// Defaulted destructor, defined in this file rather than in the header.
SkPDFTagTree::~SkPDFTagTree() = default;
static void copy(const SkPDF::StructureElementNode& node,
// static
void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
SkPDFTagNode* dst,
SkArenaAlloc* arena,
SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
@ -100,14 +166,15 @@ static void copy(const SkPDF::StructureElementNode& node,
dst->fType = node.fType;
dst->fChildren = children;
for (size_t i = 0; i < childCount; ++i) {
copy(node.fChildren[i], &children[i], arena, nodeMap);
Copy(node.fChildren[i], &children[i], arena, nodeMap);
}
dst->fAttributes = std::move(node.fAttributes.fAttrs);
}
void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) {
void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {
if (node) {
fRoot = fArena.make<SkPDFTagNode>();
copy(*node, fRoot, &fArena, &fNodeMap);
Copy(*node, fRoot, &fArena, &fNodeMap);
}
}
@ -184,6 +251,13 @@ SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
dict.insertName("S", tag_name_from_type(node->fType));
dict.insertRef("P", parent);
dict.insertObject("K", std::move(kids));
SkString idString;
idString.printf("%d", node->fNodeId);
dict.insertName("ID", idString.c_str());
if (node->fAttributes) {
dict.insertObject("A", std::move(node->fAttributes));
}
return doc->emit(dict, ref);
}
@ -223,4 +297,3 @@ SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
return doc->emit(structTreeRoot, ref);
}

View File

@ -21,12 +21,17 @@ class SkPDFTagTree {
public:
SkPDFTagTree();
~SkPDFTagTree();
void init(const SkPDF::StructureElementNode*);
void init(SkPDF::StructureElementNode*);
void reset();
int getMarkIdForNodeId(int nodeId, unsigned pageIndex);
SkPDFIndirectReference makeStructTreeRoot(SkPDFDocument* doc);
private:
static void Copy(SkPDF::StructureElementNode& node,
SkPDFTagNode* dst,
SkArenaAlloc* arena,
SkTHashMap<int, SkPDFTagNode*>* nodeMap);
SkArenaAlloc fArena;
SkTHashMap<int, SkPDFTagNode*> fNodeMap;
SkPDFTagNode* fRoot = nullptr;

View File

@ -0,0 +1,135 @@
/*
* Copyright 2020 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "tests/Test.h"
#include "include/core/SkCanvas.h"
#include "include/core/SkFont.h"
#include "include/core/SkStream.h"
#include "include/docs/SkPDFDocument.h"
using PDFTag = SkPDF::StructureElementNode;
// Test building a tagged PDF containing a table.
//
// The structure tree is a Document root with two children: an H1 heading and
// a Table of kRowCount rows, each holding kColCount cells. The table and some
// cells carry attributes added through SkPDF::AttributeList (BBox, RowSpan,
// ColSpan, Scope). One page is drawn, tagging each piece of text with the
// node ID of its structure element. The test makes no assertions about the
// generated output.
//
// Add this to args.gn to output the PDF to a file:
//   extra_cflags = [ "-DSK_PDF_TEST_TAGS_OUTPUT_PATH=\"/tmp/table.pdf\"" ]
DEF_TEST(SkPDF_tagged_table, r) {
    // The first argument names this test.
    // NOTE(review): presumably the macro only uses it for reporting --
    // confirm in tests/Test.h.
    REQUIRE_PDF_DOCUMENT(SkPDF_tagged_table, r);
#ifdef SK_PDF_TEST_TAGS_OUTPUT_PATH
    SkFILEWStream outputStream(SK_PDF_TEST_TAGS_OUTPUT_PATH);
#else
    SkDynamicMemoryWStream outputStream;
#endif

    SkSize pageSize = SkSize::Make(612, 792);  // U.S. Letter

    SkPDF::Metadata metadata;
    metadata.fTitle = "Example Tagged Table PDF";
    metadata.fCreator = "Skia";
    SkTime::DateTime now;
    SkTime::GetDateTime(&now);
    metadata.fCreation = now;
    metadata.fModified = now;

    constexpr int kRowCount = 5;
    constexpr int kColCount = 4;
    // Row-major cell text. A nullptr entry marks a cell with no content of
    // its own; such cells are tagged kNonStruct and nothing is drawn for them.
    const char* cellData[kRowCount * kColCount] = {
        "Car",                  "Engine",   "City MPG", "Highway MPG",
        "Mitsubishi Mirage ES", "Gas",      "28",       "47",
        "Toyota Prius Three",   "Hybrid",   "43",       "59",
        "Nissan Leaf SL",       "Electric", "N/A",      nullptr,
        "Tesla Model 3",        nullptr,    "N/A",      nullptr
    };

    // The document tag.
    PDFTag root;
    root.fNodeId = 1;
    root.fType = SkPDF::DocumentStructureType::kDocument;
    root.fChildCount = 2;
    PDFTag rootChildren[2];

    // Heading.
    PDFTag& h1 = rootChildren[0];
    h1.fNodeId = 2;
    h1.fType = SkPDF::DocumentStructureType::kH1;
    h1.fChildCount = 0;

    // Table.
    PDFTag& table = rootChildren[1];
    table.fNodeId = 3;
    table.fType = SkPDF::DocumentStructureType::kTable;
    // Must match the size of the rows array assigned to table.fChildren below.
    table.fChildCount = kRowCount;
    table.fAttributes.appendFloatArray("Layout", "BBox", {72, 72, 360, 360});

    PDFTag rows[kRowCount];
    PDFTag all_cells[kRowCount * kColCount];
    for (int rowIndex = 0; rowIndex < kRowCount; rowIndex++) {
        // Row node IDs are 4 .. 4 + kRowCount - 1.
        PDFTag& row = rows[rowIndex];
        row.fNodeId = 4 + rowIndex;
        row.fType = SkPDF::DocumentStructureType::kTR;
        row.fChildCount = kColCount;
        PDFTag* cells = &all_cells[rowIndex * kColCount];

        for (int colIndex = 0; colIndex < kColCount; colIndex++) {
            int cellIndex = rowIndex * kColCount + colIndex;
            PDFTag& cell = cells[colIndex];
            // Cell node IDs start at 10; the drawing loop below relies on
            // the same 10 + cellIndex mapping.
            cell.fNodeId = 10 + cellIndex;
            if (!cellData[cellIndex]) {
                cell.fType = SkPDF::DocumentStructureType::kNonStruct;
            } else if (rowIndex == 0 || colIndex == 0) {
                // First row and first column are header cells.
                cell.fType = SkPDF::DocumentStructureType::kTH;
            } else {
                cell.fType = SkPDF::DocumentStructureType::kTD;
            }
            cell.fChildCount = 0;

            if (cellIndex == 13) {
                // Row 3, column 1 ("Electric") spans rows 3 and 4.
                cell.fAttributes.appendInt("Table", "RowSpan", 2);
            } else if (cellIndex == 14 || cellIndex == 18) {
                // The "N/A" cells in rows 3 and 4 span columns 2 and 3.
                cell.fAttributes.appendInt("Table", "ColSpan", 2);
            } else if (cell.fType == SkPDF::DocumentStructureType::kTH) {
                cell.fAttributes.appendString(
                    "Table", "Scope", rowIndex == 0 ? "Column" : "Row");
            }
        }
        row.fChildren = cells;
    }
    table.fChildren = rows;
    root.fChildren = rootChildren;

    metadata.fStructureElementTreeRoot = &root;
    sk_sp<SkDocument> document = SkPDF::MakeDocument(
        &outputStream, metadata);

    SkPaint paint;
    paint.setColor(SK_ColorBLACK);

    SkCanvas* canvas =
            document->beginPage(pageSize.width(),
                                pageSize.height());

    // Heading text, tagged with the H1 node.
    SkPDF::SetNodeId(canvas, 2);
    SkFont font(nullptr, 36);
    canvas->drawString("Tagged PDF Table", 72, 72, font, paint);

    font.setSize(14);
    for (int rowIndex = 0; rowIndex < kRowCount; rowIndex++) {
        for (int colIndex = 0; colIndex < kColCount; colIndex++) {
            int cellIndex = rowIndex * kColCount + colIndex;
            const char* str = cellData[cellIndex];
            if (!str) {
                continue;
            }

            // Columns after the first are shifted right by an extra 72 units.
            int x = 72 + colIndex * 108 + (colIndex > 0 ? 72 : 0);
            int y = 144 + rowIndex * 48;

            SkPDF::SetNodeId(canvas, 10 + cellIndex);
            canvas->drawString(str, x, y, font, paint);
        }
    }

    document->endPage();
    document->close();
    outputStream.flush();
}