Add initial support for generating tagged PDFs.

Adds an interface for the document creator to pass in a tree
of tags indicating the structure of the document, each with a type
(from a predetermined enum of possible types) and a node ID.
It also adds a setNodeId function to SkCanvas so that page content
can be associated with a particular tag. If both the tag tree and
marked content are present, Skia can now output a properly tagged
PDF.

An example program is included. When used properly, the PDF generated
by this patch is valid and the tags are parsed properly by Adobe
Acrobat. It handles many corner cases like content that spans more
than one page, or tags that don't correspond to any marked content, or
marked content that doesn't correspond to any tags.

However, it doesn't implement all of the features of PDF accessibility
yet: some tags can carry additional attributes that still need to be
supported in order to properly tag things like figures and tables.

Bug: skia:8148
Change-Id: I2e448eca8ded8e1b29ba685663b557ae7ad7e23e
Reviewed-on: https://skia-review.googlesource.com/141138
Reviewed-by: Hal Canary <halcanary@google.com>
This commit is contained in:
Dominic Mazzoni 2018-09-25 20:29:15 -07:00 committed by Hal Canary
parent b400d4d7e0
commit 656cefe65d
10 changed files with 668 additions and 3 deletions

View File

@ -46,6 +46,8 @@ skia_pdf_sources = [
"$_src/pdf/SkPDFResourceDict.h", "$_src/pdf/SkPDFResourceDict.h",
"$_src/pdf/SkPDFShader.cpp", "$_src/pdf/SkPDFShader.cpp",
"$_src/pdf/SkPDFShader.h", "$_src/pdf/SkPDFShader.h",
"$_src/pdf/SkPDFTag.cpp",
"$_src/pdf/SkPDFTag.h",
"$_src/pdf/SkPDFTypes.cpp", "$_src/pdf/SkPDFTypes.cpp",
"$_src/pdf/SkPDFTypes.h", "$_src/pdf/SkPDFTypes.h",
"$_src/pdf/SkPDFUtils.cpp", "$_src/pdf/SkPDFUtils.cpp",

View File

@ -168,6 +168,7 @@ tests_sources = [
"$_tests/PDFMetadataAttributeTest.cpp", "$_tests/PDFMetadataAttributeTest.cpp",
"$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp", "$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp",
"$_tests/PDFPrimitivesTest.cpp", "$_tests/PDFPrimitivesTest.cpp",
"$_tests/PDFTaggedTest.cpp",
"$_tests/OffsetSimplePolyTest.cpp", "$_tests/OffsetSimplePolyTest.cpp",
"$_tests/OnFlushCallbackTest.cpp", "$_tests/OnFlushCallbackTest.cpp",
"$_tests/PathRendererCacheTests.cpp", "$_tests/PathRendererCacheTests.cpp",

View File

@ -11,6 +11,67 @@
namespace SkPDF { namespace SkPDF {
/** Standard structure types for a tagged PDF.
See the tables of standard structure types in PDF 32000-1:2008,
starting at Table 333. Each enumerator is the specification's
tag name with a 'k' prefix.
*/
enum class DocumentStructureType {
kDocument,
kPart,
kArt, // Article
kSect, // Section
kDiv, // Division
kBlockQuote,
kCaption,
kTOC, // Table of Contents
kTOCI, // Table of Contents Item
kIndex,
kNonStruct,
kPrivate,
kH, // Heading
kH1, // Heading level 1
kH2,
kH3,
kH4,
kH5,
kH6, // Heading level 6
kP, // Paragraph
kL, // List
kLI, // List item
kLbl, // List item label
kLBody, // List item body
kTable,
kTR, // Table row
kTH, // Table header cell
kTD, // Table data cell
kTHead, // Table header row group
kTBody, // Table body row group
kTFoot, // Table footer row group
kSpan,
kQuote,
kNote,
kReference,
kBibEntry, // Bibliography entry
kCode,
kLink,
kAnnot, // Annotation
kRuby,
kWarichu,
kFigure,
kFormula,
kForm, // Form control (not like an HTML FORM element)
};
/**
 *  A node in a PDF structure tree, giving a semantic representation
 *  of the content.  Each node ID is associated with content
 *  by passing the SkCanvas and node ID to SkPDF::SetNodeId() when drawing.
 *
 *  Every field has a default value, so a default-constructed node is a
 *  well-defined childless node instead of carrying indeterminate values:
 *    fChildren   - array of fChildCount child nodes (nullptr if none).
 *    fChildCount - number of entries in fChildren.
 *    fNodeId     - ID used to match drawn content; zero means "no node ID".
 *    fType       - the structure type emitted for this node.
 */
struct StructureElementNode {
    const StructureElementNode* fChildren = nullptr;
    size_t fChildCount = 0;
    int fNodeId = 0;
    DocumentStructureType fType = DocumentStructureType::kNonStruct;
};
/** Optional metadata to be passed into the PDF factory function. /** Optional metadata to be passed into the PDF factory function.
*/ */
struct Metadata { struct Metadata {
@ -73,8 +134,27 @@ struct Metadata {
opaque, it will be encoded (using JPEG) with that quality setting. opaque, it will be encoded (using JPEG) with that quality setting.
*/ */
int fEncodingQuality = 101; int fEncodingQuality = 101;
/**
* An optional tree of structured document tags that provide
* a semantic representation of the content. The caller
* should retain ownership.
*/
const StructureElementNode* fStructureElementTreeRoot = nullptr;
}; };
/** Associate a node ID with subsequent drawing commands in an
SkCanvas. The same node ID can appear in a StructureElementNode
in order to associate a document's structure element tree with
its content.
A node ID of zero indicates no node ID.
@param dst The canvas used to draw to the PDF.
@param nodeID The node ID for subsequent drawing commands.
*/
SK_API void SetNodeId(SkCanvas* dst, int nodeID);
/** Create a PDF-backed document, writing the results into a SkWStream. /** Create a PDF-backed document, writing the results into a SkWStream.
PDF pages are sized in point units. 1 pt == 1/72 inch == 127/360 mm. PDF pages are sized in point units. 1 pt == 1/72 inch == 127/360 mm.

View File

@ -527,6 +527,7 @@ SkPDFDevice::SkPDFDevice(SkISize pageSize, SkPDFDocument* doc, const SkMatrix& t
: INHERITED(SkImageInfo::MakeUnknown(pageSize.width(), pageSize.height()), : INHERITED(SkImageInfo::MakeUnknown(pageSize.width(), pageSize.height()),
SkSurfaceProps(0, kUnknown_SkPixelGeometry)) SkSurfaceProps(0, kUnknown_SkPixelGeometry))
, fInitialTransform(transform) , fInitialTransform(transform)
, fNodeId(0)
, fDocument(doc) , fDocument(doc)
{ {
SkASSERT(!pageSize.isEmpty()); SkASSERT(!pageSize.isEmpty());
@ -550,6 +551,13 @@ void SkPDFDevice::drawAnnotation(const SkRect& rect, const char key[], SkData* v
return; return;
} }
if (rect.isEmpty()) { if (rect.isEmpty()) {
if (!strcmp(key, SkPDFGetNodeIdKey())) {
int nodeID;
if (value->size() != sizeof(nodeID)) { return; }
memcpy(&nodeID, value->data(), sizeof(nodeID));
fNodeId = nodeID;
return;
}
if (!strcmp(SkAnnotationKeys::Define_Named_Dest_Key(), key)) { if (!strcmp(SkAnnotationKeys::Define_Named_Dest_Key(), key)) {
SkPoint transformedPoint; SkPoint transformedPoint;
this->ctm().mapXY(rect.x(), rect.y(), &transformedPoint); this->ctm().mapXY(rect.x(), rect.y(), &transformedPoint);
@ -1176,6 +1184,19 @@ void SkPDFDevice::internalDrawGlyphRun(const SkGlyphRun& glyphRun, SkPoint offse
SkDynamicMemoryWStream* out = content.stream(); SkDynamicMemoryWStream* out = content.stream();
out->writeText("BT\n"); out->writeText("BT\n");
int markId = -1;
if (fNodeId) {
markId = fDocument->getMarkIdForNodeId(fNodeId);
}
if (markId != -1) {
out->writeText("/P <</MCID ");
out->writeDecAsText(markId);
out->writeText(" >>BDC\n");
}
SK_AT_SCOPE_EXIT(if (markId != -1) out->writeText("EMC\n"));
SK_AT_SCOPE_EXIT(out->writeText("ET\n")); SK_AT_SCOPE_EXIT(out->writeText("ET\n"));
const SkGlyphID maxGlyphID = SkToU16(typeface->countGlyphs() - 1); const SkGlyphID maxGlyphID = SkToU16(typeface->countGlyphs() - 1);

View File

@ -167,6 +167,7 @@ private:
std::vector<sk_sp<SkPDFObject>> fXObjectResources; std::vector<sk_sp<SkPDFObject>> fXObjectResources;
std::vector<sk_sp<SkPDFObject>> fShaderResources; std::vector<sk_sp<SkPDFObject>> fShaderResources;
std::vector<sk_sp<SkPDFFont>> fFontResources; std::vector<sk_sp<SkPDFFont>> fFontResources;
int fNodeId;
SkSinglyLinkedList<SkDynamicMemoryWStream> fContentEntries; SkSinglyLinkedList<SkDynamicMemoryWStream> fContentEntries;
struct GraphicStackState { struct GraphicStackState {

View File

@ -11,12 +11,19 @@
#include "SkMakeUnique.h" #include "SkMakeUnique.h"
#include "SkPDFCanon.h" #include "SkPDFCanon.h"
#include "SkPDFDevice.h" #include "SkPDFDevice.h"
#include "SkPDFTag.h"
#include "SkPDFUtils.h" #include "SkPDFUtils.h"
#include "SkStream.h" #include "SkStream.h"
#include "SkTo.h" #include "SkTo.h"
#include <utility> #include <utility>
// Annotation key under which a node ID is passed through
// SkCanvas::drawAnnotation. The returned string has static storage.
const char* SkPDFGetNodeIdKey() {
    static constexpr char kNodeIdAnnotationKey[] = "PDF_Node_Key";
    return &kNodeIdAnnotationKey[0];
}
SkPDFObjectSerializer::SkPDFObjectSerializer() : fBaseOffset(0), fNextToBeSerialized(0) {} SkPDFObjectSerializer::SkPDFObjectSerializer() : fBaseOffset(0), fNextToBeSerialized(0) {}
SkPDFObjectSerializer::~SkPDFObjectSerializer() { SkPDFObjectSerializer::~SkPDFObjectSerializer() {
@ -192,6 +199,9 @@ SkPDFDocument::SkPDFDocument(SkWStream* stream,
fInverseRasterScale = kDpiForRasterScaleOne / fMetadata.fRasterDPI; fInverseRasterScale = kDpiForRasterScaleOne / fMetadata.fRasterDPI;
fRasterScale = fMetadata.fRasterDPI / kDpiForRasterScaleOne; fRasterScale = fMetadata.fRasterDPI / kDpiForRasterScaleOne;
} }
if (fMetadata.fStructureElementTreeRoot) {
fTagRoot = recursiveBuildTagTree(*fMetadata.fStructureElementTreeRoot, nullptr);
}
} }
SkPDFDocument::~SkPDFDocument() { SkPDFDocument::~SkPDFDocument() {
@ -265,6 +275,9 @@ void SkPDFDocument::onEndPage() {
} }
this->serialize(contentObject); this->serialize(contentObject);
page->insertObjRef("Contents", std::move(contentObject)); page->insertObjRef("Contents", std::move(contentObject));
// The StructParents unique identifier for each page is just its
// 0-based page index.
page->insertInt("StructParents", static_cast<int>(fPages.size()));
fPages.emplace_back(std::move(page)); fPages.emplace_back(std::move(page));
} }
@ -423,6 +436,39 @@ static sk_sp<SkPDFArray> make_srgb_output_intents() {
return intentArray; return intentArray;
} }
// Returns the dictionary of the finished page at the 0-based |pageIndex|.
// The index must refer to a page already stored in fPages.
sk_sp<SkPDFDict> SkPDFDocument::getPage(int pageIndex) const {
    SkASSERT(0 <= pageIndex && static_cast<size_t>(pageIndex) < fPages.size());
    return fPages[static_cast<size_t>(pageIndex)];
}
int SkPDFDocument::getMarkIdForNodeId(int nodeId) {
sk_sp<SkPDFTag>* tagPtr = fNodeIdToTag.find(nodeId);
if (tagPtr == nullptr) {
return -1;
}
sk_sp<SkPDFTag> tag = *tagPtr;
int pageIndex = static_cast<int>(fPages.size());
while (fMarksPerPage.count() < pageIndex + 1) {
fMarksPerPage.push_back();
}
int markId = fMarksPerPage[pageIndex].count();
tag->addMarkedContent(pageIndex, markId);
fMarksPerPage[pageIndex].push_back(std::move(tag));
return markId;
}
// Creates the SkPDFTag for |node| (with |parent| as its parent tag),
// registers it in fNodeIdToTag, then does the same for each of the
// node's children, attaching them to the new tag. Returns the new tag.
sk_sp<SkPDFTag> SkPDFDocument::recursiveBuildTagTree(
        const SkPDF::StructureElementNode& node, sk_sp<SkPDFTag> parent) {
    auto tag = sk_make_sp<SkPDFTag>(node.fNodeId, node.fType, parent);
    fNodeIdToTag.set(tag->fNodeId, tag);

    const size_t childCount = node.fChildCount;
    tag->fChildren.reserve(childCount);
    for (size_t childIndex = 0; childIndex != childCount; ++childIndex) {
        const SkPDF::StructureElementNode& childNode = node.fChildren[childIndex];
        sk_sp<SkPDFTag> childTag = this->recursiveBuildTagTree(childNode, tag);
        tag->appendChild(std::move(childTag));
    }
    return tag;
}
void SkPDFDocument::onClose(SkWStream* stream) { void SkPDFDocument::onClose(SkWStream* stream) {
SkASSERT(fCanvas.imageInfo().dimensions().isZero()); SkASSERT(fCanvas.imageInfo().dimensions().isZero());
if (fPages.empty()) { if (fPages.empty()) {
@ -437,14 +483,63 @@ void SkPDFDocument::onClose(SkWStream* stream) {
// no one has ever asked for this feature. // no one has ever asked for this feature.
docCatalog->insertObject("OutputIntents", make_srgb_output_intents()); docCatalog->insertObject("OutputIntents", make_srgb_output_intents());
} }
SkASSERT(!fPages.empty());
docCatalog->insertObjRef("Pages", generate_page_tree(&fPages)); std::vector<sk_sp<SkPDFDict>> pagesCopy(fPages);
SkASSERT(fPages.empty()); SkASSERT(!pagesCopy.empty());
docCatalog->insertObjRef("Pages", generate_page_tree(&pagesCopy));
SkASSERT(pagesCopy.empty());
if (fDests->size() > 0) { if (fDests->size() > 0) {
docCatalog->insertObjRef("Dests", std::move(fDests)); docCatalog->insertObjRef("Dests", std::move(fDests));
} }
// Handle tagged PDFs.
if (fTagRoot) {
// In the document catalog, indicate that this PDF is tagged.
auto markInfo = sk_make_sp<SkPDFDict>("MarkInfo");
markInfo->insertBool("Marked", true);
docCatalog->insertObject("MarkInfo", markInfo);
// Prepare the tag tree, this automatically skips over any
// tags that weren't referenced from any marked content.
bool success = fTagRoot->prepareTagTreeToEmit(*this);
if (!success) {
SkDEBUGFAIL("PDF has tag tree but no marked content.");
}
// Build the StructTreeRoot.
auto structTreeRoot = sk_make_sp<SkPDFDict>("StructTreeRoot");
docCatalog->insertObjRef("StructTreeRoot", structTreeRoot);
structTreeRoot->insertObjRef("K", fTagRoot);
int pageCount = static_cast<int>(fPages.size());
structTreeRoot->insertInt("ParentTreeNextKey", pageCount);
// The parent of the tag root is the StructTreeRoot.
fTagRoot->insertObjRef("P", structTreeRoot);
// Build the parent tree, which is a mapping from the marked
// content IDs on each page to their corresponding tags.
auto parentTree = sk_make_sp<SkPDFDict>("ParentTree");
structTreeRoot->insertObjRef("ParentTree", parentTree);
structTreeRoot->insertInt("ParentTreeNextKey", pageCount);
auto parentTreeNums = sk_make_sp<SkPDFArray>();
parentTree->insertObject("Nums", parentTreeNums);
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
// Exit now if there are no more pages with marked content.
if (fMarksPerPage.count() <= pageIndex) {
break;
}
parentTreeNums->appendInt(pageIndex);
auto markToTagArray = sk_make_sp<SkPDFArray>();
parentTreeNums->appendObjRef(markToTagArray);
for (int i = 0; i < fMarksPerPage[pageIndex].count(); i++) {
markToTagArray->appendObjRef(fMarksPerPage[pageIndex][i]);
}
}
}
// Build font subsetting info before calling addObjectRecursively(). // Build font subsetting info before calling addObjectRecursively().
SkPDFCanon* canon = &fCanon; SkPDFCanon* canon = &fCanon;
fFonts.foreach([canon](SkPDFFont* p){ p->getFontSubset(canon); }); fFonts.foreach([canon](SkPDFFont* p){ p->getFontSubset(canon); });
@ -456,6 +551,12 @@ void SkPDFDocument::onClose(SkWStream* stream) {
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Sends |nodeID| to the canvas as an annotation: the ID's raw bytes are
// the payload, the key is SkPDFGetNodeIdKey(), and the rectangle is empty.
void SkPDF::SetNodeId(SkCanvas* canvas, int nodeID) {
    const sk_sp<SkData> encodedId = SkData::MakeWithCopy(&nodeID, sizeof(nodeID));
    const char* const annotationKey = SkPDFGetNodeIdKey();
    canvas->drawAnnotation({0, 0, 0, 0}, annotationKey, encodedId.get());
}
sk_sp<SkDocument> SkPDF::MakeDocument(SkWStream* stream, const SkPDF::Metadata& metadata) { sk_sp<SkDocument> SkPDF::MakeDocument(SkWStream* stream, const SkPDF::Metadata& metadata) {
SkPDF::Metadata meta = metadata; SkPDF::Metadata meta = metadata;
if (meta.fRasterDPI <= 0) { if (meta.fRasterDPI <= 0) {

View File

@ -14,6 +14,9 @@
#include "SkPDFMetadata.h" #include "SkPDFMetadata.h"
class SkPDFDevice; class SkPDFDevice;
class SkPDFTag;
// Returns the annotation key used to pass a PDF node ID through
// SkCanvas::drawAnnotation.
const char* SkPDFGetNodeIdKey();
// Logically part of SkPDFDocument (like SkPDFCanon), but separate to // Logically part of SkPDFDocument (like SkPDFCanon), but separate to
// keep similar functionality together. // keep similar functionality together.
@ -64,7 +67,14 @@ public:
void registerFont(SkPDFFont* f) { fFonts.add(f); } void registerFont(SkPDFFont* f) { fFonts.add(f); }
const SkPDF::Metadata& metadata() const { return fMetadata; } const SkPDF::Metadata& metadata() const { return fMetadata; }
// Returns the page dictionary stored at the given 0-based page index.
// The index must be in range (checked with SkASSERT).
sk_sp<SkPDFDict> getPage(int pageIndex) const;
// Allocates the next mark ID on the current page for the tag that was
// registered with this node ID, and returns it.
// Returns -1 if no mark ID (no tag is registered for nodeId).
int getMarkIdForNodeId(int nodeId);
private: private:
// Builds the SkPDFTag for |node| and, recursively, for each of its
// children, registering every tag in fNodeIdToTag. |parent| is the
// parent tag, or nullptr for the root.
sk_sp<SkPDFTag> recursiveBuildTagTree(const SkPDF::StructureElementNode& node,
sk_sp<SkPDFTag> parent);
SkPDFObjectSerializer fObjectSerializer; SkPDFObjectSerializer fObjectSerializer;
SkPDFCanon fCanon; SkPDFCanon fCanon;
SkCanvas fCanvas; SkCanvas fCanvas;
@ -78,6 +88,15 @@ private:
SkScalar fRasterScale = 1; SkScalar fRasterScale = 1;
SkScalar fInverseRasterScale = 1; SkScalar fInverseRasterScale = 1;
// For tagged PDFs.
// The tag root, which owns its child tags and so on.
// Only set when the metadata supplies a structure element tree root.
sk_sp<SkPDFTag> fTagRoot;
// Array of page -> array of marks mapping to tags.
// The outer index is the 0-based page index; the inner index is the
// mark ID handed out on that page.
SkTArray<SkTArray<sk_sp<SkPDFTag>>> fMarksPerPage;
// A mapping from node ID to tag for fast lookup.
SkTHashMap<int, sk_sp<SkPDFTag>> fNodeIdToTag;
void reset(); void reset();
}; };

206
src/pdf/SkPDFTag.cpp Normal file
View File

@ -0,0 +1,206 @@
/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkPDFDocumentPriv.h"
#include "SkPDFTag.h"
namespace {
// Maps a structure type to the tag name written into the PDF, i.e. the
// enumerator name without its 'k' prefix (see Table 333 in
// PDF 32000-1:2008). Aborts on a value that is not a known enumerator.
const char* tagNameFromType(SkPDF::DocumentStructureType type) {
    using Type = SkPDF::DocumentStructureType;
    switch (type) {
        case Type::kDocument:   return "Document";
        case Type::kPart:       return "Part";
        case Type::kArt:        return "Art";
        case Type::kSect:       return "Sect";
        case Type::kDiv:        return "Div";
        case Type::kBlockQuote: return "BlockQuote";
        case Type::kCaption:    return "Caption";
        case Type::kTOC:        return "TOC";
        case Type::kTOCI:       return "TOCI";
        case Type::kIndex:      return "Index";
        case Type::kNonStruct:  return "NonStruct";
        case Type::kPrivate:    return "Private";
        case Type::kH:          return "H";
        case Type::kH1:         return "H1";
        case Type::kH2:         return "H2";
        case Type::kH3:         return "H3";
        case Type::kH4:         return "H4";
        case Type::kH5:         return "H5";
        case Type::kH6:         return "H6";
        case Type::kP:          return "P";
        case Type::kL:          return "L";
        case Type::kLI:         return "LI";
        case Type::kLbl:        return "Lbl";
        case Type::kLBody:      return "LBody";
        case Type::kTable:      return "Table";
        case Type::kTR:         return "TR";
        case Type::kTH:         return "TH";
        case Type::kTD:         return "TD";
        case Type::kTHead:      return "THead";
        case Type::kTBody:      return "TBody";
        case Type::kTFoot:      return "TFoot";
        case Type::kSpan:       return "Span";
        case Type::kQuote:      return "Quote";
        case Type::kNote:       return "Note";
        case Type::kReference:  return "Reference";
        case Type::kBibEntry:   return "BibEntry";
        case Type::kCode:       return "Code";
        case Type::kLink:       return "Link";
        case Type::kAnnot:      return "Annot";
        case Type::kRuby:       return "Ruby";
        case Type::kWarichu:    return "Warichu";
        case Type::kFigure:     return "Figure";
        case Type::kFormula:    return "Formula";
        case Type::kForm:       return "Form";
    }
    // Only reachable when |type| holds a value outside the enum.
    SK_ABORT("bad tag");
    return "";
}
} // namespace
// Creates a "StructElem" dictionary whose "S" entry is the tag name for
// |type|. When |parent| is non-null it is referenced from the "P" entry.
SkPDFTag::SkPDFTag(int nodeId, SkPDF::DocumentStructureType type, sk_sp<SkPDFTag> parent)
        : SkPDFDict("StructElem"), fNodeId(nodeId) {
    this->insertName("S", tagNameFromType(type));
    if (!parent) {
        return;
    }
    this->insertObjRef("P", std::move(parent));
}
// Nothing beyond member and base-class destruction is required.
SkPDFTag::~SkPDFTag() = default;
// Appends |child| to this tag's list of child tags.
// |child| is taken by value, so it is moved into the array rather than
// copied; this avoids a redundant reference-count increment/decrement.
void SkPDFTag::appendChild(sk_sp<SkPDFTag> child) {
    fChildren.emplace_back(std::move(child));
}
void SkPDFTag::drop() {
// Disconnect the tree so as not to cause reference count loops.
fChildren.reset();
SkPDFDict::drop();
}
// Records that the marked content |markId| on page |pageIndex| belongs
// to this tag.
void SkPDFTag::addMarkedContent(int pageIndex, int markId) {
    fMarkedContent.push_back(MarkedContentInfo{pageIndex, markId});
}
bool SkPDFTag::prepareTagTreeToEmit(const SkPDFDocument& document) {
// Scan the marked content. If it's all on the page, output a
// Pg to the dict. If not, we'll use MCR dicts, below.
bool allSamePage = true;
if (fMarkedContent.count() > 0) {
int firstPageIndex = fMarkedContent[0].pageIndex;
for (int i = 1; i < fMarkedContent.count(); i++) {
if (fMarkedContent[i].pageIndex != firstPageIndex) {
allSamePage = false;
break;
}
}
if (allSamePage) {
insertObjRef("Pg", document.getPage(firstPageIndex));
}
}
// Recursively prepare all child tags of this node.
SkTArray<sk_sp<SkPDFTag>> validChildren;
for (int i = 0; i < fChildren.count(); i++) {
if (fChildren[i]->prepareTagTreeToEmit(document)) {
validChildren.push_back(fChildren[i]);
}
}
// fChildren is no longer needed.
fChildren.reset();
// Now set the kids of this node, which includes both child tags
// and marked content IDs.
if (validChildren.count() + fMarkedContent.count() == 1) {
// If there's just one valid kid, or one marked content,
// we can just output the reference directly with no array.
if (validChildren.count() == 1) {
insertObjRef("K", validChildren[0]);
} else {
insertInt("K", fMarkedContent[0].markId);
}
return true;
} else if (validChildren.count() + fMarkedContent.count() > 1) {
// If there's more than one kid, output them in an array.
auto kids = sk_make_sp<SkPDFArray>();
for (int i = 0; i < validChildren.count(); i++) {
kids->appendObjRef(validChildren[i]);
}
for (int i = 0; i < fMarkedContent.count(); i++) {
if (allSamePage) {
kids->appendInt(fMarkedContent[i].markId);
} else {
auto mcr = sk_make_sp<SkPDFDict>("MCR");
mcr->insertObjRef("Pg", document.getPage(fMarkedContent[i].pageIndex));
mcr->insertInt("MCID", fMarkedContent[i].markId);
kids->appendObject(mcr);
}
}
insertObject("K", kids);
return true;
}
// This tag didn't have any marked content or any children with
// marked content, so return false. This subtree will be omitted
// from the structure tree.
return false;
}

67
src/pdf/SkPDFTag.h Normal file
View File

@ -0,0 +1,67 @@
/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkPDFTag_DEFINED
#define SkPDFTag_DEFINED
#include "SkDocument.h"
#include "SkPDFTypes.h"
#include "SkRefCnt.h"
class SkPDFDocument;
/** \class SkPDFTag
A PDF Tag represents a semantic tag in the tag tree for an
accessible tagged PDF. It is a "StructElem" dictionary. SkPDFDocument
builds the tree of SkPDFTags from the SkPDF::StructureElementNode tree
supplied by the document creator. Page content is associated with a
tag by calling SkPDF::SetNodeId with the SkCanvas used to draw to the
page and the tag's node ID. It's allowed for the marked content for
one tag to span multiple pages.
*/
class SkPDFTag final : public SkPDFDict {
public:
// Creates a tag with the given node ID and structure type. |parent|
// may be null; otherwise it is referenced as this tag's parent.
SkPDFTag(int nodeId, SkPDF::DocumentStructureType type, sk_sp<SkPDFTag> parent);
~SkPDFTag() override;
// Adds |child| to the end of this tag's list of children.
void appendChild(sk_sp<SkPDFTag> child);
private:
friend class SkPDFDocument;
// Clears the child list (breaking parent/child reference loops) and
// then drops the underlying dictionary.
void drop() override;
// Records one piece of marked content (page index + mark ID) as
// belonging to this tag.
void addMarkedContent(int pageIndex, int markId);
// Should be called after all content has been emitted. Fills in
// all of the SkPDFDict fields in this tag and all descendants.
// Returns true if this tag is valid, and false if no tag in this
// subtree was referred to by any marked content.
bool prepareTagTreeToEmit(const SkPDFDocument& document);
// Location of one piece of marked content: the 0-based index of the
// page it is on and its mark ID on that page.
struct MarkedContentInfo {
int pageIndex;
int markId;
};
// This tag's node ID, which must correspond to the node ID set
// on the SkCanvas when content inside this tag is drawn.
// The node IDs are arbitrary and are not output to the PDF.
int fNodeId;
// The children of this tag. Some tags like lists and tables require
// a particular hierarchical structure, similar to HTML.
SkTArray<sk_sp<SkPDFTag>> fChildren;
// An array consisting of a [page index, mark ID] pair for each piece
// of marked content associated with this tag.
SkTArray<MarkedContentInfo> fMarkedContent;
};
#endif

167
tests/PDFTaggedTest.cpp Normal file
View File

@ -0,0 +1,167 @@
/*
* Copyright 2018 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "Test.h"
#include "SkCanvas.h"
#include "SkPDFDocument.h"
#include "SkStream.h"
using PDFTag = SkPDF::StructureElementNode;
// Test building a tagged PDF.
// Add this to args.gn to output the PDF to a file:
// extra_cflags = [ "-DSK_PDF_TEST_TAGS_OUTPUT_PATH=\"/tmp/foo.pdf\"" ]
//
// The test builds a small structure tree (a document containing a heading,
// two paragraphs, an unreferenced div and a four-entry list), draws text
// for those nodes over two pages, and closes the document. It contains no
// REPORTER_ASSERT checks: it only exercises the tagged-PDF code path and
// optionally leaves a file behind for manual inspection.
DEF_TEST(SkPDF_tagged, r) {
#ifdef SK_PDF_TEST_TAGS_OUTPUT_PATH
SkFILEWStream outputStream(SK_PDF_TEST_TAGS_OUTPUT_PATH);
#else
SkDynamicMemoryWStream outputStream;
#endif
SkSize pageSize = SkSize::Make(612, 792); // U.S. Letter
SkPDF::Metadata metadata;
metadata.fTitle = "Example Tagged PDF";
metadata.fCreator = "Skia";
SkTime::DateTime now;
SkTime::GetDateTime(&now);
metadata.fCreation = now;
metadata.fModified = now;
// The document tag.
PDFTag root;
root.fNodeId = 1;
root.fType = SkPDF::DocumentStructureType::kDocument;
root.fChildCount = 5;
// Storage for the root's children; the references below alias its slots.
PDFTag rootChildren[5];
root.fChildren = rootChildren;
// Heading.
PDFTag& h1 = rootChildren[0];
h1.fNodeId = 2;
h1.fType = SkPDF::DocumentStructureType::kH1;
h1.fChildCount = 0;
// Initial paragraph.
PDFTag& p = rootChildren[1];
p.fNodeId = 3;
p.fType = SkPDF::DocumentStructureType::kP;
p.fChildCount = 0;
// Hidden div. This is never referenced by marked content
// so it should not appear in the resulting PDF.
PDFTag& div = rootChildren[2];
div.fNodeId = 4;
div.fType = SkPDF::DocumentStructureType::kDiv;
div.fChildCount = 0;
// A bulleted list of two items.
PDFTag& l = rootChildren[3];
l.fNodeId = 5;
l.fType = SkPDF::DocumentStructureType::kL;
l.fChildCount = 4;
// Two (label, item) pairs, stored flat as four children of the list.
PDFTag listChildren[4];
l.fChildren = listChildren;
PDFTag& lm1 = listChildren[0];
lm1.fNodeId = 6;
lm1.fType = SkPDF::DocumentStructureType::kLbl;
lm1.fChildCount = 0;
PDFTag& li1 = listChildren[1];
li1.fNodeId = 7;
li1.fType = SkPDF::DocumentStructureType::kLI;
li1.fChildCount = 0;
PDFTag& lm2 = listChildren[2];
lm2.fNodeId = 8;
lm2.fType = SkPDF::DocumentStructureType::kLbl;
lm2.fChildCount = 0;
PDFTag& li2 = listChildren[3];
li2.fNodeId = 9;
li2.fType = SkPDF::DocumentStructureType::kLI;
li2.fChildCount = 0;
// Paragraph spanning two pages.
PDFTag& p2 = rootChildren[4];
p2.fNodeId = 10;
p2.fType = SkPDF::DocumentStructureType::kP;
p2.fChildCount = 0;
// Hand the finished tree to the document through the metadata.
metadata.fStructureElementTreeRoot = &root;
sk_sp<SkDocument> document = SkPDF::MakeDocument(
&outputStream, metadata);
SkPaint paint;
paint.setColor(SK_ColorBLACK);
paint.setSubpixelText(true);
paint.setHinting(SkPaint::kNo_Hinting);
// First page.
SkCanvas* canvas =
document->beginPage(pageSize.width(),
pageSize.height());
// Each SetNodeId call below selects the node for the text drawn after it.
// The translate calls accumulate, so every offset is relative to the
// previous drawing position.
SkPDF::SetNodeId(canvas, 2);
paint.setTextSize(36);
const char* message = "This is the title";
canvas->translate(72, 72);
canvas->drawText(message, strlen(message), 0, 0, paint);
SkPDF::SetNodeId(canvas, 3);
paint.setTextSize(14);
message = "This is a simple paragraph.";
canvas->translate(0, 72);
canvas->drawText(message, strlen(message), 0, 0, paint);
SkPDF::SetNodeId(canvas, 6);
paint.setTextSize(14);
message = "*";
canvas->translate(0, 72);
canvas->drawText(message, strlen(message), 0, 0, paint);
SkPDF::SetNodeId(canvas, 7);
message = "List item 1";
canvas->translate(36, 0);
canvas->drawText(message, strlen(message), 0, 0, paint);
SkPDF::SetNodeId(canvas, 8);
message = "*";
canvas->translate(-36, 36);
canvas->drawText(message, strlen(message), 0, 0, paint);
SkPDF::SetNodeId(canvas, 9);
message = "List item 2";
canvas->translate(36, 0);
canvas->drawText(message, strlen(message), 0, 0, paint);
SkPDF::SetNodeId(canvas, 10);
message = "This is a paragraph that starts on one page";
canvas->translate(-36, 6 * 72);
canvas->drawText(message, strlen(message), 0, 0, paint);
document->endPage();
// Second page.
canvas = document->beginPage(pageSize.width(),
pageSize.height());
// Node 10 again: the same paragraph continues on this page.
SkPDF::SetNodeId(canvas, 10);
message = "and finishes on the second page.";
canvas->translate(72, 72);
canvas->drawText(message, strlen(message), 0, 0, paint);
// This has a node ID but never shows up in the tag tree so it
// won't be tagged.
SkPDF::SetNodeId(canvas, 999);
message = "Page 2";
canvas->translate(468, -36);
canvas->drawText(message, strlen(message), 0, 0, paint);
document->endPage();
document->close();
outputStream.flush();
}