Add initial support for generating tagged PDFs.
Adds an interface for the document creator to pass in a tree of tags indicating the structure of the document, each with a type (from a predetermined enum of possible types) and a node ID. It also adds an SkPDF::SetNodeId function, which takes an SkCanvas, so that page content can be associated with a particular tag. If both the tag tree and marked content are present, Skia can now output a properly tagged PDF. An example program is included. When used properly, the PDF generated by this patch is valid and the tags are parsed properly by Adobe Acrobat. It handles many corner cases, such as content that spans more than one page, tags that don't correspond to any marked content, and marked content that doesn't correspond to any tags. However, it doesn't implement all of the features of PDF accessibility yet: some additional attributes that can be associated with certain tags still need to be supported in order to properly tag things like figures and tables. Bug: skia:8148 Change-Id: I2e448eca8ded8e1b29ba685663b557ae7ad7e23e Reviewed-on: https://skia-review.googlesource.com/141138 Reviewed-by: Hal Canary <halcanary@google.com>
This commit is contained in:
parent
b400d4d7e0
commit
656cefe65d
@ -46,6 +46,8 @@ skia_pdf_sources = [
|
|||||||
"$_src/pdf/SkPDFResourceDict.h",
|
"$_src/pdf/SkPDFResourceDict.h",
|
||||||
"$_src/pdf/SkPDFShader.cpp",
|
"$_src/pdf/SkPDFShader.cpp",
|
||||||
"$_src/pdf/SkPDFShader.h",
|
"$_src/pdf/SkPDFShader.h",
|
||||||
|
"$_src/pdf/SkPDFTag.cpp",
|
||||||
|
"$_src/pdf/SkPDFTag.h",
|
||||||
"$_src/pdf/SkPDFTypes.cpp",
|
"$_src/pdf/SkPDFTypes.cpp",
|
||||||
"$_src/pdf/SkPDFTypes.h",
|
"$_src/pdf/SkPDFTypes.h",
|
||||||
"$_src/pdf/SkPDFUtils.cpp",
|
"$_src/pdf/SkPDFUtils.cpp",
|
||||||
|
@ -168,6 +168,7 @@ tests_sources = [
|
|||||||
"$_tests/PDFMetadataAttributeTest.cpp",
|
"$_tests/PDFMetadataAttributeTest.cpp",
|
||||||
"$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp",
|
"$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp",
|
||||||
"$_tests/PDFPrimitivesTest.cpp",
|
"$_tests/PDFPrimitivesTest.cpp",
|
||||||
|
"$_tests/PDFTaggedTest.cpp",
|
||||||
"$_tests/OffsetSimplePolyTest.cpp",
|
"$_tests/OffsetSimplePolyTest.cpp",
|
||||||
"$_tests/OnFlushCallbackTest.cpp",
|
"$_tests/OnFlushCallbackTest.cpp",
|
||||||
"$_tests/PathRendererCacheTests.cpp",
|
"$_tests/PathRendererCacheTests.cpp",
|
||||||
|
@ -11,6 +11,67 @@
|
|||||||
|
|
||||||
namespace SkPDF {
|
namespace SkPDF {
|
||||||
|
|
||||||
|
/** The type of a structure element in a tagged PDF.
    See Table 333 in PDF 32000-1:2008.

    Each enumerator is written to the PDF as the name obtained by
    dropping the leading 'k' (e.g. kBlockQuote -> "BlockQuote").
*/
enum class DocumentStructureType {
    kDocument,
    kPart,
    kArt,         // Article
    kSect,        // Section
    kDiv,
    kBlockQuote,
    kCaption,
    kTOC,         // Table of Contents
    kTOCI,        // Table of Contents Item
    kIndex,
    kNonStruct,
    kPrivate,
    kH,           // Heading
    kH1,          // Heading level 1
    kH2,
    kH3,
    kH4,
    kH5,
    kH6,          // Heading level 6
    kP,           // Paragraph
    kL,           // List
    kLI,          // List item
    kLbl,         // List item label
    kLBody,       // List item body
    kTable,
    kTR,
    kTH,
    kTD,
    kTHead,
    kTBody,
    kTFoot,
    kSpan,
    kQuote,
    kNote,
    kReference,
    kBibEntry,
    kCode,
    kLink,
    kAnnot,
    kRuby,
    kWarichu,
    kFigure,
    kFormula,
    kForm,        // Form control (not like an HTML FORM element)
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A node in a PDF structure tree, giving a semantic representation
|
||||||
|
* of the content. Each node ID is associated with content
|
||||||
|
* by passing the SkCanvas and node ID to SkPDF::SetNodeId() when drawing.
|
||||||
|
*/
|
||||||
|
struct StructureElementNode {
|
||||||
|
const StructureElementNode* fChildren = nullptr;
|
||||||
|
size_t fChildCount;
|
||||||
|
int fNodeId;
|
||||||
|
DocumentStructureType fType;
|
||||||
|
};
|
||||||
|
|
||||||
/** Optional metadata to be passed into the PDF factory function.
|
/** Optional metadata to be passed into the PDF factory function.
|
||||||
*/
|
*/
|
||||||
struct Metadata {
|
struct Metadata {
|
||||||
@ -73,8 +134,27 @@ struct Metadata {
|
|||||||
opaque, it will be encoded (using JPEG) with that quality setting.
|
opaque, it will be encoded (using JPEG) with that quality setting.
|
||||||
*/
|
*/
|
||||||
int fEncodingQuality = 101;
|
int fEncodingQuality = 101;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An optional tree of structured document tags that provide
|
||||||
|
* a semantic representation of the content. The caller
|
||||||
|
* should retain ownership.
|
||||||
|
*/
|
||||||
|
const StructureElementNode* fStructureElementTreeRoot = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Associate a node ID with subsequent drawing commands in an
|
||||||
|
SkCanvas. The same node ID can appear in a StructureElementNode
|
||||||
|
in order to associate a document's structure element tree with
|
||||||
|
its content.
|
||||||
|
|
||||||
|
A node ID of zero indicates no node ID.
|
||||||
|
|
||||||
|
@param dst     The canvas used to draw to the PDF.
|
||||||
|
@param nodeID  The node ID for subsequent drawing commands.
|
||||||
|
*/
|
||||||
|
SK_API void SetNodeId(SkCanvas* dst, int nodeID);
|
||||||
|
|
||||||
/** Create a PDF-backed document, writing the results into a SkWStream.
|
/** Create a PDF-backed document, writing the results into a SkWStream.
|
||||||
|
|
||||||
PDF pages are sized in point units. 1 pt == 1/72 inch == 127/360 mm.
|
PDF pages are sized in point units. 1 pt == 1/72 inch == 127/360 mm.
|
||||||
|
@ -527,6 +527,7 @@ SkPDFDevice::SkPDFDevice(SkISize pageSize, SkPDFDocument* doc, const SkMatrix& t
|
|||||||
: INHERITED(SkImageInfo::MakeUnknown(pageSize.width(), pageSize.height()),
|
: INHERITED(SkImageInfo::MakeUnknown(pageSize.width(), pageSize.height()),
|
||||||
SkSurfaceProps(0, kUnknown_SkPixelGeometry))
|
SkSurfaceProps(0, kUnknown_SkPixelGeometry))
|
||||||
, fInitialTransform(transform)
|
, fInitialTransform(transform)
|
||||||
|
, fNodeId(0)
|
||||||
, fDocument(doc)
|
, fDocument(doc)
|
||||||
{
|
{
|
||||||
SkASSERT(!pageSize.isEmpty());
|
SkASSERT(!pageSize.isEmpty());
|
||||||
@ -550,6 +551,13 @@ void SkPDFDevice::drawAnnotation(const SkRect& rect, const char key[], SkData* v
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (rect.isEmpty()) {
|
if (rect.isEmpty()) {
|
||||||
|
if (!strcmp(key, SkPDFGetNodeIdKey())) {
|
||||||
|
int nodeID;
|
||||||
|
if (value->size() != sizeof(nodeID)) { return; }
|
||||||
|
memcpy(&nodeID, value->data(), sizeof(nodeID));
|
||||||
|
fNodeId = nodeID;
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (!strcmp(SkAnnotationKeys::Define_Named_Dest_Key(), key)) {
|
if (!strcmp(SkAnnotationKeys::Define_Named_Dest_Key(), key)) {
|
||||||
SkPoint transformedPoint;
|
SkPoint transformedPoint;
|
||||||
this->ctm().mapXY(rect.x(), rect.y(), &transformedPoint);
|
this->ctm().mapXY(rect.x(), rect.y(), &transformedPoint);
|
||||||
@ -1176,6 +1184,19 @@ void SkPDFDevice::internalDrawGlyphRun(const SkGlyphRun& glyphRun, SkPoint offse
|
|||||||
SkDynamicMemoryWStream* out = content.stream();
|
SkDynamicMemoryWStream* out = content.stream();
|
||||||
|
|
||||||
out->writeText("BT\n");
|
out->writeText("BT\n");
|
||||||
|
|
||||||
|
int markId = -1;
|
||||||
|
if (fNodeId) {
|
||||||
|
markId = fDocument->getMarkIdForNodeId(fNodeId);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (markId != -1) {
|
||||||
|
out->writeText("/P <</MCID ");
|
||||||
|
out->writeDecAsText(markId);
|
||||||
|
out->writeText(" >>BDC\n");
|
||||||
|
}
|
||||||
|
SK_AT_SCOPE_EXIT(if (markId != -1) out->writeText("EMC\n"));
|
||||||
|
|
||||||
SK_AT_SCOPE_EXIT(out->writeText("ET\n"));
|
SK_AT_SCOPE_EXIT(out->writeText("ET\n"));
|
||||||
|
|
||||||
const SkGlyphID maxGlyphID = SkToU16(typeface->countGlyphs() - 1);
|
const SkGlyphID maxGlyphID = SkToU16(typeface->countGlyphs() - 1);
|
||||||
|
@ -167,6 +167,7 @@ private:
|
|||||||
std::vector<sk_sp<SkPDFObject>> fXObjectResources;
|
std::vector<sk_sp<SkPDFObject>> fXObjectResources;
|
||||||
std::vector<sk_sp<SkPDFObject>> fShaderResources;
|
std::vector<sk_sp<SkPDFObject>> fShaderResources;
|
||||||
std::vector<sk_sp<SkPDFFont>> fFontResources;
|
std::vector<sk_sp<SkPDFFont>> fFontResources;
|
||||||
|
int fNodeId;
|
||||||
|
|
||||||
SkSinglyLinkedList<SkDynamicMemoryWStream> fContentEntries;
|
SkSinglyLinkedList<SkDynamicMemoryWStream> fContentEntries;
|
||||||
struct GraphicStackState {
|
struct GraphicStackState {
|
||||||
|
@ -11,12 +11,19 @@
|
|||||||
#include "SkMakeUnique.h"
|
#include "SkMakeUnique.h"
|
||||||
#include "SkPDFCanon.h"
|
#include "SkPDFCanon.h"
|
||||||
#include "SkPDFDevice.h"
|
#include "SkPDFDevice.h"
|
||||||
|
#include "SkPDFTag.h"
|
||||||
#include "SkPDFUtils.h"
|
#include "SkPDFUtils.h"
|
||||||
#include "SkStream.h"
|
#include "SkStream.h"
|
||||||
#include "SkTo.h"
|
#include "SkTo.h"
|
||||||
|
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
// Returns the annotation key used to carry a node ID through
// SkCanvas::drawAnnotation.  The returned string has static storage
// duration.
const char* SkPDFGetNodeIdKey() {
    return "PDF_Node_Key";
}
|
||||||
|
|
||||||
SkPDFObjectSerializer::SkPDFObjectSerializer() : fBaseOffset(0), fNextToBeSerialized(0) {}
|
SkPDFObjectSerializer::SkPDFObjectSerializer() : fBaseOffset(0), fNextToBeSerialized(0) {}
|
||||||
|
|
||||||
SkPDFObjectSerializer::~SkPDFObjectSerializer() {
|
SkPDFObjectSerializer::~SkPDFObjectSerializer() {
|
||||||
@ -192,6 +199,9 @@ SkPDFDocument::SkPDFDocument(SkWStream* stream,
|
|||||||
fInverseRasterScale = kDpiForRasterScaleOne / fMetadata.fRasterDPI;
|
fInverseRasterScale = kDpiForRasterScaleOne / fMetadata.fRasterDPI;
|
||||||
fRasterScale = fMetadata.fRasterDPI / kDpiForRasterScaleOne;
|
fRasterScale = fMetadata.fRasterDPI / kDpiForRasterScaleOne;
|
||||||
}
|
}
|
||||||
|
if (fMetadata.fStructureElementTreeRoot) {
|
||||||
|
fTagRoot = recursiveBuildTagTree(*fMetadata.fStructureElementTreeRoot, nullptr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SkPDFDocument::~SkPDFDocument() {
|
SkPDFDocument::~SkPDFDocument() {
|
||||||
@ -265,6 +275,9 @@ void SkPDFDocument::onEndPage() {
|
|||||||
}
|
}
|
||||||
this->serialize(contentObject);
|
this->serialize(contentObject);
|
||||||
page->insertObjRef("Contents", std::move(contentObject));
|
page->insertObjRef("Contents", std::move(contentObject));
|
||||||
|
// The StructParents unique identifier for each page is just its
|
||||||
|
// 0-based page index.
|
||||||
|
page->insertInt("StructParents", static_cast<int>(fPages.size()));
|
||||||
fPages.emplace_back(std::move(page));
|
fPages.emplace_back(std::move(page));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -423,6 +436,39 @@ static sk_sp<SkPDFArray> make_srgb_output_intents() {
|
|||||||
return intentArray;
|
return intentArray;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the page dictionary for an already-ended page.
// pageIndex must be in [0, fPages.size()); this is only checked in
// debug builds.
sk_sp<SkPDFDict> SkPDFDocument::getPage(int pageIndex) const {
    SkASSERT(0 <= pageIndex && pageIndex < static_cast<int>(fPages.size()));
    return fPages[pageIndex];
}
|
||||||
|
|
||||||
|
int SkPDFDocument::getMarkIdForNodeId(int nodeId) {
|
||||||
|
sk_sp<SkPDFTag>* tagPtr = fNodeIdToTag.find(nodeId);
|
||||||
|
if (tagPtr == nullptr) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
sk_sp<SkPDFTag> tag = *tagPtr;
|
||||||
|
int pageIndex = static_cast<int>(fPages.size());
|
||||||
|
while (fMarksPerPage.count() < pageIndex + 1) {
|
||||||
|
fMarksPerPage.push_back();
|
||||||
|
}
|
||||||
|
int markId = fMarksPerPage[pageIndex].count();
|
||||||
|
tag->addMarkedContent(pageIndex, markId);
|
||||||
|
fMarksPerPage[pageIndex].push_back(std::move(tag));
|
||||||
|
return markId;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the SkPDFTag subtree mirroring the caller-supplied `node`,
// registering every created tag in fNodeIdToTag so it can later be looked
// up by node ID.
//
// @param node    The structure element to convert; its children are
//                converted recursively.
// @param parent  The already-built tag for node's parent, or nullptr for
//                the root.
// @return        The tag created for `node`.
sk_sp<SkPDFTag> SkPDFDocument::recursiveBuildTagTree(
        const SkPDF::StructureElementNode& node, sk_sp<SkPDFTag> parent) {
    // parent is a by-value sink; move it to avoid an extra ref/unref.
    sk_sp<SkPDFTag> tag = sk_make_sp<SkPDFTag>(node.fNodeId, node.fType, std::move(parent));
    fNodeIdToTag.set(tag->fNodeId, tag);

    // A non-zero child count with a null child array is a caller error.
    // Catch it in debug builds, and treat the node as a leaf in release
    // builds rather than dereferencing a null pointer.
    SkASSERT(node.fChildren || node.fChildCount == 0);
    const size_t childCount = node.fChildren ? node.fChildCount : 0;

    tag->fChildren.reserve(static_cast<int>(childCount));
    for (size_t i = 0; i < childCount; i++) {
        tag->appendChild(this->recursiveBuildTagTree(node.fChildren[i], tag));
    }
    return tag;
}
|
||||||
|
|
||||||
void SkPDFDocument::onClose(SkWStream* stream) {
|
void SkPDFDocument::onClose(SkWStream* stream) {
|
||||||
SkASSERT(fCanvas.imageInfo().dimensions().isZero());
|
SkASSERT(fCanvas.imageInfo().dimensions().isZero());
|
||||||
if (fPages.empty()) {
|
if (fPages.empty()) {
|
||||||
@ -437,14 +483,63 @@ void SkPDFDocument::onClose(SkWStream* stream) {
|
|||||||
// no one has ever asked for this feature.
|
// no one has ever asked for this feature.
|
||||||
docCatalog->insertObject("OutputIntents", make_srgb_output_intents());
|
docCatalog->insertObject("OutputIntents", make_srgb_output_intents());
|
||||||
}
|
}
|
||||||
SkASSERT(!fPages.empty());
|
|
||||||
docCatalog->insertObjRef("Pages", generate_page_tree(&fPages));
|
std::vector<sk_sp<SkPDFDict>> pagesCopy(fPages);
|
||||||
SkASSERT(fPages.empty());
|
SkASSERT(!pagesCopy.empty());
|
||||||
|
docCatalog->insertObjRef("Pages", generate_page_tree(&pagesCopy));
|
||||||
|
SkASSERT(pagesCopy.empty());
|
||||||
|
|
||||||
if (fDests->size() > 0) {
|
if (fDests->size() > 0) {
|
||||||
docCatalog->insertObjRef("Dests", std::move(fDests));
|
docCatalog->insertObjRef("Dests", std::move(fDests));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle tagged PDFs.
|
||||||
|
if (fTagRoot) {
|
||||||
|
// In the document catalog, indicate that this PDF is tagged.
|
||||||
|
auto markInfo = sk_make_sp<SkPDFDict>("MarkInfo");
|
||||||
|
markInfo->insertBool("Marked", true);
|
||||||
|
docCatalog->insertObject("MarkInfo", markInfo);
|
||||||
|
|
||||||
|
// Prepare the tag tree, this automatically skips over any
|
||||||
|
// tags that weren't referenced from any marked content.
|
||||||
|
bool success = fTagRoot->prepareTagTreeToEmit(*this);
|
||||||
|
if (!success) {
|
||||||
|
SkDEBUGFAIL("PDF has tag tree but no marked content.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the StructTreeRoot.
|
||||||
|
auto structTreeRoot = sk_make_sp<SkPDFDict>("StructTreeRoot");
|
||||||
|
docCatalog->insertObjRef("StructTreeRoot", structTreeRoot);
|
||||||
|
structTreeRoot->insertObjRef("K", fTagRoot);
|
||||||
|
int pageCount = static_cast<int>(fPages.size());
|
||||||
|
structTreeRoot->insertInt("ParentTreeNextKey", pageCount);
|
||||||
|
|
||||||
|
// The parent of the tag root is the StructTreeRoot.
|
||||||
|
fTagRoot->insertObjRef("P", structTreeRoot);
|
||||||
|
|
||||||
|
// Build the parent tree, which is a mapping from the marked
|
||||||
|
// content IDs on each page to their corresponding tags.
|
||||||
|
auto parentTree = sk_make_sp<SkPDFDict>("ParentTree");
|
||||||
|
structTreeRoot->insertObjRef("ParentTree", parentTree);
|
||||||
|
structTreeRoot->insertInt("ParentTreeNextKey", pageCount);
|
||||||
|
auto parentTreeNums = sk_make_sp<SkPDFArray>();
|
||||||
|
parentTree->insertObject("Nums", parentTreeNums);
|
||||||
|
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
|
||||||
|
// Exit now if there are no more pages with marked content.
|
||||||
|
if (fMarksPerPage.count() <= pageIndex) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
parentTreeNums->appendInt(pageIndex);
|
||||||
|
auto markToTagArray = sk_make_sp<SkPDFArray>();
|
||||||
|
parentTreeNums->appendObjRef(markToTagArray);
|
||||||
|
|
||||||
|
for (int i = 0; i < fMarksPerPage[pageIndex].count(); i++) {
|
||||||
|
markToTagArray->appendObjRef(fMarksPerPage[pageIndex][i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Build font subsetting info before calling addObjectRecursively().
|
// Build font subsetting info before calling addObjectRecursively().
|
||||||
SkPDFCanon* canon = &fCanon;
|
SkPDFCanon* canon = &fCanon;
|
||||||
fFonts.foreach([canon](SkPDFFont* p){ p->getFontSubset(canon); });
|
fFonts.foreach([canon](SkPDFFont* p){ p->getFontSubset(canon); });
|
||||||
@ -456,6 +551,12 @@ void SkPDFDocument::onClose(SkWStream* stream) {
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
// Associates nodeID with subsequent drawing on `canvas`.
//
// The ID is smuggled through the canvas as an annotation: an empty rect,
// the key from SkPDFGetNodeIdKey(), and the raw bytes of the int as the
// payload.
void SkPDF::SetNodeId(SkCanvas* canvas, int nodeID) {
    const SkRect emptyRect = {0, 0, 0, 0};
    sk_sp<SkData> encodedId = SkData::MakeWithCopy(&nodeID, sizeof(nodeID));
    canvas->drawAnnotation(emptyRect, SkPDFGetNodeIdKey(), encodedId.get());
}
|
||||||
|
|
||||||
sk_sp<SkDocument> SkPDF::MakeDocument(SkWStream* stream, const SkPDF::Metadata& metadata) {
|
sk_sp<SkDocument> SkPDF::MakeDocument(SkWStream* stream, const SkPDF::Metadata& metadata) {
|
||||||
SkPDF::Metadata meta = metadata;
|
SkPDF::Metadata meta = metadata;
|
||||||
if (meta.fRasterDPI <= 0) {
|
if (meta.fRasterDPI <= 0) {
|
||||||
|
@ -14,6 +14,9 @@
|
|||||||
#include "SkPDFMetadata.h"
|
#include "SkPDFMetadata.h"
|
||||||
|
|
||||||
class SkPDFDevice;
|
class SkPDFDevice;
|
||||||
|
class SkPDFTag;
|
||||||
|
|
||||||
|
const char* SkPDFGetNodeIdKey();
|
||||||
|
|
||||||
// Logically part of SkPDFDocument (like SkPDFCanon), but separate to
|
// Logically part of SkPDFDocument (like SkPDFCanon), but separate to
|
||||||
// keep similar functionality together.
|
// keep similar functionality together.
|
||||||
@ -64,7 +67,14 @@ public:
|
|||||||
void registerFont(SkPDFFont* f) { fFonts.add(f); }
|
void registerFont(SkPDFFont* f) { fFonts.add(f); }
|
||||||
const SkPDF::Metadata& metadata() const { return fMetadata; }
|
const SkPDF::Metadata& metadata() const { return fMetadata; }
|
||||||
|
|
||||||
|
sk_sp<SkPDFDict> getPage(int pageIndex) const;
|
||||||
|
// Returns -1 if no mark ID.
|
||||||
|
int getMarkIdForNodeId(int nodeId);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
sk_sp<SkPDFTag> recursiveBuildTagTree(const SkPDF::StructureElementNode& node,
|
||||||
|
sk_sp<SkPDFTag> parent);
|
||||||
|
|
||||||
SkPDFObjectSerializer fObjectSerializer;
|
SkPDFObjectSerializer fObjectSerializer;
|
||||||
SkPDFCanon fCanon;
|
SkPDFCanon fCanon;
|
||||||
SkCanvas fCanvas;
|
SkCanvas fCanvas;
|
||||||
@ -78,6 +88,15 @@ private:
|
|||||||
SkScalar fRasterScale = 1;
|
SkScalar fRasterScale = 1;
|
||||||
SkScalar fInverseRasterScale = 1;
|
SkScalar fInverseRasterScale = 1;
|
||||||
|
|
||||||
|
// For tagged PDFs.
|
||||||
|
|
||||||
|
// The tag root, which owns its child tags and so on.
|
||||||
|
sk_sp<SkPDFTag> fTagRoot;
|
||||||
|
// Array of page -> array of marks mapping to tags.
|
||||||
|
SkTArray<SkTArray<sk_sp<SkPDFTag>>> fMarksPerPage;
|
||||||
|
// A mapping from node ID to tag for fast lookup.
|
||||||
|
SkTHashMap<int, sk_sp<SkPDFTag>> fNodeIdToTag;
|
||||||
|
|
||||||
void reset();
|
void reset();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
206
src/pdf/SkPDFTag.cpp
Normal file
206
src/pdf/SkPDFTag.cpp
Normal file
@ -0,0 +1,206 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2018 Google Inc.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license that can be
|
||||||
|
* found in the LICENSE file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "SkPDFDocumentPriv.h"
|
||||||
|
#include "SkPDFTag.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Table 333 in PDF 32000-1:2008
|
||||||
|
const char* tagNameFromType(SkPDF::DocumentStructureType type) {
|
||||||
|
switch (type) {
|
||||||
|
case SkPDF::DocumentStructureType::kDocument:
|
||||||
|
return "Document";
|
||||||
|
case SkPDF::DocumentStructureType::kPart:
|
||||||
|
return "Part";
|
||||||
|
case SkPDF::DocumentStructureType::kArt:
|
||||||
|
return "Art";
|
||||||
|
case SkPDF::DocumentStructureType::kSect:
|
||||||
|
return "Sect";
|
||||||
|
case SkPDF::DocumentStructureType::kDiv:
|
||||||
|
return "Div";
|
||||||
|
case SkPDF::DocumentStructureType::kBlockQuote:
|
||||||
|
return "BlockQuote";
|
||||||
|
case SkPDF::DocumentStructureType::kCaption:
|
||||||
|
return "Caption";
|
||||||
|
case SkPDF::DocumentStructureType::kTOC:
|
||||||
|
return "TOC";
|
||||||
|
case SkPDF::DocumentStructureType::kTOCI:
|
||||||
|
return "TOCI";
|
||||||
|
case SkPDF::DocumentStructureType::kIndex:
|
||||||
|
return "Index";
|
||||||
|
case SkPDF::DocumentStructureType::kNonStruct:
|
||||||
|
return "NonStruct";
|
||||||
|
case SkPDF::DocumentStructureType::kPrivate:
|
||||||
|
return "Private";
|
||||||
|
case SkPDF::DocumentStructureType::kH:
|
||||||
|
return "H";
|
||||||
|
case SkPDF::DocumentStructureType::kH1:
|
||||||
|
return "H1";
|
||||||
|
case SkPDF::DocumentStructureType::kH2:
|
||||||
|
return "H2";
|
||||||
|
case SkPDF::DocumentStructureType::kH3:
|
||||||
|
return "H3";
|
||||||
|
case SkPDF::DocumentStructureType::kH4:
|
||||||
|
return "H4";
|
||||||
|
case SkPDF::DocumentStructureType::kH5:
|
||||||
|
return "H5";
|
||||||
|
case SkPDF::DocumentStructureType::kH6:
|
||||||
|
return "H6";
|
||||||
|
case SkPDF::DocumentStructureType::kP:
|
||||||
|
return "P";
|
||||||
|
case SkPDF::DocumentStructureType::kL:
|
||||||
|
return "L";
|
||||||
|
case SkPDF::DocumentStructureType::kLI:
|
||||||
|
return "LI";
|
||||||
|
case SkPDF::DocumentStructureType::kLbl:
|
||||||
|
return "Lbl";
|
||||||
|
case SkPDF::DocumentStructureType::kLBody:
|
||||||
|
return "LBody";
|
||||||
|
case SkPDF::DocumentStructureType::kTable:
|
||||||
|
return "Table";
|
||||||
|
case SkPDF::DocumentStructureType::kTR:
|
||||||
|
return "TR";
|
||||||
|
case SkPDF::DocumentStructureType::kTH:
|
||||||
|
return "TH";
|
||||||
|
case SkPDF::DocumentStructureType::kTD:
|
||||||
|
return "TD";
|
||||||
|
case SkPDF::DocumentStructureType::kTHead:
|
||||||
|
return "THead";
|
||||||
|
case SkPDF::DocumentStructureType::kTBody:
|
||||||
|
return "TBody";
|
||||||
|
case SkPDF::DocumentStructureType::kTFoot:
|
||||||
|
return "TFoot";
|
||||||
|
case SkPDF::DocumentStructureType::kSpan:
|
||||||
|
return "Span";
|
||||||
|
case SkPDF::DocumentStructureType::kQuote:
|
||||||
|
return "Quote";
|
||||||
|
case SkPDF::DocumentStructureType::kNote:
|
||||||
|
return "Note";
|
||||||
|
case SkPDF::DocumentStructureType::kReference:
|
||||||
|
return "Reference";
|
||||||
|
case SkPDF::DocumentStructureType::kBibEntry:
|
||||||
|
return "BibEntry";
|
||||||
|
case SkPDF::DocumentStructureType::kCode:
|
||||||
|
return "Code";
|
||||||
|
case SkPDF::DocumentStructureType::kLink:
|
||||||
|
return "Link";
|
||||||
|
case SkPDF::DocumentStructureType::kAnnot:
|
||||||
|
return "Annot";
|
||||||
|
case SkPDF::DocumentStructureType::kRuby:
|
||||||
|
return "Ruby";
|
||||||
|
case SkPDF::DocumentStructureType::kWarichu:
|
||||||
|
return "Warichu";
|
||||||
|
case SkPDF::DocumentStructureType::kFigure:
|
||||||
|
return "Figure";
|
||||||
|
case SkPDF::DocumentStructureType::kFormula:
|
||||||
|
return "Formula";
|
||||||
|
case SkPDF::DocumentStructureType::kForm:
|
||||||
|
return "Form";
|
||||||
|
}
|
||||||
|
|
||||||
|
SK_ABORT("bad tag");
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Creates a "StructElem" dictionary whose "S" entry names the structure
// type.  When a parent is given, it is referenced from the "P" entry;
// otherwise "P" is left unset.
SkPDFTag::SkPDFTag(int nodeId, SkPDF::DocumentStructureType type, sk_sp<SkPDFTag> parent)
    : SkPDFDict("StructElem"), fNodeId(nodeId) {
    this->insertName("S", tagNameFromType(type));
    if (!parent) {
        return;
    }
    this->insertObjRef("P", std::move(parent));
}
|
||||||
|
|
||||||
|
// Nothing to release beyond what the members clean up themselves.
SkPDFTag::~SkPDFTag() = default;
|
||||||
|
|
||||||
|
// Appends `child` to this tag's list of children, taking over the
// caller's reference.
void SkPDFTag::appendChild(sk_sp<SkPDFTag> child) {
    // `child` is a by-value sink parameter: move it into the array rather
    // than copying, which would cost an extra atomic ref/unref pair.
    fChildren.emplace_back(std::move(child));
}
|
||||||
|
|
||||||
|
void SkPDFTag::drop() {
|
||||||
|
// Disconnect the tree so as not to cause reference count loops.
|
||||||
|
fChildren.reset();
|
||||||
|
|
||||||
|
SkPDFDict::drop();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Records that the marked-content sequence `markId` on page `pageIndex`
// belongs to this tag.
void SkPDFTag::addMarkedContent(int pageIndex, int markId) {
    fMarkedContent.emplace_back(MarkedContentInfo{pageIndex, markId});
}
|
||||||
|
|
||||||
|
bool SkPDFTag::prepareTagTreeToEmit(const SkPDFDocument& document) {
|
||||||
|
// Scan the marked content. If it's all on the page, output a
|
||||||
|
// Pg to the dict. If not, we'll use MCR dicts, below.
|
||||||
|
bool allSamePage = true;
|
||||||
|
if (fMarkedContent.count() > 0) {
|
||||||
|
int firstPageIndex = fMarkedContent[0].pageIndex;
|
||||||
|
for (int i = 1; i < fMarkedContent.count(); i++) {
|
||||||
|
if (fMarkedContent[i].pageIndex != firstPageIndex) {
|
||||||
|
allSamePage = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allSamePage) {
|
||||||
|
insertObjRef("Pg", document.getPage(firstPageIndex));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recursively prepare all child tags of this node.
|
||||||
|
SkTArray<sk_sp<SkPDFTag>> validChildren;
|
||||||
|
for (int i = 0; i < fChildren.count(); i++) {
|
||||||
|
if (fChildren[i]->prepareTagTreeToEmit(document)) {
|
||||||
|
validChildren.push_back(fChildren[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// fChildren is no longer needed.
|
||||||
|
fChildren.reset();
|
||||||
|
|
||||||
|
// Now set the kids of this node, which includes both child tags
|
||||||
|
// and marked content IDs.
|
||||||
|
if (validChildren.count() + fMarkedContent.count() == 1) {
|
||||||
|
// If there's just one valid kid, or one marked content,
|
||||||
|
// we can just output the reference directly with no array.
|
||||||
|
if (validChildren.count() == 1) {
|
||||||
|
insertObjRef("K", validChildren[0]);
|
||||||
|
} else {
|
||||||
|
insertInt("K", fMarkedContent[0].markId);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
} else if (validChildren.count() + fMarkedContent.count() > 1) {
|
||||||
|
// If there's more than one kid, output them in an array.
|
||||||
|
auto kids = sk_make_sp<SkPDFArray>();
|
||||||
|
for (int i = 0; i < validChildren.count(); i++) {
|
||||||
|
kids->appendObjRef(validChildren[i]);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < fMarkedContent.count(); i++) {
|
||||||
|
if (allSamePage) {
|
||||||
|
kids->appendInt(fMarkedContent[i].markId);
|
||||||
|
} else {
|
||||||
|
auto mcr = sk_make_sp<SkPDFDict>("MCR");
|
||||||
|
mcr->insertObjRef("Pg", document.getPage(fMarkedContent[i].pageIndex));
|
||||||
|
mcr->insertInt("MCID", fMarkedContent[i].markId);
|
||||||
|
kids->appendObject(mcr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
insertObject("K", kids);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This tag didn't have any marked content or any children with
|
||||||
|
// marked content, so return false. This subtree will be omitted
|
||||||
|
// from the structure tree.
|
||||||
|
return false;
|
||||||
|
}
|
67
src/pdf/SkPDFTag.h
Normal file
67
src/pdf/SkPDFTag.h
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2018 Google Inc.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license that can be
|
||||||
|
* found in the LICENSE file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SkPDFTag_DEFINED
|
||||||
|
#define SkPDFTag_DEFINED
|
||||||
|
|
||||||
|
#include "SkDocument.h"
|
||||||
|
#include "SkPDFTypes.h"
|
||||||
|
#include "SkRefCnt.h"
|
||||||
|
|
||||||
|
class SkPDFDocument;
|
||||||
|
|
||||||
|
/** \class SkPDFTag
|
||||||
|
|
||||||
|
A PDF Tag represents a semantic tag in the tag tree for an
|
||||||
|
accessible tagged PDF. Documents can create an accessible PDF by
|
||||||
|
creating a tree of SkPDFTags representing the semantic tree
|
||||||
|
structure of the overall document, and then calling
|
||||||
|
SkPDF::SetNodeId with the SkCanvas used to draw to the page and
|
||||||
|
the same corresponding node IDs to mark the content for each
|
||||||
|
page. It's allowed for the marked content for one tag to span
|
||||||
|
multiple pages.
|
||||||
|
*/
|
||||||
|
class SkPDFTag final : public SkPDFDict {
|
||||||
|
public:
|
||||||
|
SkPDFTag(int nodeId, SkPDF::DocumentStructureType type, sk_sp<SkPDFTag> parent);
|
||||||
|
~SkPDFTag() override;
|
||||||
|
|
||||||
|
void appendChild(sk_sp<SkPDFTag> child);
|
||||||
|
|
||||||
|
private:
|
||||||
|
friend class SkPDFDocument;
|
||||||
|
|
||||||
|
void drop() override;
|
||||||
|
|
||||||
|
void addMarkedContent(int pageIndex, int markId);
|
||||||
|
|
||||||
|
// Should be called after all content has been emitted. Fills in
|
||||||
|
// all of the SkPDFDict fields in this tag and all descendants.
|
||||||
|
// Returns true if this tag is valid, and false if no tag in this
|
||||||
|
// subtree was referred to by any marked content.
|
||||||
|
bool prepareTagTreeToEmit(const SkPDFDocument& document);
|
||||||
|
|
||||||
|
struct MarkedContentInfo {
|
||||||
|
int pageIndex;
|
||||||
|
int markId;
|
||||||
|
};
|
||||||
|
|
||||||
|
// This tag's node ID, which must correspond to the node ID set
|
||||||
|
// on the SkCanvas when content inside this tag is drawn.
|
||||||
|
// The node IDs are arbitrary and are not output to the PDF.
|
||||||
|
int fNodeId;
|
||||||
|
|
||||||
|
// The children of this tag. Some tags like lists and tables require
|
||||||
|
// a particular hierarchical structure, similar to HTML.
|
||||||
|
SkTArray<sk_sp<SkPDFTag>> fChildren;
|
||||||
|
|
||||||
|
// An array consisting of a [page index, mark ID] pair for each piece
|
||||||
|
// of marked content associated with this tag.
|
||||||
|
SkTArray<MarkedContentInfo> fMarkedContent;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
167
tests/PDFTaggedTest.cpp
Normal file
167
tests/PDFTaggedTest.cpp
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2018 Google Inc.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license that can be
|
||||||
|
* found in the LICENSE file.
|
||||||
|
*/
|
||||||
|
#include "Test.h"
|
||||||
|
|
||||||
|
#include "SkCanvas.h"
|
||||||
|
#include "SkPDFDocument.h"
|
||||||
|
#include "SkStream.h"
|
||||||
|
|
||||||
|
using PDFTag = SkPDF::StructureElementNode;
|
||||||
|
|
||||||
|
// Test building a tagged PDF.
|
||||||
|
// Add this to args.gn to output the PDF to a file:
|
||||||
|
// extra_cflags = [ "-DSK_PDF_TEST_TAGS_OUTPUT_PATH=\"/tmp/foo.pdf\"" ]
|
||||||
|
DEF_TEST(SkPDF_tagged, r) {
|
||||||
|
#ifdef SK_PDF_TEST_TAGS_OUTPUT_PATH
|
||||||
|
SkFILEWStream outputStream(SK_PDF_TEST_TAGS_OUTPUT_PATH);
|
||||||
|
#else
|
||||||
|
SkDynamicMemoryWStream outputStream;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
SkSize pageSize = SkSize::Make(612, 792); // U.S. Letter
|
||||||
|
|
||||||
|
SkPDF::Metadata metadata;
|
||||||
|
metadata.fTitle = "Example Tagged PDF";
|
||||||
|
metadata.fCreator = "Skia";
|
||||||
|
SkTime::DateTime now;
|
||||||
|
SkTime::GetDateTime(&now);
|
||||||
|
metadata.fCreation = now;
|
||||||
|
metadata.fModified = now;
|
||||||
|
|
||||||
|
// The document tag.
|
||||||
|
PDFTag root;
|
||||||
|
root.fNodeId = 1;
|
||||||
|
root.fType = SkPDF::DocumentStructureType::kDocument;
|
||||||
|
root.fChildCount = 5;
|
||||||
|
PDFTag rootChildren[5];
|
||||||
|
root.fChildren = rootChildren;
|
||||||
|
|
||||||
|
// Heading.
|
||||||
|
PDFTag& h1 = rootChildren[0];
|
||||||
|
h1.fNodeId = 2;
|
||||||
|
h1.fType = SkPDF::DocumentStructureType::kH1;
|
||||||
|
h1.fChildCount = 0;
|
||||||
|
|
||||||
|
// Initial paragraph.
|
||||||
|
PDFTag& p = rootChildren[1];
|
||||||
|
p.fNodeId = 3;
|
||||||
|
p.fType = SkPDF::DocumentStructureType::kP;
|
||||||
|
p.fChildCount = 0;
|
||||||
|
|
||||||
|
// Hidden div. This is never referenced by marked content
|
||||||
|
// so it should not appear in the resulting PDF.
|
||||||
|
PDFTag& div = rootChildren[2];
|
||||||
|
div.fNodeId = 4;
|
||||||
|
div.fType = SkPDF::DocumentStructureType::kDiv;
|
||||||
|
div.fChildCount = 0;
|
||||||
|
|
||||||
|
// A bulleted list of two items.
|
||||||
|
PDFTag& l = rootChildren[3];
|
||||||
|
l.fNodeId = 5;
|
||||||
|
l.fType = SkPDF::DocumentStructureType::kL;
|
||||||
|
l.fChildCount = 4;
|
||||||
|
PDFTag listChildren[4];
|
||||||
|
l.fChildren = listChildren;
|
||||||
|
|
||||||
|
PDFTag& lm1 = listChildren[0];
|
||||||
|
lm1.fNodeId = 6;
|
||||||
|
lm1.fType = SkPDF::DocumentStructureType::kLbl;
|
||||||
|
lm1.fChildCount = 0;
|
||||||
|
PDFTag& li1 = listChildren[1];
|
||||||
|
li1.fNodeId = 7;
|
||||||
|
li1.fType = SkPDF::DocumentStructureType::kLI;
|
||||||
|
li1.fChildCount = 0;
|
||||||
|
|
||||||
|
PDFTag& lm2 = listChildren[2];
|
||||||
|
lm2.fNodeId = 8;
|
||||||
|
lm2.fType = SkPDF::DocumentStructureType::kLbl;
|
||||||
|
lm2.fChildCount = 0;
|
||||||
|
PDFTag& li2 = listChildren[3];
|
||||||
|
li2.fNodeId = 9;
|
||||||
|
li2.fType = SkPDF::DocumentStructureType::kLI;
|
||||||
|
li2.fChildCount = 0;
|
||||||
|
|
||||||
|
// Paragraph spanning two pages.
|
||||||
|
PDFTag& p2 = rootChildren[4];
|
||||||
|
p2.fNodeId = 10;
|
||||||
|
p2.fType = SkPDF::DocumentStructureType::kP;
|
||||||
|
p2.fChildCount = 0;
|
||||||
|
|
||||||
|
metadata.fStructureElementTreeRoot = &root;
|
||||||
|
sk_sp<SkDocument> document = SkPDF::MakeDocument(
|
||||||
|
&outputStream, metadata);
|
||||||
|
|
||||||
|
SkPaint paint;
|
||||||
|
paint.setColor(SK_ColorBLACK);
|
||||||
|
paint.setSubpixelText(true);
|
||||||
|
paint.setHinting(SkPaint::kNo_Hinting);
|
||||||
|
|
||||||
|
// First page.
|
||||||
|
SkCanvas* canvas =
|
||||||
|
document->beginPage(pageSize.width(),
|
||||||
|
pageSize.height());
|
||||||
|
SkPDF::SetNodeId(canvas, 2);
|
||||||
|
paint.setTextSize(36);
|
||||||
|
const char* message = "This is the title";
|
||||||
|
canvas->translate(72, 72);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
SkPDF::SetNodeId(canvas, 3);
|
||||||
|
paint.setTextSize(14);
|
||||||
|
message = "This is a simple paragraph.";
|
||||||
|
canvas->translate(0, 72);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
SkPDF::SetNodeId(canvas, 6);
|
||||||
|
paint.setTextSize(14);
|
||||||
|
message = "*";
|
||||||
|
canvas->translate(0, 72);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
SkPDF::SetNodeId(canvas, 7);
|
||||||
|
message = "List item 1";
|
||||||
|
canvas->translate(36, 0);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
SkPDF::SetNodeId(canvas, 8);
|
||||||
|
message = "*";
|
||||||
|
canvas->translate(-36, 36);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
SkPDF::SetNodeId(canvas, 9);
|
||||||
|
message = "List item 2";
|
||||||
|
canvas->translate(36, 0);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
SkPDF::SetNodeId(canvas, 10);
|
||||||
|
message = "This is a paragraph that starts on one page";
|
||||||
|
canvas->translate(-36, 6 * 72);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
document->endPage();
|
||||||
|
|
||||||
|
// Second page.
|
||||||
|
canvas = document->beginPage(pageSize.width(),
|
||||||
|
pageSize.height());
|
||||||
|
SkPDF::SetNodeId(canvas, 10);
|
||||||
|
message = "and finishes on the second page.";
|
||||||
|
canvas->translate(72, 72);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
// This has a node ID but never shows up in the tag tree so it
|
||||||
|
// won't be tagged.
|
||||||
|
SkPDF::SetNodeId(canvas, 999);
|
||||||
|
message = "Page 2";
|
||||||
|
canvas->translate(468, -36);
|
||||||
|
canvas->drawText(message, strlen(message), 0, 0, paint);
|
||||||
|
|
||||||
|
document->endPage();
|
||||||
|
|
||||||
|
document->close();
|
||||||
|
|
||||||
|
outputStream.flush();
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user