2a016bad67
At the time Chromium is painting, we're passing node IDs along with
painting commands to enable tagging. However, this assumes that all
nodes will end up in the structure tree, which we might not want.

Instead, allow the client to prune the structure tree later before
telling Skia to generate the PDF, but keep all of the node IDs
to be matched up with.

As an example, suppose the doc looks like this:

root id=1
  paragraph id=2
    div id=3
      text1 id=4
    link id=5
      text2 id=6

The pruned tree passed to Skia would look like this:

root id=1
  paragraph id=2 extra_ids=3,4
    link id=5 extra_ids=6

We need to pass the extra node IDs into Skia so that when
content is tagged with id=4, we know to map that to the
paragraph node with id=2 instead.

Note that the resulting PDF document will *not* have any
of these extra IDs; they're all remapped and consolidated.

While it's not strictly necessary that this is done in Skia,
it's easiest to implement it here. Doing the same upstream
would require replaying an SkPicture and rewriting all of the
node IDs.

Bug: chromium:607777
Change-Id: I0ecb62651e60b84cc5b9d053d7f7d3b9efda1470
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/272462
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Dominic Mazzoni <dmazzoni@chromium.org>
253 lines
8.6 KiB
C++
253 lines
8.6 KiB
C++
// Copyright 2018 Google LLC.
|
|
// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
|
|
#ifndef SkPDFDocument_DEFINED
|
|
#define SkPDFDocument_DEFINED
|
|
|
|
#include "include/core/SkDocument.h"
|
|
|
|
#include <vector>
|
|
|
|
#include "include/core/SkColor.h"
|
|
#include "include/core/SkMilestone.h"
|
|
#include "include/core/SkScalar.h"
|
|
#include "include/core/SkString.h"
|
|
#include "include/core/SkTime.h"
|
|
#include "include/private/SkNoncopyable.h"
|
|
|
|
// Two-level stringification helpers.
//   SKPDF_STRING(X)      macro-expands X first, then turns the result into a
//                        string literal.
//   SKPDF_STRING_IMPL(X) stringifies X verbatim, without expanding it.
// Used below to embed SK_MILESTONE in the default producer string. Both macros
// are #undef'd again at the end of this header.
#define SKPDF_STRING(X) SKPDF_STRING_IMPL(X)
#define SKPDF_STRING_IMPL(X) #X
|
|
|
|
// Forward declarations: this header only refers to these types through
// pointers, a std::unique_ptr member, or a friend declaration.
class SkExecutor;
class SkPDFArray;
class SkPDFTagTree;
|
|
|
|
namespace SkPDF {
|
|
|
|
/** Structure types listed in Table 333 of PDF 32000-1:2008 §14.8.4.2.

    Enumerators carry implicit values in declaration order, starting at 0.
*/
enum class DocumentStructureType {
    kDocument,    //!< Document
    kPart,        //!< Part
    kArt,         //!< Article
    kSect,        //!< Section
    kDiv,         //!< Division
    kBlockQuote,  //!< Block quotation
    kCaption,     //!< Caption
    kTOC,         //!< Table of Contents
    kTOCI,        //!< Table of Contents Item
    kIndex,       //!< Index
    kNonStruct,   //!< Nonstructural element
    kPrivate,     //!< Private element
    kH,           //!< Heading
    kH1,          //!< Heading level 1
    kH2,          //!< Heading level 2
    kH3,          //!< Heading level 3
    kH4,          //!< Heading level 4
    kH5,          //!< Heading level 5
    kH6,          //!< Heading level 6
    kP,           //!< Paragraph
    kL,           //!< List
    kLI,          //!< List item
    kLbl,         //!< List item label
    kLBody,       //!< List item body
    kTable,       //!< Table
    kTR,          //!< Table row
    kTH,          //!< Table header cell
    kTD,          //!< Table data cell
    kTHead,       //!< Table header row group
    kTBody,       //!< Table body row group
    kTFoot,       //!< Table footer row group
    kSpan,        //!< Span
    kQuote,       //!< Quotation
    kNote,        //!< Note
    kReference,   //!< Reference
    kBibEntry,    //!< Bibliography entry
    kCode,        //!< Code
    kLink,        //!< Link
    kAnnot,       //!< Annotation
    kRuby,        //!< Ruby annotation
    kRB,          //!< Ruby base text
    kRT,          //!< Ruby annotation text
    kRP,          //!< Ruby punctuation
    kWarichu,     //!< Warichu annotation
    kWT,          //!< Warichu text
    kWP,          //!< Warichu punctuation
    kFigure,      //!< Figure
    kFormula,     //!< Formula
    kForm,        //!< Form control (not like an HTML FORM element)
};
|
|
|
|
/** Attributes for nodes in the PDF tree. */
|
|
class SK_API AttributeList : SkNoncopyable {
|
|
public:
|
|
AttributeList();
|
|
~AttributeList();
|
|
|
|
// Each attribute must have an owner (e.g. "Layout", "List", "Table", etc)
|
|
// and an attribute name (e.g. "BBox", "RowSpan", etc.) from PDF32000_2008 14.8.5,
|
|
// and then a value of the proper type according to the spec.
|
|
void appendInt(const char* owner, const char* name, int value);
|
|
void appendFloat(const char* owner, const char* name, float value);
|
|
void appendString(const char* owner, const char* name, const char* value);
|
|
void appendFloatArray(const char* owner,
|
|
const char* name,
|
|
const std::vector<float>& value);
|
|
void appendStringArray(const char* owner,
|
|
const char* name,
|
|
const std::vector<SkString>& value);
|
|
|
|
private:
|
|
friend class ::SkPDFTagTree;
|
|
|
|
std::unique_ptr<SkPDFArray> fAttrs;
|
|
};
|
|
|
|
/** A node in a PDF structure tree, giving a semantic representation
|
|
of the content. Each node ID is associated with content
|
|
by passing the SkCanvas and node ID to SkPDF::SetNodeId() when drawing.
|
|
NodeIDs should be unique within each tree.
|
|
*/
|
|
struct StructureElementNode {
|
|
SkString fTypeString;
|
|
std::vector<std::unique_ptr<StructureElementNode>> fChildVector;
|
|
int fNodeId = 0;
|
|
std::vector<int> fAdditionalNodeIds;
|
|
AttributeList fAttributes;
|
|
SkString fAlt;
|
|
SkString fLang;
|
|
|
|
// Deprecated. Use fChildVector instead.
|
|
StructureElementNode* fChildren = nullptr;
|
|
size_t fChildCount = 0;
|
|
|
|
// Deprecated. Use fTypeString instead.
|
|
DocumentStructureType fType = DocumentStructureType::kNonStruct;
|
|
};
|
|
|
|
/** Optional metadata to be passed into the PDF factory function.
|
|
*/
|
|
struct Metadata {
|
|
/** The document's title.
|
|
*/
|
|
SkString fTitle;
|
|
|
|
/** The name of the person who created the document.
|
|
*/
|
|
SkString fAuthor;
|
|
|
|
/** The subject of the document.
|
|
*/
|
|
SkString fSubject;
|
|
|
|
/** Keywords associated with the document. Commas may be used to delineate
|
|
keywords within the string.
|
|
*/
|
|
SkString fKeywords;
|
|
|
|
/** If the document was converted to PDF from another format,
|
|
the name of the conforming product that created the
|
|
original document from which it was converted.
|
|
*/
|
|
SkString fCreator;
|
|
|
|
/** The product that is converting this document to PDF.
|
|
*/
|
|
SkString fProducer = SkString("Skia/PDF m" SKPDF_STRING(SK_MILESTONE));
|
|
|
|
/** The date and time the document was created.
|
|
The zero default value represents an unknown/unset time.
|
|
*/
|
|
SkTime::DateTime fCreation = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
|
/** The date and time the document was most recently modified.
|
|
The zero default value represents an unknown/unset time.
|
|
*/
|
|
SkTime::DateTime fModified = {0, 0, 0, 0, 0, 0, 0, 0};
|
|
|
|
/** The DPI (pixels-per-inch) at which features without native PDF support
|
|
will be rasterized (e.g. draw image with perspective, draw text with
|
|
perspective, ...) A larger DPI would create a PDF that reflects the
|
|
original intent with better fidelity, but it can make for larger PDF
|
|
files too, which would use more memory while rendering, and it would be
|
|
slower to be processed or sent online or to printer.
|
|
*/
|
|
SkScalar fRasterDPI = SK_ScalarDefaultRasterDPI;
|
|
|
|
/** If true, include XMP metadata, a document UUID, and sRGB output intent
|
|
information. This adds length to the document and makes it
|
|
non-reproducable, but are necessary features for PDF/A-2b conformance
|
|
*/
|
|
bool fPDFA = false;
|
|
|
|
/** Encoding quality controls the trade-off between size and quality. By
|
|
default this is set to 101 percent, which corresponds to lossless
|
|
encoding. If this value is set to a value <= 100, and the image is
|
|
opaque, it will be encoded (using JPEG) with that quality setting.
|
|
*/
|
|
int fEncodingQuality = 101;
|
|
|
|
/** An optional tree of structured document tags that provide
|
|
a semantic representation of the content. The caller
|
|
should retain ownership.
|
|
*/
|
|
StructureElementNode* fStructureElementTreeRoot = nullptr;
|
|
|
|
/** Executor to handle threaded work within PDF Backend. If this is nullptr,
|
|
then all work will be done serially on the main thread. To have worker
|
|
threads assist with various tasks, set this to a valid SkExecutor
|
|
instance. Currently used for executing Deflate algorithm in parallel.
|
|
|
|
If set, the PDF output will be non-reproducible in the order and
|
|
internal numbering of objects, but should render the same.
|
|
|
|
Experimental.
|
|
*/
|
|
SkExecutor* fExecutor = nullptr;
|
|
|
|
/** Preferred Subsetter. Only respected if both are compiled in.
|
|
|
|
The Sfntly subsetter is deprecated.
|
|
|
|
Experimental.
|
|
*/
|
|
enum Subsetter {
|
|
kHarfbuzz_Subsetter,
|
|
kSfntly_Subsetter,
|
|
} fSubsetter = kHarfbuzz_Subsetter;
|
|
};
|
|
|
|
/** Associate a node ID with subsequent drawing commands in an
|
|
SkCanvas. The same node ID can appear in a StructureElementNode
|
|
in order to associate a document's structure element tree with
|
|
its content.
|
|
|
|
A node ID of zero indicates no node ID.
|
|
|
|
@param canvas The canvas used to draw to the PDF.
|
|
@param nodeId The node ID for subsequent drawing commands.
|
|
*/
|
|
SK_API void SetNodeId(SkCanvas* dst, int nodeID);
|
|
|
|
/** Create a PDF-backed document, writing the results into a SkWStream.
|
|
|
|
PDF pages are sized in point units. 1 pt == 1/72 inch == 127/360 mm.
|
|
|
|
@param stream A PDF document will be written to this stream. The document may write
|
|
to the stream at anytime during its lifetime, until either close() is
|
|
called or the document is deleted.
|
|
@param metadata a PDFmetadata object. Any fields may be left empty.
|
|
|
|
@returns NULL if there is an error, otherwise a newly created PDF-backed SkDocument.
|
|
*/
|
|
SK_API sk_sp<SkDocument> MakeDocument(SkWStream* stream, const Metadata& metadata);
|
|
|
|
/** Convenience overload: create a PDF-backed document using a
    default-constructed Metadata.

    @param stream  Forwarded unchanged to MakeDocument(stream, metadata).
    @returns Whatever the two-argument MakeDocument() returns.
*/
static inline sk_sp<SkDocument> MakeDocument(SkWStream* stream) {
    return MakeDocument(stream, Metadata());
}
|
|
|
|
} // namespace SkPDF
|
|
|
|
#undef SKPDF_STRING
|
|
#undef SKPDF_STRING_IMPL
|
|
#endif // SkPDFDocument_DEFINED
|