SkPDF: refactor SkPDFCatalog and SkPDFDocument

SkPDFCatalog:
-   remove first-page-specific code, which was never used and is no
    longer needed (e.g. the onFirstPage argument of addObject()).
-   Make use of SkTHashMap for lookups (simplifies code).
-   inline all small methods
-   emitXrefTable moved to SkPDFDocument.cpp
-   no longer store offsets in this data structure (moved to
    SkPDFDocument.cpp)
-   setFileOffset gone.
-   own substitute refs directly.

SkPDFDocument::EmitPDF()
-   All sites that call into SkPDFCatalog modified.
-   catalog.addObject only called in a single place, after the
    resourceSet is built
-   offsets moved to local array.

SkPDFPage:
-   finalizePage no longer deals with SkPDFCatalog or resource sets.
-   GeneratePageTree no longer deals with SkPDFCatalog

SkPDFObjRef
-   emitObject respects the substitution map

Unit Tests:
-   respect SkPDFCatalog::addObject signature change.

SkTHash:
-   #include SkChecksum for SkGoodHash
-   Copyright notice added

Review URL: https://codereview.chromium.org/1033543002
This commit is contained in:
halcanary 2015-03-25 11:29:18 -07:00 committed by Commit bot
parent 05245900bf
commit f41061cc8d
7 changed files with 72 additions and 299 deletions

View File

@ -8,151 +8,36 @@
#include "SkPDFCatalog.h"
#include "SkPDFTypes.h"
#include "SkStream.h"
#include "SkTypes.h"
SkPDFCatalog::SkPDFCatalog()
: fFirstPageCount(0)
, fNextObjNum(1)
, fNextFirstPageObjNum(0) {}
SkPDFCatalog::SkPDFCatalog() {}
SkPDFCatalog::~SkPDFCatalog() {
fSubstituteResourcesRemaining.safeUnrefAll();
fSubstituteResourcesFirstPage.safeUnrefAll();
fSubstituteMap.foreach(
[](SkPDFObject*, SkPDFObject** v) { (*v)->unref(); });
}
SkPDFObject* SkPDFCatalog::addObject(SkPDFObject* obj, bool onFirstPage) {
if (findObjectIndex(obj) != -1) { // object already added
return obj;
bool SkPDFCatalog::addObject(SkPDFObject* obj) {
if (fObjectNumbers.find(obj)) {
return false;
}
SkASSERT(fNextFirstPageObjNum == 0);
if (onFirstPage) {
fFirstPageCount++;
}
Rec newEntry(obj, onFirstPage);
fCatalog.append(1, &newEntry);
return obj;
fObjectNumbers.set(obj, fObjectNumbers.count() + 1);
return true;
}
void SkPDFCatalog::setFileOffset(SkPDFObject* obj, off_t offset) {
int objIndex = assignObjNum(obj) - 1;
SkASSERT(fCatalog[objIndex].fObjNumAssigned);
SkASSERT(fCatalog[objIndex].fFileOffset == 0);
fCatalog[objIndex].fFileOffset = offset;
}
int32_t SkPDFCatalog::getObjectNumber(SkPDFObject* obj) {
return (int32_t)assignObjNum(obj);
}
int SkPDFCatalog::findObjectIndex(SkPDFObject* obj) {
for (int i = 0; i < fCatalog.count(); i++) {
if (fCatalog[i].fObject == obj) {
return i;
}
}
// If it's not in the main array, check if it's a substitute object.
for (int i = 0; i < fSubstituteMap.count(); ++i) {
if (fSubstituteMap[i].fSubstitute == obj) {
return findObjectIndex(fSubstituteMap[i].fOriginal);
}
}
Rec newEntry(obj, false);
fCatalog.append(1, &newEntry);
return fCatalog.count() - 1;
}
int SkPDFCatalog::assignObjNum(SkPDFObject* obj) {
int pos = findObjectIndex(obj);
// If this assert fails, it means you probably forgot to add an object
// to the resource list.
SkASSERT(pos >= 0);
uint32_t currentIndex = pos;
if (fCatalog[currentIndex].fObjNumAssigned) {
return currentIndex + 1;
}
// First assignment.
if (fNextFirstPageObjNum == 0) {
fNextFirstPageObjNum = fCatalog.count() - fFirstPageCount + 1;
}
uint32_t objNum;
if (fCatalog[currentIndex].fOnFirstPage) {
objNum = fNextFirstPageObjNum;
fNextFirstPageObjNum++;
} else {
objNum = fNextObjNum;
fNextObjNum++;
}
// When we assign an object an object number, we put it in that array
// offset (minus 1 because object number 0 is reserved).
SkASSERT(!fCatalog[objNum - 1].fObjNumAssigned);
if (objNum - 1 != currentIndex) {
SkTSwap(fCatalog[objNum - 1], fCatalog[currentIndex]);
}
fCatalog[objNum - 1].fObjNumAssigned = true;
return objNum;
}
int32_t SkPDFCatalog::emitXrefTable(SkWStream* stream, bool firstPage) {
int first = -1;
int last = fCatalog.count() - 1;
// TODO(vandebo): Support linearized format.
// int last = fCatalog.count() - fFirstPageCount - 1;
// if (firstPage) {
// first = fCatalog.count() - fFirstPageCount;
// last = fCatalog.count() - 1;
// }
stream->writeText("xref\n");
stream->writeDecAsText(first + 1);
stream->writeText(" ");
stream->writeDecAsText(last - first + 1);
stream->writeText("\n");
if (first == -1) {
stream->writeText("0000000000 65535 f \n");
first++;
}
for (int i = first; i <= last; i++) {
// For 32 bits platforms, the maximum offset has to fit within off_t
// which is a 32 bits signed integer on these platforms.
SkDEBUGCODE(static const off_t kMaxOff = SK_MaxS32;)
SkASSERT(fCatalog[i].fFileOffset > 0);
SkASSERT(fCatalog[i].fFileOffset < kMaxOff);
stream->writeBigDecAsText(fCatalog[i].fFileOffset, 10);
stream->writeText(" 00000 n \n");
}
return fCatalog.count() + 1;
int32_t SkPDFCatalog::getObjectNumber(SkPDFObject* obj) const {
int32_t* objectNumberFound = fObjectNumbers.find(obj);
SkASSERT(objectNumberFound);
return *objectNumberFound;
}
void SkPDFCatalog::setSubstitute(SkPDFObject* original,
SkPDFObject* substitute) {
#if defined(SK_DEBUG)
// Sanity check: is the original already in substitute list?
for (int i = 0; i < fSubstituteMap.count(); ++i) {
if (original == fSubstituteMap[i].fSubstitute ||
original == fSubstituteMap[i].fOriginal) {
SkASSERT(false);
return;
}
}
#endif
SubstituteMapping newMapping(original, substitute);
fSubstituteMap.append(1, &newMapping);
SkASSERT(original != substitute);
SkASSERT(!fSubstituteMap.find(original));
fSubstituteMap.set(original, SkRef(substitute));
}
SkPDFObject* SkPDFCatalog::getSubstituteObject(SkPDFObject* object) {
for (int i = 0; i < fSubstituteMap.count(); ++i) {
if (object == fSubstituteMap[i].fOriginal) {
return fSubstituteMap[i].fSubstitute;
}
}
return object;
SkPDFObject* SkPDFCatalog::getSubstituteObject(SkPDFObject* object) const {
SkPDFObject** found = fSubstituteMap.find(object);
return found ? *found : object;
}

View File

@ -10,99 +10,44 @@
#ifndef SkPDFCatalog_DEFINED
#define SkPDFCatalog_DEFINED
#include <sys/types.h>
#include "SkPDFTypes.h"
#include "SkTDArray.h"
#include "SkTHash.h"
/** \class SkPDFCatalog
The PDF catalog manages object numbers and file offsets. It is used
The PDF catalog manages object numbers. It is used
to create the PDF cross reference table.
*/
class SkPDFCatalog {
public:
/** Create a PDF catalog.
*/
SkPDFCatalog();
~SkPDFCatalog();
/** Add the passed object to the catalog. Refs obj.
/** Add the passed object to the catalog.
* @param obj The object to add.
* @param onFirstPage Is the object on the first page.
* @return The obj argument is returned.
* @return True iff the object was not already added to the catalog.
*/
SkPDFObject* addObject(SkPDFObject* obj, bool onFirstPage);
/** Inform the catalog of the object's position in the final stream.
* The object should already have been added to the catalog.
* @param obj The object to add.
* @param offset The byte offset in the output stream of this object.
*/
void setFileOffset(SkPDFObject* obj, off_t offset);
bool addObject(SkPDFObject* obj);
/** Get the object number for the passed object.
* @param obj The object of interest.
*/
int32_t getObjectNumber(SkPDFObject* obj);
/** Output the cross reference table for objects in the catalog.
* Returns the total number of objects.
* @param stream The writable output stream to send the output to.
* @param firstPage If true, include first page objects only, otherwise
* include all objects not on the first page.
*/
int32_t emitXrefTable(SkWStream* stream, bool firstPage);
int32_t getObjectNumber(SkPDFObject* obj) const;
/** Set substitute object for the passed object.
Refs substitute.
*/
void setSubstitute(SkPDFObject* original, SkPDFObject* substitute);
/** Find and return any substitute object set for the passed object. If
* there is none, return the passed object.
*/
SkPDFObject* getSubstituteObject(SkPDFObject* object);
SkPDFObject* getSubstituteObject(SkPDFObject* object) const;
private:
struct Rec {
Rec(SkPDFObject* object, bool onFirstPage)
: fObject(object),
fFileOffset(0),
fObjNumAssigned(false),
fOnFirstPage(onFirstPage) {
}
SkPDFObject* fObject;
off_t fFileOffset;
bool fObjNumAssigned;
bool fOnFirstPage;
};
struct SubstituteMapping {
SubstituteMapping(SkPDFObject* original, SkPDFObject* substitute)
: fOriginal(original), fSubstitute(substitute) {
}
SkPDFObject* fOriginal;
SkPDFObject* fSubstitute;
};
// TODO(vandebo): Make this a hash if it's a performance problem.
SkTDArray<Rec> fCatalog;
// TODO(arthurhsu): Make this a hash if it's a performance problem.
SkTDArray<SubstituteMapping> fSubstituteMap;
SkTSet<SkPDFObject*> fSubstituteResourcesFirstPage;
SkTSet<SkPDFObject*> fSubstituteResourcesRemaining;
// Number of objects on the first page.
uint32_t fFirstPageCount;
// Next object number to assign (on page > 1).
uint32_t fNextObjNum;
// Next object number to assign on the first page.
uint32_t fNextFirstPageObjNum;
int findObjectIndex(SkPDFObject* obj);
int assignObjNum(SkPDFObject* obj);
SkTHashMap<SkPDFObject*, int32_t> fObjectNumbers;
SkTHashMap<SkPDFObject*, SkPDFObject*> fSubstituteMap;
};
#endif

View File

@ -17,10 +17,8 @@
static void perform_font_subsetting(SkPDFCatalog* catalog,
const SkTDArray<SkPDFPage*>& pages,
SkTDArray<SkPDFObject*>* substitutes) {
const SkTDArray<SkPDFPage*>& pages) {
SkASSERT(catalog);
SkASSERT(substitutes);
SkPDFGlyphSetMap usage;
for (int i = 0; i < pages.count(); ++i) {
@ -29,11 +27,10 @@ static void perform_font_subsetting(SkPDFCatalog* catalog,
SkPDFGlyphSetMap::F2BIter iterator(usage);
const SkPDFGlyphSetMap::FontGlyphSetPair* entry = iterator.next();
while (entry) {
SkPDFFont* subsetFont =
entry->fFont->getFontSubset(entry->fGlyphSet);
SkAutoTUnref<SkPDFFont> subsetFont(
entry->fFont->getFontSubset(entry->fGlyphSet));
if (subsetFont) {
catalog->setSubstitute(entry->fFont, subsetFont);
substitutes->push(subsetFont); // Transfer ownership to substitutes
catalog->setSubstitute(entry->fFont, subsetFont.get());
}
entry = iterator.next();
}
@ -72,24 +69,26 @@ bool SkPDFDocument::EmitPDF(const SkTDArray<const SkPDFDevice*>& pageDevices,
}
SkTDArray<SkPDFPage*> pages;
SkAutoTUnref<SkPDFDict> dests(SkNEW(SkPDFDict));
for (int i = 0; i < pageDevices.count(); i++) {
SkASSERT(pageDevices[i]);
SkASSERT(i == 0 ||
pageDevices[i - 1]->getCanon() == pageDevices[i]->getCanon());
// Reference from new passed to pages.
pages.push(SkNEW_ARGS(SkPDFPage, (pageDevices[i])));
SkAutoTUnref<SkPDFPage> page(SkNEW_ARGS(SkPDFPage, (pageDevices[i])));
page->finalizePage();
page->appendDestinations(dests);
pages.push(page.detach());
}
SkPDFCatalog catalog;
SkTDArray<SkPDFDict*> pageTree;
SkAutoTUnref<SkPDFDict> docCatalog(SkNEW_ARGS(SkPDFDict, ("Catalog")));
SkTSet<SkPDFObject*> firstPageResources;
SkTSet<SkPDFObject*> otherPageResources;
SkTDArray<SkPDFObject*> substitutes;
catalog.addObject(docCatalog.get(), true);
SkPDFDict* pageTreeRoot;
SkPDFPage::GeneratePageTree(pages, &catalog, &pageTree, &pageTreeRoot);
SkPDFPage::GeneratePageTree(pages, &pageTree, &pageTreeRoot);
docCatalog->insert("Pages", new SkPDFObjRef(pageTreeRoot))->unref();
/* TODO(vandebo): output intent
@ -102,78 +101,47 @@ bool SkPDFDocument::EmitPDF(const SkTDArray<const SkPDFDevice*>& pageDevices,
docCatalog->insert("OutputIntent", intentArray.get());
*/
SkAutoTUnref<SkPDFDict> dests(SkNEW(SkPDFDict));
bool firstPage = true;
/* The references returned in newResources are transfered to
* firstPageResources or otherPageResources depending on firstPage and
* knownResources doesn't have a reference but just relies on the other
* two sets to maintain a reference.
*/
SkTSet<SkPDFObject*> knownResources;
// mergeInto returns the number of duplicates.
// If there are duplicates, there is a bug and we mess ref counting.
SkDEBUGCODE(int duplicates = ) knownResources.mergeInto(firstPageResources);
SkASSERT(duplicates == 0);
for (int i = 0; i < pages.count(); i++) {
if (i == 1) {
firstPage = false;
SkDEBUGCODE(duplicates = )
knownResources.mergeInto(otherPageResources);
}
SkTSet<SkPDFObject*> newResources;
pages[i]->finalizePage(&catalog, firstPage, knownResources,
&newResources);
for (int j = 0; j < newResources.count(); j++) {
catalog.addObject(newResources[i], firstPage);
}
if (firstPage) {
SkDEBUGCODE(duplicates = )
firstPageResources.mergeInto(newResources);
} else {
SkDEBUGCODE(duplicates = )
otherPageResources.mergeInto(newResources);
}
SkASSERT(duplicates == 0);
SkDEBUGCODE(duplicates = ) knownResources.mergeInto(newResources);
SkASSERT(duplicates == 0);
pages[i]->appendDestinations(dests);
}
if (dests->size() > 0) {
SkPDFDict* raw_dests = dests.get();
firstPageResources.add(dests.detach()); // Transfer ownership.
catalog.addObject(raw_dests, true /* onFirstPage */);
docCatalog->insert("Dests", SkNEW_ARGS(SkPDFObjRef, (raw_dests)))
docCatalog->insert("Dests", SkNEW_ARGS(SkPDFObjRef, (dests.get())))
->unref();
}
// Build font subsetting info before proceeding.
perform_font_subsetting(&catalog, pages, &substitutes);
perform_font_subsetting(&catalog, pages);
SkTSet<SkPDFObject*> resourceSet;
if (resourceSet.add(docCatalog.get())) {
docCatalog->addResources(&resourceSet, &catalog);
}
for (int i = 0; i < resourceSet.count(); ++i) {
SkAssertResult(catalog.addObject(resourceSet[i]));
}
size_t baseOffset = SkToOffT(stream->bytesWritten());
emit_pdf_header(stream);
SkTDArray<int32_t> offsets;
for (int i = 0; i < resourceSet.count(); ++i) {
SkPDFObject* object = resourceSet[i];
catalog.setFileOffset(object,
SkToOffT(stream->bytesWritten() - baseOffset));
offsets.push(SkToS32(stream->bytesWritten() - baseOffset));
SkASSERT(object == catalog.getSubstituteObject(object));
stream->writeDecAsText(catalog.getObjectNumber(object));
SkASSERT(catalog.getObjectNumber(object) == i + 1);
stream->writeDecAsText(i + 1);
stream->writeText(" 0 obj\n"); // Generation number is always 0.
object->emitObject(stream, &catalog);
stream->writeText("\nendobj\n");
}
int32_t xRefFileOffset = SkToS32(stream->bytesWritten() - baseOffset);
int64_t objCount = catalog.emitXrefTable(stream, pages.count() > 1);
int32_t objCount = SkToS32(offsets.count() + 1);
stream->writeText("xref\n0 ");
stream->writeDecAsText(objCount + 1);
stream->writeText("\n0000000000 65535 f \n");
for (int i = 0; i < offsets.count(); i++) {
SkASSERT(offsets[i] > 0);
stream->writeBigDecAsText(offsets[i], 10);
stream->writeText(" 00000 n \n");
}
emit_pdf_footer(stream, &catalog, docCatalog.get(), objCount,
xRefFileOffset);
@ -184,12 +152,6 @@ bool SkPDFDocument::EmitPDF(const SkTDArray<const SkPDFDevice*>& pageDevices,
}
pageTree.safeUnrefAll();
pages.unrefAll();
firstPageResources.safeUnrefAll();
otherPageResources.safeUnrefAll();
substitutes.unrefAll();
docCatalog.reset(NULL);
return true;
}

View File

@ -21,9 +21,7 @@ SkPDFPage::SkPDFPage(const SkPDFDevice* content)
SkPDFPage::~SkPDFPage() {}
void SkPDFPage::finalizePage(SkPDFCatalog* catalog, bool firstPage,
const SkTSet<SkPDFObject*>& knownResourceObjects,
SkTSet<SkPDFObject*>* newResourceObjects) {
void SkPDFPage::finalizePage() {
if (fContentStream.get() == NULL) {
SkAutoTUnref<SkPDFResourceDict> deviceResourceDict(
fDevice->createResourceDict());
@ -38,12 +36,10 @@ void SkPDFPage::finalizePage(SkPDFCatalog* catalog, bool firstPage,
fContentStream.reset(new SkPDFStream(content.get()));
insert("Contents", new SkPDFObjRef(fContentStream.get()))->unref();
}
catalog->addObject(fContentStream.get(), firstPage);
}
// static
void SkPDFPage::GeneratePageTree(const SkTDArray<SkPDFPage*>& pages,
SkPDFCatalog* catalog,
SkTDArray<SkPDFDict*>* pageTree,
SkPDFDict** rootNode) {
// PDF wants a tree describing all the pages in the document. We arbitrary
@ -94,10 +90,8 @@ void SkPDFPage::GeneratePageTree(const SkTDArray<SkPDFPage*>& pages,
// Probably doesn't matter because they are so small.
if (curNodes[i] != pages[0]) {
pageTree->push(curNodes[i]); // Transfer reference.
catalog->addObject(curNodes[i], false);
} else {
SkSafeUnref(curNodes[i]);
catalog->addObject(curNodes[i], true);
}
}
@ -123,7 +117,6 @@ void SkPDFPage::GeneratePageTree(const SkTDArray<SkPDFPage*>& pages,
} while (curNodes.count() > 1);
pageTree->push(curNodes[0]); // Transfer reference.
catalog->addObject(curNodes[0], false);
if (rootNode) {
*rootNode = curNodes[0];
}

View File

@ -36,20 +36,9 @@ public:
/** Before a page and its contents can be sized and emitted, it must
* be finalized. No changes to the PDFDevice will be honored after
* finalizePage has been called. This function adds the page content
* to the passed catalog, so it must be called for each document
* that the page is part of.
* @param catalog The catalog to add page content objects to.
* @param firstPage Indicate if this is the first page of a document.
* @param newResourceObjects All the resource objects (recursively) used on
* the page are added to this array. This gives
* the caller a chance to deduplicate resources
* across pages.
* @param knownResourceObjects The set of resources to be ignored.
* finalizePage has been called.
*/
void finalizePage(SkPDFCatalog* catalog, bool firstPage,
const SkTSet<SkPDFObject*>& knownResourceObjects,
SkTSet<SkPDFObject*>* newResourceObjects);
void finalizePage();
/** Add destinations for this page to the supplied dictionary.
* @param dict Dictionary to add destinations to.
@ -63,13 +52,11 @@ public:
* it must be torn down explicitly. The first page is not added to
* the pageTree dictionary array so the caller can handle it specially.
* @param pages The ordered vector of page objects.
* @param catalog The catalog to add new objects into.
* @param pageTree An output vector with all of the internal and leaf
* nodes of the pageTree.
* @param rootNode An output parameter set to the root node.
*/
static void GeneratePageTree(const SkTDArray<SkPDFPage*>& pages,
SkPDFCatalog* catalog,
SkTDArray<SkPDFDict*>* pageTree,
SkPDFDict** rootNode);

View File

@ -26,7 +26,8 @@ SkPDFObjRef::SkPDFObjRef(SkPDFObject* obj) : fObj(obj) {
SkPDFObjRef::~SkPDFObjRef() {}
void SkPDFObjRef::emitObject(SkWStream* stream, SkPDFCatalog* catalog) {
stream->writeDecAsText(catalog->getObjectNumber(fObj.get()));
SkPDFObject* obj = catalog->getSubstituteObject(fObj);
stream->writeDecAsText(catalog->getObjectNumber(obj));
stream->writeText(" 0 R"); // Generation number is always 0.
}

View File

@ -77,7 +77,7 @@ static void CheckObjectOutput(skiatest::Reporter* reporter, SkPDFObject* obj,
static char footer[] = "\nendobj\n";
static size_t footerLen = strlen(footer);
catalog.addObject(obj, false);
catalog.addObject(obj);
size_t indirectSize = get_output_size(obj, &catalog, true);
REPORTER_ASSERT(reporter,
@ -149,9 +149,9 @@ static void TestCatalog(skiatest::Reporter* reporter) {
int1.get()->ref();
SkAutoTUnref<SkPDFInt> int1Again(int1.get());
catalog.addObject(int1.get(), false);
catalog.addObject(int2.get(), false);
catalog.addObject(int3.get(), false);
catalog.addObject(int1.get());
catalog.addObject(int2.get());
catalog.addObject(int3.get());
REPORTER_ASSERT(reporter, catalog.getObjectNumber(int1.get()) == 1);
REPORTER_ASSERT(reporter, catalog.getObjectNumber(int2.get()) == 2);
@ -165,8 +165,8 @@ static void TestObjectRef(skiatest::Reporter* reporter) {
SkAutoTUnref<SkPDFObjRef> int2ref(new SkPDFObjRef(int2.get()));
SkPDFCatalog catalog;
catalog.addObject(int1.get(), false);
catalog.addObject(int2.get(), false);
catalog.addObject(int1.get());
catalog.addObject(int2.get());
REPORTER_ASSERT(reporter, catalog.getObjectNumber(int1.get()) == 1);
REPORTER_ASSERT(reporter, catalog.getObjectNumber(int2.get()) == 2);
@ -186,7 +186,7 @@ static void TestSubstitute(skiatest::Reporter* reporter) {
stub->insert("Value", new SkPDFInt(44))->unref();
SkPDFCatalog catalog;
catalog.addObject(proxy.get(), false);
catalog.addObject(proxy.get());
catalog.setSubstitute(proxy.get(), stub.get());
REPORTER_ASSERT(reporter, stub.get() == catalog.getSubstituteObject(proxy));