SkPDF: Fix Type3 ToUnicode table.

This seems to fix text extraction in Adobe Reader.

  - Registry/Ordering is now set to Skia/SkiaOrdering.
  - Type3 fonts now get a FontDescriptor (force symbolic font).
  - CMapName is now Skia-Identity-SkiaOrdering.
  - CMap behaves correctly for single-byte fonts.

Also:
  - SkTestTypeface returns a ToUnicode map for testing.
  - Unit test updated.

All PDFs render the same.

BUG=skia:5606
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2292303004

Review-Url: https://codereview.chromium.org/2292303004
This commit is contained in:
halcanary 2016-08-31 12:52:35 -07:00 committed by Commit bot
parent 41a8f323f7
commit 3d01c62e19
4 changed files with 56 additions and 32 deletions

View File

@ -150,7 +150,15 @@ SkAdvancedTypefaceMetrics* SkTestTypeface::onGetAdvancedTypefaceMetrics(
// pdf only
SkAdvancedTypefaceMetrics* info = new SkAdvancedTypefaceMetrics;
info->fFontName.set(fTestFont->fName);
info->fLastGlyphID = SkToU16(onCountGlyphs() - 1);
int glyphCount = this->onCountGlyphs();
info->fLastGlyphID = SkToU16(glyphCount - 1);
SkTDArray<SkUnichar>& toUnicode = info->fGlyphToUnicode;
toUnicode.setCount(glyphCount);
SkASSERT(glyphCount == SkToInt(fTestFont->fCharCodesCount));
for (int gid = 0; gid < glyphCount; ++gid) {
toUnicode[gid] = SkToS32(fTestFont->fCharCodes[gid]);
}
return info;
}

View File

@ -29,7 +29,7 @@ namespace {
// PDF's notion of symbolic vs non-symbolic is related to the character set, not
// symbols vs. characters. Rarely is a font the right character set to call it
// non-symbolic, so always call it symbolic. (PDF 1.4 spec, section 5.7.1)
static const int kPdfSymbolic = 4;
static const int32_t kPdfSymbolic = 4;
struct SkPDFType0Font final : public SkPDFFont {
SkPDFType0Font(SkPDFFont::Info, const SkAdvancedTypefaceMetrics&);
@ -426,8 +426,9 @@ void SkPDFType0Font::getFontSubset(SkPDFCanon* canon) {
}
auto sysInfo = sk_make_sp<SkPDFDict>();
sysInfo->insertString("Registry", "Adobe");
sysInfo->insertString("Ordering", "Identity");
sysInfo->insertString("Registry", "Skia");
// TODO: Registry+Ordering should be globally unique!
sysInfo->insertString("Ordering", "SkiaOrdering");
sysInfo->insertInt("Supplement", 0);
newCIDFont->insertObject("CIDSystemInfo", std::move(sysInfo));
@ -597,6 +598,7 @@ static void add_type3_font_info(SkPDFCanon* canon,
const SkBitSet& subset,
SkGlyphID firstGlyphID,
SkGlyphID lastGlyphID) {
const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon);
SkASSERT(lastGlyphID >= firstGlyphID);
// Remove unused glyphs at the end of the range.
// Keep the lastGlyphID >= firstGlyphID invariant true.
@ -684,8 +686,7 @@ static void add_type3_font_info(SkPDFCanon* canon,
fontBBox->appendInt(bbox.top());
font->insertObject("FontBBox", std::move(fontBBox));
font->insertName("CIDToGIDMap", "Identity");
const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon);
if (metrics /* && metrics->fGlyphToUnicode.count() > 0 */) {
if (metrics && metrics->fGlyphToUnicode.count() > 0) {
font->insertObjRef("ToUnicode",
SkPDFMakeToUnicodeCmap(metrics->fGlyphToUnicode,
&subset,
@ -693,6 +694,16 @@ static void add_type3_font_info(SkPDFCanon* canon,
firstGlyphID,
lastGlyphID));
}
auto descriptor = sk_make_sp<SkPDFDict>("FontDescriptor");
int32_t fontDescriptorFlags = kPdfSymbolic;
if (metrics) {
// Type3 FontDescriptor does not require all the same fields.
descriptor->insertName("FontName", metrics->fFontName);
descriptor->insertInt("ItalicAngle", metrics->fItalicAngle);
fontDescriptorFlags |= (int32_t)metrics->fStyle;
}
descriptor->insertInt("Flags", fontDescriptorFlags);
font->insertObjRef("FontDescriptor", std::move(descriptor));
font->insertObject("Widths", std::move(widthArray));
font->insertObject("Encoding", std::move(encoding));
font->insertObject("CharProcs", std::move(charProcs));

View File

@ -10,8 +10,7 @@
#include "SkUtils.h"
static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
SkGlyphID firstGlyphID,
SkGlyphID lastGlyphID) {
bool multibyte) {
// 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
// It's there to prevent old version Adobe Readers from malfunctioning.
const char* kHeader =
@ -26,8 +25,8 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
// different. This is not a reference object.
const char* kSysInfo =
"/CIDSystemInfo\n"
"<< /Registry (Adobe)\n"
"/Ordering (UCS)\n"
"<< /Registry (Skia)\n"
"/Ordering (SkiaOrdering)\n"
"/Supplement 0\n"
">> def\n";
cmap->writeText(kSysInfo);
@ -36,18 +35,16 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
// /CMapType 2 means ToUnicode.
// Codespace range just tells the PDF processor the valid range.
const char* kTypeInfoHeader =
"/CMapName /Adobe-Identity-UCS def\n"
"/CMapName /Skia-Identity-SkiaOrdering def\n"
"/CMapType 2 def\n"
"1 begincodespacerange\n";
cmap->writeText(kTypeInfoHeader);
// e.g. "<0000> <FFFF>\n"
SkString range;
range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID);
cmap->writeText(range.c_str());
const char* kTypeInfoFooter = "endcodespacerange\n";
cmap->writeText(kTypeInfoFooter);
if (multibyte) {
cmap->writeText("<0000> <FFFF>\n");
} else {
cmap->writeText("<00> <FF>\n");
}
cmap->writeText("endcodespacerange\n");
}
static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
@ -82,7 +79,18 @@ static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
}
}
// Writes the source-code side of a bfchar/bfrange entry for |gid| to |cmap|.
//
// When |multiByte| is true the full 16-bit glyph ID is emitted through
// SkPDFUtils::WriteUInt16BE; otherwise the ID is narrowed with SkToU8 and
// emitted through SkPDFUtils::WriteUInt8, so the code width matches the
// codespace range declared for the font.
//
// NOTE(review): the single-byte path assumes |gid| fits in 8 bits; confirm
// that callers only pass already-remapped IDs in that mode.
static void write_glyph(SkDynamicMemoryWStream* cmap,
                        bool multiByte,
                        SkGlyphID gid) {
    if (!multiByte) {
        // Single-byte font: one byte per character code.
        SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
        return;
    }
    // Multi-byte font: two bytes per character code.
    SkPDFUtils::WriteUInt16BE(cmap, gid);
}
static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
bool multiByte,
SkDynamicMemoryWStream* cmap) {
// PDF spec defines that every bf* list can have at most 100 entries.
for (int i = 0; i < bfchar.count(); i += 100) {
@ -92,7 +100,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
cmap->writeText(" beginbfchar\n");
for (int j = 0; j < count; ++j) {
cmap->writeText("<");
SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId);
write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
cmap->writeText("> <");
write_utf16be(cmap, bfchar[i + j].fUnicode);
cmap->writeText(">\n");
@ -102,6 +110,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
}
static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
bool multiByte,
SkDynamicMemoryWStream* cmap) {
// PDF spec defines that every bf* list can have at most 100 entries.
for (int i = 0; i < bfrange.count(); i += 100) {
@ -111,9 +120,9 @@ static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
cmap->writeText(" beginbfrange\n");
for (int j = 0; j < count; ++j) {
cmap->writeText("<");
SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart);
write_glyph(cmap, multiByte, bfrange[i + j].fStart);
cmap->writeText("> <");
SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd);
write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
cmap->writeText("> <");
write_utf16be(cmap, bfrange[i + j].fUnicode);
cmap->writeText(">\n");
@ -206,8 +215,8 @@ void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
// The spec requires all bfchar entries for a font must come before bfrange
// entries.
append_bfchar_section(bfcharEntries, cmap);
append_bfrange_section(bfrangeEntries, cmap);
append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
}
sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
@ -217,11 +226,7 @@ sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
SkGlyphID firstGlyphID,
SkGlyphID lastGlyphID) {
SkDynamicMemoryWStream cmap;
if (multiByteGlyphs) {
append_tounicode_header(&cmap, firstGlyphID, lastGlyphID);
} else {
append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1);
}
append_tounicode_header(&cmap, multiByteGlyphs);
SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
firstGlyphID, lastGlyphID);
append_cmap_footer(&cmap);

View File

@ -125,11 +125,11 @@ endbfchar\n";
char expectedResultSingleBytes[] =
"2 beginbfchar\n\
<0001> <0000>\n\
<0002> <0000>\n\
<01> <0000>\n\
<02> <0000>\n\
endbfchar\n\
1 beginbfrange\n\
<0003> <0006> <1010>\n\
<03> <06> <1010>\n\
endbfrange\n";
REPORTER_ASSERT(reporter, stream_equals(buffer, 0,