SkPDF: Fix Type3 ToUnicode table.

This seems to fix text extraction on Adobe Reader:

- Registry/Ordering is now set to Skia/SkiaOrdering.
- Type3 fonts now get a FontDescriptor (force symbolic font).
- CMapName is now Skia-Identity-SkiaOrdering.
- CMap behaves correctly for single-byte fonts.

Also:

- SkTestTypeface returns tounicode map for testing.
- Unit test updated.

All PDFs render the same.

BUG=skia:5606
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2292303004
Review-Url: https://codereview.chromium.org/2292303004
2016-08-31 12:52:35 -07:00 · 2016-08-31 12:52:35 -07:00 · 3d01c62e19
commit 3d01c62e19
parent 41a8f323f7
4 changed files with 56 additions and 32 deletions
--- a/src/fonts/SkTestScalerContext.cpp
+++ b/src/fonts/SkTestScalerContext.cpp
@ -150,7 +150,15 @@ SkAdvancedTypefaceMetrics* SkTestTypeface::onGetAdvancedTypefaceMetrics(
 // pdf only
    SkAdvancedTypefaceMetrics* info = new SkAdvancedTypefaceMetrics;
    info->fFontName.set(fTestFont->fName);
-    info->fLastGlyphID = SkToU16(onCountGlyphs() - 1);
+    int glyphCount = this->onCountGlyphs();
+    info->fLastGlyphID = SkToU16(glyphCount - 1);
+
+    SkTDArray<SkUnichar>& toUnicode = info->fGlyphToUnicode;
+    toUnicode.setCount(glyphCount);
+    SkASSERT(glyphCount == SkToInt(fTestFont->fCharCodesCount));
+    for (int gid = 0; gid < glyphCount; ++gid) {
+        toUnicode[gid] = SkToS32(fTestFont->fCharCodes[gid]);
+    }
    return info;
 }

--- a/src/pdf/SkPDFFont.cpp
+++ b/src/pdf/SkPDFFont.cpp
@ -29,7 +29,7 @@ namespace {
 // PDF's notion of symbolic vs non-symbolic is related to the character set, not
 // symbols vs. characters.  Rarely is a font the right character set to call it
 // non-symbolic, so always call it symbolic.  (PDF 1.4 spec, section 5.7.1)
-static const int kPdfSymbolic = 4;
+static const int32_t kPdfSymbolic = 4;

 struct SkPDFType0Font final : public SkPDFFont {
    SkPDFType0Font(SkPDFFont::Info, const SkAdvancedTypefaceMetrics&);
@ -426,8 +426,9 @@ void SkPDFType0Font::getFontSubset(SkPDFCanon* canon) {
    }

    auto sysInfo = sk_make_sp<SkPDFDict>();
-    sysInfo->insertString("Registry", "Adobe");
-    sysInfo->insertString("Ordering", "Identity");
+    sysInfo->insertString("Registry", "Skia");
+    // TODO: Registry+Ordering should be globally unique!
+    sysInfo->insertString("Ordering", "SkiaOrdering");
    sysInfo->insertInt("Supplement", 0);
    newCIDFont->insertObject("CIDSystemInfo", std::move(sysInfo));

@ -597,6 +598,7 @@ static void add_type3_font_info(SkPDFCanon* canon,
                                const SkBitSet& subset,
                                SkGlyphID firstGlyphID,
                                SkGlyphID lastGlyphID) {
+    const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon);
    SkASSERT(lastGlyphID >= firstGlyphID);
    // Remove unused glyphs at the end of the range.
    // Keep the lastGlyphID >= firstGlyphID invariant true.
@ -684,8 +686,7 @@ static void add_type3_font_info(SkPDFCanon* canon,
    fontBBox->appendInt(bbox.top());
    font->insertObject("FontBBox", std::move(fontBBox));
    font->insertName("CIDToGIDMap", "Identity");
-    const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon);
-    if (metrics /* && metrics->fGlyphToUnicode.count() > 0 */) {
+    if (metrics && metrics->fGlyphToUnicode.count() > 0) {
        font->insertObjRef("ToUnicode",
                           SkPDFMakeToUnicodeCmap(metrics->fGlyphToUnicode,
                                                  &subset,
@ -693,6 +694,16 @@ static void add_type3_font_info(SkPDFCanon* canon,
                                                  firstGlyphID,
                                                  lastGlyphID));
    }
+    auto descriptor = sk_make_sp<SkPDFDict>("FontDescriptor");
+    int32_t fontDescriptorFlags = kPdfSymbolic;
+    if (metrics) {
+        // Type3 FontDescriptor does not require all the same fields.
+        descriptor->insertName("FontName", metrics->fFontName);
+        descriptor->insertInt("ItalicAngle", metrics->fItalicAngle);
+        fontDescriptorFlags |= (int32_t)metrics->fStyle;
+    }
+    descriptor->insertInt("Flags", fontDescriptorFlags);
+    font->insertObjRef("FontDescriptor", std::move(descriptor));
    font->insertObject("Widths", std::move(widthArray));
    font->insertObject("Encoding", std::move(encoding));
    font->insertObject("CharProcs", std::move(charProcs));
--- a/src/pdf/SkPDFMakeToUnicodeCmap.cpp
+++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp
@ -10,8 +10,7 @@
 #include "SkUtils.h"

 static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
-                                    SkGlyphID firstGlyphID,
-                                    SkGlyphID lastGlyphID) {
+                                    bool multibyte) {
    // 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
    // It's there to prevent old version Adobe Readers from malfunctioning.
    const char* kHeader =
@ -26,8 +25,8 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
    // different. This is not a reference object.
    const char* kSysInfo =
        "/CIDSystemInfo\n"
-        "<<  /Registry (Adobe)\n"
-        "/Ordering (UCS)\n"
+        "<<  /Registry (Skia)\n"
+        "/Ordering (SkiaOrdering)\n"
        "/Supplement 0\n"
        ">> def\n";
    cmap->writeText(kSysInfo);
@ -36,18 +35,16 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
    // /CMapType 2 means ToUnicode.
    // Codespace range just tells the PDF processor the valid range.
    const char* kTypeInfoHeader =
-        "/CMapName /Adobe-Identity-UCS def\n"
+        "/CMapName /Skia-Identity-SkiaOrdering def\n"
        "/CMapType 2 def\n"
        "1 begincodespacerange\n";
    cmap->writeText(kTypeInfoHeader);
-
-    // e.g.     "<0000> <FFFF>\n"
-    SkString range;
-    range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID);
-    cmap->writeText(range.c_str());
-
-    const char* kTypeInfoFooter = "endcodespacerange\n";
-    cmap->writeText(kTypeInfoFooter);
+    if (multibyte) {
+        cmap->writeText("<0000> <FFFF>\n");
+    } else {
+        cmap->writeText("<00> <FF>\n");
+    }
+    cmap->writeText("endcodespacerange\n");
 }

 static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
@ -82,7 +79,18 @@ static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
    }
 }

+static void write_glyph(SkDynamicMemoryWStream* cmap,
+                        bool multiByte,
+                        SkGlyphID gid) {
+    if (multiByte) {
+        SkPDFUtils::WriteUInt16BE(cmap, gid);
+    } else {
+        SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
+    }
+}
+
 static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
+                                  bool multiByte,
                                  SkDynamicMemoryWStream* cmap) {
    // PDF spec defines that every bf* list can have at most 100 entries.
    for (int i = 0; i < bfchar.count(); i += 100) {
@ -92,7 +100,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
        cmap->writeText(" beginbfchar\n");
        for (int j = 0; j < count; ++j) {
            cmap->writeText("<");
-            SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId);
+            write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
            cmap->writeText("> <");
            write_utf16be(cmap, bfchar[i + j].fUnicode);
            cmap->writeText(">\n");
@ -102,6 +110,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
 }

 static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
+                                   bool multiByte,
                                   SkDynamicMemoryWStream* cmap) {
    // PDF spec defines that every bf* list can have at most 100 entries.
    for (int i = 0; i < bfrange.count(); i += 100) {
@ -111,9 +120,9 @@ static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
        cmap->writeText(" beginbfrange\n");
        for (int j = 0; j < count; ++j) {
            cmap->writeText("<");
-            SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart);
+            write_glyph(cmap, multiByte, bfrange[i + j].fStart);
            cmap->writeText("> <");
-            SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd);
+            write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
            cmap->writeText("> <");
            write_utf16be(cmap, bfrange[i + j].fUnicode);
            cmap->writeText(">\n");
@ -206,8 +215,8 @@ void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,

    // The spec requires all bfchar entries for a font must come before bfrange
    // entries.
-    append_bfchar_section(bfcharEntries, cmap);
-    append_bfrange_section(bfrangeEntries, cmap);
+    append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
+    append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
 }

 sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
@ -217,11 +226,7 @@ sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
        SkGlyphID firstGlyphID,
        SkGlyphID lastGlyphID) {
    SkDynamicMemoryWStream cmap;
-    if (multiByteGlyphs) {
-        append_tounicode_header(&cmap, firstGlyphID, lastGlyphID);
-    } else {
-        append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1);
-    }
+    append_tounicode_header(&cmap, multiByteGlyphs);
    SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
                            firstGlyphID, lastGlyphID);
    append_cmap_footer(&cmap);
--- a/tests/PDFGlyphsToUnicodeTest.cpp
+++ b/tests/PDFGlyphsToUnicodeTest.cpp
@ -125,11 +125,11 @@ endbfchar\n";

    char expectedResultSingleBytes[] =
 "2 beginbfchar\n\
-<0001> <0000>\n\
-<0002> <0000>\n\
+<01> <0000>\n\
+<02> <0000>\n\
 endbfchar\n\
 1 beginbfrange\n\
-<0003> <0006> <1010>\n\
+<03> <06> <1010>\n\
 endbfrange\n";

    REPORTER_ASSERT(reporter, stream_equals(buffer, 0,