From 3d01c62e19df9f369cdfaeff82ec8af2c0be75f1 Mon Sep 17 00:00:00 2001 From: halcanary Date: Wed, 31 Aug 2016 12:52:35 -0700 Subject: [PATCH] SkPDF: Fix Type3 ToUnicode table. This seems to fix text extraction on Adobe Reader - Registry/Ordering is now set to Skia/SkiaOrdering. - Type3 fonts now get a FontDescriptor (force symbolic font). - CMapName is now Skia-Identity-SkiaOrdering - CMap behaves correctly for single-byte fonts. Also: - SkTestTypeface returns tounicode map for testing. - Unit test updated All PDFs render the same BUG=skia:5606 GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2292303004 Review-Url: https://codereview.chromium.org/2292303004 --- src/fonts/SkTestScalerContext.cpp | 10 +++++- src/pdf/SkPDFFont.cpp | 21 +++++++++--- src/pdf/SkPDFMakeToUnicodeCmap.cpp | 51 ++++++++++++++++-------------- tests/PDFGlyphsToUnicodeTest.cpp | 6 ++-- 4 files changed, 56 insertions(+), 32 deletions(-) diff --git a/src/fonts/SkTestScalerContext.cpp b/src/fonts/SkTestScalerContext.cpp index fcb65a83c1..09b20ba71b 100644 --- a/src/fonts/SkTestScalerContext.cpp +++ b/src/fonts/SkTestScalerContext.cpp @@ -150,7 +150,15 @@ SkAdvancedTypefaceMetrics* SkTestTypeface::onGetAdvancedTypefaceMetrics( // pdf only SkAdvancedTypefaceMetrics* info = new SkAdvancedTypefaceMetrics; info->fFontName.set(fTestFont->fName); - info->fLastGlyphID = SkToU16(onCountGlyphs() - 1); + int glyphCount = this->onCountGlyphs(); + info->fLastGlyphID = SkToU16(glyphCount - 1); + + SkTDArray& toUnicode = info->fGlyphToUnicode; + toUnicode.setCount(glyphCount); + SkASSERT(glyphCount == SkToInt(fTestFont->fCharCodesCount)); + for (int gid = 0; gid < glyphCount; ++gid) { + toUnicode[gid] = SkToS32(fTestFont->fCharCodes[gid]); + } return info; } diff --git a/src/pdf/SkPDFFont.cpp b/src/pdf/SkPDFFont.cpp index 93f48332d8..32e365388a 100644 --- a/src/pdf/SkPDFFont.cpp +++ b/src/pdf/SkPDFFont.cpp @@ -29,7 +29,7 @@ namespace { // PDF's notion of symbolic vs non-symbolic is related to the character set, not // symbols vs. characters. Rarely is a font the right character set to call it // non-symbolic, so always call it symbolic. (PDF 1.4 spec, section 5.7.1) -static const int kPdfSymbolic = 4; +static const int32_t kPdfSymbolic = 4; struct SkPDFType0Font final : public SkPDFFont { SkPDFType0Font(SkPDFFont::Info, const SkAdvancedTypefaceMetrics&); @@ -426,8 +426,9 @@ void SkPDFType0Font::getFontSubset(SkPDFCanon* canon) { } auto sysInfo = sk_make_sp(); - sysInfo->insertString("Registry", "Adobe"); - sysInfo->insertString("Ordering", "Identity"); + sysInfo->insertString("Registry", "Skia"); + // TODO: Registry+Ordering should be globally unique! + sysInfo->insertString("Ordering", "SkiaOrdering"); sysInfo->insertInt("Supplement", 0); newCIDFont->insertObject("CIDSystemInfo", std::move(sysInfo)); @@ -597,6 +598,7 @@ static void add_type3_font_info(SkPDFCanon* canon, const SkBitSet& subset, SkGlyphID firstGlyphID, SkGlyphID lastGlyphID) { + const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon); SkASSERT(lastGlyphID >= firstGlyphID); // Remove unused glyphs at the end of the range. // Keep the lastGlyphID >= firstGlyphID invariant true. @@ -684,8 +686,7 @@ static void add_type3_font_info(SkPDFCanon* canon, fontBBox->appendInt(bbox.top()); font->insertObject("FontBBox", std::move(fontBBox)); font->insertName("CIDToGIDMap", "Identity"); - const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon); - if (metrics /* && metrics->fGlyphToUnicode.count() > 0 */) { + if (metrics && metrics->fGlyphToUnicode.count() > 0) { font->insertObjRef("ToUnicode", SkPDFMakeToUnicodeCmap(metrics->fGlyphToUnicode, &subset, @@ -693,6 +694,16 @@ static void add_type3_font_info(SkPDFCanon* canon, firstGlyphID, lastGlyphID)); } + auto descriptor = sk_make_sp("FontDescriptor"); + int32_t fontDescriptorFlags = kPdfSymbolic; + if (metrics) { + // Type3 FontDescriptor does not require all the same fields. + descriptor->insertName("FontName", metrics->fFontName); + descriptor->insertInt("ItalicAngle", metrics->fItalicAngle); + fontDescriptorFlags |= (int32_t)metrics->fStyle; + } + descriptor->insertInt("Flags", fontDescriptorFlags); + font->insertObjRef("FontDescriptor", std::move(descriptor)); font->insertObject("Widths", std::move(widthArray)); font->insertObject("Encoding", std::move(encoding)); font->insertObject("CharProcs", std::move(charProcs)); diff --git a/src/pdf/SkPDFMakeToUnicodeCmap.cpp b/src/pdf/SkPDFMakeToUnicodeCmap.cpp index 5186cbbda1..7fc5c59be3 100644 --- a/src/pdf/SkPDFMakeToUnicodeCmap.cpp +++ b/src/pdf/SkPDFMakeToUnicodeCmap.cpp @@ -10,8 +10,7 @@ #include "SkUtils.h" static void append_tounicode_header(SkDynamicMemoryWStream* cmap, - SkGlyphID firstGlyphID, - SkGlyphID lastGlyphID) { + bool multibyte) { // 12 dict begin: 12 is an Adobe-suggested value. Shall not change. // It's there to prevent old version Adobe Readers from malfunctioning. const char* kHeader = @@ -26,8 +25,8 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap, // different. This is not a reference object. const char* kSysInfo = "/CIDSystemInfo\n" - "<< /Registry (Adobe)\n" - "/Ordering (UCS)\n" + "<< /Registry (Skia)\n" + "/Ordering (SkiaOrdering)\n" "/Supplement 0\n" ">> def\n"; cmap->writeText(kSysInfo); @@ -36,18 +35,16 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap, // /CMapType 2 means ToUnicode. // Codespace range just tells the PDF processor the valid range. const char* kTypeInfoHeader = - "/CMapName /Adobe-Identity-UCS def\n" + "/CMapName /Skia-Identity-SkiaOrdering def\n" "/CMapType 2 def\n" "1 begincodespacerange\n"; cmap->writeText(kTypeInfoHeader); - - // e.g. "<0000> \n" - SkString range; - range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID); - cmap->writeText(range.c_str()); - - const char* kTypeInfoFooter = "endcodespacerange\n"; - cmap->writeText(kTypeInfoFooter); + if (multibyte) { + cmap->writeText("<0000> \n"); + } else { + cmap->writeText("<00> \n"); + } + cmap->writeText("endcodespacerange\n"); } static void append_cmap_footer(SkDynamicMemoryWStream* cmap) { @@ -82,7 +79,18 @@ static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) { } } +static void write_glyph(SkDynamicMemoryWStream* cmap, + bool multiByte, + SkGlyphID gid) { + if (multiByte) { + SkPDFUtils::WriteUInt16BE(cmap, gid); + } else { + SkPDFUtils::WriteUInt8(cmap, SkToU8(gid)); + } +} + static void append_bfchar_section(const SkTDArray& bfchar, + bool multiByte, SkDynamicMemoryWStream* cmap) { // PDF spec defines that every bf* list can have at most 100 entries. for (int i = 0; i < bfchar.count(); i += 100) { @@ -92,7 +100,7 @@ static void append_bfchar_section(const SkTDArray& bfchar, cmap->writeText(" beginbfchar\n"); for (int j = 0; j < count; ++j) { cmap->writeText("<"); - SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId); + write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId); cmap->writeText("> <"); write_utf16be(cmap, bfchar[i + j].fUnicode); cmap->writeText(">\n"); @@ -102,6 +110,7 @@ static void append_bfchar_section(const SkTDArray& bfchar, } static void append_bfrange_section(const SkTDArray& bfrange, + bool multiByte, SkDynamicMemoryWStream* cmap) { // PDF spec defines that every bf* list can have at most 100 entries. for (int i = 0; i < bfrange.count(); i += 100) { @@ -111,9 +120,9 @@ static void append_bfrange_section(const SkTDArray& bfrange, cmap->writeText(" beginbfrange\n"); for (int j = 0; j < count; ++j) { cmap->writeText("<"); - SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart); + write_glyph(cmap, multiByte, bfrange[i + j].fStart); cmap->writeText("> <"); - SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd); + write_glyph(cmap, multiByte, bfrange[i + j].fEnd); cmap->writeText("> <"); write_utf16be(cmap, bfrange[i + j].fUnicode); cmap->writeText(">\n"); @@ -206,8 +215,8 @@ void SkPDFAppendCmapSections(const SkTDArray& glyphToUnicode, // The spec requires all bfchar entries for a font must come before bfrange // entries. - append_bfchar_section(bfcharEntries, cmap); - append_bfrange_section(bfrangeEntries, cmap); + append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap); + append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap); } sk_sp SkPDFMakeToUnicodeCmap( @@ -217,11 +226,7 @@ sk_sp SkPDFMakeToUnicodeCmap( SkGlyphID firstGlyphID, SkGlyphID lastGlyphID) { SkDynamicMemoryWStream cmap; - if (multiByteGlyphs) { - append_tounicode_header(&cmap, firstGlyphID, lastGlyphID); - } else { - append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1); - } + append_tounicode_header(&cmap, multiByteGlyphs); SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs, firstGlyphID, lastGlyphID); append_cmap_footer(&cmap); diff --git a/tests/PDFGlyphsToUnicodeTest.cpp b/tests/PDFGlyphsToUnicodeTest.cpp index d83ce664bc..3ba8870774 100644 --- a/tests/PDFGlyphsToUnicodeTest.cpp +++ b/tests/PDFGlyphsToUnicodeTest.cpp @@ -125,11 +125,11 @@ endbfchar\n"; char expectedResultSingleBytes[] = "2 beginbfchar\n\ -<0001> <0000>\n\ -<0002> <0000>\n\ +<01> <0000>\n\ +<02> <0000>\n\ endbfchar\n\ 1 beginbfrange\n\ -<0003> <0006> <1010>\n\ +<03> <06> <1010>\n\ endbfrange\n"; REPORTER_ASSERT(reporter, stream_equals(buffer, 0,