SkPDF: Fix Type3 ToUnicode table.
This seems to fix text extraction on Adobe Reader:
- Registry/Ordering is now set to Skia/SkiaOrdering.
- Type3 fonts now get a FontDescriptor (force symbolic font).
- CMapName is now Skia-Identity-SkiaOrdering.
- CMap behaves correctly for single-byte fonts.
Also:
- SkTestTypeface returns a ToUnicode map for testing.
- Unit test updated.
All PDFs render the same.
BUG=skia:5606
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2292303004
Review-Url: https://codereview.chromium.org/2292303004
This commit is contained in:
parent
41a8f323f7
commit
3d01c62e19
@ -150,7 +150,15 @@ SkAdvancedTypefaceMetrics* SkTestTypeface::onGetAdvancedTypefaceMetrics(
|
||||
// pdf only
|
||||
SkAdvancedTypefaceMetrics* info = new SkAdvancedTypefaceMetrics;
|
||||
info->fFontName.set(fTestFont->fName);
|
||||
info->fLastGlyphID = SkToU16(onCountGlyphs() - 1);
|
||||
int glyphCount = this->onCountGlyphs();
|
||||
info->fLastGlyphID = SkToU16(glyphCount - 1);
|
||||
|
||||
SkTDArray<SkUnichar>& toUnicode = info->fGlyphToUnicode;
|
||||
toUnicode.setCount(glyphCount);
|
||||
SkASSERT(glyphCount == SkToInt(fTestFont->fCharCodesCount));
|
||||
for (int gid = 0; gid < glyphCount; ++gid) {
|
||||
toUnicode[gid] = SkToS32(fTestFont->fCharCodes[gid]);
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
|
@ -29,7 +29,7 @@ namespace {
|
||||
// PDF's notion of symbolic vs non-symbolic is related to the character set, not
|
||||
// symbols vs. characters. Rarely is a font the right character set to call it
|
||||
// non-symbolic, so always call it symbolic. (PDF 1.4 spec, section 5.7.1)
|
||||
static const int kPdfSymbolic = 4;
|
||||
static const int32_t kPdfSymbolic = 4;
|
||||
|
||||
struct SkPDFType0Font final : public SkPDFFont {
|
||||
SkPDFType0Font(SkPDFFont::Info, const SkAdvancedTypefaceMetrics&);
|
||||
@ -426,8 +426,9 @@ void SkPDFType0Font::getFontSubset(SkPDFCanon* canon) {
|
||||
}
|
||||
|
||||
auto sysInfo = sk_make_sp<SkPDFDict>();
|
||||
sysInfo->insertString("Registry", "Adobe");
|
||||
sysInfo->insertString("Ordering", "Identity");
|
||||
sysInfo->insertString("Registry", "Skia");
|
||||
// TODO: Registry+Ordering should be globally unique!
|
||||
sysInfo->insertString("Ordering", "SkiaOrdering");
|
||||
sysInfo->insertInt("Supplement", 0);
|
||||
newCIDFont->insertObject("CIDSystemInfo", std::move(sysInfo));
|
||||
|
||||
@ -597,6 +598,7 @@ static void add_type3_font_info(SkPDFCanon* canon,
|
||||
const SkBitSet& subset,
|
||||
SkGlyphID firstGlyphID,
|
||||
SkGlyphID lastGlyphID) {
|
||||
const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon);
|
||||
SkASSERT(lastGlyphID >= firstGlyphID);
|
||||
// Remove unused glyphs at the end of the range.
|
||||
// Keep the lastGlyphID >= firstGlyphID invariant true.
|
||||
@ -684,8 +686,7 @@ static void add_type3_font_info(SkPDFCanon* canon,
|
||||
fontBBox->appendInt(bbox.top());
|
||||
font->insertObject("FontBBox", std::move(fontBBox));
|
||||
font->insertName("CIDToGIDMap", "Identity");
|
||||
const SkAdvancedTypefaceMetrics* metrics = SkPDFFont::GetMetrics(typeface, canon);
|
||||
if (metrics /* && metrics->fGlyphToUnicode.count() > 0 */) {
|
||||
if (metrics && metrics->fGlyphToUnicode.count() > 0) {
|
||||
font->insertObjRef("ToUnicode",
|
||||
SkPDFMakeToUnicodeCmap(metrics->fGlyphToUnicode,
|
||||
&subset,
|
||||
@ -693,6 +694,16 @@ static void add_type3_font_info(SkPDFCanon* canon,
|
||||
firstGlyphID,
|
||||
lastGlyphID));
|
||||
}
|
||||
auto descriptor = sk_make_sp<SkPDFDict>("FontDescriptor");
|
||||
int32_t fontDescriptorFlags = kPdfSymbolic;
|
||||
if (metrics) {
|
||||
// Type3 FontDescriptor does not require all the same fields.
|
||||
descriptor->insertName("FontName", metrics->fFontName);
|
||||
descriptor->insertInt("ItalicAngle", metrics->fItalicAngle);
|
||||
fontDescriptorFlags |= (int32_t)metrics->fStyle;
|
||||
}
|
||||
descriptor->insertInt("Flags", fontDescriptorFlags);
|
||||
font->insertObjRef("FontDescriptor", std::move(descriptor));
|
||||
font->insertObject("Widths", std::move(widthArray));
|
||||
font->insertObject("Encoding", std::move(encoding));
|
||||
font->insertObject("CharProcs", std::move(charProcs));
|
||||
|
@ -10,8 +10,7 @@
|
||||
#include "SkUtils.h"
|
||||
|
||||
static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
|
||||
SkGlyphID firstGlyphID,
|
||||
SkGlyphID lastGlyphID) {
|
||||
bool multibyte) {
|
||||
// 12 dict begin: 12 is an Adobe-suggested value. Shall not change.
|
||||
// It's there to prevent old version Adobe Readers from malfunctioning.
|
||||
const char* kHeader =
|
||||
@ -26,8 +25,8 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
|
||||
// different. This is not a reference object.
|
||||
const char* kSysInfo =
|
||||
"/CIDSystemInfo\n"
|
||||
"<< /Registry (Adobe)\n"
|
||||
"/Ordering (UCS)\n"
|
||||
"<< /Registry (Skia)\n"
|
||||
"/Ordering (SkiaOrdering)\n"
|
||||
"/Supplement 0\n"
|
||||
">> def\n";
|
||||
cmap->writeText(kSysInfo);
|
||||
@ -36,18 +35,16 @@ static void append_tounicode_header(SkDynamicMemoryWStream* cmap,
|
||||
// /CMapType 2 means ToUnicode.
|
||||
// Codespace range just tells the PDF processor the valid range.
|
||||
const char* kTypeInfoHeader =
|
||||
"/CMapName /Adobe-Identity-UCS def\n"
|
||||
"/CMapName /Skia-Identity-SkiaOrdering def\n"
|
||||
"/CMapType 2 def\n"
|
||||
"1 begincodespacerange\n";
|
||||
cmap->writeText(kTypeInfoHeader);
|
||||
|
||||
// e.g. "<0000> <FFFF>\n"
|
||||
SkString range;
|
||||
range.appendf("<%04X> <%04X>\n", firstGlyphID, lastGlyphID);
|
||||
cmap->writeText(range.c_str());
|
||||
|
||||
const char* kTypeInfoFooter = "endcodespacerange\n";
|
||||
cmap->writeText(kTypeInfoFooter);
|
||||
if (multibyte) {
|
||||
cmap->writeText("<0000> <FFFF>\n");
|
||||
} else {
|
||||
cmap->writeText("<00> <FF>\n");
|
||||
}
|
||||
cmap->writeText("endcodespacerange\n");
|
||||
}
|
||||
|
||||
static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
|
||||
@ -82,7 +79,18 @@ static void write_utf16be(SkDynamicMemoryWStream* wStream, SkUnichar utf32) {
|
||||
}
|
||||
}
|
||||
|
||||
// Writes a single glyph ID to the CMap stream, choosing the width of the
// source code from the kind of font being described.
//
// cmap:      stream receiving the CMap text; the bf* section writers call this
//            between the "<" and ">" delimiters of an entry.
// multiByte: true  -> the glyph ID is written via WriteUInt16BE (16-bit code);
//            false -> the glyph ID is written via WriteUInt8 (8-bit code).
// gid:       glyph ID to emit.
//
// NOTE(review): the updated unit-test expectations ("<01>" vs. "<0001>")
// suggest the helpers emit 2 or 4 hex digits respectively -- confirm against
// SkPDFUtils.
static void write_glyph(SkDynamicMemoryWStream* cmap,
|
||||
bool multiByte,
|
||||
SkGlyphID gid) {
|
||||
if (multiByte) {
|
||||
SkPDFUtils::WriteUInt16BE(cmap, gid);
|
||||
} else {
|
||||
// Single-byte fonts use one-byte codes, so the glyph ID is narrowed with
// SkToU8 (presumably a checked cast; callers are expected to pass IDs
// that fit in 8 bits -- TODO confirm).
SkPDFUtils::WriteUInt8(cmap, SkToU8(gid));
|
||||
}
|
||||
}
|
||||
|
||||
static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
|
||||
bool multiByte,
|
||||
SkDynamicMemoryWStream* cmap) {
|
||||
// PDF spec defines that every bf* list can have at most 100 entries.
|
||||
for (int i = 0; i < bfchar.count(); i += 100) {
|
||||
@ -92,7 +100,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
|
||||
cmap->writeText(" beginbfchar\n");
|
||||
for (int j = 0; j < count; ++j) {
|
||||
cmap->writeText("<");
|
||||
SkPDFUtils::WriteUInt16BE(cmap, bfchar[i + j].fGlyphId);
|
||||
write_glyph(cmap, multiByte, bfchar[i + j].fGlyphId);
|
||||
cmap->writeText("> <");
|
||||
write_utf16be(cmap, bfchar[i + j].fUnicode);
|
||||
cmap->writeText(">\n");
|
||||
@ -102,6 +110,7 @@ static void append_bfchar_section(const SkTDArray<BFChar>& bfchar,
|
||||
}
|
||||
|
||||
static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
|
||||
bool multiByte,
|
||||
SkDynamicMemoryWStream* cmap) {
|
||||
// PDF spec defines that every bf* list can have at most 100 entries.
|
||||
for (int i = 0; i < bfrange.count(); i += 100) {
|
||||
@ -111,9 +120,9 @@ static void append_bfrange_section(const SkTDArray<BFRange>& bfrange,
|
||||
cmap->writeText(" beginbfrange\n");
|
||||
for (int j = 0; j < count; ++j) {
|
||||
cmap->writeText("<");
|
||||
SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fStart);
|
||||
write_glyph(cmap, multiByte, bfrange[i + j].fStart);
|
||||
cmap->writeText("> <");
|
||||
SkPDFUtils::WriteUInt16BE(cmap, bfrange[i + j].fEnd);
|
||||
write_glyph(cmap, multiByte, bfrange[i + j].fEnd);
|
||||
cmap->writeText("> <");
|
||||
write_utf16be(cmap, bfrange[i + j].fUnicode);
|
||||
cmap->writeText(">\n");
|
||||
@ -206,8 +215,8 @@ void SkPDFAppendCmapSections(const SkTDArray<SkUnichar>& glyphToUnicode,
|
||||
|
||||
// The spec requires all bfchar entries for a font must come before bfrange
|
||||
// entries.
|
||||
append_bfchar_section(bfcharEntries, cmap);
|
||||
append_bfrange_section(bfrangeEntries, cmap);
|
||||
append_bfchar_section(bfcharEntries, multiByteGlyphs, cmap);
|
||||
append_bfrange_section(bfrangeEntries, multiByteGlyphs, cmap);
|
||||
}
|
||||
|
||||
sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
|
||||
@ -217,11 +226,7 @@ sk_sp<SkPDFStream> SkPDFMakeToUnicodeCmap(
|
||||
SkGlyphID firstGlyphID,
|
||||
SkGlyphID lastGlyphID) {
|
||||
SkDynamicMemoryWStream cmap;
|
||||
if (multiByteGlyphs) {
|
||||
append_tounicode_header(&cmap, firstGlyphID, lastGlyphID);
|
||||
} else {
|
||||
append_tounicode_header(&cmap, 1, lastGlyphID - firstGlyphID + 1);
|
||||
}
|
||||
append_tounicode_header(&cmap, multiByteGlyphs);
|
||||
SkPDFAppendCmapSections(glyphToUnicode, subset, &cmap, multiByteGlyphs,
|
||||
firstGlyphID, lastGlyphID);
|
||||
append_cmap_footer(&cmap);
|
||||
|
@ -125,11 +125,11 @@ endbfchar\n";
|
||||
|
||||
char expectedResultSingleBytes[] =
|
||||
"2 beginbfchar\n\
|
||||
<0001> <0000>\n\
|
||||
<0002> <0000>\n\
|
||||
<01> <0000>\n\
|
||||
<02> <0000>\n\
|
||||
endbfchar\n\
|
||||
1 beginbfrange\n\
|
||||
<0003> <0006> <1010>\n\
|
||||
<03> <06> <1010>\n\
|
||||
endbfrange\n";
|
||||
|
||||
REPORTER_ASSERT(reporter, stream_equals(buffer, 0,
|
||||
|
Loading…
Reference in New Issue
Block a user