[PDF] Add a ToUnicode mapping for fonts.
This makes text in PDFs searchable and copy&paste-able. Code from arthurhsu@chromium.org. Original review: http://codereview.appspot.com/4428082/ Review URL: http://codereview.appspot.com/4525042 git-svn-id: http://skia.googlecode.com/svn/trunk@1280 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
339ac3d0a7
commit
6744d498fc
6
include/core/SkAdvancedTypefaceMetrics.h
Normal file → Executable file
6
include/core/SkAdvancedTypefaceMetrics.h
Normal file → Executable file
@ -82,6 +82,8 @@ public:
|
||||
kHAdvance_PerGlyphInfo = 0x1, // Populate horizontal advance data.
|
||||
kVAdvance_PerGlyphInfo = 0x2, // Populate vertical advance data.
|
||||
kGlyphNames_PerGlyphInfo = 0x4, // Populate glyph names (Type 1 only).
|
||||
kToUnicode_PerGlyphInfo = 0x8, // Populate ToUnicode table, ignored
|
||||
// for Type 1 fonts
|
||||
};
|
||||
|
||||
template <typename Data>
|
||||
@ -113,6 +115,10 @@ public:
|
||||
|
||||
// The names of each glyph, only populated for postscript fonts.
|
||||
SkTScopedPtr<SkAutoTArray<SkString> > fGlyphNames;
|
||||
|
||||
// The mapping from glyph to Unicode, only populated if
|
||||
// kToUnicode_PerGlyphInfo is passed to GetAdvancedTypefaceMetrics.
|
||||
SkTDArray<SkUnichar> fGlyphToUnicode;
|
||||
};
|
||||
|
||||
namespace skia_advanced_typeface_metrics_utils {
|
||||
|
@ -130,6 +130,7 @@ private:
|
||||
*/
|
||||
void populateType3Font(int16_t glyphID);
|
||||
bool addFontDescriptor(int16_t defaultWidth);
|
||||
void populateToUnicodeTable();
|
||||
void addWidthInfoFromRange(int16_t defaultWidth,
|
||||
const SkAdvancedTypefaceMetrics::WidthRange* widthRangeEntry);
|
||||
/** Set fFirstGlyphID and fLastGlyphID to span at most 255 glyphs,
|
||||
|
121
src/pdf/SkPDFFont.cpp
Normal file → Executable file
121
src/pdf/SkPDFFont.cpp
Normal file → Executable file
@ -319,6 +319,92 @@ SkPDFArray* composeAdvanceData(
|
||||
|
||||
} // namespace
|
||||
|
||||
// Writes the fixed preamble of a ToUnicode CMap to |cmap|: the CMap prologue,
// the /CIDSystemInfo dictionary, and the CMap name/type plus the codespace
// range. The bfchar sections and the footer are emitted by other helpers.
static void append_tounicode_header(SkDynamicMemoryWStream* cmap) {
    // "12 dict begin": 12 is the value Adobe suggests and must not be changed;
    // it keeps old versions of Adobe Reader from malfunctioning.
    static const char kPrologue[] =
        "/CIDInit /ProcSet findresource begin\n"
        "12 dict begin\n"
        "begincmap\n";

    // This /CIDSystemInfo has to stay consistent with the one written by
    // SkPDFFont::populateCIDFont(). The system info object cannot simply be
    // passed in here because the format differs: this is an inline
    // dictionary, not a reference object.
    static const char kCIDSystemInfo[] =
        "/CIDSystemInfo\n"
        "<< /Registry (Adobe)\n"
        "/Ordering (UCS)\n"
        "/Supplement 0\n"
        ">> def\n";

    // The CMapName has to be consistent with the /CIDSystemInfo above, and
    // /CMapType 2 marks the CMap as a ToUnicode map.
    // The codespace range is 0x0000-0xFFFF because the code table is built
    // from unsigned short (16-bit) glyph ids. The range only tells the PDF
    // processor which codes are valid; the mapping itself need not be
    // complete.
    static const char kCMapNameTypeAndRange[] =
        "/CMapName /Adobe-Identity-UCS def\n"
        "/CMapType 2 def\n"
        "1 begincodespacerange\n"
        "<0000> <FFFF>\n"
        "endcodespacerange\n";

    cmap->writeText(kPrologue);
    cmap->writeText(kCIDSystemInfo);
    cmap->writeText(kCMapNameTypeAndRange);
}
|
||||
|
||||
static void append_cmap_bfchar_table(uint16_t* glyph_id, SkUnichar* unicode,
|
||||
size_t count,
|
||||
SkDynamicMemoryWStream* cmap) {
|
||||
cmap->writeDecAsText(count);
|
||||
cmap->writeText(" beginbfchar\n");
|
||||
for (size_t i = 0; i < count; ++i) {
|
||||
cmap->writeText("<");
|
||||
cmap->writeHexAsText(glyph_id[i], 4);
|
||||
cmap->writeText("> <");
|
||||
cmap->writeHexAsText(unicode[i], 4);
|
||||
cmap->writeText(">\n");
|
||||
}
|
||||
cmap->writeText("endbfchar\n");
|
||||
}
|
||||
|
||||
// Writes the fixed closing lines of a ToUnicode CMap to |cmap|: the endcmap
// marker, the defineresource line, and the two "end" keywords. No newline
// follows the final "end".
static void append_cmap_footer(SkDynamicMemoryWStream* cmap) {
    static const char kEpilogue[] =
        "endcmap\n"
        "CMapName currentdict /CMap defineresource pop\n"
        "end\n"
        "end";
    cmap->writeText(kEpilogue);
}
|
||||
|
||||
// Generate <bfchar> table according to PDF spec 1.4 and Adobe Technote 5014.
|
||||
static void append_cmap_bfchar_sections(
|
||||
const SkTDArray<SkUnichar>& glyphUnicode,
|
||||
SkDynamicMemoryWStream* cmap) {
|
||||
// PDF spec defines that every bf* list can have at most 100 entries.
|
||||
const size_t kMaxEntries = 100;
|
||||
uint16_t glyphId[kMaxEntries];
|
||||
SkUnichar unicode[kMaxEntries];
|
||||
size_t index = 0;
|
||||
for (int i = 0; i < glyphUnicode.count(); i++) {
|
||||
if (glyphUnicode[i]) {
|
||||
glyphId[index] = i;
|
||||
unicode[index] = glyphUnicode[i];
|
||||
++index;
|
||||
}
|
||||
if (index == kMaxEntries) {
|
||||
append_cmap_bfchar_table(glyphId, unicode, index, cmap);
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (index) {
|
||||
append_cmap_bfchar_table(glyphId, unicode, index, cmap);
|
||||
}
|
||||
}
|
||||
|
||||
/* Font subset design: It would be nice to be able to subset fonts
|
||||
* (particularly type 3 fonts), but it's a lot of work and not a priority.
|
||||
*
|
||||
@ -404,9 +490,13 @@ SkPDFFont* SkPDFFont::getFontResource(SkTypeface* typeface, uint16_t glyphID) {
|
||||
fontInfo = relatedFont->fFontInfo;
|
||||
fontDescriptor = relatedFont->fDescriptor.get();
|
||||
} else {
|
||||
fontInfo = SkFontHost::GetAdvancedTypefaceMetrics(fontID, SkTBitOr(
|
||||
SkAdvancedTypefaceMetrics::kHAdvance_PerGlyphInfo,
|
||||
SkAdvancedTypefaceMetrics::kGlyphNames_PerGlyphInfo));
|
||||
SkAdvancedTypefaceMetrics::PerGlyphInfo info;
|
||||
info = SkAdvancedTypefaceMetrics::kHAdvance_PerGlyphInfo;
|
||||
info = SkTBitOr<SkAdvancedTypefaceMetrics::PerGlyphInfo>(
|
||||
info, SkAdvancedTypefaceMetrics::kGlyphNames_PerGlyphInfo);
|
||||
info = SkTBitOr<SkAdvancedTypefaceMetrics::PerGlyphInfo>(
|
||||
info, SkAdvancedTypefaceMetrics::kToUnicode_PerGlyphInfo);
|
||||
fontInfo = SkFontHost::GetAdvancedTypefaceMetrics(fontID, info);
|
||||
SkSafeUnref(fontInfo.get()); // SkRefPtr and Get both took a reference.
|
||||
}
|
||||
|
||||
@ -497,7 +587,6 @@ SkPDFFont::SkPDFFont(class SkAdvancedTypefaceMetrics* fontInfo,
|
||||
}
|
||||
|
||||
void SkPDFFont::populateType0Font() {
|
||||
// TODO(vandebo) add a ToUnicode mapping.
|
||||
fMultiByteGlyphs = true;
|
||||
|
||||
insert("Subtype", new SkPDFName("Type0"))->unref();
|
||||
@ -512,6 +601,26 @@ void SkPDFFont::populateType0Font() {
|
||||
new SkPDFFont(fFontInfo.get(), fTypeface.get(), 1, true, NULL));
|
||||
descendantFonts->append(new SkPDFObjRef(fResources.top()))->unref();
|
||||
insert("DescendantFonts", descendantFonts.get());
|
||||
|
||||
populateToUnicodeTable();
|
||||
}
|
||||
|
||||
void SkPDFFont::populateToUnicodeTable() {
|
||||
if (fFontInfo.get() == NULL ||
|
||||
fFontInfo->fGlyphToUnicode.begin() == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
SkDynamicMemoryWStream cmap;
|
||||
append_tounicode_header(&cmap);
|
||||
append_cmap_bfchar_sections(fFontInfo->fGlyphToUnicode, &cmap);
|
||||
append_cmap_footer(&cmap);
|
||||
SkRefPtr<SkMemoryStream> cmapStream = new SkMemoryStream();
|
||||
cmapStream->unref(); // SkRefPtr and new took a reference.
|
||||
cmapStream->setMemoryOwned(cmap.detach(), cmap.getOffset());
|
||||
SkRefPtr<SkPDFStream> pdfCmap = new SkPDFStream(cmapStream.get());
|
||||
fResources.push(pdfCmap.get()); // Pass reference from new.
|
||||
insert("ToUnicode", new SkPDFObjRef(pdfCmap.get()))->unref();
|
||||
}
|
||||
|
||||
void SkPDFFont::populateCIDFont() {
|
||||
@ -522,6 +631,7 @@ void SkPDFFont::populateCIDFont() {
|
||||
insert("Subtype", new SkPDFName("CIDFontType0"))->unref();
|
||||
} else if (fFontInfo->fType == SkAdvancedTypefaceMetrics::kTrueType_Font) {
|
||||
insert("Subtype", new SkPDFName("CIDFontType2"))->unref();
|
||||
insert("CIDToGIDMap", new SkPDFName("Identity"))->unref();
|
||||
} else {
|
||||
SkASSERT(false);
|
||||
}
|
||||
@ -697,9 +807,12 @@ void SkPDFFont::populateType3Font(int16_t glyphID) {
|
||||
insert("FirstChar", new SkPDFInt(fFirstGlyphID))->unref();
|
||||
insert("LastChar", new SkPDFInt(fLastGlyphID))->unref();
|
||||
insert("Widths", widthArray.get());
|
||||
insert("CIDToGIDMap", new SkPDFName("Identity"))->unref();
|
||||
|
||||
if (fFontInfo && fFontInfo->fLastGlyphID <= 255)
|
||||
fFontInfo = NULL;
|
||||
|
||||
populateToUnicodeTable();
|
||||
}
|
||||
|
||||
bool SkPDFFont::addFontDescriptor(int16_t defaultWidth) {
|
||||
|
@ -339,6 +339,56 @@ static bool getWidthAdvance(FT_Face face, int gId, int16_t* data) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fills |glyphToUnicode| (indexed by glyph id) with a Unicode code point for
// every glyph reachable through one of the face's Unicode charmaps. Entries
// for glyphs without a mapping stay 0. If the face has no usable Unicode
// charmap the array is left untouched.
//
// face            FreeType face to inspect. Its active charmap is switched
//                 while iterating and restored before returning.
// glyphToUnicode  output table; sized to face->num_glyphs and zeroed on first
//                 use if it is empty on entry.
static void populate_glyph_to_unicode(FT_Face& face,
                                      SkTDArray<SkUnichar>* glyphToUnicode) {
    // FT_Set_Charmap() changes which charmap later char-to-glyph lookups on
    // this face use, so remember the current selection and put it back at the
    // end rather than leaking whichever charmap was visited last.
    FT_CharMap originalCharmap = face->charmap;

    // Check and see if we have Unicode cmaps.
    for (int i = 0; i < face->num_charmaps; ++i) {
        // CMaps known to support Unicode:
        // Platform ID   Encoding ID   Name
        // -----------   -----------   -----------------------------------
        // 0             0,1           Apple Unicode
        // 0             3             Apple Unicode 2.0 (preferred)
        // 3             1             Microsoft Unicode UCS-2
        // 3             10            Microsoft Unicode UCS-4 (preferred)
        //
        // See Apple TrueType Reference Manual
        // http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html
        // http://developer.apple.com/fonts/TTRefMan/RM06/Chap6name.html#ID
        // Microsoft OpenType Specification
        // http://www.microsoft.com/typography/otspec/cmap.htm

        FT_UShort platformId = face->charmaps[i]->platform_id;
        FT_UShort encodingId = face->charmaps[i]->encoding_id;

        if (platformId != 0 && platformId != 3) {
            continue;
        }
        if (platformId == 3 && encodingId != 1 && encodingId != 10) {
            continue;
        }
        bool preferredMap = ((platformId == 3 && encodingId == 10) ||
                             (platformId == 0 && encodingId == 3));

        // If the charmap cannot be selected, the previously active charmap
        // would be iterated instead and attributed to this one; skip it.
        if (FT_Set_Charmap(face, face->charmaps[i]) != 0) {
            continue;
        }
        if (glyphToUnicode->isEmpty()) {
            glyphToUnicode->setCount(face->num_glyphs);
            memset(glyphToUnicode->begin(), 0,
                   sizeof(SkUnichar) * face->num_glyphs);
        }
        const FT_UInt tableSize =
                static_cast<FT_UInt>(glyphToUnicode->count());

        // Iterate through each cmap entry. A glyph index of 0 signals the end
        // of the charmap.
        FT_UInt glyphIndex;
        for (SkUnichar charCode = FT_Get_First_Char(face, &glyphIndex);
             glyphIndex != 0;
             charCode = FT_Get_Next_Char(face, charCode, &glyphIndex)) {
            // A malformed cmap may reference a glyph id beyond the table;
            // ignore such entries instead of writing out of bounds.
            if (glyphIndex >= tableSize) {
                continue;
            }
            // Keep the first mapping found for a glyph unless this charmap is
            // one of the preferred ones, which always wins.
            if (charCode &&
                ((*glyphToUnicode)[glyphIndex] == 0 || preferredMap)) {
                (*glyphToUnicode)[glyphIndex] = charCode;
            }
        }
    }

    if (originalCharmap != NULL && face->charmap != originalCharmap) {
        FT_Set_Charmap(face, originalCharmap);
    }
}
|
||||
|
||||
// static
|
||||
SkAdvancedTypefaceMetrics* SkFontHost::GetAdvancedTypefaceMetrics(
|
||||
uint32_t fontID,
|
||||
@ -509,6 +559,12 @@ SkAdvancedTypefaceMetrics* SkFontHost::GetAdvancedTypefaceMetrics(
|
||||
}
|
||||
}
|
||||
|
||||
if (perGlyphInfo & SkAdvancedTypefaceMetrics::kToUnicode_PerGlyphInfo &&
|
||||
info->fType != SkAdvancedTypefaceMetrics::kType1_Font &&
|
||||
face->num_charmaps) {
|
||||
populate_glyph_to_unicode(face, &(info->fGlyphToUnicode));
|
||||
}
|
||||
|
||||
if (!canEmbed(face))
|
||||
info->fType = SkAdvancedTypefaceMetrics::kNotEmbeddable_Font;
|
||||
|
||||
|
56
src/ports/SkFontHost_win.cpp
Normal file → Executable file
56
src/ports/SkFontHost_win.cpp
Normal file → Executable file
@ -194,6 +194,58 @@ static void GetLogFontByID(SkFontID fontID, LOGFONT* lf) {
|
||||
}
|
||||
}
|
||||
|
||||
// Construct Glyph to Unicode table.
|
||||
// Unicode code points that require conjugate pairs in utf16 are not
|
||||
// supported.
|
||||
// TODO(arthurhsu): Add support for conjugate pairs. It looks like that may
|
||||
// require parsing the TTF cmap table (platform 4, encoding 12) directly instead
|
||||
// of calling GetFontUnicodeRange().
|
||||
static void populate_glyph_to_unicode(HDC fontHdc, const unsigned glyphCount,
|
||||
SkTDArray<SkUnichar>* glyphToUnicode) {
|
||||
DWORD glyphSetBufferSize = GetFontUnicodeRanges(fontHdc, NULL);
|
||||
if (!glyphSetBufferSize) {
|
||||
return;
|
||||
}
|
||||
|
||||
SkAutoTDeleteArray<BYTE> glyphSetBuffer(new BYTE[glyphSetBufferSize]);
|
||||
GLYPHSET* glyphSet =
|
||||
reinterpret_cast<LPGLYPHSET>(glyphSetBuffer.get());
|
||||
if (GetFontUnicodeRanges(fontHdc, glyphSet) != glyphSetBufferSize) {
|
||||
return;
|
||||
}
|
||||
|
||||
glyphToUnicode->setCount(glyphCount);
|
||||
memset(glyphToUnicode->begin(), 0, glyphCount * sizeof(SkUnichar));
|
||||
for (DWORD i = 0; i < glyphSet->cRanges; ++i) {
|
||||
// There is no guarantee that within a Unicode range, the corresponding
|
||||
// glyph id in a font file are continuous. So, even if we have ranges,
|
||||
// we can't just use the first and last entry of the range to compute
|
||||
// result. We need to enumerate them one by one.
|
||||
int count = glyphSet->ranges[i].cGlyphs;
|
||||
SkAutoTArray<WCHAR> chars(count + 1);
|
||||
chars[count] = 0; // termintate string
|
||||
SkAutoTArray<WORD> glyph(count);
|
||||
for (USHORT j = 0; j < count; ++j) {
|
||||
chars[j] = glyphSet->ranges[i].wcLow + j;
|
||||
}
|
||||
GetGlyphIndicesW(fontHdc, chars.get(), count, glyph.get(),
|
||||
GGI_MARK_NONEXISTING_GLYPHS);
|
||||
// If the glyph ID is valid, and the glyph is not mapped, then we will
|
||||
// fill in the char id into the vector. If the glyph is mapped already,
|
||||
// skip it.
|
||||
// TODO(arthurhsu): better improve this. e.g. Get all used char ids from
|
||||
// font cache, then generate this mapping table from there. It's
|
||||
// unlikely to have collisions since glyph reuse happens mostly for
|
||||
// different Unicode pages.
|
||||
for (USHORT j = 0; j < count; ++j) {
|
||||
if (glyph[j] != 0xffff && glyph[j] < glyphCount &&
|
||||
(*glyphToUnicode)[glyph[j]] == 0) {
|
||||
(*glyphToUnicode)[glyph[j]] = chars[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class SkScalerContext_Windows : public SkScalerContext {
|
||||
@ -649,6 +701,10 @@ SkAdvancedTypefaceMetrics* SkFontHost::GetAdvancedTypefaceMetrics(
|
||||
info->fFontName.set(lf.lfFaceName);
|
||||
#endif
|
||||
|
||||
if (perGlyphInfo & SkAdvancedTypefaceMetrics::kToUnicode_PerGlyphInfo) {
|
||||
populate_glyph_to_unicode(hdc, glyphCount, &(info->fGlyphToUnicode));
|
||||
}
|
||||
|
||||
if (otm.otmTextMetrics.tmPitchAndFamily & TMPF_TRUETYPE) {
|
||||
info->fType = SkAdvancedTypefaceMetrics::kTrueType_Font;
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user