Clean up the codepoints/grapheme mess; this also helps with the ICU API

Renamed all codepoints to utf16Index

Change-Id: Ie915395a56ac825637f6dbb25824cd1635a5b0a6
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/296438
Reviewed-by: Ben Wagner <bungeman@google.com>
Commit-Queue: Julia Lavrova <jlavrova@google.com>
This commit is contained in:
Julia Lavrova 2020-06-15 10:20:08 -04:00 committed by Skia Commit-Bot
parent 4190f27b41
commit c4d49056b4
9 changed files with 103 additions and 176 deletions

View File

@ -2873,6 +2873,8 @@ protected:
font_collection->getParagraphCache()->turnOn(false);
const std::u16string text = u"❤️🕵🏾‍♀️ 🕵🏾 👩🏾‍⚕️ 👨🏾‍⚕️ 👩🏾‍🌾 👨🏾‍🌾 👩🏾‍🍳 👨🏾‍🍳 👩🏾‍🎓 👨🏾‍🎓 👩🏾‍🎤 👨🏾‍🎤 👩🏾‍🏫 👨🏾‍🏫 👩🏾‍🏭 👨🏾‍🏭 👩🏾‍💻 👨🏾‍💻 👩🏾‍💼 👨🏾‍💼 👩🏾‍🔧 👨🏾‍🔧 👩🏾‍🔬 👨🏾‍🔬 👩🏾‍🎨 👨🏾‍🎨 👩🏾‍🚒 👨🏾‍🚒 👩🏾‍✈️ 👨🏾‍✈️ 👩🏾‍🚀 👨🏾‍🚀 👩🏾‍⚖️ 👨🏾‍⚖️ 🤶🏾 🎅🏾";
//u"\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC66\uD83D\uDC69\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC67\uD83C\uDDFA\uD83C\uDDF8";
canvas->drawColor(SK_ColorWHITE);
ParagraphStyle paragraph_style;

View File

@ -648,7 +648,7 @@ TextRange OneLineShaper::clusteredText(GlyphRange& glyphs) {
if (dir == Dir::right) {
while (index < fCurrentRun->fTextRange.end) {
if (this->fParagraph->codeUnitHasProperty(index,
CodeUnitFlags::kGraphemeBreakBefore)) {
CodeUnitFlags::kGraphemeStart)) {
return index;
}
++index;
@ -657,7 +657,7 @@ TextRange OneLineShaper::clusteredText(GlyphRange& glyphs) {
} else {
while (index > fCurrentRun->fTextRange.start) {
if (this->fParagraph->codeUnitHasProperty(index,
CodeUnitFlags::kGraphemeBreakBefore)) {
CodeUnitFlags::kGraphemeStart)) {
return index;
}
--index;

View File

@ -39,8 +39,8 @@ public:
, fCodeUnitProperties(paragraph->fCodeUnitProperties)
, fWords(paragraph->fWords)
, fBidiRegions(paragraph->fBidiRegions)
, fGraphemes16(paragraph->fGraphemes16)
, fCodepoints(paragraph->fCodepoints) { }
, fUTF8IndexForUTF16Index(paragraph->fUTF8IndexForUTF16Index)
, fUTF16IndexForUTF8Index(paragraph->fUTF16IndexForUTF8Index) { }
// Input == key
ParagraphCacheKey fKey;
@ -51,8 +51,8 @@ public:
SkTArray<CodeUnitFlags> fCodeUnitProperties;
std::vector<size_t> fWords;
SkTArray<BidiRegion> fBidiRegions;
SkTArray<Grapheme, true> fGraphemes16;
SkTArray<CodepointRepresentation, true> fCodepoints;
SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
SkTArray<size_t, true> fUTF16IndexForUTF8Index;
};
uint32_t ParagraphCache::KeyHash::mix(uint32_t hash, uint32_t data) const {
@ -207,8 +207,8 @@ void ParagraphCache::updateTo(ParagraphImpl* paragraph, const Entry* entry) {
paragraph->fCodeUnitProperties = entry->fValue->fCodeUnitProperties;
paragraph->fWords = entry->fValue->fWords;
paragraph->fBidiRegions = entry->fValue->fBidiRegions;
paragraph->fGraphemes16 = entry->fValue->fGraphemes16;
paragraph->fCodepoints = entry->fValue->fCodepoints;
paragraph->fUTF8IndexForUTF16Index = entry->fValue->fUTF8IndexForUTF16Index;
paragraph->fUTF16IndexForUTF8Index = entry->fValue->fUTF16IndexForUTF8Index;
for (auto& run : paragraph->fRuns) {
run.setMaster(paragraph);
}

View File

@ -146,8 +146,8 @@ void ParagraphImpl::layout(SkScalar rawWidth) {
this->fCodeUnitProperties.push_back_n(fText.size() + 1, CodeUnitFlags::kNoCodeUnitFlag);
this->fWords.clear();
this->fBidiRegions.reset();
this->fGraphemes16.reset();
this->fCodepoints.reset();
this->fUTF8IndexForUTF16Index.reset();
this->fUTF16IndexForUTF8Index.reset();
this->fRuns.reset();
if (!this->shapeTextIntoEndlessLine()) {
this->resetContext();
@ -354,28 +354,10 @@ bool ParagraphImpl::computeCodeUnitProperties() {
while (!breaker.eof()) {
auto currentPos = breaker.next();
fCodeUnitProperties[currentPos] |= CodeUnitFlags::kGraphemeBreakBefore;
fCodeUnitProperties[currentPos] |= CodeUnitFlags::kGraphemeStart;
}
}
/*
SkString breaks;
SkString graphemes;
SkString whitespaces;
size_t index = 0;
for (auto flag : fIcuFlags) {
if ((flag & IcuFlagTypes::kHardLineBreak) != 0) {
breaks += "H";
} else if ((flag & IcuFlagTypes::kSoftLineBreak) != 0) {
breaks += "S";
} else {
breaks += " ";
}
graphemes += (flag & IcuFlagTypes::kGrapheme) == 0 ? " " : "G";
whitespaces += (flag & IcuFlagTypes::kWhiteSpace) == 0 ? " " : "W";
++index;
}
SkDebugf("%s\n%s\n%s\n", breaks.c_str(), graphemes.c_str(), whitespaces.c_str());
*/
return true;
}
@ -781,56 +763,6 @@ TextLine& ParagraphImpl::addLine(SkVector offset,
return fLines.emplace_back(this, offset, advance, blocks, text, textWithSpaces, clusters, clustersWithGhosts, widthWithSpaces, sizes);
}
// Builds fCodepoints (one entry per UTF-16 code unit of fText) and fGraphemes16
// (one entry per non-empty range reported for kGraphemeBreakBefore), linking every
// fCodepoints entry to the index of the fGraphemes16 entry that covers it.
// The work is done once: if fGraphemes16 already has entries the call returns immediately.
void ParagraphImpl::markGraphemes16() {
if (!fGraphemes16.empty()) {
return;
}
// Fill out code points 16
// Walk fText one UTF-8 encoded codepoint at a time.
auto ptr = fText.c_str();
auto end = fText.c_str() + fText.size();
while (ptr < end) {
// Byte offset of the current codepoint inside fText
size_t index = ptr - fText.c_str();
SkUnichar u = SkUTF::NextUTF8(&ptr, end);
uint16_t buffer[2];
size_t count = SkUTF::ToUTF16(u, buffer);
// The grapheme index is not known yet (EMPTY_INDEX); it is assigned in the pass below.
// The last argument (fIndex) is 2 when the codepoint needs two UTF-16 units, 1 otherwise.
fCodepoints.emplace_back(EMPTY_INDEX, index, count > 1 ? 2 : 1);
if (count > 1) {
// Second UTF-16 unit of the same codepoint: same text index, fIndex of 1
fCodepoints.emplace_back(EMPTY_INDEX, index, 1);
}
}
// Running [start, end) window into fCodepoints; shared across lambda invocations via [&].
CodepointRange codepoints(0ul, 0ul);
// NOTE(review): presumably the callback is invoked with consecutive text ranges in
// increasing order and returning true continues the iteration - confirm against
// forEachCodeUnitPropertyRange.
forEachCodeUnitPropertyRange(
CodeUnitFlags::kGraphemeBreakBefore,
[&](TextRange textRange) {
// Collect all the codepoints that belong to the grapheme
while (codepoints.end < fCodepoints.size()
&& fCodepoints[codepoints.end].fTextIndex < textRange.end) {
++codepoints.end;
}
// An empty text range does not produce a grapheme entry
if (textRange.start == textRange.end) {
return true;
}
//SkDebugf("Grapheme #%d [%d:%d)\n", fGraphemes16.size(), startPos, endPos);
// Update all the codepoints that belong to this grapheme
for (auto i = codepoints.start; i < codepoints.end; ++i) {
//SkDebugf(" [%d] = %d + %d\n", i, fCodePoints[i].fTextIndex, fCodePoints[i].fIndex);
fCodepoints[i].fGrapheme = fGraphemes16.size();
}
fGraphemes16.emplace_back(codepoints, textRange);
// The next grapheme starts where this one ended
codepoints.start = codepoints.end;
return true;
});
}
// Returns a vector of bounding boxes that enclose all text between
// start and end glyph indexes, including start and excluding end
std::vector<TextBox> ParagraphImpl::getRectsForRange(unsigned start,
@ -847,9 +779,9 @@ std::vector<TextBox> ParagraphImpl::getRectsForRange(unsigned start,
return results;
}
markGraphemes16();
ensureUTF16Mapping();
if (start >= end || start > fCodepoints.size() || end == 0) {
if (start >= end || start > fUTF8IndexForUTF16Index.size() || end == 0) {
return results;
}
@ -862,16 +794,11 @@ std::vector<TextBox> ParagraphImpl::getRectsForRange(unsigned start,
// One flutter test fails because of it but the editing experience is correct
// (although you have to press the cursor many times before it moves to the next grapheme).
TextRange text(fText.size(), fText.size());
if (start < fCodepoints.size()) {
auto codepoint = fCodepoints[start];
auto grapheme = fGraphemes16[codepoint.fGrapheme];
text.start = grapheme.fTextRange.start;
if (start < fUTF8IndexForUTF16Index.size()) {
text.start = findGraphemeStart(fUTF8IndexForUTF16Index[start]);
}
if (end < fCodepoints.size()) {
auto codepoint = fCodepoints[end];
auto grapheme = fGraphemes16[codepoint.fGrapheme];
text.end = grapheme.fTextRange.start;
if (end < fUTF8IndexForUTF16Index.size()) {
text.end = findGraphemeStart(fUTF8IndexForUTF16Index[end]);
}
for (auto& line : fLines) {
@ -929,7 +856,8 @@ PositionWithAffinity ParagraphImpl::getGlyphPositionAtCoordinate(SkScalar dx, Sk
return {0, Affinity::kDownstream};
}
markGraphemes16();
ensureUTF16Mapping();
for (auto& line : fLines) {
// Let's figure out if we can stop looking
auto offsetY = line.offset().fY;
@ -1053,8 +981,8 @@ void ParagraphImpl::setState(InternalState state) {
fCodeUnitProperties.push_back_n(fText.size() + 1, kNoCodeUnitFlag);
fWords.clear();
fBidiRegions.reset();
fGraphemes16.reset();
fCodepoints.reset();
fUTF8IndexForUTF16Index.reset();
fUTF16IndexForUTF8Index.reset();
[[fallthrough]];
case kShaped:
@ -1159,5 +1087,47 @@ void ParagraphImpl::updateBackgroundPaint(size_t from, size_t to, SkPaint paint)
}
}
// Returns the closest position at or before |index| whose code unit is flagged
// with CodeUnitFlags::kGraphemeStart.
// Two positions are returned without looking at the flags: the end of the text
// (index == fText.size()) and position 0, where the backward scan stops.
TextIndex ParagraphImpl::findGraphemeStart(TextIndex index) {
    if (index != fText.size()) {
        // Step back one code unit at a time until a grapheme start is found
        for (; index > 0; --index) {
            const bool startsGrapheme =
                    (fCodeUnitProperties[index] & CodeUnitFlags::kGraphemeStart) != 0;
            if (startsGrapheme) {
                break;
            }
        }
    }
    return index;
}
void ParagraphImpl::ensureUTF16Mapping() {
if (!fUTF16IndexForUTF8Index.empty()) {
return;
}
// Fill out code points 16
auto ptr = fText.c_str();
auto end = fText.c_str() + fText.size();
while (ptr < end) {
size_t index = ptr - fText.c_str();
SkUnichar u = SkUTF::NextUTF8(&ptr, end);
// All utf8 units refer to the same codepoint
size_t next = ptr - fText.c_str();
for (auto i = index; i < next; ++i) {
fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
}
SkASSERT(fUTF16IndexForUTF8Index.size() == next);
// One or two codepoints refer to the same text index
uint16_t buffer[2];
size_t count = SkUTF::ToUTF16(u, buffer);
fUTF8IndexForUTF16Index.emplace_back(index);
if (count > 1) {
fUTF8IndexForUTF16Index.emplace_back(index);
}
}
fUTF16IndexForUTF8Index.emplace_back(fUTF8IndexForUTF16Index.size());
fUTF8IndexForUTF16Index.emplace_back(fText.size());
}
} // namespace textlayout
} // namespace skia

View File

@ -38,7 +38,7 @@ namespace textlayout {
enum CodeUnitFlags {
kNoCodeUnitFlag = 0x0,
kPartOfWhiteSpace = 0x1,
kGraphemeBreakBefore = 0x2,
kGraphemeStart = 0x2,
kSoftLineBreakBefore = 0x4,
kHardLineBreakBefore = 0x8,
};
@ -138,8 +138,12 @@ public:
const ParagraphStyle& paragraphStyle() const { return fParagraphStyle; }
SkSpan<Cluster> clusters() { return SkSpan<Cluster>(fClusters.begin(), fClusters.size()); }
sk_sp<FontCollection> fontCollection() const { return fFontCollection; }
SkSpan<CodepointRepresentation> codepoints(){ return SkSpan<CodepointRepresentation>(fCodepoints.begin(), fCodepoints.size()); }
void formatLines(SkScalar maxWidth);
void ensureUTF16Mapping();
TextIndex findGraphemeStart(TextIndex index);
// Looks up the UTF-16 index stored for the UTF-8 text position |index|.
// Reads fUTF16IndexForUTF8Index directly; that table is filled by ensureUTF16Mapping().
size_t getUTF16Index(TextIndex index) {
    const size_t utf16Index = fUTF16IndexForUTF8Index[index];
    return utf16Index;
}
bool strutEnabled() const { return paragraphStyle().getStrutStyle().getStrutEnabled(); }
bool strutForceHeight() const {
@ -226,8 +230,6 @@ private:
void calculateBoundaries();
void markGraphemes16();
void computeEmptyMetrics();
// Input
@ -249,8 +251,10 @@ private:
SkTArray<size_t> fClustersIndexFromCodeUnit;
std::vector<size_t> fWords;
SkTArray<BidiRegion> fBidiRegions;
SkTArray<Grapheme, true> fGraphemes16;
SkTArray<CodepointRepresentation, true> fCodepoints;
// These two arrays are used in measuring methods (getRectsForRange, getGlyphPositionAtCoordinate).
// They are filled lazily, the first time they are needed, and then cached.
SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
SkTArray<size_t, true> fUTF16IndexForUTF8Index;
size_t fUnresolvedGlyphs;
SkTArray<TextLine, false> fLines; // kFormatted (cached: width, max lines, ellipsis, text align)

View File

@ -353,7 +353,7 @@ bool Cluster::isSoftBreak() const {
}
bool Cluster::isGraphemeBreak() const {
return fMaster->codeUnitHasProperty(fTextRange.end,CodeUnitFlags::kGraphemeBreakBefore);
return fMaster->codeUnitHasProperty(fTextRange.end,CodeUnitFlags::kGraphemeStart);
}
Cluster::Cluster(ParagraphImpl* master,

View File

@ -40,9 +40,6 @@ const SkRange<size_t> EMPTY_CLUSTERS = EMPTY_RANGE;
typedef size_t GraphemeIndex;
typedef SkRange<GraphemeIndex> GraphemeRange;
typedef size_t CodepointIndex;
typedef SkRange<CodepointIndex> CodepointRange;
typedef size_t GlyphIndex;
typedef SkRange<GlyphIndex> GlyphRange;
@ -206,23 +203,6 @@ private:
uint8_t fBidiLevel;
};
struct CodepointRepresentation {
CodepointRepresentation(GraphemeIndex graphemeIndex, TextIndex textIndex, size_t index)
: fGrapheme(graphemeIndex), fTextIndex(textIndex), fIndex(index) { }
GraphemeIndex fGrapheme;
TextIndex fTextIndex; // Used for getGlyphPositionAtCoordinate
size_t fIndex;
};
struct Grapheme {
Grapheme(CodepointRange codepoints, TextRange textRange)
: fCodepointRange(codepoints), fTextRange(textRange) { }
CodepointRange fCodepointRange;
TextRange fTextRange; // Used for getRectsForRange
};
class Cluster {
public:
enum BreakType {

View File

@ -1130,29 +1130,19 @@ PositionWithAffinity TextLine::getGlyphPositionAtCoordinate(SkScalar dx) {
[this, dx, &result, &lookingForHit]
(TextRange textRange, const TextStyle& style, const TextLine::ClipContext& context) {
auto findCodepointByTextIndex = [this](ClusterIndex clusterIndex8) {
auto codepoints = fMaster->codepoints();
auto codepoint = std::lower_bound(
codepoints.begin(), codepoints.end(),
clusterIndex8,
[](const CodepointRepresentation& lhs, size_t rhs) -> bool { return lhs.fTextIndex < rhs; });
return codepoint - codepoints.begin();
};
auto offsetX = this->offset().fX;
if (dx < context.clip.fLeft + offsetX) {
// All the other runs are placed right of this one
auto codepointIndex = findCodepointByTextIndex(context.run->globalClusterIndex(context.pos));
result = { SkToS32(codepointIndex), kDownstream };
auto utf16Index = fMaster->getUTF16Index(context.run->globalClusterIndex(context.pos));
result = { SkToS32(utf16Index), kDownstream };
lookingForHit = false;
return false;
}
if (dx >= context.clip.fRight + offsetX) {
// We have to keep looking ; just in case keep the last one as the closest
auto codepointIndex = findCodepointByTextIndex(context.run->globalClusterIndex(context.pos + context.size));
result = { SkToS32(codepointIndex), kUpstream };
auto utf16Index = fMaster->getUTF16Index(context.run->globalClusterIndex(context.pos + context.size));
result = { SkToS32(utf16Index), kUpstream };
return true;
}
@ -1169,53 +1159,34 @@ PositionWithAffinity TextLine::getGlyphPositionAtCoordinate(SkScalar dx) {
found = index;
}
auto glyphStart = context.run->positionX(found) + context.fTextShift + offsetX;
auto glyphWidth = context.run->positionX(found + 1) - context.run->positionX(found);
auto glyphemeStart = context.run->positionX(found) + context.fTextShift + offsetX;
auto glyphemeWidth = context.run->positionX(found + 1) - context.run->positionX(found);
// Find the grapheme range that contains the point
auto clusterIndex8 = context.run->globalClusterIndex(found);
auto clusterEnd8 = context.run->globalClusterIndex(found + 1);
// Find the grapheme positions in codepoints that contains the point
auto codepointIndex = findCodepointByTextIndex(clusterIndex8);
CodepointRange codepoints(codepointIndex, codepointIndex);
auto masterCodepoints = fMaster->codepoints();
if (context.run->leftToRight()) {
for (codepoints.end = codepointIndex;
codepoints.end < masterCodepoints.size(); ++codepoints.end) {
auto& cp = masterCodepoints[codepoints.end];
if (cp.fTextIndex >= clusterEnd8) {
break;
}
}
} else {
for (codepoints.end = codepointIndex;
codepoints.end > 0; --codepoints.end) {
auto& cp = masterCodepoints[codepoints.end];
if (cp.fTextIndex <= clusterEnd8) {
break;
}
}
std::swap(codepoints.start, codepoints.end);
}
auto graphemeSize = codepoints.width();
auto graphemeStart = fMaster->findGraphemeStart(clusterIndex8);
auto graphemeWidth =
fMaster->findGraphemeStart(clusterEnd8) - graphemeStart;
auto utf16Index = fMaster->getUTF16Index(clusterIndex8);
// We only need to inspect one glyph (maybe not even the entire glyph)
SkScalar center;
bool insideGlyph = false;
if (graphemeSize > 1) {
auto averageCodepointWidth = glyphWidth / graphemeSize;
auto delta = dx - glyphStart;
auto insideIndex = SkScalarFloorToInt(delta / averageCodepointWidth);
insideGlyph = delta > averageCodepointWidth;
center = glyphStart + averageCodepointWidth * insideIndex + averageCodepointWidth / 2;
codepointIndex += insideIndex;
if (graphemeWidth > 1) {
auto averageGlyphWidth = glyphemeWidth / graphemeWidth;
auto delta = dx - glyphemeStart;
auto insideIndex = SkScalarFloorToInt(delta / averageGlyphWidth);
insideGlyph = delta > averageGlyphWidth;
center = glyphemeStart + averageGlyphWidth * insideIndex + averageGlyphWidth / 2;
utf16Index += insideIndex;
} else {
center = glyphStart + glyphWidth / 2;
center = glyphemeStart + glyphemeWidth / 2;
}
if ((dx < center) == context.run->leftToRight() || insideGlyph) {
result = { SkToS32(codepointIndex), kDownstream };
result = { SkToS32(utf16Index), kDownstream };
} else {
result = { SkToS32(codepointIndex + 1), kUpstream };
result = { SkToS32(utf16Index + 1), kUpstream };
}
// No need to continue
lookingForHit = false;

View File

@ -2267,19 +2267,19 @@ DEF_TEST(SkParagraph_GetGlyphPositionAtCoordinateParagraph, reporter) {
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(301, 2.2f).position == 11);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(302, 2.6f).position == 11);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(301, 2.1f).position == 11);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(100000, 20).position == 18);//
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(100000, 20).position == 18);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(450, 20).position == 16);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(100000, 90).position == 36);//
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(100000, 90).position == 36);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(-100000, 90).position == 18);
REPORTER_ASSERT(reporter,
paragraph->getGlyphPositionAtCoordinate(20, -80).position == 1);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(1, 90).position == 18);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(1, 170).position == 36);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(10000, 180).position == 72);//
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(10000, 180).position == 72);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(70, 180).position == 56);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(1, 270).position == 72);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(35, 90).position == 19);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(10000, 10000).position == 77);//
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(10000, 10000).position == 77);
REPORTER_ASSERT(reporter, paragraph->getGlyphPositionAtCoordinate(85, 10000).position == 75);
}