Add nbsp info to the cache

Change-Id: I9d4cd6f3d7c2871c1279c3a7190187bbfee1b56b
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/383856
Commit-Queue: Julia Lavrova <jlavrova@google.com>
Reviewed-by: Ben Wagner <bungeman@google.com>
2021-03-11 15:47:13 -05:00 · 2021-03-11 15:47:13 -05:00 · 98a76eb6ea
commit 98a76eb6ea
parent 708faba16b
9 changed files with 94 additions and 130 deletions
--- a/modules/skparagraph/samples/SampleParagraph.cpp
+++ b/modules/skparagraph/samples/SampleParagraph.cpp
@ -2575,7 +2575,7 @@ protected:
                         cluster.textRange().start, cluster.textRange().end,
                         cluster.isSoftBreak() ? "soft" :
                         cluster.isHardBreak() ? "hard" :
-                         cluster.isWhitespaces() ? "spaces" : "");
+                         cluster.isWhitespaceBreak() ? "spaces" : "");
            }

            auto lines = impl->lines();
--- a/modules/skparagraph/src/ParagraphImpl.cpp
+++ b/modules/skparagraph/src/ParagraphImpl.cpp
@ -28,11 +28,6 @@ namespace textlayout {

 namespace {

-static inline SkUnichar nextUtf8Unit(const char** ptr, const char* end) {
-    SkUnichar val = SkUTF::NextUTF8(ptr, end);
-    return val < 0 ? 0xFFFD : val;
-}
-
 SkScalar littleRound(SkScalar a) {
    // This rounding is done to match Flutter tests. Must be removed..
    auto val = std::fabs(a);
@ -270,14 +265,20 @@ bool ParagraphImpl::computeCodeUnitProperties() {
        return false;
    }

-    // Get white spaces
-    std::vector<SkUnicode::Position> whitespaces;
-    if (!fUnicode->getWhitespaces(fText.c_str(), fText.size(), &whitespaces)) {
-        return false;
-    }
-    for (auto whitespace : whitespaces) {
-        fCodeUnitProperties[whitespace] |= CodeUnitFlags::kPartOfWhiteSpace;
-    }
+    // Get all spaces
+    fUnicode->forEachCodepoint(fText.c_str(), fText.size(),
+       [this](SkUnichar unichar, int32_t start, int32_t end) {
+            if (fUnicode->isWhitespace(unichar)) {
+                for (auto i = start; i < end; ++i) {
+                    fCodeUnitProperties[i] |=  CodeUnitFlags::kPartOfWhiteSpaceBreak;
+                }
+            }
+            if (fUnicode->isSpace(unichar)) {
+                for (auto i = start; i < end; ++i) {
+                    fCodeUnitProperties[i] |=  CodeUnitFlags::kPartOfIntraWordBreak;
+                }
+            }
+       });

    // Get line breaks
    std::vector<SkUnicode::LineBreakBefore> lineBreaks;
@ -376,7 +377,7 @@ void ParagraphImpl::spaceGlyphs() {

            // Process word spacing
            if (currentStyle->fStyle.getWordSpacing() != 0) {
-                if (cluster->isWhitespaces() && cluster->isSoftBreak()) {
+                if (cluster->isWhitespaceBreak() && cluster->isSoftBreak()) {
                    if (!soFarWhitespacesOnly) {
                        shift += run.addSpacesAtTheEnd(currentStyle->fStyle.getWordSpacing(), cluster);
                    }
@ -387,7 +388,7 @@ void ParagraphImpl::spaceGlyphs() {
                shift += run.addSpacesEvenly(currentStyle->fStyle.getLetterSpacing(), cluster);
            }

-            if (soFarWhitespacesOnly && !cluster->isWhitespaces()) {
+            if (soFarWhitespacesOnly && !cluster->isWhitespaceBreak()) {
                soFarWhitespacesOnly = false;
            }
        });
@ -716,66 +717,6 @@ SkRange<size_t> ParagraphImpl::getWordBoundary(unsigned offset) {
    return { SkToU32(start), SkToU32(end) };
 }

-void ParagraphImpl::forEachCodeUnitPropertyRange(CodeUnitFlags property, CodeUnitRangeVisitor visitor) {
-
-    size_t first = 0;
-    for (size_t i = 1; i < fText.size(); ++i) {
-        auto properties = fCodeUnitProperties[i];
-        if (properties & property) {
-            visitor({first, i});
-            first = i;
-        }
-
-    }
-    visitor({first, fText.size()});
-}
-
-size_t ParagraphImpl::getWhitespacesLength(TextRange textRange) {
-    size_t len = 0;
-    for (auto i = textRange.start; i < textRange.end; ++i) {
-        auto properties = fCodeUnitProperties[i];
-        if (properties & CodeUnitFlags::kPartOfWhiteSpace) {
-            ++len;
-        }
-    }
-    return len;
-}
-
-static bool is_ascii_7bit_space(int c) {
-    SkASSERT(c >= 0 && c <= 127);
-
-    // Extracted from https://en.wikipedia.org/wiki/Whitespace_character
-    //
-    enum WS {
-        kHT    = 9,
-        kLF    = 10,
-        kVT    = 11,
-        kFF    = 12,
-        kCR    = 13,
-        kSP    = 32,    // too big to use as shift
-    };
-#define M(shift)    (1 << (shift))
-    constexpr uint32_t kSpaceMask = M(kHT) | M(kLF) | M(kVT) | M(kFF) | M(kCR);
-    // we check for Space (32) explicitly, since it is too large to shift
-    return (c == kSP) || (c <= 31 && (kSpaceMask & M(c)));
-#undef M
-}
-
-bool ParagraphImpl::isSpace(TextRange textRange) {
-    auto text = ParagraphImpl::text(textRange);
-    const char* ch = text.begin();
-    if (text.end() - ch == 1 && *(unsigned char*)ch <= 0x7F) {
-        return is_ascii_7bit_space(*ch);
-    }
-    while (ch != text.end()) {
-        SkUnichar unicode = nextUtf8Unit(&ch, text.end());
-        if (!fUnicode->isSpace(unicode)) {
-            return false;
-        }
-    }
-    return true;
-}
-
 void ParagraphImpl::getLineMetrics(std::vector<LineMetrics>& metrics) {
    metrics.clear();
    for (auto& line : fLines) {
--- a/modules/skparagraph/src/ParagraphImpl.h
+++ b/modules/skparagraph/src/ParagraphImpl.h
@ -37,11 +37,12 @@ namespace skia {
 namespace textlayout {

 enum CodeUnitFlags {
-    kNoCodeUnitFlag = 0x0,
-    kPartOfWhiteSpace = 0x1,
-    kGraphemeStart = 0x2,
-    kSoftLineBreakBefore = 0x4,
-    kHardLineBreakBefore = 0x8,
+    kNoCodeUnitFlag = 0x00,
+    kPartOfWhiteSpaceBreak = 0x01,  // code unit of a codepoint for which SkUnicode::isWhitespace() is true
+    kGraphemeStart = 0x02,
+    kSoftLineBreakBefore = 0x04,
+    kHardLineBreakBefore = 0x08,
+    kPartOfIntraWordBreak = 0x10,  // code unit of a codepoint for which SkUnicode::isSpace() is true
 };
 }  // namespace textlayout
 }  // namespace skia
@ -217,11 +218,6 @@ public:
        }
    }

-    using CodeUnitRangeVisitor = std::function<bool(TextRange textRange)>;
-    void forEachCodeUnitPropertyRange(CodeUnitFlags property, CodeUnitRangeVisitor visitor);
-    size_t getWhitespacesLength(TextRange textRange);
-    bool isSpace(TextRange textRange);
-
    bool codeUnitHasProperty(size_t index, CodeUnitFlags property) const { return (fCodeUnitProperties[index] & property) == property; }

    SkUnicode* getUnicode() { return fUnicode.get(); }
--- a/modules/skparagraph/src/Run.cpp
+++ b/modules/skparagraph/src/Run.cpp
@ -366,6 +366,26 @@ bool Cluster::isGraphemeBreak() const {
    return fOwner->codeUnitHasProperty(fTextRange.end, CodeUnitFlags::kGraphemeStart);
 }

+static bool is_ascii_7bit_space(int c) {
+    SkASSERT(c >= 0 && c <= 127);  // only defined for 7-bit ASCII values
+
+    // Returns true when c is one of HT, LF, VT, FF, CR or SP. The list is extracted from
+    // https://en.wikipedia.org/wiki/Whitespace_character
+    enum WS {
+        kHT    = 9,
+        kLF    = 10,
+        kVT    = 11,
+        kFF    = 12,
+        kCR    = 13,
+        kSP    = 32,    // left out of kSpaceMask: 32 is too big to use as a shift
+    };
+#define M(shift)    (1 << (shift))
+    constexpr uint32_t kSpaceMask = M(kHT) | M(kLF) | M(kVT) | M(kFF) | M(kCR);
+    // Space (32) is compared explicitly; the c <= 31 guard keeps M(c) within the mask for the rest
+    return (c == kSP) || (c <= 31 && (kSpaceMask & M(c)));
+#undef M
+}
+
 Cluster::Cluster(ParagraphImpl* owner,
        RunIndex runIndex,
        size_t start,
@ -383,9 +403,28 @@ Cluster::Cluster(ParagraphImpl* owner,
        , fSpacing(0)
        , fHeight(height)
        , fHalfLetterSpacing(0.0) {
-    size_t len = fOwner->getWhitespacesLength(fTextRange);
-    fIsWhiteSpaces = (len == this->fTextRange.width());
-    fIsSpaces = fOwner->isSpace(fTextRange);
+    size_t whiteSpacesBreakLen = 0;
+    size_t intraWordBreakLen = 0;
+
+    const char* ch = text.begin();
+    if (text.end() - ch == 1 && *(unsigned char*)ch <= 0x7F) {
+        // ASCII shortcut; possibly not worth keeping, since it no longer saves a unicode call
+        if (is_ascii_7bit_space(*ch)) {
+            ++whiteSpacesBreakLen;
+        }
+    } else {
+        for (auto i = fTextRange.start; i < fTextRange.end; ++i) {
+            if (fOwner->codeUnitHasProperty(i, CodeUnitFlags::kPartOfWhiteSpaceBreak)) {
+                ++whiteSpacesBreakLen;
+            }
+            if (fOwner->codeUnitHasProperty(i, CodeUnitFlags::kPartOfIntraWordBreak)) {
+                ++intraWordBreakLen;
+            }
+        }
+    }
+
+    fIsWhiteSpaceBreak = whiteSpacesBreakLen == fTextRange.width();
+    fIsIntraWordBreak = intraWordBreakLen == fTextRange.width();
    fIsHardBreak = fOwner->codeUnitHasProperty(fTextRange.end, CodeUnitFlags::kHardLineBreakBefore);
 }

--- a/modules/skparagraph/src/Run.h
+++ b/modules/skparagraph/src/Run.h
@ -247,9 +247,10 @@ public:
        fWidth += shift;
    }

-    bool isWhitespaces() const { return fIsWhiteSpaces; }
-    bool isSpaces() const { return fIsSpaces; }
+    bool isWhitespaceBreak() const { return fIsWhiteSpaceBreak; }
+    bool isIntraWordBreak() const { return fIsIntraWordBreak; }
    bool isHardBreak() const { return fIsHardBreak; }
+
    bool isSoftBreak() const;
    bool isGraphemeBreak() const;
    bool canBreakLineAfter() const { return isHardBreak() || isSoftBreak(); }
@ -298,8 +299,9 @@ private:
    SkScalar fSpacing;
    SkScalar fHeight;
    SkScalar fHalfLetterSpacing;
-    bool fIsWhiteSpaces;
-    bool fIsSpaces;
+
+    bool fIsWhiteSpaceBreak;
+    bool fIsIntraWordBreak;
    bool fIsHardBreak;
 };

--- a/modules/skparagraph/src/TextLine.cpp
+++ b/modules/skparagraph/src/TextLine.cpp
@ -415,7 +415,7 @@ void TextLine::justify(SkScalar maxWidth) {
    bool whitespacePatch = false;
    this->iterateThroughClustersInGlyphsOrder(false, false,
        [&whitespacePatches, &textLen, &whitespacePatch](const Cluster* cluster, bool ghost) {
-            if (cluster->isWhitespaces()) {
+            if (cluster->isWhitespaceBreak()) {
                if (!whitespacePatch) {
                    whitespacePatch = true;
                    ++whitespacePatches;
@ -448,7 +448,7 @@ void TextLine::justify(SkScalar maxWidth) {
        }

        auto prevShift = shift;
-        if (cluster->isWhitespaces()) {
+        if (cluster->isWhitespaceBreak()) {
            if (!whitespacePatch) {
                shift += step;
                whitespacePatch = true;
--- a/modules/skparagraph/src/TextWrapper.cpp
+++ b/modules/skparagraph/src/TextWrapper.cpp
@ -36,7 +36,7 @@ void TextWrapper::lookAhead(SkScalar maxWidth, Cluster* endOfClusters) {
        auto roundedWidth = littleRound(width);
        if (cluster->isHardBreak()) {
        } else if (roundedWidth > maxWidth) {
-            if (cluster->isWhitespaces()) {
+            if (cluster->isWhitespaceBreak()) {
                // It's the end of the word
                fClusters.extend(cluster);
                fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, getClustersTrimmedWidth());
@ -64,7 +64,7 @@ void TextWrapper::lookAhead(SkScalar maxWidth, Cluster* endOfClusters) {
            SkScalar nextWordLength = fClusters.width();
            SkScalar nextShortWordLength = nextWordLength;
            for (auto further = cluster; further != endOfClusters; ++further) {
-                if (further->isSoftBreak() || further->isHardBreak() || further->isWhitespaces()) {
+                if (further->isSoftBreak() || further->isHardBreak() || further->isWhitespaceBreak()) {
                    break;
                }
                if (further->run().isPlaceholder()) {
@ -72,7 +72,7 @@ void TextWrapper::lookAhead(SkScalar maxWidth, Cluster* endOfClusters) {
                  break;
                }

-                if (further->isSpaces() && nextWordLength <= maxWidth) {
+                if (nextWordLength > 0 && nextWordLength <= maxWidth && further->isIntraWordBreak()) {
                    // The cluster is spaces but not the end of the word in a normal sense
                    nextNonBreakingSpace = further;
                    nextShortWordLength = nextWordLength;
@ -179,7 +179,7 @@ void TextWrapper::trimEndSpaces(TextAlign align) {
    fEndLine.saveBreak();
    // Skip all space cluster at the end
    for (auto cluster = fEndLine.endCluster();
-         cluster >= fEndLine.startCluster() && cluster->isWhitespaces();
+         cluster >= fEndLine.startCluster() && cluster->isWhitespaceBreak();
         --cluster) {
        fEndLine.trim(cluster);
    }
@ -195,7 +195,7 @@ SkScalar TextWrapper::getClustersTrimmedWidth() {
            continue;
        }
        if (trailingSpaces) {
-            if (!cluster->isWhitespaces()) {
+            if (!cluster->isWhitespaceBreak()) {
                width += cluster->trimmedWidth(cluster->endPos());
                trailingSpaces = false;
            }
@ -213,7 +213,7 @@ std::tuple<Cluster*, size_t, SkScalar> TextWrapper::trimStartSpaces(Cluster* end
        // End of line is always end of cluster, but need to skip \n
        auto width = fEndLine.width();
        auto cluster = fEndLine.endCluster() + 1;
-        while (cluster < fEndLine.breakCluster() && cluster->isWhitespaces()) {
+        while (cluster < fEndLine.breakCluster() && cluster->isWhitespaceBreak()) {
            width += cluster->width();
            ++cluster;
        }
@ -224,7 +224,7 @@ std::tuple<Cluster*, size_t, SkScalar> TextWrapper::trimStartSpaces(Cluster* end
    // It's a soft line break so we need to move lineStart forward skipping all the spaces
    auto width = fEndLine.widthWithGhostSpaces();
    auto cluster = fEndLine.breakCluster() + 1;
-    while (cluster < endOfClusters && cluster->isWhitespaces()) {
+    while (cluster < endOfClusters && cluster->isWhitespaceBreak()) {
        width += cluster->width();
        ++cluster;
    }
@ -391,7 +391,7 @@ void TextWrapper::breakTextIntoLines(ParagraphImpl* parent,
                softLineMaxIntrinsicWidth = 0;
                fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, lastWordLength);
                lastWordLength = 0;
-            } else if (cluster->isWhitespaces()) {
+            } else if (cluster->isWhitespaceBreak()) {
                // Whitespaces end the word
                softLineMaxIntrinsicWidth += cluster->width();
                fMinIntrinsicWidth = std::max(fMinIntrinsicWidth, lastWordLength);
--- a/modules/skshaper/src/SkUnicode.h
+++ b/modules/skshaper/src/SkUnicode.h
@ -134,8 +134,19 @@ class SKUNICODE_API SkUnicode {
               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
        virtual bool getGraphemes
               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
-        virtual bool getWhitespaces
-               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
+
+        template <typename Callback>
+        void forEachCodepoint(const char* utf8, int32_t utf8Units, Callback&& callback) {
+            const char* current = utf8;  // read cursor, handed to NextUTF8 by address
+            const char* end = utf8 + utf8Units;
+            while (current < end) {  // one callback invocation per decoded codepoint
+                auto before = current - utf8;  // code-unit offset where this codepoint starts
+                SkUnichar unichar = SkUTF::NextUTF8(&current, end);
+                if (unichar < 0) unichar = 0xFFFD;  // negative result is reported as U+FFFD REPLACEMENT CHARACTER
+                auto after = current - utf8;  // cursor offset after decoding; passed as the exclusive end
+                callback(unichar, before, after);  // callback(codepoint, start offset, end offset)
+            }
+        }

        virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;

--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@ -381,26 +381,6 @@ class SkUnicode_icu : public SkUnicode {
        return true;
    }

-    static bool extractWhitespaces(const char utf8[],
-                                   int utf8Units,
-                                   std::vector<Position>* whitespaces) {
-
-        const char* start = utf8;
-        const char* end = utf8 + utf8Units;
-        const char* ch = start;
-        while (ch < end) {
-            auto index = ch - start;
-            auto unichar = utf8_next(&ch, end);
-            if (u_isWhitespace(unichar)) {
-                auto ending = ch - start;
-                for (auto k = index; k < ending; ++k) {
-                  whitespaces->emplace_back(k);
-                }
-            }
-        }
-        return true;
-    }
-
    static int utf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
        int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
        if (utf16Units < 0) {
@ -516,11 +496,6 @@ public:
        });
    }

-    bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
-
-        return extractWhitespaces(utf8, utf8Units, results);
-    }
-
    void reorderVisual(const BidiLevel runLevels[],
                       int levelsCount,
                       int32_t logicalFromVisual[]) override {