Revert "Reland "ICU API: only in SkParagraph, simplified (relanding reverted).""

This reverts commit 16fbc24772.

Reason for revert: Checking to see if this is blocking the G3 roll

Original change's description:
> Reland "ICU API: only in SkParagraph, simplified (relanding reverted)."
>
> This reverts commit a30095d17c.
>
> Reason for revert: Fixing the build
>
> Original change's description:
> > Revert "ICU API: only in SkParagraph, simplified (relanding reverted)."
> >
> > This reverts commit 7479eda3b6.
> >
> > Reason for revert: Breaking build
> >
> > Original change's description:
> > > ICU API: only in SkParagraph, simplified (relanding reverted).
> > >
> > > Reverted commit: https://skia-review.googlesource.com/c/skia/+/296128/
> > >
> > > Change-Id: Iaf793bff94a6060579c7d6176d477e598c047be6
> > > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303261
> > > Reviewed-by: Mike Reed <reed@google.com>
> > > Commit-Queue: Julia Lavrova <jlavrova@google.com>
> >
> > TBR=reed@google.com,jlavrova@google.com
> >
> > Change-Id: Idd4c41e22aa59e24bdbd07f2fa5e9258c1bbb7a7
> > No-Presubmit: true
> > No-Tree-Checks: true
> > No-Try: true
> > Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303358
> > Reviewed-by: Julia Lavrova <jlavrova@google.com>
> > Commit-Queue: Julia Lavrova <jlavrova@google.com>
>
> TBR=reed@google.com,jlavrova@google.com
>
> Change-Id: Iea5da4535ea2e388e8e632e6c556b66c8781631a
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303377
> Reviewed-by: Ben Wagner <bungeman@google.com>
> Reviewed-by: Julia Lavrova <jlavrova@google.com>
> Commit-Queue: Julia Lavrova <jlavrova@google.com>

TBR=bungeman@google.com,reed@google.com,jlavrova@google.com

Change-Id: I1edfecc56add670b251adf44892265088fd32c42
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/304058
Reviewed-by: Robert Phillips <robertphillips@google.com>
Commit-Queue: Robert Phillips <robertphillips@google.com>
2020-07-20 16:38:59 +00:00 · 2020-07-20 16:38:59 +00:00 · e36a466723
commit e36a466723
parent aace25e473
15 changed files with 282 additions and 473 deletions
--- a/modules/skparagraph/src/OneLineShaper.cpp
+++ b/modules/skparagraph/src/OneLineShaper.cpp
@ -2,13 +2,23 @@

 #include "modules/skparagraph/src/Iterators.h"
 #include "modules/skparagraph/src/OneLineShaper.h"
-#include "modules/skparagraph/src/ParagraphUtil.h"
+#include <unicode/uchar.h>
 #include <algorithm>
 #include <unordered_set>
+#include "src/utils/SkUTF.h"

 namespace skia {
 namespace textlayout {

+namespace {
+
+// Reads the next code point via SkUTF::NextUTF8, handing `ptr` and `end` straight through.
+// A negative decoder result is reported as U+FFFD (REPLACEMENT CHARACTER) instead.
+SkUnichar utf8_next(const char** ptr, const char* end) {
+    const SkUnichar decoded = SkUTF::NextUTF8(ptr, end);
+    if (decoded < 0) {
+        return 0xFFFD;
+    }
+    return decoded;
+}
+
+}
+
 void OneLineShaper::commitRunBuffer(const RunInfo&) {

    fCurrentRun->commit();
@ -303,8 +313,8 @@ void OneLineShaper::sortOutGlyphs(std::function<void(GlyphRange)>&& sortOutUnres
            block.end = i;
        } else {
            const char* cluster = text.begin() + clusterIndex(i);
-            SkUnichar codepoint = nextUtf8Unit(&cluster, text.end());
-            if (isControl(codepoint)) {
+            SkUnichar codepoint = utf8_next(&cluster, text.end());
+            if (u_iscntrl(codepoint)) {
                // This codepoint does not have to be resolved; let's pretend it's resolved
                if (block.start == EMPTY_INDEX) {
                    // Keep skipping resolved code points
@ -409,7 +419,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,
            // We have the global cache for all already found typefaces for SkUnichar
            // but we still need to keep track of all SkUnichars used in this unresolved block
            SkTHashSet<SkUnichar> alreadyTried;
-            SkUnichar unicode = nextUtf8Unit(&ch, unresolvedText.end());
+            SkUnichar unicode = utf8_next(&ch, unresolvedText.end());
            while (true) {

                sk_sp<SkTypeface> typeface;
@ -447,7 +457,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,

                // We can stop here or we can switch to another DIFFERENT codepoint
                while (ch != unresolvedText.end()) {
-                    unicode = nextUtf8Unit(&ch, unresolvedText.end());
+                    unicode = utf8_next(&ch, unresolvedText.end());
                    auto found = alreadyTried.find(unicode);
                    if (found == nullptr) {
                        alreadyTried.add(unicode);
@ -462,6 +472,10 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,

 bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {

+    if (!fParagraph->getBidiRegions()) {
+        return false;
+    }
+
    size_t bidiIndex = 0;

    SkScalar advanceX = 0;
@ -471,8 +485,8 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
            // Shape the text by bidi regions
            while (bidiIndex < fParagraph->fBidiRegions.size()) {
                BidiRegion& bidiRegion = fParagraph->fBidiRegions[bidiIndex];
-                auto start = std::max(bidiRegion.start, placeholder.fTextBefore.start);
-                auto end = std::min(bidiRegion.end, placeholder.fTextBefore.end);
+                auto start = std::max(bidiRegion.text.start, placeholder.fTextBefore.start);
+                auto end = std::min(bidiRegion.text.end, placeholder.fTextBefore.end);

                // Set up the iterators (the style iterator points to a bigger region that it could
                TextRange textRange(start, end);
@ -480,11 +494,11 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
                SkSpan<Block> styleSpan(fParagraph->blocks(blockRange));

                // Shape the text between placeholders
-                if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.level)) {
+                if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.direction)) {
                    return false;
                }

-                if (end == bidiRegion.end) {
+                if (end == bidiRegion.text.end) {
                    ++bidiIndex;
                } else /*if (end == placeholder.fTextBefore.end)*/ {
                    break;
--- a/modules/skparagraph/src/ParagraphCache.cpp
+++ b/modules/skparagraph/src/ParagraphCache.cpp
@ -50,7 +50,7 @@ public:
    // ICU results
    SkTArray<CodeUnitFlags> fCodeUnitProperties;
    std::vector<size_t> fWords;
-    std::vector<BidiRegion> fBidiRegions;
+    SkTArray<BidiRegion> fBidiRegions;
    SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
    SkTArray<size_t, true> fUTF16IndexForUTF8Index;
 };
--- a/modules/skparagraph/src/ParagraphImpl.cpp
+++ b/modules/skparagraph/src/ParagraphImpl.cpp
@ -25,6 +25,12 @@
 #endif

 #include <math.h>
+#include <unicode/ubidi.h>
+#include <unicode/uloc.h>
+#include <unicode/umachine.h>
+#include <unicode/ustring.h>
+#include <unicode/utext.h>
+#include <unicode/utypes.h>
 #include <algorithm>
 #include <utility>

@ -34,6 +40,9 @@ namespace textlayout {

 namespace {

+using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
+using ICUBiDi  = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
+
 SkScalar littleRound(SkScalar a) {
    // This rounding is done to match Flutter tests. Must be removed..
    auto val = std::fabs(a);
@ -45,6 +54,13 @@ SkScalar littleRound(SkScalar a) {
        return SkScalarFloorToScalar(a);
    }
 }
+
+/**
+ * Fetches the next code point through SkUTF::NextUTF8 (which receives `ptr` and `end` as-is).
+ * Whenever the decoder answers with a negative value, U+FFFD REPLACEMENT CHARACTER is
+ * returned in its place.
+ */
+static inline SkUnichar utf8_next(const char** ptr, const char* end) {
+    const SkUnichar decoded = SkUTF::NextUTF8(ptr, end);
+    if (decoded < 0) {
+        return 0xFFFD;
+    }
+    return decoded;
+}
+
 }

 TextRange operator*(const TextRange& a, const TextRange& b) {
@ -83,7 +99,6 @@ ParagraphImpl::ParagraphImpl(const SkString& text,
        , fOldWidth(0)
        , fOldHeight(0)
        , fOrigin(SkRect::MakeEmpty()) {
-    fICU = ::skia::SkUnicode::Make();
 }

 ParagraphImpl::ParagraphImpl(const std::u16string& utf16text,
@ -130,7 +145,7 @@ void ParagraphImpl::layout(SkScalar rawWidth) {
        this->fCodeUnitProperties.reset();
        this->fCodeUnitProperties.push_back_n(fText.size() + 1, CodeUnitFlags::kNoCodeUnitFlag);
        this->fWords.clear();
-        this->fBidiRegions.clear();
+        this->fBidiRegions.reset();
        this->fUTF8IndexForUTF16Index.reset();
        this->fUTF16IndexForUTF8Index.reset();
        this->fRuns.reset();
@ -229,6 +244,72 @@ void ParagraphImpl::resetContext() {
    fExceededMaxLines = false;
 }

+// Small wrapper around an ICU UBreakIterator that runs over UTF-8 text.
+// Forward queries (first/next/following) report the text size once ICU answers UBRK_DONE;
+// preceding() reports 0 in that case.
+class TextBreaker {
+public:
+    // fSize starts at 0 so first()/next() never read an indeterminate value.
+    TextBreaker() : fInitialized(false), fPos(-1), fSize(0) {}
+
+    // Opens a break iterator of `type` ("en" locale) and attaches it to `text`.
+    // Returns false when the UText cannot be opened or attached; aborts when ICU cannot
+    // create the iterator itself.
+    bool initialize(SkSpan<const char> text, UBreakIteratorType type) {
+
+        // Forget any earlier successful initialization first, so a failure below cannot leave
+        // the object claiming to be initialized while holding no iterator.
+        fInitialized = false;
+        fPos = -1;
+        fIterator = nullptr;
+        fSize = text.size();
+
+        UErrorCode status = U_ZERO_ERROR;
+        UText sUtf8UText = UTEXT_INITIALIZER;
+        ICUUText utf8UText(utext_openUTF8(&sUtf8UText, text.begin(), text.size(), &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Could not create utf8UText: %s", u_errorName(status));
+            return false;
+        }
+        fIterator.reset(ubrk_open(type, "en", nullptr, 0, &status));
+        if (U_FAILURE(status)) {
+            // The iterator type is caller-supplied, so the message must not claim "line".
+            SkDEBUGF("Could not create break iterator: %s", u_errorName(status));
+            SK_ABORT("");
+        }
+
+        // NOTE(review): utf8UText is closed when this function returns; this relies on
+        // ubrk_setUText keeping its own (shallow) clone of the UText - see the ICU ubrk.h docs.
+        ubrk_setUText(fIterator.get(), utf8UText.get(), &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Could not setText on break iterator: %s", u_errorName(status));
+            return false;
+        }
+
+        fInitialized = true;
+        fPos = 0;
+        return true;
+    }
+
+    bool initialized() const { return fInitialized; }
+
+    // Moves to the first boundary; yields the text size if ICU reports UBRK_DONE.
+    size_t first() {
+        fPos = ubrk_first(fIterator.get());
+        return eof() ? fSize : fPos;
+    }
+
+    // Moves to the next boundary; yields the text size if ICU reports UBRK_DONE.
+    size_t next() {
+        fPos = ubrk_next(fIterator.get());
+        return eof() ? fSize : fPos;
+    }
+
+    // Boundary before `offset`, or 0 when ICU reports UBRK_DONE. Does not update eof().
+    size_t preceding(size_t offset) {
+        auto pos = ubrk_preceding(fIterator.get(), static_cast<int32_t>(offset));
+        return pos == UBRK_DONE ? 0 : pos;
+    }
+
+    // Boundary after `offset`, or the text size when ICU reports UBRK_DONE. Does not update eof().
+    size_t following(size_t offset) {
+        auto pos = ubrk_following(fIterator.get(), static_cast<int32_t>(offset));
+        return pos == UBRK_DONE ? fSize : pos;
+    }
+
+    // Rule status of the most recently returned boundary (e.g. UBRK_LINE_HARD).
+    int32_t status() { return ubrk_getRuleStatus(fIterator.get()); }
+
+    // True before a successful initialize() and after first()/next() ran off the end.
+    bool eof() const { return fPos == UBRK_DONE; }
+
+private:
+    std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>> fIterator;
+    bool fInitialized;
+    int32_t fPos;
+    size_t fSize;
+};
+
 // shapeTextIntoEndlessLine is the thing that calls this method
 // (that contains all ICU dependencies except for words)
 bool ParagraphImpl::computeCodeUnitProperties() {
@ -239,41 +320,165 @@ bool ParagraphImpl::computeCodeUnitProperties() {
    }
    #endif

-    // Get bidi regions
-    Direction textDirection = fParagraphStyle.getTextDirection() == TextDirection::kLtr
-                              ? Direction::kLTR
-                              : Direction::kRTL;
-    if (!fICU->getBidiRegions(fText.c_str(), fText.size(), textDirection, &fBidiRegions)) {
+    {
+        const char* start = fText.c_str();
+        const char* end = start + fText.size();
+        const char* ch = start;
+        while (ch < end) {
+            auto index = ch - start;
+            auto unichar = utf8_next(&ch, end);
+            if (u_isWhitespace(unichar)) {
+                auto ending = ch - start;
+                for (auto k = index; k < ending; ++k) {
+                  fCodeUnitProperties[k] |= CodeUnitFlags::kPartOfWhiteSpace;
+                }
+            }
+        }
+    }
+    {
+        TextBreaker breaker;
+        if (!breaker.initialize(this->text(), UBRK_LINE)) {
+            return false;
+        }
+        while (!breaker.eof()) {
+            size_t currentPos = breaker.next();
+          fCodeUnitProperties[currentPos] |=
+              breaker.status() == UBRK_LINE_HARD ? CodeUnitFlags::kHardLineBreakBefore : CodeUnitFlags::kSoftLineBreakBefore;
+        }
+    }
+    {
+        TextBreaker breaker;
+        if (!breaker.initialize(this->text(), UBRK_CHARACTER)) {
+            return false;
+        }
+
+        while (!breaker.eof()) {
+            auto currentPos = breaker.next();
+          fCodeUnitProperties[currentPos] |= CodeUnitFlags::kGraphemeStart;
+        }
+    }
+
+    return true;
+}
+
+// getWordBoundary is the thing that calls this method lazily.
+// Fills fWords with every boundary an ICU word break iterator reports over a UTF-16 copy of
+// fText (so the stored positions index UTF-16 code units). Returns true when fWords is already
+// filled or was filled successfully, false on any ICU failure (fWords is then left untouched).
+bool ParagraphImpl::computeWords() {
+
+    if (!fWords.empty()) {
+        return true;
+    }
+
+    UErrorCode errorCode = U_ZERO_ERROR;
+
+    // Owning handle: ubrk_close now runs on every exit path, so the iterator cannot leak.
+    std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>> iter(
+            ubrk_open(UBRK_WORD, uloc_getDefault(), nullptr, 0, &errorCode));
+    if (U_FAILURE(errorCode)) {
+        SkDEBUGF("Could not create word break iterator: %s", u_errorName(errorCode));
         return false;
     }
 
-    // Get white spaces
-    std::vector<Position> whitespaces;
-    if (!fICU->getWhitespaces(fText.c_str(), fText.size(), &whitespaces)) {
+    // Measuring pass: with a null destination ICU is expected to answer with
+    // U_BUFFER_OVERFLOW_ERROR plus the required length. Any other failure (for example
+    // malformed UTF-8) may leave utf16Units unwritten, so it must stop here instead of being
+    // used as an allocation size.
+    int32_t utf16Units = 0;
+    u_strFromUTF8(nullptr, 0, &utf16Units, fText.c_str(), fText.size(), &errorCode);
+    if (U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
+        SkDEBUGF("Invalid utf8 input: %s", u_errorName(errorCode));
+        return false;
+    }
+    errorCode = U_ZERO_ERROR;
+    std::unique_ptr<UChar[]> utf16(new UChar[utf16Units]);
+    u_strFromUTF8(utf16.get(), utf16Units, nullptr, fText.c_str(), fText.size(), &errorCode);
+    if (U_FAILURE(errorCode)) {
+        SkDEBUGF("Invalid utf8 input: %s", u_errorName(errorCode));
         return false;
     }
-    for (auto whitespace : whitespaces) {
-        fCodeUnitProperties[whitespace] |= CodeUnitFlags::kPartOfWhiteSpace;
-    }
 
-    // Get line breaks
-    std::vector<LineBreakBefore> lineBreaks;
-    if (!fICU->getLineBreaks(fText.c_str(), fText.size(), &lineBreaks)) {
+    UText sUtf16UText = UTEXT_INITIALIZER;
+    ICUUText utf16UText(utext_openUChars(&sUtf16UText, utf16.get(), utf16Units, &errorCode));
+    if (U_FAILURE(errorCode)) {
+        SkDEBUGF("Could not create utf16UText: %s", u_errorName(errorCode));
         return false;
     }
-    for (auto& lineBreak : lineBreaks) {
-        fCodeUnitProperties[lineBreak.pos] |= lineBreak.breakType == LineBreakType::kHardLineBreak
-                                           ? CodeUnitFlags::kHardLineBreakBefore
-                                           : CodeUnitFlags::kSoftLineBreakBefore;
-    }
 
-    // Get graphemes
-    std::vector<Position> graphemes;
-    if (!fICU->getGraphemes(fText.c_str(), fText.size(), &graphemes)) {
+    ubrk_setUText(iter.get(), utf16UText.get(), &errorCode);
+    if (U_FAILURE(errorCode)) {
+        SkDEBUGF("Could not setText on break iterator: %s", u_errorName(errorCode));
         return false;
     }
-    for (auto pos : graphemes) {
-        fCodeUnitProperties[pos] |= CodeUnitFlags::kGraphemeStart;
+
+    // Record every boundary, starting with the one ubrk_first reports.
+    int32_t pos = ubrk_first(iter.get());
+    while (pos != UBRK_DONE) {
+        fWords.emplace_back(pos);
+        pos = ubrk_next(iter.get());
+    }
+
+    return true;
+}
+
+bool ParagraphImpl::getBidiRegions() {
+
+    if (!fBidiRegions.empty()) {
+        return true;
+    }
+
+    // ubidi only accepts utf16 (though internally it basically works on utf32 chars).
+    // We want an ubidi_setPara(UBiDi*, UText*, UBiDiLevel, UBiDiLevel*, UErrorCode*);
+    size_t utf8Bytes = fText.size();
+    const char* utf8 = fText.c_str();
+    uint8_t bidiLevel = fParagraphStyle.getTextDirection() == TextDirection::kLtr
+                            ? UBIDI_LTR
+                            : UBIDI_RTL;
+    if (!SkTFitsIn<int32_t>(utf8Bytes)) {
+        SkDEBUGF("Bidi error: text too long");
+        return false;
+    }
+
+    // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t utf16Units;
+    u_strFromUTF8(nullptr, 0, &utf16Units, utf8, utf8Bytes, &status);
+    status = U_ZERO_ERROR;
+    std::unique_ptr<UChar[]> utf16(new UChar[utf16Units]);
+    u_strFromUTF8(utf16.get(), utf16Units, nullptr, utf8, utf8Bytes, &status);
+    if (U_FAILURE(status)) {
+        SkDEBUGF("Invalid utf8 input: %s", u_errorName(status));
+        return false;
+    }
+
+    ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
+    if (U_FAILURE(status)) {
+        SkDEBUGF("Bidi error: %s", u_errorName(status));
+        return false;
+    }
+    SkASSERT(bidi);
+
+    // The required lifetime of utf16 isn't well documented.
+    // It appears it isn't used after ubidi_setPara except through ubidi_getText.
+    ubidi_setPara(bidi.get(), utf16.get(), utf16Units, bidiLevel, nullptr, &status);
+    if (U_FAILURE(status)) {
+        SkDEBUGF("Bidi error: %s", u_errorName(status));
+        return false;
+    }
+
+    SkTArray<BidiRegion> bidiRegions;
+    const char* start8 = utf8;
+    const char* end8 = utf8 + utf8Bytes;
+    TextRange textRange(0, 0);
+    UBiDiLevel currentLevel = 0;
+
+    int32_t pos16 = 0;
+    int32_t end16 = ubidi_getLength(bidi.get());
+    while (pos16 < end16) {
+        auto level = ubidi_getLevelAt(bidi.get(), pos16);
+        if (pos16 == 0) {
+            currentLevel = level;
+        } else if (level != currentLevel) {
+            textRange.end = start8 - utf8;
+            fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel);
+            currentLevel = level;
+            textRange = TextRange(textRange.end, textRange.end);
+        }
+        SkUnichar u = utf8_next(&start8, end8);
+        pos16 += SkUTF::ToUTF16(u);
+    }
+
+    textRange.end = start8 - utf8;
+    if (!textRange.empty()) {
+        fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel);
    }

    return true;
@ -678,23 +883,21 @@ PositionWithAffinity ParagraphImpl::getGlyphPositionAtCoordinate(SkScalar dx, Sk
 // By "glyph" they mean a character index - indicated by Minikin's code
 SkRange<size_t> ParagraphImpl::getWordBoundary(unsigned offset) {

-    if (fWords.empty()) {
-        if (!fICU->getWords(fText.c_str(), fText.size(), &fWords)) {
-            return {0, 0 };
-        }
+    if (!computeWords()) {
+        return {0, 0 };
    }

    int32_t start = 0;
    int32_t end = 0;
    for (size_t i = 0; i < fWords.size(); ++i) {
-        auto word = fWords[i];
-        if (word <= offset) {
-            start = word;
-            end = word;
-        } else if (word > offset) {
-            end = word;
-            break;
-        }
+      auto word = fWords[i];
+      if (word <= offset) {
+        start = word;
+        end = word;
+      } else if (word > offset) {
+        end = word;
+        break;
+      }
    }

    //SkDebugf("getWordBoundary(%d): %d - %d\n", offset, start, end);
@ -777,7 +980,7 @@ void ParagraphImpl::setState(InternalState state) {
            fCodeUnitProperties.reset();
            fCodeUnitProperties.push_back_n(fText.size() + 1, kNoCodeUnitFlag);
            fWords.clear();
-            fBidiRegions.clear();
+            fBidiRegions.reset();
            fUTF8IndexForUTF16Index.reset();
            fUTF16IndexForUTF8Index.reset();
            [[fallthrough]];
--- a/modules/skparagraph/src/ParagraphImpl.h
+++ b/modules/skparagraph/src/ParagraphImpl.h
@ -23,9 +23,9 @@
 #include "modules/skparagraph/include/TextShadow.h"
 #include "modules/skparagraph/include/TextStyle.h"
 #include "modules/skparagraph/src/Run.h"
-#include "modules/skshaper/src/SkUnicode.h"
 #include "src/core/SkSpan.h"

+#include <unicode/ubrk.h>
 #include <memory>
 #include <string>
 #include <vector>
@ -83,14 +83,14 @@ struct ResolvedFontDescriptor {
    SkFont fFont;
    TextIndex fTextStart;
 };
-/*
+
 // One stretch of paragraph text paired with a single bidi value.
 struct BidiRegion {
     // Builds the region from the (start, end) pair forwarded to TextRange and the value `dir`.
     BidiRegion(size_t start, size_t end, uint8_t dir)
         : text(start, end), direction(dir) { }
     // Text covered by this region.
     TextRange text;
     // Bidi value for the region; OneLineShaper hands it to the shape callback.
     uint8_t direction;
 };
-*/
+
 class ParagraphImpl final : public Paragraph {

 public:
@ -186,6 +186,8 @@ public:
    void resolveStrut();

    bool computeCodeUnitProperties();
+    bool computeWords();
+    bool getBidiRegions();

    void buildClusterTable();
    void spaceGlyphs();
@ -217,8 +219,6 @@ public:

    bool codeUnitHasProperty(size_t index, CodeUnitFlags property) const { return (fCodeUnitProperties[index] & property) == property; }

-    SkUnicode* getICU() { return fICU.get(); }
-
 private:
    friend class ParagraphBuilder;
    friend class ParagraphCacheKey;
@ -250,7 +250,7 @@ private:
    SkTArray<CodeUnitFlags> fCodeUnitProperties;
    SkTArray<size_t> fClustersIndexFromCodeUnit;
    std::vector<size_t> fWords;
-    std::vector<BidiRegion> fBidiRegions;
+    SkTArray<BidiRegion> fBidiRegions;
    // These two arrays are used in measuring methods (getRectsForRange, getGlyphPositionAtCoordinate)
    // They are filled lazily whenever they need and cached
    SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
@ -269,8 +269,6 @@ private:
    SkScalar fOldHeight;
    SkScalar fMaxWidthWithTrailingSpaces;
    SkRect fOrigin;
-
-    std::unique_ptr<SkUnicode> fICU;
 };
 }  // namespace textlayout
 }  // namespace skia
--- a/modules/skparagraph/src/ParagraphUtil.cpp
+++ b/modules/skparagraph/src/ParagraphUtil.cpp
@ -4,10 +4,8 @@
 #include "include/core/SkTypes.h"
 #include "include/private/SkTo.h"
 #include "modules/skparagraph/src/ParagraphUtil.h"
-#include "src/utils/SkUTF.h"

 #include <unicode/umachine.h>
-#include <unicode/uchar.h>
 #include <unicode/ustring.h>
 #include <unicode/utypes.h>
 #include <string>
@ -32,14 +30,5 @@ SkString SkStringFromU16String(const std::u16string& utf16text) {
    return dst;
 }

-SkUnichar nextUtf8Unit(const char** ptr, const char* end) {
-    SkUnichar val = SkUTF::NextUTF8(ptr, end);
-    return val < 0 ? 0xFFFD : val;
-}
-
-bool isControl(SkUnichar utf8) {
-    return u_iscntrl(utf8);
-}
-
 }
 }
--- a/modules/skparagraph/src/ParagraphUtil.h
+++ b/modules/skparagraph/src/ParagraphUtil.h
@ -8,8 +8,6 @@
 namespace skia {
 namespace textlayout {
 SkString SkStringFromU16String(const std::u16string& utf16text);
-SkUnichar nextUtf8Unit(const char** ptr, const char* end);
-bool isControl(SkUnichar utf8);
 }
 }

--- a/modules/skparagraph/src/TextLine.cpp
+++ b/modules/skparagraph/src/TextLine.cpp
@ -21,6 +21,7 @@
 #include "modules/skshaper/include/SkShaper.h"
 #include "src/core/SkSpan.h"

+#include <unicode/ubidi.h>
 #include <algorithm>
 #include <iterator>
 #include <limits>
@ -130,20 +131,21 @@ TextLine::TextLine(ParagraphImpl* master,

    // This is just chosen to catch the common/fast cases. Feel free to tweak.
    constexpr int kPreallocCount = 4;
-    SkAutoSTArray<kPreallocCount, BidiLevel> runLevels(numRuns);
+
+    SkAutoSTArray<kPreallocCount, UBiDiLevel> runLevels(numRuns);
+
    size_t runLevelsIndex = 0;
    for (auto runIndex = start.runIndex(); runIndex <= end.runIndex(); ++runIndex) {
        auto& run = fMaster->run(runIndex);
        runLevels[runLevelsIndex++] = run.fBidiLevel;
-        fMaxRunMetrics.add(
-            InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent, run.fFontMetrics.fLeading));
+        fMaxRunMetrics.add(InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent,
+                                               run.fFontMetrics.fLeading));
    }
    SkASSERT(runLevelsIndex == numRuns);

    SkAutoSTArray<kPreallocCount, int32_t> logicalOrder(numRuns);

-    // TODO: hide all these logic in SkUnicode?
-    fMaster->getICU()->reorderVisual(runLevels.data(), numRuns, logicalOrder.data());
+    ubidi_reorderVisual(runLevels.data(), SkToU32(numRuns), logicalOrder.data());
    auto firstRunIndex = start.runIndex();
    for (auto index : logicalOrder) {
        fRunsInVisualOrder.push_back(firstRunIndex + index);
--- a/modules/skparagraph/tests/SkParagraphTest.cpp
+++ b/modules/skparagraph/tests/SkParagraphTest.cpp
@ -4579,8 +4579,7 @@ DEF_TEST(SkParagraph_WhitespacesInMultipleFonts, reporter) {
    }
 }

-// Disable until I sort out fonts
-DEF_TEST_DISABLED(SkParagraph_JSON1, reporter) {
+DEF_TEST(SkParagraph_JSON1, reporter) {
    sk_sp<ResourceFontCollection> fontCollection = sk_make_sp<ResourceFontCollection>();
    if (!fontCollection->fontsFound()) return;
    const char* text = "👨‍👩‍👧‍👦";
@ -4618,8 +4617,7 @@ DEF_TEST_DISABLED(SkParagraph_JSON1, reporter) {
    REPORTER_ASSERT(reporter, cluster <= 2);
 }

-// Disable until I sort out fonts
-DEF_TEST_DISABLED(SkParagraph_JSON2, reporter) {
+DEF_TEST(SkParagraph_JSON2, reporter) {
    sk_sp<ResourceFontCollection> fontCollection = sk_make_sp<ResourceFontCollection>();
    if (!fontCollection->fontsFound()) return;
    const char* text = "p〠q";
--- a/modules/skshaper/BUILD.gn
+++ b/modules/skshaper/BUILD.gn
@ -21,7 +21,6 @@ if (skia_enable_skshaper) {
    }
    if (skia_use_icu && skia_use_harfbuzz) {
      defines += [ "SK_SHAPER_HARFBUZZ_AVAILABLE" ]
-      defines += [ "SK_UNICODE_AVAILABLE" ]
    }
  }

@ -31,16 +30,12 @@ if (skia_enable_skshaper) {
    public_configs = [ ":public_config" ]
    public = skia_shaper_public
    deps = [ "../..:skia" ]
-    defines = [
-      "SKSHAPER_IMPLEMENTATION=1",
-      "SKUNICODE_IMPLEMENTATION=1",
-    ]
+    defines = [ "SKSHAPER_IMPLEMENTATION=1" ]
    sources = skia_shaper_primitive_sources
    if (skia_use_fonthost_mac) {
      sources += skia_shaper_coretext_sources
    }
    if (skia_use_icu && skia_use_harfbuzz) {
-      sources += skia_shaper_icu_sources
      sources += skia_shaper_harfbuzz_sources
      deps += [
        "//third_party/harfbuzz",
--- a/modules/skshaper/skshaper.gni
+++ b/modules/skshaper/skshaper.gni
@ -13,6 +13,5 @@ skia_shaper_primitive_sources = [
  "$_src/SkShaper.cpp",
  "$_src/SkShaper_primitive.cpp",
 ]
-skia_shaper_icu_sources = [ "$_src/SkUnicode_icu.cpp" ]
 skia_shaper_harfbuzz_sources = [ "$_src/SkShaper_harfbuzz.cpp" ]
 skia_shaper_coretext_sources = [ "$_src/SkShaper_coretext.cpp" ]
--- a/modules/skshaper/src/SkShaper.cpp
+++ b/modules/skshaper/src/SkShaper.cpp
@ -13,10 +13,6 @@
 #include "include/core/SkTypeface.h"
 #include "include/private/SkTFitsIn.h"
 #include "modules/skshaper/include/SkShaper.h"
-
-#ifdef SK_UNICODE_AVAILABLE
-#include "modules/skshaper/src/SkUnicode.h"
-#endif
 #include "src/core/SkTextBlobPriv.h"
 #include "src/utils/SkUTF.h"

--- a/modules/skshaper/src/SkUnicode.h
+++ b/modules/skshaper/src/SkUnicode.h
@ -1,101 +0,0 @@
-/*
- * Copyright 2020 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-#ifndef SkUnicode_DEFINED
-#define SkUnicode_DEFINED
-
-#include "include/core/SkTypes.h"
-#include "src/core/SkSpan.h"
-#include <vector>
-
-#if !defined(SKUNICODE_IMPLEMENTATION)
-    #define SKUNICODE_IMPLEMENTATION 0
-#endif
-
-#if !defined(SKUNICODE_API)
-    #if defined(SKSHAPER_DLL)
-        #if defined(_MSC_VER)
-            #if SKUNICODE_IMPLEMENTATION
-                #define SKUNICODE_API __declspec(dllexport)
-            #else
-                #define SKUNICODE_API __declspec(dllimport)
-            #endif
-        #else
-            #define SKUNICODE_API __attribute__((visibility("default")))
-        #endif
-    #else
-        #define SKUNICODE_API
-    #endif
-#endif
-
-namespace skia {
-
-enum class UtfFormat {
-    kUTF8,
-    kUTF16
-};
-// Bidi
-typedef size_t Position;
-typedef uint8_t BidiLevel;
-enum class Direction {
-    kLTR,
-    kRTL,
-};
-struct BidiRegion {
-    BidiRegion(Position start, Position end, BidiLevel level)
-      : start(start), end(end), level(level) { }
-    Position start;
-    Position end;
-    BidiLevel level;
-};
-// LineBreaks
-enum class LineBreakType {
-    kSoftLineBreak,
-    kHardLineBreak
-};
-struct LineBreakBefore {
-    LineBreakBefore(Position pos, LineBreakType breakType)
-      : pos(pos), breakType(breakType) { }
-    Position pos;
-    LineBreakType breakType;
-};
-// Other breaks
-enum class UBreakType {
-    kWords,
-    kGraphemes,
-    kLines
-};
-struct Range {
-    Position start;
-    Position end;
-};
-
-class SKUNICODE_API SkUnicode {
-    public:
-        typedef uint32_t ScriptID;
-        typedef uint32_t CombiningClass;
-        typedef uint32_t GeneralCategory;
-        virtual ~SkUnicode() = default;
-        // High level methods (that we actually use somewhere=SkParagraph)
-        virtual bool getBidiRegions
-               (const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) = 0;
-        virtual bool getLineBreaks
-               (const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) = 0;
-        virtual bool getWords
-               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
-        virtual bool getGraphemes
-               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
-        virtual bool getWhitespaces
-               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
-
-        virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
-
-        static std::unique_ptr<SkUnicode> Make();
-};
-
-}
-
-#endif // SkUnicode_DEFINED
--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@ -1,243 +0,0 @@
-/*
-* Copyright 2020 Google Inc.
-*
-* Use of this source code is governed by a BSD-style license that can be
-* found in the LICENSE file.
-*/
-#include "include/private/SkTFitsIn.h"
-#include "include/private/SkTemplates.h"
-#include "modules/skshaper/src/SkUnicode.h"
-#include "src/utils/SkUTF.h"
-#include <unicode/ubidi.h>
-#include <unicode/ubrk.h>
-#include <unicode/utext.h>
-#include <unicode/utypes.h>
-#include <vector>
-#include <functional>
-
-using ICUBiDi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
-using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
-using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>>;
-
-/** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
-static inline SkUnichar utf8_next(const char** ptr, const char* end) {
-    SkUnichar val = SkUTF::NextUTF8(ptr, end);
-    return val < 0 ? 0xFFFD : val;
-}
-
-namespace skia {
-
-class SkUnicode_icu : public SkUnicode {
-
-    static UBreakIteratorType convertType(UBreakType type) {
-        switch (type) {
-            case UBreakType::kLines: return UBRK_LINE;
-            case UBreakType::kGraphemes: return UBRK_CHARACTER;
-            case UBreakType::kWords: return UBRK_WORD;
-            default:
-              SkDEBUGF("Convert error: wrong break type");
-              return UBRK_CHARACTER;
-        }
-    }
-
-    static int convertUtf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
-        int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
-        if (utf16Units < 0) {
-            SkDEBUGF("Convert error: Invalid utf8 input");
-            return utf16Units;
-        }
-        *utf16 = std::unique_ptr<uint16_t[]>(new uint16_t[utf16Units]);
-        SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16->get(), utf16Units, utf8, utf8Units);
-        SkASSERT(dstLen == utf16Units);
-        return utf16Units;
-    }
-
-    static bool extractBidi(const char utf8[], int utf8Units,  Direction dir, std::vector<BidiRegion>* bidiRegions) {
-
-        // Convert to UTF16 since for now bidi iterator only operates on utf16
-        std::unique_ptr<uint16_t[]> utf16;
-        auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
-        if (utf16Units < 0) {
-            return false;
-        }
-
-        // Create bidi iterator
-        UErrorCode status = U_ZERO_ERROR;
-        ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Bidi error: %s", u_errorName(status));
-            return false;
-        }
-        SkASSERT(bidi);
-        uint8_t bidiLevel = (dir == Direction::kLTR) ? UBIDI_LTR : UBIDI_RTL;
-        // The required lifetime of utf16 isn't well documented.
-        // It appears it isn't used after ubidi_setPara except through ubidi_getText.
-        ubidi_setPara(bidi.get(), (const UChar*)utf16.get(), utf16Units, bidiLevel, nullptr, &status);
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Bidi error: %s", u_errorName(status));
-            return false;
-        }
-
-        // Iterate through bidi regions and the result positions into utf8
-        const char* start8 = utf8;
-        const char* end8 = utf8 + utf8Units;
-        BidiLevel currentLevel = 0;
-
-        Position pos8 = 0;
-        Position pos16 = 0;
-        Position end16 = ubidi_getLength(bidi.get());
-        while (pos16 < end16) {
-            auto level = ubidi_getLevelAt(bidi.get(), pos16);
-            if (pos16 == 0) {
-                currentLevel = level;
-            } else if (level != currentLevel) {
-                Position end = start8 - utf8;
-                bidiRegions->emplace_back(pos8, end, currentLevel);
-                currentLevel = level;
-                pos8 = end;
-            }
-            SkUnichar u = utf8_next(&start8, end8);
-            pos16 += SkUTF::ToUTF16(u);
-        }
-        Position end = start8 - utf8;
-        if (end != pos8) {
-            bidiRegions->emplace_back(pos8, end, currentLevel);
-        }
-        return true;
-    }
-
-    static bool extractWords(uint16_t utf16[], int utf16Units, std::vector<Position>* words) {
-
-        UErrorCode status = U_ZERO_ERROR;
-
-        UBreakIteratorType breakType = convertType(UBreakType::kWords);
-        ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status));
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Break error: %s", u_errorName(status));
-            return false;
-        }
-        SkASSERT(iterator);
-
-        UText sUtf16UText = UTEXT_INITIALIZER;
-        ICUUText utf16UText(utext_openUChars(&sUtf16UText, (UChar*)utf16, utf16Units, &status));
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Break error: %s", u_errorName(status));
-            return false;
-        }
-
-        ubrk_setUText(iterator.get(), utf16UText.get(), &status);
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Break error: %s", u_errorName(status));
-            return false;
-        }
-
-        // Get the words
-        int32_t pos = ubrk_first(iterator.get());
-        while (pos != UBRK_DONE) {
-            words->emplace_back(pos);
-            pos = ubrk_next(iterator.get());
-        }
-
-        return true;
-    }
-
-    static bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
-
-        UErrorCode status = U_ZERO_ERROR;
-        UText sUtf8UText = UTEXT_INITIALIZER;
-        ICUUText text(utext_openUTF8(&sUtf8UText, &utf8[0], utf8Units, &status));
-
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Break error: %s", u_errorName(status));
-            return false;
-        }
-        SkASSERT(text);
-
-        ICUBreakIterator iterator(ubrk_open(convertType(type), uloc_getDefault(), nullptr, 0, &status));
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Break error: %s", u_errorName(status));
-        }
-
-        ubrk_setUText(iterator.get(), text.get(), &status);
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Break error: %s", u_errorName(status));
-            return false;
-        }
-
-        auto iter = iterator.get();
-        int32_t pos = ubrk_first(iter);
-        while (pos != UBRK_DONE) {
-            add(pos, ubrk_getRuleStatus(iter));
-            pos = ubrk_next(iter);
-        }
-        return true;
-    }
-
-    static bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
-
-        const char* start = utf8;
-        const char* end = utf8 + utf8Units;
-        const char* ch = start;
-        while (ch < end) {
-            auto index = ch - start;
-            auto unichar = utf8_next(&ch, end);
-            if (u_isWhitespace(unichar)) {
-                auto ending = ch - start;
-                for (auto k = index; k < ending; ++k) {
-                  whitespaces->emplace_back(k);
-                }
-            }
-        }
-        return true;
-    }
-
-public:
-    ~SkUnicode_icu() override { }
-
-    bool getBidiRegions(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) override {
-        return extractBidi(utf8, utf8Units, dir, results);
-    }
-
-    bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
-
-        return extractPositions(utf8, utf8Units, UBreakType::kLines,
-            [results](int pos, int status) {
-                    results->emplace_back(pos,status == UBRK_LINE_HARD
-                                                        ? LineBreakType::kHardLineBreak
-                                                        : LineBreakType::kSoftLineBreak);
-        });
-    }
-
-    bool getWords(const char utf8[], int utf8Units, std::vector<Position>* results) override {
-
-        // Convert to UTF16 since we want the results in utf16
-        std::unique_ptr<uint16_t[]> utf16;
-        auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
-        if (utf16Units < 0) {
-            return false;
-        }
-
-        return extractWords(utf16.get(), utf16Units, results);
-    }
-
-    bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
-
-        return extractPositions(utf8, utf8Units, UBreakType::kGraphemes,
-            [results](int pos, int status) { results->emplace_back(pos);
-        });
-    }
-
-    bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
-
-        return extractWhitespaces(utf8, utf8Units, results);
-    }
-
-    void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override {
-        ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
-    }
-};
-
-std::unique_ptr<SkUnicode> SkUnicode::Make() { return std::make_unique<SkUnicode_icu>(); }
-
-}
-
--- a/src/utils/SkUTF.cpp
+++ b/src/utils/SkUTF.cpp
@ -251,36 +251,3 @@ size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) {
    return 1 + extra;
 }

-int SkUTF::UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength) {
-    if (!dst) {
-        dstCapacity = 0;
-    }
-
-    int dstLength = 0;
-    uint16_t* endDst = dst + dstCapacity;
-    const char* endSrc = src + srcByteLength;
-    while (src < endSrc) {
-        SkUnichar uni = NextUTF8(&src, endSrc);
-        if (uni < 0) {
-            return -1;
-        }
-
-        uint16_t utf16[2];
-        size_t count = ToUTF16(uni, utf16);
-        if (count == 0) {
-            return -1;
-        }
-        dstLength += count;
-
-        if (dst) {
-            uint16_t* elems = utf16;
-            while (dst < endDst && count > 0) {
-                *dst++ = *elems++;
-                count -= 1;
-            }
-        }
-    }
-    return dstLength;
-}
-
-
--- a/src/utils/SkUTF.h
+++ b/src/utils/SkUTF.h
@ -64,12 +64,6 @@ SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr
 */
 SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);

-/** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence.
- *  If dst is not null, it is filled with the corresponding values up to its capacity.
- *  If there is an error, -1 is returned and the dst[] buffer is undefined.
- */
-SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength);
-
 }  // namespace SkUTF

 #endif  // SkUTF_DEFINED