ICU API: only in SkParagraph, simplified (relanding reverted).

Reverted commit: https://skia-review.googlesource.com/c/skia/+/296128/ Change-Id: Iaf793bff94a6060579c7d6176d477e598c047be6 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303261 Reviewed-by: Mike Reed <reed@google.com> Commit-Queue: Julia Lavrova <jlavrova@google.com>
2020-06-12 15:34:45 -04:00 · 2020-06-12 15:34:45 -04:00 · 7479eda3b6
commit 7479eda3b6
parent 9d960f1e0f
14 changed files with 455 additions and 279 deletions
--- a/modules/skparagraph/src/OneLineShaper.cpp
+++ b/modules/skparagraph/src/OneLineShaper.cpp
@ -2,23 +2,13 @@

 #include "modules/skparagraph/src/Iterators.h"
 #include "modules/skparagraph/src/OneLineShaper.h"
-#include <unicode/uchar.h>
+#include "modules/skparagraph/src/ParagraphUtil.h"
 #include <algorithm>
 #include <unordered_set>
-#include "src/utils/SkUTF.h"

 namespace skia {
 namespace textlayout {

-namespace {
-
-SkUnichar utf8_next(const char** ptr, const char* end) {
-    SkUnichar val = SkUTF::NextUTF8(ptr, end);
-    return val < 0 ? 0xFFFD : val;
-}
-
-}
-
 void OneLineShaper::commitRunBuffer(const RunInfo&) {

    fCurrentRun->commit();
@ -313,8 +303,8 @@ void OneLineShaper::sortOutGlyphs(std::function<void(GlyphRange)>&& sortOutUnres
            block.end = i;
        } else {
            const char* cluster = text.begin() + clusterIndex(i);
-            SkUnichar codepoint = utf8_next(&cluster, text.end());
-            if (u_iscntrl(codepoint)) {
+            SkUnichar codepoint = nextUtf8Unit(&cluster, text.end());
+            if (isControl(codepoint)) {
                // This codepoint does not have to be resolved; let's pretend it's resolved
                if (block.start == EMPTY_INDEX) {
                    // Keep skipping resolved code points
@ -419,7 +409,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,
            // We have the global cache for all already found typefaces for SkUnichar
            // but we still need to keep track of all SkUnichars used in this unresolved block
            SkTHashSet<SkUnichar> alreadyTried;
-            SkUnichar unicode = utf8_next(&ch, unresolvedText.end());
+            SkUnichar unicode = nextUtf8Unit(&ch, unresolvedText.end());
            while (true) {

                sk_sp<SkTypeface> typeface;
@ -457,7 +447,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,

                // We can stop here or we can switch to another DIFFERENT codepoint
                while (ch != unresolvedText.end()) {
-                    unicode = utf8_next(&ch, unresolvedText.end());
+                    unicode = nextUtf8Unit(&ch, unresolvedText.end());
                    auto found = alreadyTried.find(unicode);
                    if (found == nullptr) {
                        alreadyTried.add(unicode);
@ -472,10 +462,6 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,

 bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {

-    if (!fParagraph->getBidiRegions()) {
-        return false;
-    }
-
    size_t bidiIndex = 0;

    SkScalar advanceX = 0;
@ -485,8 +471,8 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
            // Shape the text by bidi regions
            while (bidiIndex < fParagraph->fBidiRegions.size()) {
                BidiRegion& bidiRegion = fParagraph->fBidiRegions[bidiIndex];
-                auto start = std::max(bidiRegion.text.start, placeholder.fTextBefore.start);
-                auto end = std::min(bidiRegion.text.end, placeholder.fTextBefore.end);
+                auto start = std::max(bidiRegion.start, placeholder.fTextBefore.start);
+                auto end = std::min(bidiRegion.end, placeholder.fTextBefore.end);

                // Set up the iterators (the style iterator points to a bigger region that it could
                TextRange textRange(start, end);
@ -494,11 +480,11 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
                SkSpan<Block> styleSpan(fParagraph->blocks(blockRange));

                // Shape the text between placeholders
-                if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.direction)) {
+                if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.level)) {
                    return false;
                }

-                if (end == bidiRegion.text.end) {
+                if (end == bidiRegion.end) {
                    ++bidiIndex;
                } else /*if (end == placeholder.fTextBefore.end)*/ {
                    break;
--- a/modules/skparagraph/src/ParagraphCache.cpp
+++ b/modules/skparagraph/src/ParagraphCache.cpp
@ -50,7 +50,7 @@ public:
    // ICU results
    SkTArray<CodeUnitFlags> fCodeUnitProperties;
    std::vector<size_t> fWords;
-    SkTArray<BidiRegion> fBidiRegions;
+    std::vector<BidiRegion> fBidiRegions;
    SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
    SkTArray<size_t, true> fUTF16IndexForUTF8Index;
 };
--- a/modules/skparagraph/src/ParagraphImpl.cpp
+++ b/modules/skparagraph/src/ParagraphImpl.cpp
@ -25,12 +25,6 @@
 #endif

 #include <math.h>
-#include <unicode/ubidi.h>
-#include <unicode/uloc.h>
-#include <unicode/umachine.h>
-#include <unicode/ustring.h>
-#include <unicode/utext.h>
-#include <unicode/utypes.h>
 #include <algorithm>
 #include <utility>

@ -40,9 +34,6 @@ namespace textlayout {

 namespace {

-using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
-using ICUBiDi  = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
-
 SkScalar littleRound(SkScalar a) {
    // This rounding is done to match Flutter tests. Must be removed..
    auto val = std::fabs(a);
@ -54,13 +45,6 @@ SkScalar littleRound(SkScalar a) {
        return SkScalarFloorToScalar(a);
    }
 }
-
-/** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
-static inline SkUnichar utf8_next(const char** ptr, const char* end) {
-    SkUnichar val = SkUTF::NextUTF8(ptr, end);
-    return val < 0 ? 0xFFFD : val;
-}
-
 }

 TextRange operator*(const TextRange& a, const TextRange& b) {
@ -99,6 +83,7 @@ ParagraphImpl::ParagraphImpl(const SkString& text,
        , fOldWidth(0)
        , fOldHeight(0)
        , fOrigin(SkRect::MakeEmpty()) {
+    fICU = SkUnicode_Make();
 }

 ParagraphImpl::ParagraphImpl(const std::u16string& utf16text,
@ -145,7 +130,7 @@ void ParagraphImpl::layout(SkScalar rawWidth) {
        this->fCodeUnitProperties.reset();
        this->fCodeUnitProperties.push_back_n(fText.size() + 1, CodeUnitFlags::kNoCodeUnitFlag);
        this->fWords.clear();
-        this->fBidiRegions.reset();
+        this->fBidiRegions.clear();
        this->fUTF8IndexForUTF16Index.reset();
        this->fUTF16IndexForUTF8Index.reset();
        this->fRuns.reset();
@ -244,72 +229,6 @@ void ParagraphImpl::resetContext() {
    fExceededMaxLines = false;
 }

-class TextBreaker {
-public:
-    TextBreaker() : fInitialized(false), fPos(-1) {}
-
-    bool initialize(SkSpan<const char> text, UBreakIteratorType type) {
-
-        UErrorCode status = U_ZERO_ERROR;
-        fIterator = nullptr;
-        fSize = text.size();
-        UText sUtf8UText = UTEXT_INITIALIZER;
-        std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>> utf8UText(
-            utext_openUTF8(&sUtf8UText, text.begin(), text.size(), &status));
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Could not create utf8UText: %s", u_errorName(status));
-            return false;
-        }
-        fIterator.reset(ubrk_open(type, "en", nullptr, 0, &status));
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Could not create line break iterator: %s", u_errorName(status));
-            SK_ABORT("");
-        }
-
-        ubrk_setUText(fIterator.get(), utf8UText.get(), &status);
-        if (U_FAILURE(status)) {
-            SkDEBUGF("Could not setText on break iterator: %s", u_errorName(status));
-            return false;
-        }
-
-        fInitialized = true;
-        fPos = 0;
-        return true;
-    }
-
-    bool initialized() const { return fInitialized; }
-
-    size_t first() {
-        fPos = ubrk_first(fIterator.get());
-        return eof() ? fSize : fPos;
-    }
-
-    size_t next() {
-        fPos = ubrk_next(fIterator.get());
-        return eof() ? fSize : fPos;
-    }
-
-    size_t preceding(size_t offset) {
-        auto pos = ubrk_preceding(fIterator.get(), offset);
-        return pos == UBRK_DONE ? 0 : pos;
-    }
-
-    size_t following(size_t offset) {
-        auto pos = ubrk_following(fIterator.get(), offset);
-        return pos == UBRK_DONE ? fSize : pos;
-    }
-
-    int32_t status() { return ubrk_getRuleStatus(fIterator.get()); }
-
-    bool eof() { return fPos == UBRK_DONE; }
-
-private:
-    std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>> fIterator;
-    bool fInitialized;
-    int32_t fPos;
-    size_t fSize;
-};
-
 // shapeTextIntoEndlessLine is the thing that calls this method
 // (that contains all ICU dependencies except for words)
 bool ParagraphImpl::computeCodeUnitProperties() {
@ -320,165 +239,41 @@ bool ParagraphImpl::computeCodeUnitProperties() {
    }
    #endif

-    {
-        const char* start = fText.c_str();
-        const char* end = start + fText.size();
-        const char* ch = start;
-        while (ch < end) {
-            auto index = ch - start;
-            auto unichar = utf8_next(&ch, end);
-            if (u_isWhitespace(unichar)) {
-                auto ending = ch - start;
-                for (auto k = index; k < ending; ++k) {
-                  fCodeUnitProperties[k] |= CodeUnitFlags::kPartOfWhiteSpace;
-                }
-            }
-        }
-    }
-    {
-        TextBreaker breaker;
-        if (!breaker.initialize(this->text(), UBRK_LINE)) {
-            return false;
-        }
-        while (!breaker.eof()) {
-            size_t currentPos = breaker.next();
-          fCodeUnitProperties[currentPos] |=
-              breaker.status() == UBRK_LINE_HARD ? CodeUnitFlags::kHardLineBreakBefore : CodeUnitFlags::kSoftLineBreakBefore;
-        }
-    }
-    {
-        TextBreaker breaker;
-        if (!breaker.initialize(this->text(), UBRK_CHARACTER)) {
-            return false;
-        }
-
-        while (!breaker.eof()) {
-            auto currentPos = breaker.next();
-          fCodeUnitProperties[currentPos] |= CodeUnitFlags::kGraphemeStart;
-        }
-    }
-
-    return true;
-}
-
-// getWordBoundary is the thing that calls this method lazily
-bool ParagraphImpl::computeWords() {
-
-    if (!fWords.empty()) {
-        return true;
-    }
-
-    UErrorCode errorCode = U_ZERO_ERROR;
-
-    auto iter = ubrk_open(UBRK_WORD, uloc_getDefault(), nullptr, 0, &errorCode);
-    if (U_FAILURE(errorCode)) {
-        SkDEBUGF("Could not create line break iterator: %s", u_errorName(errorCode));
+    // Get bidi regions
+    Direction textDirection = fParagraphStyle.getTextDirection() == TextDirection::kLtr
+                              ? Direction::kLTR
+                              : Direction::kRTL;
+    if (!fICU->getBidiRegions(fText.c_str(), fText.size(), textDirection, &fBidiRegions)) {
        return false;
    }

-    // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
-    int32_t utf16Units;
-    u_strFromUTF8(nullptr, 0, &utf16Units, fText.c_str(), fText.size(), &errorCode);
-    errorCode = U_ZERO_ERROR;
-    std::unique_ptr<UChar[]> utf16(new UChar[utf16Units]);
-    u_strFromUTF8(utf16.get(), utf16Units, nullptr, fText.c_str(), fText.size(), &errorCode);
-    if (U_FAILURE(errorCode)) {
-        SkDEBUGF("Invalid utf8 input: %s", u_errorName(errorCode));
+    // Get white spaces
+    std::vector<Position> whitespaces;
+    if (!fICU->getWhitespaces(fText.c_str(), fText.size(), &whitespaces)) {
        return false;
    }
+    for (auto whitespace : whitespaces) {
+        fCodeUnitProperties[whitespace] |= CodeUnitFlags::kPartOfWhiteSpace;
+    }

-    UText sUtf16UText = UTEXT_INITIALIZER;
-    ICUUText utf8UText(utext_openUChars(&sUtf16UText, utf16.get(), utf16Units, &errorCode));
-    if (U_FAILURE(errorCode)) {
-        SkDEBUGF("Could not create utf8UText: %s", u_errorName(errorCode));
+    // Get line breaks
+    std::vector<LineBreakBefore> lineBreaks;
+    if (!fICU->getLineBreaks(fText.c_str(), fText.size(), &lineBreaks)) {
        return false;
    }
+    for (auto& lineBreak : lineBreaks) {
+        fCodeUnitProperties[lineBreak.pos] |= lineBreak.breakType == LineBreakType::kHardLineBreak
+                                           ? CodeUnitFlags::kHardLineBreakBefore
+                                           : CodeUnitFlags::kSoftLineBreakBefore;
+    }

-    ubrk_setUText(iter, utf8UText.get(), &errorCode);
-    if (U_FAILURE(errorCode)) {
-        SkDEBUGF("Could not setText on break iterator: %s", u_errorName(errorCode));
+    // Get graphemes
+    std::vector<Position> graphemes;
+    if (!fICU->getGraphemes(fText.c_str(), fText.size(), &graphemes)) {
        return false;
    }
-
-    int32_t pos = ubrk_first(iter);
-    while (pos != UBRK_DONE) {
-        fWords.emplace_back(pos);
-        pos = ubrk_next(iter);
-    }
-
-    return true;
-}
-
-bool ParagraphImpl::getBidiRegions() {
-
-    if (!fBidiRegions.empty()) {
-        return true;
-    }
-
-    // ubidi only accepts utf16 (though internally it basically works on utf32 chars).
-    // We want an ubidi_setPara(UBiDi*, UText*, UBiDiLevel, UBiDiLevel*, UErrorCode*);
-    size_t utf8Bytes = fText.size();
-    const char* utf8 = fText.c_str();
-    uint8_t bidiLevel = fParagraphStyle.getTextDirection() == TextDirection::kLtr
-                            ? UBIDI_LTR
-                            : UBIDI_RTL;
-    if (!SkTFitsIn<int32_t>(utf8Bytes)) {
-        SkDEBUGF("Bidi error: text too long");
-        return false;
-    }
-
-    // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
-    UErrorCode status = U_ZERO_ERROR;
-    int32_t utf16Units;
-    u_strFromUTF8(nullptr, 0, &utf16Units, utf8, utf8Bytes, &status);
-    status = U_ZERO_ERROR;
-    std::unique_ptr<UChar[]> utf16(new UChar[utf16Units]);
-    u_strFromUTF8(utf16.get(), utf16Units, nullptr, utf8, utf8Bytes, &status);
-    if (U_FAILURE(status)) {
-        SkDEBUGF("Invalid utf8 input: %s", u_errorName(status));
-        return false;
-    }
-
-    ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
-    if (U_FAILURE(status)) {
-        SkDEBUGF("Bidi error: %s", u_errorName(status));
-        return false;
-    }
-    SkASSERT(bidi);
-
-    // The required lifetime of utf16 isn't well documented.
-    // It appears it isn't used after ubidi_setPara except through ubidi_getText.
-    ubidi_setPara(bidi.get(), utf16.get(), utf16Units, bidiLevel, nullptr, &status);
-    if (U_FAILURE(status)) {
-        SkDEBUGF("Bidi error: %s", u_errorName(status));
-        return false;
-    }
-
-    SkTArray<BidiRegion> bidiRegions;
-    const char* start8 = utf8;
-    const char* end8 = utf8 + utf8Bytes;
-    TextRange textRange(0, 0);
-    UBiDiLevel currentLevel = 0;
-
-    int32_t pos16 = 0;
-    int32_t end16 = ubidi_getLength(bidi.get());
-    while (pos16 < end16) {
-        auto level = ubidi_getLevelAt(bidi.get(), pos16);
-        if (pos16 == 0) {
-            currentLevel = level;
-        } else if (level != currentLevel) {
-            textRange.end = start8 - utf8;
-            fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel);
-            currentLevel = level;
-            textRange = TextRange(textRange.end, textRange.end);
-        }
-        SkUnichar u = utf8_next(&start8, end8);
-        pos16 += SkUTF::ToUTF16(u);
-    }
-
-    textRange.end = start8 - utf8;
-    if (!textRange.empty()) {
-        fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel);
+    for (auto pos : graphemes) {
+        fCodeUnitProperties[pos] |= CodeUnitFlags::kGraphemeStart;
    }

    return true;
@ -883,21 +678,23 @@ PositionWithAffinity ParagraphImpl::getGlyphPositionAtCoordinate(SkScalar dx, Sk
 // By "glyph" they mean a character index - indicated by Minikin's code
 SkRange<size_t> ParagraphImpl::getWordBoundary(unsigned offset) {

-    if (!computeWords()) {
-        return {0, 0 };
+    if (fWords.empty()) {
+        if (!fICU->getWords(fText.c_str(), fText.size(), &fWords)) {
+            return {0, 0 };
+        }
    }

    int32_t start = 0;
    int32_t end = 0;
    for (size_t i = 0; i < fWords.size(); ++i) {
-      auto word = fWords[i];
-      if (word <= offset) {
-        start = word;
-        end = word;
-      } else if (word > offset) {
-        end = word;
-        break;
-      }
+        auto word = fWords[i];
+        if (word <= offset) {
+            start = word;
+            end = word;
+        } else if (word > offset) {
+            end = word;
+            break;
+        }
    }

    //SkDebugf("getWordBoundary(%d): %d - %d\n", offset, start, end);
@ -980,7 +777,7 @@ void ParagraphImpl::setState(InternalState state) {
            fCodeUnitProperties.reset();
            fCodeUnitProperties.push_back_n(fText.size() + 1, kNoCodeUnitFlag);
            fWords.clear();
-            fBidiRegions.reset();
+            fBidiRegions.clear();
            fUTF8IndexForUTF16Index.reset();
            fUTF16IndexForUTF8Index.reset();
            [[fallthrough]];
--- a/modules/skparagraph/src/ParagraphImpl.h
+++ b/modules/skparagraph/src/ParagraphImpl.h
@ -23,9 +23,9 @@
 #include "modules/skparagraph/include/TextShadow.h"
 #include "modules/skparagraph/include/TextStyle.h"
 #include "modules/skparagraph/src/Run.h"
+#include "modules/skshaper/src/SkUnicode.h"
 #include "src/core/SkSpan.h"

-#include <unicode/ubrk.h>
 #include <memory>
 #include <string>
 #include <vector>
@ -83,14 +83,14 @@ struct ResolvedFontDescriptor {
    SkFont fFont;
    TextIndex fTextStart;
 };
-
+/*
 struct BidiRegion {
    BidiRegion(size_t start, size_t end, uint8_t dir)
        : text(start, end), direction(dir) { }
    TextRange text;
    uint8_t direction;
 };
-
+*/
 class ParagraphImpl final : public Paragraph {

 public:
@ -186,8 +186,6 @@ public:
    void resolveStrut();

    bool computeCodeUnitProperties();
-    bool computeWords();
-    bool getBidiRegions();

    void buildClusterTable();
    void spaceGlyphs();
@ -250,7 +248,7 @@ private:
    SkTArray<CodeUnitFlags> fCodeUnitProperties;
    SkTArray<size_t> fClustersIndexFromCodeUnit;
    std::vector<size_t> fWords;
-    SkTArray<BidiRegion> fBidiRegions;
+    std::vector<BidiRegion> fBidiRegions;
    // These two arrays are used in measuring methods (getRectsForRange, getGlyphPositionAtCoordinate)
    // They are filled lazily whenever they need and cached
    SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
@ -269,6 +267,8 @@ private:
    SkScalar fOldHeight;
    SkScalar fMaxWidthWithTrailingSpaces;
    SkRect fOrigin;
+
+    std::unique_ptr<SkUnicode> fICU;
 };
 }  // namespace textlayout
 }  // namespace skia
--- a/modules/skparagraph/src/ParagraphUtil.cpp
+++ b/modules/skparagraph/src/ParagraphUtil.cpp
@ -4,8 +4,10 @@
 #include "include/core/SkTypes.h"
 #include "include/private/SkTo.h"
 #include "modules/skparagraph/src/ParagraphUtil.h"
+#include "src/utils/SkUTF.h"

 #include <unicode/umachine.h>
+#include <unicode/uchar.h>
 #include <unicode/ustring.h>
 #include <unicode/utypes.h>
 #include <string>
@ -30,5 +32,14 @@ SkString SkStringFromU16String(const std::u16string& utf16text) {
    return dst;
 }

+// Reads the next code point of the UTF-8 text at *ptr (bounded by end) through
+// SkUTF::NextUTF8. A negative result marks an invalid sequence and is reported
+// as REPLACEMENT CHARACTER U+FFFD instead.
+SkUnichar nextUtf8Unit(const char** ptr, const char* end) {
+    const SkUnichar decoded = SkUTF::NextUTF8(ptr, end);
+    if (decoded < 0) {
+        return 0xFFFD;
+    }
+    return decoded;
+}
+
+// Tells whether ICU's u_iscntrl classifies the value as a control character.
+// Despite the parameter name, callers pass a decoded code point (SkUnichar).
+bool isControl(SkUnichar utf8) {
+    return u_iscntrl(utf8) ? true : false;
+}
+
 }
 }
--- a/modules/skparagraph/src/ParagraphUtil.h
+++ b/modules/skparagraph/src/ParagraphUtil.h
@ -8,6 +8,8 @@
 namespace skia {
 namespace textlayout {
 SkString SkStringFromU16String(const std::u16string& utf16text);
+SkUnichar nextUtf8Unit(const char** ptr, const char* end);
+bool isControl(SkUnichar utf8);
 }
 }

--- a/modules/skparagraph/src/TextLine.cpp
+++ b/modules/skparagraph/src/TextLine.cpp
@ -21,7 +21,6 @@
 #include "modules/skshaper/include/SkShaper.h"
 #include "src/core/SkSpan.h"

-#include <unicode/ubidi.h>
 #include <algorithm>
 #include <iterator>
 #include <limits>
@ -131,21 +130,20 @@ TextLine::TextLine(ParagraphImpl* master,

    // This is just chosen to catch the common/fast cases. Feel free to tweak.
    constexpr int kPreallocCount = 4;
-
-    SkAutoSTArray<kPreallocCount, UBiDiLevel> runLevels(numRuns);
-
+    SkAutoSTArray<kPreallocCount, BidiLevel> runLevels(numRuns);
    size_t runLevelsIndex = 0;
    for (auto runIndex = start.runIndex(); runIndex <= end.runIndex(); ++runIndex) {
        auto& run = fMaster->run(runIndex);
        runLevels[runLevelsIndex++] = run.fBidiLevel;
-        fMaxRunMetrics.add(InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent,
-                                               run.fFontMetrics.fLeading));
+        fMaxRunMetrics.add(
+            InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent, run.fFontMetrics.fLeading));
    }
    SkASSERT(runLevelsIndex == numRuns);

    SkAutoSTArray<kPreallocCount, int32_t> logicalOrder(numRuns);

-    ubidi_reorderVisual(runLevels.data(), SkToU32(numRuns), logicalOrder.data());
+    // TODO: hide all of this logic in SkUnicode?
+    SkUnicode::ReorderVisual(runLevels.data(), numRuns, logicalOrder.data());
    auto firstRunIndex = start.runIndex();
    for (auto index : logicalOrder) {
        fRunsInVisualOrder.push_back(firstRunIndex + index);
--- a/modules/skshaper/skshaper.gni
+++ b/modules/skshaper/skshaper.gni
@ -12,6 +12,7 @@ skia_shaper_public = [ "$_include/SkShaper.h" ]
 skia_shaper_primitive_sources = [
  "$_src/SkShaper.cpp",
  "$_src/SkShaper_primitive.cpp",
+  "$_src/SkUnicode_icu.cpp",
 ]
 skia_shaper_harfbuzz_sources = [ "$_src/SkShaper_harfbuzz.cpp" ]
 skia_shaper_coretext_sources = [ "$_src/SkShaper_coretext.cpp" ]
--- a/modules/skshaper/src/SkShaper.cpp
+++ b/modules/skshaper/src/SkShaper.cpp
@ -13,6 +13,7 @@
 #include "include/core/SkTypeface.h"
 #include "include/private/SkTFitsIn.h"
 #include "modules/skshaper/include/SkShaper.h"
+#include "modules/skshaper/src/SkUnicode.h"
 #include "src/core/SkTextBlobPriv.h"
 #include "src/utils/SkUTF.h"

--- a/modules/skshaper/src/SkShaper_primitive.cpp
+++ b/modules/skshaper/src/SkShaper_primitive.cpp
@ -10,6 +10,7 @@
 #include "include/core/SkTypeface.h"
 #include "include/private/SkTo.h"
 #include "modules/skshaper/include/SkShaper.h"
+#include "modules/skshaper/src/SkUnicode.h"
 #include "src/utils/SkUTF.h"

 class SkShaperPrimitive : public SkShaper {
--- a/modules/skshaper/src/SkUnicode.h
+++ b/modules/skshaper/src/SkUnicode.h
@ -0,0 +1,82 @@
+/*
+ * Copyright 2020 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+#ifndef SkUnicode_DEFINED
+#define SkUnicode_DEFINED
+
+#include "include/core/SkTypes.h"
+#include "src/core/SkSpan.h"
+#include <vector>
+#include <unicode/utf.h>
+
+namespace skia {
+
+// Text encodings the Unicode layer distinguishes.
+enum class UtfFormat {
+    kUTF8,
+    kUTF16
+};
+// Bidi
+// Index into the text. The ICU implementation reports bidi regions and whitespaces as
+// UTF-8 byte offsets, while getWords() reports offsets into the UTF-16 conversion of the text.
+typedef size_t Position;
+// Bidi embedding level, as handed to ReorderVisual().
+typedef uint8_t BidiLevel;
+// Base (paragraph) direction requested by the caller of getBidiRegions().
+enum class Direction {
+    kLTR,
+    kRTL,
+};
+// A stretch of text, [start, end), that shares one bidi level.
+struct BidiRegion {
+    BidiRegion(Position start, Position end, BidiLevel level)
+      : start(start), end(end), level(level) { }
+    Position start;
+    Position end;
+    BidiLevel level;
+};
+// LineBreaks
+// Distinguishes hard line breaks from soft line-break opportunities.
+enum class LineBreakType {
+    kSoftLineBreak,
+    kHardLineBreak
+};
+// A line-break boundary located before the text position `pos`.
+struct LineBreakBefore {
+    LineBreakBefore(Position pos, LineBreakType breakType)
+      : pos(pos), breakType(breakType) { }
+    Position pos;
+    LineBreakType breakType;
+};
+// Other breaks
+// Kinds of boundary analysis; the ICU implementation maps each one to a UBRK_* iterator type.
+enum class UBreakType {
+    kWords,
+    kGraphemes,
+    kLines
+};
+// A pair of text positions.
+// NOTE(review): whether `end` is inclusive is not established by this header — confirm at use sites.
+struct Range {
+    Position start;
+    Position end;
+};
+
+// Abstract Unicode services used by SkParagraph. Obtain an instance through SkUnicode_Make().
+class SkUnicode {
+    public:
+        typedef uint32_t ScriptID;
+        typedef uint32_t CombiningClass;
+        typedef uint32_t GeneralCategory;
+        virtual ~SkUnicode() {}
+        // High-level queries (currently used by SkParagraph).
+        // Each takes utf8Units bytes of UTF-8 text and reports through *results;
+        // a return value of false means the analysis failed.
+        // The ICU implementation appends to *results without clearing it first.
+        virtual bool getBidiRegions
+               (const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) = 0;
+        virtual bool getLineBreaks
+               (const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) = 0;
+        virtual bool getWords
+               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
+        virtual bool getGraphemes
+               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
+        virtual bool getWhitespaces
+               (const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
+
+        // Fills logicalFromVisual from the levelsCount bidi levels in runLevels.
+        static void ReorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]);
+};
+
+// Creates the SkUnicode implementation.
+std::unique_ptr<SkUnicode> SkUnicode_Make();
+
+}
+
+#endif // SkUnicode_DEFINED
--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@ -0,0 +1,258 @@
+/*
+* Copyright 2020 Google Inc.
+*
+* Use of this source code is governed by a BSD-style license that can be
+* found in the LICENSE file.
+*/
+#include "include/private/SkTFitsIn.h"
+#include "include/private/SkTemplates.h"
+#include "modules/skshaper/src/SkUnicode.h"
+#include "src/utils/SkUTF.h"
+#include <unicode/ubidi.h>
+#include <unicode/ubrk.h>
+#include <unicode/utext.h>
+#include <unicode/utypes.h>
+#include <vector>
+#include <functional>
+
+using ICUBiDi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
+using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
+using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>>;
+
+/** Reads one code point through SkUTF::NextUTF8; an invalid utf-8 sequence (negative result)
+ *  is reported as REPLACEMENT CHARACTER U+FFFD.
+ */
+static inline SkUnichar utf8_next(const char** ptr, const char* end) {
+    const SkUnichar decoded = SkUTF::NextUTF8(ptr, end);
+    if (decoded < 0) {
+        return 0xFFFD;
+    }
+    return decoded;
+}
+
+namespace skia {
+
+/**
+ * ICU-backed implementation of SkUnicode.
+ *
+ * Every get* override returns false when an ICU call or the UTF-8 -> UTF-16 conversion fails.
+ * Results are appended to the caller's vector; the vector is never cleared here.
+ */
+class SkUnicode_icu : public SkUnicode {
+
+    // NOTE(review): InputData and OutputData are not referenced by any method of this class —
+    // presumably placeholders for a later batched API; confirm before removing.
+    struct InputData {
+        SkSpan<const char> fUtf8;
+        SkSpan<uint16_t> fUtf16;
+        Direction fTextDirection;
+    };
+
+    struct OutputData {
+        UtfFormat fUtfFormat;
+        std::vector<BidiRegion> fBidiRegions;
+        std::vector<Position> fWords;
+        std::vector<LineBreakBefore> fLineBreaks;
+        std::vector<Position> fGraphemes;
+        std::vector<Position> fWhitespaces;
+    };
+
+    /** Maps the portable break type onto ICU's iterator type (UBRK_CHARACTER on unknown input). */
+    static UBreakIteratorType convertType(UBreakType type) {
+        switch (type) {
+            case UBreakType::kLines: return UBRK_LINE;
+            case UBreakType::kGraphemes: return UBRK_CHARACTER;
+            case UBreakType::kWords: return UBRK_WORD;
+            default:
+                SkDEBUGF("Convert error: wrong break type");
+                return UBRK_CHARACTER;
+        }
+    }
+
+    /**
+     * Converts utf8[0..utf8Units) into a freshly allocated UTF-16 buffer stored in *utf16.
+     * Returns the number of UTF-16 code units, or a negative value when the input is not
+     * valid UTF-8 (in which case *utf16 is left untouched).
+     */
+    static int convertUtf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
+        // First pass only measures: no destination buffer is supplied.
+        int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
+        if (utf16Units < 0) {
+            SkDEBUGF("Convert error: Invalid utf8 input");
+            return utf16Units;
+        }
+        *utf16 = std::unique_ptr<uint16_t[]>(new uint16_t[utf16Units]);
+        SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16->get(), utf16Units, utf8, utf8Units);
+        SkASSERT(dstLen == utf16Units);
+        return utf16Units;
+    }
+
+public:
+
+    /**
+     * Runs the ICU bidi algorithm over the text and appends one BidiRegion per run of equal
+     * level to *bidiRegions. Region boundaries are UTF-8 byte offsets, [start, end).
+     */
+    bool extractBidi(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* bidiRegions) {
+
+        // Convert to UTF16 since for now bidi iterator only operates on utf16
+        std::unique_ptr<uint16_t[]> utf16;
+        auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
+        if (utf16Units < 0) {
+            return false;
+        }
+
+        // Create bidi iterator
+        UErrorCode status = U_ZERO_ERROR;
+        ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Bidi error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(bidi);
+        uint8_t bidiLevel = (dir == Direction::kLTR) ? UBIDI_LTR : UBIDI_RTL;
+        // The required lifetime of utf16 isn't well documented.
+        // It appears it isn't used after ubidi_setPara except through ubidi_getText.
+        ubidi_setPara(bidi.get(), reinterpret_cast<const UChar*>(utf16.get()), utf16Units, bidiLevel,
+                      nullptr, &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Bidi error: %s", u_errorName(status));
+            return false;
+        }
+
+        // Walk the UTF-16 levels while decoding the UTF-8 text in lockstep, so that every
+        // level change can be recorded as a UTF-8 byte offset.
+        const char* start8 = utf8;
+        const char* end8 = utf8 + utf8Units;
+        BidiLevel currentLevel = 0;
+
+        Position pos8 = 0;   // UTF-8 offset where the current region starts
+        Position pos16 = 0;  // UTF-16 offset of the code point being examined
+        Position end16 = ubidi_getLength(bidi.get());
+        while (pos16 < end16) {
+            auto level = ubidi_getLevelAt(bidi.get(), pos16);
+            if (pos16 == 0) {
+                currentLevel = level;
+            } else if (level != currentLevel) {
+                Position end = start8 - utf8;
+                bidiRegions->emplace_back(pos8, end, currentLevel);
+                currentLevel = level;
+                pos8 = end;
+            }
+            // Advance both cursors by one code point.
+            SkUnichar u = utf8_next(&start8, end8);
+            pos16 += SkUTF::ToUTF16(u);
+        }
+        // Flush the trailing region, if it is not empty.
+        Position end = start8 - utf8;
+        if (end != pos8) {
+            bidiRegions->emplace_back(pos8, end, currentLevel);
+        }
+        return true;
+    }
+
+    /**
+     * Appends every word-break boundary of the UTF-16 text to *words, starting with the
+     * position returned by ubrk_first. Positions are offsets into utf16.
+     */
+    bool extractWords(uint16_t utf16[], int utf16Units, std::vector<Position>* words) {
+
+        UErrorCode status = U_ZERO_ERROR;
+
+        UBreakIteratorType breakType = convertType(UBreakType::kWords);
+        ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(iterator);
+
+        UText sUtf16UText = UTEXT_INITIALIZER;
+        ICUUText utf16UText(utext_openUChars(&sUtf16UText, reinterpret_cast<const UChar*>(utf16),
+                                             utf16Units, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+
+        ubrk_setUText(iterator.get(), utf16UText.get(), &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+
+        // Get the words
+        int32_t pos = ubrk_first(iterator.get());
+        while (pos != UBRK_DONE) {
+            words->emplace_back(pos);
+            pos = ubrk_next(iterator.get());
+        }
+
+        return true;
+    }
+
+    /**
+     * Iterates the break boundaries of the given type over the UTF-8 text and calls
+     * add(position, ruleStatus) for each one, starting with the position from ubrk_first.
+     * Returns false if any ICU object cannot be created or attached.
+     */
+    bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
+
+        UErrorCode status = U_ZERO_ERROR;
+        UText sUtf8UText = UTEXT_INITIALIZER;
+        ICUUText text(utext_openUTF8(&sUtf8UText, &utf8[0], utf8Units, &status));
+
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(text);
+
+        ICUBreakIterator iterator(ubrk_open(convertType(type), uloc_getDefault(), nullptr, 0, &status));
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            // Stop here: otherwise a null iterator would be handed to ubrk_setUText below.
+            return false;
+        }
+        SkASSERT(iterator);
+
+        ubrk_setUText(iterator.get(), text.get(), &status);
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+
+        auto iter = iterator.get();
+        int32_t pos = ubrk_first(iter);
+        while (pos != UBRK_DONE) {
+            add(pos, ubrk_getRuleStatus(iter));
+            pos = ubrk_next(iter);
+        }
+        return true;
+    }
+
+    /**
+     * Appends the UTF-8 byte offset of every byte that belongs to a code point for which
+     * u_isWhitespace is true (a multi-byte whitespace contributes one entry per byte).
+     * Always returns true.
+     */
+    bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
+
+        const char* start = utf8;
+        const char* end = utf8 + utf8Units;
+        const char* ch = start;
+        while (ch < end) {
+            auto index = ch - start;
+            auto unichar = utf8_next(&ch, end);
+            if (u_isWhitespace(unichar)) {
+                auto ending = ch - start;
+                for (auto k = index; k < ending; ++k) {
+                    whitespaces->emplace_back(k);
+                }
+            }
+        }
+        return true;
+    }
+
+    bool getBidiRegions(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) override {
+
+        return extractBidi(utf8, utf8Units, dir, results);
+    }
+
+    bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
+
+        // A rule status of UBRK_LINE_HARD marks a hard break; anything else is recorded as soft.
+        return extractPositions(utf8, utf8Units, UBreakType::kLines,
+            [results](int pos, int status) {
+                results->emplace_back(pos, status == UBRK_LINE_HARD
+                                                   ? LineBreakType::kHardLineBreak
+                                                   : LineBreakType::kSoftLineBreak);
+            });
+    }
+
+    bool getWords(const char utf8[], int utf8Units, std::vector<Position>* results) override {
+
+        // Convert to UTF16 since we want the results in utf16
+        std::unique_ptr<uint16_t[]> utf16;
+        auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
+        if (utf16Units < 0) {
+            return false;
+        }
+
+        return extractWords(utf16.get(), utf16Units, results);
+    }
+
+    bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
+
+        // The rule status is irrelevant for grapheme boundaries, hence the unnamed parameter.
+        return extractPositions(utf8, utf8Units, UBreakType::kGraphemes,
+            [results](int pos, int /*status*/) { results->emplace_back(pos); });
+    }
+
+    bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
+
+        return extractWhitespaces(utf8, utf8Units, results);
+    }
+};
+
+// Reorders runs for display by forwarding directly to ICU's ubidi_reorderVisual:
+// runLevels holds levelsCount bidi levels and logicalFromVisual receives the resulting order.
+// NOTE(review): presumably logicalFromVisual needs room for levelsCount entries — confirm
+// against the ICU documentation.
+void SkUnicode::ReorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) {
+    ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
+}
+
+// Factory for the Unicode services: hands back the ICU-backed implementation.
+std::unique_ptr<SkUnicode> SkUnicode_Make() {
+    return std::make_unique<SkUnicode_icu>();
+}
+
+}
+
--- a/src/utils/SkUTF.cpp
+++ b/src/utils/SkUTF.cpp
@ -251,3 +251,36 @@ size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) {
    return 1 + extra;
 }

+// Converts src[0..srcByteLength) from UTF-8 to UTF-16.
+// Returns how many UTF-16 code units the full conversion requires, or -1 when a code point
+// cannot be decoded or encoded. At most dstCapacity units are stored into dst; a null dst
+// turns the call into a pure measurement.
+int SkUTF::UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength) {
+    // Measuring mode: with no destination there is no room to fill.
+    if (dst == nullptr) {
+        dstCapacity = 0;
+    }
+
+    uint16_t* const dstLimit = dst + dstCapacity;
+    const char* const srcLimit = src + srcByteLength;
+    int unitsNeeded = 0;
+    for (const char* cursor = src; cursor < srcLimit; ) {
+        const SkUnichar codepoint = NextUTF8(&cursor, srcLimit);
+        if (codepoint < 0) {
+            return -1;
+        }
+
+        uint16_t encoded[2];
+        const size_t unitCount = ToUTF16(codepoint, encoded);
+        if (unitCount == 0) {
+            return -1;
+        }
+        // Keep counting even after the destination is full, so the caller learns the real size.
+        unitsNeeded += unitCount;
+
+        // Store as many units as still fit; the rest of this code point is dropped.
+        for (size_t i = 0; dst && i < unitCount && dst < dstLimit; ++i) {
+            *dst++ = encoded[i];
+        }
+    }
+    return unitsNeeded;
+}
+
+
--- a/src/utils/SkUTF.h
+++ b/src/utils/SkUTF.h
@ -64,6 +64,12 @@ SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr
 */
 SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);

+/** Converts the utf8 sequence src[0..srcByteLength) to UTF16.
+ *  Returns the number of UTF16 values the whole conversion needs, even when that is more
+ *  than dstCapacity; pass a null dst to only measure.
+ *  If dst is not null, it is filled with the corresponding values up to its capacity.
+ *  If there is an error (a code point that cannot be decoded or encoded), -1 is returned
+ *  and the dst[] buffer is undefined.
+ */
+SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength);
+
 }  // namespace SkUTF

 #endif  // SkUTF_DEFINED