From 64e3d040e911687a9e65514696c421a50953a6fe Mon Sep 17 00:00:00 2001 From: Julia Lavrova Date: Thu, 6 Aug 2020 14:25:52 -0400 Subject: [PATCH] ICU: SkShaper (bidi iterator only) Change-Id: I845cc0a962790ce37600f943473f21f619ee029b Reviewed-on: https://skia-review.googlesource.com/c/skia/+/308508 Reviewed-by: Ben Wagner Commit-Queue: Julia Lavrova --- modules/skparagraph/src/ParagraphImpl.cpp | 2 +- modules/skshaper/include/SkShaper.h | 5 +- modules/skshaper/src/SkShaper.cpp | 8 +- modules/skshaper/src/SkShaper_harfbuzz.cpp | 87 ++++++++---------- modules/skshaper/src/SkUnicode.h | 32 ++++++- modules/skshaper/src/SkUnicode_icu.cpp | 102 +++++++++++++++++++-- 6 files changed, 174 insertions(+), 62 deletions(-) diff --git a/modules/skparagraph/src/ParagraphImpl.cpp b/modules/skparagraph/src/ParagraphImpl.cpp index 7849f9a9bc..b0c0470c82 100644 --- a/modules/skparagraph/src/ParagraphImpl.cpp +++ b/modules/skparagraph/src/ParagraphImpl.cpp @@ -83,7 +83,7 @@ ParagraphImpl::ParagraphImpl(const SkString& text, , fOldWidth(0) , fOldHeight(0) , fOrigin(SkRect::MakeEmpty()) { - fICU = ::skia::SkUnicode::Make(); + fICU = ::SkUnicode::Make(); } ParagraphImpl::ParagraphImpl(const std::u16string& utf16text, diff --git a/modules/skshaper/include/SkShaper.h b/modules/skshaper/include/SkShaper.h index c0864af5ab..a8f724fb9c 100644 --- a/modules/skshaper/include/SkShaper.h +++ b/modules/skshaper/include/SkShaper.h @@ -39,6 +39,7 @@ class SkFont; class SkFontMgr; +class SkUnicode; /** Shapes text using HarfBuzz and places the shaped text into a @@ -133,9 +134,9 @@ public: static std::unique_ptr MakeBiDiRunIterator(const char* utf8, size_t utf8Bytes, uint8_t bidiLevel); - #ifdef SK_SHAPER_HARFBUZZ_AVAILABLE + #ifdef SK_UNICODE_AVAILABLE static std::unique_ptr - MakeIcuBiDiRunIterator(const char* utf8, size_t utf8Bytes, uint8_t bidiLevel); + MakeSkUnicodeBidiRunIterator(SkUnicode* unicode, const char* utf8, size_t utf8Bytes, uint8_t bidiLevel); #endif class TrivialBiDiRunIterator : public TrivialRunIterator { public: diff --git a/modules/skshaper/src/SkShaper.cpp b/modules/skshaper/src/SkShaper.cpp index bd2df519c2..185d349c4b 100644 --- a/modules/skshaper/src/SkShaper.cpp +++ b/modules/skshaper/src/SkShaper.cpp @@ -38,9 +38,13 @@ std::unique_ptr SkShaper::Make(sk_sp fontmgr) { std::unique_ptr SkShaper::MakeBiDiRunIterator(const char* utf8, size_t utf8Bytes, uint8_t bidiLevel) { -#ifdef SK_SHAPER_HARFBUZZ_AVAILABLE +#ifdef SK_UNICODE_AVAILABLE + auto unicode = SkUnicode::Make(); std::unique_ptr bidi = - SkShaper::MakeIcuBiDiRunIterator(utf8, utf8Bytes, bidiLevel); + SkShaper::MakeSkUnicodeBidiRunIterator(unicode.get(), + utf8, + utf8Bytes, + bidiLevel); if (bidi) { return bidi; } diff --git a/modules/skshaper/src/SkShaper_harfbuzz.cpp b/modules/skshaper/src/SkShaper_harfbuzz.cpp index 905747f975..fd36b9c8c9 100644 --- a/modules/skshaper/src/SkShaper_harfbuzz.cpp +++ b/modules/skshaper/src/SkShaper_harfbuzz.cpp @@ -26,6 +26,7 @@ #include "include/private/SkTemplates.h" #include "include/private/SkTo.h" #include "modules/skshaper/include/SkShaper.h" +#include "modules/skshaper/src/SkUnicode.h" #include "src/core/SkLRUCache.h" #include "src/core/SkSpan.h" #include "src/core/SkTDPQueue.h" @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include @@ -71,10 +71,11 @@ using HBBlob = resource; using HBFont = resource; using HBBuffer = resource; -using ICUBiDi = resource; using ICUBrk = resource; using ICUUText = resource; +using SkUnicodeBidi = std::unique_ptr; + hb_position_t skhb_position(SkScalar value) { // Treat HarfBuzz hb_position_t as 16.16 fixed-point. constexpr int kHbPosition1 = 1 << 16; @@ -343,25 +344,26 @@ static inline SkUnichar utf8_next(const char** ptr, const char* end) { return val < 0 ? 0xFFFD : val; } -class IcuBiDiRunIterator final : public SkShaper::BiDiRunIterator { +class SkUnicodeBidiRunIterator final : public SkShaper::BiDiRunIterator { public: - IcuBiDiRunIterator(const char* utf8, const char* end, ICUBiDi bidi) + SkUnicodeBidiRunIterator(const char* utf8, const char* end, SkUnicodeBidi bidi) : fBidi(std::move(bidi)) , fEndOfCurrentRun(utf8) , fBegin(utf8) , fEnd(end) , fUTF16LogicalPosition(0) - , fLevel(UBIDI_DEFAULT_LTR) + , fLevel(SkBidiIterator::kLTR) {} + void consume() override { - SkASSERT(fUTF16LogicalPosition < ubidi_getLength(fBidi.get())); - int32_t endPosition = ubidi_getLength(fBidi.get()); - fLevel = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition); + SkASSERT(fUTF16LogicalPosition < fBidi->getLength()); + int32_t endPosition = fBidi->getLength(); + fLevel = fBidi->getLevelAt(fUTF16LogicalPosition); SkUnichar u = utf8_next(&fEndOfCurrentRun, fEnd); fUTF16LogicalPosition += SkUTF::ToUTF16(u); - UBiDiLevel level; + SkBidiIterator::Level level; while (fUTF16LogicalPosition < endPosition) { - level = ubidi_getLevelAt(fBidi.get(), fUTF16LogicalPosition); + level = fBidi->getLevelAt(fUTF16LogicalPosition); if (level != fLevel) { break; } @@ -374,19 +376,18 @@ public: return fEndOfCurrentRun - fBegin; } bool atEnd() const override { - return fUTF16LogicalPosition == ubidi_getLength(fBidi.get()); + return fUTF16LogicalPosition == fBidi->getLength(); } - - UBiDiLevel currentLevel() const override { + SkBidiIterator::Level currentLevel() const override { return fLevel; } private: - ICUBiDi fBidi; + SkUnicodeBidi fBidi; char const * fEndOfCurrentRun; char const * const fBegin; char const * const fEnd; int32_t fUTF16LogicalPosition; - UBiDiLevel fLevel; + SkBidiIterator::Level fLevel; }; class HbIcuScriptRunIterator final : public SkShaper::ScriptRunIterator { @@ -509,7 +510,7 @@ struct ShapedGlyph { bool fUnsafeToBreak; }; struct ShapedRun { - ShapedRun(SkShaper::RunHandler::Range utf8Range, const SkFont& font, UBiDiLevel level, + ShapedRun(SkShaper::RunHandler::Range utf8Range, const SkFont& font, SkBidiIterator::Level level, std::unique_ptr glyphs, size_t numGlyphs, SkVector advance = {0, 0}) : fUtf8Range(utf8Range), fFont(font), fLevel(level) , fGlyphs(std::move(glyphs)), fNumGlyphs(numGlyphs), fAdvance(advance) @@ -517,7 +518,7 @@ struct ShapedRun { SkShaper::RunHandler::Range fUtf8Range; SkFont fFont; - UBiDiLevel fLevel; + SkBidiIterator::Level fLevel; std::unique_ptr fGlyphs; size_t fNumGlyphs; SkVector fAdvance; @@ -527,7 +528,7 @@ struct ShapedLine { SkVector fAdvance = { 0, 0 }; }; -constexpr bool is_LTR(UBiDiLevel level) { +constexpr bool is_LTR(SkBidiIterator::Level level) { return (level & 1) == 0; } @@ -565,12 +566,12 @@ void emit(const ShapedLine& line, SkShaper::RunHandler* handler) { handler->beginLine(); int numRuns = line.runs.size(); - SkAutoSTMalloc<4, UBiDiLevel> runLevels(numRuns); + SkAutoSTMalloc<4, SkBidiIterator::Level> runLevels(numRuns); for (int i = 0; i < numRuns; ++i) { runLevels[i] = line.runs[i].fLevel; } SkAutoSTMalloc<4, int32_t> logicalFromVisual(numRuns); - ubidi_reorderVisual(runLevels, numRuns, logicalFromVisual); + SkBidiIterator::ReorderVisual(runLevels, numRuns, logicalFromVisual); for (int i = 0; i < numRuns; ++i) { int logicalIndex = logicalFromVisual[i]; @@ -667,6 +668,7 @@ protected: const FontRunIterator&, const Feature*, size_t featuresSize) const; private: + std::unique_ptr fUnicode = SkUnicode::Make(); const sk_sp fFontMgr; HBBuffer fBuffer; hb_language_t fUndefinedLanguage; @@ -804,9 +806,12 @@ void ShaperHarfBuzz::shape(const char* utf8, size_t utf8Bytes, SkScalar width, RunHandler* handler) const { - UBiDiLevel defaultLevel = leftToRight ? UBIDI_DEFAULT_LTR : UBIDI_DEFAULT_RTL; + SkBidiIterator::Level defaultLevel = leftToRight ? SkBidiIterator::kLTR : SkBidiIterator::kRTL; + std::unique_ptr bidi(MakeSkUnicodeBidiRunIterator(fUnicode.get(), + utf8, + utf8Bytes, + defaultLevel)); - std::unique_ptr bidi(MakeIcuBiDiRunIterator(utf8, utf8Bytes, defaultLevel)); if (!bidi) { return; } @@ -1182,12 +1187,12 @@ void ShapeThenWrap::wrap(char const * const utf8, size_t utf8Bytes, } int numRuns = current.fRunIndex - previousBreak.fRunIndex + 1; - SkAutoSTMalloc<4, UBiDiLevel> runLevels(numRuns); + SkAutoSTMalloc<4, SkBidiIterator::Level> runLevels(numRuns); for (int i = 0; i < numRuns; ++i) { runLevels[i] = runs[previousBreak.fRunIndex + i].fLevel; } SkAutoSTMalloc<4, int32_t> logicalFromVisual(numRuns); - ubidi_reorderVisual(runLevels, numRuns, logicalFromVisual); + SkBidiIterator::ReorderVisual(runLevels, numRuns, logicalFromVisual); // step through the runs in reverse visual order and the glyphs in reverse logical order // until a visible glyph is found and force them to the end of the visual line. @@ -1437,7 +1442,7 @@ ShapedRun ShaperHarfBuzz::shape(char const * const utf8, } // namespace std::unique_ptr -SkShaper::MakeIcuBiDiRunIterator(const char* utf8, size_t utf8Bytes, uint8_t bidiLevel) { +SkShaper::MakeSkUnicodeBidiRunIterator(SkUnicode* unicode, const char* utf8, size_t utf8Bytes, uint8_t bidiLevel) { // ubidi only accepts utf16 (though internally it basically works on utf32 chars). // We want an ubidi_setPara(UBiDi*, UText*, UBiDiLevel, UBiDiLevel*, UErrorCode*); if (!SkTFitsIn(utf8Bytes)) { @@ -1445,35 +1450,23 @@ SkShaper::MakeIcuBiDiRunIterator(const char* utf8, size_t utf8Bytes, uint8_t bid return nullptr; } - UErrorCode status = U_ZERO_ERROR; - - // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR - int32_t utf16Units; - u_strFromUTF8(nullptr, 0, &utf16Units, utf8, utf8Bytes, &status); - status = U_ZERO_ERROR; - std::unique_ptr utf16(new UChar[utf16Units]); - u_strFromUTF8(utf16.get(), utf16Units, nullptr, utf8, utf8Bytes, &status); - if (U_FAILURE(status)) { - SkDEBUGF("Invalid utf8 input: %s", u_errorName(status)); + int32_t utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Bytes); + if (utf16Units < 0) { + SkDEBUGF("Invalid utf8 input\n"); return nullptr; } - ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status)); - if (U_FAILURE(status)) { - SkDEBUGF("Bidi error: %s", u_errorName(status)); - return nullptr; - } - SkASSERT(bidi); + std::unique_ptr utf16(new uint16_t[utf16Units]); + (void)SkUTF::UTF8ToUTF16(utf16.get(), utf16Units, utf8, utf8Bytes); - // The required lifetime of utf16 isn't well documented. - // It appears it isn't used after ubidi_setPara except through ubidi_getText. - ubidi_setPara(bidi.get(), utf16.get(), utf16Units, bidiLevel, nullptr, &status); - if (U_FAILURE(status)) { - SkDEBUGF("Bidi error: %s", u_errorName(status)); + auto bidiDir = (bidiLevel % 2 == 0) ? SkBidiIterator::kLTR : SkBidiIterator::kRTL; + SkUnicodeBidi bidi = unicode->makeBidiIterator(utf16.get(), utf16Units, bidiDir); + if (!bidi) { + SkDEBUGF("Bidi error\n"); return nullptr; } - return std::make_unique(utf8, utf8 + utf8Bytes, std::move(bidi)); + return std::make_unique(utf8, utf8 + utf8Bytes, std::move(bidi)); } std::unique_ptr diff --git a/modules/skshaper/src/SkUnicode.h b/modules/skshaper/src/SkUnicode.h index 2219ce3919..0b5b36395a 100644 --- a/modules/skshaper/src/SkUnicode.h +++ b/modules/skshaper/src/SkUnicode.h @@ -31,8 +31,6 @@ #endif #endif -namespace skia { - enum class UtfFormat { kUTF8, kUTF16 @@ -73,12 +71,40 @@ struct Range { Position end; }; +class SKUNICODE_API SkBidiIterator { +public: + typedef int32_t Position; + typedef uint8_t Level; + struct Region { + Region(Position start, Position end, Level level) + : start(start), end(end), level(level) { } + Position start; + Position end; + Level level; + }; + enum Direction { + kLTR, + kRTL, + }; + virtual ~SkBidiIterator() {} + virtual Position getLength() = 0; + virtual Level getLevelAt(Position) = 0; + static void ReorderVisual(const Level runLevels[], int levelsCount, int32_t logicalFromVisual[]); +}; + class SKUNICODE_API SkUnicode { public: typedef uint32_t ScriptID; typedef uint32_t CombiningClass; typedef uint32_t GeneralCategory; virtual ~SkUnicode() = default; + + // Iterators (used in SkShaper) + virtual std::unique_ptr makeBidiIterator + (const uint16_t text[], int count, SkBidiIterator::Direction) = 0; + virtual std::unique_ptr makeBidiIterator + (const char text[], int count, SkBidiIterator::Direction) = 0; + // High level methods (that we actually use somewhere=SkParagraph) virtual bool getBidiRegions (const char utf8[], int utf8Units, Direction dir, std::vector* results) = 0; @@ -96,6 +122,4 @@ class SKUNICODE_API SkUnicode { static std::unique_ptr Make(); }; -} // namespace skia - #endif // SkUnicode_DEFINED diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp index 4e426c28d8..7f218b92ba 100644 --- a/modules/skshaper/src/SkUnicode_icu.cpp +++ b/modules/skshaper/src/SkUnicode_icu.cpp @@ -15,7 +15,7 @@ #include #include -using ICUBiDi = std::unique_ptr>; +using SkUnicodeBidi = std::unique_ptr>; using ICUUText = std::unique_ptr>; using ICUBreakIterator = std::unique_ptr>; @@ -25,7 +25,92 @@ static inline SkUnichar utf8_next(const char** ptr, const char* end) { return val < 0 ? 0xFFFD : val; } -namespace skia { +class SkBidiIterator_icu : public SkBidiIterator { + SkUnicodeBidi fBidi; +public: + explicit SkBidiIterator_icu(SkUnicodeBidi bidi) : fBidi(std::move(bidi)) {} + Position getLength() override { return ubidi_getLength(fBidi.get()); } + Level getLevelAt(Position pos) override { return ubidi_getLevelAt(fBidi.get(), pos); } + + static std::unique_ptr makeBidiIterator(const uint16_t utf16[], int utf16Units, Direction dir) { + UErrorCode status = U_ZERO_ERROR; + SkUnicodeBidi bidi(ubidi_openSized(utf16Units, 0, &status)); + if (U_FAILURE(status)) { + SkDEBUGF("Bidi error: %s", u_errorName(status)); + return nullptr; + } + SkASSERT(bidi); + uint8_t bidiLevel = (dir == SkBidiIterator::kLTR) ? UBIDI_LTR : UBIDI_RTL; + // The required lifetime of utf16 isn't well documented. + // It appears it isn't used after ubidi_setPara except through ubidi_getText. + ubidi_setPara(bidi.get(), (const UChar*)utf16, utf16Units, bidiLevel, nullptr, &status); + if (U_FAILURE(status)) { + SkDEBUGF("Bidi error: %s", u_errorName(status)); + return nullptr; + } + return std::unique_ptr(new SkBidiIterator_icu(std::move(bidi))); + } + + // ICU bidi iterator works with utf16 but clients (Flutter for instance) may work with utf8 + // This method allows the clients not to think about all these details + static std::unique_ptr makeBidiIterator(const char utf8[], int utf8Units, Direction dir) { + // Convert utf8 into utf16 since ubidi only accepts utf16 + if (!SkTFitsIn(utf8Units)) { + SkDEBUGF("Bidi error: text too long"); + return nullptr; + } + + // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR + int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units); + if (utf16Units < 0) { + SkDEBUGF("Bidi error: Invalid utf8 input"); + return nullptr; + } + std::unique_ptr utf16(new uint16_t[utf16Units]); + SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16.get(), utf16Units, utf8, utf8Units); + SkASSERT(dstLen == utf16Units); + + return makeBidiIterator(utf16.get(), utf16Units, dir); + } + + // This method returns the final results only: a list of bidi regions + // (this is all SkParagraph really needs; SkShaper however uses the iterator itself) + static std::vector getBidiRegions(const char utf8[], int utf8Units, Direction dir) { + + auto bidiIterator = makeBidiIterator(utf8, utf8Units, dir); + std::vector bidiRegions; + const char* start8 = utf8; + const char* end8 = utf8 + utf8Units; + SkBidiIterator::Level currentLevel = 0; + + Position pos8 = 0; + Position pos16 = 0; + Position end16 = bidiIterator->getLength(); + while (pos16 < end16) { + auto level = bidiIterator->getLevelAt(pos16); + if (pos16 == 0) { + currentLevel = level; + } else if (level != currentLevel) { + auto end = start8 - utf8; + bidiRegions.emplace_back(pos8, end, currentLevel); + currentLevel = level; + pos8 = end; + } + SkUnichar u = utf8_next(&start8, end8); + pos16 += SkUTF::ToUTF16(u); + } + auto end = start8 - utf8; + if (end != pos8) { + bidiRegions.emplace_back(pos8, end, currentLevel); + } + return bidiRegions; + } +}; + +void SkBidiIterator::ReorderVisual(const Level runLevels[], int levelsCount, + int32_t logicalFromVisual[]) { + ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual); +} class SkUnicode_icu : public SkUnicode { @@ -63,7 +148,7 @@ class SkUnicode_icu : public SkUnicode { // Create bidi iterator UErrorCode status = U_ZERO_ERROR; - ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status)); + SkUnicodeBidi bidi(ubidi_openSized(utf16Units, 0, &status)); if (U_FAILURE(status)) { SkDEBUGF("Bidi error: %s", u_errorName(status)); return false; @@ -193,6 +278,14 @@ class SkUnicode_icu : public SkUnicode { public: ~SkUnicode_icu() override { } + std::unique_ptr makeBidiIterator(const uint16_t text[], int count, + SkBidiIterator::Direction dir) override { + return SkBidiIterator_icu::makeBidiIterator(text, count, dir); + } + std::unique_ptr makeBidiIterator(const char text[], int count, + SkBidiIterator::Direction dir) override { + return SkBidiIterator_icu::makeBidiIterator(text, count, dir); + } bool getBidiRegions(const char utf8[], int utf8Units, Direction dir, std::vector* results) override { return extractBidi(utf8, utf8Units, dir, results); @@ -238,6 +331,3 @@ public: }; std::unique_ptr SkUnicode::Make() { return std::make_unique(); } - -} // namespace skia -