diff --git a/modules/skparagraph/src/OneLineShaper.cpp b/modules/skparagraph/src/OneLineShaper.cpp index 7f0760b90e..eaa4d0b63b 100644 --- a/modules/skparagraph/src/OneLineShaper.cpp +++ b/modules/skparagraph/src/OneLineShaper.cpp @@ -2,13 +2,23 @@ #include "modules/skparagraph/src/Iterators.h" #include "modules/skparagraph/src/OneLineShaper.h" -#include "modules/skparagraph/src/ParagraphUtil.h" +#include #include #include +#include "src/utils/SkUTF.h" namespace skia { namespace textlayout { +namespace { + +SkUnichar utf8_next(const char** ptr, const char* end) { + SkUnichar val = SkUTF::NextUTF8(ptr, end); + return val < 0 ? 0xFFFD : val; +} + +} + void OneLineShaper::commitRunBuffer(const RunInfo&) { fCurrentRun->commit(); @@ -303,8 +313,8 @@ void OneLineShaper::sortOutGlyphs(std::function&& sortOutUnres block.end = i; } else { const char* cluster = text.begin() + clusterIndex(i); - SkUnichar codepoint = nextUtf8Unit(&cluster, text.end()); - if (isControl(codepoint)) { + SkUnichar codepoint = utf8_next(&cluster, text.end()); + if (u_iscntrl(codepoint)) { // This codepoint does not have to be resolved; let's pretend it's resolved if (block.start == EMPTY_INDEX) { // Keep skipping resolved code points @@ -409,7 +419,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle, // We have the global cache for all already found typefaces for SkUnichar // but we still need to keep track of all SkUnichars used in this unresolved block SkTHashSet alreadyTried; - SkUnichar unicode = nextUtf8Unit(&ch, unresolvedText.end()); + SkUnichar unicode = utf8_next(&ch, unresolvedText.end()); while (true) { sk_sp typeface; @@ -447,7 +457,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle, // We can stop here or we can switch to another DIFFERENT codepoint while (ch != unresolvedText.end()) { - unicode = nextUtf8Unit(&ch, unresolvedText.end()); + unicode = utf8_next(&ch, unresolvedText.end()); auto found = alreadyTried.find(unicode); if (found == nullptr) { alreadyTried.add(unicode); @@ -462,6 +472,10 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle, bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) { + if (!fParagraph->getBidiRegions()) { + return false; + } + size_t bidiIndex = 0; SkScalar advanceX = 0; @@ -471,8 +485,8 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) { // Shape the text by bidi regions while (bidiIndex < fParagraph->fBidiRegions.size()) { BidiRegion& bidiRegion = fParagraph->fBidiRegions[bidiIndex]; - auto start = std::max(bidiRegion.start, placeholder.fTextBefore.start); - auto end = std::min(bidiRegion.end, placeholder.fTextBefore.end); + auto start = std::max(bidiRegion.text.start, placeholder.fTextBefore.start); + auto end = std::min(bidiRegion.text.end, placeholder.fTextBefore.end); // Set up the iterators (the style iterator points to a bigger region that it could TextRange textRange(start, end); @@ -480,11 +494,11 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) { SkSpan styleSpan(fParagraph->blocks(blockRange)); // Shape the text between placeholders - if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.level)) { + if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.direction)) { return false; } - if (end == bidiRegion.end) { + if (end == bidiRegion.text.end) { ++bidiIndex; } else /*if (end == placeholder.fTextBefore.end)*/ { break; diff --git a/modules/skparagraph/src/ParagraphCache.cpp b/modules/skparagraph/src/ParagraphCache.cpp index a96999b42e..2464acaee6 100644 --- a/modules/skparagraph/src/ParagraphCache.cpp +++ b/modules/skparagraph/src/ParagraphCache.cpp @@ -50,7 +50,7 @@ public: // ICU results SkTArray fCodeUnitProperties; std::vector fWords; - std::vector fBidiRegions; + SkTArray fBidiRegions; SkTArray fUTF8IndexForUTF16Index; SkTArray fUTF16IndexForUTF8Index; }; diff --git a/modules/skparagraph/src/ParagraphImpl.cpp b/modules/skparagraph/src/ParagraphImpl.cpp index 86027fcf8a..7389ef37d6 100644 --- a/modules/skparagraph/src/ParagraphImpl.cpp +++ b/modules/skparagraph/src/ParagraphImpl.cpp @@ -25,6 +25,12 @@ #endif #include +#include +#include +#include +#include +#include +#include #include #include @@ -34,6 +40,9 @@ namespace textlayout { namespace { +using ICUUText = std::unique_ptr>; +using ICUBiDi = std::unique_ptr>; + SkScalar littleRound(SkScalar a) { // This rounding is done to match Flutter tests. Must be removed.. auto val = std::fabs(a); @@ -45,6 +54,13 @@ SkScalar littleRound(SkScalar a) { return SkScalarFloorToScalar(a); } } + +/** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */ +static inline SkUnichar utf8_next(const char** ptr, const char* end) { + SkUnichar val = SkUTF::NextUTF8(ptr, end); + return val < 0 ? 0xFFFD : val; +} + } TextRange operator*(const TextRange& a, const TextRange& b) { @@ -83,7 +99,6 @@ ParagraphImpl::ParagraphImpl(const SkString& text, , fOldWidth(0) , fOldHeight(0) , fOrigin(SkRect::MakeEmpty()) { - fICU = ::skia::SkUnicode::Make(); } ParagraphImpl::ParagraphImpl(const std::u16string& utf16text, @@ -130,7 +145,7 @@ void ParagraphImpl::layout(SkScalar rawWidth) { this->fCodeUnitProperties.reset(); this->fCodeUnitProperties.push_back_n(fText.size() + 1, CodeUnitFlags::kNoCodeUnitFlag); this->fWords.clear(); - this->fBidiRegions.clear(); + this->fBidiRegions.reset(); this->fUTF8IndexForUTF16Index.reset(); this->fUTF16IndexForUTF8Index.reset(); this->fRuns.reset(); @@ -229,6 +244,72 @@ void ParagraphImpl::resetContext() { fExceededMaxLines = false; } +class TextBreaker { +public: + TextBreaker() : fInitialized(false), fPos(-1) {} + + bool initialize(SkSpan text, UBreakIteratorType type) { + + UErrorCode status = U_ZERO_ERROR; + fIterator = nullptr; + fSize = text.size(); + UText sUtf8UText = UTEXT_INITIALIZER; + std::unique_ptr> utf8UText( + utext_openUTF8(&sUtf8UText, text.begin(), text.size(), &status)); + if (U_FAILURE(status)) { + SkDEBUGF("Could not create utf8UText: %s", u_errorName(status)); + return false; + } + fIterator.reset(ubrk_open(type, "en", nullptr, 0, &status)); + if (U_FAILURE(status)) { + SkDEBUGF("Could not create line break iterator: %s", u_errorName(status)); + SK_ABORT(""); + } + + ubrk_setUText(fIterator.get(), utf8UText.get(), &status); + if (U_FAILURE(status)) { + SkDEBUGF("Could not setText on break iterator: %s", u_errorName(status)); + return false; + } + + fInitialized = true; + fPos = 0; + return true; + } + + bool initialized() const { return fInitialized; } + + size_t first() { + fPos = ubrk_first(fIterator.get()); + return eof() ? fSize : fPos; + } + + size_t next() { + fPos = ubrk_next(fIterator.get()); + return eof() ? fSize : fPos; + } + + size_t preceding(size_t offset) { + auto pos = ubrk_preceding(fIterator.get(), offset); + return pos == UBRK_DONE ? 0 : pos; + } + + size_t following(size_t offset) { + auto pos = ubrk_following(fIterator.get(), offset); + return pos == UBRK_DONE ? fSize : pos; + } + + int32_t status() { return ubrk_getRuleStatus(fIterator.get()); } + + bool eof() { return fPos == UBRK_DONE; } + +private: + std::unique_ptr> fIterator; + bool fInitialized; + int32_t fPos; + size_t fSize; +}; + // shapeTextIntoEndlessLine is the thing that calls this method // (that contains all ICU dependencies except for words) bool ParagraphImpl::computeCodeUnitProperties() { @@ -239,41 +320,165 @@ bool ParagraphImpl::computeCodeUnitProperties() { } #endif - // Get bidi regions - Direction textDirection = fParagraphStyle.getTextDirection() == TextDirection::kLtr - ? Direction::kLTR - : Direction::kRTL; - if (!fICU->getBidiRegions(fText.c_str(), fText.size(), textDirection, &fBidiRegions)) { + { + const char* start = fText.c_str(); + const char* end = start + fText.size(); + const char* ch = start; + while (ch < end) { + auto index = ch - start; + auto unichar = utf8_next(&ch, end); + if (u_isWhitespace(unichar)) { + auto ending = ch - start; + for (auto k = index; k < ending; ++k) { + fCodeUnitProperties[k] |= CodeUnitFlags::kPartOfWhiteSpace; + } + } + } + } + { + TextBreaker breaker; + if (!breaker.initialize(this->text(), UBRK_LINE)) { + return false; + } + while (!breaker.eof()) { + size_t currentPos = breaker.next(); + fCodeUnitProperties[currentPos] |= + breaker.status() == UBRK_LINE_HARD ? CodeUnitFlags::kHardLineBreakBefore : CodeUnitFlags::kSoftLineBreakBefore; + } + } + { + TextBreaker breaker; + if (!breaker.initialize(this->text(), UBRK_CHARACTER)) { + return false; + } + + while (!breaker.eof()) { + auto currentPos = breaker.next(); + fCodeUnitProperties[currentPos] |= CodeUnitFlags::kGraphemeStart; + } + } + + return true; +} + +// getWordBoundary is the thing that calls this method lazily +bool ParagraphImpl::computeWords() { + + if (!fWords.empty()) { + return true; + } + + UErrorCode errorCode = U_ZERO_ERROR; + + auto iter = ubrk_open(UBRK_WORD, uloc_getDefault(), nullptr, 0, &errorCode); + if (U_FAILURE(errorCode)) { + SkDEBUGF("Could not create line break iterator: %s", u_errorName(errorCode)); return false; } - // Get white spaces - std::vector whitespaces; - if (!fICU->getWhitespaces(fText.c_str(), fText.size(), &whitespaces)) { + // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR + int32_t utf16Units; + u_strFromUTF8(nullptr, 0, &utf16Units, fText.c_str(), fText.size(), &errorCode); + errorCode = U_ZERO_ERROR; + std::unique_ptr utf16(new UChar[utf16Units]); + u_strFromUTF8(utf16.get(), utf16Units, nullptr, fText.c_str(), fText.size(), &errorCode); + if (U_FAILURE(errorCode)) { + SkDEBUGF("Invalid utf8 input: %s", u_errorName(errorCode)); return false; } - for (auto whitespace : whitespaces) { - fCodeUnitProperties[whitespace] |= CodeUnitFlags::kPartOfWhiteSpace; - } - // Get line breaks - std::vector lineBreaks; - if (!fICU->getLineBreaks(fText.c_str(), fText.size(), &lineBreaks)) { + UText sUtf16UText = UTEXT_INITIALIZER; + ICUUText utf8UText(utext_openUChars(&sUtf16UText, utf16.get(), utf16Units, &errorCode)); + if (U_FAILURE(errorCode)) { + SkDEBUGF("Could not create utf8UText: %s", u_errorName(errorCode)); return false; } - for (auto& lineBreak : lineBreaks) { - fCodeUnitProperties[lineBreak.pos] |= lineBreak.breakType == LineBreakType::kHardLineBreak - ? CodeUnitFlags::kHardLineBreakBefore - : CodeUnitFlags::kSoftLineBreakBefore; - } - // Get graphemes - std::vector graphemes; - if (!fICU->getGraphemes(fText.c_str(), fText.size(), &graphemes)) { + ubrk_setUText(iter, utf8UText.get(), &errorCode); + if (U_FAILURE(errorCode)) { + SkDEBUGF("Could not setText on break iterator: %s", u_errorName(errorCode)); return false; } - for (auto pos : graphemes) { - fCodeUnitProperties[pos] |= CodeUnitFlags::kGraphemeStart; + + int32_t pos = ubrk_first(iter); + while (pos != UBRK_DONE) { + fWords.emplace_back(pos); + pos = ubrk_next(iter); + } + + return true; +} + +bool ParagraphImpl::getBidiRegions() { + + if (!fBidiRegions.empty()) { + return true; + } + + // ubidi only accepts utf16 (though internally it basically works on utf32 chars). + // We want an ubidi_setPara(UBiDi*, UText*, UBiDiLevel, UBiDiLevel*, UErrorCode*); + size_t utf8Bytes = fText.size(); + const char* utf8 = fText.c_str(); + uint8_t bidiLevel = fParagraphStyle.getTextDirection() == TextDirection::kLtr + ? UBIDI_LTR + : UBIDI_RTL; + if (!SkTFitsIn(utf8Bytes)) { + SkDEBUGF("Bidi error: text too long"); + return false; + } + + // Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR + UErrorCode status = U_ZERO_ERROR; + int32_t utf16Units; + u_strFromUTF8(nullptr, 0, &utf16Units, utf8, utf8Bytes, &status); + status = U_ZERO_ERROR; + std::unique_ptr utf16(new UChar[utf16Units]); + u_strFromUTF8(utf16.get(), utf16Units, nullptr, utf8, utf8Bytes, &status); + if (U_FAILURE(status)) { + SkDEBUGF("Invalid utf8 input: %s", u_errorName(status)); + return false; + } + + ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status)); + if (U_FAILURE(status)) { + SkDEBUGF("Bidi error: %s", u_errorName(status)); + return false; + } + SkASSERT(bidi); + + // The required lifetime of utf16 isn't well documented. + // It appears it isn't used after ubidi_setPara except through ubidi_getText. + ubidi_setPara(bidi.get(), utf16.get(), utf16Units, bidiLevel, nullptr, &status); + if (U_FAILURE(status)) { + SkDEBUGF("Bidi error: %s", u_errorName(status)); + return false; + } + + SkTArray bidiRegions; + const char* start8 = utf8; + const char* end8 = utf8 + utf8Bytes; + TextRange textRange(0, 0); + UBiDiLevel currentLevel = 0; + + int32_t pos16 = 0; + int32_t end16 = ubidi_getLength(bidi.get()); + while (pos16 < end16) { + auto level = ubidi_getLevelAt(bidi.get(), pos16); + if (pos16 == 0) { + currentLevel = level; + } else if (level != currentLevel) { + textRange.end = start8 - utf8; + fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel); + currentLevel = level; + textRange = TextRange(textRange.end, textRange.end); + } + SkUnichar u = utf8_next(&start8, end8); + pos16 += SkUTF::ToUTF16(u); + } + + textRange.end = start8 - utf8; + if (!textRange.empty()) { + fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel); } return true; @@ -678,23 +883,21 @@ PositionWithAffinity ParagraphImpl::getGlyphPositionAtCoordinate(SkScalar dx, Sk // By "glyph" they mean a character index - indicated by Minikin's code SkRange ParagraphImpl::getWordBoundary(unsigned offset) { - if (fWords.empty()) { - if (!fICU->getWords(fText.c_str(), fText.size(), &fWords)) { - return {0, 0 }; - } + if (!computeWords()) { + return {0, 0 }; } int32_t start = 0; int32_t end = 0; for (size_t i = 0; i < fWords.size(); ++i) { - auto word = fWords[i]; - if (word <= offset) { - start = word; - end = word; - } else if (word > offset) { - end = word; - break; - } + auto word = fWords[i]; + if (word <= offset) { + start = word; + end = word; + } else if (word > offset) { + end = word; + break; + } } //SkDebugf("getWordBoundary(%d): %d - %d\n", offset, start, end); @@ -777,7 +980,7 @@ void ParagraphImpl::setState(InternalState state) { fCodeUnitProperties.reset(); fCodeUnitProperties.push_back_n(fText.size() + 1, kNoCodeUnitFlag); fWords.clear(); - fBidiRegions.clear(); + fBidiRegions.reset(); fUTF8IndexForUTF16Index.reset(); fUTF16IndexForUTF8Index.reset(); [[fallthrough]]; diff --git a/modules/skparagraph/src/ParagraphImpl.h b/modules/skparagraph/src/ParagraphImpl.h index c43233c9a4..bda3d1fed3 100644 --- a/modules/skparagraph/src/ParagraphImpl.h +++ b/modules/skparagraph/src/ParagraphImpl.h @@ -23,9 +23,9 @@ #include "modules/skparagraph/include/TextShadow.h" #include "modules/skparagraph/include/TextStyle.h" #include "modules/skparagraph/src/Run.h" -#include "modules/skshaper/src/SkUnicode.h" #include "src/core/SkSpan.h" +#include #include #include #include @@ -83,14 +83,14 @@ struct ResolvedFontDescriptor { SkFont fFont; TextIndex fTextStart; }; -/* + struct BidiRegion { BidiRegion(size_t start, size_t end, uint8_t dir) : text(start, end), direction(dir) { } TextRange text; uint8_t direction; }; -*/ + class ParagraphImpl final : public Paragraph { public: @@ -186,6 +186,8 @@ public: void resolveStrut(); bool computeCodeUnitProperties(); + bool computeWords(); + bool getBidiRegions(); void buildClusterTable(); void spaceGlyphs(); @@ -217,8 +219,6 @@ public: bool codeUnitHasProperty(size_t index, CodeUnitFlags property) const { return (fCodeUnitProperties[index] & property) == property; } - SkUnicode* getICU() { return fICU.get(); } - private: friend class ParagraphBuilder; friend class ParagraphCacheKey; @@ -250,7 +250,7 @@ private: SkTArray fCodeUnitProperties; SkTArray fClustersIndexFromCodeUnit; std::vector fWords; - std::vector fBidiRegions; + SkTArray fBidiRegions; // These two arrays are used in measuring methods (getRectsForRange, getGlyphPositionAtCoordinate) // They are filled lazily whenever they need and cached SkTArray fUTF8IndexForUTF16Index; @@ -269,8 +269,6 @@ private: SkScalar fOldHeight; SkScalar fMaxWidthWithTrailingSpaces; SkRect fOrigin; - - std::unique_ptr fICU; }; } // namespace textlayout } // namespace skia diff --git a/modules/skparagraph/src/ParagraphUtil.cpp b/modules/skparagraph/src/ParagraphUtil.cpp index 9a32ddef78..d78ad65687 100644 --- a/modules/skparagraph/src/ParagraphUtil.cpp +++ b/modules/skparagraph/src/ParagraphUtil.cpp @@ -4,10 +4,8 @@ #include "include/core/SkTypes.h" #include "include/private/SkTo.h" #include "modules/skparagraph/src/ParagraphUtil.h" -#include "src/utils/SkUTF.h" #include -#include #include #include #include @@ -32,14 +30,5 @@ SkString SkStringFromU16String(const std::u16string& utf16text) { return dst; } -SkUnichar nextUtf8Unit(const char** ptr, const char* end) { - SkUnichar val = SkUTF::NextUTF8(ptr, end); - return val < 0 ? 0xFFFD : val; -} - -bool isControl(SkUnichar utf8) { - return u_iscntrl(utf8); -} - } } diff --git a/modules/skparagraph/src/ParagraphUtil.h b/modules/skparagraph/src/ParagraphUtil.h index 1374f2dd3b..a32025ab80 100644 --- a/modules/skparagraph/src/ParagraphUtil.h +++ b/modules/skparagraph/src/ParagraphUtil.h @@ -8,8 +8,6 @@ namespace skia { namespace textlayout { SkString SkStringFromU16String(const std::u16string& utf16text); -SkUnichar nextUtf8Unit(const char** ptr, const char* end); -bool isControl(SkUnichar utf8); } } diff --git a/modules/skparagraph/src/TextLine.cpp b/modules/skparagraph/src/TextLine.cpp index ef7ab3be32..560dd12086 100644 --- a/modules/skparagraph/src/TextLine.cpp +++ b/modules/skparagraph/src/TextLine.cpp @@ -21,6 +21,7 @@ #include "modules/skshaper/include/SkShaper.h" #include "src/core/SkSpan.h" +#include #include #include #include @@ -130,20 +131,21 @@ TextLine::TextLine(ParagraphImpl* master, // This is just chosen to catch the common/fast cases. Feel free to tweak. constexpr int kPreallocCount = 4; - SkAutoSTArray runLevels(numRuns); + + SkAutoSTArray runLevels(numRuns); + size_t runLevelsIndex = 0; for (auto runIndex = start.runIndex(); runIndex <= end.runIndex(); ++runIndex) { auto& run = fMaster->run(runIndex); runLevels[runLevelsIndex++] = run.fBidiLevel; - fMaxRunMetrics.add( - InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent, run.fFontMetrics.fLeading)); + fMaxRunMetrics.add(InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent, + run.fFontMetrics.fLeading)); } SkASSERT(runLevelsIndex == numRuns); SkAutoSTArray logicalOrder(numRuns); - // TODO: hide all these logic in SkUnicode? - fMaster->getICU()->reorderVisual(runLevels.data(), numRuns, logicalOrder.data()); + ubidi_reorderVisual(runLevels.data(), SkToU32(numRuns), logicalOrder.data()); auto firstRunIndex = start.runIndex(); for (auto index : logicalOrder) { fRunsInVisualOrder.push_back(firstRunIndex + index); diff --git a/modules/skparagraph/tests/SkParagraphTest.cpp b/modules/skparagraph/tests/SkParagraphTest.cpp index cae8b0e8d2..e4f821d3ac 100644 --- a/modules/skparagraph/tests/SkParagraphTest.cpp +++ b/modules/skparagraph/tests/SkParagraphTest.cpp @@ -4579,8 +4579,7 @@ DEF_TEST(SkParagraph_WhitespacesInMultipleFonts, reporter) { } } -// Disable until I sort out fonts -DEF_TEST_DISABLED(SkParagraph_JSON1, reporter) { +DEF_TEST(SkParagraph_JSON1, reporter) { sk_sp fontCollection = sk_make_sp(); if (!fontCollection->fontsFound()) return; const char* text = "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ"; @@ -4618,8 +4617,7 @@ DEF_TEST_DISABLED(SkParagraph_JSON1, reporter) { REPORTER_ASSERT(reporter, cluster <= 2); } -// Disable until I sort out fonts -DEF_TEST_DISABLED(SkParagraph_JSON2, reporter) { +DEF_TEST(SkParagraph_JSON2, reporter) { sk_sp fontCollection = sk_make_sp(); if (!fontCollection->fontsFound()) return; const char* text = "pใ€ q"; diff --git a/modules/skshaper/BUILD.gn b/modules/skshaper/BUILD.gn index 5acd98151b..e595d85949 100644 --- a/modules/skshaper/BUILD.gn +++ b/modules/skshaper/BUILD.gn @@ -21,7 +21,6 @@ if (skia_enable_skshaper) { } if (skia_use_icu && skia_use_harfbuzz) { defines += [ "SK_SHAPER_HARFBUZZ_AVAILABLE" ] - defines += [ "SK_UNICODE_AVAILABLE" ] } } @@ -31,16 +30,12 @@ if (skia_enable_skshaper) { public_configs = [ ":public_config" ] public = skia_shaper_public deps = [ "../..:skia" ] - defines = [ - "SKSHAPER_IMPLEMENTATION=1", - "SKUNICODE_IMPLEMENTATION=1", - ] + defines = [ "SKSHAPER_IMPLEMENTATION=1" ] sources = skia_shaper_primitive_sources if (skia_use_fonthost_mac) { sources += skia_shaper_coretext_sources } if (skia_use_icu && skia_use_harfbuzz) { - sources += skia_shaper_icu_sources sources += skia_shaper_harfbuzz_sources deps += [ "//third_party/harfbuzz", diff --git a/modules/skshaper/skshaper.gni b/modules/skshaper/skshaper.gni index d126464ea3..425cf3f4b9 100644 --- a/modules/skshaper/skshaper.gni +++ b/modules/skshaper/skshaper.gni @@ -13,6 +13,5 @@ skia_shaper_primitive_sources = [ "$_src/SkShaper.cpp", "$_src/SkShaper_primitive.cpp", ] -skia_shaper_icu_sources = [ "$_src/SkUnicode_icu.cpp" ] skia_shaper_harfbuzz_sources = [ "$_src/SkShaper_harfbuzz.cpp" ] skia_shaper_coretext_sources = [ "$_src/SkShaper_coretext.cpp" ] diff --git a/modules/skshaper/src/SkShaper.cpp b/modules/skshaper/src/SkShaper.cpp index bd2df519c2..5ab3a0465a 100644 --- a/modules/skshaper/src/SkShaper.cpp +++ b/modules/skshaper/src/SkShaper.cpp @@ -13,10 +13,6 @@ #include "include/core/SkTypeface.h" #include "include/private/SkTFitsIn.h" #include "modules/skshaper/include/SkShaper.h" - -#ifdef SK_UNICODE_AVAILABLE -#include "modules/skshaper/src/SkUnicode.h" -#endif #include "src/core/SkTextBlobPriv.h" #include "src/utils/SkUTF.h" diff --git a/modules/skshaper/src/SkUnicode.h b/modules/skshaper/src/SkUnicode.h deleted file mode 100644 index 0d0006eda9..0000000000 --- a/modules/skshaper/src/SkUnicode.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright 2020 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ -#ifndef SkUnicode_DEFINED -#define SkUnicode_DEFINED - -#include "include/core/SkTypes.h" -#include "src/core/SkSpan.h" -#include - -#if !defined(SKUNICODE_IMPLEMENTATION) - #define SKUNICODE_IMPLEMENTATION 0 -#endif - -#if !defined(SKUNICODE_API) - #if defined(SKSHAPER_DLL) - #if defined(_MSC_VER) - #if SKUNICODE_IMPLEMENTATION - #define SKUNICODE_API __declspec(dllexport) - #else - #define SKUNICODE_API __declspec(dllimport) - #endif - #else - #define SKUNICODE_API __attribute__((visibility("default"))) - #endif - #else - #define SKUNICODE_API - #endif -#endif - -namespace skia { - -enum class UtfFormat { - kUTF8, - kUTF16 -}; -// Bidi -typedef size_t Position; -typedef uint8_t BidiLevel; -enum class Direction { - kLTR, - kRTL, -}; -struct BidiRegion { - BidiRegion(Position start, Position end, BidiLevel level) - : start(start), end(end), level(level) { } - Position start; - Position end; - BidiLevel level; -}; -// LineBreaks -enum class LineBreakType { - kSoftLineBreak, - kHardLineBreak -}; -struct LineBreakBefore { - LineBreakBefore(Position pos, LineBreakType breakType) - : pos(pos), breakType(breakType) { } - Position pos; - LineBreakType breakType; -}; -// Other breaks -enum class UBreakType { - kWords, - kGraphemes, - kLines -}; -struct Range { - Position start; - Position end; -}; - -class SKUNICODE_API SkUnicode { - public: - typedef uint32_t ScriptID; - typedef uint32_t CombiningClass; - typedef uint32_t GeneralCategory; - virtual ~SkUnicode() = default; - // High level methods (that we actually use somewhere=SkParagraph) - virtual bool getBidiRegions - (const char utf8[], int utf8Units, Direction dir, std::vector* results) = 0; - virtual bool getLineBreaks - (const char utf8[], int utf8Units, std::vector* results) = 0; - virtual bool getWords - (const char utf8[], int utf8Units, std::vector* results) = 0; - virtual bool getGraphemes - (const char utf8[], int utf8Units, std::vector* results) = 0; - virtual bool getWhitespaces - (const char utf8[], int utf8Units, std::vector* results) = 0; - - virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0; - - static std::unique_ptr Make(); -}; - -} - -#endif // SkUnicode_DEFINED diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp deleted file mode 100644 index 8900b4ea5a..0000000000 --- a/modules/skshaper/src/SkUnicode_icu.cpp +++ /dev/null @@ -1,243 +0,0 @@ -/* -* Copyright 2020 Google Inc. -* -* Use of this source code is governed by a BSD-style license that can be -* found in the LICENSE file. -*/ -#include "include/private/SkTFitsIn.h" -#include "include/private/SkTemplates.h" -#include "modules/skshaper/src/SkUnicode.h" -#include "src/utils/SkUTF.h" -#include -#include -#include -#include -#include -#include - -using ICUBiDi = std::unique_ptr>; -using ICUUText = std::unique_ptr>; -using ICUBreakIterator = std::unique_ptr>; - -/** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */ -static inline SkUnichar utf8_next(const char** ptr, const char* end) { - SkUnichar val = SkUTF::NextUTF8(ptr, end); - return val < 0 ? 0xFFFD : val; -} - -namespace skia { - -class SkUnicode_icu : public SkUnicode { - - static UBreakIteratorType convertType(UBreakType type) { - switch (type) { - case UBreakType::kLines: return UBRK_LINE; - case UBreakType::kGraphemes: return UBRK_CHARACTER; - case UBreakType::kWords: return UBRK_WORD; - default: - SkDEBUGF("Convert error: wrong break type"); - return UBRK_CHARACTER; - } - } - - static int convertUtf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr* utf16) { - int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units); - if (utf16Units < 0) { - SkDEBUGF("Convert error: Invalid utf8 input"); - return utf16Units; - } - *utf16 = std::unique_ptr(new uint16_t[utf16Units]); - SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16->get(), utf16Units, utf8, utf8Units); - SkASSERT(dstLen == utf16Units); - return utf16Units; - } - - static bool extractBidi(const char utf8[], int utf8Units, Direction dir, std::vector* bidiRegions) { - - // Convert to UTF16 since for now bidi iterator only operates on utf16 - std::unique_ptr utf16; - auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16); - if (utf16Units < 0) { - return false; - } - - // Create bidi iterator - UErrorCode status = U_ZERO_ERROR; - ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status)); - if (U_FAILURE(status)) { - SkDEBUGF("Bidi error: %s", u_errorName(status)); - return false; - } - SkASSERT(bidi); - uint8_t bidiLevel = (dir == Direction::kLTR) ? UBIDI_LTR : UBIDI_RTL; - // The required lifetime of utf16 isn't well documented. - // It appears it isn't used after ubidi_setPara except through ubidi_getText. - ubidi_setPara(bidi.get(), (const UChar*)utf16.get(), utf16Units, bidiLevel, nullptr, &status); - if (U_FAILURE(status)) { - SkDEBUGF("Bidi error: %s", u_errorName(status)); - return false; - } - - // Iterate through bidi regions and the result positions into utf8 - const char* start8 = utf8; - const char* end8 = utf8 + utf8Units; - BidiLevel currentLevel = 0; - - Position pos8 = 0; - Position pos16 = 0; - Position end16 = ubidi_getLength(bidi.get()); - while (pos16 < end16) { - auto level = ubidi_getLevelAt(bidi.get(), pos16); - if (pos16 == 0) { - currentLevel = level; - } else if (level != currentLevel) { - Position end = start8 - utf8; - bidiRegions->emplace_back(pos8, end, currentLevel); - currentLevel = level; - pos8 = end; - } - SkUnichar u = utf8_next(&start8, end8); - pos16 += SkUTF::ToUTF16(u); - } - Position end = start8 - utf8; - if (end != pos8) { - bidiRegions->emplace_back(pos8, end, currentLevel); - } - return true; - } - - static bool extractWords(uint16_t utf16[], int utf16Units, std::vector* words) { - - UErrorCode status = U_ZERO_ERROR; - - UBreakIteratorType breakType = convertType(UBreakType::kWords); - ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status)); - if (U_FAILURE(status)) { - SkDEBUGF("Break error: %s", u_errorName(status)); - return false; - } - SkASSERT(iterator); - - UText sUtf16UText = UTEXT_INITIALIZER; - ICUUText utf16UText(utext_openUChars(&sUtf16UText, (UChar*)utf16, utf16Units, &status)); - if (U_FAILURE(status)) { - SkDEBUGF("Break error: %s", u_errorName(status)); - return false; - } - - ubrk_setUText(iterator.get(), utf16UText.get(), &status); - if (U_FAILURE(status)) { - SkDEBUGF("Break error: %s", u_errorName(status)); - return false; - } - - // Get the words - int32_t pos = ubrk_first(iterator.get()); - while (pos != UBRK_DONE) { - words->emplace_back(pos); - pos = ubrk_next(iterator.get()); - } - - return true; - } - - static bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function add) { - - UErrorCode status = U_ZERO_ERROR; - UText sUtf8UText = UTEXT_INITIALIZER; - ICUUText text(utext_openUTF8(&sUtf8UText, &utf8[0], utf8Units, &status)); - - if (U_FAILURE(status)) { - SkDEBUGF("Break error: %s", u_errorName(status)); - return false; - } - SkASSERT(text); - - ICUBreakIterator iterator(ubrk_open(convertType(type), uloc_getDefault(), nullptr, 0, &status)); - if (U_FAILURE(status)) { - SkDEBUGF("Break error: %s", u_errorName(status)); - } - - ubrk_setUText(iterator.get(), text.get(), &status); - if (U_FAILURE(status)) { - SkDEBUGF("Break error: %s", u_errorName(status)); - return false; - } - - auto iter = iterator.get(); - int32_t pos = ubrk_first(iter); - while (pos != UBRK_DONE) { - add(pos, ubrk_getRuleStatus(iter)); - pos = ubrk_next(iter); - } - return true; - } - - static bool extractWhitespaces(const char utf8[], int utf8Units, std::vector* whitespaces) { - - const char* start = utf8; - const char* end = utf8 + utf8Units; - const char* ch = start; - while (ch < end) { - auto index = ch - start; - auto unichar = utf8_next(&ch, end); - if (u_isWhitespace(unichar)) { - auto ending = ch - start; - for (auto k = index; k < ending; ++k) { - whitespaces->emplace_back(k); - } - } - } - return true; - } - -public: - ~SkUnicode_icu() override { } - - bool getBidiRegions(const char utf8[], int utf8Units, Direction dir, std::vector* results) override { - return extractBidi(utf8, utf8Units, dir, results); - } - - bool getLineBreaks(const char utf8[], int utf8Units, std::vector* results) override { - - return extractPositions(utf8, utf8Units, UBreakType::kLines, - [results](int pos, int status) { - results->emplace_back(pos,status == UBRK_LINE_HARD - ? LineBreakType::kHardLineBreak - : LineBreakType::kSoftLineBreak); - }); - } - - bool getWords(const char utf8[], int utf8Units, std::vector* results) override { - - // Convert to UTF16 since we want the results in utf16 - std::unique_ptr utf16; - auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16); - if (utf16Units < 0) { - return false; - } - - return extractWords(utf16.get(), utf16Units, results); - } - - bool getGraphemes(const char utf8[], int utf8Units, std::vector* results) override { - - return extractPositions(utf8, utf8Units, UBreakType::kGraphemes, - [results](int pos, int status) { results->emplace_back(pos); - }); - } - - bool getWhitespaces(const char utf8[], int utf8Units, std::vector* results) override { - - return extractWhitespaces(utf8, utf8Units, results); - } - - void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override { - ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual); - } -}; - -std::unique_ptr SkUnicode::Make() { return std::make_unique(); } - -} - diff --git a/src/utils/SkUTF.cpp b/src/utils/SkUTF.cpp index ff174625a4..8a9d5bd1f7 100644 --- a/src/utils/SkUTF.cpp +++ b/src/utils/SkUTF.cpp @@ -251,36 +251,3 @@ size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) { return 1 + extra; } -int SkUTF::UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength) { - if (!dst) { - dstCapacity = 0; - } - - int dstLength = 0; - uint16_t* endDst = dst + dstCapacity; - const char* endSrc = src + srcByteLength; - while (src < endSrc) { - SkUnichar uni = NextUTF8(&src, endSrc); - if (uni < 0) { - return -1; - } - - uint16_t utf16[2]; - size_t count = ToUTF16(uni, utf16); - if (count == 0) { - return -1; - } - dstLength += count; - - if (dst) { - uint16_t* elems = utf16; - while (dst < endDst && count > 0) { - *dst++ = *elems++; - count -= 1; - } - } - } - return dstLength; -} - - diff --git a/src/utils/SkUTF.h b/src/utils/SkUTF.h index eb1ee9159f..676ce4a76f 100644 --- a/src/utils/SkUTF.h +++ b/src/utils/SkUTF.h @@ -64,12 +64,6 @@ SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr */ SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr); -/** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence. - * If dst is not null, it is filled with the corresponding values up to its capacity. - * If there is an error, -1 is returned and the dst[] buffer is undefined. - */ -SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength); - } // namespace SkUTF #endif // SkUTF_DEFINED