ICU API: only in SkParagraph, simplified (relanding reverted).

Reverted commit: https://skia-review.googlesource.com/c/skia/+/296128/

Change-Id: Iaf793bff94a6060579c7d6176d477e598c047be6
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303261
Reviewed-by: Mike Reed <reed@google.com>
Commit-Queue: Julia Lavrova <jlavrova@google.com>
This commit is contained in:
Julia Lavrova 2020-06-12 15:34:45 -04:00 committed by Skia Commit-Bot
parent 9d960f1e0f
commit 7479eda3b6
14 changed files with 455 additions and 279 deletions

View File

@ -2,23 +2,13 @@
#include "modules/skparagraph/src/Iterators.h"
#include "modules/skparagraph/src/OneLineShaper.h"
#include <unicode/uchar.h>
#include "modules/skparagraph/src/ParagraphUtil.h"
#include <algorithm>
#include <unordered_set>
#include "src/utils/SkUTF.h"
namespace skia {
namespace textlayout {
namespace {
// Reads one code point through SkUTF::NextUTF8 and maps a negative (error)
// result to U+FFFD REPLACEMENT CHARACTER.
SkUnichar utf8_next(const char** ptr, const char* end) {
    const SkUnichar decoded = SkUTF::NextUTF8(ptr, end);
    if (decoded < 0) {
        return 0xFFFD;
    }
    return decoded;
}
}
void OneLineShaper::commitRunBuffer(const RunInfo&) {
fCurrentRun->commit();
@ -313,8 +303,8 @@ void OneLineShaper::sortOutGlyphs(std::function<void(GlyphRange)>&& sortOutUnres
block.end = i;
} else {
const char* cluster = text.begin() + clusterIndex(i);
SkUnichar codepoint = utf8_next(&cluster, text.end());
if (u_iscntrl(codepoint)) {
SkUnichar codepoint = nextUtf8Unit(&cluster, text.end());
if (isControl(codepoint)) {
// This codepoint does not have to be resolved; let's pretend it's resolved
if (block.start == EMPTY_INDEX) {
// Keep skipping resolved code points
@ -419,7 +409,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,
// We have the global cache for all already found typefaces for SkUnichar
// but we still need to keep track of all SkUnichars used in this unresolved block
SkTHashSet<SkUnichar> alreadyTried;
SkUnichar unicode = utf8_next(&ch, unresolvedText.end());
SkUnichar unicode = nextUtf8Unit(&ch, unresolvedText.end());
while (true) {
sk_sp<SkTypeface> typeface;
@ -457,7 +447,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,
// We can stop here or we can switch to another DIFFERENT codepoint
while (ch != unresolvedText.end()) {
unicode = utf8_next(&ch, unresolvedText.end());
unicode = nextUtf8Unit(&ch, unresolvedText.end());
auto found = alreadyTried.find(unicode);
if (found == nullptr) {
alreadyTried.add(unicode);
@ -472,10 +462,6 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,
bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
if (!fParagraph->getBidiRegions()) {
return false;
}
size_t bidiIndex = 0;
SkScalar advanceX = 0;
@ -485,8 +471,8 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
// Shape the text by bidi regions
while (bidiIndex < fParagraph->fBidiRegions.size()) {
BidiRegion& bidiRegion = fParagraph->fBidiRegions[bidiIndex];
auto start = std::max(bidiRegion.text.start, placeholder.fTextBefore.start);
auto end = std::min(bidiRegion.text.end, placeholder.fTextBefore.end);
auto start = std::max(bidiRegion.start, placeholder.fTextBefore.start);
auto end = std::min(bidiRegion.end, placeholder.fTextBefore.end);
// Set up the iterators (the style iterator points to a bigger region that it could
TextRange textRange(start, end);
@ -494,11 +480,11 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
SkSpan<Block> styleSpan(fParagraph->blocks(blockRange));
// Shape the text between placeholders
if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.direction)) {
if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.level)) {
return false;
}
if (end == bidiRegion.text.end) {
if (end == bidiRegion.end) {
++bidiIndex;
} else /*if (end == placeholder.fTextBefore.end)*/ {
break;

View File

@ -50,7 +50,7 @@ public:
// ICU results
SkTArray<CodeUnitFlags> fCodeUnitProperties;
std::vector<size_t> fWords;
SkTArray<BidiRegion> fBidiRegions;
std::vector<BidiRegion> fBidiRegions;
SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
SkTArray<size_t, true> fUTF16IndexForUTF8Index;
};

View File

@ -25,12 +25,6 @@
#endif
#include <math.h>
#include <unicode/ubidi.h>
#include <unicode/uloc.h>
#include <unicode/umachine.h>
#include <unicode/ustring.h>
#include <unicode/utext.h>
#include <unicode/utypes.h>
#include <algorithm>
#include <utility>
@ -40,9 +34,6 @@ namespace textlayout {
namespace {
using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
using ICUBiDi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
SkScalar littleRound(SkScalar a) {
// This rounding is done to match Flutter tests. It must be removed eventually.
auto val = std::fabs(a);
@ -54,13 +45,6 @@ SkScalar littleRound(SkScalar a) {
return SkScalarFloorToScalar(a);
}
}
/**
 * Decodes one code point via SkUTF::NextUTF8; a negative (error) result is
 * reported as REPLACEMENT CHARACTER U+FFFD instead.
 */
static inline SkUnichar utf8_next(const char** ptr, const char* end) {
    const SkUnichar codepoint = SkUTF::NextUTF8(ptr, end);
    if (codepoint >= 0) {
        return codepoint;
    }
    return 0xFFFD;
}
}
TextRange operator*(const TextRange& a, const TextRange& b) {
@ -99,6 +83,7 @@ ParagraphImpl::ParagraphImpl(const SkString& text,
, fOldWidth(0)
, fOldHeight(0)
, fOrigin(SkRect::MakeEmpty()) {
fICU = SkUnicode_Make();
}
ParagraphImpl::ParagraphImpl(const std::u16string& utf16text,
@ -145,7 +130,7 @@ void ParagraphImpl::layout(SkScalar rawWidth) {
this->fCodeUnitProperties.reset();
this->fCodeUnitProperties.push_back_n(fText.size() + 1, CodeUnitFlags::kNoCodeUnitFlag);
this->fWords.clear();
this->fBidiRegions.reset();
this->fBidiRegions.clear();
this->fUTF8IndexForUTF16Index.reset();
this->fUTF16IndexForUTF8Index.reset();
this->fRuns.reset();
@ -244,72 +229,6 @@ void ParagraphImpl::resetContext() {
fExceededMaxLines = false;
}
// Owns an ICU break iterator (released with ubrk_close) that is attached to a
// UTF-8 text span, and exposes boundary navigation in which ICU's UBRK_DONE is
// translated into a text offset (the text size going forward, 0 going backward).
class TextBreaker {
public:
    // fSize is zeroed here so that no member is left indeterminate before
    // initialize() has been called.
    TextBreaker() : fInitialized(false), fPos(-1), fSize(0) {}

    // Creates a break iterator of the requested type (locale fixed to "en") and
    // attaches it to `text`. Returns false if the UText cannot be opened or
    // attached; aborts if the iterator itself cannot be created.
    bool initialize(SkSpan<const char> text, UBreakIteratorType type) {
        UErrorCode status = U_ZERO_ERROR;
        // Forget any earlier successful initialization, so that a failure below
        // is not reported as initialized() == true.
        fInitialized = false;
        fIterator = nullptr;
        fSize = text.size();

        UText sUtf8UText = UTEXT_INITIALIZER;
        // NOTE(review): this UText is closed when initialize() returns; that relies on
        // ubrk_setUText keeping its own clone of the UText - confirm against the ICU docs.
        std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>> utf8UText(
                utext_openUTF8(&sUtf8UText, text.begin(), text.size(), &status));
        if (U_FAILURE(status)) {
            SkDEBUGF("Could not create utf8UText: %s", u_errorName(status));
            return false;
        }

        fIterator.reset(ubrk_open(type, "en", nullptr, 0, &status));
        if (U_FAILURE(status)) {
            SkDEBUGF("Could not create line break iterator: %s", u_errorName(status));
            SK_ABORT("");
        }

        ubrk_setUText(fIterator.get(), utf8UText.get(), &status);
        if (U_FAILURE(status)) {
            SkDEBUGF("Could not setText on break iterator: %s", u_errorName(status));
            return false;
        }

        fInitialized = true;
        fPos = 0;
        return true;
    }

    // True once initialize() has completed successfully.
    bool initialized() const { return fInitialized; }

    // Moves to the first boundary; returns the text size if ICU reports UBRK_DONE.
    size_t first() {
        fPos = ubrk_first(fIterator.get());
        return eof() ? fSize : fPos;
    }

    // Moves to the next boundary; returns the text size if ICU reports UBRK_DONE.
    size_t next() {
        fPos = ubrk_next(fIterator.get());
        return eof() ? fSize : fPos;
    }

    // Boundary before `offset`, or 0 when ICU reports UBRK_DONE. Does not update fPos.
    size_t preceding(size_t offset) {
        auto pos = ubrk_preceding(fIterator.get(), offset);
        return pos == UBRK_DONE ? 0 : pos;
    }

    // Boundary after `offset`, or the text size when ICU reports UBRK_DONE. Does not update fPos.
    size_t following(size_t offset) {
        auto pos = ubrk_following(fIterator.get(), offset);
        return pos == UBRK_DONE ? fSize : pos;
    }

    // Rule status ICU reports for the iterator's current boundary.
    int32_t status() { return ubrk_getRuleStatus(fIterator.get()); }

    // True when the last first()/next() call returned UBRK_DONE (also true before initialize()).
    bool eof() { return fPos == UBRK_DONE; }

private:
    std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>> fIterator;
    bool fInitialized;
    int32_t fPos;   // last position returned by ubrk_first/ubrk_next
    size_t fSize;   // size of the text passed to initialize()
};
// shapeTextIntoEndlessLine is the thing that calls this method
// (that contains all ICU dependencies except for words)
bool ParagraphImpl::computeCodeUnitProperties() {
@ -320,165 +239,41 @@ bool ParagraphImpl::computeCodeUnitProperties() {
}
#endif
{
const char* start = fText.c_str();
const char* end = start + fText.size();
const char* ch = start;
while (ch < end) {
auto index = ch - start;
auto unichar = utf8_next(&ch, end);
if (u_isWhitespace(unichar)) {
auto ending = ch - start;
for (auto k = index; k < ending; ++k) {
fCodeUnitProperties[k] |= CodeUnitFlags::kPartOfWhiteSpace;
}
}
}
}
{
TextBreaker breaker;
if (!breaker.initialize(this->text(), UBRK_LINE)) {
return false;
}
while (!breaker.eof()) {
size_t currentPos = breaker.next();
fCodeUnitProperties[currentPos] |=
breaker.status() == UBRK_LINE_HARD ? CodeUnitFlags::kHardLineBreakBefore : CodeUnitFlags::kSoftLineBreakBefore;
}
}
{
TextBreaker breaker;
if (!breaker.initialize(this->text(), UBRK_CHARACTER)) {
return false;
}
while (!breaker.eof()) {
auto currentPos = breaker.next();
fCodeUnitProperties[currentPos] |= CodeUnitFlags::kGraphemeStart;
}
}
return true;
}
// getWordBoundary is the thing that calls this method lazily
bool ParagraphImpl::computeWords() {
if (!fWords.empty()) {
return true;
}
UErrorCode errorCode = U_ZERO_ERROR;
auto iter = ubrk_open(UBRK_WORD, uloc_getDefault(), nullptr, 0, &errorCode);
if (U_FAILURE(errorCode)) {
SkDEBUGF("Could not create line break iterator: %s", u_errorName(errorCode));
// Get bidi regions
Direction textDirection = fParagraphStyle.getTextDirection() == TextDirection::kLtr
? Direction::kLTR
: Direction::kRTL;
if (!fICU->getBidiRegions(fText.c_str(), fText.size(), textDirection, &fBidiRegions)) {
return false;
}
// Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
int32_t utf16Units;
u_strFromUTF8(nullptr, 0, &utf16Units, fText.c_str(), fText.size(), &errorCode);
errorCode = U_ZERO_ERROR;
std::unique_ptr<UChar[]> utf16(new UChar[utf16Units]);
u_strFromUTF8(utf16.get(), utf16Units, nullptr, fText.c_str(), fText.size(), &errorCode);
if (U_FAILURE(errorCode)) {
SkDEBUGF("Invalid utf8 input: %s", u_errorName(errorCode));
// Get white spaces
std::vector<Position> whitespaces;
if (!fICU->getWhitespaces(fText.c_str(), fText.size(), &whitespaces)) {
return false;
}
for (auto whitespace : whitespaces) {
fCodeUnitProperties[whitespace] |= CodeUnitFlags::kPartOfWhiteSpace;
}
UText sUtf16UText = UTEXT_INITIALIZER;
ICUUText utf8UText(utext_openUChars(&sUtf16UText, utf16.get(), utf16Units, &errorCode));
if (U_FAILURE(errorCode)) {
SkDEBUGF("Could not create utf8UText: %s", u_errorName(errorCode));
// Get line breaks
std::vector<LineBreakBefore> lineBreaks;
if (!fICU->getLineBreaks(fText.c_str(), fText.size(), &lineBreaks)) {
return false;
}
for (auto& lineBreak : lineBreaks) {
fCodeUnitProperties[lineBreak.pos] |= lineBreak.breakType == LineBreakType::kHardLineBreak
? CodeUnitFlags::kHardLineBreakBefore
: CodeUnitFlags::kSoftLineBreakBefore;
}
ubrk_setUText(iter, utf8UText.get(), &errorCode);
if (U_FAILURE(errorCode)) {
SkDEBUGF("Could not setText on break iterator: %s", u_errorName(errorCode));
// Get graphemes
std::vector<Position> graphemes;
if (!fICU->getGraphemes(fText.c_str(), fText.size(), &graphemes)) {
return false;
}
int32_t pos = ubrk_first(iter);
while (pos != UBRK_DONE) {
fWords.emplace_back(pos);
pos = ubrk_next(iter);
}
return true;
}
bool ParagraphImpl::getBidiRegions() {
if (!fBidiRegions.empty()) {
return true;
}
// ubidi only accepts utf16 (though internally it basically works on utf32 chars).
// We want an ubidi_setPara(UBiDi*, UText*, UBiDiLevel, UBiDiLevel*, UErrorCode*);
size_t utf8Bytes = fText.size();
const char* utf8 = fText.c_str();
uint8_t bidiLevel = fParagraphStyle.getTextDirection() == TextDirection::kLtr
? UBIDI_LTR
: UBIDI_RTL;
if (!SkTFitsIn<int32_t>(utf8Bytes)) {
SkDEBUGF("Bidi error: text too long");
return false;
}
// Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
UErrorCode status = U_ZERO_ERROR;
int32_t utf16Units;
u_strFromUTF8(nullptr, 0, &utf16Units, utf8, utf8Bytes, &status);
status = U_ZERO_ERROR;
std::unique_ptr<UChar[]> utf16(new UChar[utf16Units]);
u_strFromUTF8(utf16.get(), utf16Units, nullptr, utf8, utf8Bytes, &status);
if (U_FAILURE(status)) {
SkDEBUGF("Invalid utf8 input: %s", u_errorName(status));
return false;
}
ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
if (U_FAILURE(status)) {
SkDEBUGF("Bidi error: %s", u_errorName(status));
return false;
}
SkASSERT(bidi);
// The required lifetime of utf16 isn't well documented.
// It appears it isn't used after ubidi_setPara except through ubidi_getText.
ubidi_setPara(bidi.get(), utf16.get(), utf16Units, bidiLevel, nullptr, &status);
if (U_FAILURE(status)) {
SkDEBUGF("Bidi error: %s", u_errorName(status));
return false;
}
SkTArray<BidiRegion> bidiRegions;
const char* start8 = utf8;
const char* end8 = utf8 + utf8Bytes;
TextRange textRange(0, 0);
UBiDiLevel currentLevel = 0;
int32_t pos16 = 0;
int32_t end16 = ubidi_getLength(bidi.get());
while (pos16 < end16) {
auto level = ubidi_getLevelAt(bidi.get(), pos16);
if (pos16 == 0) {
currentLevel = level;
} else if (level != currentLevel) {
textRange.end = start8 - utf8;
fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel);
currentLevel = level;
textRange = TextRange(textRange.end, textRange.end);
}
SkUnichar u = utf8_next(&start8, end8);
pos16 += SkUTF::ToUTF16(u);
}
textRange.end = start8 - utf8;
if (!textRange.empty()) {
fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel);
for (auto pos : graphemes) {
fCodeUnitProperties[pos] |= CodeUnitFlags::kGraphemeStart;
}
return true;
@ -883,21 +678,23 @@ PositionWithAffinity ParagraphImpl::getGlyphPositionAtCoordinate(SkScalar dx, Sk
// By "glyph" they mean a character index - indicated by Minikin's code
SkRange<size_t> ParagraphImpl::getWordBoundary(unsigned offset) {
if (!computeWords()) {
return {0, 0 };
if (fWords.empty()) {
if (!fICU->getWords(fText.c_str(), fText.size(), &fWords)) {
return {0, 0 };
}
}
int32_t start = 0;
int32_t end = 0;
for (size_t i = 0; i < fWords.size(); ++i) {
auto word = fWords[i];
if (word <= offset) {
start = word;
end = word;
} else if (word > offset) {
end = word;
break;
}
auto word = fWords[i];
if (word <= offset) {
start = word;
end = word;
} else if (word > offset) {
end = word;
break;
}
}
//SkDebugf("getWordBoundary(%d): %d - %d\n", offset, start, end);
@ -980,7 +777,7 @@ void ParagraphImpl::setState(InternalState state) {
fCodeUnitProperties.reset();
fCodeUnitProperties.push_back_n(fText.size() + 1, kNoCodeUnitFlag);
fWords.clear();
fBidiRegions.reset();
fBidiRegions.clear();
fUTF8IndexForUTF16Index.reset();
fUTF16IndexForUTF8Index.reset();
[[fallthrough]];

View File

@ -23,9 +23,9 @@
#include "modules/skparagraph/include/TextShadow.h"
#include "modules/skparagraph/include/TextStyle.h"
#include "modules/skparagraph/src/Run.h"
#include "modules/skshaper/src/SkUnicode.h"
#include "src/core/SkSpan.h"
#include <unicode/ubrk.h>
#include <memory>
#include <string>
#include <vector>
@ -83,14 +83,14 @@ struct ResolvedFontDescriptor {
SkFont fFont;
TextIndex fTextStart;
};
/*
struct BidiRegion {
BidiRegion(size_t start, size_t end, uint8_t dir)
: text(start, end), direction(dir) { }
TextRange text;
uint8_t direction;
};
*/
class ParagraphImpl final : public Paragraph {
public:
@ -186,8 +186,6 @@ public:
void resolveStrut();
bool computeCodeUnitProperties();
bool computeWords();
bool getBidiRegions();
void buildClusterTable();
void spaceGlyphs();
@ -250,7 +248,7 @@ private:
SkTArray<CodeUnitFlags> fCodeUnitProperties;
SkTArray<size_t> fClustersIndexFromCodeUnit;
std::vector<size_t> fWords;
SkTArray<BidiRegion> fBidiRegions;
std::vector<BidiRegion> fBidiRegions;
// These two arrays are used in measuring methods (getRectsForRange, getGlyphPositionAtCoordinate)
// They are filled lazily whenever they need and cached
SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
@ -269,6 +267,8 @@ private:
SkScalar fOldHeight;
SkScalar fMaxWidthWithTrailingSpaces;
SkRect fOrigin;
std::unique_ptr<SkUnicode> fICU;
};
} // namespace textlayout
} // namespace skia

View File

@ -4,8 +4,10 @@
#include "include/core/SkTypes.h"
#include "include/private/SkTo.h"
#include "modules/skparagraph/src/ParagraphUtil.h"
#include "src/utils/SkUTF.h"
#include <unicode/umachine.h>
#include <unicode/uchar.h>
#include <unicode/ustring.h>
#include <unicode/utypes.h>
#include <string>
@ -30,5 +32,14 @@ SkString SkStringFromU16String(const std::u16string& utf16text) {
return dst;
}
// Decodes one code point with SkUTF::NextUTF8; when the decoder signals an
// error (negative value) U+FFFD REPLACEMENT CHARACTER is returned instead.
SkUnichar nextUtf8Unit(const char** ptr, const char* end) {
    const SkUnichar decoded = SkUTF::NextUTF8(ptr, end);
    if (decoded < 0) {
        return 0xFFFD;
    }
    return decoded;
}
// Reports whether ICU's u_iscntrl() classifies the given code point as a control character.
// NOTE(review): despite its name, `utf8` is passed to u_iscntrl as a code point, not a UTF-8 byte.
bool isControl(SkUnichar utf8) {
    const auto icuVerdict = u_iscntrl(utf8);
    return icuVerdict != 0;
}
}
}

View File

@ -8,6 +8,8 @@
namespace skia {
namespace textlayout {
SkString SkStringFromU16String(const std::u16string& utf16text);
SkUnichar nextUtf8Unit(const char** ptr, const char* end);
bool isControl(SkUnichar utf8);
}
}

View File

@ -21,7 +21,6 @@
#include "modules/skshaper/include/SkShaper.h"
#include "src/core/SkSpan.h"
#include <unicode/ubidi.h>
#include <algorithm>
#include <iterator>
#include <limits>
@ -131,21 +130,20 @@ TextLine::TextLine(ParagraphImpl* master,
// This is just chosen to catch the common/fast cases. Feel free to tweak.
constexpr int kPreallocCount = 4;
SkAutoSTArray<kPreallocCount, UBiDiLevel> runLevels(numRuns);
SkAutoSTArray<kPreallocCount, BidiLevel> runLevels(numRuns);
size_t runLevelsIndex = 0;
for (auto runIndex = start.runIndex(); runIndex <= end.runIndex(); ++runIndex) {
auto& run = fMaster->run(runIndex);
runLevels[runLevelsIndex++] = run.fBidiLevel;
fMaxRunMetrics.add(InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent,
run.fFontMetrics.fLeading));
fMaxRunMetrics.add(
InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent, run.fFontMetrics.fLeading));
}
SkASSERT(runLevelsIndex == numRuns);
SkAutoSTArray<kPreallocCount, int32_t> logicalOrder(numRuns);
ubidi_reorderVisual(runLevels.data(), SkToU32(numRuns), logicalOrder.data());
// TODO: hide all of this logic in SkUnicode?
SkUnicode::ReorderVisual(runLevels.data(), numRuns, logicalOrder.data());
auto firstRunIndex = start.runIndex();
for (auto index : logicalOrder) {
fRunsInVisualOrder.push_back(firstRunIndex + index);

View File

@ -12,6 +12,7 @@ skia_shaper_public = [ "$_include/SkShaper.h" ]
skia_shaper_primitive_sources = [
"$_src/SkShaper.cpp",
"$_src/SkShaper_primitive.cpp",
"$_src/SkUnicode_icu.cpp",
]
skia_shaper_harfbuzz_sources = [ "$_src/SkShaper_harfbuzz.cpp" ]
skia_shaper_coretext_sources = [ "$_src/SkShaper_coretext.cpp" ]

View File

@ -13,6 +13,7 @@
#include "include/core/SkTypeface.h"
#include "include/private/SkTFitsIn.h"
#include "modules/skshaper/include/SkShaper.h"
#include "modules/skshaper/src/SkUnicode.h"
#include "src/core/SkTextBlobPriv.h"
#include "src/utils/SkUTF.h"

View File

@ -10,6 +10,7 @@
#include "include/core/SkTypeface.h"
#include "include/private/SkTo.h"
#include "modules/skshaper/include/SkShaper.h"
#include "modules/skshaper/src/SkUnicode.h"
#include "src/utils/SkUTF.h"
class SkShaperPrimitive : public SkShaper {

View File

@ -0,0 +1,82 @@
/*
* Copyright 2020 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkUnicode_DEFINED
#define SkUnicode_DEFINED
#include "include/core/SkTypes.h"
#include "src/core/SkSpan.h"
#include <vector>
#include <unicode/utf.h>
// Plain data types and the abstract SkUnicode interface through which callers
// obtain bidi regions, break positions and whitespace positions for UTF-8 text.
namespace skia {
// Names the two text encodings this header distinguishes.
enum class UtfFormat {
kUTF8,
kUTF16
};
// Bidi
// Index into text. Which encoding it counts in depends on the method that
// produced it (see the notes on SkUnicode below).
typedef size_t Position;
// Bidi embedding level of a piece of text.
typedef uint8_t BidiLevel;
// Base text direction handed to getBidiRegions().
enum class Direction {
kLTR,
kRTL,
};
// A [start, end) stretch of text together with its bidi level.
struct BidiRegion {
BidiRegion(Position start, Position end, BidiLevel level)
: start(start), end(end), level(level) { }
Position start;
Position end;
BidiLevel level;
};
// Line breaks
// Kind of line break opportunity reported by getLineBreaks().
enum class LineBreakType {
kSoftLineBreak,
kHardLineBreak
};
// A line break opportunity located before the code unit at `pos`.
struct LineBreakBefore {
LineBreakBefore(Position pos, LineBreakType breakType)
: pos(pos), breakType(breakType) { }
Position pos;
LineBreakType breakType;
};
// Other breaks
// Selects which kind of boundary a break iterator should look for.
enum class UBreakType {
kWords,
kGraphemes,
kLines
};
// Pair of positions.
// NOTE(review): not used by any declaration visible in this header.
struct Range {
Position start;
Position end;
};
// Abstract Unicode services interface; obtain an instance from SkUnicode_Make().
class SkUnicode {
public:
typedef uint32_t ScriptID;
typedef uint32_t CombiningClass;
typedef uint32_t GeneralCategory;
virtual ~SkUnicode() {}
// High level methods (the ones SkParagraph actually uses).
// Each takes UTF-8 text of utf8Units code units and reports its findings through
// `results`; the bool return signals success.
// In the ICU-backed implementation (SkUnicode_icu) results are appended, and the
// reported positions are UTF-8 offsets - except getWords(), which iterates over a
// UTF-16 copy of the text and therefore reports UTF-16 offsets.
virtual bool getBidiRegions
(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) = 0;
virtual bool getLineBreaks
(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) = 0;
virtual bool getWords
(const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
virtual bool getGraphemes
(const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
virtual bool getWhitespaces
(const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
// Computes a visual ordering for levelsCount runs from their bidi levels; the
// ICU-backed definition forwards to ubidi_reorderVisual.
static void ReorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]);
};
// Factory for the SkUnicode implementation linked into the build.
std::unique_ptr<SkUnicode> SkUnicode_Make();
}
#endif // SkUnicode_DEFINED

View File

@ -0,0 +1,258 @@
/*
* Copyright 2020 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "include/private/SkTFitsIn.h"
#include "include/private/SkTemplates.h"
#include "modules/skshaper/src/SkUnicode.h"
#include "src/utils/SkUTF.h"
#include <unicode/ubidi.h>
#include <unicode/ubrk.h>
#include <unicode/utext.h>
#include <unicode/utypes.h>
#include <vector>
#include <functional>
using ICUBiDi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>>;
/**
 * Decodes one code point via SkUTF::NextUTF8, reporting REPLACEMENT CHARACTER
 * U+FFFD whenever the decoder returns a negative (error) value.
 */
static inline SkUnichar utf8_next(const char** ptr, const char* end) {
    const SkUnichar codepoint = SkUTF::NextUTF8(ptr, end);
    if (codepoint >= 0) {
        return codepoint;
    }
    return 0xFFFD;
}
namespace skia {
class SkUnicode_icu : public SkUnicode {
// Groups UTF-8 and UTF-16 views of a text with a base direction.
// NOTE(review): not referenced by any method visible in this class.
struct InputData {
SkSpan<const char> fUtf8;
SkSpan<uint16_t> fUtf16;
Direction fTextDirection;
};
// Groups one result vector per kind of analysis with a UtfFormat tag.
// NOTE(review): not referenced by any method visible in this class; presumably
// fUtfFormat names the encoding the positions refer to - confirm before use.
struct OutputData {
UtfFormat fUtfFormat;
std::vector<BidiRegion> fBidiRegions;
std::vector<Position> fWords;
std::vector<LineBreakBefore> fLineBreaks;
std::vector<Position> fGraphemes;
std::vector<Position> fWhitespaces;
};
// Maps the library's UBreakType onto the matching ICU UBreakIteratorType.
// An unrecognized value is logged and treated as a grapheme (character) iterator.
static UBreakIteratorType convertType(UBreakType type) {
    if (type == UBreakType::kLines) {
        return UBRK_LINE;
    }
    if (type == UBreakType::kGraphemes) {
        return UBRK_CHARACTER;
    }
    if (type == UBreakType::kWords) {
        return UBRK_WORD;
    }
    SkDEBUGF("Convert error: wrong break type");
    return UBRK_CHARACTER;
}
static int convertUtf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
if (utf16Units < 0) {
SkDEBUGF("Convert error: Invalid utf8 input");
return utf16Units;
}
*utf16 = std::unique_ptr<uint16_t[]>(new uint16_t[utf16Units]);
SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16->get(), utf16Units, utf8, utf8Units);
SkASSERT(dstLen == utf16Units);
return utf16Units;
}
public:
bool extractBidi(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* bidiRegions) {
// Convert to UTF16 since for now bidi iterator only operates on utf16
std::unique_ptr<uint16_t[]> utf16;
auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
if (utf16Units < 0) {
return false;
}
// Create bidi iterator
UErrorCode status = U_ZERO_ERROR;
ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
if (U_FAILURE(status)) {
SkDEBUGF("Bidi error: %s", u_errorName(status));
return false;
}
SkASSERT(bidi);
uint8_t bidiLevel = (dir == Direction::kLTR) ? UBIDI_LTR : UBIDI_RTL;
// The required lifetime of utf16 isn't well documented.
// It appears it isn't used after ubidi_setPara except through ubidi_getText.
ubidi_setPara(bidi.get(), (const UChar*)utf16.get(), utf16Units, bidiLevel, nullptr, &status);
if (U_FAILURE(status)) {
SkDEBUGF("Bidi error: %s", u_errorName(status));
return false;
}
// Iterate through bidi regions and the result positions into utf8
const char* start8 = utf8;
const char* end8 = utf8 + utf8Units;
BidiLevel currentLevel = 0;
Position pos8 = 0;
Position pos16 = 0;
Position end16 = ubidi_getLength(bidi.get());
while (pos16 < end16) {
auto level = ubidi_getLevelAt(bidi.get(), pos16);
if (pos16 == 0) {
currentLevel = level;
} else if (level != currentLevel) {
Position end = start8 - utf8;
bidiRegions->emplace_back(pos8, end, currentLevel);
currentLevel = level;
pos8 = end;
}
SkUnichar u = utf8_next(&start8, end8);
pos16 += SkUTF::ToUTF16(u);
}
Position end = start8 - utf8;
if (end != pos8) {
bidiRegions->emplace_back(pos8, end, currentLevel);
}
return true;
}
bool extractWords(uint16_t utf16[], int utf16Units, std::vector<Position>* words) {
UErrorCode status = U_ZERO_ERROR;
UBreakIteratorType breakType = convertType(UBreakType::kWords);
ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status));
if (U_FAILURE(status)) {
SkDEBUGF("Break error: %s", u_errorName(status));
return false;
}
SkASSERT(iterator);
UText sUtf16UText = UTEXT_INITIALIZER;
ICUUText utf16UText(utext_openUChars(&sUtf16UText, (UChar*)utf16, utf16Units, &status));
if (U_FAILURE(status)) {
SkDEBUGF("Break error: %s", u_errorName(status));
return false;
}
ubrk_setUText(iterator.get(), utf16UText.get(), &status);
if (U_FAILURE(status)) {
SkDEBUGF("Break error: %s", u_errorName(status));
return false;
}
// Get the words
int32_t pos = ubrk_first(iterator.get());
while (pos != UBRK_DONE) {
words->emplace_back(pos);
pos = ubrk_next(iterator.get());
}
return true;
}
// Iterates over all boundaries of the requested type in the UTF-8 text and calls
// add(position, ruleStatus) for each one, starting with the boundary from ubrk_first.
// Positions are the offsets ICU reports for the UTF-8 UText.
// Returns false, without calling `add`, if any ICU object cannot be created or attached.
bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
    UErrorCode status = U_ZERO_ERROR;

    UText sUtf8UText = UTEXT_INITIALIZER;
    ICUUText text(utext_openUTF8(&sUtf8UText, &utf8[0], utf8Units, &status));
    if (U_FAILURE(status)) {
        SkDEBUGF("Break error: %s", u_errorName(status));
        return false;
    }
    SkASSERT(text);

    ICUBreakIterator iterator(ubrk_open(convertType(type), uloc_getDefault(), nullptr, 0, &status));
    if (U_FAILURE(status)) {
        SkDEBUGF("Break error: %s", u_errorName(status));
        // Bail out here: without an iterator the calls below would operate on a null handle.
        return false;
    }
    SkASSERT(iterator);

    ubrk_setUText(iterator.get(), text.get(), &status);
    if (U_FAILURE(status)) {
        SkDEBUGF("Break error: %s", u_errorName(status));
        return false;
    }

    auto iter = iterator.get();
    int32_t pos = ubrk_first(iter);
    while (pos != UBRK_DONE) {
        // The rule status is what lets callers tell hard line breaks from soft ones.
        add(pos, ubrk_getRuleStatus(iter));
        pos = ubrk_next(iter);
    }
    return true;
}
// Appends to *whitespaces the UTF-8 offset of every byte that belongs to a code point
// for which u_isWhitespace() is true. Always returns true.
bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
    const char* const base = utf8;
    const char* const limit = utf8 + utf8Units;
    for (const char* cursor = base; cursor < limit;) {
        const auto firstByte = cursor - base;
        const auto codepoint = utf8_next(&cursor, limit);
        if (!u_isWhitespace(codepoint)) {
            continue;
        }
        // A multi-byte whitespace code point contributes each of its bytes.
        const auto pastLastByte = cursor - base;
        for (auto byteIndex = firstByte; byteIndex < pastLastByte; ++byteIndex) {
            whitespaces->emplace_back(byteIndex);
        }
    }
    return true;
}
// SkUnicode override: forwards unchanged to extractBidi(), which appends the bidi
// regions of the text to *results and returns false on failure.
bool getBidiRegions(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) override {
return extractBidi(utf8, utf8Units, dir, results);
}
bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
return extractPositions(utf8, utf8Units, UBreakType::kLines,
[results](int pos, int status) {
results->emplace_back(pos,status == UBRK_LINE_HARD
? LineBreakType::kHardLineBreak
: LineBreakType::kSoftLineBreak);
});
}
// SkUnicode override: transcodes the text to UTF-16 and appends the word boundaries
// found by extractWords() to *results, so the reported positions are UTF-16 offsets.
// Returns false if the transcoding or the word extraction fails.
bool getWords(const char utf8[], int utf8Units, std::vector<Position>* results) override {
    std::unique_ptr<uint16_t[]> utf16Buffer;
    const auto utf16Length = convertUtf8ToUtf16(utf8, utf8Units, &utf16Buffer);
    if (utf16Length >= 0) {
        return extractWords(utf16Buffer.get(), utf16Length, results);
    }
    return false;
}
// SkUnicode override: appends every grapheme boundary reported by extractPositions()
// to *results; the rule status is not needed here and is ignored.
bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
    auto collect = [results](int pos, int /*status*/) {
        results->emplace_back(pos);
    };
    return extractPositions(utf8, utf8Units, UBreakType::kGraphemes, collect);
}
// SkUnicode override: forwards unchanged to extractWhitespaces(), which appends the
// offsets of all whitespace bytes to *results.
bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
return extractWhitespaces(utf8, utf8Units, results);
}
};
// Passes the per-run bidi levels straight to ICU's ubidi_reorderVisual, with
// logicalFromVisual[] as the output index map.
// NOTE(review): the caller presumably must provide levelsCount entries in
// logicalFromVisual[] - confirm against the ICU documentation.
void SkUnicode::ReorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) {
ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
}
// Factory: returns a newly created ICU-backed SkUnicode (SkUnicode_icu).
std::unique_ptr<SkUnicode> SkUnicode_Make() { return std::make_unique<SkUnicode_icu>(); }
}

View File

@ -251,3 +251,36 @@ size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) {
return 1 + extra;
}
int SkUTF::UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength) {
if (!dst) {
dstCapacity = 0;
}
int dstLength = 0;
uint16_t* endDst = dst + dstCapacity;
const char* endSrc = src + srcByteLength;
while (src < endSrc) {
SkUnichar uni = NextUTF8(&src, endSrc);
if (uni < 0) {
return -1;
}
uint16_t utf16[2];
size_t count = ToUTF16(uni, utf16);
if (count == 0) {
return -1;
}
dstLength += count;
if (dst) {
uint16_t* elems = utf16;
while (dst < endDst && count > 0) {
*dst++ = *elems++;
count -= 1;
}
}
}
return dstLength;
}

View File

@ -64,6 +64,12 @@ SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr
*/
SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);
/** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence.
* If dst is not null, it is filled with the corresponding values up to its capacity.
* If there is an error, -1 is returned and the dst[] buffer is undefined.
*/
SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength);
} // namespace SkUTF
#endif // SkUTF_DEFINED