ICU API: only in SkParagraph, simplified (relanding reverted).
Reverted commit: https://skia-review.googlesource.com/c/skia/+/296128/ Change-Id: Iaf793bff94a6060579c7d6176d477e598c047be6 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/303261 Reviewed-by: Mike Reed <reed@google.com> Commit-Queue: Julia Lavrova <jlavrova@google.com>
This commit is contained in:
parent
9d960f1e0f
commit
7479eda3b6
@ -2,23 +2,13 @@
|
||||
|
||||
#include "modules/skparagraph/src/Iterators.h"
|
||||
#include "modules/skparagraph/src/OneLineShaper.h"
|
||||
#include <unicode/uchar.h>
|
||||
#include "modules/skparagraph/src/ParagraphUtil.h"
|
||||
#include <algorithm>
|
||||
#include <unordered_set>
|
||||
#include "src/utils/SkUTF.h"
|
||||
|
||||
namespace skia {
|
||||
namespace textlayout {
|
||||
|
||||
namespace {
|
||||
|
||||
SkUnichar utf8_next(const char** ptr, const char* end) {
|
||||
SkUnichar val = SkUTF::NextUTF8(ptr, end);
|
||||
return val < 0 ? 0xFFFD : val;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void OneLineShaper::commitRunBuffer(const RunInfo&) {
|
||||
|
||||
fCurrentRun->commit();
|
||||
@ -313,8 +303,8 @@ void OneLineShaper::sortOutGlyphs(std::function<void(GlyphRange)>&& sortOutUnres
|
||||
block.end = i;
|
||||
} else {
|
||||
const char* cluster = text.begin() + clusterIndex(i);
|
||||
SkUnichar codepoint = utf8_next(&cluster, text.end());
|
||||
if (u_iscntrl(codepoint)) {
|
||||
SkUnichar codepoint = nextUtf8Unit(&cluster, text.end());
|
||||
if (isControl(codepoint)) {
|
||||
// This codepoint does not have to be resolved; let's pretend it's resolved
|
||||
if (block.start == EMPTY_INDEX) {
|
||||
// Keep skipping resolved code points
|
||||
@ -419,7 +409,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,
|
||||
// We have the global cache for all already found typefaces for SkUnichar
|
||||
// but we still need to keep track of all SkUnichars used in this unresolved block
|
||||
SkTHashSet<SkUnichar> alreadyTried;
|
||||
SkUnichar unicode = utf8_next(&ch, unresolvedText.end());
|
||||
SkUnichar unicode = nextUtf8Unit(&ch, unresolvedText.end());
|
||||
while (true) {
|
||||
|
||||
sk_sp<SkTypeface> typeface;
|
||||
@ -457,7 +447,7 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,
|
||||
|
||||
// We can stop here or we can switch to another DIFFERENT codepoint
|
||||
while (ch != unresolvedText.end()) {
|
||||
unicode = utf8_next(&ch, unresolvedText.end());
|
||||
unicode = nextUtf8Unit(&ch, unresolvedText.end());
|
||||
auto found = alreadyTried.find(unicode);
|
||||
if (found == nullptr) {
|
||||
alreadyTried.add(unicode);
|
||||
@ -472,10 +462,6 @@ void OneLineShaper::matchResolvedFonts(const TextStyle& textStyle,
|
||||
|
||||
bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
|
||||
|
||||
if (!fParagraph->getBidiRegions()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t bidiIndex = 0;
|
||||
|
||||
SkScalar advanceX = 0;
|
||||
@ -485,8 +471,8 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
|
||||
// Shape the text by bidi regions
|
||||
while (bidiIndex < fParagraph->fBidiRegions.size()) {
|
||||
BidiRegion& bidiRegion = fParagraph->fBidiRegions[bidiIndex];
|
||||
auto start = std::max(bidiRegion.text.start, placeholder.fTextBefore.start);
|
||||
auto end = std::min(bidiRegion.text.end, placeholder.fTextBefore.end);
|
||||
auto start = std::max(bidiRegion.start, placeholder.fTextBefore.start);
|
||||
auto end = std::min(bidiRegion.end, placeholder.fTextBefore.end);
|
||||
|
||||
// Set up the iterators (the style iterator points to a bigger region that it could
|
||||
TextRange textRange(start, end);
|
||||
@ -494,11 +480,11 @@ bool OneLineShaper::iterateThroughShapingRegions(const ShapeVisitor& shape) {
|
||||
SkSpan<Block> styleSpan(fParagraph->blocks(blockRange));
|
||||
|
||||
// Shape the text between placeholders
|
||||
if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.direction)) {
|
||||
if (!shape(textRange, styleSpan, advanceX, start, bidiRegion.level)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (end == bidiRegion.text.end) {
|
||||
if (end == bidiRegion.end) {
|
||||
++bidiIndex;
|
||||
} else /*if (end == placeholder.fTextBefore.end)*/ {
|
||||
break;
|
||||
|
@ -50,7 +50,7 @@ public:
|
||||
// ICU results
|
||||
SkTArray<CodeUnitFlags> fCodeUnitProperties;
|
||||
std::vector<size_t> fWords;
|
||||
SkTArray<BidiRegion> fBidiRegions;
|
||||
std::vector<BidiRegion> fBidiRegions;
|
||||
SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
|
||||
SkTArray<size_t, true> fUTF16IndexForUTF8Index;
|
||||
};
|
||||
|
@ -25,12 +25,6 @@
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <unicode/ubidi.h>
|
||||
#include <unicode/uloc.h>
|
||||
#include <unicode/umachine.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utext.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
@ -40,9 +34,6 @@ namespace textlayout {
|
||||
|
||||
namespace {
|
||||
|
||||
using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
|
||||
using ICUBiDi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
|
||||
|
||||
SkScalar littleRound(SkScalar a) {
|
||||
// This rounding is done to match Flutter tests. Must be removed..
|
||||
auto val = std::fabs(a);
|
||||
@ -54,13 +45,6 @@ SkScalar littleRound(SkScalar a) {
|
||||
return SkScalarFloorToScalar(a);
|
||||
}
|
||||
}
|
||||
|
||||
/** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
|
||||
static inline SkUnichar utf8_next(const char** ptr, const char* end) {
|
||||
SkUnichar val = SkUTF::NextUTF8(ptr, end);
|
||||
return val < 0 ? 0xFFFD : val;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TextRange operator*(const TextRange& a, const TextRange& b) {
|
||||
@ -99,6 +83,7 @@ ParagraphImpl::ParagraphImpl(const SkString& text,
|
||||
, fOldWidth(0)
|
||||
, fOldHeight(0)
|
||||
, fOrigin(SkRect::MakeEmpty()) {
|
||||
fICU = SkUnicode_Make();
|
||||
}
|
||||
|
||||
ParagraphImpl::ParagraphImpl(const std::u16string& utf16text,
|
||||
@ -145,7 +130,7 @@ void ParagraphImpl::layout(SkScalar rawWidth) {
|
||||
this->fCodeUnitProperties.reset();
|
||||
this->fCodeUnitProperties.push_back_n(fText.size() + 1, CodeUnitFlags::kNoCodeUnitFlag);
|
||||
this->fWords.clear();
|
||||
this->fBidiRegions.reset();
|
||||
this->fBidiRegions.clear();
|
||||
this->fUTF8IndexForUTF16Index.reset();
|
||||
this->fUTF16IndexForUTF8Index.reset();
|
||||
this->fRuns.reset();
|
||||
@ -244,72 +229,6 @@ void ParagraphImpl::resetContext() {
|
||||
fExceededMaxLines = false;
|
||||
}
|
||||
|
||||
class TextBreaker {
|
||||
public:
|
||||
TextBreaker() : fInitialized(false), fPos(-1) {}
|
||||
|
||||
bool initialize(SkSpan<const char> text, UBreakIteratorType type) {
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
fIterator = nullptr;
|
||||
fSize = text.size();
|
||||
UText sUtf8UText = UTEXT_INITIALIZER;
|
||||
std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>> utf8UText(
|
||||
utext_openUTF8(&sUtf8UText, text.begin(), text.size(), &status));
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Could not create utf8UText: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
fIterator.reset(ubrk_open(type, "en", nullptr, 0, &status));
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Could not create line break iterator: %s", u_errorName(status));
|
||||
SK_ABORT("");
|
||||
}
|
||||
|
||||
ubrk_setUText(fIterator.get(), utf8UText.get(), &status);
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Could not setText on break iterator: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
|
||||
fInitialized = true;
|
||||
fPos = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool initialized() const { return fInitialized; }
|
||||
|
||||
size_t first() {
|
||||
fPos = ubrk_first(fIterator.get());
|
||||
return eof() ? fSize : fPos;
|
||||
}
|
||||
|
||||
size_t next() {
|
||||
fPos = ubrk_next(fIterator.get());
|
||||
return eof() ? fSize : fPos;
|
||||
}
|
||||
|
||||
size_t preceding(size_t offset) {
|
||||
auto pos = ubrk_preceding(fIterator.get(), offset);
|
||||
return pos == UBRK_DONE ? 0 : pos;
|
||||
}
|
||||
|
||||
size_t following(size_t offset) {
|
||||
auto pos = ubrk_following(fIterator.get(), offset);
|
||||
return pos == UBRK_DONE ? fSize : pos;
|
||||
}
|
||||
|
||||
int32_t status() { return ubrk_getRuleStatus(fIterator.get()); }
|
||||
|
||||
bool eof() { return fPos == UBRK_DONE; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>> fIterator;
|
||||
bool fInitialized;
|
||||
int32_t fPos;
|
||||
size_t fSize;
|
||||
};
|
||||
|
||||
// shapeTextIntoEndlessLine is the thing that calls this method
|
||||
// (that contains all ICU dependencies except for words)
|
||||
bool ParagraphImpl::computeCodeUnitProperties() {
|
||||
@ -320,165 +239,41 @@ bool ParagraphImpl::computeCodeUnitProperties() {
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
const char* start = fText.c_str();
|
||||
const char* end = start + fText.size();
|
||||
const char* ch = start;
|
||||
while (ch < end) {
|
||||
auto index = ch - start;
|
||||
auto unichar = utf8_next(&ch, end);
|
||||
if (u_isWhitespace(unichar)) {
|
||||
auto ending = ch - start;
|
||||
for (auto k = index; k < ending; ++k) {
|
||||
fCodeUnitProperties[k] |= CodeUnitFlags::kPartOfWhiteSpace;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
TextBreaker breaker;
|
||||
if (!breaker.initialize(this->text(), UBRK_LINE)) {
|
||||
return false;
|
||||
}
|
||||
while (!breaker.eof()) {
|
||||
size_t currentPos = breaker.next();
|
||||
fCodeUnitProperties[currentPos] |=
|
||||
breaker.status() == UBRK_LINE_HARD ? CodeUnitFlags::kHardLineBreakBefore : CodeUnitFlags::kSoftLineBreakBefore;
|
||||
}
|
||||
}
|
||||
{
|
||||
TextBreaker breaker;
|
||||
if (!breaker.initialize(this->text(), UBRK_CHARACTER)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
while (!breaker.eof()) {
|
||||
auto currentPos = breaker.next();
|
||||
fCodeUnitProperties[currentPos] |= CodeUnitFlags::kGraphemeStart;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// getWordBoundary is the thing that calls this method lazily
|
||||
bool ParagraphImpl::computeWords() {
|
||||
|
||||
if (!fWords.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
UErrorCode errorCode = U_ZERO_ERROR;
|
||||
|
||||
auto iter = ubrk_open(UBRK_WORD, uloc_getDefault(), nullptr, 0, &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
SkDEBUGF("Could not create line break iterator: %s", u_errorName(errorCode));
|
||||
// Get bidi regions
|
||||
Direction textDirection = fParagraphStyle.getTextDirection() == TextDirection::kLtr
|
||||
? Direction::kLTR
|
||||
: Direction::kRTL;
|
||||
if (!fICU->getBidiRegions(fText.c_str(), fText.size(), textDirection, &fBidiRegions)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
|
||||
int32_t utf16Units;
|
||||
u_strFromUTF8(nullptr, 0, &utf16Units, fText.c_str(), fText.size(), &errorCode);
|
||||
errorCode = U_ZERO_ERROR;
|
||||
std::unique_ptr<UChar[]> utf16(new UChar[utf16Units]);
|
||||
u_strFromUTF8(utf16.get(), utf16Units, nullptr, fText.c_str(), fText.size(), &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
SkDEBUGF("Invalid utf8 input: %s", u_errorName(errorCode));
|
||||
// Get white spaces
|
||||
std::vector<Position> whitespaces;
|
||||
if (!fICU->getWhitespaces(fText.c_str(), fText.size(), &whitespaces)) {
|
||||
return false;
|
||||
}
|
||||
for (auto whitespace : whitespaces) {
|
||||
fCodeUnitProperties[whitespace] |= CodeUnitFlags::kPartOfWhiteSpace;
|
||||
}
|
||||
|
||||
UText sUtf16UText = UTEXT_INITIALIZER;
|
||||
ICUUText utf8UText(utext_openUChars(&sUtf16UText, utf16.get(), utf16Units, &errorCode));
|
||||
if (U_FAILURE(errorCode)) {
|
||||
SkDEBUGF("Could not create utf8UText: %s", u_errorName(errorCode));
|
||||
// Get line breaks
|
||||
std::vector<LineBreakBefore> lineBreaks;
|
||||
if (!fICU->getLineBreaks(fText.c_str(), fText.size(), &lineBreaks)) {
|
||||
return false;
|
||||
}
|
||||
for (auto& lineBreak : lineBreaks) {
|
||||
fCodeUnitProperties[lineBreak.pos] |= lineBreak.breakType == LineBreakType::kHardLineBreak
|
||||
? CodeUnitFlags::kHardLineBreakBefore
|
||||
: CodeUnitFlags::kSoftLineBreakBefore;
|
||||
}
|
||||
|
||||
ubrk_setUText(iter, utf8UText.get(), &errorCode);
|
||||
if (U_FAILURE(errorCode)) {
|
||||
SkDEBUGF("Could not setText on break iterator: %s", u_errorName(errorCode));
|
||||
// Get graphemes
|
||||
std::vector<Position> graphemes;
|
||||
if (!fICU->getGraphemes(fText.c_str(), fText.size(), &graphemes)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t pos = ubrk_first(iter);
|
||||
while (pos != UBRK_DONE) {
|
||||
fWords.emplace_back(pos);
|
||||
pos = ubrk_next(iter);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ParagraphImpl::getBidiRegions() {
|
||||
|
||||
if (!fBidiRegions.empty()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// ubidi only accepts utf16 (though internally it basically works on utf32 chars).
|
||||
// We want an ubidi_setPara(UBiDi*, UText*, UBiDiLevel, UBiDiLevel*, UErrorCode*);
|
||||
size_t utf8Bytes = fText.size();
|
||||
const char* utf8 = fText.c_str();
|
||||
uint8_t bidiLevel = fParagraphStyle.getTextDirection() == TextDirection::kLtr
|
||||
? UBIDI_LTR
|
||||
: UBIDI_RTL;
|
||||
if (!SkTFitsIn<int32_t>(utf8Bytes)) {
|
||||
SkDEBUGF("Bidi error: text too long");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Getting the length like this seems to always set U_BUFFER_OVERFLOW_ERROR
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
int32_t utf16Units;
|
||||
u_strFromUTF8(nullptr, 0, &utf16Units, utf8, utf8Bytes, &status);
|
||||
status = U_ZERO_ERROR;
|
||||
std::unique_ptr<UChar[]> utf16(new UChar[utf16Units]);
|
||||
u_strFromUTF8(utf16.get(), utf16Units, nullptr, utf8, utf8Bytes, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Invalid utf8 input: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
|
||||
ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Bidi error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
SkASSERT(bidi);
|
||||
|
||||
// The required lifetime of utf16 isn't well documented.
|
||||
// It appears it isn't used after ubidi_setPara except through ubidi_getText.
|
||||
ubidi_setPara(bidi.get(), utf16.get(), utf16Units, bidiLevel, nullptr, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Bidi error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
|
||||
SkTArray<BidiRegion> bidiRegions;
|
||||
const char* start8 = utf8;
|
||||
const char* end8 = utf8 + utf8Bytes;
|
||||
TextRange textRange(0, 0);
|
||||
UBiDiLevel currentLevel = 0;
|
||||
|
||||
int32_t pos16 = 0;
|
||||
int32_t end16 = ubidi_getLength(bidi.get());
|
||||
while (pos16 < end16) {
|
||||
auto level = ubidi_getLevelAt(bidi.get(), pos16);
|
||||
if (pos16 == 0) {
|
||||
currentLevel = level;
|
||||
} else if (level != currentLevel) {
|
||||
textRange.end = start8 - utf8;
|
||||
fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel);
|
||||
currentLevel = level;
|
||||
textRange = TextRange(textRange.end, textRange.end);
|
||||
}
|
||||
SkUnichar u = utf8_next(&start8, end8);
|
||||
pos16 += SkUTF::ToUTF16(u);
|
||||
}
|
||||
|
||||
textRange.end = start8 - utf8;
|
||||
if (!textRange.empty()) {
|
||||
fBidiRegions.emplace_back(textRange.start, textRange.end, currentLevel);
|
||||
for (auto pos : graphemes) {
|
||||
fCodeUnitProperties[pos] |= CodeUnitFlags::kGraphemeStart;
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -883,21 +678,23 @@ PositionWithAffinity ParagraphImpl::getGlyphPositionAtCoordinate(SkScalar dx, Sk
|
||||
// By "glyph" they mean a character index - indicated by Minikin's code
|
||||
SkRange<size_t> ParagraphImpl::getWordBoundary(unsigned offset) {
|
||||
|
||||
if (!computeWords()) {
|
||||
return {0, 0 };
|
||||
if (fWords.empty()) {
|
||||
if (!fICU->getWords(fText.c_str(), fText.size(), &fWords)) {
|
||||
return {0, 0 };
|
||||
}
|
||||
}
|
||||
|
||||
int32_t start = 0;
|
||||
int32_t end = 0;
|
||||
for (size_t i = 0; i < fWords.size(); ++i) {
|
||||
auto word = fWords[i];
|
||||
if (word <= offset) {
|
||||
start = word;
|
||||
end = word;
|
||||
} else if (word > offset) {
|
||||
end = word;
|
||||
break;
|
||||
}
|
||||
auto word = fWords[i];
|
||||
if (word <= offset) {
|
||||
start = word;
|
||||
end = word;
|
||||
} else if (word > offset) {
|
||||
end = word;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//SkDebugf("getWordBoundary(%d): %d - %d\n", offset, start, end);
|
||||
@ -980,7 +777,7 @@ void ParagraphImpl::setState(InternalState state) {
|
||||
fCodeUnitProperties.reset();
|
||||
fCodeUnitProperties.push_back_n(fText.size() + 1, kNoCodeUnitFlag);
|
||||
fWords.clear();
|
||||
fBidiRegions.reset();
|
||||
fBidiRegions.clear();
|
||||
fUTF8IndexForUTF16Index.reset();
|
||||
fUTF16IndexForUTF8Index.reset();
|
||||
[[fallthrough]];
|
||||
|
@ -23,9 +23,9 @@
|
||||
#include "modules/skparagraph/include/TextShadow.h"
|
||||
#include "modules/skparagraph/include/TextStyle.h"
|
||||
#include "modules/skparagraph/src/Run.h"
|
||||
#include "modules/skshaper/src/SkUnicode.h"
|
||||
#include "src/core/SkSpan.h"
|
||||
|
||||
#include <unicode/ubrk.h>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@ -83,14 +83,14 @@ struct ResolvedFontDescriptor {
|
||||
SkFont fFont;
|
||||
TextIndex fTextStart;
|
||||
};
|
||||
|
||||
/*
|
||||
struct BidiRegion {
|
||||
BidiRegion(size_t start, size_t end, uint8_t dir)
|
||||
: text(start, end), direction(dir) { }
|
||||
TextRange text;
|
||||
uint8_t direction;
|
||||
};
|
||||
|
||||
*/
|
||||
class ParagraphImpl final : public Paragraph {
|
||||
|
||||
public:
|
||||
@ -186,8 +186,6 @@ public:
|
||||
void resolveStrut();
|
||||
|
||||
bool computeCodeUnitProperties();
|
||||
bool computeWords();
|
||||
bool getBidiRegions();
|
||||
|
||||
void buildClusterTable();
|
||||
void spaceGlyphs();
|
||||
@ -250,7 +248,7 @@ private:
|
||||
SkTArray<CodeUnitFlags> fCodeUnitProperties;
|
||||
SkTArray<size_t> fClustersIndexFromCodeUnit;
|
||||
std::vector<size_t> fWords;
|
||||
SkTArray<BidiRegion> fBidiRegions;
|
||||
std::vector<BidiRegion> fBidiRegions;
|
||||
// These two arrays are used in measuring methods (getRectsForRange, getGlyphPositionAtCoordinate)
|
||||
// They are filled lazily whenever they need and cached
|
||||
SkTArray<TextIndex, true> fUTF8IndexForUTF16Index;
|
||||
@ -269,6 +267,8 @@ private:
|
||||
SkScalar fOldHeight;
|
||||
SkScalar fMaxWidthWithTrailingSpaces;
|
||||
SkRect fOrigin;
|
||||
|
||||
std::unique_ptr<SkUnicode> fICU;
|
||||
};
|
||||
} // namespace textlayout
|
||||
} // namespace skia
|
||||
|
@ -4,8 +4,10 @@
|
||||
#include "include/core/SkTypes.h"
|
||||
#include "include/private/SkTo.h"
|
||||
#include "modules/skparagraph/src/ParagraphUtil.h"
|
||||
#include "src/utils/SkUTF.h"
|
||||
|
||||
#include <unicode/umachine.h>
|
||||
#include <unicode/uchar.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <string>
|
||||
@ -30,5 +32,14 @@ SkString SkStringFromU16String(const std::u16string& utf16text) {
|
||||
return dst;
|
||||
}
|
||||
|
||||
/**
 * Decodes one code point from the UTF-8 range [*ptr, end) via SkUTF::NextUTF8.
 * A negative decoder result (invalid input) is reported as U+FFFD
 * REPLACEMENT CHARACTER instead.
 */
SkUnichar nextUtf8Unit(const char** ptr, const char* end) {
    const SkUnichar decoded = SkUTF::NextUTF8(ptr, end);
    if (decoded < 0) {
        return 0xFFFD;
    }
    return decoded;
}
|
||||
|
||||
/**
 * Reports whether ICU's u_iscntrl() classifies the given code point as a
 * control character. Despite the parameter name, the argument is a decoded
 * SkUnichar, not a UTF-8 byte.
 */
bool isControl(SkUnichar utf8) {
    const bool control = u_iscntrl(utf8);
    return control;
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,8 @@
|
||||
namespace skia {
|
||||
namespace textlayout {
|
||||
SkString SkStringFromU16String(const std::u16string& utf16text);
|
||||
SkUnichar nextUtf8Unit(const char** ptr, const char* end);
|
||||
bool isControl(SkUnichar utf8);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,6 @@
|
||||
#include "modules/skshaper/include/SkShaper.h"
|
||||
#include "src/core/SkSpan.h"
|
||||
|
||||
#include <unicode/ubidi.h>
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <limits>
|
||||
@ -131,21 +130,20 @@ TextLine::TextLine(ParagraphImpl* master,
|
||||
|
||||
// This is just chosen to catch the common/fast cases. Feel free to tweak.
|
||||
constexpr int kPreallocCount = 4;
|
||||
|
||||
SkAutoSTArray<kPreallocCount, UBiDiLevel> runLevels(numRuns);
|
||||
|
||||
SkAutoSTArray<kPreallocCount, BidiLevel> runLevels(numRuns);
|
||||
size_t runLevelsIndex = 0;
|
||||
for (auto runIndex = start.runIndex(); runIndex <= end.runIndex(); ++runIndex) {
|
||||
auto& run = fMaster->run(runIndex);
|
||||
runLevels[runLevelsIndex++] = run.fBidiLevel;
|
||||
fMaxRunMetrics.add(InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent,
|
||||
run.fFontMetrics.fLeading));
|
||||
fMaxRunMetrics.add(
|
||||
InternalLineMetrics(run.fFontMetrics.fAscent, run.fFontMetrics.fDescent, run.fFontMetrics.fLeading));
|
||||
}
|
||||
SkASSERT(runLevelsIndex == numRuns);
|
||||
|
||||
SkAutoSTArray<kPreallocCount, int32_t> logicalOrder(numRuns);
|
||||
|
||||
ubidi_reorderVisual(runLevels.data(), SkToU32(numRuns), logicalOrder.data());
|
||||
// TODO: hide all this logic in SkUnicode?
|
||||
SkUnicode::ReorderVisual(runLevels.data(), numRuns, logicalOrder.data());
|
||||
auto firstRunIndex = start.runIndex();
|
||||
for (auto index : logicalOrder) {
|
||||
fRunsInVisualOrder.push_back(firstRunIndex + index);
|
||||
|
@ -12,6 +12,7 @@ skia_shaper_public = [ "$_include/SkShaper.h" ]
|
||||
skia_shaper_primitive_sources = [
|
||||
"$_src/SkShaper.cpp",
|
||||
"$_src/SkShaper_primitive.cpp",
|
||||
"$_src/SkUnicode_icu.cpp",
|
||||
]
|
||||
skia_shaper_harfbuzz_sources = [ "$_src/SkShaper_harfbuzz.cpp" ]
|
||||
skia_shaper_coretext_sources = [ "$_src/SkShaper_coretext.cpp" ]
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include "include/core/SkTypeface.h"
|
||||
#include "include/private/SkTFitsIn.h"
|
||||
#include "modules/skshaper/include/SkShaper.h"
|
||||
#include "modules/skshaper/src/SkUnicode.h"
|
||||
#include "src/core/SkTextBlobPriv.h"
|
||||
#include "src/utils/SkUTF.h"
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include "include/core/SkTypeface.h"
|
||||
#include "include/private/SkTo.h"
|
||||
#include "modules/skshaper/include/SkShaper.h"
|
||||
#include "modules/skshaper/src/SkUnicode.h"
|
||||
#include "src/utils/SkUTF.h"
|
||||
|
||||
class SkShaperPrimitive : public SkShaper {
|
||||
|
82
modules/skshaper/src/SkUnicode.h
Normal file
82
modules/skshaper/src/SkUnicode.h
Normal file
@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright 2020 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
#ifndef SkUnicode_DEFINED
|
||||
#define SkUnicode_DEFINED
|
||||
|
||||
#include "include/core/SkTypes.h"
|
||||
#include "src/core/SkSpan.h"
|
||||
#include <vector>
|
||||
#include <unicode/utf.h>
|
||||
|
||||
namespace skia {
|
||||
|
||||
/** Text encodings distinguished by this API. */
enum class UtfFormat {
    kUTF8,
    kUTF16
};

// ---- Bidi ----

/** An offset into a text buffer. */
using Position = size_t;

/** A bidi embedding level. */
using BidiLevel = uint8_t;

/** Base paragraph direction handed to the bidi algorithm. */
enum class Direction {
    kLTR,
    kRTL,
};

/**
 * A run of text that shares a single bidi level.
 * `start` is the offset of the first unit in the run; `end` is the offset at
 * which the next run begins.
 */
struct BidiRegion {
    BidiRegion(Position start, Position end, BidiLevel level)
        : start(start)
        , end(end)
        , level(level) {}

    Position start;
    Position end;
    BidiLevel level;
};

// ---- Line breaks ----

/** Whether a line break opportunity is optional (soft) or mandatory (hard). */
enum class LineBreakType {
    kSoftLineBreak,
    kHardLineBreak
};

/** A line break that may (soft) or must (hard) be taken before `pos`. */
struct LineBreakBefore {
    LineBreakBefore(Position pos, LineBreakType breakType)
        : pos(pos)
        , breakType(breakType) {}

    Position pos;
    LineBreakType breakType;
};

// ---- Other breaks ----

/** Kinds of text boundaries that can be requested. */
enum class UBreakType {
    kWords,
    kGraphemes,
    kLines
};

/** A plain pair of positions; kept an aggregate (no constructor). */
struct Range {
    Position start;
    Position end;
};
|
||||
|
||||
class SkUnicode {
|
||||
public:
|
||||
typedef uint32_t ScriptID;
|
||||
typedef uint32_t CombiningClass;
|
||||
typedef uint32_t GeneralCategory;
|
||||
virtual ~SkUnicode() {}
|
||||
// High level methods (that we actually use somewhere=SkParagraph)
|
||||
virtual bool getBidiRegions
|
||||
(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) = 0;
|
||||
virtual bool getLineBreaks
|
||||
(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) = 0;
|
||||
virtual bool getWords
|
||||
(const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
|
||||
virtual bool getGraphemes
|
||||
(const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
|
||||
virtual bool getWhitespaces
|
||||
(const char utf8[], int utf8Units, std::vector<Position>* results) = 0;
|
||||
|
||||
static void ReorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]);
|
||||
};
|
||||
|
||||
std::unique_ptr<SkUnicode> SkUnicode_Make();
|
||||
|
||||
}
|
||||
|
||||
#endif // SkUnicode_DEFINED
|
258
modules/skshaper/src/SkUnicode_icu.cpp
Normal file
258
modules/skshaper/src/SkUnicode_icu.cpp
Normal file
@ -0,0 +1,258 @@
|
||||
/*
|
||||
* Copyright 2020 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
#include "include/private/SkTFitsIn.h"
|
||||
#include "include/private/SkTemplates.h"
|
||||
#include "modules/skshaper/src/SkUnicode.h"
|
||||
#include "src/utils/SkUTF.h"
|
||||
#include <unicode/ubidi.h>
|
||||
#include <unicode/ubrk.h>
|
||||
#include <unicode/utext.h>
|
||||
#include <unicode/utypes.h>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
using ICUBiDi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
|
||||
using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
|
||||
using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>>;
|
||||
|
||||
/** Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. */
|
||||
static inline SkUnichar utf8_next(const char** ptr, const char* end) {
|
||||
SkUnichar val = SkUTF::NextUTF8(ptr, end);
|
||||
return val < 0 ? 0xFFFD : val;
|
||||
}
|
||||
|
||||
namespace skia {
|
||||
|
||||
class SkUnicode_icu : public SkUnicode {
|
||||
|
||||
struct InputData {
|
||||
SkSpan<const char> fUtf8;
|
||||
SkSpan<uint16_t> fUtf16;
|
||||
Direction fTextDirection;
|
||||
};
|
||||
|
||||
struct OutputData {
|
||||
UtfFormat fUtfFormat;
|
||||
std::vector<BidiRegion> fBidiRegions;
|
||||
std::vector<Position> fWords;
|
||||
std::vector<LineBreakBefore> fLineBreaks;
|
||||
std::vector<Position> fGraphemes;
|
||||
std::vector<Position> fWhitespaces;
|
||||
};
|
||||
|
||||
/**
 * Maps the library's UBreakType onto the corresponding ICU iterator type.
 * An unrecognized value is logged (debug builds) and treated as a
 * grapheme (UBRK_CHARACTER) request.
 */
static UBreakIteratorType convertType(UBreakType type) {
    if (type == UBreakType::kLines) {
        return UBRK_LINE;
    }
    if (type == UBreakType::kGraphemes) {
        return UBRK_CHARACTER;
    }
    if (type == UBreakType::kWords) {
        return UBRK_WORD;
    }
    SkDEBUGF("Convert error: wrong break type");
    return UBRK_CHARACTER;
}
|
||||
|
||||
static int convertUtf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
|
||||
int utf16Units = SkUTF::UTF8ToUTF16(nullptr, 0, utf8, utf8Units);
|
||||
if (utf16Units < 0) {
|
||||
SkDEBUGF("Convert error: Invalid utf8 input");
|
||||
return utf16Units;
|
||||
}
|
||||
*utf16 = std::unique_ptr<uint16_t[]>(new uint16_t[utf16Units]);
|
||||
SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16->get(), utf16Units, utf8, utf8Units);
|
||||
SkASSERT(dstLen == utf16Units);
|
||||
return utf16Units;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
bool extractBidi(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* bidiRegions) {
|
||||
|
||||
// Convert to UTF16 since for now bidi iterator only operates on utf16
|
||||
std::unique_ptr<uint16_t[]> utf16;
|
||||
auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
|
||||
if (utf16Units < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create bidi iterator
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
ICUBiDi bidi(ubidi_openSized(utf16Units, 0, &status));
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Bidi error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
SkASSERT(bidi);
|
||||
uint8_t bidiLevel = (dir == Direction::kLTR) ? UBIDI_LTR : UBIDI_RTL;
|
||||
// The required lifetime of utf16 isn't well documented.
|
||||
// It appears it isn't used after ubidi_setPara except through ubidi_getText.
|
||||
ubidi_setPara(bidi.get(), (const UChar*)utf16.get(), utf16Units, bidiLevel, nullptr, &status);
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Bidi error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Iterate through bidi regions and the result positions into utf8
|
||||
const char* start8 = utf8;
|
||||
const char* end8 = utf8 + utf8Units;
|
||||
BidiLevel currentLevel = 0;
|
||||
|
||||
Position pos8 = 0;
|
||||
Position pos16 = 0;
|
||||
Position end16 = ubidi_getLength(bidi.get());
|
||||
while (pos16 < end16) {
|
||||
auto level = ubidi_getLevelAt(bidi.get(), pos16);
|
||||
if (pos16 == 0) {
|
||||
currentLevel = level;
|
||||
} else if (level != currentLevel) {
|
||||
Position end = start8 - utf8;
|
||||
bidiRegions->emplace_back(pos8, end, currentLevel);
|
||||
currentLevel = level;
|
||||
pos8 = end;
|
||||
}
|
||||
SkUnichar u = utf8_next(&start8, end8);
|
||||
pos16 += SkUTF::ToUTF16(u);
|
||||
}
|
||||
Position end = start8 - utf8;
|
||||
if (end != pos8) {
|
||||
bidiRegions->emplace_back(pos8, end, currentLevel);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool extractWords(uint16_t utf16[], int utf16Units, std::vector<Position>* words) {
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
UBreakIteratorType breakType = convertType(UBreakType::kWords);
|
||||
ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, 0, &status));
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Break error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
SkASSERT(iterator);
|
||||
|
||||
UText sUtf16UText = UTEXT_INITIALIZER;
|
||||
ICUUText utf16UText(utext_openUChars(&sUtf16UText, (UChar*)utf16, utf16Units, &status));
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Break error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
|
||||
ubrk_setUText(iterator.get(), utf16UText.get(), &status);
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Break error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the words
|
||||
int32_t pos = ubrk_first(iterator.get());
|
||||
while (pos != UBRK_DONE) {
|
||||
words->emplace_back(pos);
|
||||
pos = ubrk_next(iterator.get());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
|
||||
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
UText sUtf8UText = UTEXT_INITIALIZER;
|
||||
ICUUText text(utext_openUTF8(&sUtf8UText, &utf8[0], utf8Units, &status));
|
||||
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Break error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
SkASSERT(text);
|
||||
|
||||
ICUBreakIterator iterator(ubrk_open(convertType(type), uloc_getDefault(), nullptr, 0, &status));
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Break error: %s", u_errorName(status));
|
||||
}
|
||||
|
||||
ubrk_setUText(iterator.get(), text.get(), &status);
|
||||
if (U_FAILURE(status)) {
|
||||
SkDEBUGF("Break error: %s", u_errorName(status));
|
||||
return false;
|
||||
}
|
||||
|
||||
auto iter = iterator.get();
|
||||
int32_t pos = ubrk_first(iter);
|
||||
while (pos != UBRK_DONE) {
|
||||
add(pos, ubrk_getRuleStatus(iter));
|
||||
pos = ubrk_next(iter);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
|
||||
|
||||
const char* start = utf8;
|
||||
const char* end = utf8 + utf8Units;
|
||||
const char* ch = start;
|
||||
while (ch < end) {
|
||||
auto index = ch - start;
|
||||
auto unichar = utf8_next(&ch, end);
|
||||
if (u_isWhitespace(unichar)) {
|
||||
auto ending = ch - start;
|
||||
for (auto k = index; k < ending; ++k) {
|
||||
whitespaces->emplace_back(k);
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool getBidiRegions(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) override {
|
||||
|
||||
return extractBidi(utf8, utf8Units, dir, results);
|
||||
}
|
||||
|
||||
bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
|
||||
|
||||
return extractPositions(utf8, utf8Units, UBreakType::kLines,
|
||||
[results](int pos, int status) {
|
||||
results->emplace_back(pos,status == UBRK_LINE_HARD
|
||||
? LineBreakType::kHardLineBreak
|
||||
: LineBreakType::kSoftLineBreak);
|
||||
});
|
||||
}
|
||||
|
||||
bool getWords(const char utf8[], int utf8Units, std::vector<Position>* results) override {
|
||||
|
||||
// Convert to UTF16 since we want the results in utf16
|
||||
std::unique_ptr<uint16_t[]> utf16;
|
||||
auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
|
||||
if (utf16Units < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return extractWords(utf16.get(), utf16Units, results);
|
||||
}
|
||||
|
||||
bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
|
||||
|
||||
return extractPositions(utf8, utf8Units, UBreakType::kGraphemes,
|
||||
[results](int pos, int status) { results->emplace_back(pos);
|
||||
});
|
||||
}
|
||||
|
||||
bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
|
||||
|
||||
return extractWhitespaces(utf8, utf8Units, results);
|
||||
}
|
||||
};
|
||||
|
||||
/** Forwards the run levels to ICU's ubidi_reorderVisual, which fills logicalFromVisual. */
void SkUnicode::ReorderVisual(const BidiLevel runLevels[],
                              int levelsCount,
                              int32_t logicalFromVisual[]) {
    ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
}
|
||||
|
||||
/** Creates the ICU-backed SkUnicode implementation. */
std::unique_ptr<SkUnicode> SkUnicode_Make() {
    return std::make_unique<SkUnicode_icu>();
}
|
||||
|
||||
}
|
||||
|
@ -251,3 +251,36 @@ size_t SkUTF::ToUTF16(SkUnichar uni, uint16_t utf16[2]) {
|
||||
return 1 + extra;
|
||||
}
|
||||
|
||||
int SkUTF::UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength) {
|
||||
if (!dst) {
|
||||
dstCapacity = 0;
|
||||
}
|
||||
|
||||
int dstLength = 0;
|
||||
uint16_t* endDst = dst + dstCapacity;
|
||||
const char* endSrc = src + srcByteLength;
|
||||
while (src < endSrc) {
|
||||
SkUnichar uni = NextUTF8(&src, endSrc);
|
||||
if (uni < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint16_t utf16[2];
|
||||
size_t count = ToUTF16(uni, utf16);
|
||||
if (count == 0) {
|
||||
return -1;
|
||||
}
|
||||
dstLength += count;
|
||||
|
||||
if (dst) {
|
||||
uint16_t* elems = utf16;
|
||||
while (dst < endDst && count > 0) {
|
||||
*dst++ = *elems++;
|
||||
count -= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return dstLength;
|
||||
}
|
||||
|
||||
|
||||
|
@ -64,6 +64,12 @@ SK_SPI size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr
|
||||
*/
|
||||
SK_SPI size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);
|
||||
|
||||
/** Returns the number of resulting UTF16 values needed to convert the src utf8 sequence.
|
||||
* If dst is not null, it is filled with the corresponding values up to its capacity.
|
||||
* If there is an error, -1 is returned and the dst[] buffer is undefined.
|
||||
*/
|
||||
SK_SPI int UTF8ToUTF16(uint16_t dst[], int dstCapacity, const char src[], size_t srcByteLength);
|
||||
|
||||
} // namespace SkUTF
|
||||
|
||||
#endif // SkUTF_DEFINED
|
||||
|
Loading…
Reference in New Issue
Block a user