diff --git a/bench/XferU64Bench.cpp b/bench/XferU64Bench.cpp
new file mode 100644
index 0000000000..0cc2ba131a
--- /dev/null
+++ b/bench/XferU64Bench.cpp
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "Benchmark.h"
+#include "SkString.h"
+#include "SkXfermode.h"
+
+// Bench-only flag kept in the top bit so it can be or'd with SkXfermode::U64Flags w/o conflict.
+// The literal is unsigned so the shift never reaches the sign bit of a signed int.
+#define USE_AA  (1u << 31)
+
+// Number of proc invocations performed per benchmark loop.
+#define INNER_LOOPS 1000
+
+// Benchmark that times the SkXfermode U64 src-over procs (Proc1 or ProcN) on a row of N 64-bit
+// pixels, with or without a per-pixel coverage array.
+class XferU64Bench : public Benchmark {
+public:
+    // doN:   true to time the ProcN variant (array of srcs), false to time Proc1 (single src).
+    // flags: SkXfermode::U64Flags, optionally or'd with USE_AA to pass a coverage array.
+    XferU64Bench(bool doN, uint32_t flags)
+        : fDoN(doN)
+        , fFlags(flags & ~USE_AA)   // USE_AA is bench-only; the procs are handed U64Flags only
+    {
+        SkXfermode::Mode mode = SkXfermode::kSrcOver_Mode;
+
+        fProc1 = SkXfermode::GetU64Proc1(mode, fFlags);
+        fProcN = SkXfermode::GetU64ProcN(mode, fFlags);
+        fName.printf("xferu64_%s_%c_%s_%s",
+                     (flags & USE_AA) ? "aa" : "bw",
+                     fDoN ? 'N' : '1',
+                     (flags & SkXfermode::kSrcIsOpaque_U64Flag) ? "opaque" : "alpha",
+                     (flags & SkXfermode::kDstIsFloat16_U64Flag) ? "f16" : "u16");
+
+        for (int i = 0; i < N; ++i) {
+            fSrc[i] = {{ 1, 1, 1, 1 }};
+            fDst[i] = 0;
+            fAAStorage[i] = i * 255 / (N - 1);  // coverage ramp from 0 to 255
+        }
+
+        if (flags & USE_AA) {
+            fAA = fAAStorage;
+        } else {
+            fAA = nullptr;
+        }
+    }
+
+protected:
+    bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
+
+    const char* onGetName() override { return fName.c_str(); }
+
+    // Runs the selected proc loops * INNER_LOOPS times over the same N pixels.
+    void onDraw(int loops, SkCanvas*) override {
+        const SkXfermode::U64State state{ nullptr, fFlags };
+
+        for (int i = 0; i < loops * INNER_LOOPS; ++i) {
+            if (fDoN) {
+                fProcN(state, fDst, fSrc, N, fAA);
+            } else {
+                fProc1(state, fDst, fSrc[0], N, fAA);
+            }
+        }
+    }
+
+private:
+    SkString                fName;
+    SkXfermode::U64Proc1    fProc1;
+    SkXfermode::U64ProcN    fProcN;
+    const SkAlpha*          fAA;        // fAAStorage, or nullptr when USE_AA was not requested
+    bool                    fDoN;
+    uint32_t                fFlags;     // constructor flags with USE_AA stripped
+
+    enum {
+        N = 1000,
+    };
+    SkPM4f      fSrc[N];
+    uint64_t    fDst[N];
+    uint8_t     fAAStorage[N];
+
+    typedef Benchmark INHERITED;
+};
+
+// Flag combinations: first digit is kDstIsFloat16_U64Flag, second is kSrcIsOpaque_U64Flag.
+#define F00 0
+#define F01 (SkXfermode::kSrcIsOpaque_U64Flag)
+#define F10 (SkXfermode::kDstIsFloat16_U64Flag)
+#define F11 (SkXfermode::kDstIsFloat16_U64Flag | SkXfermode::kSrcIsOpaque_U64Flag)
+
+// The ProcN (doN == true) variants are compiled out.
+#if 0
+DEF_BENCH( return new XferU64Bench(true, F10 | USE_AA); )
+DEF_BENCH( return new XferU64Bench(true, F11 | USE_AA); )
+DEF_BENCH( return new XferU64Bench(true, F10); )
+DEF_BENCH( return new XferU64Bench(true, F11); )
+
+DEF_BENCH( return new XferU64Bench(true, F00 | USE_AA); )
+DEF_BENCH( return new XferU64Bench(true, F01 | USE_AA); )
+DEF_BENCH( return new XferU64Bench(true, F00); )
+DEF_BENCH( return new XferU64Bench(true, F01); )
+#endif
+
+DEF_BENCH( return new XferU64Bench(false, F10 | USE_AA); )
+DEF_BENCH( return new XferU64Bench(false, F11 | USE_AA); )
+DEF_BENCH( return new XferU64Bench(false, F10); )
+DEF_BENCH( return new XferU64Bench(false, F11); )
+
+DEF_BENCH( return new XferU64Bench(false, F00 | USE_AA); )
+DEF_BENCH( return new XferU64Bench(false, F01 | USE_AA); )
+DEF_BENCH( return new XferU64Bench(false, F00); )
+DEF_BENCH( return new XferU64Bench(false, F01); )
diff --git a/gm/xferu64.cpp b/gm/xferu64.cpp
new file mode 100644
index 0000000000..03fc8864b5
--- /dev/null
+++ b/gm/xferu64.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "gm.h"
+#include "SkCanvas.h"
+#include "SkImageInfo.h"
+#include "SkXfermode.h"
+#include "SkHalf.h"
+#include "SkImage.h"
+
+// Converts a pixel of four half-float components to a 32-bit pixel: each component is scaled
+// by 255 and rounded, keeping the component order.
+static SkPMColor f16_to_pmcolor(uint64_t src) {
+    SkPMColor dst;
+    const SkHalf* sptr = reinterpret_cast<const SkHalf*>(&src);
+    uint8_t* dptr = reinterpret_cast<uint8_t*>(&dst);
+    for (int i = 0; i < 4; ++i) {
+        float f = SkHalfToFloat(sptr[i]);
+        dptr[i] = SkToU8((int)(f * 255 + 0.5f));
+    }
+    return dst;
+}
+
+// Converts a pixel of four 16-bit integer components to a 32-bit pixel by keeping the high
+// 8 bits of each component, keeping the component order.
+static SkPMColor u16_to_pmcolor(uint64_t src) {
+    SkPMColor dst;
+    const uint16_t* sptr = reinterpret_cast<const uint16_t*>(&src);
+    uint8_t* dptr = reinterpret_cast<uint8_t*>(&dst);
+    for (int i = 0; i < 4; ++i) {
+        dptr[i] = sptr[i] >> 8;
+    }
+    return dst;
+}
+
+// Returns a new N32 image holding the 64-bit pixels of src converted to 8 bits per component.
+// flags selects how src's components are read (F16 if kDstIsFloat16_U64Flag is set, else U16).
+// The result is a raw pointer; draw_rect releases it through SkAutoTUnref.
+static SkImage* new_u64_image(const SkBitmap& src, uint32_t flags) {
+    SkBitmap dst;
+    dst.allocN32Pixels(src.width(), src.height());
+    SkPixmap srcPM, dstPM;
+    src.peekPixels(&srcPM);
+    dst.peekPixels(&dstPM);
+
+    for (int y = 0; y < srcPM.height(); ++y) {
+        for (int x = 0; x < srcPM.width(); ++x) {
+            uint64_t srcP = *srcPM.addr64(x, y);
+            uint32_t* dstP = dstPM.writable_addr32(x, y);
+
+            if (flags & SkXfermode::kDstIsFloat16_U64Flag) {
+                *dstP = f16_to_pmcolor(srcP);
+            } else {
+                *dstP = u16_to_pmcolor(srcP);
+            }
+        }
+    }
+    return SkImage::NewRasterCopy(dstPM.info(), dstPM.addr(), dstPM.rowBytes());
+}
+
+// Blends color c over a cleared 64-bit-per-pixel bitmap the size of r using the U64 src-over
+// procs, then converts the result to N32 and draws it at r's top-left corner.
+//
+// The top half of the rows goes through Proc1 and the bottom half through ProcN; the row in
+// between (height/2) is never written and stays cleared.
+//
+// aa is either nullptr or a coverage row that is reused for every scanline.
+static void draw_rect(SkCanvas* canvas, const SkRect& r, SkColor c, uint32_t u64_flags,
+                      const SkAlpha aa[]) {
+    const SkIRect ir = r.round();
+    const SkImageInfo info = SkImageInfo::Make(ir.width(), ir.height(),
+                                               kRGBA_F16_SkColorType, kPremul_SkAlphaType);
+
+    SkBitmap bm;
+    bm.allocPixels(info);
+    SkPixmap pm;
+    bm.peekPixels(&pm);
+    memset(pm.writable_addr(), 0, pm.getSafeSize());
+
+    if (SkColorGetA(c) == 0xFF) {
+        u64_flags |= SkXfermode::kSrcIsOpaque_U64Flag;
+    }
+
+    const SkXfermode::U64State state { nullptr, u64_flags };
+
+    const SkPM4f src = SkColor4f::FromColor(c).premul();
+    auto proc1 = SkXfermode::GetU64Proc1(SkXfermode::kSrcOver_Mode, u64_flags);
+    for (int y = 0; y < ir.height()/2; ++y) {
+        proc1(state, pm.writable_addr64(0, y), src, ir.width(), aa);
+    }
+
+    SkPM4f buffer[1000];
+    SkASSERT(ir.width() <= (int)SK_ARRAY_COUNT(buffer));
+    for (int i = 0; i < ir.width(); ++i) {
+        buffer[i] = src;
+    }
+    auto procN = SkXfermode::GetU64ProcN(SkXfermode::kSrcOver_Mode, u64_flags);
+    for (int y = ir.height()/2 + 1; y < ir.height(); ++y) {
+        procN(state, pm.writable_addr64(0, y), buffer, ir.width(), aa);
+    }
+
+    SkAutoTUnref<SkImage> image(new_u64_image(bm, u64_flags));
+    canvas->drawImage(image, r.left(), r.top(), nullptr);
+}
+
+/*
+ *  Test the SkXfermode U64 procs directly for src-over, comparing them to current SkColor blits.
+ */
+DEF_SIMPLE_GM(xfer_u64_srcover, canvas, 580, 760) {
+    const int IW = 50;
+    const SkScalar W = IW;
+    const SkScalar H = 100;
+
+    const int32_t flags[] = {
+        -1,                                 // normal
+        0,                                  // U16 components
+        SkXfermode::kDstIsFloat16_U64Flag,  // F16 components
+    };
+    const SkColor colors[] = {
+        SK_ColorBLACK, SK_ColorRED, SK_ColorGREEN, SK_ColorBLUE,
+        0x88000000, 0x88FF0000, 0x8800FF00, 0x880000FF
+    };
+
+    uint8_t aa_scanline[IW];
+    for (int i = 0; i < IW; ++i) {
+        aa_scanline[i] = i * 255 / (IW - 1);
+    }
+    uint8_t const* aa_table[] = { nullptr, aa_scanline };
+
+    SkBitmap mask;
+    mask.installPixels(SkImageInfo::MakeA8(IW, 1), aa_scanline, IW);
+
+    canvas->translate(20, 20);
+
+    const SkRect r = SkRect::MakeWH(W, H);
+    for (const uint8_t* aa : aa_table) {
+        canvas->save();
+        for (auto flag : flags) {
+            canvas->save();
+            for (SkColor c : colors) {
+                if (flag < 0) {
+                    SkPaint p;
+                    p.setColor(c);
+                    if (aa) {
+                        canvas->drawBitmapRect(mask, r, &p);
+                    } else {
+                        canvas->drawRect(r, p);
+                    }
+                } else {
+                    draw_rect(canvas, r, c, flag, aa);
+                }
+                canvas->translate(W + 20, 0);
+            }
+            canvas->restore();
+            canvas->translate(0, H + 20);
+        }
+        canvas->restore();
+        canvas->translate(0, (H + 20) * SK_ARRAY_COUNT(flags) + 20);
+    }
+}
diff --git a/gyp/core.gypi b/gyp/core.gypi
index db54377723..1700a05331 100644
--- a/gyp/core.gypi
+++ b/gyp/core.gypi
@@ -302,6 +302,7 @@
         '<(skia_src_path)/core/SkWriter32.cpp',
         '<(skia_src_path)/core/SkXfermode.cpp',
         '<(skia_src_path)/core/SkXfermode4f.cpp',
+        '<(skia_src_path)/core/SkXfermodeU64.cpp',
         '<(skia_src_path)/core/SkXfermode_proccoeff.h',
         '<(skia_src_path)/core/SkXfermodeInterpretation.cpp',
         '<(skia_src_path)/core/SkXfermodeInterpretation.h',
diff --git a/include/core/SkXfermode.h b/include/core/SkXfermode.h
index 76c57a51dd..21d77f8747 100644
--- a/include/core/SkXfermode.h
+++ b/include/core/SkXfermode.h
@@ -233,6 +233,33 @@ public:
     virtual PM4fProc1 getPM4fProc1(uint32_t flags) const;
     virtual PM4fProcN getPM4fProcN(uint32_t flags) const;
 
+    /**
+     *  Flags for the U64 procs, which blend SkPM4f sources into 64-bit destination pixels
+     *  (four 16-bit components each).
+     */
+    enum U64Flags {
+        kSrcIsOpaque_U64Flag  = 1 << 0,
+        kDstIsFloat16_U64Flag = 1 << 1, // else U16 bit components
+    };
+    struct U64State {
+        const SkXfermode*   fXfer;
+        uint32_t            fFlags;
+    };
+    /**
+     *  Proc1 blends a single src color into count dst pixels; ProcN blends src[i] into dst[i].
+     *  coverage may be null; otherwise it supplies one coverage value per pixel.
+     */
+    typedef void (*U64Proc1)(const U64State&, uint64_t dst[], const SkPM4f& src, int count,
+                             const SkAlpha coverage[]);
+    typedef void (*U64ProcN)(const U64State&, uint64_t dst[], const SkPM4f src[], int count,
+                             const SkAlpha coverage[]);
+    /**
+     *  Return the U64 procs for the given mode and U64Flags. Only kSrc_Mode and kSrcOver_Mode
+     *  are implemented; for other modes these return null.
+     */
+    static U64Proc1 GetU64Proc1(Mode, uint32_t flags);
+    static U64ProcN GetU64ProcN(Mode, uint32_t flags);
+
 protected:
     SkXfermode() {}
     /** The default implementation of xfer32/xfer16/xferA8 in turn call this
diff --git a/src/core/SkXfermodeU64.cpp b/src/core/SkXfermodeU64.cpp
new file mode 100644
index 0000000000..6435e6a2e2
--- /dev/null
+++ b/src/core/SkXfermodeU64.cpp
@@ -0,0 +1,198 @@
+/*
+ * Copyright 2016 Google Inc.
+ *
+ * Use of this source code is governed by a BSD-style license that can be
+ * found in the LICENSE file.
+ */
+
+#include "SkHalf.h"
+#include "SkPM4fPriv.h"
+#include "SkUtils.h"
+#include "SkXfermode.h"
+
+// Fills dst[0..count) with value.
+static void sk_memset64(uint64_t dst[], uint64_t value, int count) {
+    for (int i = 0; i < count; ++i) {
+        dst[i] = value;
+    }
+}
+
+// The single-src and per-pixel-src procs for one table entry.
+struct U64ProcPair {
+    SkXfermode::U64Proc1    fP1;
+    SkXfermode::U64ProcN    fPN;
+};
+
+// How the four 16-bit components of a dst pixel are encoded.
+enum DstType {
+    kU16_Dst,   // 16-bit integer components
+    kF16_Dst,   // half-float components
+};
+
+// Interpolates from dst (coverage 0) to src (coverage 255).
+static Sk4f lerp_by_coverage(const Sk4f& src, const Sk4f& dst, uint8_t srcCoverage) {
+    return dst + (src - dst) * Sk4f(srcCoverage * (1/255.0f));
+}
+
+// Converts unit-float components to the bias used for blending into dst: scaled by 65535 for
+// U16, unchanged for F16.
+template <DstType D> Sk4f unit_to_dst_bias(const Sk4f& x4) {
+    return (D == kU16_Dst) ? x4 * Sk4f(65535) : x4;
+}
+
+// returns value already biased by 65535
+static Sk4f load_from_u16(uint64_t value) {
+    return SkNx_cast<float>(Sk4h::Load(&value));
+}
+
+// takes floats already biased by 65535
+static uint64_t store_to_u16(const Sk4f& x4) {
+    uint64_t value;
+    SkNx_cast<uint16_t>(x4 + Sk4f(0.5f)).store(&value);
+    return value;
+}
+
+// Unpacks four half-float components into floats.
+static Sk4f load_from_f16(uint64_t value) {
+    const uint16_t* u16 = reinterpret_cast<const uint16_t*>(&value);
+    float f4[4];
+    for (int i = 0; i < 4; ++i) {
+        f4[i] = SkHalfToFloat(u16[i]);
+    }
+    return Sk4f::Load(f4);
+}
+
+// Packs four floats into half-float components.
+static uint64_t store_to_f16(const Sk4f& x4) {
+    uint64_t value;
+    uint16_t* u16 = reinterpret_cast<uint16_t*>(&value);
+
+    float f4[4];
+    x4.store(f4);
+    for (int i = 0; i < 4; ++i) {
+        u16[i] = SkFloatToHalf(f4[i]);
+    }
+    return value;
+}
+
+// Returns dst in its "natural" bias (either unit-float or 16bit int)
+//
+template <DstType D> Sk4f load_from_dst(uint64_t dst) {
+    return (D == kU16_Dst) ? load_from_u16(dst) : load_from_f16(dst);
+}
+
+// Assumes x4 is already in the "natural" bias (either unit-float or 16bit int)
+template <DstType D> uint64_t store_to_dst(const Sk4f& x4) {
+    return (D == kU16_Dst) ? store_to_u16(x4) : store_to_f16(x4);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+// kSrc with one src color: lerp each dst toward src by aa[i], or overwrite every dst when
+// there is no coverage.
+template <DstType D> void src_1(const SkXfermode::U64State& state, uint64_t dst[],
+                                const SkPM4f& src, int count, const SkAlpha aa[]) {
+    const Sk4f s4 = unit_to_dst_bias<D>(Sk4f::Load(src.fVec));
+    if (aa) {
+        for (int i = 0; i < count; ++i) {
+            const Sk4f d4 = load_from_dst<D>(dst[i]);
+            dst[i] = store_to_dst<D>(lerp_by_coverage(s4, d4, aa[i]));
+        }
+    } else {
+        sk_memset64(dst, store_to_dst<D>(s4), count);
+    }
+}
+
+// kSrc with one src color per pixel.
+template <DstType D> void src_n(const SkXfermode::U64State& state, uint64_t dst[],
+                                const SkPM4f src[], int count, const SkAlpha aa[]) {
+    if (aa) {
+        for (int i = 0; i < count; ++i) {
+            const Sk4f s4 = unit_to_dst_bias<D>(Sk4f::Load(src[i].fVec));
+            const Sk4f d4 = load_from_dst<D>(dst[i]);
+            dst[i] = store_to_dst<D>(lerp_by_coverage(s4, d4, aa[i]));
+        }
+    } else {
+        for (int i = 0; i < count; ++i) {
+            const Sk4f s4 = unit_to_dst_bias<D>(Sk4f::Load(src[i].fVec));
+            dst[i] = store_to_dst<D>(s4);
+        }
+    }
+}
+
+// Indexed by U64Flags: bit 0 is kSrcIsOpaque_U64Flag, bit 1 is kDstIsFloat16_U64Flag.
+const U64ProcPair gU64Procs_Src[] = {
+    { src_1<kU16_Dst>, src_n<kU16_Dst> },   // U16 alpha
+    { src_1<kU16_Dst>, src_n<kU16_Dst> },   // U16 opaque
+    { src_1<kF16_Dst>, src_n<kF16_Dst> },   // F16 alpha
+    { src_1<kF16_Dst>, src_n<kF16_Dst> },   // F16 opaque
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+// kSrcOver with one src color: r = src + dst * (1 - src_alpha), then lerp by coverage if any.
+template <DstType D> void srcover_1(const SkXfermode::U64State& state, uint64_t dst[],
+                                    const SkPM4f& src, int count, const SkAlpha aa[]) {
+    const Sk4f s4 = Sk4f::Load(src.fVec);
+    const Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
+    const Sk4f s4bias = unit_to_dst_bias<D>(s4);
+    for (int i = 0; i < count; ++i) {
+        const Sk4f d4bias = load_from_dst<D>(dst[i]);
+        const Sk4f r4bias = s4bias + d4bias * dst_scale;
+        if (aa) {
+            dst[i] = store_to_dst<D>(lerp_by_coverage(r4bias, d4bias, aa[i]));
+        } else {
+            dst[i] = store_to_dst<D>(r4bias);
+        }
+    }
+}
+
+// kSrcOver with one src color per pixel.
+template <DstType D> void srcover_n(const SkXfermode::U64State& state, uint64_t dst[],
+                                    const SkPM4f src[], int count, const SkAlpha aa[]) {
+    for (int i = 0; i < count; ++i) {
+        const Sk4f s4 = Sk4f::Load(src[i].fVec);
+        const Sk4f dst_scale = Sk4f(1 - get_alpha(s4));
+        const Sk4f s4bias = unit_to_dst_bias<D>(s4);
+        const Sk4f d4bias = load_from_dst<D>(dst[i]);
+        const Sk4f r4bias = s4bias + d4bias * dst_scale;
+        if (aa) {
+            dst[i] = store_to_dst<D>(lerp_by_coverage(r4bias, d4bias, aa[i]));
+        } else {
+            dst[i] = store_to_dst<D>(r4bias);
+        }
+    }
+}
+
+// Indexed by U64Flags (same layout as gU64Procs_Src). An opaque src makes src-over equal to
+// src, so those entries reuse the src procs.
+const U64ProcPair gU64Procs_SrcOver[] = {
+    { srcover_1<kU16_Dst>, srcover_n<kU16_Dst> },   // U16 alpha
+    { src_1<kU16_Dst>,     src_n<kU16_Dst>     },   // U16 opaque
+    { srcover_1<kF16_Dst>, srcover_n<kF16_Dst> },   // F16 alpha
+    { src_1<kF16_Dst>,     src_n<kF16_Dst>     },   // F16 opaque
+};
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Returns the proc pair for mode and flags, or { nullptr, nullptr } for unsupported modes.
+static U64ProcPair find_procs(SkXfermode::Mode mode, uint32_t flags) {
+    SkASSERT(0 == (flags & ~3));
+    flags &= 3;
+
+    switch (mode) {
+        case SkXfermode::kSrc_Mode:     return gU64Procs_Src[flags];
+        case SkXfermode::kSrcOver_Mode: return gU64Procs_SrcOver[flags];
+        default:
+            break;
+    }
+    return { nullptr, nullptr };
+}
+
+SkXfermode::U64Proc1 SkXfermode::GetU64Proc1(Mode mode, uint32_t flags) {
+    return find_procs(mode, flags).fP1;
+}
+
+SkXfermode::U64ProcN SkXfermode::GetU64ProcN(Mode mode, uint32_t flags) {
+    return find_procs(mode, flags).fPN;
+}