Reland "Add ConvertPixels versions of PremulAlphaRoundTrip"
This is a reland of b61804e94c
Original change's description:
> Add ConvertPixels versions of PremulAlphaRoundTrip
>
> Prior to the force_highp trick, the GrConvertPixels version failed, just
> like the GPU would do if we disabled the canvas2D fast path. With the
> highp trick, all tests pass.
>
> Bug: skia:12592
> Change-Id: I63ad2fd3b67863b6a736316e7c7b3b9bd2ee8970
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/467516
> Commit-Queue: Brian Osman <brianosman@google.com>
> Reviewed-by: Brian Salomon <bsalomon@google.com>
Bug: skia:12592
Change-Id: I4c83c8d20959ab13cc493748a22fff5133706a77
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/468458
Reviewed-by: Brian Salomon <bsalomon@google.com>
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Brian Osman <brianosman@google.com>
This commit is contained in:
parent
fa26a656cf
commit
1b61d866a6
@ -3076,16 +3076,46 @@ static void start_pipeline(const size_t x0, const size_t y0,
|
||||
|
||||
// ~~~~~~ Commonly used helper functions ~~~~~~ //
|
||||
|
||||
/**
|
||||
* Helpers to to properly rounded division (by 255). The ideal answer we want to compute is slow,
|
||||
* thanks to a division by a non-power of two:
|
||||
* [1] (v + 127) / 255
|
||||
*
|
||||
* There is a two-step process that computes the correct answer for all inputs:
|
||||
* [2] (v + 128 + ((v + 128) >> 8)) >> 8
|
||||
*
|
||||
* There is also a single iteration approximation, but it's wrong (+-1) ~25% of the time:
|
||||
* [3] (v + 255) >> 8;
|
||||
*
|
||||
* We offer two different implementations here, depending on the requirements of the calling stage.
|
||||
*/
|
||||
|
||||
/**
|
||||
* div255 favors speed over accuracy. It uses formula [2] on NEON (where we can compute it as fast
|
||||
* as [3]), and uses [3] elsewhere.
|
||||
*/
|
||||
SI U16 div255(U16 v) {
|
||||
#if 0
|
||||
return (v+127)/255; // The ideal rounding divide by 255.
|
||||
#elif 1 && defined(JUMPER_IS_NEON)
|
||||
// With NEON we can compute (v+127)/255 as (v + ((v+128)>>8) + 128)>>8
|
||||
// just as fast as we can do the approximation below, so might as well be correct!
|
||||
// First we compute v + ((v+128)>>8), then one more round of (...+128)>>8 to finish up.
|
||||
#if defined(JUMPER_IS_NEON)
|
||||
// With NEON we can compute [2] just as fast as [3], so let's be correct.
|
||||
// First we compute v + ((v+128)>>8), then one more round of (...+128)>>8 to finish up:
|
||||
return vrshrq_n_u16(vrsraq_n_u16(v, v, 8), 8);
|
||||
#else
|
||||
return (v+255)/256; // A good approximation of (v+127)/255.
|
||||
// Otherwise, use [3], which is never wrong by more than 1:
|
||||
return (v+255)/256;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* div255_accurate guarantees the right answer on all platforms, at the expense of performance.
|
||||
*/
|
||||
SI U16 div255_accurate(U16 v) {
|
||||
#if defined(JUMPER_IS_NEON)
|
||||
// Our NEON implementation of div255 is already correct for all inputs:
|
||||
return div255(v);
|
||||
#else
|
||||
// This is [2] (the same formulation as NEON), but written without the benefit of intrinsics:
|
||||
v += 128;
|
||||
return (v+(v/256))/256;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -3315,14 +3345,14 @@ STAGE_PP(clamp_gamut, Ctx::None) {
|
||||
}
|
||||
|
||||
STAGE_PP(premul, Ctx::None) {
|
||||
r = div255(r * a);
|
||||
g = div255(g * a);
|
||||
b = div255(b * a);
|
||||
r = div255_accurate(r * a);
|
||||
g = div255_accurate(g * a);
|
||||
b = div255_accurate(b * a);
|
||||
}
|
||||
STAGE_PP(premul_dst, Ctx::None) {
|
||||
dr = div255(dr * da);
|
||||
dg = div255(dg * da);
|
||||
db = div255(db * da);
|
||||
dr = div255_accurate(dr * da);
|
||||
dg = div255_accurate(dg * da);
|
||||
db = div255_accurate(db * da);
|
||||
}
|
||||
|
||||
STAGE_PP(force_opaque , Ctx::None) { a = 255; }
|
||||
|
@ -9,6 +9,8 @@
|
||||
#include "include/core/SkCanvas.h"
|
||||
#include "include/core/SkSurface.h"
|
||||
#include "include/gpu/GrDirectContext.h"
|
||||
#include "src/core/SkConvertPixels.h"
|
||||
#include "src/gpu/GrPixmap.h"
|
||||
#include "tests/Test.h"
|
||||
#include "tools/ToolUtils.h"
|
||||
|
||||
@ -105,3 +107,118 @@ DEF_GPUTEST_FOR_RENDERING_CONTEXTS(PremulAlphaRoundTrip_Gpu, reporter, ctxInfo)
|
||||
SkBudgeted::kNo, info));
|
||||
test_premul_alpha_roundtrip(reporter, surf.get());
|
||||
}
|
||||
|
||||
DEF_TEST(PremulAlphaRoundTripGrConvertPixels, reporter) {
|
||||
// Code that does the same thing as above, but using GrConvertPixels. This simulates what
|
||||
// happens if you run the above on a machine with a GPU that doesn't have a valid PM/UPM
|
||||
// conversion pair of FPs.
|
||||
const SkImageInfo upmInfo =
|
||||
SkImageInfo::Make(256, 256, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
|
||||
const SkImageInfo pmInfo =
|
||||
SkImageInfo::Make(256, 256, kRGBA_8888_SkColorType, kPremul_SkAlphaType);
|
||||
|
||||
GrPixmap src = GrPixmap::Allocate(upmInfo);
|
||||
uint32_t* srcPixels = (uint32_t*)src.addr();
|
||||
for (int y = 0; y < 256; ++y) {
|
||||
for (int x = 0; x < 256; ++x) {
|
||||
srcPixels[y * 256 + x] = pack_unpremul_rgba(SkColorSetARGB(y, x, x, x));
|
||||
}
|
||||
}
|
||||
|
||||
GrPixmap surf = GrPixmap::Allocate(pmInfo);
|
||||
GrConvertPixels(surf, src);
|
||||
|
||||
GrPixmap read1 = GrPixmap::Allocate(upmInfo);
|
||||
GrConvertPixels(read1, surf);
|
||||
|
||||
GrPixmap surf2 = GrPixmap::Allocate(pmInfo);
|
||||
GrConvertPixels(surf2, read1);
|
||||
|
||||
GrPixmap read2 = GrPixmap::Allocate(upmInfo);
|
||||
GrConvertPixels(read2, surf2);
|
||||
|
||||
auto get_pixel = [](const GrPixmap& pm, int x, int y) {
|
||||
const uint32_t* addr = (const uint32_t*)pm.addr();
|
||||
return addr[y * 256 + x];
|
||||
};
|
||||
auto dump_pixel_history = [&](int x, int y) {
|
||||
SkDebugf("Pixel history for (%d, %d):\n", x, y);
|
||||
SkDebugf("Src : %08x\n", get_pixel(src, x, y));
|
||||
SkDebugf(" -> : %08x\n", get_pixel(surf, x, y));
|
||||
SkDebugf(" <- : %08x\n", get_pixel(read1, x, y));
|
||||
SkDebugf(" -> : %08x\n", get_pixel(surf2, x, y));
|
||||
SkDebugf(" <- : %08x\n", get_pixel(read2, x, y));
|
||||
};
|
||||
|
||||
bool success = true;
|
||||
for (int y = 0; y < 256 && success; ++y) {
|
||||
const uint32_t* pixels1 = (const uint32_t*) read1.addr();
|
||||
const uint32_t* pixels2 = (const uint32_t*) read2.addr();
|
||||
for (int x = 0; x < 256 && success; ++x) {
|
||||
uint32_t c1 = pixels1[y * 256 + x],
|
||||
c2 = pixels2[y * 256 + x];
|
||||
// If this ever fails, it's helpful to see where it goes wrong.
|
||||
if (c1 != c2) {
|
||||
dump_pixel_history(x, y);
|
||||
}
|
||||
REPORTER_ASSERT(reporter, success = c1 == c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DEF_TEST(PremulAlphaRoundTripSkConvertPixels, reporter) {
|
||||
// ... and now using SkConvertPixels, just for completeness
|
||||
const SkImageInfo upmInfo =
|
||||
SkImageInfo::Make(256, 256, kRGBA_8888_SkColorType, kUnpremul_SkAlphaType);
|
||||
const SkImageInfo pmInfo =
|
||||
SkImageInfo::Make(256, 256, kRGBA_8888_SkColorType, kPremul_SkAlphaType);
|
||||
|
||||
SkBitmap src; src.allocPixels(upmInfo);
|
||||
uint32_t* srcPixels = src.getAddr32(0, 0);
|
||||
for (int y = 0; y < 256; ++y) {
|
||||
for (int x = 0; x < 256; ++x) {
|
||||
srcPixels[y * 256 + x] = pack_unpremul_rgba(SkColorSetARGB(y, x, x, x));
|
||||
}
|
||||
}
|
||||
|
||||
auto convert = [](const SkBitmap& dst, const SkBitmap& src){
|
||||
SkAssertResult(SkConvertPixels(dst.info(), dst.getAddr(0, 0), dst.rowBytes(),
|
||||
src.info(), src.getAddr(0, 0), src.rowBytes()));
|
||||
};
|
||||
|
||||
SkBitmap surf; surf.allocPixels(pmInfo);
|
||||
convert(surf, src);
|
||||
|
||||
SkBitmap read1; read1.allocPixels(upmInfo);
|
||||
convert(read1, surf);
|
||||
|
||||
SkBitmap surf2; surf2.allocPixels(pmInfo);
|
||||
convert(surf2, read1);
|
||||
|
||||
SkBitmap read2; read2.allocPixels(upmInfo);
|
||||
convert(read2, surf2);
|
||||
|
||||
auto dump_pixel_history = [&](int x, int y) {
|
||||
SkDebugf("Pixel history for (%d, %d):\n", x, y);
|
||||
SkDebugf("Src : %08x\n", *src.getAddr32(x, y));
|
||||
SkDebugf(" -> : %08x\n", *surf.getAddr32(x, y));
|
||||
SkDebugf(" <- : %08x\n", *read1.getAddr32(x, y));
|
||||
SkDebugf(" -> : %08x\n", *surf2.getAddr32(x, y));
|
||||
SkDebugf(" <- : %08x\n", *read2.getAddr32(x, y));
|
||||
};
|
||||
|
||||
bool success = true;
|
||||
for (int y = 0; y < 256 && success; ++y) {
|
||||
const uint32_t* pixels1 = read1.getAddr32(0, 0);
|
||||
const uint32_t* pixels2 = read2.getAddr32(0, 0);
|
||||
for (int x = 0; x < 256 && success; ++x) {
|
||||
uint32_t c1 = pixels1[y * 256 + x],
|
||||
c2 = pixels2[y * 256 + x];
|
||||
// If this ever fails, it's helpful to see where it goes wrong.
|
||||
if (c1 != c2) {
|
||||
dump_pixel_history(x, y);
|
||||
}
|
||||
REPORTER_ASSERT(reporter, success = c1 == c2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user