Sk4px blit mask.
Local SKP nanobenching ranges SSE between 1.05x and 0.87x, much more heavily weighted toward <1.0x ratios (speedups). I profiled the top five regressions (1.05x-1.01x) and they look like noise. Will follow up after broad bot results. NEON looks similar but less extreme than SSE changes, ranging between 1.02x and 0.95x, again mostly speedups in 0.99x-0.97x range. The old code trifurcated into black, opaque-but-not-black, and general versions as a function of the constant src color. I did not see a significant difference between general and opaque-but-not-black, and I don't think a black version would be faster using SIMD. So we have here just one version of the code, the general version. Somewhat fantastically, I see no pixel diffs on GMs or SKPs. I will be following up with more CLs for the other procs called by SkBlitMask. BUG=skia: Review URL: https://codereview.chromium.org/1278253003
This commit is contained in:
parent
c699873ac7
commit
4977983510
@ -165,6 +165,34 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// As above, but with dst4' = fn(dst4, alpha4).
|
||||
template <typename Fn, typename Dst>
|
||||
static void MapDstAlpha(int n, Dst* dst, const SkAlpha* a, const Fn& fn) {
|
||||
while (n > 0) {
|
||||
if (n >= 8) {
|
||||
Sk4px dst0 = fn(Load4(dst+0), Load4Alphas(a+0)),
|
||||
dst4 = fn(Load4(dst+4), Load4Alphas(a+4));
|
||||
dst0.store4(dst+0);
|
||||
dst4.store4(dst+4);
|
||||
dst += 8; a += 8; n -= 8;
|
||||
continue; // Keep our stride at 8 pixels as long as possible.
|
||||
}
|
||||
SkASSERT(n <= 7);
|
||||
if (n >= 4) {
|
||||
fn(Load4(dst), Load4Alphas(a)).store4(dst);
|
||||
dst += 4; a += 4; n -= 4;
|
||||
}
|
||||
if (n >= 2) {
|
||||
fn(Load2(dst), Load2Alphas(a)).store2(dst);
|
||||
dst += 2; a += 2; n -= 2;
|
||||
}
|
||||
if (n >= 1) {
|
||||
fn(Load1(dst), DupAlpha(*a)).store1(dst);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// As above, but with dst4' = fn(dst4, src4, alpha4).
|
||||
template <typename Fn, typename Dst>
|
||||
static void MapDstSrcAlpha(int n, Dst* dst, const SkPMColor* src, const SkAlpha* a,
|
||||
|
@ -47,18 +47,6 @@ public:
|
||||
typedef void (*RowProc)(void* dst, const void* mask,
|
||||
const SkPMColor* src, int width);
|
||||
|
||||
/**
|
||||
* Public entry-point to return a blitmask ColorProc.
|
||||
* May return NULL if config or format are not supported.
|
||||
*/
|
||||
static ColorProc ColorFactory(SkColorType, SkMask::Format, SkColor);
|
||||
|
||||
/**
|
||||
* Return either platform specific optimized blitmask ColorProc,
|
||||
* or NULL if no optimized routine is available.
|
||||
*/
|
||||
static ColorProc PlatformColorProcs(SkColorType, SkMask::Format, SkColor);
|
||||
|
||||
/**
|
||||
* Public entry-point to return a blitcolor BlitLCD16RowProc.
|
||||
*/
|
||||
|
@ -8,68 +8,7 @@
|
||||
#include "SkBlitMask.h"
|
||||
#include "SkColor.h"
|
||||
#include "SkColorPriv.h"
|
||||
|
||||
static void D32_A8_Color(void* SK_RESTRICT dst, size_t dstRB,
|
||||
const void* SK_RESTRICT maskPtr, size_t maskRB,
|
||||
SkColor color, int width, int height) {
|
||||
SkPMColor pmc = SkPreMultiplyColor(color);
|
||||
size_t dstOffset = dstRB - (width << 2);
|
||||
size_t maskOffset = maskRB - width;
|
||||
SkPMColor* SK_RESTRICT device = (SkPMColor *)dst;
|
||||
const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
|
||||
|
||||
do {
|
||||
int w = width;
|
||||
do {
|
||||
unsigned aa = *mask++;
|
||||
*device = SkBlendARGB32(pmc, *device, aa);
|
||||
device += 1;
|
||||
} while (--w != 0);
|
||||
device = (uint32_t*)((char*)device + dstOffset);
|
||||
mask += maskOffset;
|
||||
} while (--height != 0);
|
||||
}
|
||||
|
||||
static void D32_A8_Opaque(void* SK_RESTRICT dst, size_t dstRB,
|
||||
const void* SK_RESTRICT maskPtr, size_t maskRB,
|
||||
SkColor color, int width, int height) {
|
||||
SkPMColor pmc = SkPreMultiplyColor(color);
|
||||
SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
|
||||
const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
|
||||
|
||||
maskRB -= width;
|
||||
dstRB -= (width << 2);
|
||||
do {
|
||||
int w = width;
|
||||
do {
|
||||
unsigned aa = *mask++;
|
||||
*device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
|
||||
device += 1;
|
||||
} while (--w != 0);
|
||||
device = (uint32_t*)((char*)device + dstRB);
|
||||
mask += maskRB;
|
||||
} while (--height != 0);
|
||||
}
|
||||
|
||||
static void D32_A8_Black(void* SK_RESTRICT dst, size_t dstRB,
|
||||
const void* SK_RESTRICT maskPtr, size_t maskRB,
|
||||
SkColor, int width, int height) {
|
||||
SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
|
||||
const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
|
||||
|
||||
maskRB -= width;
|
||||
dstRB -= (width << 2);
|
||||
do {
|
||||
int w = width;
|
||||
do {
|
||||
unsigned aa = *mask++;
|
||||
*device = (aa << SK_A32_SHIFT) + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
|
||||
device += 1;
|
||||
} while (--w != 0);
|
||||
device = (uint32_t*)((char*)device + dstRB);
|
||||
mask += maskRB;
|
||||
} while (--height != 0);
|
||||
}
|
||||
#include "SkOpts.h"
|
||||
|
||||
SkBlitMask::BlitLCD16RowProc SkBlitMask::BlitLCD16RowFactory(bool isOpaque) {
|
||||
BlitLCD16RowProc proc = PlatformBlitRowProcs16(isOpaque);
|
||||
@ -112,51 +51,25 @@ static void D32_LCD16_Proc(void* SK_RESTRICT dst, size_t dstRB,
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static SkBlitMask::ColorProc D32_A8_Factory(SkColor color) {
|
||||
if (SK_ColorBLACK == color) {
|
||||
return D32_A8_Black;
|
||||
} else if (0xFF == SkColorGetA(color)) {
|
||||
return D32_A8_Opaque;
|
||||
} else {
|
||||
return D32_A8_Color;
|
||||
}
|
||||
}
|
||||
|
||||
SkBlitMask::ColorProc SkBlitMask::ColorFactory(SkColorType ct,
|
||||
SkMask::Format format,
|
||||
SkColor color) {
|
||||
ColorProc proc = PlatformColorProcs(ct, format, color);
|
||||
if (proc) {
|
||||
return proc;
|
||||
}
|
||||
|
||||
switch (ct) {
|
||||
case kN32_SkColorType:
|
||||
switch (format) {
|
||||
case SkMask::kA8_Format:
|
||||
return D32_A8_Factory(color);
|
||||
case SkMask::kLCD16_Format:
|
||||
return D32_LCD16_Proc;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool SkBlitMask::BlitColor(const SkPixmap& device, const SkMask& mask,
|
||||
const SkIRect& clip, SkColor color) {
|
||||
ColorProc proc = ColorFactory(device.colorType(), mask.fFormat, color);
|
||||
if (proc) {
|
||||
int x = clip.fLeft;
|
||||
int y = clip.fTop;
|
||||
proc(device.writable_addr32(x, y), device.rowBytes(), mask.getAddr(x, y),
|
||||
mask.fRowBytes, color, clip.width(), clip.height());
|
||||
int x = clip.fLeft, y = clip.fTop;
|
||||
|
||||
if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kA8_Format) {
|
||||
SkOpts::blit_mask_d32_a8(device.writable_addr32(x,y), device.rowBytes(),
|
||||
(const SkAlpha*)mask.getAddr(x,y), mask.fRowBytes,
|
||||
color, clip.width(), clip.height());
|
||||
return true;
|
||||
}
|
||||
|
||||
if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kLCD16_Format) {
|
||||
// TODO: Is this reachable code? Seems like no.
|
||||
D32_LCD16_Proc(device.writable_addr32(x,y), device.rowBytes(),
|
||||
mask.getAddr(x,y), mask.fRowBytes,
|
||||
color, clip.width(), clip.height());
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "SkOpts.h"
|
||||
|
||||
#define SK_OPTS_NS portable
|
||||
#include "SkBlitMask_opts.h"
|
||||
#include "SkBlurImageFilter_opts.h"
|
||||
#include "SkFloatingPoint_opts.h"
|
||||
#include "SkMorphologyImageFilter_opts.h"
|
||||
@ -50,6 +51,8 @@ namespace SkOpts {
|
||||
decltype(texture_compressor) texture_compressor = portable::texture_compressor;
|
||||
decltype(fill_block_dimensions) fill_block_dimensions = portable::fill_block_dimensions;
|
||||
|
||||
decltype(blit_mask_d32_a8) blit_mask_d32_a8 = portable::blit_mask_d32_a8;
|
||||
|
||||
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
|
||||
void Init_sse2();
|
||||
void Init_ssse3();
|
||||
|
@ -43,6 +43,7 @@ namespace SkOpts {
|
||||
extern TextureCompressor (*texture_compressor)(SkColorType, SkTextureCompressor::Format);
|
||||
extern bool (*fill_block_dimensions)(SkTextureCompressor::Format, int* x, int* y);
|
||||
|
||||
extern void (*blit_mask_d32_a8)(SkPMColor*, size_t, const SkAlpha*, size_t, SkColor, int, int);
|
||||
}
|
||||
|
||||
#endif//SkOpts_DEFINED
|
||||
|
37
src/opts/SkBlitMask_opts.h
Normal file
37
src/opts/SkBlitMask_opts.h
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright 2015 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkBlitMask_opts_DEFINED
|
||||
#define SkBlitMask_opts_DEFINED
|
||||
|
||||
#include "Sk4px.h"
|
||||
|
||||
namespace SK_OPTS_NS {
|
||||
|
||||
static void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB,
|
||||
const SkAlpha* mask, size_t maskRB,
|
||||
SkColor color, int w, int h) {
|
||||
auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color));
|
||||
|
||||
auto fn = [&](const Sk4px& d, const Sk4px& aa) {
|
||||
// = (s + d(1-sa))aa + d(1-aa)
|
||||
// = s*aa + d(1-sa*aa)
|
||||
auto left = s.approxMulDiv255(aa),
|
||||
right = d.approxMulDiv255(left.alphas().inv());
|
||||
return left + right; // This does not overflow (exhaustively checked).
|
||||
};
|
||||
|
||||
while (h --> 0) {
|
||||
Sk4px::MapDstAlpha(w, dst, mask, fn);
|
||||
dst += dstRB / sizeof(*dst);
|
||||
mask += maskRB / sizeof(*mask);
|
||||
}
|
||||
}
|
||||
|
||||
} // SK_OPTS_NS
|
||||
|
||||
#endif//SkBlitMask_opts_DEFINED
|
@ -11,32 +11,6 @@
|
||||
#include "SkUtilsArm.h"
|
||||
#include "SkBlitMask_opts_arm_neon.h"
|
||||
|
||||
SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT,
|
||||
SkMask::Format maskFormat,
|
||||
SkColor color) {
|
||||
#if SK_ARM_NEON_IS_NONE
|
||||
return NULL;
|
||||
#else
|
||||
/* ** This has been disabled until we can diagnose and fix the SIGILL generated
|
||||
** in the NEON code. See http://skbug.com/2067 for details.
|
||||
#if SK_ARM_NEON_IS_DYNAMIC
|
||||
if (!sk_cpu_arm_has_neon()) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
if ((kN32_SkColorType == dstCT) &&
|
||||
(SkMask::kA8_Format == maskFormat)) {
|
||||
return D32_A8_Factory_neon(color);
|
||||
}
|
||||
*/
|
||||
#endif
|
||||
|
||||
// We don't need to handle the SkMask::kLCD16_Format case as the default
|
||||
// LCD16 will call us through SkBlitMask::PlatformBlitRowProcs16()
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
|
||||
if (isOpaque) {
|
||||
return SK_ARM_NEON_WRAP(SkBlitLCD16OpaqueRow);
|
||||
|
@ -8,129 +8,6 @@
|
||||
#include "SkBlitMask.h"
|
||||
#include "SkColor_opts_neon.h"
|
||||
|
||||
static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB,
|
||||
const void* SK_RESTRICT maskPtr, size_t maskRB,
|
||||
SkColor, int width, int height) {
|
||||
SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
|
||||
const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
|
||||
|
||||
maskRB -= width;
|
||||
dstRB -= (width << 2);
|
||||
do {
|
||||
int w = width;
|
||||
while (w >= 8) {
|
||||
uint8x8_t vmask = vld1_u8(mask);
|
||||
uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
|
||||
uint8x8x4_t vdevice = vld4_u8((uint8_t*)device);
|
||||
|
||||
vdevice = SkAlphaMulQ_neon8(vdevice, vscale);
|
||||
vdevice.val[NEON_A] += vmask;
|
||||
|
||||
vst4_u8((uint8_t*)device, vdevice);
|
||||
|
||||
mask += 8;
|
||||
device += 8;
|
||||
w -= 8;
|
||||
}
|
||||
while (w-- > 0) {
|
||||
unsigned aa = *mask++;
|
||||
*device = (aa << SK_A32_SHIFT)
|
||||
+ SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
|
||||
device += 1;
|
||||
};
|
||||
device = (uint32_t*)((char*)device + dstRB);
|
||||
mask += maskRB;
|
||||
} while (--height != 0);
|
||||
}
|
||||
|
||||
template <bool isColor>
|
||||
static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
|
||||
const void* SK_RESTRICT maskPtr, size_t maskRB,
|
||||
SkColor color, int width, int height) {
|
||||
SkPMColor pmc = SkPreMultiplyColor(color);
|
||||
SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
|
||||
const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
|
||||
uint8x8x4_t vpmc;
|
||||
|
||||
maskRB -= width;
|
||||
dstRB -= (width << 2);
|
||||
|
||||
if (width >= 8) {
|
||||
vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
|
||||
vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
|
||||
vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
|
||||
vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
|
||||
}
|
||||
do {
|
||||
int w = width;
|
||||
while (w >= 8) {
|
||||
uint8x8_t vmask = vld1_u8(mask);
|
||||
uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask);
|
||||
if (isColor) {
|
||||
vscale = vsubw_u8(vdupq_n_u16(256),
|
||||
SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256));
|
||||
} else {
|
||||
vscale = vsubw_u8(vdupq_n_u16(256), vmask);
|
||||
}
|
||||
uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
|
||||
|
||||
vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
|
||||
+ SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
|
||||
vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
|
||||
+ SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
|
||||
vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
|
||||
+ SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
|
||||
vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
|
||||
+ SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
|
||||
|
||||
vst4_u8((uint8_t*)device, vdev);
|
||||
|
||||
mask += 8;
|
||||
device += 8;
|
||||
w -= 8;
|
||||
}
|
||||
|
||||
while (w--) {
|
||||
unsigned aa = *mask++;
|
||||
if (isColor) {
|
||||
*device = SkBlendARGB32(pmc, *device, aa);
|
||||
} else {
|
||||
*device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
|
||||
+ SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
|
||||
}
|
||||
device += 1;
|
||||
};
|
||||
|
||||
device = (uint32_t*)((char*)device + dstRB);
|
||||
mask += maskRB;
|
||||
|
||||
} while (--height != 0);
|
||||
}
|
||||
|
||||
static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB,
|
||||
const void* SK_RESTRICT maskPtr, size_t maskRB,
|
||||
SkColor color, int width, int height) {
|
||||
D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height);
|
||||
}
|
||||
|
||||
static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
|
||||
const void* SK_RESTRICT maskPtr, size_t maskRB,
|
||||
SkColor color, int width, int height) {
|
||||
D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height);
|
||||
}
|
||||
|
||||
SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) {
|
||||
if (SK_ColorBLACK == color) {
|
||||
return D32_A8_Black_neon;
|
||||
} else if (0xFF == SkColorGetA(color)) {
|
||||
return D32_A8_Opaque_neon;
|
||||
} else {
|
||||
return D32_A8_Color_neon;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
|
||||
SkColor color, int width,
|
||||
SkPMColor opaqueDst) {
|
||||
|
@ -11,8 +11,6 @@
|
||||
#include "SkColor.h"
|
||||
#include "SkBlitMask.h"
|
||||
|
||||
extern SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color);
|
||||
|
||||
extern void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
|
||||
SkColor color, int width,
|
||||
SkPMColor opaqueDst);
|
||||
|
@ -7,12 +7,6 @@
|
||||
|
||||
#include "SkBlitMask.h"
|
||||
|
||||
SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT,
|
||||
SkMask::Format maskFormat,
|
||||
SkColor color) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
|
||||
return NULL;
|
||||
}
|
||||
|
@ -301,54 +301,6 @@ void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x, int y)
|
||||
}
|
||||
}
|
||||
|
||||
void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
|
||||
size_t maskRB, SkColor origColor,
|
||||
int width, int height) {
|
||||
SkPMColor color = SkPreMultiplyColor(origColor);
|
||||
size_t dstOffset = dstRB - (width << 2);
|
||||
size_t maskOffset = maskRB - width;
|
||||
SkPMColor* dst = (SkPMColor *)device;
|
||||
const uint8_t* mask = (const uint8_t*)maskPtr;
|
||||
do {
|
||||
int count = width;
|
||||
if (count >= 4) {
|
||||
while (((size_t)dst & 0x0F) != 0 && (count > 0)) {
|
||||
*dst = SkBlendARGB32(color, *dst, *mask);
|
||||
mask++;
|
||||
dst++;
|
||||
count--;
|
||||
}
|
||||
__m128i *d = reinterpret_cast<__m128i*>(dst);
|
||||
__m128i src_pixel = _mm_set1_epi32(color);
|
||||
while (count >= 4) {
|
||||
// Load 4 dst pixels
|
||||
__m128i dst_pixel = _mm_load_si128(d);
|
||||
|
||||
// Set the alpha value
|
||||
__m128i alpha_wide = _mm_cvtsi32_si128(*reinterpret_cast<const uint32_t*>(mask));
|
||||
alpha_wide = _mm_unpacklo_epi8(alpha_wide, _mm_setzero_si128());
|
||||
alpha_wide = _mm_unpacklo_epi16(alpha_wide, _mm_setzero_si128());
|
||||
|
||||
__m128i result = SkBlendARGB32_SSE2(src_pixel, dst_pixel, alpha_wide);
|
||||
_mm_store_si128(d, result);
|
||||
// Load the next 4 dst pixels and alphas
|
||||
mask = mask + 4;
|
||||
d++;
|
||||
count -= 4;
|
||||
}
|
||||
dst = reinterpret_cast<SkPMColor*>(d);
|
||||
}
|
||||
while (count > 0) {
|
||||
*dst= SkBlendARGB32(color, *dst, *mask);
|
||||
dst += 1;
|
||||
mask++;
|
||||
count --;
|
||||
}
|
||||
dst = (SkPMColor *)((char*)dst + dstOffset);
|
||||
mask += maskOffset;
|
||||
} while (--height != 0);
|
||||
}
|
||||
|
||||
// The following (left) shifts cause the top 5 bits of the mask components to
|
||||
// line up with the corresponding components in an SkPMColor.
|
||||
// Note that the mask's RGB16 order may differ from the SkPMColor order.
|
||||
|
@ -25,10 +25,6 @@ void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
|
||||
void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x,
|
||||
int y);
|
||||
|
||||
void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* mask,
|
||||
size_t maskRB, SkColor color,
|
||||
int width, int height);
|
||||
|
||||
void SkBlitLCD16Row_SSE2(SkPMColor dst[], const uint16_t src[],
|
||||
SkColor color, int width, SkPMColor);
|
||||
void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t src[],
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "SkOpts.h"
|
||||
|
||||
#define SK_OPTS_NS neon
|
||||
#include "SkBlitMask_opts.h"
|
||||
#include "SkBlurImageFilter_opts.h"
|
||||
#include "SkFloatingPoint_opts.h"
|
||||
#include "SkMorphologyImageFilter_opts.h"
|
||||
@ -33,5 +34,7 @@ namespace SkOpts {
|
||||
|
||||
texture_compressor = neon::texture_compressor;
|
||||
fill_block_dimensions = neon::fill_block_dimensions;
|
||||
|
||||
blit_mask_d32_a8 = neon::blit_mask_d32_a8;
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "SkOpts.h"
|
||||
|
||||
#define SK_OPTS_NS sse2
|
||||
#include "SkBlitMask_opts.h"
|
||||
#include "SkBlurImageFilter_opts.h"
|
||||
#include "SkMorphologyImageFilter_opts.h"
|
||||
#include "SkUtils_opts.h"
|
||||
@ -27,5 +28,7 @@ namespace SkOpts {
|
||||
dilate_y = sse2::dilate_y;
|
||||
erode_x = sse2::erode_x;
|
||||
erode_y = sse2::erode_y;
|
||||
|
||||
blit_mask_d32_a8 = sse2::blit_mask_d32_a8;
|
||||
}
|
||||
}
|
||||
|
@ -252,30 +252,6 @@ SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT,
|
||||
SkMask::Format maskFormat,
|
||||
SkColor color) {
|
||||
if (SkMask::kA8_Format != maskFormat) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ColorProc proc = NULL;
|
||||
if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
|
||||
switch (dstCT) {
|
||||
case kN32_SkColorType:
|
||||
// The SSE2 version is not (yet) faster for black, so we check
|
||||
// for that.
|
||||
if (SK_ColorBLACK != color) {
|
||||
proc = SkARGB32_A8_BlitMask_SSE2;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return proc;
|
||||
}
|
||||
|
||||
SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
|
||||
if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
|
||||
if (isOpaque) {
|
||||
|
Loading…
Reference in New Issue
Block a user