diff --git a/src/core/Sk4px.h b/src/core/Sk4px.h index 24a21c66c1..ffde1af504 100644 --- a/src/core/Sk4px.h +++ b/src/core/Sk4px.h @@ -165,6 +165,34 @@ public: } } + // As above, but with dst4' = fn(dst4, alpha4). + template <typename Fn, typename Dst> + static void MapDstAlpha(int n, Dst* dst, const SkAlpha* a, const Fn& fn) { + while (n > 0) { + if (n >= 8) { + Sk4px dst0 = fn(Load4(dst+0), Load4Alphas(a+0)), + dst4 = fn(Load4(dst+4), Load4Alphas(a+4)); + dst0.store4(dst+0); + dst4.store4(dst+4); + dst += 8; a += 8; n -= 8; + continue; // Keep our stride at 8 pixels as long as possible. + } + SkASSERT(n <= 7); + if (n >= 4) { + fn(Load4(dst), Load4Alphas(a)).store4(dst); + dst += 4; a += 4; n -= 4; + } + if (n >= 2) { + fn(Load2(dst), Load2Alphas(a)).store2(dst); + dst += 2; a += 2; n -= 2; + } + if (n >= 1) { + fn(Load1(dst), DupAlpha(*a)).store1(dst); + } + break; + } + } + // As above, but with dst4' = fn(dst4, src4, alpha4). template <typename Fn, typename Dst> static void MapDstSrcAlpha(int n, Dst* dst, const SkPMColor* src, const SkAlpha* a, diff --git a/src/core/SkBlitMask.h b/src/core/SkBlitMask.h index 3fc306f8c8..d39c094cb6 100644 --- a/src/core/SkBlitMask.h +++ b/src/core/SkBlitMask.h @@ -47,18 +47,6 @@ public: typedef void (*RowProc)(void* dst, const void* mask, const SkPMColor* src, int width); - /** - * Public entry-point to return a blitmask ColorProc. - * May return NULL if config or format are not supported. - */ - static ColorProc ColorFactory(SkColorType, SkMask::Format, SkColor); - - /** - * Return either platform specific optimized blitmask ColorProc, - * or NULL if no optimized routine is available. - */ - static ColorProc PlatformColorProcs(SkColorType, SkMask::Format, SkColor); - /** * Public entry-point to return a blitcolor BlitLCD16RowProc. 
*/ diff --git a/src/core/SkBlitMask_D32.cpp b/src/core/SkBlitMask_D32.cpp index 210810801a..fb2991c919 100644 --- a/src/core/SkBlitMask_D32.cpp +++ b/src/core/SkBlitMask_D32.cpp @@ -8,68 +8,7 @@ #include "SkBlitMask.h" #include "SkColor.h" #include "SkColorPriv.h" - -static void D32_A8_Color(void* SK_RESTRICT dst, size_t dstRB, - const void* SK_RESTRICT maskPtr, size_t maskRB, - SkColor color, int width, int height) { - SkPMColor pmc = SkPreMultiplyColor(color); - size_t dstOffset = dstRB - (width << 2); - size_t maskOffset = maskRB - width; - SkPMColor* SK_RESTRICT device = (SkPMColor *)dst; - const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; - - do { - int w = width; - do { - unsigned aa = *mask++; - *device = SkBlendARGB32(pmc, *device, aa); - device += 1; - } while (--w != 0); - device = (uint32_t*)((char*)device + dstOffset); - mask += maskOffset; - } while (--height != 0); -} - -static void D32_A8_Opaque(void* SK_RESTRICT dst, size_t dstRB, - const void* SK_RESTRICT maskPtr, size_t maskRB, - SkColor color, int width, int height) { - SkPMColor pmc = SkPreMultiplyColor(color); - SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; - const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; - - maskRB -= width; - dstRB -= (width << 2); - do { - int w = width; - do { - unsigned aa = *mask++; - *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); - device += 1; - } while (--w != 0); - device = (uint32_t*)((char*)device + dstRB); - mask += maskRB; - } while (--height != 0); -} - -static void D32_A8_Black(void* SK_RESTRICT dst, size_t dstRB, - const void* SK_RESTRICT maskPtr, size_t maskRB, - SkColor, int width, int height) { - SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; - const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; - - maskRB -= width; - dstRB -= (width << 2); - do { - int w = width; - do { - unsigned aa = *mask++; - *device = (aa << SK_A32_SHIFT) + SkAlphaMulQ(*device, SkAlpha255To256(255 - 
aa)); - device += 1; - } while (--w != 0); - device = (uint32_t*)((char*)device + dstRB); - mask += maskRB; - } while (--height != 0); -} +#include "SkOpts.h" SkBlitMask::BlitLCD16RowProc SkBlitMask::BlitLCD16RowFactory(bool isOpaque) { BlitLCD16RowProc proc = PlatformBlitRowProcs16(isOpaque); @@ -112,51 +51,25 @@ static void D32_LCD16_Proc(void* SK_RESTRICT dst, size_t dstRB, /////////////////////////////////////////////////////////////////////////////// -static SkBlitMask::ColorProc D32_A8_Factory(SkColor color) { - if (SK_ColorBLACK == color) { - return D32_A8_Black; - } else if (0xFF == SkColorGetA(color)) { - return D32_A8_Opaque; - } else { - return D32_A8_Color; - } -} - -SkBlitMask::ColorProc SkBlitMask::ColorFactory(SkColorType ct, - SkMask::Format format, - SkColor color) { - ColorProc proc = PlatformColorProcs(ct, format, color); - if (proc) { - return proc; - } - - switch (ct) { - case kN32_SkColorType: - switch (format) { - case SkMask::kA8_Format: - return D32_A8_Factory(color); - case SkMask::kLCD16_Format: - return D32_LCD16_Proc; - default: - break; - } - break; - default: - break; - } - return NULL; -} - bool SkBlitMask::BlitColor(const SkPixmap& device, const SkMask& mask, const SkIRect& clip, SkColor color) { - ColorProc proc = ColorFactory(device.colorType(), mask.fFormat, color); - if (proc) { - int x = clip.fLeft; - int y = clip.fTop; - proc(device.writable_addr32(x, y), device.rowBytes(), mask.getAddr(x, y), - mask.fRowBytes, color, clip.width(), clip.height()); + int x = clip.fLeft, y = clip.fTop; + + if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kA8_Format) { + SkOpts::blit_mask_d32_a8(device.writable_addr32(x,y), device.rowBytes(), + (const SkAlpha*)mask.getAddr(x,y), mask.fRowBytes, + color, clip.width(), clip.height()); return true; } + + if (device.colorType() == kN32_SkColorType && mask.fFormat == SkMask::kLCD16_Format) { + // TODO: Is this reachable code? Seems like no. 
+ D32_LCD16_Proc(device.writable_addr32(x,y), device.rowBytes(), + mask.getAddr(x,y), mask.fRowBytes, + color, clip.width(), clip.height()); + return true; + } + return false; } diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 17eab7e51c..2bfc1af899 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -9,6 +9,7 @@ #include "SkOpts.h" #define SK_OPTS_NS portable +#include "SkBlitMask_opts.h" #include "SkBlurImageFilter_opts.h" #include "SkFloatingPoint_opts.h" #include "SkMorphologyImageFilter_opts.h" @@ -50,6 +51,8 @@ namespace SkOpts { decltype(texture_compressor) texture_compressor = portable::texture_compressor; decltype(fill_block_dimensions) fill_block_dimensions = portable::fill_block_dimensions; + decltype(blit_mask_d32_a8) blit_mask_d32_a8 = portable::blit_mask_d32_a8; + // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. void Init_sse2(); void Init_ssse3(); diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h index 206b7ff655..e0ef7dc424 100644 --- a/src/core/SkOpts.h +++ b/src/core/SkOpts.h @@ -43,6 +43,7 @@ namespace SkOpts { extern TextureCompressor (*texture_compressor)(SkColorType, SkTextureCompressor::Format); extern bool (*fill_block_dimensions)(SkTextureCompressor::Format, int* x, int* y); + extern void (*blit_mask_d32_a8)(SkPMColor*, size_t, const SkAlpha*, size_t, SkColor, int, int); } #endif//SkOpts_DEFINED diff --git a/src/opts/SkBlitMask_opts.h b/src/opts/SkBlitMask_opts.h new file mode 100644 index 0000000000..9129560ef4 --- /dev/null +++ b/src/opts/SkBlitMask_opts.h @@ -0,0 +1,37 @@ +/* + * Copyright 2015 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. 
+ */ + +#ifndef SkBlitMask_opts_DEFINED +#define SkBlitMask_opts_DEFINED + +#include "Sk4px.h" + +namespace SK_OPTS_NS { + +static void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, + const SkAlpha* mask, size_t maskRB, + SkColor color, int w, int h) { + auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); + + auto fn = [&](const Sk4px& d, const Sk4px& aa) { + // = (s + d(1-sa))aa + d(1-aa) + // = s*aa + d(1-sa*aa) + auto left = s.approxMulDiv255(aa), + right = d.approxMulDiv255(left.alphas().inv()); + return left + right; // This does not overflow (exhaustively checked). + }; + + while (h --> 0) { + Sk4px::MapDstAlpha(w, dst, mask, fn); + dst += dstRB / sizeof(*dst); + mask += maskRB / sizeof(*mask); + } +} + +} // SK_OPTS_NS + +#endif//SkBlitMask_opts_DEFINED diff --git a/src/opts/SkBlitMask_opts_arm.cpp b/src/opts/SkBlitMask_opts_arm.cpp index 11e172c0d1..e58be5c4a5 100644 --- a/src/opts/SkBlitMask_opts_arm.cpp +++ b/src/opts/SkBlitMask_opts_arm.cpp @@ -11,32 +11,6 @@ #include "SkUtilsArm.h" #include "SkBlitMask_opts_arm_neon.h" -SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT, - SkMask::Format maskFormat, - SkColor color) { -#if SK_ARM_NEON_IS_NONE - return NULL; -#else -/* ** This has been disabled until we can diagnose and fix the SIGILL generated - ** in the NEON code. See http://skbug.com/2067 for details. 
-#if SK_ARM_NEON_IS_DYNAMIC - if (!sk_cpu_arm_has_neon()) { - return NULL; - } -#endif - if ((kN32_SkColorType == dstCT) && - (SkMask::kA8_Format == maskFormat)) { - return D32_A8_Factory_neon(color); - } -*/ -#endif - - // We don't need to handle the SkMask::kLCD16_Format case as the default - // LCD16 will call us through SkBlitMask::PlatformBlitRowProcs16() - - return NULL; -} - SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { if (isOpaque) { return SK_ARM_NEON_WRAP(SkBlitLCD16OpaqueRow); diff --git a/src/opts/SkBlitMask_opts_arm_neon.cpp b/src/opts/SkBlitMask_opts_arm_neon.cpp index 3361a5d19d..ad12369db6 100644 --- a/src/opts/SkBlitMask_opts_arm_neon.cpp +++ b/src/opts/SkBlitMask_opts_arm_neon.cpp @@ -8,129 +8,6 @@ #include "SkBlitMask.h" #include "SkColor_opts_neon.h" -static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB, - const void* SK_RESTRICT maskPtr, size_t maskRB, - SkColor, int width, int height) { - SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; - const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; - - maskRB -= width; - dstRB -= (width << 2); - do { - int w = width; - while (w >= 8) { - uint8x8_t vmask = vld1_u8(mask); - uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); - uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); - - vdevice = SkAlphaMulQ_neon8(vdevice, vscale); - vdevice.val[NEON_A] += vmask; - - vst4_u8((uint8_t*)device, vdevice); - - mask += 8; - device += 8; - w -= 8; - } - while (w-- > 0) { - unsigned aa = *mask++; - *device = (aa << SK_A32_SHIFT) - + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); - device += 1; - }; - device = (uint32_t*)((char*)device + dstRB); - mask += maskRB; - } while (--height != 0); -} - -template <bool isColor> -static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, - const void* SK_RESTRICT maskPtr, size_t maskRB, - SkColor color, int width, int height) { - SkPMColor pmc = SkPreMultiplyColor(color); - SkPMColor* SK_RESTRICT device = 
(SkPMColor*)dst; - const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; - uint8x8x4_t vpmc; - - maskRB -= width; - dstRB -= (width << 2); - - if (width >= 8) { - vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); - vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); - vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); - vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); - } - do { - int w = width; - while (w >= 8) { - uint8x8_t vmask = vld1_u8(mask); - uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); - if (isColor) { - vscale = vsubw_u8(vdupq_n_u16(256), - SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); - } else { - vscale = vsubw_u8(vdupq_n_u16(256), vmask); - } - uint8x8x4_t vdev = vld4_u8((uint8_t*)device); - - vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) - + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); - vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) - + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); - vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) - + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); - vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) - + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); - - vst4_u8((uint8_t*)device, vdev); - - mask += 8; - device += 8; - w -= 8; - } - - while (w--) { - unsigned aa = *mask++; - if (isColor) { - *device = SkBlendARGB32(pmc, *device, aa); - } else { - *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) - + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); - } - device += 1; - }; - - device = (uint32_t*)((char*)device + dstRB); - mask += maskRB; - - } while (--height != 0); -} - -static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB, - const void* SK_RESTRICT maskPtr, size_t maskRB, - SkColor color, int width, int height) { - D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, height); -} - -static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB, - const void* SK_RESTRICT maskPtr, size_t maskRB, - SkColor color, 
int width, int height) { - D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, height); -} - -SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) { - if (SK_ColorBLACK == color) { - return D32_A8_Black_neon; - } else if (0xFF == SkColorGetA(color)) { - return D32_A8_Opaque_neon; - } else { - return D32_A8_Color_neon; - } -} - -//////////////////////////////////////////////////////////////////////////////// - void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[], SkColor color, int width, SkPMColor opaqueDst) { diff --git a/src/opts/SkBlitMask_opts_arm_neon.h b/src/opts/SkBlitMask_opts_arm_neon.h index 7af51c1c1a..86366a461a 100644 --- a/src/opts/SkBlitMask_opts_arm_neon.h +++ b/src/opts/SkBlitMask_opts_arm_neon.h @@ -11,8 +11,6 @@ #include "SkColor.h" #include "SkBlitMask.h" -extern SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color); - extern void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[], SkColor color, int width, SkPMColor opaqueDst); diff --git a/src/opts/SkBlitMask_opts_none.cpp b/src/opts/SkBlitMask_opts_none.cpp index 90f89a7129..5c318c731b 100644 --- a/src/opts/SkBlitMask_opts_none.cpp +++ b/src/opts/SkBlitMask_opts_none.cpp @@ -7,12 +7,6 @@ #include "SkBlitMask.h" -SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT, - SkMask::Format maskFormat, - SkColor color) { - return NULL; -} - SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { return NULL; } diff --git a/src/opts/SkBlitRow_opts_SSE2.cpp b/src/opts/SkBlitRow_opts_SSE2.cpp index 7f5b6779cf..c017f7e4b3 100644 --- a/src/opts/SkBlitRow_opts_SSE2.cpp +++ b/src/opts/SkBlitRow_opts_SSE2.cpp @@ -301,54 +301,6 @@ void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x, int y) } } -void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr, - size_t maskRB, SkColor origColor, - int width, int height) { - SkPMColor color = SkPreMultiplyColor(origColor); - size_t 
dstOffset = dstRB - (width << 2); - size_t maskOffset = maskRB - width; - SkPMColor* dst = (SkPMColor *)device; - const uint8_t* mask = (const uint8_t*)maskPtr; - do { - int count = width; - if (count >= 4) { - while (((size_t)dst & 0x0F) != 0 && (count > 0)) { - *dst = SkBlendARGB32(color, *dst, *mask); - mask++; - dst++; - count--; - } - __m128i *d = reinterpret_cast<__m128i*>(dst); - __m128i src_pixel = _mm_set1_epi32(color); - while (count >= 4) { - // Load 4 dst pixels - __m128i dst_pixel = _mm_load_si128(d); - - // Set the alpha value - __m128i alpha_wide = _mm_cvtsi32_si128(*reinterpret_cast<const uint32_t*>(mask)); - alpha_wide = _mm_unpacklo_epi8(alpha_wide, _mm_setzero_si128()); - alpha_wide = _mm_unpacklo_epi16(alpha_wide, _mm_setzero_si128()); - - __m128i result = SkBlendARGB32_SSE2(src_pixel, dst_pixel, alpha_wide); - _mm_store_si128(d, result); - // Load the next 4 dst pixels and alphas - mask = mask + 4; - d++; - count -= 4; - } - dst = reinterpret_cast<SkPMColor*>(d); - } - while (count > 0) { - *dst= SkBlendARGB32(color, *dst, *mask); - dst += 1; - mask++; - count --; - } - dst = (SkPMColor *)((char*)dst + dstOffset); - mask += maskOffset; - } while (--height != 0); -} - // The following (left) shifts cause the top 5 bits of the mask components to // line up with the corresponding components in an SkPMColor. // Note that the mask's RGB16 order may differ from the SkPMColor order. 
diff --git a/src/opts/SkBlitRow_opts_SSE2.h b/src/opts/SkBlitRow_opts_SSE2.h index 6c0611f49f..560edf4923 100644 --- a/src/opts/SkBlitRow_opts_SSE2.h +++ b/src/opts/SkBlitRow_opts_SSE2.h @@ -25,10 +25,6 @@ void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, void Color32A_D565_SSE2(uint16_t dst[], SkPMColor src, int count, int x, int y); -void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* mask, - size_t maskRB, SkColor color, - int width, int height); - void SkBlitLCD16Row_SSE2(SkPMColor dst[], const uint16_t src[], SkColor color, int width, SkPMColor); void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t src[], diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index bc3ca3e641..789a977238 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -8,6 +8,7 @@ #include "SkOpts.h" #define SK_OPTS_NS neon +#include "SkBlitMask_opts.h" #include "SkBlurImageFilter_opts.h" #include "SkFloatingPoint_opts.h" #include "SkMorphologyImageFilter_opts.h" @@ -33,5 +34,7 @@ namespace SkOpts { texture_compressor = neon::texture_compressor; fill_block_dimensions = neon::fill_block_dimensions; + + blit_mask_d32_a8 = neon::blit_mask_d32_a8; } } diff --git a/src/opts/SkOpts_sse2.cpp b/src/opts/SkOpts_sse2.cpp index 8837efee55..3440676935 100644 --- a/src/opts/SkOpts_sse2.cpp +++ b/src/opts/SkOpts_sse2.cpp @@ -8,6 +8,7 @@ #include "SkOpts.h" #define SK_OPTS_NS sse2 +#include "SkBlitMask_opts.h" #include "SkBlurImageFilter_opts.h" #include "SkMorphologyImageFilter_opts.h" #include "SkUtils_opts.h" @@ -27,5 +28,7 @@ namespace SkOpts { dilate_y = sse2::dilate_y; erode_x = sse2::erode_x; erode_y = sse2::erode_y; + + blit_mask_d32_a8 = sse2::blit_mask_d32_a8; } } diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp index c560b914f9..34045d4977 100644 --- a/src/opts/opts_check_x86.cpp +++ b/src/opts/opts_check_x86.cpp @@ -252,30 +252,6 @@ SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 
//////////////////////////////////////////////////////////////////////////////// -SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT, - SkMask::Format maskFormat, - SkColor color) { - if (SkMask::kA8_Format != maskFormat) { - return NULL; - } - - ColorProc proc = NULL; - if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { - switch (dstCT) { - case kN32_SkColorType: - // The SSE2 version is not (yet) faster for black, so we check - // for that. - if (SK_ColorBLACK != color) { - proc = SkARGB32_A8_BlitMask_SSE2; - } - break; - default: - break; - } - } - return proc; -} - SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { if (isOpaque) {