From 490b61569d27c9b7ba164fbc4394994d2e7cb022 Mon Sep 17 00:00:00 2001 From: mtklein Date: Fri, 31 Jul 2015 11:50:27 -0700 Subject: [PATCH] Port SkXfermode opts to SkOpts.h Renames Sk4pxXfermode.h to SkXfermode_opts.h, and refactors it a tiny bit internally. This moves xfermode optimization from being "compile-time everywhere but NEON" to simply "runtime everywhere". I don't anticipate any effect on perf or correctness. BUG=skia:4117 Review URL: https://codereview.chromium.org/1264543006 --- gyp/opts.gypi | 7 ----- include/core/SkFloatingPoint.h | 5 +++- include/private/SkOpts.h | 6 ++++ src/core/SkOpts.cpp | 9 +++--- src/core/SkXfermode.cpp | 19 ++----------- src/opts/SkOpts_neon.cpp | 10 ++++--- src/opts/SkOpts_sse2.cpp | 6 ++-- .../SkXfermode_opts.h} | 11 +------- src/opts/SkXfermode_opts_arm.cpp | 28 ------------------- src/opts/SkXfermode_opts_arm_neon.cpp | 14 ---------- src/opts/SkXfermode_opts_none.cpp | 20 ------------- 11 files changed, 28 insertions(+), 107 deletions(-) rename src/{core/Sk4pxXfermode.h => opts/SkXfermode_opts.h} (96%) delete mode 100644 src/opts/SkXfermode_opts_arm.cpp delete mode 100644 src/opts/SkXfermode_opts_arm_neon.cpp delete mode 100644 src/opts/SkXfermode_opts_none.cpp diff --git a/gyp/opts.gypi b/gyp/opts.gypi index 0c3cf208ca..3b0dc7a460 100644 --- a/gyp/opts.gypi +++ b/gyp/opts.gypi @@ -10,7 +10,6 @@ '<(skia_src_path)/opts/SkBlurImage_opts_none.cpp', '<(skia_src_path)/opts/SkMorphology_opts_none.cpp', '<(skia_src_path)/opts/SkTextureCompression_opts_none.cpp', - '<(skia_src_path)/opts/SkXfermode_opts_none.cpp', ], 'armv7_sources': [ @@ -20,7 +19,6 @@ '<(skia_src_path)/opts/SkBlurImage_opts_arm.cpp', '<(skia_src_path)/opts/SkMorphology_opts_arm.cpp', '<(skia_src_path)/opts/SkTextureCompression_opts_arm.cpp', - '<(skia_src_path)/opts/SkXfermode_opts_arm.cpp', ], 'neon_sources': [ '<(skia_src_path)/opts/SkBitmapProcState_arm_neon.cpp', @@ -30,7 +28,6 @@ '<(skia_src_path)/opts/SkBlurImage_opts_neon.cpp', '<(skia_src_path)/opts/SkMorphology_opts_neon.cpp', '<(skia_src_path)/opts/SkTextureCompression_opts_neon.cpp', - '<(skia_src_path)/opts/SkXfermode_opts_arm_neon.cpp', '<(skia_src_path)/opts/SkOpts_neon.cpp', ], 'arm64_sources': [ @@ -46,8 +43,6 @@ '<(skia_src_path)/opts/SkMorphology_opts_arm.cpp', '<(skia_src_path)/opts/SkMorphology_opts_neon.cpp', '<(skia_src_path)/opts/SkTextureCompression_opts_none.cpp', - '<(skia_src_path)/opts/SkXfermode_opts_arm.cpp', - '<(skia_src_path)/opts/SkXfermode_opts_arm_neon.cpp', '<(skia_src_path)/opts/SkOpts_neon.cpp', ], @@ -58,7 +53,6 @@ '<(skia_src_path)/opts/SkBlurImage_opts_none.cpp', '<(skia_src_path)/opts/SkMorphology_opts_none.cpp', '<(skia_src_path)/opts/SkTextureCompression_opts_none.cpp', - '<(skia_src_path)/opts/SkXfermode_opts_none.cpp', ], 'sse2_sources': [ @@ -68,7 +62,6 @@ '<(skia_src_path)/opts/SkBlurImage_opts_SSE2.cpp', '<(skia_src_path)/opts/SkMorphology_opts_SSE2.cpp', '<(skia_src_path)/opts/SkTextureCompression_opts_none.cpp', - '<(skia_src_path)/opts/SkXfermode_opts_none.cpp', '<(skia_src_path)/opts/opts_check_x86.cpp', '<(skia_src_path)/opts/SkOpts_sse2.cpp', ], diff --git a/include/core/SkFloatingPoint.h b/include/core/SkFloatingPoint.h index 2ca1d088d3..7427562323 100644 --- a/include/core/SkFloatingPoint.h +++ b/include/core/SkFloatingPoint.h @@ -11,7 +11,6 @@ #define SkFloatingPoint_DEFINED #include "SkTypes.h" -#include "../private/SkOpts.h" #include #include @@ -128,6 +127,10 @@ extern const uint32_t gIEEENegativeInfinity; #define SK_FloatInfinity (*SkTCast(&gIEEEInfinity)) #define SK_FloatNegativeInfinity (*SkTCast(&gIEEENegativeInfinity)) +// We forward declare this to break an #include cycle. +// (SkScalar -> SkFloatingPoint -> SkOpts.h -> SkXfermode -> SkColor -> SkScalar) +namespace SkOpts { extern float (*rsqrt)(float); } + // Fast, approximate inverse square root. // Compare to name-brand "1.0f / sk_float_sqrt(x)". Should be around 10x faster on SSE, 2x on NEON. static inline float sk_float_rsqrt(const float x) { diff --git a/include/private/SkOpts.h b/include/private/SkOpts.h index 0594588e29..9239f8efec 100644 --- a/include/private/SkOpts.h +++ b/include/private/SkOpts.h @@ -9,6 +9,9 @@ #define SkOpts_DEFINED #include "SkTypes.h" +#include "SkXfermode.h" + +struct ProcCoeff; namespace SkOpts { // Call to replace pointers to portable functions with pointers to CPU-specific functions. @@ -24,6 +27,9 @@ namespace SkOpts { // See SkUtils.h extern void (*memset16)(uint16_t[], uint16_t, int); extern void (*memset32)(uint32_t[], uint32_t, int); + + // May return nullptr if we haven't specialized the given Mode. + extern SkXfermode* (*create_xfermode)(const ProcCoeff&, SkXfermode::Mode); } #endif//SkOpts_DEFINED diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp index 45715a5c15..c833f19c81 100644 --- a/src/core/SkOpts.cpp +++ b/src/core/SkOpts.cpp @@ -7,6 +7,7 @@ #include "SkOnce.h" #include "SkOpts.h" +#include "SkXfermode_opts.h" #if defined(SK_CPU_X86) #if defined(SK_BUILD_FOR_WIN32) @@ -41,10 +42,10 @@ static void memsetT(T dst[], T val, int n) { while (n --> 0) { *dst++ = val; } } namespace SkOpts { // Define default function pointer values here... - decltype(rsqrt) rsqrt = portable::rsqrt; - decltype(memset16) memset16 = portable::memsetT; - decltype(memset32) memset32 = portable::memsetT; - + decltype(rsqrt) rsqrt = portable::rsqrt; + decltype(memset16) memset16 = portable::memsetT; + decltype(memset32) memset32 = portable::memsetT; + decltype(create_xfermode) create_xfermode = SkCreate4pxXfermode; // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. void Init_sse2(); diff --git a/src/core/SkXfermode.cpp b/src/core/SkXfermode.cpp index 46703aa654..59b689ceeb 100644 --- a/src/core/SkXfermode.cpp +++ b/src/core/SkXfermode.cpp @@ -8,10 +8,10 @@ #include "SkXfermode.h" #include "SkXfermode_proccoeff.h" -#include "Sk4pxXfermode.h" #include "SkColorPriv.h" #include "SkLazyPtr.h" #include "SkMathPriv.h" +#include "SkOpts.h" #include "SkPMFloat.h" #include "SkReadBuffer.h" #include "SkString.h" @@ -997,30 +997,15 @@ void SkProcCoeffXfermode::toString(SkString* str) const { #endif -extern SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, SkXfermode::Mode mode); -extern SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); - // Technically, can't be static and passed as a template parameter. So we use anonymous namespace. namespace { SkXfermode* create_mode(int iMode) { SkXfermode::Mode mode = (SkXfermode::Mode)iMode; ProcCoeff rec = gProcCoeffs[mode]; - if (auto proc = SkPlatformXfermodeProcFactory(mode)) { - rec.fProc = proc; - } - - // Check for compile-time SIMD xfermode. - if (auto xfermode = SkCreate4pxXfermode(rec, mode)) { + if (auto xfermode = SkOpts::create_xfermode(rec, mode)) { return xfermode; } - - // Check for runtime-detected SIMD xfermode. - if (auto xfermode = SkPlatformXfermodeFactory(rec, mode)) { - return xfermode; - } - - // Serial fallback. return SkNEW_ARGS(SkProcCoeffXfermode, (rec, mode)); } } // namespace diff --git a/src/opts/SkOpts_neon.cpp b/src/opts/SkOpts_neon.cpp index cbb247efe5..775cdd4bbc 100644 --- a/src/opts/SkOpts_neon.cpp +++ b/src/opts/SkOpts_neon.cpp @@ -5,8 +5,9 @@ * found in the LICENSE file. */ -#include "SkOpts.h" #include "SkFloatingPoint.h" +#include "SkOpts.h" +#include "SkXfermode_opts.h" namespace neon { // This helps identify methods from this file when debugging / profiling. @@ -66,8 +67,9 @@ static void memset32(uint32_t* dst, uint32_t value, int n) { namespace SkOpts { void Init_neon() { - rsqrt = neon::rsqrt; - memset16 = neon::memset16; - memset32 = neon::memset32; + rsqrt = neon::rsqrt; + memset16 = neon::memset16; + memset32 = neon::memset32; + create_xfermode = SkCreate4pxXfermode; } } diff --git a/src/opts/SkOpts_sse2.cpp b/src/opts/SkOpts_sse2.cpp index 69c55bc698..d80c6ff352 100644 --- a/src/opts/SkOpts_sse2.cpp +++ b/src/opts/SkOpts_sse2.cpp @@ -6,6 +6,7 @@ */ #include "SkOpts.h" +#include "SkXfermode_opts.h" namespace sse2 { // This helps identify methods from this file when debugging / profiling. @@ -49,7 +50,8 @@ static void memset32(uint32_t* dst, uint32_t val, int n) { namespace SkOpts { void Init_sse2() { - memset16 = sse2::memset16; - memset32 = sse2::memset32; + memset16 = sse2::memset16; + memset32 = sse2::memset32; + create_xfermode = SkCreate4pxXfermode; } } diff --git a/src/core/Sk4pxXfermode.h b/src/opts/SkXfermode_opts.h similarity index 96% rename from src/core/Sk4pxXfermode.h rename to src/opts/SkXfermode_opts.h index ad822edb8b..6bc76fe559 100644 --- a/src/core/Sk4pxXfermode.h +++ b/src/opts/SkXfermode_opts.h @@ -16,13 +16,6 @@ // Each gets its own independent instantiation by wrapping in an anonymous namespace. namespace { -#if defined(SK_CPU_ARM32) && !defined(SK_ARM_HAS_NEON) - // Signals SkXfermode.cpp to look for runtime-detected NEON. - static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) { - return nullptr; - } -#else - // Most xfermodes can be done most efficiently 4 pixels at a time in 8 or 16-bit fixed point. #define XFERMODE(Name) static Sk4px SK_VECTORCALL Name(Sk4px s, Sk4px d) @@ -283,7 +276,7 @@ private: typedef SkProcCoeffXfermode INHERITED; }; -static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) { +static SkXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode::Mode mode) { switch (mode) { #define CASE(Mode) case SkXfermode::k##Mode##_Mode: \ return SkNEW_ARGS(Sk4pxXfermode, (rec, mode, &Mode, &xfer_aa)) @@ -323,8 +316,6 @@ static SkProcCoeffXfermode* SkCreate4pxXfermode(const ProcCoeff& rec, SkXfermode return nullptr; } -#endif - } // namespace #endif//Sk4pxXfermode_DEFINED diff --git a/src/opts/SkXfermode_opts_arm.cpp b/src/opts/SkXfermode_opts_arm.cpp deleted file mode 100644 index 05c330fe81..0000000000 --- a/src/opts/SkXfermode_opts_arm.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "SkXfermode.h" -#include "SkXfermode_proccoeff.h" -#include "SkUtilsArm.h" - -// If we find we do have NEON, we'll call this method from SkXfermodes_opts_arm_neon.cpp. -SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, - SkXfermode::Mode mode); - -// If we don't have NEON, we'll call this method and return NULL. -SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, SkXfermode::Mode mode); -SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, SkXfermode::Mode mode) { - return NULL; -} - -SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, SkXfermode::Mode mode); -SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, SkXfermode::Mode mode) { - return SK_ARM_NEON_WRAP(SkPlatformXfermodeFactory_impl)(rec, mode); -} - -SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); -SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) { return NULL; } diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp deleted file mode 100644 index ae0fd17b25..0000000000 --- a/src/opts/SkXfermode_opts_arm_neon.cpp +++ /dev/null @@ -1,14 +0,0 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -// Including Sk4pxXfermode.h from this file should find SK_ARM_HAS_NEON is defined. -#include "Sk4pxXfermode.h" - -SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& r, SkXfermode::Mode m); -SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& r, SkXfermode::Mode m) { - return SkCreate4pxXfermode(r, m); -} diff --git a/src/opts/SkXfermode_opts_none.cpp b/src/opts/SkXfermode_opts_none.cpp deleted file mode 100644 index 832d92eccd..0000000000 --- a/src/opts/SkXfermode_opts_none.cpp +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Copyright 2015 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "SkXfermode.h" -#include "SkXfermode_proccoeff.h" - - -SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, SkXfermode::Mode mode); -SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, SkXfermode::Mode mode) { - return NULL; -} - -SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); -SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) { - return NULL; -}