Move immintrin/arm_neon includes to where they are used.

On my Mac (so, immintrin), this improves compile time, both wall and cpu,
by about 16%.  To test I ran this on an SSD with files hot in their caches:

  $ env CC=/usr/bin/clang CXX=/usr/bin/clang++ ./gyp_skia && \
    ninja -C out/Release -t clean && \
    time ninja -C out/Release

  Before: 159 wall / 3367 cpu
          159 wall / 3368 cpu

  After:  137 wall / 2860 cpu
          136 wall / 2863 cpu

I also tried further refining immintrin down to emmintrin / tmmintrin / smmintrin etc.
That made no significant difference, so I've kept immintrin for its simplicity.

BUG=skia:
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2045633002
CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot

TBR=reed@google.com
No public API changes.

Review-Url: https://codereview.chromium.org/2045633002
This commit is contained in:
mtklein 2016-06-07 09:35:27 -07:00 committed by Commit bot
parent d62e28b19a
commit 12dfaaa53c
6 changed files with 19 additions and 6 deletions

View File

@ -14,12 +14,6 @@
#include "SkPostConfig.h" #include "SkPostConfig.h"
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#if defined(SK_ARM_HAS_NEON)
#include <arm_neon.h>
#elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
#include <immintrin.h>
#endif
// IWYU pragma: end_exports // IWYU pragma: end_exports
#include <string.h> #include <string.h>

View File

@ -15,6 +15,12 @@
#include <math.h> #include <math.h>
#include <float.h> #include <float.h>
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE1
#include <xmmintrin.h>
#elif defined(SK_ARM_HAS_NEON)
#include <arm_neon.h>
#endif
// For _POSIX_VERSION // For _POSIX_VERSION
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__))
#include <unistd.h> #include <unistd.h>

View File

@ -11,6 +11,10 @@
#include "SkColorPriv.h" #include "SkColorPriv.h"
#include "SkTypes.h" #include "SkTypes.h"
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
#include <immintrin.h>
#endif
namespace SK_OPTS_NS { namespace SK_OPTS_NS {
enum class BlurDirection { kX, kY }; enum class BlurDirection { kX, kY };

View File

@ -8,6 +8,8 @@
#ifndef SkNx_neon_DEFINED #ifndef SkNx_neon_DEFINED
#define SkNx_neon_DEFINED #define SkNx_neon_DEFINED
#include <arm_neon.h>
#define SKNX_IS_FAST #define SKNX_IS_FAST
// ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it: // ARMv8 has vrndmq_f32 to floor 4 floats. Here we emulate it:

View File

@ -9,6 +9,7 @@
#define SkNx_sse_DEFINED #define SkNx_sse_DEFINED
#include "SkCpu.h" #include "SkCpu.h"
#include <immintrin.h>
// This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent.
// If you do, make sure this is in a static inline function... anywhere else risks violating ODR. // If you do, make sure this is in a static inline function... anywhere else risks violating ODR.

View File

@ -10,6 +10,12 @@
#include "SkColorPriv.h" #include "SkColorPriv.h"
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
#include <immintrin.h>
#elif defined(SK_ARM_HAS_NEON)
#include <arm_neon.h>
#endif
namespace SK_OPTS_NS { namespace SK_OPTS_NS {
static void RGBA_to_rgbA_portable(uint32_t* dst, const void* vsrc, int count) { static void RGBA_to_rgbA_portable(uint32_t* dst, const void* vsrc, int count) {