arm: First step towards dynamic NEON support.

This patch adds minimal dynamic ARM NEON support,
i.e. the ability to probe the CPU at runtime for NEON and
provide alternate code paths when it is available.

- Add include/core/SkUtilsArm.h, which declares a few helper
  macros (e.g. SK_ARM_NEON_IS_DYNAMIC), plus the handy
  function 'sk_cpu_arm_has_neon()' which returns true if
  the target CPU supports the ARM NEON instruction set.

  Note that the header is in include/core/ because it will
  have to be included from NEON-specific code under src/core/

  It would probably be more logical to put it under include/opts/
  instead, but this would require moving all the NEON-specific
  stuff under src/core/ into src/opts/, which is not trivial
  due to the way the code is currently architected.

- Add src/core/SkUtilsArm.cpp which implements
  'sk_cpu_arm_has_neon' for ARM-based Linux systems, only
  when SK_ARM_NEON_IS_DYNAMIC is true.

  (For other cases, 'sk_cpu_arm_has_neon' is an inline function
   that returns a constant 'true' or 'false' value).

  There is no user-level accessible CPUID instruction on ARM,
  so do all CPU feature probing by parsing /proc/cpuinfo.
  This is Linux-specific.

  For Debug build types, the CPU probing result is printed
  to the Android log (or Linux command-line) for easier
  debugging.

- Create a new 'opts_neon' target (static library) which shall
  contain all the NEON-specific code paths for the library.

  This is necessary because -mfpu=neon also affects scalar
  (non-NEON) code. Just like with -mssse3 on x86, we can't build
  the rest of the library with this flag.

  Note that for now, we only include memset16_neon and
  memset32_neon in this library.

- Modify opts_check_arm.cpp to implement SK_ARM_NEON_IS_DYNAMIC
  properly.

Compared to a 'xoom' build, the only difference is the use of
NEON-optimized memset16/32 functions. Later patches will move
more NEON-specific code paths to 'opts_neon'.
Review URL: https://codereview.appspot.com/6247058

git-svn-id: http://skia.googlecode.com/svn/trunk@4069 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
digit@google.com 2012-05-30 13:54:41 +00:00
parent c20bc25b6e
commit eec9dbcace
6 changed files with 310 additions and 15 deletions

View File

@ -192,6 +192,9 @@
],
}],
[ 'skia_target_arch == "arm" and armv7 == 1', {
'variables': {
'arm_neon_optional%': 0,
},
'defines': [
'__ARM_ARCH__=7',
],
@ -207,7 +210,12 @@
'cflags': [
'-mfpu=neon',
],
}],
}],
[ 'arm_neon_optional == 1', {
'defines': [
'__ARM_HAVE_OPTIONAL_NEON_SUPPORT',
],
}],
],
}],
],

View File

@ -233,6 +233,7 @@
'../include/core/SkUnPreMultiply.h',
'../include/core/SkUnitMapper.h',
'../include/core/SkUtils.h',
'../include/core/SkUtilsArm.h',
'../include/core/SkWeakRefCnt.h',
'../include/core/SkWriter32.h',
'../include/core/SkXfermode.h',
@ -304,7 +305,13 @@
'dependencies': [
'android_system.gyp:ft2',
],
}],
}],
[ 'skia_os == "android" and skia_target_arch == "arm" and armv7 == 1', {
# The code in SkUtilsArm.cpp can be used on an ARM-based Linux system, not only Android.
'sources': [
'../src/core/SkUtilsArm.cpp',
],
}],
],
'direct_dependent_settings': {
'include_dirs': [

View File

@ -56,14 +56,22 @@
'cflags': [
'-fomit-frame-pointer',
],
'variables': {
# NOTE(review): the default value below ends in '>' rather than ')'. A gyp
# variable expansion is normally written '<(name)', so this looks like a typo
# for '<(arm_neon_optional)' -- confirm against the gyp input-format docs and
# the definition of arm_neon_optional before changing it.
'arm_neon_optional%': '<(arm_neon_optional>',
},
'sources': [
'../src/opts/opts_check_arm.cpp',
'../src/opts/memset.arm.S',
'../src/opts/memset16_neon.S',
'../src/opts/memset32_neon.S',
'../src/opts/SkBitmapProcState_opts_arm.cpp',
'../src/opts/SkBlitRow_opts_arm.cpp',
],
'conditions': [
[ 'arm_neon == 1 or arm_neon_optional == 1', {
'dependencies': [
'opts_neon',
]
}]
],
}],
[ 'skia_target_arch == "arm" and armv7 != 1', {
'sources': [
@ -107,6 +115,33 @@
}],
],
},
# NEON code must be compiled with -mfpu=neon which also affects scalar
# code. To support dynamic NEON code paths, we need to build all
# NEON-specific sources in a separate static library. The situation
# is very similar to the SSSE3 one.
# Static library that holds the sources which must be compiled with
# -mfpu=neon; at this point only the two NEON memset routines.
{
'target_name': 'opts_neon',
'type': 'static_library',
'include_dirs': [
'../include/config',
'../include/core',
'../src/core',
],
# Flags to strip from this target's cflags: the frame-pointer setting and any
# non-NEON -mfpu choice, so that only the flags listed in 'cflags' below apply.
'cflags!': [
'-fno-omit-frame-pointer',
'-mfpu=vfp', # remove them all, just in case.
'-mfpu=vfpv3',
'-mfpu=vfpv3-d16',
],
# Flags added for every source in this target.
'cflags': [
'-fomit-frame-pointer',
'-mfpu=neon',
],
# NEON-only sources; these same two files are also listed in the ARMv7
# 'sources' list earlier in this file.
'sources': [
'../src/opts/memset16_neon.S',
'../src/opts/memset32_neon.S',
],
},
],
}

53
include/core/SkUtilsArm.h Normal file
View File

@ -0,0 +1,53 @@
/*
* Copyright 2012 The Android Open Source Project
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkUtilsArm_DEFINED
#define SkUtilsArm_DEFINED
#include "SkUtils.h"
// Define SK_ARM_NEON_MODE to one of the following values
// corresponding respectively to:
// - No ARM Neon support at all  (not targeting ARMv7-A, or don't have NEON)
// - Full ARM Neon support       (i.e. assume the CPU always supports it)
// - Optional ARM Neon support   (i.e. probe CPU at runtime)
//
#define SK_ARM_NEON_MODE_NONE     0
#define SK_ARM_NEON_MODE_ALWAYS   1
#define SK_ARM_NEON_MODE_DYNAMIC  2

// The mode is selected from compiler/build-system defines:
// __ARM_HAVE_OPTIONAL_NEON_SUPPORT takes precedence over __ARM_HAVE_NEON,
// and anything that is not an __arm__ build falls back to NONE.
#if defined(__arm__) && defined(__ARM_HAVE_OPTIONAL_NEON_SUPPORT)
#  define SK_ARM_NEON_MODE  SK_ARM_NEON_MODE_DYNAMIC
#elif defined(__arm__) && defined(__ARM_HAVE_NEON)
#  define SK_ARM_NEON_MODE  SK_ARM_NEON_MODE_ALWAYS
#else
#  define SK_ARM_NEON_MODE  SK_ARM_NEON_MODE_NONE
#endif

// Convenience test macros, always defined as 0 or 1
#define SK_ARM_NEON_IS_NONE    (SK_ARM_NEON_MODE == SK_ARM_NEON_MODE_NONE)
#define SK_ARM_NEON_IS_ALWAYS  (SK_ARM_NEON_MODE == SK_ARM_NEON_MODE_ALWAYS)
#define SK_ARM_NEON_IS_DYNAMIC (SK_ARM_NEON_MODE == SK_ARM_NEON_MODE_DYNAMIC)

// The sk_cpu_arm_has_neon() function returns true iff the target device
// is ARMv7-A and supports Neon instructions. In DYNAMIC mode, this actually
// probes the CPU at runtime (and caches the result).
//
// In NONE and ALWAYS modes the answer is a compile-time constant, so the
// function is defined here as 'static inline': every translation unit gets
// its own foldable copy, and units that include this header without calling
// the function do not trigger unused-function warnings.
#if SK_ARM_NEON_IS_NONE
static inline bool sk_cpu_arm_has_neon(void) {
    return false;
}
#elif SK_ARM_NEON_IS_ALWAYS
static inline bool sk_cpu_arm_has_neon(void) {
    return true;
}
#else // SK_ARM_NEON_IS_DYNAMIC
// Implemented out of line (see SkUtilsArm.cpp).
extern bool sk_cpu_arm_has_neon(void);
#endif
#endif // SkUtilsArm_DEFINED

178
src/core/SkUtilsArm.cpp Normal file
View File

@ -0,0 +1,178 @@
/*
* Copyright 2012 The Android Open Source Project
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkUtilsArm.h"
#if SK_ARM_NEON_IS_DYNAMIC
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <pthread.h>
// NEON_DEBUG enables tracing of the CPU feature probe (and, in the probe
// itself, a manual override). It is always defined, as 1 for SK_DEBUG
// builds and 0 otherwise.
#if defined(SK_DEBUG)
#define NEON_DEBUG 1
#else
#define NEON_DEBUG 0
#endif

// Debug builds for Android read the override from a system property, which
// needs PROP_VALUE_MAX and __system_property_get().
#if NEON_DEBUG && defined(SK_BUILD_FOR_ANDROID)
#include <sys/system_properties.h>
#endif
// Determines at runtime whether the target CPU supports the ARM NEON
// instruction set. This implementation is Linux-specific: it reads
// /proc/cpuinfo and looks for a 'neon' entry on the "Features" line.
//
// Returns true only when that entry is found. Every failure (cannot open or
// read the file, no "Features" line, no 'neon' flag) yields false.
//
// When NEON_DEBUG is non-zero the result can be forced through a system
// property (Android) or an environment variable (other Linux), and the
// probing steps are logged with SkDebugf().
static bool sk_cpu_arm_check_neon(void)
{
    bool result = false;

#if NEON_DEBUG
    // Allow forcing the mode through the environment during debugging.
    // Both preprocessor branches below open the same 'if' block, which is
    // closed after the two strcmp() checks.
#  ifdef SK_BUILD_FOR_ANDROID
    // On Android, we use a system property
#   define PROP_NAME  "debug.skia.arm_neon_mode"
    char prop[PROP_VALUE_MAX];
    if (__system_property_get(PROP_NAME, prop) > 0) {
#  else
#   define PROP_NAME   "SKIA_ARM_NEON_MODE"
    // On ARM Linux, we use an environment variable
    const char* prop = getenv(PROP_NAME);
    if (prop != NULL) {
#  endif
        SkDebugf("%s: %s\n", PROP_NAME, prop);
        if (!strcmp(prop, "1")) {
            SkDebugf("Forcing ARM Neon mode to full!\n");
            return true;
        }
        if (!strcmp(prop, "0")) {
            SkDebugf("Disabling ARM NEON mode\n");
            return false;
        }
        // Any other value falls through to the real probe.
    }
    SkDebugf("Running dynamic CPU feature detection\n");
#endif

    // There is no user-accessible CPUID instruction on ARM that we can use.
    // Instead, we must parse /proc/cpuinfo and look for the 'neon' feature.
    // For example, here's a typical output (Nexus S running ICS 4.0.3):
    /*
    Processor       : ARMv7 Processor rev 2 (v7l)
    BogoMIPS        : 994.65
    Features        : swp half thumb fastmult vfp edsp thumbee neon vfpv3
    CPU implementer : 0x41
    CPU architecture: 7
    CPU variant     : 0x2
    CPU part        : 0xc08
    CPU revision    : 2

    Hardware        : herring
    Revision        : 000b
    Serial          : 3833c77d6dc000ec
    */
    char buffer[4096];

    // If we fail any of the following, assume we don't have NEON instructions.
    // 'result' is still false, so every error path can simply 'break'.
    do {
        // open /proc/cpuinfo
        int fd = TEMP_FAILURE_RETRY(open("/proc/cpuinfo", O_RDONLY));
        if (fd < 0) {
            SkDebugf("Could not open /proc/cpuinfo: %s\n", strerror(errno));
            break;
        }

        // Read the file. To simplify our search, we're going to place two
        // sentinel '\n' characters: one at the start of the buffer, and one at
        // the end. This means we reserve the first and last buffer bytes.
        buffer[0] = '\n';
        int size = TEMP_FAILURE_RETRY(read(fd, buffer+1, sizeof(buffer)-2));
        // Capture errno now: close() below may overwrite it before we get to
        // report a read failure.
        const int read_errno = errno;
        close(fd);

        if (size < 0) {  // should not happen
            SkDebugf("Could not read /proc/cpuinfo: %s\n", strerror(read_errno));
            break;
        }

        SkDebugf("START /proc/cpuinfo:\n%.*s\nEND /proc/cpuinfo\n",
                 size, buffer+1);

        // Compute buffer limit, and place final sentinel
        char* buffer_end = buffer + 1 + size;
        buffer_end[0] = '\n';

        // Now, find a line that starts with "Features", i.e. look for
        // '\nFeatures\t' in our buffer.
        const char features[] = "\nFeatures\t";
        const size_t features_len = sizeof(features)-1;

        char* line = (char*) memmem(buffer, buffer_end - buffer,
                                    features, features_len);
        if (line == NULL) {  // Weird, no Features line, bad kernel?
            SkDebugf("Could not find a line starting with 'Features' "
                     "in /proc/cpuinfo ?\n");
            break;
        }

        line += features_len;  // Skip the "\nFeatures\t" prefix

        // Find the end of the current line. If no newline is found inside the
        // data, the line runs up to the final sentinel.
        char* line_end = (char*) memchr(line, '\n', buffer_end - line);
        if (line_end == NULL)
            line_end = buffer_end;

        // Now find an instance of 'neon' in the flags list. We want to
        // ensure it's only 'neon' and not something fancy like 'noneon'
        // so check that it follows a space, and that it is itself followed
        // by a space or a newline (so 'neonfoo' is rejected too).
        //
        // Every candidate on the line is examined, not just the first one,
        // so a longer flag that merely starts with 'neon' cannot hide a
        // genuine 'neon' entry appearing later on the same line.
        const char neon[] = " neon";
        const size_t neon_len = sizeof(neon)-1;
        const char* scan = line;
        while (scan < line_end) {
            const char* flag = (const char*) memmem(scan, line_end - scan,
                                                    neon, neon_len);
            if (flag == NULL)
                break;

            // flag + neon_len is at most line_end, which always points at a
            // '\n' (either a real one or the sentinel), so this read is in
            // bounds.
            if (flag[neon_len] == ' ' || flag[neon_len] == '\n') {
                // Fine, we support Arm NEON !
                result = true;
                break;
            }
            scan = flag + neon_len;
        }

    } while (0);

    if (result) {
        SkDebugf("Device supports ARM NEON instructions!\n");
    } else {
        SkDebugf("Device does NOT support ARM NEON instructions!\n");
    }
    return result;
}
// One-time initialization control for the NEON probe. POSIX leaves
// pthread_once() undefined unless the control object is initialized with
// PTHREAD_ONCE_INIT, so do not rely on static zero-initialization here.
static pthread_once_t  sOnce = PTHREAD_ONCE_INIT;

// Cached probe result, written by sk_cpu_arm_probe_features().
static bool            sHasArmNeon;

// called through pthread_once()
void sk_cpu_arm_probe_features(void)
{
    sHasArmNeon = sk_cpu_arm_check_neon();
}

// Returns true iff the CPU supports ARM NEON. The probe is run through
// pthread_once() on the first call; later calls return the cached value.
bool sk_cpu_arm_has_neon(void)
{
    pthread_once(&sOnce, sk_cpu_arm_probe_features);
    return sHasArmNeon;
}
#endif // SK_ARM_NEON_IS_DYNAMIC

View File

@ -16,7 +16,9 @@
#include "SkBlitRow.h"
#include "SkUtils.h"
#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
#include "SkUtilsArm.h"
#if defined(SK_CPU_LENDIAN) && !SK_ARM_NEON_IS_NONE
extern "C" void memset16_neon(uint16_t dst[], uint16_t value, int count);
extern "C" void memset32_neon(uint32_t dst[], uint32_t value, int count);
#endif
@ -27,22 +29,34 @@ extern "C" void arm_memset32(uint32_t* dst, uint32_t value, int count);
#endif
SkMemset16Proc SkMemset16GetPlatformProc() {
#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
return memset16_neon;
#elif defined(SK_CPU_LENDIAN)
return arm_memset16;
#else
#if !defined(SK_CPU_LENDIAN)
return NULL;
#elif SK_ARM_NEON_IS_DYNAMIC
if (sk_cpu_arm_has_neon()) {
return memset16_neon;
} else {
return arm_memset16;
}
#elif SK_ARM_NEON_IS_ALWAYS
return memset16_neon;
#else
return arm_memset16;
#endif
}
SkMemset32Proc SkMemset32GetPlatformProc() {
#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
return memset32_neon;
#elif defined(SK_CPU_LENDIAN)
return arm_memset32;
#else
#if !defined(SK_CPU_LENDIAN)
return NULL;
#elif SK_ARM_NEON_IS_DYNAMIC
if (sk_cpu_arm_has_neon()) {
return memset32_neon;
} else {
return arm_memset32;
}
#elif SK_ARM_NEON_IS_ALWAYS
return memset32_neon;
#else
return arm_memset32;
#endif
}