arm: First step towards dynamic NEON support.
This patch adds minimal support for dynamic ARM NEON detection, i.e. the ability to probe the CPU at runtime for NEON and provide alternate code paths when it is available. - Add include/core/SkUtilsArm.h, which declares a few helper macros (e.g. SK_ARM_NEON_IS_DYNAMIC), plus the handy function 'sk_cpu_arm_has_neon()' which returns true if the target CPU supports the ARM NEON instruction set. Note that the header is in include/core/ because it will have to be included from NEON-specific code under src/core/. It would probably be more logical to put it under include/opts/ instead, but this would require moving all the NEON-specific stuff under src/core/ into src/opts/, which is not trivial due to the way the code is currently architected. - Add src/core/SkUtilsArm.cpp which implements 'sk_cpu_arm_has_neon' for ARM-based Linux systems, only when SK_ARM_NEON_IS_DYNAMIC is true. (For other cases, 'sk_cpu_arm_has_neon' is an inline function that returns a constant 'true' or 'false' value). There is no user-level accessible CPUID instruction on ARM, so do all CPU feature probing by parsing /proc/cpuinfo. This is Linux-specific. For Debug build types, the CPU probing result is printed to the Android log (or Linux command-line) for easier debugging. - Create a new 'opts_neon' target (static library) which shall contain all the NEON-specific code paths for the library. This is necessary because -mfpu=neon also affects scalar (non-NEON) code. Just like with -mssse3 on x86, we can't build the rest of the library with this flag. Note that for now, we only include memset16_neon and memset32_neon in this library. - Modify opts_check_arm.cpp to implement SK_ARM_NEON_IS_DYNAMIC properly. Compared to a 'xoom' build, the only difference is the use of NEON-optimized memset16/32 functions. Later patches will move more NEON-specific code paths to 'opts_neon'. 
Review URL: https://codereview.appspot.com/6247058 git-svn-id: http://skia.googlecode.com/svn/trunk@4069 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
c20bc25b6e
commit
eec9dbcace
@ -192,6 +192,9 @@
|
||||
],
|
||||
}],
|
||||
[ 'skia_target_arch == "arm" and armv7 == 1', {
|
||||
'variables': {
|
||||
'arm_neon_optional%': 0,
|
||||
},
|
||||
'defines': [
|
||||
'__ARM_ARCH__=7',
|
||||
],
|
||||
@ -207,7 +210,12 @@
|
||||
'cflags': [
|
||||
'-mfpu=neon',
|
||||
],
|
||||
}],
|
||||
}],
|
||||
[ 'arm_neon_optional == 1', {
|
||||
'defines': [
|
||||
'__ARM_HAVE_OPTIONAL_NEON_SUPPORT',
|
||||
],
|
||||
}],
|
||||
],
|
||||
}],
|
||||
],
|
||||
|
@ -233,6 +233,7 @@
|
||||
'../include/core/SkUnPreMultiply.h',
|
||||
'../include/core/SkUnitMapper.h',
|
||||
'../include/core/SkUtils.h',
|
||||
'../include/core/SkUtilsArm.h',
|
||||
'../include/core/SkWeakRefCnt.h',
|
||||
'../include/core/SkWriter32.h',
|
||||
'../include/core/SkXfermode.h',
|
||||
@ -304,7 +305,13 @@
|
||||
'dependencies': [
|
||||
'android_system.gyp:ft2',
|
||||
],
|
||||
}],
|
||||
}],
|
||||
[ 'skia_os == "android" and skia_target_arch == "arm" and armv7 == 1', {
|
||||
# The code in SkUtilsArm.cpp can be used on an ARM-based Linux system, not only Android.
|
||||
'sources': [
|
||||
'../src/core/SkUtilsArm.cpp',
|
||||
],
|
||||
}],
|
||||
],
|
||||
'direct_dependent_settings': {
|
||||
'include_dirs': [
|
||||
|
39
gyp/opts.gyp
39
gyp/opts.gyp
@ -56,14 +56,22 @@
|
||||
'cflags': [
|
||||
'-fomit-frame-pointer',
|
||||
],
|
||||
'variables': {
|
||||
# Default to the value inherited from the including scope. GYP variable
# expansion is spelled '<(name)'; the closing parenthesis is required.
'arm_neon_optional%': '<(arm_neon_optional)',
|
||||
},
|
||||
'sources': [
|
||||
'../src/opts/opts_check_arm.cpp',
|
||||
'../src/opts/memset.arm.S',
|
||||
'../src/opts/memset16_neon.S',
|
||||
'../src/opts/memset32_neon.S',
|
||||
'../src/opts/SkBitmapProcState_opts_arm.cpp',
|
||||
'../src/opts/SkBlitRow_opts_arm.cpp',
|
||||
],
|
||||
'conditions': [
|
||||
[ 'arm_neon == 1 or arm_neon_optional == 1', {
|
||||
'dependencies': [
|
||||
'opts_neon',
|
||||
]
|
||||
}]
|
||||
],
|
||||
}],
|
||||
[ 'skia_target_arch == "arm" and armv7 != 1', {
|
||||
'sources': [
|
||||
@ -107,6 +115,33 @@
|
||||
}],
|
||||
],
|
||||
},
|
||||
# NEON code must be compiled with -mfpu=neon which also affects scalar
|
||||
# code. To support dynamic NEON code paths, we need to build all
|
||||
# NEON-specific sources in a separate static library. The situation
|
||||
# is very similar to the SSSE3 one.
|
||||
{
|
||||
'target_name': 'opts_neon',
|
||||
'type': 'static_library',
|
||||
'include_dirs': [
|
||||
'../include/config',
|
||||
'../include/core',
|
||||
'../src/core',
|
||||
],
|
||||
'cflags!': [
|
||||
'-fno-omit-frame-pointer',
|
||||
'-mfpu=vfp', # remove them all, just in case.
|
||||
'-mfpu=vfpv3',
|
||||
'-mfpu=vfpv3-d16',
|
||||
],
|
||||
'cflags': [
|
||||
'-fomit-frame-pointer',
|
||||
'-mfpu=neon',
|
||||
],
|
||||
'sources': [
|
||||
'../src/opts/memset16_neon.S',
|
||||
'../src/opts/memset32_neon.S',
|
||||
],
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
|
53
include/core/SkUtilsArm.h
Normal file
53
include/core/SkUtilsArm.h
Normal file
@ -0,0 +1,53 @@
|
||||
|
||||
/*
|
||||
* Copyright 2012 The Android Open Source Project
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkUtilsArm_DEFINED
|
||||
#define SkUtilsArm_DEFINED
|
||||
|
||||
#include "SkUtils.h"
|
||||
|
||||
// Define SK_ARM_NEON_MODE to one of the following values
|
||||
// corresponding respectively to:
|
||||
// - No ARM Neon support at all (not targeting ARMv7-A, or don't have NEON)
|
||||
// - Full ARM Neon support (i.e. assume the CPU always supports it)
|
||||
// - Optional ARM Neon support (i.e. probe CPU at runtime)
|
||||
//
|
||||
#define SK_ARM_NEON_MODE_NONE 0
|
||||
#define SK_ARM_NEON_MODE_ALWAYS 1
|
||||
#define SK_ARM_NEON_MODE_DYNAMIC 2
|
||||
|
||||
#if defined(__arm__) && defined(__ARM_HAVE_OPTIONAL_NEON_SUPPORT)
|
||||
# define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_DYNAMIC
|
||||
#elif defined(__arm__) && defined(__ARM_HAVE_NEON)
|
||||
# define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_ALWAYS
|
||||
#else
|
||||
# define SK_ARM_NEON_MODE SK_ARM_NEON_MODE_NONE
|
||||
#endif
|
||||
|
||||
// Convenience test macros, always defined as 0 or 1
|
||||
#define SK_ARM_NEON_IS_NONE (SK_ARM_NEON_MODE == SK_ARM_NEON_MODE_NONE)
|
||||
#define SK_ARM_NEON_IS_ALWAYS (SK_ARM_NEON_MODE == SK_ARM_NEON_MODE_ALWAYS)
|
||||
#define SK_ARM_NEON_IS_DYNAMIC (SK_ARM_NEON_MODE == SK_ARM_NEON_MODE_DYNAMIC)
|
||||
|
||||
// sk_cpu_arm_has_neon() returns true iff the target device is ARMv7-A and
// supports NEON instructions.
//
//  - NONE mode:    always returns false (compile-time constant).
//  - ALWAYS mode:  always returns true (compile-time constant).
//  - DYNAMIC mode: declared here and defined out of line; that definition
//                  probes the CPU at runtime and caches the result.
//
// The constant variants are 'static inline' so that a translation unit which
// includes this header without calling the function does not end up with an
// unused static function definition (and the matching compiler warning).

#if SK_ARM_NEON_IS_NONE
static inline bool sk_cpu_arm_has_neon(void) {
    return false;
}
#elif SK_ARM_NEON_IS_ALWAYS
static inline bool sk_cpu_arm_has_neon(void) {
    return true;
}
#else // SK_ARM_NEON_IS_DYNAMIC
extern bool sk_cpu_arm_has_neon(void);
#endif
|
||||
|
||||
#endif // SkUtilsArm_DEFINED
|
178
src/core/SkUtilsArm.cpp
Normal file
178
src/core/SkUtilsArm.cpp
Normal file
@ -0,0 +1,178 @@
|
||||
|
||||
/*
|
||||
* Copyright 2012 The Android Open Source Project
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "SkUtilsArm.h"
|
||||
|
||||
#if SK_ARM_NEON_IS_DYNAMIC
|
||||
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
|
||||
// Set NEON_DEBUG to 1 to allow debugging of the CPU features probing.
|
||||
// For now, we always set it for SK_DEBUG builds.
|
||||
#ifdef SK_DEBUG
|
||||
# define NEON_DEBUG 1
|
||||
#else
|
||||
# define NEON_DEBUG 0
|
||||
#endif
|
||||
|
||||
#if NEON_DEBUG
|
||||
# ifdef SK_BUILD_FOR_ANDROID
|
||||
// used to declare PROP_VALUE_MAX and __system_property_get()
|
||||
# include <sys/system_properties.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// A function used to determine at runtime if the target CPU supports
|
||||
// the ARM NEON instruction set. This implementation is Linux-specific.
|
||||
static bool sk_cpu_arm_check_neon(void)
|
||||
{
|
||||
bool result = false;
|
||||
|
||||
#if NEON_DEBUG
|
||||
// Allow forcing the mode through the environment during debugging.
|
||||
# ifdef SK_BUILD_FOR_ANDROID
|
||||
// On Android, we use a system property
|
||||
# define PROP_NAME "debug.skia.arm_neon_mode"
|
||||
char prop[PROP_VALUE_MAX];
|
||||
if (__system_property_get(PROP_NAME, prop) > 0) {
|
||||
# else
|
||||
# define PROP_NAME "SKIA_ARM_NEON_MODE"
|
||||
// On ARM Linux, we use an environment variable
|
||||
const char* prop = getenv(PROP_NAME);
|
||||
if (prop != NULL) {
|
||||
# endif
|
||||
SkDebugf("%s: %s", PROP_NAME, prop);
|
||||
if (!strcmp(prop, "1")) {
|
||||
SkDebugf("Forcing ARM Neon mode to full!\n");
|
||||
return true;
|
||||
}
|
||||
if (!strcmp(prop, "0")) {
|
||||
SkDebugf("Disabling ARM NEON mode\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
SkDebugf("Running dynamic CPU feature detection\n");
|
||||
#endif
|
||||
|
||||
// There is no user-accessible CPUID instruction on ARM that we can use.
|
||||
// Instead, we must parse /proc/cpuinfo and look for the 'neon' feature.
|
||||
// For example, here's a typical output (Nexus S running ICS 4.0.3):
|
||||
/*
|
||||
Processor : ARMv7 Processor rev 2 (v7l)
|
||||
BogoMIPS : 994.65
|
||||
Features : swp half thumb fastmult vfp edsp thumbee neon vfpv3
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 7
|
||||
CPU variant : 0x2
|
||||
CPU part : 0xc08
|
||||
CPU revision : 2
|
||||
|
||||
Hardware : herring
|
||||
Revision : 000b
|
||||
Serial : 3833c77d6dc000ec
|
||||
*/
|
||||
char buffer[4096];
|
||||
|
||||
// If we fail any of the following, assume we don't have NEON instructions
|
||||
// This allows us to return immediately in case of error.
|
||||
result = false;
|
||||
|
||||
do {
|
||||
// open /proc/cpuinfo
|
||||
int fd = TEMP_FAILURE_RETRY(open("/proc/cpuinfo", O_RDONLY));
|
||||
if (fd < 0) {
|
||||
SkDebugf("Could not open /proc/cpuinfo: %s\n", strerror(errno));
|
||||
break;
|
||||
}
|
||||
|
||||
// Read the file. To simplify our search, we're going to place two
|
||||
// sentinel '\n' characters: one at the start of the buffer, and one at
|
||||
// the end. This means we reserve the first and last buffer bytes.
|
||||
buffer[0] = '\n';
|
||||
int size = TEMP_FAILURE_RETRY(read(fd, buffer+1, sizeof(buffer)-2));
|
||||
close(fd);
|
||||
|
||||
if (size < 0) { // should not happen
|
||||
SkDebugf("Could not read /proc/cpuinfo: %s\n", strerror(errno));
|
||||
break;
|
||||
}
|
||||
|
||||
SkDebugf("START /proc/cpuinfo:\n%.*s\nEND /proc/cpuinfo\n",
|
||||
size, buffer+1);
|
||||
|
||||
// Compute buffer limit, and place final sentinel
|
||||
char* buffer_end = buffer + 1 + size;
|
||||
buffer_end[0] = '\n';
|
||||
|
||||
// Now, find a line that starts with "Features", i.e. look for
|
||||
// '\nFeatures ' in our buffer.
|
||||
const char features[] = "\nFeatures\t";
|
||||
const size_t features_len = sizeof(features)-1;
|
||||
|
||||
char* line = (char*) memmem(buffer, buffer_end - buffer,
|
||||
features, features_len);
|
||||
if (line == NULL) { // Weird, no Features line, bad kernel?
|
||||
SkDebugf("Could not find a line starting with 'Features'"
|
||||
"in /proc/cpuinfo ?\n");
|
||||
break;
|
||||
}
|
||||
|
||||
line += features_len; // Skip the "\nFeatures\t" prefix
|
||||
|
||||
// Find the end of the current line
|
||||
char* line_end = (char*) memchr(line, '\n', buffer_end - line);
|
||||
if (line_end == NULL)
|
||||
line_end = buffer_end;
|
||||
|
||||
// Now find an instance of 'neon' in the flags list. We want to
|
||||
// ensure it's only 'neon' and not something fancy like 'noneon'
|
||||
// so check that it follows a space.
|
||||
const char neon[] = " neon";
|
||||
const size_t neon_len = sizeof(neon)-1;
|
||||
const char* flag = (const char*) memmem(line, line_end - line,
|
||||
neon, neon_len);
|
||||
if (flag == NULL)
|
||||
break;
|
||||
|
||||
// Ensure it is followed by a space or a newline.
|
||||
if (flag[neon_len] != ' ' && flag[neon_len] != '\n')
|
||||
break;
|
||||
|
||||
// Fine, we support Arm NEON !
|
||||
result = true;
|
||||
|
||||
} while (0);
|
||||
|
||||
if (result) {
|
||||
SkDebugf("Device supports ARM NEON instructions!\n");
|
||||
} else {
|
||||
SkDebugf("Device does NOT support ARM NEON instructions!\n");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static pthread_once_t sOnce;
|
||||
static bool sHasArmNeon;
|
||||
|
||||
// called through pthread_once()
|
||||
void sk_cpu_arm_probe_features(void)
|
||||
{
|
||||
sHasArmNeon = sk_cpu_arm_check_neon();
|
||||
}
|
||||
|
||||
bool sk_cpu_arm_has_neon(void)
|
||||
{
|
||||
pthread_once(&sOnce, sk_cpu_arm_probe_features);
|
||||
return sHasArmNeon;
|
||||
}
|
||||
|
||||
#endif // SK_ARM_NEON_IS_DYNAMIC
|
@ -16,7 +16,9 @@
|
||||
#include "SkBlitRow.h"
|
||||
#include "SkUtils.h"
|
||||
|
||||
#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
|
||||
#include "SkUtilsArm.h"
|
||||
|
||||
#if defined(SK_CPU_LENDIAN) && !SK_ARM_NEON_IS_NONE
|
||||
extern "C" void memset16_neon(uint16_t dst[], uint16_t value, int count);
|
||||
extern "C" void memset32_neon(uint32_t dst[], uint32_t value, int count);
|
||||
#endif
|
||||
@ -27,22 +29,34 @@ extern "C" void arm_memset32(uint32_t* dst, uint32_t value, int count);
|
||||
#endif
|
||||
|
||||
SkMemset16Proc SkMemset16GetPlatformProc() {
|
||||
#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
|
||||
return memset16_neon;
|
||||
#elif defined(SK_CPU_LENDIAN)
|
||||
return arm_memset16;
|
||||
#else
|
||||
#if !defined(SK_CPU_LENDIAN)
|
||||
return NULL;
|
||||
#elif SK_ARM_NEON_IS_DYNAMIC
|
||||
if (sk_cpu_arm_has_neon()) {
|
||||
return memset16_neon;
|
||||
} else {
|
||||
return arm_memset16;
|
||||
}
|
||||
#elif SK_ARM_NEON_IS_ALWAYS
|
||||
return memset16_neon;
|
||||
#else
|
||||
return arm_memset16;
|
||||
#endif
|
||||
}
|
||||
|
||||
SkMemset32Proc SkMemset32GetPlatformProc() {
|
||||
#if defined(__ARM_HAVE_NEON) && defined(SK_CPU_LENDIAN)
|
||||
return memset32_neon;
|
||||
#elif defined(SK_CPU_LENDIAN)
|
||||
return arm_memset32;
|
||||
#else
|
||||
#if !defined(SK_CPU_LENDIAN)
|
||||
return NULL;
|
||||
#elif SK_ARM_NEON_IS_DYNAMIC
|
||||
if (sk_cpu_arm_has_neon()) {
|
||||
return memset32_neon;
|
||||
} else {
|
||||
return arm_memset32;
|
||||
}
|
||||
#elif SK_ARM_NEON_IS_ALWAYS
|
||||
return memset32_neon;
|
||||
#else
|
||||
return arm_memset32;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user