db6346a5b1
NOTREECHECKS=true NOTRY=true Reason for revert: Valgrind bot's seeing this code use uninitialized memory, and it's somehow blocking our roll into Chrome too: > ld: warning: could not create compact unwind for S32A_Opaque_BlitRow32_SSE4_asm: > stack subq instruction is too different from dwarf stack size > [10339/10982 | 3247.792] PACKAGE FRAMEWORK "Chromium Framework.framework", > POSTBUILDS > FAILED: ./gyp-mac-tool package-framework "Chromium Framework.framework" A && > (export > BUILT_PRODUCTS_DIR=/Volumes/data/b/build/slave/mac_gpu/build/src/out/Release; > export CONFIGURATION=Release; export CONTENTS_FOLDER_PATH="Chromium > Framework.framework/Versions/A"; export > DYLIB_INSTALL_NAME_BASE=@executable_path/../Versions/37.0.2056.0; export > EXECUTABLE_NAME="Chromium Framework"; export EXECUTABLE_PATH="Chromium > Framework.framework/Versions/A/Chromium Framework"; export > FULL_PRODUCT_NAME="Chromium Framework.framework"; export > INFOPLIST_PATH="Chromium Framework.framework/Versions/A/Resources/Info.plist"; > export LD_DYLIB_INSTALL_NAME="@executable_path/../Versions/37.0.2056.0/Chromium > Framework.framework/Chromium Framework"; export MACH_O_TYPE=mh_dylib; export > PRODUCT_NAME="Chromium Framework"; export > PRODUCT_TYPE=com.apple.product-type.framework; export > SDKROOT=/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.6.sdk; > export > SRCROOT=/Volumes/data/b/build/slave/mac_gpu/build/src/out/Release/../../chrome; > export SOURCE_ROOT="${SRCROOT}"; export > TARGET_BUILD_DIR=/Volumes/data/b/build/slave/mac_gpu/build/src/out/Release; > export TEMP_DIR="${TMPDIR}"; export UNLOCALIZED_RESOURCES_FOLDER_PATH="Chromium > Framework.framework/Versions/A/Resources"; export WRAPPER_NAME="Chromium > Framework.framework"; (cd ../../chrome && ../build/mac/tweak_info_plist.py > "--breakpad=1" "--breakpad_uploads=0" "--keystone=0" "--scm=1" > "--branding=Chromium" && ln -fns Versions/Current/Libraries > 
"${BUILT_PRODUCTS_DIR}/${WRAPPER_NAME}/Libraries" && > tools/build/mac/verify_order _ChromeMain > "${BUILT_PRODUCTS_DIR}/${EXECUTABLE_PATH}"); G=$?; ((exit $G) || rm -rf > 'Chromium Framework.framework') && exit $G) && touch "Chromium > Framework.framework" > tools/build/mac/verify_order: unordered symbols in > /Volumes/data/b/build/slave/mac_gpu/build/src/out/Release/Chromium > Framework.framework/Versions/A/Chromium Framework: > S32A_Opaque_BlitRow32_SSE4_asm > _S32A_Opaque_BlitRow32_SSE4_asm > ninja: build stopped: subcommand failed. Original issue's description: > Add SSE4 optimization of S32A_Opaque_Blitrow > > Adds optimization of Skia S32A_Opaque_Blitrow blitter using SSE4.2 SIMD > instruction set. Special case for when alpha is zero or opaque. > > Performance increase of 10%-400% compared to the existing SSE2 > optimization (measured on Silvermont architecture). > Noticeable in ~25 different skia bench subtests, especially in > bitmap_8888_*, repeatTile_*, and morph_*. > > bitmap_8888_A - 100% faster > bitmap_8888_A_source_transparent - 250% faster > bitmap_8888_A_source_opaque - 25% faster > bitmap_8888_A_scale_bicubic - 75% faster > > Signed-off-by: Henrik Smiding <henrik.smiding@intel.com> > > Committed: https://skia.googlesource.com/skia/+/e2527b147679b0c43019fae7d59cc3777d2d097e > > Committed: https://skia.googlesource.com/skia/+/b5c281e1e06af3be804309877de1dac6145686b9 R=reed@google.com, tomhudson@google.com, djsollen@google.com, joakim.landberg@intel.com, henrik.smiding@intel.com, mtklein@chromium.org Author: mtklein@google.com Review URL: https://codereview.chromium.org/336413007
247 lines
8.7 KiB
Python
247 lines
8.7 KiB
Python
# Gyp file for opts projects
|
|
{
|
|
'targets': [
|
|
# Due to an unfortunate intersection of lameness between gcc and gyp,
|
|
# we have to build the *_SSE2.cpp files in a separate target. The
|
|
# gcc lameness is that, in order to compile SSE2 intrinsics code, it
|
|
# must be passed the -msse2 flag. However, with this flag, it may
|
|
# emit SSE2 instructions even for scalar code, such as the CPUID
|
|
# test used to test for the presence of SSE2. So that, and all other
|
|
# code must be compiled *without* -msse2. The gyp lameness is that it
|
|
# does not allow file-specific CFLAGS, so we must create this extra
|
|
# target for those files to be compiled with -msse2.
|
|
#
|
|
# This is actually only a problem on 32-bit Linux (all Intel Macs have
|
|
# SSE2, Linux x86_64 has SSE2 by definition, and MSC will happily emit
|
|
# SSE2 from intrinsics, while generating plain ol' 386 for everything
|
|
# else). However, to keep the .gyp file simple and avoid platform-specific
|
|
# build breakage, we do this on all platforms.
|
|
|
|
# For about the same reason, we need to compile the ARM opts files
|
|
# separately as well.
|
|
{
  # Static library 'skia_opts'. The source list (and a few compiler flags)
  # are chosen per architecture / OS by the 'conditions' list below.
  'target_name': 'opts',
  'product_name': 'skia_opts',
  'type': 'static_library',
  'standalone_static_library': 1,
  'dependencies': [
    'core.gyp:*',
    'effects.gyp:*'
  ],
  'include_dirs': [
    '../src/core',
    '../src/opts',
  ],
  'conditions': [
    # x86 builds other than iOS: the *_SSE2.cpp sources, plus a dependency
    # on the separate opts_ssse3 target.
    [ 'skia_arch_type == "x86" and skia_os != "ios"', {
      'conditions': [
        # -msse2 is only added explicitly for the OSes listed here.
        [ 'skia_os in ["linux", "freebsd", "openbsd", "solaris", "nacl", "chromeos", "android"]', {
          'cflags': [
            '-msse2',
          ],
        }],
      ],
      'include_dirs': [
        '../include/utils',
      ],
      'dependencies': [
        'opts_ssse3',
      ],
      'sources': [
        '../src/opts/opts_check_x86.cpp',
        '../src/opts/SkBitmapProcState_opts_SSE2.cpp',
        '../src/opts/SkBitmapFilter_opts_SSE2.cpp',
        '../src/opts/SkBlitRow_opts_SSE2.cpp',
        '../src/opts/SkBlitRect_opts_SSE2.cpp',
        '../src/opts/SkBlurImage_opts_SSE2.cpp',
        '../src/opts/SkMorphology_opts_SSE2.cpp',
        '../src/opts/SkUtils_opts_SSE2.cpp',
        '../src/opts/SkXfermode_opts_SSE2.cpp',
      ],
    }],
    # ARM, version 7 and up: the *_opts_arm.cpp sources and memset.arm.S.
    [ 'skia_arch_type == "arm" and arm_version >= 7', {
      # The assembly uses the frame pointer register (r7 in Thumb/r11 in
      # ARM), the compiler doesn't like that.
      'cflags!': [
        '-fno-omit-frame-pointer',
        '-mapcs-frame',
        '-mapcs',
      ],
      'cflags': [
        '-fomit-frame-pointer',
        '-mno-apcs-frame',
      ],
      'variables': {
        # NOTE(review): this value ends in '>' rather than ')', so it does
        # not look like a well-formed '<(...)' variable reference. It is
        # kept exactly as found; confirm whether '<(arm_neon_optional)' was
        # intended (and that the variable is always defined) before
        # changing it.
        'arm_neon_optional%': '<(arm_neon_optional>',
      },
      'sources': [
        '../src/opts/memset.arm.S',
        '../src/opts/SkBitmapProcState_opts_arm.cpp',
        '../src/opts/SkBlitMask_opts_arm.cpp',
        '../src/opts/SkBlitRow_opts_arm.cpp',
        '../src/opts/SkBlurImage_opts_arm.cpp',
        '../src/opts/SkMorphology_opts_arm.cpp',
        '../src/opts/SkUtils_opts_arm.cpp',
        '../src/opts/SkXfermode_opts_arm.cpp',
      ],
      'conditions': [
        # Pull in the opts_neon target when arm_neon or arm_neon_optional
        # is 1.
        [ 'arm_neon == 1 or arm_neon_optional == 1', {
          'dependencies': [
            'opts_neon',
          ]
        }],
        [ 'skia_os == "ios"', {
          'sources!': [
            # these fail to compile under xcode for ios
            '../src/opts/memset.arm.S',
            '../src/opts/SkBitmapProcState_opts_arm.cpp',
            '../src/opts/SkBlitRow_opts_arm.cpp',
          ],
        }],
      ],
    }],
    # MIPS: the *_opts_none.cpp sources for everything except SkBlitRow,
    # whose source file is picked by the nested condition.
    [ 'skia_arch_type == "mips"', {
      'sources': [
        '../src/opts/SkBitmapProcState_opts_none.cpp',
        '../src/opts/SkBlitMask_opts_none.cpp',
        '../src/opts/SkBlurImage_opts_none.cpp',
        '../src/opts/SkMorphology_opts_none.cpp',
        '../src/opts/SkUtils_opts_none.cpp',
        '../src/opts/SkXfermode_opts_none.cpp',
      ],
      'conditions': [
        # mips32r2 with mips_dsp 1 or 2 gets the DSP SkBlitRow source;
        # every other MIPS configuration gets the _none one.
        [ '(mips_arch_variant == "mips32r2") \
            and (mips_dsp == 1 or mips_dsp == 2)', {
          'sources': [
            '../src/opts/SkBlitRow_opts_mips_dsp.cpp',
          ],
        }, {
          'sources': [
            '../src/opts/SkBlitRow_opts_none.cpp',
          ],
        }],
      ],
    }],
    # ARM below version 7, iOS, or Android on an architecture not named in
    # the list: the complete set of *_opts_none.cpp sources.
    [ '(skia_arch_type == "arm" and arm_version < 7) \
        or (skia_os == "ios") \
        or (skia_os == "android" and skia_arch_type not in ["x86", "arm", "mips", "arm64"])', {
      'sources': [
        '../src/opts/SkBitmapProcState_opts_none.cpp',
        '../src/opts/SkBlitMask_opts_none.cpp',
        '../src/opts/SkBlitRow_opts_none.cpp',
        '../src/opts/SkBlurImage_opts_none.cpp',
        '../src/opts/SkMorphology_opts_none.cpp',
        '../src/opts/SkUtils_opts_none.cpp',
        '../src/opts/SkXfermode_opts_none.cpp',
      ],
    }],
    # When skia_android_framework is set, these flags are excluded from
    # cflags ('cflags!' is an exclusion list).
    [ 'skia_android_framework', {
      'cflags!': [
        '-msse2',
        '-mfpu=neon',
        '-fomit-frame-pointer',
        '-mno-apcs-frame',
      ]
    }],
    # arm64: the ARM and NEON sources are listed directly in this target;
    # SkUtils uses the _none source.
    [ 'skia_arch_type == "arm64"', {
      'sources': [
        '../src/opts/SkBitmapProcState_arm_neon.cpp',
        '../src/opts/SkBitmapProcState_matrixProcs_neon.cpp',
        '../src/opts/SkBitmapProcState_opts_arm.cpp',
        '../src/opts/SkBlitMask_opts_arm.cpp',
        '../src/opts/SkBlitMask_opts_arm_neon.cpp',
        '../src/opts/SkBlitRow_opts_arm.cpp',
        '../src/opts/SkBlitRow_opts_arm_neon.cpp',
        '../src/opts/SkBlurImage_opts_arm.cpp',
        '../src/opts/SkBlurImage_opts_neon.cpp',
        '../src/opts/SkMorphology_opts_arm.cpp',
        '../src/opts/SkMorphology_opts_neon.cpp',
        '../src/opts/SkUtils_opts_none.cpp',
        '../src/opts/SkXfermode_opts_arm.cpp',
        '../src/opts/SkXfermode_opts_arm_neon.cpp',
      ],
    }],
  ],
},
|
|
# For the same lame reasons as what is done for skia_opts, we have to
|
|
# create another target specifically for SSSE3 code as we would not want
|
|
# to compile the SSE2 code with -mssse3 which would potentially allow
|
|
# gcc to generate SSSE3 code.
|
|
{
  # Static library 'skia_opts_ssse3': holds the SSSE3 source so that
  # -mssse3 is applied to this target only.
  'target_name': 'opts_ssse3',
  'product_name': 'skia_opts_ssse3',
  'type': 'static_library',
  'standalone_static_library': 1,
  'dependencies': [
    'core.gyp:*',
    'effects.gyp:*'
  ],
  'include_dirs': [
    '../src/core',
  ],
  'conditions': [
    # -mssse3 is added for the listed OSes, unless skia_android_framework
    # is set.
    [ 'skia_os in ["linux", "freebsd", "openbsd", "solaris", "nacl", "chromeos", "android"] \
        and not skia_android_framework', {
      'cflags': [
        '-mssse3',
      ],
    }],
    # (Mac has -mssse3 globally.)
    # The SSSE3 source is only compiled for x86; on other architectures
    # this target has no sources.
    [ 'skia_arch_type == "x86"', {
      'sources': [
        '../src/opts/SkBitmapProcState_opts_SSSE3.cpp',
      ],
    }],
  ],
},
|
|
# NEON code must be compiled with -mfpu=neon which also affects scalar
|
|
# code. To support dynamic NEON code paths, we need to build all
|
|
# NEON-specific sources in a separate static library. The situation
|
|
# is very similar to the SSSE3 one.
|
|
{
  # Static library 'skia_opts_neon': all NEON-specific sources, built with
  # their own flags separately from the 'opts' target.
  'target_name': 'opts_neon',
  'product_name': 'skia_opts_neon',
  'type': 'static_library',
  'standalone_static_library': 1,
  'dependencies': [
    'core.gyp:*',
    'effects.gyp:*'
  ],
  'include_dirs': [
    '../src/core',
    '../src/opts',
  ],
  # Exclusion list: drop any frame-pointer / non-NEON FPU flags that may
  # already be present in cflags.
  'cflags!': [
    '-fno-omit-frame-pointer',
    '-mfpu=vfp',  # remove them all, just in case.
    '-mfpu=vfpv3',
    '-mfpu=vfpv3-d16',
  ],
  'conditions': [
    # The NEON flags are only added when skia_android_framework is not set.
    [ 'not skia_android_framework', {
      'cflags': [
        '-mfpu=neon',
        '-fomit-frame-pointer',
      ],
    }],
  ],
  'ldflags': [
    '-march=armv7-a',
    '-Wl,--fix-cortex-a8',
  ],
  'sources': [
    '../src/opts/memset16_neon.S',
    '../src/opts/memset32_neon.S',
    '../src/opts/SkBitmapProcState_arm_neon.cpp',
    '../src/opts/SkBitmapProcState_matrixProcs_neon.cpp',
    '../src/opts/SkBitmapProcState_matrix_neon.h',
    '../src/opts/SkBlitMask_opts_arm_neon.cpp',
    '../src/opts/SkBlitRow_opts_arm_neon.cpp',
    '../src/opts/SkBlurImage_opts_neon.cpp',
    '../src/opts/SkMorphology_opts_neon.cpp',
    '../src/opts/SkXfermode_opts_arm_neon.cpp',
  ],
},
|
|
],
|
|
}
|