2015-03-25 17:22:41 +00:00
|
|
|
# Copyright 2015 Google Inc.
|
|
|
|
#
|
|
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
|
|
# found in the LICENSE file.
|
2015-01-27 02:55:58 +00:00
|
|
|
{
|
|
|
|
'none_sources': [
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_opts_none.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitMask_opts_none.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitRow_opts_none.cpp',
|
|
|
|
],
|
|
|
|
|
|
|
|
'armv7_sources': [
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_opts_arm.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitMask_opts_arm.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitRow_opts_arm.cpp',
|
|
|
|
],
|
|
|
|
'neon_sources': [
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_arm_neon.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_matrixProcs_neon.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitMask_opts_arm_neon.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitRow_opts_arm_neon.cpp',
|
2015-07-30 14:30:16 +00:00
|
|
|
'<(skia_src_path)/opts/SkOpts_neon.cpp',
|
2015-01-27 02:55:58 +00:00
|
|
|
],
|
|
|
|
'arm64_sources': [
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_arm_neon.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_matrixProcs_neon.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_opts_arm.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitMask_opts_arm.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitMask_opts_arm_neon.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitRow_opts_arm.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitRow_opts_arm_neon.cpp',
|
2015-07-30 14:30:16 +00:00
|
|
|
'<(skia_src_path)/opts/SkOpts_neon.cpp',
|
2015-01-27 02:55:58 +00:00
|
|
|
],
|
|
|
|
|
|
|
|
'mips_dsp_sources': [
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_opts_mips_dsp.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitMask_opts_none.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitRow_opts_mips_dsp.cpp',
|
|
|
|
],
|
|
|
|
|
|
|
|
'sse2_sources': [
|
|
|
|
'<(skia_src_path)/opts/SkBitmapFilter_opts_SSE2.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_opts_SSE2.cpp',
|
|
|
|
'<(skia_src_path)/opts/SkBlitRow_opts_SSE2.cpp',
|
|
|
|
'<(skia_src_path)/opts/opts_check_x86.cpp',
|
|
|
|
],
|
|
|
|
'ssse3_sources': [
|
|
|
|
'<(skia_src_path)/opts/SkBitmapProcState_opts_SSSE3.cpp',
|
2015-07-30 14:30:16 +00:00
|
|
|
'<(skia_src_path)/opts/SkOpts_ssse3.cpp',
|
2015-01-27 02:55:58 +00:00
|
|
|
],
|
|
|
|
'sse41_sources': [
|
Revert of Revert of SSE4 opaque blend using intrinsics instead of assembly. (patchset #1 id:1 of https://codereview.chromium.org/873553003/)
Reason for revert:
Reverted the wrong CL.
Original issue's description:
> Revert of SSE4 opaque blend using intrinsics instead of assembly. (patchset #16 id:300001 of https://codereview.chromium.org/874863002/)
>
> Reason for revert:
> This causes a bug on the 'hittestpath' GM on MacMini 4,1
>
> See:
>
> https://gold.skia.org/#/triage/hittestpath?head=0
>
> for details.
>
> Original issue's description:
> > SSE4 opaque blend using intrinsics instead of assembly.
> >
> > Since we had such a hard time with the assembly versions of this blit (to the
> > point that we have them completely disabled everywhere), I thought I'd take
> > a shot at writing a version of the blit using intrinsics.
> >
> > The key feature of SSE4 we're exploiting is that we can use ptest (_mm_test*)
> > to skip the blend when the 16 src pixels we consider each loop are all opaque
> > or all transparent. _mm_shuffle_epi8 from SSSE3 also lends a hand to extract
> > all those alphas.
> >
> > It's worth looking to see if we can backport this type of logic to SSE2 using
> > _mm_movemask_epi8, or up to 32 pixels at a time using AVX.
> >
> > My local performance testing doesn't show this to be an unambiguous win
> > (there are probably microbenchmarks and SKPs where we'd be better off just
> > powering through the blend rather than looking at alphas), but the potential
> > does seem tantalizing enough to let skiaperf vet it on the bots. (< 1.0x is a win.)
> >
> > DM says it draws pixel perfect compared to the old code.
> >
> > Microbenchmarks:
> > bitmap_RGBA_8888_A_source_stripes_two 14us -> 14.4us 1.03x
> > bitmap_RGBA_8888_A_source_stripes_three 14.3us -> 14.5us 1.01x
> > bitmap_RGBA_8888_scale_bilerp 61.9us -> 62.2us 1.01x
> > bitmap_RGBA_8888_update_volatile_scale_rotate_bilerp 102us -> 101us 0.99x
> > bitmap_RGBA_8888_scale_rotate_bilerp 103us -> 101us 0.99x
> > bitmap_RGBA_8888_scale 18.4us -> 18.2us 0.99x
> > bitmap_RGBA_8888_A_scale_rotate_bicubic 71us -> 70us 0.99x
> > bitmap_RGBA_8888_update_scale_rotate_bilerp 103us -> 101us 0.99x
> > bitmap_RGBA_8888_A_scale_rotate_bilerp 112us -> 109us 0.98x
> > bitmap_RGBA_8888_update_volatile 5.72us -> 5.58us 0.98x
> > bitmap_RGBA_8888 5.73us -> 5.58us 0.97x
> > bitmap_RGBA_8888_update 5.78us -> 5.6us 0.97x
> > bitmap_RGBA_8888_A_scale_bilerp 70.7us -> 68us 0.96x
> > bitmap_RGBA_8888_A_scale_bicubic 23.7us -> 21.8us 0.92x
> > bitmap_RGBA_8888_A 13.9us -> 10.9us 0.78x
> > bitmap_RGBA_8888_A_source_opaque 14us -> 6.29us 0.45x
> > bitmap_RGBA_8888_A_source_transparent 14us -> 3.65us 0.26x
> >
> > Running over our ~70 SKP web page captures, this looks like we spend 0.7x
> > the time in S32A_Opaque_BlitRow compared to the SSE2 version, which should
> > be a decent predictor of real-world impact.
> >
> > BUG=chromium:399842
> >
> > Committed: https://skia.googlesource.com/skia/+/04bc91b972417038fecfa87c484771eac2b9b785
> >
> > CQ_EXTRA_TRYBOTS=client.skia:Test-Mac10.6-MacMini4.1-GeForce320M-x86_64-Release-Trybot
> >
> > Committed: https://skia.googlesource.com/skia/+/6dbfb21a6c88af6d94e8c823c3ad559f1a41b493
>
> TBR=henrik.smiding@intel.com,mtklein@google.com,herb@google.com,reed@google.com,thakis@chromium.org,mtklein@chromium.org
> NOPRESUBMIT=true
> NOTREECHECKS=true
> NOTRY=true
> BUG=chromium:399842
>
> Committed: https://skia.googlesource.com/skia/+/4988891a1173cd405bf1c1dd3a3668c451f45e4c
TBR=henrik.smiding@intel.com,mtklein@google.com,herb@google.com,reed@google.com,thakis@chromium.org,mtklein@chromium.org
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
BUG=chromium:399842
Review URL: https://codereview.chromium.org/894083002
2015-02-02 18:02:48 +00:00
|
|
|
'<(skia_src_path)/opts/SkBlitRow_opts_SSE4.cpp',
|
2015-07-30 14:30:16 +00:00
|
|
|
'<(skia_src_path)/opts/SkOpts_sse41.cpp',
|
2015-01-27 02:55:58 +00:00
|
|
|
],
|
2015-08-31 21:21:08 +00:00
|
|
|
# These targets would otherwise be empty, but Xcode doesn't like that, so add an empty file to each.
|
2015-08-24 17:32:02 +00:00
|
|
|
'sse42_sources': [
|
2015-08-31 21:21:08 +00:00
|
|
|
'<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp',
|
2015-08-24 17:32:02 +00:00
|
|
|
],
|
|
|
|
'avx_sources': [
|
2016-02-08 23:50:22 +00:00
|
|
|
'<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp',
|
2015-08-24 17:32:02 +00:00
|
|
|
],
|
|
|
|
'avx2_sources': [
|
Revert of AVX 2 SrcOver blits: color32, blitmask. (patchset #24 id:450001 of https://codereview.chromium.org/1532613002/ )
Reason for revert:
Bot failures
Original issue's description:
> AVX 2 SrcOver blits: color32, blitmask.
>
> As a follow up to the SSE 4.1 CL, this should look pretty familiar.
>
> I've made some organizational changes around how we load, store, pack, and unpack data that I think makes things clearer and more orthogonal, and it'll make it easier to try out a pmaddubsw lerp. I have backported these changes to the SSE 4.1 code, and I hope that I can actually get a lot of this code templated for sharing between the two later.
>
> Perf changes (relative to SSE 4.1):
> Xfermode_SrcOver: 1650 -> 1180 (0.71x) // large opaque blit
> Xfermode_SrcOver_aa: 1794 -> 1653 (0.92x) // large opaque + small transparent
> text_16_AA_{FF,BK,WT}: 1.72 -> 1.59 (0.92x) // small opaque blit
> text_16_AA_88: 1.83 -> 1.77 (0.97x) // small transparent blit
>
> This should be a big throughput win, and a small latency win.
> This should all be pixel-exact to the previous SSE 4.1 code.
>
>
> GOLD_TRYBOT_URL= https://gold.skia.org/search2?unt=true&query=source_type%3Dgm&master=false&issue=1532613002
> CQ_EXTRA_TRYBOTS=client.skia:Test-Ubuntu-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD-Trybot;client.skia.compile:Build-Ubuntu-GCC-x86_64-Release-CMake-Trybot,Build-Mac10.9-Clang-x86_64-Release-CMake-Trybot
>
> Committed: https://skia.googlesource.com/skia/+/5d2117015eb271e09faf4a7ddd89093c9d618a36
TBR=herb@google.com,mtklein@google.com,mtklein@chromium.org
# Skipping CQ checks because original CL landed less than 1 day ago.
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
Review URL: https://codereview.chromium.org/1632713002
2016-01-25 16:54:50 +00:00
|
|
|
'<(skia_src_path)/core/SkForceCPlusPlusLinking.cpp',
|
2015-08-24 17:32:02 +00:00
|
|
|
],
|
2015-01-27 02:55:58 +00:00
|
|
|
}
|