From bfe4db5c79cab60d18734a20bead4eae245a9264 Mon Sep 17 00:00:00 2001 From: Mike Klein Date: Fri, 8 Jun 2018 15:58:18 -0400 Subject: [PATCH] remove unimportant bench Change-Id: I98d0a2295c4853e0c07d84e781c8b0236561d307 Reviewed-on: https://skia-review.googlesource.com/133584 Reviewed-by: Mike Klein Commit-Queue: Mike Klein --- bench/pack_int_uint16_t_Bench.cpp | 93 ------------------------------- gn/bench.gni | 1 - 2 files changed, 94 deletions(-) delete mode 100644 bench/pack_int_uint16_t_Bench.cpp diff --git a/bench/pack_int_uint16_t_Bench.cpp b/bench/pack_int_uint16_t_Bench.cpp deleted file mode 100644 index 5e1527d7ca..0000000000 --- a/bench/pack_int_uint16_t_Bench.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2016 Google Inc. - * - * Use of this source code is governed by a BSD-style license that can be - * found in the LICENSE file. - */ - -#include "Benchmark.h" -#include "SkTypes.h" - -/** - * There's a good variety of ways to pack from int down to uint16_t with SSE, - * depending on the specific instructions available. - * - * SSE2 offers an int -> int16_t pack instruction. We can use this in two ways: - * - subtract off 32768, int -> int16_t, add 32768 back (sse2_a) - * - first artificially sign extend the (positive) value in our int, then int -> int16_t (sse2_b) - * SSSE3 adds a byte shuffle, so we just put the bytes where we want them. (ssse3) - * SSE41 added an int -> uint16_t pack instruction. (sse41) - * - * Findings so far: - * - sse41 < ssse3 <<< sse2_b < sse2_a; - * - the ssse3 version is only slightly slower than the sse41 version, maybe not at all - * - the sse2_a is only slightly slower than the sse2_b version - * - the ssse3 and sse41 versions are about 3x faster than either sse2 version - * - the sse41 version seems to cause some code generation trouble. - */ - -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 - -#include - -template <__m128i (kernel)(__m128i)> -class pack_int_uint16_t_Bench : public Benchmark { -public: - pack_int_uint16_t_Bench(const char* impl) { - fName.append("pack_int_uint16_t_"); - fName.append(impl); - } - - bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; } - const char* onGetName() override { return fName.c_str(); } - - void onDraw(int loops, SkCanvas*) override { - __m128i x = _mm_set1_epi32(0x42424242); - while (loops --> 0) { - x = kernel(x); - } - - volatile int blackhole = 0; - blackhole ^= _mm_cvtsi128_si32(x); - } - - SkString fName; -}; - -namespace { - __m128i sse2_a(__m128i x) { - x = _mm_sub_epi32(x, _mm_set1_epi32(0x8000)); - return _mm_add_epi16(_mm_packs_epi32(x,x), _mm_set1_epi16((short)0x8000)); - } -} -DEF_BENCH( return new pack_int_uint16_t_Bench("sse2_a"); ) - -namespace { - __m128i sse2_b(__m128i x) { - x = _mm_srai_epi32(_mm_slli_epi32(x, 16), 16); - return _mm_packs_epi32(x,x); - } -} -DEF_BENCH( return new pack_int_uint16_t_Bench("sse2_b"); ) - -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 -namespace { - __m128i ssse3(__m128i x) { - // TODO: Can we force the bench to load the mask inside the loop? Would be more realistic. - const int _ = ~0; - return _mm_shuffle_epi8(x, _mm_setr_epi8(0,1, 4,5, 8,9, 12,13, _,_,_,_,_,_,_,_)); - } -} -DEF_BENCH( return new pack_int_uint16_t_Bench("ssse3"); ) -#endif - -#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41 -namespace { - __m128i sse41(__m128i x) { - return _mm_packus_epi32(x,x); - } -} -DEF_BENCH( return new pack_int_uint16_t_Bench("sse41"); ) -#endif - -#endif // SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 diff --git a/gn/bench.gni b/gn/bench.gni index e5880604b8..e73a973ccb 100644 --- a/gn/bench.gni +++ b/gn/bench.gni @@ -76,7 +76,6 @@ bench_sources = [ "$_bench/MorphologyBench.cpp", "$_bench/MultitextureImageBench.cpp", "$_bench/MutexBench.cpp", - "$_bench/pack_int_uint16_t_Bench.cpp", "$_bench/PatchBench.cpp", "$_bench/PathBench.cpp", "$_bench/PathIterBench.cpp",