clean up SkBitmapProcState::platformProcs()

Cq-Include-Trybots: luci.chromium.try:linux-blink-rel
Change-Id: Ief41c0442ce37ba0350d070a66a103095c07083c
Reviewed-on: https://skia-review.googlesource.com/c/172420
Reviewed-by: Mike Klein <mtklein@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2018-11-21 10:47:25 -05:00 committed by Skia Commit-Bot
parent f9910ea258
commit f8dfe4cbbf
9 changed files with 7 additions and 405 deletions

View File

@ -6,27 +6,13 @@
# Things are easiest for everyone if these source paths are absolute.
_src = get_path_info("../src", "abspath")
none = [
"$_src/opts/Sk4px_none.h",
"$_src/opts/SkBitmapProcState_opts_none.cpp",
]
armv7 = [ "$_src/opts/SkBitmapProcState_opts_none.cpp" ]
none = []
armv7 = []
neon = []
arm64 = [ "$_src/opts/SkBitmapProcState_opts_none.cpp" ]
arm64 = []
sse2 = []
crc32 = [ "$_src/opts/SkOpts_crc32.cpp" ]
sse2 = [
"$_src/opts/Sk4px_SSE2.h",
"$_src/opts/SkBitmapProcState_opts_SSE2.h",
"$_src/opts/SkBitmapProcState_opts_SSE2.cpp",
"$_src/opts/SkColor_opts_SSE2.h",
"$_src/opts/opts_check_x86.cpp",
]
ssse3 = [ "$_src/opts/SkOpts_ssse3.cpp" ]
sse41 = [ "$_src/opts/SkOpts_sse41.cpp" ]
sse42 = [ "$_src/opts/SkOpts_sse42.cpp" ]

View File

@ -261,10 +261,6 @@ bool SkBitmapProcState::chooseProcs() {
bool translate_only = (fInvMatrix.getType() & ~SkMatrix::kTranslate_Mask) == 0;
fMatrixProc = this->chooseMatrixProc(translate_only);
SkASSERT(fMatrixProc);
#if defined(SK_LEGACY_PLATFORM_MATRIX_PROCS)
// Look for platform specializations (only fMatrixProc anymore).
this->platformProcs();
#endif
if (fFilterQuality > kNone_SkFilterQuality) {
fSampleProc32 = SkOpts::S32_alpha_D32_filter_DX;

View File

@ -86,19 +86,6 @@ struct SkBitmapProcState : public SkBitmapProcInfo {
SkPMColor fPaintPMColor; // chooseProcs - A8 config
uint16_t fAlphaScale; // chooseProcs
/** Platforms implement this, and can optionally overwrite only the
following fields:
fShaderProc32
fMatrixProc
fSampleProc32
They will already have valid function pointers, so a platform that does
not have an accelerated version can just leave that field as is. A valid
implementation can do nothing (see SkBitmapProcState_opts_none.cpp)
*/
void platformProcs();
/** Given the byte size of the index buffer to be passed to the matrix proc,
return the maximum number of resulting pixels that can be computed
(i.e. the number of SkPMColor values to be written by the sample proc).
@ -164,16 +151,6 @@ private:
#define pack_two_shorts(pri, sec) PACK_TWO_SHORTS(pri, sec)
#endif
// These functions are generated via macros, but are exposed here so that
// platformProcs may test for them by name.
void S32_alpha_D32_filter_DX(const SkBitmapProcState& s,
const uint32_t xy[], int count, SkPMColor colors[]);
void ClampX_ClampY_filter_scale(const SkBitmapProcState& s, uint32_t xy[],
int count, int x, int y);
void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
int count, int x, int y);
// Helper class for mapping the middle of pixel (x, y) into SkFractionalInt bitmap space.
// Discussion:
// Overall, this code takes a point in destination space, and uses the center of the pixel

View File

@ -10,13 +10,8 @@
#define SCALE_FILTER_NAME MAKENAME(_filter_scale)
// declare functions externally to suppress warnings.
void SCALE_FILTER_NAME(const SkBitmapProcState& s,
uint32_t xy[], int count, int x, int y);
void SCALE_FILTER_NAME(const SkBitmapProcState& s,
uint32_t xy[], int count, int x, int y) {
static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
uint32_t xy[], int count, int x, int y) {
SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
SkMatrix::kScale_Mask)) == 0);
SkASSERT(s.fInvKy == 0);

View File

@ -313,15 +313,8 @@ static void nofilter_scale(const SkBitmapProcState& s,
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h" // will create ClampX_ClampY_filter_scale.
// This and ClampX_ClampY_filter_scale() are both extern for now so that opts_check_x86.cpp
// can identify and replace them. TODO: clean up when opts_check_x86.cpp is gone.
// Named wrapper: forwards all arguments unchanged to the nofilter_scale<clamp, true>
// instantiation.
void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s,
uint32_t xy[], int count, int x, int y) {
nofilter_scale<clamp, true>(s, xy, count, x,y);
}
static const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
ClampX_ClampY_nofilter_scale,
nofilter_scale<clamp, true>,
ClampX_ClampY_filter_scale,
};

View File

@ -1,284 +0,0 @@
/*
* Copyright 2009 The Android Open Source Project
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkBitmapProcState_opts_SSE2.h"
#include "SkBitmapProcState_utils.h"
#include "SkColorData.h"
#include "SkTo.h"
#include <emmintrin.h>
// Returns (a + b) >> 16. The sum is formed in 64 bits so the addition itself cannot
// overflow; after shifting down by 16 the value is expected to fit in 32 bits again, and
// SkToS32 performs the narrowing.
static inline int32_t safe_fixed_add_shift(SkFixed a, SkFixed b) {
    const int64_t wide_sum = static_cast<int64_t>(a) + b;
    return SkToS32(wide_sum >> 16);
}
// Packs one filtered coordinate into 32 bits:
//   bits 18 and up : SkClampMax(f >> 16, max)         (first sample index)
//   bits 14..17    : (f >> 12) & 0xF                  (4-bit fraction)
//   bits 0..13     : SkClampMax((f + one) >> 16, max) (second sample index)
static inline uint32_t ClampX_ClampY_pack_filter(SkFixed f, unsigned max,
                                                 SkFixed one) {
    const unsigned first_index  = SkClampMax(f >> 16, max);
    const unsigned fraction     = (f >> 12) & 0xF;
    const unsigned second_index = SkClampMax(safe_fixed_add_shift(f, one), max);

    const unsigned index_and_fraction = (first_index << 4) | fraction;
    return (index_and_fraction << 14) | second_index;
}
/* SSE version of ClampX_ClampY_filter_scale()
* portable version is in core/SkBitmapProcState_matrix.h
*
* Writes one packed Y entry into xy[0], followed by `count` packed X entries.
*
* s     - proc state; this function reads fInvType, fInvKy, fInvSx, fFilterOneX,
*         fFilterOneY and the dimensions of fPixmap.
* xy    - output buffer; must have room for 1 + count uint32_t values.
* count - number of X entries to produce.
* x, y  - pixel position handed to SkBitmapProcStateAutoMapper to obtain the
*         starting fixed-point coordinates.
*
* Two strategies are used for X: a "decal" path when
* can_truncate_to_fixed_for_decal() reports that no clamping is required, and a
* clamped path otherwise. Each path has a scalar prologue that runs until xy is
* 16-byte aligned, a 4-wide SSE2 loop, and a scalar tail.
*/
void ClampX_ClampY_filter_scale_SSE2(const SkBitmapProcState& s, uint32_t xy[],
int count, int x, int y) {
// Only scale + translate matrices are supported here.
SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
SkMatrix::kScale_Mask)) == 0);
SkASSERT(s.fInvKy == 0);
const unsigned maxX = s.fPixmap.width() - 1;
const SkFixed one = s.fFilterOneX;
const SkFixed dx = s.fInvSx;
const SkBitmapProcStateAutoMapper mapper(s, x, y);
const SkFixed fy = mapper.fixedY();
const unsigned maxY = s.fPixmap.height() - 1;
// compute our two Y values up front
*xy++ = ClampX_ClampY_pack_filter(fy, maxY, s.fFilterOneY);
// now initialize fx
SkFixed fx = mapper.fixedX();
// test if we don't need to apply the tile proc
if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
if (count >= 4) {
// SSE version of decal_filter_scale
// Scalar prologue: advance until xy is 16-byte aligned, because the wide loop
// below stores with _mm_store_si128 (aligned store).
while ((size_t(xy) & 0x0F) != 0) {
// The integer part of fx must fit in the 14 bits reserved for it.
SkASSERT((fx >> (16 + 14)) == 0);
// No clamping in the decal case: the second index is simply the first + 1.
*xy++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
fx += dx;
count--;
}
__m128i wide_1 = _mm_set1_epi32(1);
__m128i wide_dx4 = _mm_set1_epi32(dx * 4);
// Lanes (low to high) hold fx, fx + dx, fx + 2*dx, fx + 3*dx.
__m128i wide_fx = _mm_set_epi32(fx + dx * 3, fx + dx * 2,
fx + dx, fx);
while (count >= 4) {
__m128i wide_out;
// Same packing as the scalar expression above, four lanes at a time.
wide_out = _mm_slli_epi32(_mm_srai_epi32(wide_fx, 12), 14);
wide_out = _mm_or_si128(wide_out, _mm_add_epi32(
_mm_srai_epi32(wide_fx, 16), wide_1));
_mm_store_si128(reinterpret_cast<__m128i*>(xy), wide_out);
xy += 4;
// Keep the scalar fx in step with the vector so the tail loop can continue.
fx += dx * 4;
wide_fx = _mm_add_epi32(wide_fx, wide_dx4);
count -= 4;
} // while count >= 4
} // if count >= 4
// Scalar tail (or the whole job when count < 4).
while (count-- > 0) {
SkASSERT((fx >> (16 + 14)) == 0);
*xy++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
fx += dx;
}
} else {
// SSE2 only supports 16-bit integer max & min, so only process the case where
// maxX fits in 16 bits. maxX is derived from the bitmap's width; bitmaps wider
// than that should be rare in the real world.
// NOTE(review): _mm_max_epi16/_mm_min_epi16 compare *signed* 16-bit lanes, so a
// maxX in 0x8000..0xFFFF would be treated as negative by the min below. Confirm
// whether callers can reach this path with such a width, or tighten to 0x7FFF.
if ((count >= 4) && (maxX <= 0xFFFF)) {
// Scalar prologue: advance until xy is 16-byte aligned for _mm_store_si128.
while (((size_t)xy & 0x0F) != 0) {
*xy++ = ClampX_ClampY_pack_filter(fx, maxX, one);
fx += dx;
count--;
}
// Lanes (low to high) hold fx, fx + dx, fx + 2*dx, fx + 3*dx.
__m128i wide_fx = _mm_set_epi32(fx + dx * 3, fx + dx * 2,
fx + dx, fx);
__m128i wide_dx4 = _mm_set1_epi32(dx * 4);
__m128i wide_one = _mm_set1_epi32(one);
__m128i wide_maxX = _mm_set1_epi32(maxX);
__m128i wide_mask = _mm_set1_epi32(0xF);
while (count >= 4) {
__m128i wide_i;
__m128i wide_lo;
__m128i wide_fx1;
// i = SkClampMax(f>>16,maxX)
wide_i = _mm_max_epi16(_mm_srli_epi32(wide_fx, 16),
_mm_setzero_si128());
wide_i = _mm_min_epi16(wide_i, wide_maxX);
// i<<4 | EXTRACT_LOW_BITS(fx)
wide_lo = _mm_srli_epi32(wide_fx, 12);
wide_lo = _mm_and_si128(wide_lo, wide_mask);
wide_i = _mm_slli_epi32(wide_i, 4);
wide_i = _mm_or_si128(wide_i, wide_lo);
// i<<14
wide_i = _mm_slli_epi32(wide_i, 14);
// SkClampMax(((f+one))>>16,max)
wide_fx1 = _mm_add_epi32(wide_fx, wide_one);
wide_fx1 = _mm_max_epi16(_mm_srli_epi32(wide_fx1, 16),
_mm_setzero_si128());
wide_fx1 = _mm_min_epi16(wide_fx1, wide_maxX);
// final combination
wide_i = _mm_or_si128(wide_i, wide_fx1);
_mm_store_si128(reinterpret_cast<__m128i*>(xy), wide_i);
wide_fx = _mm_add_epi32(wide_fx, wide_dx4);
// Keep the scalar fx in step with the vector so the tail loop can continue.
fx += dx * 4;
xy += 4;
count -= 4;
} // while count >= 4
} // if count >= 4
/*
while (count-- > 0) {
*xy++ = ClampX_ClampY_pack_filter(fx, maxX, one);
fx += dx;
}
We'd like to write this as above, but that form allows fx to get 1-iteration too big/small
when count is 0, and this can trigger a UBSAN error, even though we won't in fact use that
last (undefined) value for fx.
Here is an alternative that should always be efficient, but seems much harder to read:
if (count > 0) {
for (;;) {
*xy++ = ClampX_ClampY_pack_filter(fx, maxX, one);
if (--count == 0) break;
fx += dx;
}
}
For now, we'll try this variant: more compact than the if/for version, and we hope the
compiler will get rid of the integer multiply.
*/
// Scalar tail: fx itself is never advanced here, so no out-of-range value is formed.
for (int i = 0; i < count; ++i) {
*xy++ = ClampX_ClampY_pack_filter(fx + i*dx, maxX, one);
}
}
}
/* SSE version of ClampX_ClampY_nofilter_scale()
 * portable version is in core/SkBitmapProcState_matrix.h
 *
 * Output layout: xy[0] receives the clamped integer Y. It is followed by `count`
 * 16-bit X indices, two per 32-bit slot, emitted in the order fx, fx + dx,
 * fx + 2*dx, ...
 *
 * s     - proc state; this function reads fInvType, fInvSx and the dimensions of
 *         fPixmap.
 * xy    - output buffer, written as described above.
 * count - number of X indices to produce.
 * x, y  - pixel position handed to SkBitmapProcStateAutoMapper to obtain the
 *         starting coordinates.
 *
 * Two strategies are used for X: a "decal" path when both the first and the last
 * X index already lie in [0, maxX], and a clamped path otherwise. Each has a
 * scalar prologue that runs until xy is 16-byte aligned (the wide loop uses the
 * aligned store _mm_store_si128), an 8-wide SSE2 loop, and a scalar tail.
 */
void ClampX_ClampY_nofilter_scale_SSE2(const SkBitmapProcState& s,
                                       uint32_t xy[], int count, int x, int y) {
    // Only scale + translate matrices are supported here.
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask)) == 0);

    // we store y, x, x, x, x, x
    const unsigned maxX = s.fPixmap.width() - 1;
    const SkBitmapProcStateAutoMapper mapper(s, x, y);
    const unsigned maxY = s.fPixmap.height() - 1;
    *xy++ = SkClampMax(mapper.intY(), maxY);
    SkFixed fx = mapper.fixedX();

    if (0 == maxX) {
        // all of the following X values must be 0
        memset(xy, 0, count * sizeof(uint16_t));
        return;
    }

    const SkFixed dx = s.fInvSx;

    // test if we don't need to apply the tile proc
    if ((unsigned)(fx >> 16) <= maxX &&
        (unsigned)((fx + dx * (count - 1)) >> 16) <= maxX) {
        // SSE version of decal_nofilter_scale
        if (count >= 8) {
            // Scalar prologue: two indices per iteration until xy is 16-byte aligned.
            while (((size_t)xy & 0x0F) != 0) {
                *xy++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
                fx += 2 * dx;
                count -= 2;
            }

            __m128i wide_dx4 = _mm_set1_epi32(dx * 4);
            __m128i wide_dx8 = _mm_add_epi32(wide_dx4, wide_dx4);

            // wide_low holds fx .. fx + 3*dx, wide_high holds fx + 4*dx .. fx + 7*dx.
            __m128i wide_low = _mm_set_epi32(fx + dx * 3, fx + dx * 2,
                                             fx + dx, fx);
            __m128i wide_high = _mm_add_epi32(wide_low, wide_dx4);

            while (count >= 8) {
                __m128i wide_out_low = _mm_srli_epi32(wide_low, 16);
                __m128i wide_out_high = _mm_srli_epi32(wide_high, 16);

                // Narrow eight 32-bit indices to eight 16-bit indices.
                // NOTE(review): _mm_packs_epi32 saturates as *signed* 16-bit, so an
                // index above 0x7FFF would be capped; confirm widths that large
                // cannot reach this path.
                __m128i wide_result = _mm_packs_epi32(wide_out_low,
                                                      wide_out_high);
                _mm_store_si128(reinterpret_cast<__m128i*>(xy), wide_result);

                wide_low = _mm_add_epi32(wide_low, wide_dx8);
                wide_high = _mm_add_epi32(wide_high, wide_dx8);

                xy += 4;
                // Keep the scalar fx in step with the vectors for the tail loop.
                fx += dx * 8;
                count -= 8;
            }
        } // if count >= 8

        // Scalar tail (or the whole job when count < 8): one 16-bit index at a time.
        uint16_t* xx = reinterpret_cast<uint16_t*>(xy);
        while (count-- > 0) {
            *xx++ = SkToU16(fx >> 16);
            fx += dx;
        }
    } else {
        // SSE2 only supports 16-bit integer max & min, so only process the case where
        // maxX fits in 16 bits. maxX is derived from the bitmap's width; bitmaps wider
        // than that should be rare in the real world.
        // NOTE(review): _mm_max_epi16/_mm_min_epi16 compare *signed* 16-bit lanes, so a
        // maxX in 0x8000..0xFFFF would be treated as negative by the min below. Confirm
        // whether callers can reach this path with such a width, or tighten to 0x7FFF.
        if ((count >= 8) && (maxX <= 0xFFFF)) {
            // Scalar prologue: two indices per iteration until xy is 16-byte aligned.
            while (((size_t)xy & 0x0F) != 0) {
                // The index for fx goes first and the one for fx + dx second, matching
                // the decal prologue, the wide loop and the scalar tail. Passing them
                // the other way round swaps each pair of neighbouring X indices.
                *xy++ = pack_two_shorts(SkClampMax(fx >> 16, maxX),
                                        SkClampMax((fx + dx) >> 16, maxX));
                fx += 2 * dx;
                count -= 2;
            }

            __m128i wide_dx4 = _mm_set1_epi32(dx * 4);
            __m128i wide_dx8 = _mm_add_epi32(wide_dx4, wide_dx4);

            // wide_low holds fx .. fx + 3*dx, wide_high holds fx + 4*dx .. fx + 7*dx.
            __m128i wide_low = _mm_set_epi32(fx + dx * 3, fx + dx * 2,
                                             fx + dx, fx);
            __m128i wide_high = _mm_add_epi32(wide_low, wide_dx4);
            __m128i wide_maxX = _mm_set1_epi32(maxX);

            while (count >= 8) {
                __m128i wide_out_low = _mm_srli_epi32(wide_low, 16);
                __m128i wide_out_high = _mm_srli_epi32(wide_high, 16);

                // Clamp every index to [0, maxX].
                wide_out_low = _mm_max_epi16(wide_out_low,
                                             _mm_setzero_si128());
                wide_out_low = _mm_min_epi16(wide_out_low, wide_maxX);
                wide_out_high = _mm_max_epi16(wide_out_high,
                                              _mm_setzero_si128());
                wide_out_high = _mm_min_epi16(wide_out_high, wide_maxX);

                __m128i wide_result = _mm_packs_epi32(wide_out_low,
                                                      wide_out_high);
                _mm_store_si128(reinterpret_cast<__m128i*>(xy), wide_result);

                wide_low = _mm_add_epi32(wide_low, wide_dx8);
                wide_high = _mm_add_epi32(wide_high, wide_dx8);

                xy += 4;
                // Keep the scalar fx in step with the vectors for the tail loop.
                fx += dx * 8;
                count -= 8;
            }
        } // if count >= 8

        // Scalar tail (or the whole job when the wide loop was skipped).
        uint16_t* xx = reinterpret_cast<uint16_t*>(xy);
        while (count-- > 0) {
            *xx++ = SkClampMax(fx >> 16, maxX);
            fx += dx;
        }
    }
}

View File

@ -1,18 +0,0 @@
/*
* Copyright 2009 The Android Open Source Project
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBitmapProcState_opts_SSE2_DEFINED
#define SkBitmapProcState_opts_SSE2_DEFINED
#include "SkBitmapProcState.h"
// SSE2 matrix procs for scale + translate matrices. Both take the same arguments:
//   s     - the proc state to read the matrix and pixmap dimensions from
//   xy    - output buffer for the generated sample coordinates
//   count - number of X coordinates to generate
//   x, y  - pixel position at which generation starts

// Filtered variant: writes one packed Y entry followed by `count` packed X entries.
void ClampX_ClampY_filter_scale_SSE2(const SkBitmapProcState& s, uint32_t xy[],
int count, int x, int y);
// Unfiltered variant: writes the clamped Y followed by `count` 16-bit X indices.
void ClampX_ClampY_nofilter_scale_SSE2(const SkBitmapProcState& s,
uint32_t xy[], int count, int x, int y);
#endif

View File

@ -1,10 +0,0 @@
/*
* Copyright 2011 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkBitmapProcState.h"
// Platform hook with no accelerated procs to offer: intentionally empty, so every
// proc pointer is left exactly as it was when this was called.
void SkBitmapProcState::platformProcs() {}

View File

@ -1,33 +0,0 @@
/*
* Copyright 2009 The Android Open Source Project
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkBitmapProcState_opts_SSE2.h"
#include "SkCpu.h"
/*
*****************************************
*********This file is deprecated*********
*****************************************
* New CPU-specific work should be done in
* SkOpts framework. Run-time detection of
* available instruction set extensions is
* implemented in src/core/SkOpts.cpp file
*****************************************
*/
// Swaps fMatrixProc for its SSE2 counterpart when the CPU supports SSE2 and the currently
// selected proc is one of the two portable ClampX_ClampY scale procs. Any other proc, or a
// CPU without SSE2, leaves fMatrixProc untouched.
void SkBitmapProcState::platformProcs() {
    if (SkCpu::Supports(SkCpu::SSE2)) {
        if (fMatrixProc == ClampX_ClampY_filter_scale) {
            fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
        } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
            fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
        }
    }
}