clean up SkBitmapProcState::platformProcs()
Cq-Include-Trybots: luci.chromium.try:linux-blink-rel Change-Id: Ief41c0442ce37ba0350d070a66a103095c07083c Reviewed-on: https://skia-review.googlesource.com/c/172420 Reviewed-by: Mike Klein <mtklein@google.com> Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
parent
f9910ea258
commit
f8dfe4cbbf
22
gn/opts.gni
22
gn/opts.gni
@ -6,27 +6,13 @@
|
||||
# Things are easiest for everyone if these source paths are absolute.
|
||||
_src = get_path_info("../src", "abspath")
|
||||
|
||||
none = [
|
||||
"$_src/opts/Sk4px_none.h",
|
||||
"$_src/opts/SkBitmapProcState_opts_none.cpp",
|
||||
]
|
||||
|
||||
armv7 = [ "$_src/opts/SkBitmapProcState_opts_none.cpp" ]
|
||||
|
||||
none = []
|
||||
armv7 = []
|
||||
neon = []
|
||||
|
||||
arm64 = [ "$_src/opts/SkBitmapProcState_opts_none.cpp" ]
|
||||
arm64 = []
|
||||
sse2 = []
|
||||
|
||||
crc32 = [ "$_src/opts/SkOpts_crc32.cpp" ]
|
||||
|
||||
sse2 = [
|
||||
"$_src/opts/Sk4px_SSE2.h",
|
||||
"$_src/opts/SkBitmapProcState_opts_SSE2.h",
|
||||
"$_src/opts/SkBitmapProcState_opts_SSE2.cpp",
|
||||
"$_src/opts/SkColor_opts_SSE2.h",
|
||||
"$_src/opts/opts_check_x86.cpp",
|
||||
]
|
||||
|
||||
ssse3 = [ "$_src/opts/SkOpts_ssse3.cpp" ]
|
||||
sse41 = [ "$_src/opts/SkOpts_sse41.cpp" ]
|
||||
sse42 = [ "$_src/opts/SkOpts_sse42.cpp" ]
|
||||
|
@ -261,10 +261,6 @@ bool SkBitmapProcState::chooseProcs() {
|
||||
bool translate_only = (fInvMatrix.getType() & ~SkMatrix::kTranslate_Mask) == 0;
|
||||
fMatrixProc = this->chooseMatrixProc(translate_only);
|
||||
SkASSERT(fMatrixProc);
|
||||
#if defined(SK_LEGACY_PLATFORM_MATRIX_PROCS)
|
||||
// Look for platform specializations (only fMatrixProc anymore).
|
||||
this->platformProcs();
|
||||
#endif
|
||||
|
||||
if (fFilterQuality > kNone_SkFilterQuality) {
|
||||
fSampleProc32 = SkOpts::S32_alpha_D32_filter_DX;
|
||||
|
@ -86,19 +86,6 @@ struct SkBitmapProcState : public SkBitmapProcInfo {
|
||||
SkPMColor fPaintPMColor; // chooseProcs - A8 config
|
||||
uint16_t fAlphaScale; // chooseProcs
|
||||
|
||||
/** Platforms implement this, and can optionally overwrite only the
|
||||
following fields:
|
||||
|
||||
fShaderProc32
|
||||
fMatrixProc
|
||||
fSampleProc32
|
||||
|
||||
They will already have valid function pointers, so a platform that does
|
||||
not have an accelerated version can just leave that field as is. A valid
|
||||
implementation can do nothing (see SkBitmapProcState_opts_none.cpp)
|
||||
*/
|
||||
void platformProcs();
|
||||
|
||||
/** Given the byte size of the index buffer to be passed to the matrix proc,
|
||||
return the maximum number of resulting pixels that can be computed
|
||||
(i.e. the number of SkPMColor values to be written by the sample proc).
|
||||
@ -164,16 +151,6 @@ private:
|
||||
#define pack_two_shorts(pri, sec) PACK_TWO_SHORTS(pri, sec)
|
||||
#endif
|
||||
|
||||
// These functions are generated via macros, but are exposed here so that
|
||||
// platformProcs may test for them by name.
|
||||
void S32_alpha_D32_filter_DX(const SkBitmapProcState& s,
|
||||
const uint32_t xy[], int count, SkPMColor colors[]);
|
||||
|
||||
void ClampX_ClampY_filter_scale(const SkBitmapProcState& s, uint32_t xy[],
|
||||
int count, int x, int y);
|
||||
void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
|
||||
int count, int x, int y);
|
||||
|
||||
// Helper class for mapping the middle of pixel (x, y) into SkFractionalInt bitmap space.
|
||||
// Discussion:
|
||||
// Overall, this code takes a point in destination space, and uses the center of the pixel
|
||||
|
@ -10,13 +10,8 @@
|
||||
|
||||
#define SCALE_FILTER_NAME MAKENAME(_filter_scale)
|
||||
|
||||
// declare functions externally to suppress warnings.
|
||||
void SCALE_FILTER_NAME(const SkBitmapProcState& s,
|
||||
uint32_t xy[], int count, int x, int y);
|
||||
|
||||
|
||||
void SCALE_FILTER_NAME(const SkBitmapProcState& s,
|
||||
uint32_t xy[], int count, int x, int y) {
|
||||
static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
|
||||
uint32_t xy[], int count, int x, int y) {
|
||||
SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
|
||||
SkMatrix::kScale_Mask)) == 0);
|
||||
SkASSERT(s.fInvKy == 0);
|
||||
|
@ -313,15 +313,8 @@ static void nofilter_scale(const SkBitmapProcState& s,
|
||||
#define CHECK_FOR_DECAL
|
||||
#include "SkBitmapProcState_matrix.h" // will create ClampX_ClampY_filter_scale.
|
||||
|
||||
// This and ClampX_ClampY_filter_scale() are both extern for now so that opts_check_x86.cpp
|
||||
// can identify and replace them. TODO: clean up when opts_check_x86.cpp is gone.
|
||||
// Named, non-static entry point that simply forwards every argument to
// nofilter_scale<clamp, true>.
void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s,
|
||||
uint32_t xy[], int count, int x, int y) {
|
||||
nofilter_scale<clamp, true>(s, xy, count, x,y);
|
||||
}
|
||||
|
||||
static const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
|
||||
ClampX_ClampY_nofilter_scale,
|
||||
nofilter_scale<clamp, true>,
|
||||
ClampX_ClampY_filter_scale,
|
||||
};
|
||||
|
||||
|
@ -1,284 +0,0 @@
|
||||
/*
|
||||
* Copyright 2009 The Android Open Source Project
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "SkBitmapProcState_opts_SSE2.h"
|
||||
#include "SkBitmapProcState_utils.h"
|
||||
#include "SkColorData.h"
|
||||
#include "SkTo.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
// Temporarily go into 64bit so we don't overflow during the add. Since we shift down by 16
|
||||
// in the end, the result should always fit back in 32bits.
|
||||
// Returns (a + b) >> 16, with the addition carried out in 64 bits so the
// intermediate sum cannot overflow a 32-bit value.
static inline int32_t safe_fixed_add_shift(SkFixed a, SkFixed b) {
    const int64_t wideSum = static_cast<int64_t>(a) + b;
    return SkToS32(wideSum >> 16);
}
|
||||
|
||||
// Packs one filter coordinate into a 32-bit word:
//   [ clamp(f >> 16, max) | 4 bits taken from (f >> 12) | clamp((f + one) >> 16, max) ]
// The first clamped index is shifted up by 18 bits in total, the 4-bit field
// by 14 bits, and the second clamped index occupies the low bits.
static inline uint32_t ClampX_ClampY_pack_filter(SkFixed f, unsigned max,
                                                 SkFixed one) {
    const unsigned firstIndex  = SkClampMax(f >> 16, max);
    const unsigned lowBits     = (f >> 12) & 0xF;
    const unsigned secondIndex = SkClampMax(safe_fixed_add_shift(f, one), max);

    const unsigned upper = (firstIndex << 4) | lowBits;
    return (upper << 14) | secondIndex;
}
|
||||
|
||||
/* SSE version of ClampX_ClampY_filter_scale()
|
||||
* portable version is in core/SkBitmapProcState_matrix.h
|
||||
*/
|
||||
void ClampX_ClampY_filter_scale_SSE2(const SkBitmapProcState& s, uint32_t xy[],
|
||||
int count, int x, int y) {
|
||||
SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
|
||||
SkMatrix::kScale_Mask)) == 0);
|
||||
SkASSERT(s.fInvKy == 0);
|
||||
|
||||
const unsigned maxX = s.fPixmap.width() - 1;
|
||||
const SkFixed one = s.fFilterOneX;
|
||||
const SkFixed dx = s.fInvSx;
|
||||
|
||||
const SkBitmapProcStateAutoMapper mapper(s, x, y);
|
||||
const SkFixed fy = mapper.fixedY();
|
||||
const unsigned maxY = s.fPixmap.height() - 1;
|
||||
// compute our two Y values up front
|
||||
*xy++ = ClampX_ClampY_pack_filter(fy, maxY, s.fFilterOneY);
|
||||
// now initialize fx
|
||||
SkFixed fx = mapper.fixedX();
|
||||
|
||||
// test if we don't need to apply the tile proc
|
||||
if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
|
||||
if (count >= 4) {
|
||||
// SSE version of decal_filter_scale
|
||||
while ((size_t(xy) & 0x0F) != 0) {
|
||||
SkASSERT((fx >> (16 + 14)) == 0);
|
||||
*xy++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
|
||||
fx += dx;
|
||||
count--;
|
||||
}
|
||||
|
||||
__m128i wide_1 = _mm_set1_epi32(1);
|
||||
__m128i wide_dx4 = _mm_set1_epi32(dx * 4);
|
||||
__m128i wide_fx = _mm_set_epi32(fx + dx * 3, fx + dx * 2,
|
||||
fx + dx, fx);
|
||||
|
||||
while (count >= 4) {
|
||||
__m128i wide_out;
|
||||
|
||||
wide_out = _mm_slli_epi32(_mm_srai_epi32(wide_fx, 12), 14);
|
||||
wide_out = _mm_or_si128(wide_out, _mm_add_epi32(
|
||||
_mm_srai_epi32(wide_fx, 16), wide_1));
|
||||
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(xy), wide_out);
|
||||
|
||||
xy += 4;
|
||||
fx += dx * 4;
|
||||
wide_fx = _mm_add_epi32(wide_fx, wide_dx4);
|
||||
count -= 4;
|
||||
} // while count >= 4
|
||||
} // if count >= 4
|
||||
|
||||
while (count-- > 0) {
|
||||
SkASSERT((fx >> (16 + 14)) == 0);
|
||||
*xy++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
|
||||
fx += dx;
|
||||
}
|
||||
} else {
|
||||
// SSE2 only support 16bit interger max & min, so only process the case
|
||||
// maxX less than the max 16bit interger. Actually maxX is the bitmap's
|
||||
// height, there should be rare bitmap whose height will be greater
|
||||
// than max 16bit interger in the real world.
|
||||
if ((count >= 4) && (maxX <= 0xFFFF)) {
|
||||
while (((size_t)xy & 0x0F) != 0) {
|
||||
*xy++ = ClampX_ClampY_pack_filter(fx, maxX, one);
|
||||
fx += dx;
|
||||
count--;
|
||||
}
|
||||
|
||||
__m128i wide_fx = _mm_set_epi32(fx + dx * 3, fx + dx * 2,
|
||||
fx + dx, fx);
|
||||
__m128i wide_dx4 = _mm_set1_epi32(dx * 4);
|
||||
__m128i wide_one = _mm_set1_epi32(one);
|
||||
__m128i wide_maxX = _mm_set1_epi32(maxX);
|
||||
__m128i wide_mask = _mm_set1_epi32(0xF);
|
||||
|
||||
while (count >= 4) {
|
||||
__m128i wide_i;
|
||||
__m128i wide_lo;
|
||||
__m128i wide_fx1;
|
||||
|
||||
// i = SkClampMax(f>>16,maxX)
|
||||
wide_i = _mm_max_epi16(_mm_srli_epi32(wide_fx, 16),
|
||||
_mm_setzero_si128());
|
||||
wide_i = _mm_min_epi16(wide_i, wide_maxX);
|
||||
|
||||
// i<<4 | EXTRACT_LOW_BITS(fx)
|
||||
wide_lo = _mm_srli_epi32(wide_fx, 12);
|
||||
wide_lo = _mm_and_si128(wide_lo, wide_mask);
|
||||
wide_i = _mm_slli_epi32(wide_i, 4);
|
||||
wide_i = _mm_or_si128(wide_i, wide_lo);
|
||||
|
||||
// i<<14
|
||||
wide_i = _mm_slli_epi32(wide_i, 14);
|
||||
|
||||
// SkClampMax(((f+one))>>16,max)
|
||||
wide_fx1 = _mm_add_epi32(wide_fx, wide_one);
|
||||
wide_fx1 = _mm_max_epi16(_mm_srli_epi32(wide_fx1, 16),
|
||||
_mm_setzero_si128());
|
||||
wide_fx1 = _mm_min_epi16(wide_fx1, wide_maxX);
|
||||
|
||||
// final combination
|
||||
wide_i = _mm_or_si128(wide_i, wide_fx1);
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(xy), wide_i);
|
||||
|
||||
wide_fx = _mm_add_epi32(wide_fx, wide_dx4);
|
||||
fx += dx * 4;
|
||||
xy += 4;
|
||||
count -= 4;
|
||||
} // while count >= 4
|
||||
} // if count >= 4
|
||||
|
||||
/*
|
||||
while (count-- > 0) {
|
||||
*xy++ = ClampX_ClampY_pack_filter(fx, maxX, one);
|
||||
fx += dx;
|
||||
}
|
||||
We'd like to write this as above, but that form allows fx to get 1-iteration too big/small
|
||||
when count is 0, and this can trigger a UBSAN error, even though we won't in fact use that
|
||||
last (undefined) value for fx.
|
||||
|
||||
Here is an alternative that should always be efficient, but seems much harder to read:
|
||||
|
||||
if (count > 0) {
|
||||
for (;;) {
|
||||
*xy++ = ClampX_ClampY_pack_filter(fx, maxX, one);
|
||||
if (--count == 0) break;
|
||||
fx += dx;
|
||||
}
|
||||
}
|
||||
|
||||
For now, we'll try this variant: more compact than the if/for version, and we hope the
|
||||
compiler will get rid of the integer multiply.
|
||||
*/
|
||||
for (int i = 0; i < count; ++i) {
|
||||
*xy++ = ClampX_ClampY_pack_filter(fx + i*dx, maxX, one);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* SSE version of ClampX_ClampY_nofilter_scale()
|
||||
* portable version is in core/SkBitmapProcState_matrix.h
|
||||
*/
|
||||
void ClampX_ClampY_nofilter_scale_SSE2(const SkBitmapProcState& s,
|
||||
uint32_t xy[], int count, int x, int y) {
|
||||
SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
|
||||
SkMatrix::kScale_Mask)) == 0);
|
||||
|
||||
// we store y, x, x, x, x, x
|
||||
const unsigned maxX = s.fPixmap.width() - 1;
|
||||
const SkBitmapProcStateAutoMapper mapper(s, x, y);
|
||||
const unsigned maxY = s.fPixmap.height() - 1;
|
||||
*xy++ = SkClampMax(mapper.intY(), maxY);
|
||||
SkFixed fx = mapper.fixedX();
|
||||
|
||||
if (0 == maxX) {
|
||||
// all of the following X values must be 0
|
||||
memset(xy, 0, count * sizeof(uint16_t));
|
||||
return;
|
||||
}
|
||||
|
||||
const SkFixed dx = s.fInvSx;
|
||||
|
||||
// test if we don't need to apply the tile proc
|
||||
if ((unsigned)(fx >> 16) <= maxX &&
|
||||
(unsigned)((fx + dx * (count - 1)) >> 16) <= maxX) {
|
||||
// SSE version of decal_nofilter_scale
|
||||
if (count >= 8) {
|
||||
while (((size_t)xy & 0x0F) != 0) {
|
||||
*xy++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
|
||||
fx += 2 * dx;
|
||||
count -= 2;
|
||||
}
|
||||
|
||||
__m128i wide_dx4 = _mm_set1_epi32(dx * 4);
|
||||
__m128i wide_dx8 = _mm_add_epi32(wide_dx4, wide_dx4);
|
||||
|
||||
__m128i wide_low = _mm_set_epi32(fx + dx * 3, fx + dx * 2,
|
||||
fx + dx, fx);
|
||||
__m128i wide_high = _mm_add_epi32(wide_low, wide_dx4);
|
||||
|
||||
while (count >= 8) {
|
||||
__m128i wide_out_low = _mm_srli_epi32(wide_low, 16);
|
||||
__m128i wide_out_high = _mm_srli_epi32(wide_high, 16);
|
||||
|
||||
__m128i wide_result = _mm_packs_epi32(wide_out_low,
|
||||
wide_out_high);
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(xy), wide_result);
|
||||
|
||||
wide_low = _mm_add_epi32(wide_low, wide_dx8);
|
||||
wide_high = _mm_add_epi32(wide_high, wide_dx8);
|
||||
|
||||
xy += 4;
|
||||
fx += dx * 8;
|
||||
count -= 8;
|
||||
}
|
||||
} // if count >= 8
|
||||
|
||||
uint16_t* xx = reinterpret_cast<uint16_t*>(xy);
|
||||
while (count-- > 0) {
|
||||
*xx++ = SkToU16(fx >> 16);
|
||||
fx += dx;
|
||||
}
|
||||
} else {
|
||||
// SSE2 only support 16bit interger max & min, so only process the case
|
||||
// maxX less than the max 16bit interger. Actually maxX is the bitmap's
|
||||
// height, there should be rare bitmap whose height will be greater
|
||||
// than max 16bit interger in the real world.
|
||||
if ((count >= 8) && (maxX <= 0xFFFF)) {
|
||||
while (((size_t)xy & 0x0F) != 0) {
|
||||
*xy++ = pack_two_shorts(SkClampMax((fx + dx) >> 16, maxX),
|
||||
SkClampMax(fx >> 16, maxX));
|
||||
fx += 2 * dx;
|
||||
count -= 2;
|
||||
}
|
||||
|
||||
__m128i wide_dx4 = _mm_set1_epi32(dx * 4);
|
||||
__m128i wide_dx8 = _mm_add_epi32(wide_dx4, wide_dx4);
|
||||
|
||||
__m128i wide_low = _mm_set_epi32(fx + dx * 3, fx + dx * 2,
|
||||
fx + dx, fx);
|
||||
__m128i wide_high = _mm_add_epi32(wide_low, wide_dx4);
|
||||
__m128i wide_maxX = _mm_set1_epi32(maxX);
|
||||
|
||||
while (count >= 8) {
|
||||
__m128i wide_out_low = _mm_srli_epi32(wide_low, 16);
|
||||
__m128i wide_out_high = _mm_srli_epi32(wide_high, 16);
|
||||
|
||||
wide_out_low = _mm_max_epi16(wide_out_low,
|
||||
_mm_setzero_si128());
|
||||
wide_out_low = _mm_min_epi16(wide_out_low, wide_maxX);
|
||||
wide_out_high = _mm_max_epi16(wide_out_high,
|
||||
_mm_setzero_si128());
|
||||
wide_out_high = _mm_min_epi16(wide_out_high, wide_maxX);
|
||||
|
||||
__m128i wide_result = _mm_packs_epi32(wide_out_low,
|
||||
wide_out_high);
|
||||
_mm_store_si128(reinterpret_cast<__m128i*>(xy), wide_result);
|
||||
|
||||
wide_low = _mm_add_epi32(wide_low, wide_dx8);
|
||||
wide_high = _mm_add_epi32(wide_high, wide_dx8);
|
||||
|
||||
xy += 4;
|
||||
fx += dx * 8;
|
||||
count -= 8;
|
||||
}
|
||||
} // if count >= 8
|
||||
|
||||
uint16_t* xx = reinterpret_cast<uint16_t*>(xy);
|
||||
while (count-- > 0) {
|
||||
*xx++ = SkClampMax(fx >> 16, maxX);
|
||||
fx += dx;
|
||||
}
|
||||
}
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
/*
|
||||
* Copyright 2009 The Android Open Source Project
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkBitmapProcState_opts_SSE2_DEFINED
|
||||
#define SkBitmapProcState_opts_SSE2_DEFINED
|
||||
|
||||
#include "SkBitmapProcState.h"
|
||||
|
||||
void ClampX_ClampY_filter_scale_SSE2(const SkBitmapProcState& s, uint32_t xy[],
|
||||
int count, int x, int y);
|
||||
void ClampX_ClampY_nofilter_scale_SSE2(const SkBitmapProcState& s,
|
||||
uint32_t xy[], int count, int x, int y);
|
||||
|
||||
#endif
|
@ -1,10 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "SkBitmapProcState.h"
|
||||
|
||||
// Intentionally empty: this implementation replaces none of the procs.
void SkBitmapProcState::platformProcs() {}
|
@ -1,33 +0,0 @@
|
||||
/*
|
||||
* Copyright 2009 The Android Open Source Project
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "SkBitmapProcState_opts_SSE2.h"
|
||||
#include "SkCpu.h"
|
||||
|
||||
/*
|
||||
*****************************************
|
||||
*********This file is deprecated*********
|
||||
*****************************************
|
||||
* New CPU-specific work should be done in
|
||||
* SkOpts framework. Run-time detection of
|
||||
* available instruction set extensions is
|
||||
* implemented in src/core/SkOpts.cpp file
|
||||
*****************************************
|
||||
*/
|
||||
|
||||
void SkBitmapProcState::platformProcs() {
|
||||
if (!SkCpu::Supports(SkCpu::SSE2)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (fMatrixProc == ClampX_ClampY_filter_scale) {
|
||||
fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
|
||||
}
|
||||
if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
|
||||
fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user