Assume HQ is handled by pipeline, delete legacy code-path

CQ_INCLUDE_TRYBOTS=skia.primary:Test-Debian9-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD
Bug: skia:
Change-Id: If6f0d0a57463bf99a66d674e65a62ce3931d0116
Reviewed-on: https://skia-review.googlesource.com/24644
Commit-Queue: Mike Reed <reed@google.com>
Reviewed-by: Mike Klein <mtklein@chromium.org>
This commit is contained in:
Mike Reed 2017-07-19 17:20:37 -04:00 committed by Skia Commit-Bot
parent 3e583cba8a
commit e32500f064
26 changed files with 25 additions and 2381 deletions

View File

@ -297,22 +297,6 @@ opts("avx") {
} }
} }
opts("hsw") {
enabled = is_x86
sources = skia_opts.hsw_sources
if (is_win) {
cflags = [ "/arch:AVX2" ]
} else {
cflags = [
"-mavx2",
"-mbmi",
"-mbmi2",
"-mf16c",
"-mfma",
]
}
}
# Any feature of Skia that requires third-party code should be optional and use this template. # Any feature of Skia that requires third-party code should be optional and use this template.
template("optional") { template("optional") {
if (invoker.enabled) { if (invoker.enabled) {
@ -668,7 +652,6 @@ component("skia") {
":fontmgr_fontconfig", ":fontmgr_fontconfig",
":fontmgr_fuchsia", ":fontmgr_fuchsia",
":gpu", ":gpu",
":hsw",
":jpeg", ":jpeg",
":none", ":none",
":pdf", ":pdf",

View File

@ -1,161 +0,0 @@
/*
* Copyright 2013 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "Benchmark.h"
#include "SkBlurMask.h"
#include "SkCanvas.h"
#include "SkPaint.h"
#include "SkRandom.h"
#include "SkShader.h"
#include "SkString.h"
/**
 *  Common scaffolding for benchmarks that scale a square source bitmap into a
 *  square destination bitmap. Subclasses provide the actual scaling work in
 *  doScaleImage(); this class owns the bitmaps, the scale matrix and the
 *  benchmark name.
 */
class BitmapScaleBench : public Benchmark {
    int      fLoopCount;
    int      fInputSize;
    int      fOutputSize;
    SkString fName;

public:
    /** @param is  edge length of the source bitmap
     *  @param os  edge length of the destination bitmap
     */
    BitmapScaleBench(int is, int os)
        : fLoopCount(20)
        , fInputSize(is)
        , fOutputSize(os) {}

protected:
    SkBitmap fInputBitmap, fOutputBitmap;
    SkMatrix fMatrix;

    const char* onGetName() override { return fName.c_str(); }

    int inputSize() const { return fInputSize; }
    int outputSize() const { return fOutputSize; }

    /** Ratio of the destination edge length to the source edge length. */
    float scale() const {
        const float dstEdge = float(this->outputSize());
        return dstEdge / this->inputSize();
    }

    SkIPoint onGetSize() override {
        return SkIPoint::Make(fOutputSize, fOutputSize);
    }

    /** Builds the benchmark name from a tag plus the source and destination sizes. */
    void setName(const char * name) {
        fName.printf("bitmap_scale_%s_%d_%d", name, fInputSize, fOutputSize);
    }

    /** Allocates both bitmaps, fills the source with white and sets up the scale matrix. */
    void onDelayedSetup() override {
        fInputBitmap.allocN32Pixels(fInputSize, fInputSize, true);
        fInputBitmap.eraseColor(SK_ColorWHITE);
        fOutputBitmap.allocN32Pixels(fOutputSize, fOutputSize, true);

        const float factor = this->scale();
        fMatrix.setScale(factor, factor);
    }

    /** Runs preBenchSetup() once and then doScaleImage() `loops` times. */
    void onDraw(int loops, SkCanvas*) override {
        SkPaint paint;
        this->setupPaint(&paint);

        this->preBenchSetup();
        int remaining = loops;
        while (remaining-- > 0) {
            this->doScaleImage();
        }
    }

    /** One timed scaling operation; implemented by subclasses. */
    virtual void doScaleImage() = 0;
    /** Optional hook invoked once per onDraw() before the timed loop. */
    virtual void preBenchSetup() {}

private:
    typedef Benchmark INHERITED;
};
class BitmapFilterScaleBench: public BitmapScaleBench {
public:
BitmapFilterScaleBench( int is, int os) : INHERITED(is, os) {
setName( "filter" );
}
protected:
void doScaleImage() override {
SkCanvas canvas( fOutputBitmap );
SkPaint paint;
paint.setFilterQuality(kHigh_SkFilterQuality);
fInputBitmap.notifyPixelsChanged();
canvas.concat(fMatrix);
canvas.drawBitmap(fInputBitmap, 0, 0, &paint );
}
private:
typedef BitmapScaleBench INHERITED;
};
// Registered configurations; the arguments are (source edge length, destination edge length).
// The set covers upscaling, a same-size case (90 -> 90) and downscaling.
DEF_BENCH(return new BitmapFilterScaleBench(10, 90);)
DEF_BENCH(return new BitmapFilterScaleBench(30, 90);)
DEF_BENCH(return new BitmapFilterScaleBench(80, 90);)
DEF_BENCH(return new BitmapFilterScaleBench(90, 90);)
DEF_BENCH(return new BitmapFilterScaleBench(90, 80);)
DEF_BENCH(return new BitmapFilterScaleBench(90, 30);)
DEF_BENCH(return new BitmapFilterScaleBench(90, 10);)
DEF_BENCH(return new BitmapFilterScaleBench(256, 64);)
DEF_BENCH(return new BitmapFilterScaleBench(64, 256);)
///////////////////////////////////////////////////////////////////////////////////////////////
#include "SkBitmapScaler.h"
class PixmapScalerBench: public Benchmark {
SkBitmapScaler::ResizeMethod fMethod;
SkString fName;
SkBitmap fSrc, fDst;
public:
PixmapScalerBench(SkBitmapScaler::ResizeMethod method, const char suffix[]) : fMethod(method) {
fName.printf("pixmapscaler_%s", suffix);
}
protected:
const char* onGetName() override {
return fName.c_str();
}
SkIPoint onGetSize() override { return{ 100, 100 }; }
bool isSuitableFor(Backend backend) override {
return backend == kNonRendering_Backend;
}
void onDelayedSetup() override {
fSrc.allocN32Pixels(640, 480);
fSrc.eraseColor(SK_ColorWHITE);
fDst.allocN32Pixels(300, 250);
}
void onDraw(int loops, SkCanvas*) override {
SkPixmap src, dst;
fSrc.peekPixels(&src);
fDst.peekPixels(&dst);
for (int i = 0; i < loops * 16; i++) {
SkBitmapScaler::Resize(dst, src, fMethod);
}
}
private:
typedef Benchmark INHERITED;
};
// One benchmark per SkBitmapScaler resize method; the string becomes the name suffix.
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_LANCZOS3, "lanczos"); )
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_MITCHELL, "mitchell"); )
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_HAMMING, "hamming"); )
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_TRIANGLE, "triangle"); )
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_BOX, "box"); )

View File

@ -10,7 +10,6 @@
#include "Resources.h" #include "Resources.h"
#include "SkBitmapProcState.h" #include "SkBitmapProcState.h"
#include "SkBitmapScaler.h"
#include "SkGradientShader.h" #include "SkGradientShader.h"
#include "SkImageEncoder.h" #include "SkImageEncoder.h"
#include "SkStream.h" #include "SkStream.h"
@ -75,7 +74,7 @@ protected:
} }
SkISize onISize() override { SkISize onISize() override {
return SkISize::Make(1024, 768); return SkISize::Make(680, 130);
} }
void onDraw(SkCanvas* canvas) override { void onDraw(SkCanvas* canvas) override {

View File

@ -9,7 +9,6 @@
#include "sk_tool_utils.h" #include "sk_tool_utils.h"
#include "Resources.h" #include "Resources.h"
#include "SkBitmapScaler.h"
#include "SkGradientShader.h" #include "SkGradientShader.h"
#include "SkTypeface.h" #include "SkTypeface.h"
#include "SkStream.h" #include "SkStream.h"
@ -114,9 +113,7 @@ protected:
return str; return str;
} }
SkISize onISize() override { SkISize onISize() override { return { 150, 862 }; }
return { 824, 862 };
}
static void DrawAndFrame(SkCanvas* canvas, const SkBitmap& orig, SkScalar x, SkScalar y) { static void DrawAndFrame(SkCanvas* canvas, const SkBitmap& orig, SkScalar x, SkScalar y) {
SkBitmap bm; SkBitmap bm;
@ -169,25 +166,6 @@ protected:
bm.installPixels(curr); bm.installPixels(curr);
return bm; return bm;
}); });
const SkBitmapScaler::ResizeMethod methods[] = {
SkBitmapScaler::RESIZE_BOX,
SkBitmapScaler::RESIZE_TRIANGLE,
SkBitmapScaler::RESIZE_LANCZOS3,
SkBitmapScaler::RESIZE_HAMMING,
SkBitmapScaler::RESIZE_MITCHELL,
};
SkPixmap basePM;
orig.peekPixels(&basePM);
for (auto method : methods) {
canvas->translate(orig.width()/2 + 8.0f, 0);
drawLevels(canvas, orig, [method](const SkPixmap& prev, const SkPixmap& curr) {
SkBitmap bm;
SkBitmapScaler::Resize(&bm, prev, method, curr.width(), curr.height());
return bm;
});
}
} }
void onOnceBeforeDraw() override { void onOnceBeforeDraw() override {

View File

@ -17,7 +17,6 @@ bench_sources = [
"$_bench/BitmapBench.cpp", "$_bench/BitmapBench.cpp",
"$_bench/BitmapRectBench.cpp", "$_bench/BitmapRectBench.cpp",
"$_bench/BitmapRegionDecoderBench.cpp", "$_bench/BitmapRegionDecoderBench.cpp",
"$_bench/BitmapScaleBench.cpp",
"$_bench/BlendmodeBench.cpp", "$_bench/BlendmodeBench.cpp",
"$_bench/BlurBench.cpp", "$_bench/BlurBench.cpp",
"$_bench/BlurImageFilterBench.cpp", "$_bench/BlurImageFilterBench.cpp",

View File

@ -31,7 +31,6 @@ skia_core_sources = [
"$_src/core/SkBitmapController.cpp", "$_src/core/SkBitmapController.cpp",
"$_src/core/SkBitmapDevice.cpp", "$_src/core/SkBitmapDevice.cpp",
"$_src/core/SkBitmapDevice.h", "$_src/core/SkBitmapDevice.h",
"$_src/core/SkBitmapFilter.h",
"$_src/core/SkBitmapProcState.cpp", "$_src/core/SkBitmapProcState.cpp",
"$_src/core/SkBitmapProcState.h", "$_src/core/SkBitmapProcState.h",
"$_src/core/SkBitmapProcState_filter.h", "$_src/core/SkBitmapProcState_filter.h",
@ -44,8 +43,6 @@ skia_core_sources = [
"$_src/core/SkBitmapProcState_utils.h", "$_src/core/SkBitmapProcState_utils.h",
"$_src/core/SkBitmapProvider.cpp", "$_src/core/SkBitmapProvider.cpp",
"$_src/core/SkBitmapProvider.h", "$_src/core/SkBitmapProvider.h",
"$_src/core/SkBitmapScaler.h",
"$_src/core/SkBitmapScaler.cpp",
"$_src/core/SkBlendMode.cpp", "$_src/core/SkBlendMode.cpp",
"$_src/core/SkBlitBWMaskTemplate.h", "$_src/core/SkBlitBWMaskTemplate.h",
"$_src/core/SkBlitMask.h", "$_src/core/SkBlitMask.h",
@ -89,8 +86,6 @@ skia_core_sources = [
"$_src/core/SkColorTable.cpp", "$_src/core/SkColorTable.cpp",
"$_src/core/SkConvertPixels.cpp", "$_src/core/SkConvertPixels.cpp",
"$_src/core/SkConvertPixels.h", "$_src/core/SkConvertPixels.h",
"$_src/core/SkConvolver.cpp",
"$_src/core/SkConvolver.h",
"$_src/core/SkCoreBlitters.h", "$_src/core/SkCoreBlitters.h",
"$_src/core/SkCpu.cpp", "$_src/core/SkCpu.cpp",
"$_src/core/SkCpu.h", "$_src/core/SkCpu.h",

View File

@ -294,8 +294,7 @@ with open('Android.bp', 'w') as f:
defs['ssse3'] + defs['ssse3'] +
defs['sse41'] + defs['sse41'] +
defs['sse42'] + defs['sse42'] +
defs['avx' ] + defs['avx' ])),
defs['hsw' ])),
'tool_cflags' : bpfmt(8, tool_cflags), 'tool_cflags' : bpfmt(8, tool_cflags),
'tool_shared_libs' : bpfmt(8, tool_shared_libs), 'tool_shared_libs' : bpfmt(8, tool_shared_libs),

View File

@ -51,4 +51,3 @@ ssse3 = [
sse41 = [ "$_src/opts/SkOpts_sse41.cpp" ] sse41 = [ "$_src/opts/SkOpts_sse41.cpp" ]
sse42 = [ "$_src/opts/SkOpts_sse42.cpp" ] sse42 = [ "$_src/opts/SkOpts_sse42.cpp" ]
avx = [ "$_src/opts/SkOpts_avx.cpp" ] avx = [ "$_src/opts/SkOpts_avx.cpp" ]
hsw = [ "$_src/opts/SkOpts_hsw.cpp" ]

View File

@ -24,7 +24,7 @@ skia_opts = {
sse41_sources = sse41 sse41_sources = sse41
sse42_sources = sse42 sse42_sources = sse42
avx_sources = avx avx_sources = avx
hsw_sources = hsw hsw_sources = [] # remove after we update Chrome
} }
# Skia Chromium defines. These flags will be defined in chromium If these # Skia Chromium defines. These flags will be defined in chromium If these

View File

@ -6,15 +6,13 @@
*/ */
#include "SkBitmap.h" #include "SkBitmap.h"
#include "SkBitmapCache.h"
#include "SkBitmapController.h" #include "SkBitmapController.h"
#include "SkBitmapProvider.h" #include "SkBitmapProvider.h"
#include "SkMatrix.h" #include "SkMatrix.h"
#include "SkPixelRef.h" #include "SkMipMap.h"
#include "SkTemplates.h" #include "SkTemplates.h"
// RESIZE_LANCZOS3 is another good option, but chrome prefers mitchell at the moment
#define kHQ_RESIZE_METHOD SkBitmapScaler::RESIZE_MITCHELL
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvider& provider, SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvider& provider,
@ -33,70 +31,24 @@ SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvi
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
#include "SkBitmapCache.h"
#include "SkBitmapScaler.h"
#include "SkMipMap.h"
#include "SkResourceCache.h"
class SkDefaultBitmapControllerState : public SkBitmapController::State { class SkDefaultBitmapControllerState : public SkBitmapController::State {
public: public:
SkDefaultBitmapControllerState(const SkBitmapProvider&, SkDefaultBitmapControllerState(const SkBitmapProvider&, const SkMatrix& inv, SkFilterQuality);
const SkMatrix& inv,
SkFilterQuality,
bool canShadeHQ);
private: private:
SkBitmap fResultBitmap; SkBitmap fResultBitmap;
sk_sp<const SkMipMap> fCurrMip; sk_sp<const SkMipMap> fCurrMip;
bool fCanShadeHQ;
bool processHQRequest(const SkBitmapProvider&); bool processHighRequest(const SkBitmapProvider&);
bool processMediumRequest(const SkBitmapProvider&); bool processMediumRequest(const SkBitmapProvider&);
}; };
// Check to see that the size of the bitmap that would be produced by bool SkDefaultBitmapControllerState::processHighRequest(const SkBitmapProvider& provider) {
// scaling by the given inverted matrix is less than the maximum allowed.
static inline bool cache_size_okay(const SkBitmapProvider& provider, const SkMatrix& invMat) {
size_t maximumAllocation = SkResourceCache::GetEffectiveSingleAllocationByteLimit();
if (0 == maximumAllocation) {
return true;
}
// float matrixScaleFactor = 1.0 / (invMat.scaleX * invMat.scaleY);
// return ((origBitmapSize * matrixScaleFactor) < maximumAllocationSize);
// Skip the division step:
const size_t size = provider.info().getSafeSize(provider.info().minRowBytes());
SkScalar invScaleSqr = invMat.getScaleX() * invMat.getScaleY();
return size < (maximumAllocation * SkScalarAbs(invScaleSqr));
}
/*
* High quality is implemented by performing up-right scale-only filtering and then
* using bilerp for any remaining transformations.
*/
bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& provider) {
if (fQuality != kHigh_SkFilterQuality) { if (fQuality != kHigh_SkFilterQuality) {
return false; return false;
} }
// Our default return state is to downgrade the request to Medium, w/ or w/o setting fBitmap
// to a valid bitmap. If we succeed, we will set this to Low instead.
fQuality = kMedium_SkFilterQuality; fQuality = kMedium_SkFilterQuality;
#ifdef SK_USE_MIP_FOR_DOWNSCALE_HQ
return false;
#endif
bool supported = false;
switch (provider.info().colorType()) {
case kRGBA_8888_SkColorType:
case kBGRA_8888_SkColorType:
supported = true;
break;
default:
break;
}
if (!supported || !cache_size_okay(provider, fInvMatrix) || fInvMatrix.hasPerspective()) {
return false; // can't handle the reqeust
}
SkScalar invScaleX = fInvMatrix.getScaleX(); SkScalar invScaleX = fInvMatrix.getScaleX();
SkScalar invScaleY = fInvMatrix.getScaleY(); SkScalar invScaleY = fInvMatrix.getScaleY();
@ -111,68 +63,14 @@ bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& pr
invScaleX = SkScalarAbs(invScaleX); invScaleX = SkScalarAbs(invScaleX);
invScaleY = SkScalarAbs(invScaleY); invScaleY = SkScalarAbs(invScaleY);
if (SkScalarNearlyEqual(invScaleX, 1) && SkScalarNearlyEqual(invScaleY, 1)) { if (invScaleX >= 1 - SK_ScalarNearlyZero || invScaleY >= 1 - SK_ScalarNearlyZero) {
return false; // no need for HQ // we're down-scaling so abort HQ
return false;
} }
if (invScaleX > 1 || invScaleY > 1) { // Confirmed that we can use HQ (w/ rasterpipeline)
return false; // only use HQ when upsampling fQuality = kHigh_SkFilterQuality;
} (void)provider.asBitmap(&fResultBitmap);
// If the shader can natively handle HQ filtering, let it do it.
if (fCanShadeHQ) {
fQuality = kHigh_SkFilterQuality;
SkAssertResult(provider.asBitmap(&fResultBitmap));
return true;
}
const int dstW = SkScalarRoundToScalar(provider.width() / invScaleX);
const int dstH = SkScalarRoundToScalar(provider.height() / invScaleY);
const SkBitmapCacheDesc desc = provider.makeCacheDesc(dstW, dstH);
if (!SkBitmapCache::Find(desc, &fResultBitmap)) {
SkBitmap orig;
if (!provider.asBitmap(&orig)) {
return false;
}
SkPixmap src;
if (!orig.peekPixels(&src)) {
return false;
}
SkPixmap dst;
SkBitmapCache::RecPtr rec;
const SkImageInfo info = SkImageInfo::Make(desc.fScaledWidth, desc.fScaledHeight,
src.colorType(), src.alphaType());
if (provider.isVolatile()) {
if (!fResultBitmap.tryAllocPixels(info)) {
return false;
}
SkASSERT(fResultBitmap.getPixels());
fResultBitmap.peekPixels(&dst);
fResultBitmap.setImmutable(); // a little cheat, as we haven't resized yet, but ok
} else {
rec = SkBitmapCache::Alloc(desc, info, &dst);
if (!rec) {
return false;
}
}
if (!SkBitmapScaler::Resize(dst, src, kHQ_RESIZE_METHOD)) {
return false; // we failed to create fScaledBitmap
}
if (rec) {
SkBitmapCache::Add(std::move(rec), &fResultBitmap);
SkASSERT(fResultBitmap.getPixels());
provider.notifyAddedToCache();
}
}
SkASSERT(fResultBitmap.getPixels());
SkASSERT(fResultBitmap.isImmutable());
fInvMatrix.postScale(SkIntToScalar(dstW) / provider.width(),
SkIntToScalar(dstH) / provider.height());
fQuality = kLow_SkFilterQuality;
return true; return true;
} }
@ -235,20 +133,15 @@ bool SkDefaultBitmapControllerState::processMediumRequest(const SkBitmapProvider
SkDefaultBitmapControllerState::SkDefaultBitmapControllerState(const SkBitmapProvider& provider, SkDefaultBitmapControllerState::SkDefaultBitmapControllerState(const SkBitmapProvider& provider,
const SkMatrix& inv, const SkMatrix& inv,
SkFilterQuality qual, SkFilterQuality qual) {
bool canShadeHQ) {
fInvMatrix = inv; fInvMatrix = inv;
fQuality = qual; fQuality = qual;
fCanShadeHQ = canShadeHQ;
bool processed = this->processHQRequest(provider) || this->processMediumRequest(provider); if (this->processHighRequest(provider) || this->processMediumRequest(provider)) {
if (processed) {
SkASSERT(fResultBitmap.getPixels()); SkASSERT(fResultBitmap.getPixels());
} else { } else {
(void)provider.asBitmap(&fResultBitmap); (void)provider.asBitmap(&fResultBitmap);
} }
SkASSERT(fCanShadeHQ || fQuality <= kLow_SkFilterQuality);
// fResultBitmap.getPixels() may be null, but our caller knows to check fPixmap.addr() // fResultBitmap.getPixels() may be null, but our caller knows to check fPixmap.addr()
// and will destroy us if it is nullptr. // and will destroy us if it is nullptr.
@ -259,6 +152,5 @@ SkBitmapController::State* SkDefaultBitmapController::onRequestBitmap(const SkBi
const SkMatrix& inverse, const SkMatrix& inverse,
SkFilterQuality quality, SkFilterQuality quality,
void* storage, size_t size) { void* storage, size_t size) {
return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size, return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size, bm, inverse, quality);
bm, inverse, quality, fCanShadeHQ);
} }

View File

@ -57,14 +57,11 @@ protected:
class SkDefaultBitmapController : public SkBitmapController { class SkDefaultBitmapController : public SkBitmapController {
public: public:
enum class CanShadeHQ { kNo, kYes }; SkDefaultBitmapController() {}
SkDefaultBitmapController(CanShadeHQ canShadeHQ)
: fCanShadeHQ(canShadeHQ == CanShadeHQ::kYes) {}
protected: protected:
State* onRequestBitmap(const SkBitmapProvider&, const SkMatrix& inverse, SkFilterQuality, State* onRequestBitmap(const SkBitmapProvider&, const SkMatrix& inverse, SkFilterQuality,
void* storage, size_t storageSize) override; void* storage, size_t storageSize) override;
bool fCanShadeHQ;
}; };
#endif #endif

View File

@ -1,209 +0,0 @@
/*
* Copyright 2013 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBitmapFilter_DEFINED
#define SkBitmapFilter_DEFINED
#include "SkFixed.h"
#include "SkMath.h"
#include "SkScalar.h"
#include "SkNx.h"
// size of the precomputed bitmap filter tables for high quality filtering.
// Used to precompute the shape of the filter kernel.
// Table size chosen from experiments to see where I could start to see a difference.
#define SKBITMAP_FILTER_TABLE_SIZE 128
/**
 *  Abstract 1-D filter kernel. Subclasses implement evaluate(); this base
 *  class stores the kernel width and offers a lazily built lookup table of
 *  SKBITMAP_FILTER_TABLE_SIZE samples of the kernel.
 */
class SkBitmapFilter {
public:
    SkBitmapFilter(float width)
        : fWidth(width)
        , fInvWidth(1.f/width)
        , fLookupMultiplier(fInvWidth * (SKBITMAP_FILTER_TABLE_SIZE-1))
        , fPrecomputed(false) {}
    virtual ~SkBitmapFilter() {}

    /**
     *  Returns the tabulated kernel value for x, building the table on first
     *  use. The table index is |x * fLookupMultiplier| truncated to int and is
     *  asserted to be inside the table.
     */
    SkScalar lookupScalar(float x) const {
        if (!fPrecomputed) {
            precomputeTable();
        }
        const float scaled = x * fLookupMultiplier;
        const int slot = int(sk_float_abs(scaled));
        SkASSERT(slot < SKBITMAP_FILTER_TABLE_SIZE);
        return fFilterTableScalar[slot];
    }

    float width() const { return fWidth; }
    float invWidth() const { return fInvWidth; }

    /** Kernel value at x. */
    virtual float evaluate(float x) const = 0;

    /**
     *  Evaluates the kernel at val, val+diff, val+2*diff, ... for `count`
     *  samples, writing each value to `output`.
     *  @return the sum of the values written
     */
    virtual float evaluate_n(float val, float diff, int count, float* output) const {
        float total = 0;
        for (int i = 0; i < count; ++i, val += diff) {
            const float weight = evaluate(val);
            output[i] = weight;
            total += weight;
        }
        return total;
    }

protected:
    float fWidth;
    float fInvWidth;
    float fLookupMultiplier;

    mutable bool     fPrecomputed;
    mutable SkScalar fFilterTableScalar[SKBITMAP_FILTER_TABLE_SIZE];

private:
    /** Fills the table by sampling evaluate() at the midpoint of each table slot over [0, width). */
    void precomputeTable() const {
        fPrecomputed = true;
        for (int slot = 0; slot < SKBITMAP_FILTER_TABLE_SIZE; ++slot) {
            const float center = ((float)slot + .5f) * this->width() / SKBITMAP_FILTER_TABLE_SIZE;
            fFilterTableScalar[slot] = evaluate(center);
        }
    }
};
// Mitchell filter: a piecewise cubic kernel of width 2 parameterised by fB and fC,
// both fixed at 1/3 here. The polynomial coefficients are precomputed once in the
// constructor; every evaluation multiplies the polynomial by 1/6.
class SkMitchellFilter final : public SkBitmapFilter {
public:
SkMitchellFilter()
: INHERITED(2)
, fB(1.f / 3.f)
, fC(1.f / 3.f)
// Coefficients of the cubic used for 1 < |x| <= 2.
, fA1(-fB - 6*fC)
, fB1(6*fB + 30*fC)
, fC1(-12*fB - 48*fC)
, fD1(8*fB + 24*fC)
// Coefficients of the cubic used for |x| <= 1 (it has no linear term).
, fA2(12 - 9*fB - 6*fC)
, fB2(-18 + 12*fB + 6*fC)
, fD2(6 - 2*fB)
{}
// Scalar kernel evaluation: 0 beyond |x| > 2, otherwise one of the two cubics.
float evaluate(float x) const override {
x = fabsf(x);
if (x > 2.f) {
return 0;
} else if (x > 1.f) {
return (((fA1 * x + fB1) * x + fC1) * x + fD1) * (1.f/6.f);
} else {
return ((fA2 * x + fB2) * x*x + fD2) * (1.f/6.f);
}
}
// Four-lane version of evaluate(): both polynomials are computed for every lane
// and the per-lane comparison masks select which result (or 0) is returned.
Sk4f evalcore_n(const Sk4f& val) const {
Sk4f x = val.abs();
Sk4f over2 = x > Sk4f(2);
Sk4f over1 = x > Sk4f(1);
Sk4f poly1 = (((Sk4f(fA1) * x + Sk4f(fB1)) * x + Sk4f(fC1)) * x + Sk4f(fD1))
* Sk4f(1.f/6.f);
Sk4f poly0 = ((Sk4f(fA2) * x + Sk4f(fB2)) * x*x + Sk4f(fD2)) * Sk4f(1.f/6.f);
return over2.thenElse(Sk4f(0), over1.thenElse(poly1, poly0));
}
// Evaluates `count` samples starting at `val` and stepping by `diff`, writing them
// to `output` and returning their sum. Samples are processed four at a time via
// evalcore_n(); the remaining (fewer than four) samples go through the base-class loop.
float evaluate_n(float val, float diff, int count, float* output) const override {
Sk4f sum(0);
while (count >= 4) {
// Build the four consecutive sample positions, advancing val as we go.
float v0 = val;
float v1 = val += diff;
float v2 = val += diff;
float v3 = val += diff;
val += diff;
Sk4f filterValue = evalcore_n(Sk4f(v0, v1, v2, v3));
filterValue.store(output);
output += 4;
sum = sum + filterValue;
count -= 4;
}
// Horizontal add of the four lane sums.
float sums[4];
sum.store(sums);
float result = sums[0] + sums[1] + sums[2] + sums[3];
// Tail: val, count and output already point past the vectorised part.
result += INHERITED::evaluate_n(val, diff, count, output);
return result;
}
protected:
float fB, fC;
float fA1, fB1, fC1, fD1;
float fA2, fB2, fD2;
private:
typedef SkBitmapFilter INHERITED;
};
/**
 *  Gaussian kernel exp(-a * x^2), shifted down by its value at x == width and
 *  clamped so the result is never negative.
 */
class SkGaussianFilter final : public SkBitmapFilter {
    float fAlpha, fExpWidth;

public:
    /** @param a      falloff coefficient of the exponential
     *  @param width  kernel width; the kernel evaluates to 0 at this distance
     */
    SkGaussianFilter(float a, float width = 2)
        : SkBitmapFilter(width)
        , fAlpha(a)
        , fExpWidth(expf(-a * width * width)) {}

    float evaluate(float x) const override {
        const float shifted = float(expf(-fAlpha*x*x) - fExpWidth);
        return SkTMax(0.f, shifted);
    }
};
/** Triangle (tent) kernel: width - |x|, clamped so the result is never negative. */
class SkTriangleFilter final : public SkBitmapFilter {
public:
    SkTriangleFilter(float width = 1) : SkBitmapFilter(width) {}

    float evaluate(float x) const override {
        const float height = fWidth - fabsf(x);
        return SkTMax(0.f, height);
    }
};
/** Box kernel: 1 inside the half-open interval [-width, width), 0 elsewhere. */
class SkBoxFilter final : public SkBitmapFilter {
public:
    SkBoxFilter(float width = 0.5f) : SkBitmapFilter(width) {}

    float evaluate(float x) const override {
        // Keep the test in its positive form so a NaN input still yields 0.
        const bool inside = (x >= -fWidth && x < fWidth);
        if (inside) {
            return 1.0f;
        }
        return 0.0f;
    }
};
/** sinc(x) multiplied by the window 0.54 + 0.46 * cos(pi * x / width). */
class SkHammingFilter final : public SkBitmapFilter {
public:
    SkHammingFilter(float width = 1) : SkBitmapFilter(width) {}

    float evaluate(float x) const override {
        // Outside of the window.
        if (x <= -fWidth || x >= fWidth) {
            return 0.0f;
        }
        // sinc is 0/0 at the origin; return its limit instead.
        if (x > -FLT_EPSILON && x < FLT_EPSILON) {
            return 1.0f;
        }
        const float xpi    = x * static_cast<float>(SK_ScalarPI);
        const float sinc   = sk_float_sin(xpi) / xpi;
        const float window = 0.54f + 0.46f * sk_float_cos(xpi / fWidth);
        return sinc * window;
    }
};
/** Lanczos kernel: sinc(x) * sinc(x / width) inside (-width, width), 0 outside. */
class SkLanczosFilter final : public SkBitmapFilter {
public:
    SkLanczosFilter(float width = 3.f) : SkBitmapFilter(width) {}

    float evaluate(float x) const override {
        // Outside of the window.
        if (x <= -fWidth || x >= fWidth) {
            return 0.0f;
        }
        // Both sinc terms are 0/0 at the origin; return the limit instead.
        if (x > -FLT_EPSILON && x < FLT_EPSILON) {
            return 1.0f;
        }
        const float xpi       = x * static_cast<float>(SK_ScalarPI);
        const float windowArg = xpi / fWidth;
        const float sinc      = sk_float_sin(xpi) / xpi;
        return sinc * sk_float_sin(windowArg) / windowArg;
    }
};
#endif

View File

@ -12,7 +12,6 @@
#include "SkPaint.h" #include "SkPaint.h"
#include "SkShader.h" // for tilemodes #include "SkShader.h" // for tilemodes
#include "SkUtilsArm.h" #include "SkUtilsArm.h"
#include "SkBitmapScaler.h"
#include "SkMipMap.h" #include "SkMipMap.h"
#include "SkPixelRef.h" #include "SkPixelRef.h"
#include "SkImageEncoder.h" #include "SkImageEncoder.h"
@ -90,7 +89,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) {
fInvMatrix = inv; fInvMatrix = inv;
fFilterQuality = paint.getFilterQuality(); fFilterQuality = paint.getFilterQuality();
SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kNo); SkDefaultBitmapController controller;
fBMState = controller.requestBitmap(fProvider, inv, paint.getFilterQuality(), fBMState = controller.requestBitmap(fProvider, inv, paint.getFilterQuality(),
fBMStateStorage.get(), fBMStateStorage.size()); fBMStateStorage.get(), fBMStateStorage.size());
// Note : we allow the controller to return an empty (zero-dimension) result. Should we? // Note : we allow the controller to return an empty (zero-dimension) result. Should we?
@ -102,6 +101,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) {
fRealInvMatrix = fBMState->invMatrix(); fRealInvMatrix = fBMState->invMatrix();
fPaintColor = paint.getColor(); fPaintColor = paint.getColor();
fFilterQuality = fBMState->quality(); fFilterQuality = fBMState->quality();
SkASSERT(fFilterQuality <= kLow_SkFilterQuality);
SkASSERT(fPixmap.addr()); SkASSERT(fPixmap.addr());
// Most of the scanline procs deal with "unit" texture coordinates, as this // Most of the scanline procs deal with "unit" texture coordinates, as this

View File

@ -10,8 +10,8 @@
#include "SkBitmap.h" #include "SkBitmap.h"
#include "SkBitmapController.h" #include "SkBitmapController.h"
#include "SkBitmapFilter.h"
#include "SkBitmapProvider.h" #include "SkBitmapProvider.h"
#include "SkFixed.h"
#include "SkFloatBits.h" #include "SkFloatBits.h"
#include "SkMatrix.h" #include "SkMatrix.h"
#include "SkMipMap.h" #include "SkMipMap.h"

View File

@ -1,254 +0,0 @@
/*
* Copyright 2015 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "SkBitmapScaler.h"
#include "SkBitmapFilter.h"
#include "SkConvolver.h"
#include "SkImageInfo.h"
#include "SkPixmap.h"
#include "SkRect.h"
#include "SkTArray.h"
// SkResizeFilter ----------------------------------------------------------------
// Encapsulates computation and storage of the filters required for one complete
// resize operation.
/**
 *  Computes and stores the horizontal and vertical convolution filters needed
 *  for one resize operation.
 *
 *  The object owns the SkBitmapFilter kernel it allocates in its constructor
 *  and deletes it in its destructor. Copying is disabled: an implicit copy
 *  would duplicate the raw owning pointer and delete the kernel twice.
 */
class SkResizeFilter {
public:
    /**
     *  @param method         kernel to use for the resize
     *  @param srcFullWidth   width of the full source image
     *  @param srcFullHeight  height of the full source image
     *  @param destWidth      width of the full destination image
     *  @param destHeight     height of the full destination image
     *  @param destSubset     part of the destination to generate filters for
     */
    SkResizeFilter(SkBitmapScaler::ResizeMethod method,
                   int srcFullWidth, int srcFullHeight,
                   float destWidth, float destHeight,
                   const SkRect& destSubset);
    ~SkResizeFilter() { delete fBitmapFilter; }

    // fBitmapFilter is a raw owning pointer, so copies must not be made.
    SkResizeFilter(const SkResizeFilter&) = delete;
    SkResizeFilter& operator=(const SkResizeFilter&) = delete;

    // Returns the filled filter values.
    const SkConvolutionFilter1D& xFilter() const { return fXFilter; }
    const SkConvolutionFilter1D& yFilter() const { return fYFilter; }

private:
    // Kernel selected from the ResizeMethod; owned by this object.
    SkBitmapFilter* fBitmapFilter;

    // Computes one set of filters either horizontally or vertically. The caller
    // will specify the "min" and "max" rather than the bottom/top and
    // right/bottom so that the same code can be re-used in each dimension.
    //
    // |srcDependLo| and |srcDependSize| gives the range for the source
    // depend rectangle (horizontally or vertically at the caller's discretion
    // -- see above for what this means).
    //
    // Likewise, the range of destination values to compute and the scale factor
    // for the transform is also specified.
    void computeFilters(int srcSize,
                        float destSubsetLo, float destSubsetSize,
                        float scale,
                        SkConvolutionFilter1D* output);

    SkConvolutionFilter1D fXFilter;
    SkConvolutionFilter1D fYFilter;
};
// Picks the kernel for `method`, then builds the horizontal filter and either
// reuses it for the vertical direction (when both axes are configured
// identically) or builds a separate vertical filter.
SkResizeFilter::SkResizeFilter(SkBitmapScaler::ResizeMethod method,
                               int srcFullWidth, int srcFullHeight,
                               float destWidth, float destHeight,
                               const SkRect& destSubset) {
    SkASSERT(method >= SkBitmapScaler::RESIZE_FirstMethod &&
             method <= SkBitmapScaler::RESIZE_LastMethod);

    // An unrecognised method leaves the kernel null.
    if (SkBitmapScaler::RESIZE_BOX == method) {
        fBitmapFilter = new SkBoxFilter;
    } else if (SkBitmapScaler::RESIZE_TRIANGLE == method) {
        fBitmapFilter = new SkTriangleFilter;
    } else if (SkBitmapScaler::RESIZE_MITCHELL == method) {
        fBitmapFilter = new SkMitchellFilter;
    } else if (SkBitmapScaler::RESIZE_HAMMING == method) {
        fBitmapFilter = new SkHammingFilter;
    } else if (SkBitmapScaler::RESIZE_LANCZOS3 == method) {
        fBitmapFilter = new SkLanczosFilter;
    } else {
        fBitmapFilter = nullptr;
    }

    const float xScale = destWidth / srcFullWidth;
    const float yScale = destHeight / srcFullHeight;

    this->computeFilters(srcFullWidth, destSubset.fLeft, destSubset.width(),
                         xScale, &fXFilter);

    // Identical source extent, subset origin, subset extent and scale on both
    // axes means the vertical filter would come out the same as the horizontal one.
    const bool axesMatch = srcFullWidth == srcFullHeight &&
                           destSubset.fLeft == destSubset.fTop &&
                           destSubset.width() == destSubset.height() &&
                           xScale == yScale;
    if (axesMatch) {
        fYFilter = fXFilter;
        return;
    }

    this->computeFilters(srcFullHeight, destSubset.fTop, destSubset.height(),
                         yScale, &fYFilter);
}
// TODO(egouriou): Take advantage of periods in the convolution.
// Practical resizing filters are periodic outside of the border area.
// For Lanczos, a scaling by a (reduced) factor of p/q (q pixels in the
// source become p pixels in the destination) will have a period of p.
// A nice consequence is a period of 1 when downscaling by an integral
// factor. Downscaling from typical display resolutions is also bound
// to produce interesting periods as those are chosen to have multiple
// small factors.
// Small periods reduce computational load and improve cache usage if
// the coefficients can be shared. For periods of 1 we can consider
// loading the factors only once outside the borders.
// Builds the 1-D convolution filters for one dimension and appends them to |output|,
// one filter per destination pixel in the (floor/ceil-expanded) destination subset.
//
// srcSize         size of the source along this dimension
// destSubsetLo    start of the destination range to generate filters for
// destSubsetSize  size of that destination range
// scale           destination size / source size along this dimension
// output          receives one AddFilter() call per destination pixel
//
// Each filter is normalised to sum to fixed-point 1 so image brightness is preserved.
void SkResizeFilter::computeFilters(int srcSize,
float destSubsetLo, float destSubsetSize,
float scale,
SkConvolutionFilter1D* output) {
float destSubsetHi = destSubsetLo + destSubsetSize; // [lo, hi)
// When we're doing a magnification, the scale will be larger than one. This
// means the destination pixels are much smaller than the source pixels, and
// that the range covered by the filter won't necessarily cover any source
// pixel boundaries. Therefore, we use these clamped values (max of 1) for
// some computations.
float clampedScale = SkTMin(1.0f, scale);
// This is how many source pixels from the center we need to count
// to support the filtering function.
float srcSupport = fBitmapFilter->width() / clampedScale;
float invScale = 1.0f / scale;
// Scratch buffers reused for every destination pixel: raw float weights and
// their fixed-point equivalents.
SkSTArray<64, float, true> filterValuesArray;
SkSTArray<64, SkConvolutionFilter1D::ConvolutionFixed, true> fixedFilterValuesArray;
// Loop over all pixels in the output range. We will generate one set of
// filter values for each one. Those values will tell us how to blend the
// source pixels to compute the destination pixel.
// This is the pixel in the source directly under the pixel in the dest.
// Note that we base computations on the "center" of the pixels. To see
// why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
// downscale should "cover" the pixels around the pixel with *its center*
// at coordinates (2.5, 2.5) in the source, not those around (0, 0).
// Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
destSubsetLo = SkScalarFloorToScalar(destSubsetLo);
destSubsetHi = SkScalarCeilToScalar(destSubsetHi);
float srcPixel = (destSubsetLo + 0.5f) * invScale;
int destLimit = SkScalarTruncToInt(destSubsetHi - destSubsetLo);
// Reserve room up front for destLimit filters and an estimate of their total weights.
output->reserveAdditional(destLimit, SkScalarCeilToInt(destLimit * srcSupport * 2));
for (int destI = 0; destI < destLimit; srcPixel += invScale, destI++) {
// Compute the (inclusive) range of source pixels the filter covers.
float srcBegin = SkTMax(0.f, SkScalarFloorToScalar(srcPixel - srcSupport));
float srcEnd = SkTMin(srcSize - 1.f, SkScalarCeilToScalar(srcPixel + srcSupport));
// Compute the unnormalized filter value at each location of the source
// it covers.
// Sum of the filter values for normalizing.
// Distance from the center of the filter, this is the filter coordinate
// in source space. We also need to consider the center of the pixel
// when comparing distance against 'srcPixel'. In the 5x downscale
// example used above the distance from the center of the filter to
// the pixel with coordinates (2, 2) should be 0, because its center
// is at (2.5, 2.5).
float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
int filterCount = SkScalarTruncToInt(srcEnd - srcBegin) + 1;
if (filterCount <= 0) {
// true when srcSize is equal to srcPixel - srcSupport; this may be a bug
// Note that this returns from the whole function, so no filters are added
// for this or any later destination pixel.
return;
}
filterValuesArray.reset(filterCount);
float filterSum = fBitmapFilter->evaluate_n(destFilterDist, clampedScale, filterCount,
filterValuesArray.begin());
// The filter must be normalized so that we don't affect the brightness of
// the image. Convert to normalized fixed point.
int fixedSum = 0;
fixedFilterValuesArray.reset(filterCount);
const float* filterValues = filterValuesArray.begin();
SkConvolutionFilter1D::ConvolutionFixed* fixedFilterValues = fixedFilterValuesArray.begin();
// NOTE(review): nothing here guards against filterSum == 0, which would make
// invFilterSum infinite -- confirm the kernels can never sum to zero.
float invFilterSum = 1 / filterSum;
for (int fixedI = 0; fixedI < filterCount; fixedI++) {
int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] * invFilterSum);
fixedSum += curFixed;
fixedFilterValues[fixedI] = SkToS16(curFixed);
}
SkASSERT(fixedSum <= 0x7FFF);
// The conversion to fixed point will leave some rounding errors, which
// we add back in to avoid affecting the brightness of the image. We
// arbitrarily add this to the center of the filter array (this won't always
// be the center of the filter function since it could get clipped on the
// edges, but it doesn't matter enough to worry about that case).
int leftovers = SkConvolutionFilter1D::FloatToFixed(1) - fixedSum;
fixedFilterValues[filterCount / 2] += leftovers;
// Now it's ready to go.
output->AddFilter(SkScalarFloorToInt(srcBegin), fixedFilterValues, filterCount);
}
}
///////////////////////////////////////////////////////////////////////////////////////////////////
// Preflight check shared by both Resize() entry points: the source must have
// pixels and be N32, and both the source and the requested destination must be
// at least 1x1.
static bool valid_for_resize(const SkPixmap& source, int dstW, int dstH) {
    if (!source.addr()) {
        return false;
    }
    // TODO: Seems like we shouldn't care about the swizzle of source, just that it's 8888
    if (source.colorType() != kN32_SkColorType) {
        return false;
    }
    if (source.width() < 1 || source.height() < 1) {
        return false;
    }
    return dstW >= 1 && dstH >= 1;
}
// Scales |source| into the caller-allocated |result| with the filter selected by
// |method|. Returns false when the inputs fail validation, when |result| has no
// pixels or a different color type than |source|, or when the convolution
// itself reports failure.
bool SkBitmapScaler::Resize(const SkPixmap& result, const SkPixmap& source, ResizeMethod method) {
    const int dstW = result.width();
    const int dstH = result.height();

    const bool usable = valid_for_resize(source, dstW, dstH) &&
                        result.addr() &&
                        result.colorType() == source.colorType();
    if (!usable) {
        return false;
    }

    // The whole destination is produced, so the subset is simply its bounds.
    SkRect destSubset = SkRect::MakeIWH(dstW, dstH);
    SkResizeFilter filter(method, source.width(), source.height(), dstW, dstH, destSubset);

    // Hand the convolver raw byte views of both pixmaps; row strides are passed
    // separately so padded rows are handled.
    const uint8_t* srcBytes = reinterpret_cast<const uint8_t*>(source.addr());
    unsigned char* dstBytes = static_cast<unsigned char*>(result.writable_addr());
    const int srcRowBytes = static_cast<int>(source.rowBytes());
    const int dstRowBytes = static_cast<int>(result.rowBytes());
    const bool srcHasAlpha = !source.isOpaque();

    return BGRAConvolve2D(srcBytes, srcRowBytes, srcHasAlpha,
                          filter.xFilter(), filter.yFilter(),
                          dstRowBytes, dstBytes);
}
// Allocating flavor of Resize(): builds an N32 bitmap of destWidth x destHeight
// (using |allocator|), scales |source| into it, and on success assigns it to
// |*resultPtr|. On failure |*resultPtr| is left untouched and false is returned.
bool SkBitmapScaler::Resize(SkBitmap* resultPtr, const SkPixmap& source, ResizeMethod method,
                            int destWidth, int destHeight, SkBitmap::Allocator* allocator) {
    // Reject bad arguments up front so no destination is allocated needlessly.
    if (!valid_for_resize(source, destWidth, destHeight)) {
        return false;
    }

    // Note: the source's color space is passed along even though this is not the
    // right answer, because this could be scaling in sRGB.
    SkBitmap scaled;
    scaled.setInfo(SkImageInfo::MakeN32(destWidth, destHeight, source.alphaType(),
                                        sk_ref_sp(source.info().colorSpace())));
    scaled.allocPixels(allocator);

    SkPixmap scaledPM;
    if (!scaled.peekPixels(&scaledPM)) {
        return false;
    }
    if (!Resize(scaledPM, source, method)) {
        return false;
    }

    *resultPtr = scaled;
    SkASSERT(resultPtr->getPixels());
    return true;
}

View File

@ -1,46 +0,0 @@
/*
* Copyright 2013 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBitmapScaler_DEFINED
#define SkBitmapScaler_DEFINED
#include "SkBitmap.h"
#include "SkConvolver.h"
/** \class SkBitmapScaler
Provides the interface for high quality image resampling.

Both Resize() overloads return false, without producing output, when the source
has no pixels, is not kN32_SkColorType, or when either the source or the
requested destination is smaller than 1x1.
*/
class SK_API SkBitmapScaler {
public:
// Selects the resampling filter used by Resize().
enum ResizeMethod {
RESIZE_BOX,
RESIZE_TRIANGLE,
RESIZE_LANCZOS3,
RESIZE_HAMMING,
RESIZE_MITCHELL,
// Aliases for the first and last enumerators above.
RESIZE_FirstMethod = RESIZE_BOX,
RESIZE_LastMethod = RESIZE_MITCHELL,
};
/**
* Given already-allocated src and dst pixmaps, this will scale the src pixels using the
* specified resize-method and write the results into the pixels pointed to by dst.
* Returns false if dst has no pixels or its color type differs from src's.
*/
static bool Resize(const SkPixmap& dst, const SkPixmap& src, ResizeMethod method);
/**
* Helper function that manages allocating a bitmap to hold the dst pixels, and then calls
* the pixmap version of Resize. On success the new bitmap is assigned to *result; on
* failure *result is not modified.
*/
static bool Resize(SkBitmap* result, const SkPixmap& src, ResizeMethod method,
int dest_width, int dest_height, SkBitmap::Allocator* = nullptr);
};
#endif

View File

@ -798,21 +798,16 @@ bool SkBlitter::UseRasterPipelineBlitter(const SkPixmap& device, const SkPaint&
if (paint.getColorFilter()) { if (paint.getColorFilter()) {
return true; return true;
} }
#ifndef SK_SUPPORT_LEGACY_HQ_SCALER
if (paint.getFilterQuality() == kHigh_SkFilterQuality) { if (paint.getFilterQuality() == kHigh_SkFilterQuality) {
return true; return true;
} }
#endif
// ... unless the blend mode is complicated enough. // ... unless the blend mode is complicated enough.
if (paint.getBlendMode() > SkBlendMode::kLastSeparableMode) { if (paint.getBlendMode() > SkBlendMode::kLastSeparableMode) {
return true; return true;
} }
// ... or unless we have to deal with perspective.
if (matrix.hasPerspective()) { if (matrix.hasPerspective()) {
return true; return true;
} }
// ... or unless the shader is raster pipeline-only. // ... or unless the shader is raster pipeline-only.
if (paint.getShader() && as_SB(paint.getShader())->isRasterPipelineOnly()) { if (paint.getShader() && as_SB(paint.getShader())->isRasterPipelineOnly()) {
return true; return true;

View File

@ -1,272 +0,0 @@
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "SkConvolver.h"
#include "SkOpts.h"
#include "SkTArray.h"
namespace {
// A ring of fixed-size pixel rows. Rows are produced by calling advanceRow(),
// which hands out the next slot to write into; the buffer remembers both the
// slot it will hand out next and the image coordinate that slot represents.
class CircularRowBuffer {
public:
    // |destRowPixelWidth| is the width of every row in pixels (4 bytes each).
    // |maxYFilterSize| is the number of rows kept, i.e. enough for the largest
    // filter. |firstInputRow| is the image coordinate of the first row that
    // advanceRow() will return; later rows follow consecutively.
    CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
                      int firstInputRow)
        : fRowByteWidth(destRowPixelWidth * 4)
        , fNumRows(maxYFilterSize)
        , fNextRow(0)
        , fNextRowCoordinate(firstInputRow) {
        fBuffer.reset(fRowByteWidth * fNumRows);
        fRowAddresses.reset(fNumRows);
    }

    // Returns the start of the slot to fill next and moves on to the following
    // slot, wrapping back to slot 0 after the last one.
    unsigned char* advanceRow() {
        unsigned char* slot = &fBuffer[fNextRow * fRowByteWidth];
        ++fNextRowCoordinate;
        fNextRow = (fNextRow + 1 == fNumRows) ? 0 : fNextRow + 1;
        return slot;
    }

    // Returns the rows "unrolled" into coordinate order: entry 0 is the row at
    // the coordinate written to |*firstRowIndex|, and entries continue in order
    // for every row in the buffer.
    //
    // |*firstRowIndex| may come out negative, meaning the buffer has not been
    // filled yet and logically starts above the top of the image.
    unsigned char* const* GetRowAddresses(int* firstRowIndex) {
        // Example for a 4-element circular buffer holding coords 6-9.
        //   Row 0   Coord 8
        //   Row 1   Coord 9
        //   Row 2   Coord 6  <- fNextRow = 2, fNextRowCoordinate = 10.
        //   Row 3   Coord 7
        //
        // The slot that will be overwritten next holds the oldest (lowest)
        // coordinate. A negative result is harmless: callers index relative to
        // |*firstRowIndex| and never touch the not-yet-written rows.
        *firstRowIndex = fNextRowCoordinate - fNumRows;

        int slot = fNextRow;
        for (int ordered = 0; ordered < fNumRows; ++ordered) {
            fRowAddresses[ordered] = &fBuffer[slot * fRowByteWidth];
            slot = (slot + 1 == fNumRows) ? 0 : slot + 1;
        }
        return &fRowAddresses[0];
    }

private:
    // Backing storage for all rows, packed back to back, fRowByteWidth each.
    SkTArray<unsigned char> fBuffer;
    // Size of one row in bytes.
    int fRowByteWidth;
    // How many rows the buffer holds.
    int fNumRows;
    // Index of the slot that advanceRow() will hand out next; wraps around.
    int fNextRow;
    // Image y coordinate corresponding to |fNextRow|. Grows by one for every
    // row handed out and never wraps.
    int fNextRowCoordinate;
    // Scratch array returned by GetRowAddresses().
    SkTArray<unsigned char*> fRowAddresses;
};
} // namespace
// SkConvolutionFilter1D ---------------------------------------------------------
// Starts with no filters, so the largest filter length seen so far is zero.
SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {}
// Nothing to release by hand; the member arrays clean up after themselves.
SkConvolutionFilter1D::~SkConvolutionFilter1D() = default;
void SkConvolutionFilter1D::AddFilter(int filterOffset,
const ConvolutionFixed* filterValues,
int filterLength) {
// It is common for leading/trailing filter values to be zeros. In such
// cases it is beneficial to only store the central factors.
// For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
// a 1080p image this optimization gives a ~10% speed improvement.
int filterSize = filterLength;
int firstNonZero = 0;
while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
firstNonZero++;
}
if (firstNonZero < filterLength) {
// Here we have at least one non-zero factor.
int lastNonZero = filterLength - 1;
while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
lastNonZero--;
}
filterOffset += firstNonZero;
filterLength = lastNonZero + 1 - firstNonZero;
SkASSERT(filterLength > 0);
fFilterValues.append(filterLength, &filterValues[firstNonZero]);
} else {
// Here all the factors were zeroes.
filterLength = 0;
}
FilterInstance instance;
// We pushed filterLength elements onto fFilterValues
instance.fDataLocation = (static_cast<int>(fFilterValues.count()) -
filterLength);
instance.fOffset = filterOffset;
instance.fTrimmedLength = filterLength;
instance.fLength = filterSize;
fFilters.push(instance);
fMaxFilter = SkTMax(fMaxFilter, filterLength);
}
// Reports the first filter (index 0): its offset, its trimmed length and the
// length it was originally added with. Returns a pointer to its stored
// coefficients, or nullptr when the trimmed filter is empty.
const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter(
        int* specifiedFilterlength,
        int* filterOffset,
        int* filterLength) const {
    const FilterInstance& only = fFilters[0];

    *filterOffset = only.fOffset;
    *filterLength = only.fTrimmedLength;
    *specifiedFilterlength = only.fLength;

    return only.fTrimmedLength == 0 ? nullptr : &fFilterValues[only.fDataLocation];
}
// Two-pass separable convolution of a 4-bytes-per-pixel image.
//
// Source rows are convolved horizontally with |filterX| into a circular buffer
// of intermediate rows; as soon as the rows needed by the next entry of
// |filterY| are available, they are convolved vertically into one output row.
//
// sourceData / sourceByteRowStride: first source row and distance between rows.
// sourceHasAlpha: forwarded to the SkOpts convolution routines.
// outputByteRowStride / output: destination rows; one row is written per entry
//     of |filterY|, each holding filterX.numValues() pixels.
//
// Returns false (writing nothing) if the intermediate row buffer would be too
// large, true otherwise.
// NOTE(review): filterY is indexed at 0 unconditionally below, so callers
// presumably guarantee at least one output row -- confirm.
bool BGRAConvolve2D(const unsigned char* sourceData,
int sourceByteRowStride,
bool sourceHasAlpha,
const SkConvolutionFilter1D& filterX,
const SkConvolutionFilter1D& filterY,
int outputByteRowStride,
unsigned char* output) {
int maxYFilterSize = filterY.maxFilter();
// The next row in the input that we will generate a horizontally
// convolved row for. If the filter doesn't start at the beginning of the
// image (this is the case when we are only resizing a subset), then we
// don't want to generate any output rows before that. Compute the starting
// row for convolution as the first pixel for the first vertical filter.
int filterOffset, filterLength;
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
filterY.FilterForValue(0, &filterOffset, &filterLength);
int nextXRow = filterOffset;
// We loop over each row in the input doing a horizontal convolution. This
// will result in a horizontally convolved image. We write the results into
// a circular buffer of convolved rows and do vertical convolution as rows
// are available. This prevents us from having to store the entire
// intermediate image and helps cache coherency.
// When a 4-row horizontal routine is available we need four extra rows so
// that four rows can be horizontally convolved at once. The row width (in
// pixels) is also rounded up to a multiple of 32.
// TODO(jiesun): We do not use aligned load from row buffer in vertical
// convolution pass yet. Somehow Windows does not like it.
int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F;
int rowBufferHeight = maxYFilterSize +
(SkOpts::convolve_4_rows_horizontally != nullptr ? 4 : 0);
// check for too-big allocation requests : crbug.com/528628
{
// |size| counts pixels (width * height); CircularRowBuffer allocates
// 4 bytes for each of them.
int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight);
// need some limit, to avoid over-committing success from malloc, but then
// crashing when we try to actually use the memory.
// 100meg seems big enough to allow "normal" zoom factors and image sizes through
// while avoiding the crash seen by the bug (crbug.com/528628)
if (size > 100 * 1024 * 1024) {
// SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size);
return false;
}
}
CircularRowBuffer rowBuffer(rowBufferWidth,
rowBufferHeight,
filterOffset);
// Loop over every possible output row, processing just enough horizontal
// convolutions to run each subsequent vertical convolution.
SkASSERT(outputByteRowStride >= filterX.numValues() * 4);
int numOutputRows = filterY.numValues();
// We need to check which is the last line to convolve before we advance 4
// lines in one iteration.
int lastFilterOffset, lastFilterLength;
filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
&lastFilterLength);
for (int outY = 0; outY < numOutputRows; outY++) {
filterValues = filterY.FilterForValue(outY,
&filterOffset, &filterLength);
// Generate output rows until we have enough to run the current filter.
while (nextXRow < filterOffset + filterLength) {
// Take the 4-row path only while all four rows are still rows that the
// last vertical filter reads; otherwise fall back to one row at a time.
if (SkOpts::convolve_4_rows_horizontally != nullptr &&
nextXRow + 3 < lastFilterOffset + lastFilterLength) {
const unsigned char* src[4];
unsigned char* outRow[4];
for (int i = 0; i < 4; ++i) {
src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];
outRow[i] = rowBuffer.advanceRow();
}
SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*rowBufferWidth);
nextXRow += 4;
} else {
SkOpts::convolve_horizontally(
&sourceData[(uint64_t)nextXRow * sourceByteRowStride],
filterX, rowBuffer.advanceRow(), sourceHasAlpha);
nextXRow++;
}
}
// Compute where in the output image this row of final data will go.
unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];
// Get the list of rows that the circular buffer has, in order.
int firstRowInCircularBuffer;
unsigned char* const* rowsToConvolve =
rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
// Now compute the start of the subset of those rows that the filter needs.
unsigned char* const* firstRowForFilter =
&rowsToConvolve[filterOffset - firstRowInCircularBuffer];
SkOpts::convolve_vertically(filterValues, filterLength,
firstRowForFilter,
filterX.numValues(), curOutputRow,
sourceHasAlpha);
}
return true;
}

View File

@ -1,173 +0,0 @@
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SK_CONVOLVER_H
#define SK_CONVOLVER_H
#include "SkSize.h"
#include "SkTDArray.h"
// avoid confusion with Mac OS X's math library (Carbon)
#if defined(__APPLE__)
#undef FloatToConvolutionFixed
#undef ConvolutionFixedToFloat
#undef FloatToFixed
#undef FixedToFloat
#endif
// Represents a filter in one dimension. Each output pixel has one entry in this
// object for the filter values contributing to it. You build up the filter
// list by calling AddFilter for each output pixel (in order).
//
// We do 2-dimensional convolution by first convolving each row by one
// SkConvolutionFilter1D, then convolving each column by another one.
//
// Entries are stored in ConvolutionFixed point, shifted left by kShiftBits.
class SkConvolutionFilter1D {
public:
// Storage type of one fixed-point filter coefficient.
typedef short ConvolutionFixed;
// The number of bits that ConvolutionFixed point values are shifted by.
enum { kShiftBits = 14 };
SK_API SkConvolutionFilter1D();
SK_API ~SkConvolutionFilter1D();
// Convert between floating point and our ConvolutionFixed point representation.
// FloatToFixed scales by (1 << kShiftBits) and converts with a plain cast
// (no rounding step is applied).
static ConvolutionFixed FloatToFixed(float f) {
return static_cast<ConvolutionFixed>(f * (1 << kShiftBits));
}
// Drops the fractional bits of |x| and narrows the result to a byte.
static unsigned char FixedToChar(ConvolutionFixed x) {
return static_cast<unsigned char>(x >> kShiftBits);
}
// Inverse of FloatToFixed: scales |x| by 2^-kShiftBits.
static float FixedToFloat(ConvolutionFixed x) {
// The cast relies on ConvolutionFixed being a short, implying that on
// the platforms we care about all (16) bits will fit into
// the mantissa of a (32-bit) float.
static_assert(sizeof(ConvolutionFixed) == 2, "ConvolutionFixed_type_should_fit_in_float_mantissa");
float raw = static_cast<float>(x);
return ldexpf(raw, -kShiftBits);
}
// Returns the maximum pixel span of a filter (the longest trimmed filter
// added so far).
int maxFilter() const { return fMaxFilter; }
// Returns the number of filters in this filter. This is the dimension of the
// output image.
int numValues() const { return static_cast<int>(fFilters.count()); }
// Reserves room for |filterCount| more filters and |filterValueCount| more
// coefficients, ahead of the corresponding AddFilter calls.
void reserveAdditional(int filterCount, int filterValueCount) {
fFilters.setReserve(fFilters.count() + filterCount);
fFilterValues.setReserve(fFilterValues.count() + filterValueCount);
}
// Appends the given list of scaling values for generating a given output
// pixel. |filterOffset| is the distance from the edge of the image to where
// the scaling factors start. The scaling factors apply to the source pixels
// starting from this position, and going for the next |filterLength| pixels.
//
// You will probably want to make sure your input is normalized (that is,
// all entries in |filterValues| sum to one) to prevent affecting the overall
// brightness of the image.
//
// The filterLength must be > 0.
void AddFilter(int filterOffset,
const ConvolutionFixed* filterValues,
int filterLength);
// Retrieves a filter for the given |valueOffset|, a position in the output
// image in the direction we're convolving. The offset and length of the
// filter values are put into the corresponding out arguments (see AddFilter
// above for what these mean), and a pointer to the first scaling factor is
// returned. There will be |filterLength| values in this array.
// Returns nullptr when the stored (trimmed) filter is empty.
inline const ConvolutionFixed* FilterForValue(int valueOffset,
int* filterOffset,
int* filterLength) const {
const FilterInstance& filter = fFilters[valueOffset];
*filterOffset = filter.fOffset;
*filterLength = filter.fTrimmedLength;
if (filter.fTrimmedLength == 0) {
return nullptr;
}
return &fFilterValues[filter.fDataLocation];
}
// Retrieves the filter for the offset 0, presumed to be the one and only.
// The offset and length of the filter values are put into the corresponding
// out arguments (see AddFilter). Note that |filterLength| and
// |specifiedFilterLength| may be different if leading/trailing zeros of the
// original floating point form were clipped.
// There will be |filterLength| values in the return array.
// Returns nullptr if the filter is 0-length (for instance when all floating
// point values passed to AddFilter were clipped to 0).
SK_API const ConvolutionFixed* GetSingleFilter(int* specifiedFilterLength,
int* filterOffset,
int* filterLength) const;
// Add another value to the fFilterValues array -- useful for
// SIMD padding which happens outside of this class.
void addFilterValue( ConvolutionFixed val ) {
fFilterValues.push( val );
}
private:
struct FilterInstance {
// Offset within fFilterValues for this instance of the filter.
int fDataLocation;
// Offset, in pixels, from the edge of the image to the first source pixel
// this (trimmed) filter applies to.
int fOffset;
// Number of values in this filter instance.
int fTrimmedLength;
// Filter length as specified. Note that this may be different from
// fTrimmedLength if leading/trailing zeros of the original floating
// point form were clipped differently on each tail.
int fLength;
};
// Stores the information for each filter added to this class.
SkTDArray<FilterInstance> fFilters;
// We store all the filter values in this flat list, indexed by
// |FilterInstance.fDataLocation| to avoid the mallocs required for storing
// each one separately.
SkTDArray<ConvolutionFixed> fFilterValues;
// The maximum size of any filter we've added.
int fMaxFilter;
};
// Does a two-dimensional convolution on the given source image.
//
// It is assumed the source pixel offsets referenced in the input filters
// reference only valid pixels, so the source image size is not required. Each
// row of the source image starts |sourceByteRowStride| after the previous
// one (this allows you to have rows with some padding at the end).
//
// The result will be put into the given output buffer. The destination image
// size will be xfilter.numValues() * yfilter.numValues() pixels. Each output
// row holds xfilter.numValues() * 4 bytes of pixel data, and consecutive rows
// start |outputByteRowStride| bytes apart.
//
// |sourceHasAlpha| is a hint that allows us to avoid doing computations on
// the alpha channel if the image is opaque. If you don't know, set this to
// true and it will work properly, but setting this to false will be a few
// percent faster if you know the image is opaque.
//
// The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
// (this is ARGB when loaded into 32-bit words on a little-endian machine).
/**
* Returns false if it was unable to perform the convolution/rescale, in which case the
* contents of the output buffer are undefined.
*/
SK_API bool BGRAConvolve2D(const unsigned char* sourceData,
int sourceByteRowStride,
bool sourceHasAlpha,
const SkConvolutionFilter1D& xfilter,
const SkConvolutionFilter1D& yfilter,
int outputByteRowStride,
unsigned char* output);
#endif // SK_CONVOLVER_H

View File

@ -36,7 +36,6 @@
#define SK_OPTS_NS portable #define SK_OPTS_NS portable
#endif #endif
#include "SkBitmapFilter_opts.h"
#include "SkBlend_opts.h" #include "SkBlend_opts.h"
#include "SkBlitMask_opts.h" #include "SkBlitMask_opts.h"
#include "SkBlitRow_opts.h" #include "SkBlitRow_opts.h"
@ -88,10 +87,6 @@ namespace SkOpts {
DEFINE_DEFAULT(hash_fn); DEFINE_DEFAULT(hash_fn);
DEFINE_DEFAULT(convolve_vertically);
DEFINE_DEFAULT(convolve_horizontally);
DEFINE_DEFAULT(convolve_4_rows_horizontally);
#undef DEFINE_DEFAULT #undef DEFINE_DEFAULT
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp. // Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
@ -99,7 +94,6 @@ namespace SkOpts {
void Init_sse41(); void Init_sse41();
void Init_sse42(); void Init_sse42();
void Init_avx(); void Init_avx();
void Init_hsw();
void Init_crc32(); void Init_crc32();
static void init() { static void init() {
@ -109,7 +103,6 @@ namespace SkOpts {
if (SkCpu::Supports(SkCpu::SSE41)) { Init_sse41(); } if (SkCpu::Supports(SkCpu::SSE41)) { Init_sse41(); }
if (SkCpu::Supports(SkCpu::SSE42)) { Init_sse42(); } if (SkCpu::Supports(SkCpu::SSE42)) { Init_sse42(); }
if (SkCpu::Supports(SkCpu::AVX )) { Init_avx(); } if (SkCpu::Supports(SkCpu::AVX )) { Init_avx(); }
if (SkCpu::Supports(SkCpu::HSW )) { Init_hsw(); }
#elif defined(SK_CPU_ARM64) #elif defined(SK_CPU_ARM64)
if (SkCpu::Supports(SkCpu::CRC32)) { Init_crc32(); } if (SkCpu::Supports(SkCpu::CRC32)) { Init_crc32(); }

View File

@ -8,7 +8,6 @@
#ifndef SkOpts_DEFINED #ifndef SkOpts_DEFINED
#define SkOpts_DEFINED #define SkOpts_DEFINED
#include "SkConvolver.h"
#include "SkRasterPipeline.h" #include "SkRasterPipeline.h"
#include "SkTypes.h" #include "SkTypes.h"
#include "SkXfermodePriv.h" #include "SkXfermodePriv.h"
@ -62,15 +61,6 @@ namespace SkOpts {
static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) { static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) {
return hash_fn(data, bytes, seed); return hash_fn(data, bytes, seed);
} }
extern void (*convolve_vertically)(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
int filter_length, unsigned char* const* source_data_rows,
int pixel_width, unsigned char* out_row, bool has_alpha);
extern void (*convolve_4_rows_horizontally)(const unsigned char* src_data[4],
const SkConvolutionFilter1D& filter,
unsigned char* out_row[4], size_t out_row_bytes);
extern void (*convolve_horizontally)(const unsigned char* src_data, const SkConvolutionFilter1D& filter,
unsigned char* out_row, bool has_alpha);
} }
#endif//SkOpts_DEFINED #endif//SkOpts_DEFINED

View File

@ -1,940 +0,0 @@
/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#ifndef SkBitmapFilter_opts_DEFINED
#define SkBitmapFilter_opts_DEFINED
#include "SkConvolver.h"
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
#include <immintrin.h>
#elif defined(SK_ARM_HAS_NEON)
#include <arm_neon.h>
#endif
namespace SK_OPTS_NS {
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
// Scalar tail for the SSE2 horizontal convolvers: multiplies the |r| remaining
// 4-channel pixels at |pixelsLeft| by the |r| coefficients at |filterValues|
// and adds the four per-channel sums to the matching 32-bit lanes of |accum|.
static SK_ALWAYS_INLINE void AccumRemainder(const unsigned char* pixelsLeft,
        const SkConvolutionFilter1D::ConvolutionFixed* filterValues, __m128i& accum, int r) {
    int channelSum[4] = {0, 0, 0, 0};
    for (int tap = 0; tap < r; ++tap) {
        const SkConvolutionFilter1D::ConvolutionFixed weight = filterValues[tap];
        const unsigned char* pixel = pixelsLeft + tap * 4;
        for (int channel = 0; channel < 4; ++channel) {
            channelSum[channel] += weight * pixel[channel];
        }
    }
    // Lane order matches the channel order in memory (channel 0 in the lowest lane).
    const __m128i tail = _mm_setr_epi32(channelSum[0], channelSum[1],
                                        channelSum[2], channelSum[3]);
    accum = _mm_add_epi32(accum, tail);
}
// Convolves horizontally along a single row. The row data is given in
// |srcData| and continues for the numValues() of the filter.
//
// One 4-byte output pixel is written to |outRow| per filter entry. The alpha
// hint is unused in this SSE2 version: all four channels are always computed.
void convolve_horizontally(const unsigned char* srcData,
const SkConvolutionFilter1D& filter,
unsigned char* outRow,
bool /*hasAlpha*/) {
// Output one pixel each iteration, calculating all channels (RGBA) together.
int numValues = filter.numValues();
for (int outX = 0; outX < numValues; outX++) {
// Get the filter that determines the current output pixel.
int filterOffset, filterLength;
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
filter.FilterForValue(outX, &filterOffset, &filterLength);
// Compute the first pixel in this row that the filter affects. It will
// touch |filterLength| pixels (4 bytes each) after this.
const unsigned char* rowToFilter = &srcData[filterOffset * 4];
__m128i zero = _mm_setzero_si128();
__m128i accum = _mm_setzero_si128();
// We will load and accumulate with four coefficients per iteration.
for (int filterX = 0; filterX < filterLength >> 2; filterX++) {
// Load 4 coefficients => duplicate 1st and 2nd of them for all channels.
__m128i coeff, coeff16;
// [16] xx xx xx xx c3 c2 c1 c0
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filterValues));
// [16] xx xx xx xx c1 c1 c0 c0
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
// [16] c1 c1 c1 c1 c0 c0 c0 c0
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
// Load four pixels => unpack the first two pixels to 16 bits =>
// multiply with coefficients => accumulate the convolution result.
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
__m128i src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(rowToFilter));
// [16] a1 b1 g1 r1 a0 b0 g0 r0
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
// The high and low 16-bit halves of each product are computed separately
// and interleaved below to form full 32-bit products.
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
// [32] a0*c0 b0*c0 g0*c0 r0*c0
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
// [32] a1*c1 b1*c1 g1*c1 r1*c1
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
// Duplicate 3rd and 4th coefficients for all channels =>
// unpack the 3rd and 4th pixels to 16 bits => multiply with coefficients
// => accumulate the convolution results.
// [16] xx xx xx xx c3 c3 c2 c2
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
// [16] c3 c3 c3 c3 c2 c2 c2 c2
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
// [16] a3 b3 g3 r3 a2 b2 g2 r2
src16 = _mm_unpackhi_epi8(src8, zero);
mul_hi = _mm_mulhi_epi16(src16, coeff16);
mul_lo = _mm_mullo_epi16(src16, coeff16);
// [32] a2*c2 b2*c2 g2*c2 r2*c2
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
// [32] a3*c3 b3*c3 g3*c3 r3*c3
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
accum = _mm_add_epi32(accum, t);
// Advance the pixel and coefficients pointers.
rowToFilter += 16;
filterValues += 4;
}
// When |filterLength| is not divisible by 4, we accumulate the last 1 - 3
// coefficients one at a time.
int r = filterLength & 3;
if (r) {
// Byte offset of the first pixel not covered by the 4-wide loop above.
int remainderOffset = (filterOffset + filterLength - r) * 4;
AccumRemainder(srcData + remainderOffset, filterValues, accum, r);
}
// Shift right for fixed point implementation.
accum = _mm_srai_epi32(accum, SkConvolutionFilter1D::kShiftBits);
// Packing 32 bits |accum| to 16 bits per channel (signed saturation).
accum = _mm_packs_epi32(accum, zero);
// Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
accum = _mm_packus_epi16(accum, zero);
// Store the pixel value of 32 bits.
*(reinterpret_cast<int*>(outRow)) = _mm_cvtsi128_si32(accum);
outRow += 4;
}
}
// Convolves horizontally along four rows. The row data is given in
// |srcData| and continues for the numValues() of the filter.
// The algorithm is almost same as |convolve_horizontally|. Please
// refer to that function for detailed comments.
//
// srcData[i] is convolved into outRow[i]; the pointers stored in |outRow| are
// advanced by 4 bytes for every pixel written. |outRowBytes| is only used by
// the SkASSERT bounds check on outRow[0].
void convolve_4_rows_horizontally(const unsigned char* srcData[4],
const SkConvolutionFilter1D& filter,
unsigned char* outRow[4],
size_t outRowBytes) {
SkDEBUGCODE(const unsigned char* out_row_0_start = outRow[0];)
// Output one pixel each iteration, calculating all channels (RGBA) together.
int numValues = filter.numValues();
for (int outX = 0; outX < numValues; outX++) {
int filterOffset, filterLength;
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
filter.FilterForValue(outX, &filterOffset, &filterLength);
__m128i zero = _mm_setzero_si128();
// four pixels in a column per iteration.
__m128i accum0 = _mm_setzero_si128();
__m128i accum1 = _mm_setzero_si128();
__m128i accum2 = _mm_setzero_si128();
__m128i accum3 = _mm_setzero_si128();
// Byte offset, within each source row, of the first pixel the filter touches.
int start = filterOffset * 4;
// We will load and accumulate with four coefficients per iteration.
for (int filterX = 0; filterX < (filterLength >> 2); filterX++) {
__m128i coeff, coeff16lo, coeff16hi;
// [16] xx xx xx xx c3 c2 c1 c0
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filterValues));
// [16] xx xx xx xx c1 c1 c0 c0
coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
// [16] c1 c1 c1 c1 c0 c0 c0 c0
coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
// [16] xx xx xx xx c3 c3 c2 c2
coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
// [16] c3 c3 c3 c3 c2 c2 c2 c2
coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
__m128i src8, src16, mul_hi, mul_lo, t;
// ITERATION(src, accum): load four pixels from |src|, multiply pixels 0-1 by
// coeff16lo and pixels 2-3 by coeff16hi, and add the 32-bit products to |accum|.
#define ITERATION(src, accum) \
src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); \
src16 = _mm_unpacklo_epi8(src8, zero); \
mul_hi = _mm_mulhi_epi16(src16, coeff16lo); \
mul_lo = _mm_mullo_epi16(src16, coeff16lo); \
t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
accum = _mm_add_epi32(accum, t); \
t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
accum = _mm_add_epi32(accum, t); \
src16 = _mm_unpackhi_epi8(src8, zero); \
mul_hi = _mm_mulhi_epi16(src16, coeff16hi); \
mul_lo = _mm_mullo_epi16(src16, coeff16hi); \
t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
accum = _mm_add_epi32(accum, t); \
t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
accum = _mm_add_epi32(accum, t)
ITERATION(srcData[0] + start, accum0);
ITERATION(srcData[1] + start, accum1);
ITERATION(srcData[2] + start, accum2);
ITERATION(srcData[3] + start, accum3);
start += 16;
filterValues += 4;
}
// Handle the last 1 - 3 coefficients (if any) one at a time, per row.
int r = filterLength & 3;
if (r) {
int remainderOffset = (filterOffset + filterLength - r) * 4;
AccumRemainder(srcData[0] + remainderOffset, filterValues, accum0, r);
AccumRemainder(srcData[1] + remainderOffset, filterValues, accum1, r);
AccumRemainder(srcData[2] + remainderOffset, filterValues, accum2, r);
AccumRemainder(srcData[3] + remainderOffset, filterValues, accum3, r);
}
// For each row: shift out the fixed-point fraction, then pack 32 -> 16 bits
// (signed saturation) and 16 -> 8 bits (unsigned saturation).
accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
accum0 = _mm_packs_epi32(accum0, zero);
accum0 = _mm_packus_epi16(accum0, zero);
accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
accum1 = _mm_packs_epi32(accum1, zero);
accum1 = _mm_packus_epi16(accum1, zero);
accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
accum2 = _mm_packs_epi32(accum2, zero);
accum2 = _mm_packus_epi16(accum2, zero);
accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
accum3 = _mm_packs_epi32(accum3, zero);
accum3 = _mm_packus_epi16(accum3, zero);
// We seem to be running off the edge here (chromium:491660).
SkASSERT(((size_t)outRow[0] - (size_t)out_row_0_start) < outRowBytes);
// Store one 32-bit pixel per row and advance each output pointer.
*(reinterpret_cast<int*>(outRow[0])) = _mm_cvtsi128_si32(accum0);
*(reinterpret_cast<int*>(outRow[1])) = _mm_cvtsi128_si32(accum1);
*(reinterpret_cast<int*>(outRow[2])) = _mm_cvtsi128_si32(accum2);
*(reinterpret_cast<int*>(outRow[3])) = _mm_cvtsi128_si32(accum3);
outRow[0] += 4;
outRow[1] += 4;
outRow[2] += 4;
outRow[3] += 4;
}
}
// Vertical pass of the separable convolution: blends |filterLength| source rows into a
// single output row, weighting row i by |filterValues[i]|. Each row in |sourceDataRows|
// holds |pixelWidth| four-byte pixels.
//
// |outRow| must have room for |pixelWidth * 4| bytes.
//
// NOTE(review): every step loads 16 bytes from each source row, including the final
// partial step when fewer than four pixels remain; presumably the caller pads its
// rows -- confirm.
template<bool hasAlpha>
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
                        int filterLength,
                        unsigned char* const* sourceDataRows,
                        int pixelWidth,
                        unsigned char* outRow) {
    const __m128i kZero = _mm_setzero_si128();

    // Produces four finished output pixels (16 bytes) for the columns that start
    // |byteOffset| bytes into every source row.
    auto convolveFourPixels = [&](int byteOffset) -> __m128i {
        // One register per pixel, 32 bits per RGBA channel.
        __m128i sum0 = kZero;
        __m128i sum1 = kZero;
        __m128i sum2 = kZero;
        __m128i sum3 = kZero;

        // One filter coefficient (i.e. one source row) per iteration.
        for (int row = 0; row < filterLength; ++row) {
            // [16] cj cj cj cj cj cj cj cj
            const __m128i coeff16 = _mm_set1_epi16(filterValues[row]);
            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
            const __m128i src8 = _mm_loadu_si128(
                    reinterpret_cast<const __m128i*>(&sourceDataRows[row][byteOffset]));

            // Pixels 0 and 1: widen to 16 bits, form the full 32-bit products from the
            // high and low halves of the 16x16 multiply, then accumulate.
            // [16] a1 b1 g1 r1 a0 b0 g0 r0
            __m128i src16 = _mm_unpacklo_epi8(src8, kZero);
            __m128i prodHi = _mm_mulhi_epi16(src16, coeff16);
            __m128i prodLo = _mm_mullo_epi16(src16, coeff16);
            // [32] a0 b0 g0 r0
            sum0 = _mm_add_epi32(sum0, _mm_unpacklo_epi16(prodLo, prodHi));
            // [32] a1 b1 g1 r1
            sum1 = _mm_add_epi32(sum1, _mm_unpackhi_epi16(prodLo, prodHi));

            // Pixels 2 and 3, same treatment.
            // [16] a3 b3 g3 r3 a2 b2 g2 r2
            src16 = _mm_unpackhi_epi8(src8, kZero);
            prodHi = _mm_mulhi_epi16(src16, coeff16);
            prodLo = _mm_mullo_epi16(src16, coeff16);
            // [32] a2 b2 g2 r2
            sum2 = _mm_add_epi32(sum2, _mm_unpacklo_epi16(prodLo, prodHi));
            // [32] a3 b3 g3 r3
            sum3 = _mm_add_epi32(sum3, _mm_unpackhi_epi16(prodLo, prodHi));
        }

        // Drop the fractional bits of the fixed-point sums.
        sum0 = _mm_srai_epi32(sum0, SkConvolutionFilter1D::kShiftBits);
        sum1 = _mm_srai_epi32(sum1, SkConvolutionFilter1D::kShiftBits);
        sum2 = _mm_srai_epi32(sum2, SkConvolutionFilter1D::kShiftBits);
        sum3 = _mm_srai_epi32(sum3, SkConvolutionFilter1D::kShiftBits);

        // 32 -> 16 bits with signed saturation, then 16 -> 8 bits with unsigned saturation.
        // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
        const __m128i packed = _mm_packus_epi16(_mm_packs_epi32(sum0, sum1),
                                                _mm_packs_epi32(sum2, sum3));

        if (hasAlpha) {
            // max(r, g) lands in the lowest byte of every pixel...
            __m128i maxColor = _mm_max_epu8(_mm_srli_epi32(packed, 8), packed);
            // ...then max(r, g, b).
            maxColor = _mm_max_epu8(_mm_srli_epi32(packed, 16), maxColor);
            // Move that maximum into the alpha byte and make sure alpha is never
            // smaller than any colour channel.
            // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
            return _mm_max_epu8(_mm_slli_epi32(maxColor, 24), packed);
        }
        // Opaque image: force every alpha byte to 0xFF.
        return _mm_or_si128(packed, _mm_set1_epi32(0xff000000));
    };

    // Whole groups of four pixels are stored 16 bytes at a time.
    const int alignedWidth = pixelWidth & ~3;
    for (int outX = 0; outX < alignedWidth; outX += 4) {
        _mm_storeu_si128(reinterpret_cast<__m128i*>(outRow), convolveFourPixels(outX << 2));
        outRow += 16;
    }

    // Up to three trailing pixels: compute one more group and store only the pixels
    // that exist, one 4-byte pixel at a time. The surplus lanes are discarded.
    const int leftover = pixelWidth & 3;
    if (leftover) {
        __m128i pixels = convolveFourPixels(alignedWidth << 2);
        for (int i = 0; i < leftover; i++) {
            *(reinterpret_cast<int*>(outRow)) = _mm_cvtsi128_si32(pixels);
            pixels = _mm_srli_si128(pixels, 4);
            outRow += 4;
        }
    }
}
#elif defined(SK_ARM_HAS_NEON)
// Scalar tail of the horizontal filters: applies the |r| coefficients in |filterValues|
// to the |r| four-byte pixels starting at |pixelsLeft| and adds the four per-channel
// sums to |accum| in place.
static SK_ALWAYS_INLINE void AccumRemainder(const unsigned char* pixelsLeft,
        const SkConvolutionFilter1D::ConvolutionFixed* filterValues, int32x4_t& accum, int r) {
    int32_t sums[4] = {0, 0, 0, 0};
    const unsigned char* pixel = pixelsLeft;
    for (int tap = 0; tap < r; ++tap, pixel += 4) {
        const SkConvolutionFilter1D::ConvolutionFixed weight = filterValues[tap];
        for (int channel = 0; channel < 4; ++channel) {
            sums[channel] += weight * pixel[channel];
        }
    }
    accum = vaddq_s32(accum, vld1q_s32(sums));
}
// Horizontal pass over one row: for each of the filter's numValues() outputs, applies
// that output's coefficients to the source pixels in |srcData| and writes one four-byte
// pixel to |outRow|. All four channels are always filtered, so the alpha flag is unused.
void convolve_horizontally(const unsigned char* srcData,
                           const SkConvolutionFilter1D& filter,
                           unsigned char* outRow,
                           bool /*hasAlpha*/) {
    // Byte-shuffle tables for vtbl1_u8: table k copies the two bytes of 16-bit
    // coefficient k into all four 16-bit lanes.
    const uint8x8_t splat0 = vcreate_u8(0x0100010001000100);
    const uint8x8_t splat1 = vcreate_u8(0x0302030203020302);
    const uint8x8_t splat2 = vcreate_u8(0x0504050405040504);
    const uint8x8_t splat3 = vcreate_u8(0x0706070607060706);

    const int outputCount = filter.numValues();
    for (int outX = 0; outX < outputCount; ++outX, outRow += 4) {
        // Coefficients for this output pixel and the first source pixel they apply to.
        int filterOffset, filterLength;
        const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
            filter.FilterForValue(outX, &filterOffset, &filterLength);
        const unsigned char* src = srcData + filterOffset * 4;

        // Per-channel 32-bit sums for the output pixel.
        int32x4_t sum = vdupq_n_s32(0);

        // Four coefficients / four source pixels (16 bytes) per step.
        for (int group = filterLength >> 2; group > 0; --group) {
            const uint8x8_t coeffBytes = vreinterpret_u8_s16(vld1_s16(filterValues));
            const int16x4_t c0 = vreinterpret_s16_u8(vtbl1_u8(coeffBytes, splat0));
            const int16x4_t c1 = vreinterpret_s16_u8(vtbl1_u8(coeffBytes, splat1));
            const int16x4_t c2 = vreinterpret_s16_u8(vtbl1_u8(coeffBytes, splat2));
            const int16x4_t c3 = vreinterpret_s16_u8(vtbl1_u8(coeffBytes, splat3));

            // Widen the 8-bit channels to 16 bits, two pixels per register.
            const uint8x16_t pixels = vld1q_u8(src);
            const int16x8_t px01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pixels)));
            const int16x8_t px23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pixels)));

            sum = vaddq_s32(sum, vmull_s16(vget_low_s16(px01), c0));
            sum = vaddq_s32(sum, vmull_s16(vget_high_s16(px01), c1));
            sum = vaddq_s32(sum, vmull_s16(vget_low_s16(px23), c2));
            sum = vaddq_s32(sum, vmull_s16(vget_high_s16(px23), c3));

            src += 16;
            filterValues += 4;
        }

        // One to three coefficients may be left over; handle them in scalar code.
        const int tail = filterLength & 3;
        if (tail) {
            const int tailOffset = (filterOffset + filterLength - tail) * 4;
            AccumRemainder(srcData + tailOffset, filterValues, sum, tail);
        }

        // Drop the kShiftBits fractional bits of the fixed-point sums.
        sum = vshrq_n_s32(sum, SkConvolutionFilter1D::kShiftBits);

        // Saturate down to 8 bits per channel and store the single pixel.
        const int16x4_t sum16 = vqmovn_s32(sum);
        const uint8x8_t sum8 = vqmovun_s16(vcombine_s16(sum16, sum16));
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow), vreinterpret_u32_u8(sum8), 0);
    }
}
// Convolves horizontally along four rows at once. The row data is given in
// |srcData| and continues for the numValues() of the filter. Each step produces one
// output pixel (all four channels) for each of the four rows and advances every
// |outRow[i]| by four bytes.
// The algorithm is almost the same as |convolve_horizontally|; refer to that function
// for detailed comments. |outRowBytes| is not used by this implementation.
//
// The helper macros below are local to this function and are #undef'd right after
// their last use so that they do not leak out of this header into including code.
void convolve_4_rows_horizontally(const unsigned char* srcData[4],
                                  const SkConvolutionFilter1D& filter,
                                  unsigned char* outRow[4],
                                  size_t outRowBytes) {
    // Byte-shuffle tables for vtbl1_u8: table k copies the two bytes of 16-bit
    // coefficient k into all four 16-bit lanes. They never change, so build them once.
    const uint8x8_t coeff_mask0 = vcreate_u8(0x0100010001000100);
    const uint8x8_t coeff_mask1 = vcreate_u8(0x0302030203020302);
    const uint8x8_t coeff_mask2 = vcreate_u8(0x0504050405040504);
    const uint8x8_t coeff_mask3 = vcreate_u8(0x0706070607060706);

    // Output one pixel each iteration, calculating all channels (RGBA) together.
    int numValues = filter.numValues();
    for (int outX = 0; outX < numValues; outX++) {
        int filterOffset, filterLength;
        const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
            filter.FilterForValue(outX, &filterOffset, &filterLength);

        // One accumulator per row, 32 bits per channel.
        int32x4_t accum0 = vdupq_n_s32(0);
        int32x4_t accum1 = vdupq_n_s32(0);
        int32x4_t accum2 = vdupq_n_s32(0);
        int32x4_t accum3 = vdupq_n_s32(0);

        int start = filterOffset * 4;

        // We will load and accumulate with four coefficients per iteration.
        for (int filterX = 0; filterX < (filterLength >> 2); filterX++) {
            int16x4_t coeffs, coeff0, coeff1, coeff2, coeff3;

            coeffs = vld1_s16(filterValues);
            coeff0 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask0));
            coeff1 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask1));
            coeff2 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask2));
            coeff3 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask3));

            uint8x16_t pixels;
            int16x8_t p01_16, p23_16;
            int32x4_t p0, p1, p2, p3;

// Loads four pixels from |src|, multiplies pixel k by coefficient k and adds the
// products to |accum|.
#define ITERATION(src, accum)                                                \
    pixels = vld1q_u8(src);                                                  \
    p01_16 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pixels)));           \
    p23_16 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pixels)));          \
    p0 = vmull_s16(vget_low_s16(p01_16), coeff0);                            \
    p1 = vmull_s16(vget_high_s16(p01_16), coeff1);                           \
    p2 = vmull_s16(vget_low_s16(p23_16), coeff2);                            \
    p3 = vmull_s16(vget_high_s16(p23_16), coeff3);                           \
    accum += p0;                                                             \
    accum += p1;                                                             \
    accum += p2;                                                             \
    accum += p3

            ITERATION(srcData[0] + start, accum0);
            ITERATION(srcData[1] + start, accum1);
            ITERATION(srcData[2] + start, accum2);
            ITERATION(srcData[3] + start, accum3);

#undef ITERATION

            start += 16;
            filterValues += 4;
        }

        // One to three coefficients may be left over; handle them in scalar code.
        int r = filterLength & 3;
        if (r) {
            int remainder_offset = (filterOffset + filterLength - r) * 4;
            AccumRemainder(srcData[0] + remainder_offset, filterValues, accum0, r);
            AccumRemainder(srcData[1] + remainder_offset, filterValues, accum1, r);
            AccumRemainder(srcData[2] + remainder_offset, filterValues, accum2, r);
            AccumRemainder(srcData[3] + remainder_offset, filterValues, accum3, r);
        }

        int16x4_t accum16;
        uint8x8_t res0, res1, res2, res3;

// Drops the fractional bits of |accum| and saturates it down to 8 bits per channel.
#define PACK_RESULT(accum, res)                                              \
    accum = vshrq_n_s32(accum, SkConvolutionFilter1D::kShiftBits);           \
    accum16 = vqmovn_s32(accum);                                             \
    res = vqmovun_s16(vcombine_s16(accum16, accum16))

        PACK_RESULT(accum0, res0);
        PACK_RESULT(accum1, res1);
        PACK_RESULT(accum2, res2);
        PACK_RESULT(accum3, res3);

#undef PACK_RESULT

        // Store one pixel per row and advance the output pointers.
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[0]), vreinterpret_u32_u8(res0), 0);
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[1]), vreinterpret_u32_u8(res1), 0);
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[2]), vreinterpret_u32_u8(res2), 0);
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[3]), vreinterpret_u32_u8(res3), 0);
        outRow[0] += 4;
        outRow[1] += 4;
        outRow[2] += 4;
        outRow[3] += 4;
    }
}
// Vertical pass of the separable convolution: blends |filterLength| source rows into a
// single output row, weighting row i by |filterValues[i]|. Each row in |sourceDataRows|
// holds |pixelWidth| four-byte pixels.
//
// |outRow| must have room for |pixelWidth * 4| bytes.
//
// NOTE(review): every step loads 16 bytes from each source row, including the final
// partial step when fewer than four pixels remain; presumably the caller pads its
// rows -- confirm.
template<bool hasAlpha>
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
                        int filterLength,
                        unsigned char* const* sourceDataRows,
                        int pixelWidth,
                        unsigned char* outRow) {
    // Produces four finished output pixels (16 bytes) for the columns that start
    // |byteOffset| bytes into every source row.
    auto convolveFourPixels = [&](int byteOffset) -> uint8x16_t {
        // One register per pixel, 32 bits per RGBA channel.
        int32x4_t sum0 = vdupq_n_s32(0);
        int32x4_t sum1 = vdupq_n_s32(0);
        int32x4_t sum2 = vdupq_n_s32(0);
        int32x4_t sum3 = vdupq_n_s32(0);

        // One filter coefficient (i.e. one source row) per iteration.
        for (int row = 0; row < filterLength; ++row) {
            // [16] cj cj cj cj
            const int16x4_t coeff16 = vdup_n_s16(filterValues[row]);
            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
            const uint8x16_t src8 = vld1q_u8(&sourceDataRows[row][byteOffset]);

            // Widen the channels to 16 bits, two pixels per register.
            const int16x8_t px01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(src8)));
            const int16x8_t px23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(src8)));

            sum0 = vaddq_s32(sum0, vmull_s16(vget_low_s16(px01), coeff16));
            sum1 = vaddq_s32(sum1, vmull_s16(vget_high_s16(px01), coeff16));
            sum2 = vaddq_s32(sum2, vmull_s16(vget_low_s16(px23), coeff16));
            sum3 = vaddq_s32(sum3, vmull_s16(vget_high_s16(px23), coeff16));
        }

        // Drop the fractional bits of the fixed-point sums.
        sum0 = vshrq_n_s32(sum0, SkConvolutionFilter1D::kShiftBits);
        sum1 = vshrq_n_s32(sum1, SkConvolutionFilter1D::kShiftBits);
        sum2 = vshrq_n_s32(sum2, SkConvolutionFilter1D::kShiftBits);
        sum3 = vshrq_n_s32(sum3, SkConvolutionFilter1D::kShiftBits);

        // 32 -> 16 bits with signed saturation, then 16 -> 8 bits with unsigned saturation.
        // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
        const int16x8_t packed01 = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1));
        const int16x8_t packed23 = vcombine_s16(vqmovn_s32(sum2), vqmovn_s32(sum3));
        const uint8x16_t packed = vcombine_u8(vqmovun_s16(packed01), vqmovun_s16(packed23));
        const uint32x4_t packed32 = vreinterpretq_u32_u8(packed);

        if (hasAlpha) {
            // max(r, g) lands in the lowest byte of every pixel...
            uint8x16_t maxColor =
                    vmaxq_u8(vreinterpretq_u8_u32(vshrq_n_u32(packed32, 8)), packed);
            // ...then max(r, g, b).
            maxColor = vmaxq_u8(vreinterpretq_u8_u32(vshrq_n_u32(packed32, 16)), maxColor);
            // Move that maximum into the alpha byte and make sure alpha is never
            // smaller than any colour channel.
            // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
            const uint8x16_t alphaFloor =
                    vreinterpretq_u8_u32(vshlq_n_u32(vreinterpretq_u32_u8(maxColor), 24));
            return vmaxq_u8(alphaFloor, packed);
        }
        // Opaque image: force every alpha byte to 0xFF.
        return vreinterpretq_u8_u32(vorrq_u32(packed32, vdupq_n_u32(0xFF000000)));
    };

    // Whole groups of four pixels are stored 16 bytes at a time.
    const int alignedWidth = pixelWidth & ~3;
    for (int outX = 0; outX < alignedWidth; outX += 4) {
        vst1q_u8(outRow, convolveFourPixels(outX << 2));
        outRow += 16;
    }

    // Up to three trailing pixels: compute one more group and store only the pixels
    // that exist. The surplus lanes are discarded.
    const int leftover = pixelWidth & 3;
    if (leftover) {
        const uint32x4_t pixels = vreinterpretq_u32_u8(convolveFourPixels(alignedWidth << 2));
        uint32_t* dst = reinterpret_cast<uint32_t*>(outRow);
        if (leftover == 1) {
            vst1q_lane_u32(dst, pixels, 0);
        } else {
            // Two or three pixels: the first two go out as one 8-byte store.
            vst1_u32(dst, vget_low_u32(pixels));
            if (leftover == 3) {
                vst1q_lane_u32(dst + 2, pixels, 2);
            }
        }
    }
}
#else
// Clamps |a| to the range [0, 255] and returns it as an 8-bit unsigned value:
// negative inputs become 0, inputs above 255 become 255.
inline unsigned char ClampTo8(int a) {
    // Any bit set outside the low eight means the value is out of range
    // (negative values have the high bits set). This is the uncommon case.
    if ((a & ~0xFF) != 0) {
        return a < 0 ? 0 : 255;
    }
    return static_cast<unsigned char>(a);
}
// Convolves horizontally along a single row. The row data is given in
// |srcData| and continues for the numValues() of the filter.
template<bool hasAlpha>
void ConvolveHorizontally(const unsigned char* srcData,
const SkConvolutionFilter1D& filter,
unsigned char* outRow) {
// Loop over each pixel on this row in the output image.
int numValues = filter.numValues();
for (int outX = 0; outX < numValues; outX++) {
// Get the filter that determines the current output pixel.
int filterOffset, filterLength;
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
filter.FilterForValue(outX, &filterOffset, &filterLength);
// Compute the first pixel in this row that the filter affects. It will
// touch |filterLength| pixels (4 bytes each) after this.
const unsigned char* rowToFilter = &srcData[filterOffset * 4];
// Apply the filter to the row to get the destination pixel in |accum|.
int accum[4] = {0};
for (int filterX = 0; filterX < filterLength; filterX++) {
SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
if (hasAlpha) {
accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
}
}
// Bring this value back in range. All of the filter scaling factors
// are in fixed point with kShiftBits bits of fractional part.
accum[0] >>= SkConvolutionFilter1D::kShiftBits;
accum[1] >>= SkConvolutionFilter1D::kShiftBits;
accum[2] >>= SkConvolutionFilter1D::kShiftBits;
if (hasAlpha) {
accum[3] >>= SkConvolutionFilter1D::kShiftBits;
}
// Store the new pixel.
outRow[outX * 4 + 0] = ClampTo8(accum[0]);
outRow[outX * 4 + 1] = ClampTo8(accum[1]);
outRow[outX * 4 + 2] = ClampTo8(accum[2]);
if (hasAlpha) {
outRow[outX * 4 + 3] = ClampTo8(accum[3]);
}
}
}
// Vertical pass of the separable convolution: blends |filterLength| source rows into a
// single output row, weighting row i by |filterValues[i]|. Each row in |sourceDataRows|
// holds |pixelWidth| four-byte pixels.
//
// |outRow| must have room for |pixelWidth * 4| bytes.
template<bool hasAlpha>
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
                        int filterLength,
                        unsigned char* const* sourceDataRows,
                        int pixelWidth,
                        unsigned char* outRow) {
    // Alpha is only accumulated when the image has it.
    const int channelCount = hasAlpha ? 4 : 3;

    // One output pixel (one column of the source rows) per iteration.
    for (int outX = 0; outX < pixelWidth; ++outX) {
        // Byte position of this column within every row; the pixel spans 4 bytes.
        const int byteOffset = outX * 4;
        unsigned char* dst = outRow + byteOffset;

        // Weighted per-channel sums down the column.
        int sums[4] = {0, 0, 0, 0};
        for (int row = 0; row < filterLength; ++row) {
            const SkConvolutionFilter1D::ConvolutionFixed weight = filterValues[row];
            const unsigned char* src = sourceDataRows[row] + byteOffset;
            for (int channel = 0; channel < channelCount; ++channel) {
                sums[channel] += weight * src[channel];
            }
        }

        // The coefficients are fixed point with kShiftBits bits of precision: shift the
        // colour sums back into range, clamp to 8 bits and store.
        for (int channel = 0; channel < 3; ++channel) {
            dst[channel] = ClampTo8(sums[channel] >> SkConvolutionFilter1D::kShiftBits);
        }

        if (!hasAlpha) {
            // No alpha channel, the image is opaque.
            dst[3] = 0xff;
            continue;
        }

        const unsigned char alpha = ClampTo8(sums[3] >> SkConvolutionFilter1D::kShiftBits);

        // With premultiplied alpha the alpha channel should never be smaller than a
        // colour channel, but rounding errors make that happen from time to time.
        // Such "impossible" colours cause overflows (and hence random pixel values)
        // when the bitmap is drawn, so raise alpha to the largest colour channel.
        //
        // This is only needed when generating the final output row (here).
        const int maxColorChannel = SkTMax(dst[0], SkTMax(dst[1], dst[2]));
        dst[3] = (alpha < maxColorChannel) ? maxColorChannel : alpha;
    }
}
// GCC autovectorization (-ftree-vectorize) miscompiles something around here. It was first
// believed to affect 32 bit builds only, but later tests showed some 64 bit gcc builds are
// hit as well.
//
// Compiling this function at -O2 turns -ftree-vectorize off. GCC 4.6 additionally needs
// noinline. https://bug.skia.org/2575
#if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE)
    #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), noinline))
#else
    #define SK_MAYBE_DISABLE_VECTORIZATION
#endif

// Run-time dispatch onto the ConvolveHorizontally<> instantiation matching |hasAlpha|.
SK_MAYBE_DISABLE_VECTORIZATION
void convolve_horizontally(const unsigned char* srcData,
                           const SkConvolutionFilter1D& filter,
                           unsigned char* outRow,
                           bool hasAlpha) {
    if (!hasAlpha) {
        ConvolveHorizontally<false>(srcData, filter, outRow);
        return;
    }
    ConvolveHorizontally<true>(srcData, filter, outRow);
}
#undef SK_MAYBE_DISABLE_VECTORIZATION
// Portable build: no four-row horizontal kernel is implemented in this branch, so the
// entry point is defined as a function pointer and initialised to nullptr.
// NOTE(review): callers presumably check for nullptr before calling through it -- confirm.
void (*convolve_4_rows_horizontally)(const unsigned char* srcData[4],
const SkConvolutionFilter1D& filter,
unsigned char* outRow[4],
size_t outRowBytes)
= nullptr;
#endif
// Run-time dispatch onto the ConvolveVertically<> instantiation matching |hasAlpha|.
// All other arguments are forwarded unchanged.
void convolve_vertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
                         int filterLength,
                         unsigned char* const* sourceDataRows,
                         int pixelWidth,
                         unsigned char* outRow,
                         bool hasAlpha) {
    if (!hasAlpha) {
        ConvolveVertically<false>(filterValues, filterLength, sourceDataRows,
                                  pixelWidth, outRow);
        return;
    }
    ConvolveVertically<true>(filterValues, filterLength, sourceDataRows,
                             pixelWidth, outRow);
}
} // namespace SK_OPTS_NS
#endif//SkBitmapFilter_opts_DEFINED

View File

@ -5,7 +5,6 @@
* found in the LICENSE file. * found in the LICENSE file.
*/ */
#include "SkBitmapScaler.h"
#include "SkBitmapProcState.h" #include "SkBitmapProcState.h"
/* A platform may optionally overwrite any of these with accelerated /* A platform may optionally overwrite any of these with accelerated

View File

@ -1,118 +0,0 @@
/*
* Copyright 2016 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
// It is not safe to #include any header file here unless it has been vetted for ODR safety:
// all symbols used must be file-scoped static or in an anonymous namespace. This applies
// to _all_ header files: C standard library, C++ standard library, Skia... everything.
#include <immintrin.h> // ODR safe
#include <stdint.h> // ODR safe
#if defined(__AVX2__)
namespace hsw {
// AVX2 vertical convolution: combines |filterLen| source rows (|srcRows[i]|, 4 bytes per
// pixel) into one output row of |width| pixels written to |out|, weighting row i by
// |filter[i]|. With |hasAlpha| the alpha byte is raised to at least max(r,g,b);
// otherwise it is forced to 0xff.
//
// NOTE(review): each step loads a full 32 bytes from every source row, including the
// last step when fewer than 8 pixels remain; presumably the caller pads its rows -- confirm.
void convolve_vertically(const int16_t* filter, int filterLen,
uint8_t* const* srcRows, int width,
uint8_t* out, bool hasAlpha) {
// It's simpler to work with the output array in terms of 4-byte pixels.
auto dst = (int*)out;
// Output up to eight pixels per iteration.
for (int x = 0; x < width; x += 8) {
// Accumulated result for 4 (non-adjacent) pairs of pixels,
// with each channel in signed 17.14 fixed point.
auto accum04 = _mm256_setzero_si256(),
accum15 = _mm256_setzero_si256(),
accum26 = _mm256_setzero_si256(),
accum37 = _mm256_setzero_si256();
// Convolve with the filter. (This inner loop is where we spend ~all our time.)
// While we can, we consume 2 filter coefficients and 2 rows of 8 pixels each at a time.
// |interlaced_coeffs| holds the coefficient pair in every 32-bit lane: the low 16 bits
// multiply the first row's channel, the high 16 bits the second row's.
auto convolve_16_pixels = [&](__m256i interlaced_coeffs,
__m256i pixels_01234567, __m256i pixels_89ABCDEF) {
// Interlaced R0R8 G0G8 B0B8 A0A8 R1R9 G1G9... 32 8-bit values each.
auto _08194C5D = _mm256_unpacklo_epi8(pixels_01234567, pixels_89ABCDEF),
_2A3B6E7F = _mm256_unpackhi_epi8(pixels_01234567, pixels_89ABCDEF);
// Still interlaced R0R8 G0G8... as above, each channel expanded to 16-bit lanes.
auto _084C = _mm256_unpacklo_epi8(_08194C5D, _mm256_setzero_si256()),
_195D = _mm256_unpackhi_epi8(_08194C5D, _mm256_setzero_si256()),
_2A6E = _mm256_unpacklo_epi8(_2A3B6E7F, _mm256_setzero_si256()),
_3B7F = _mm256_unpackhi_epi8(_2A3B6E7F, _mm256_setzero_si256());
// accum0_R += R0*coeff0 + R8*coeff1, etc.
accum04 = _mm256_add_epi32(accum04, _mm256_madd_epi16(_084C, interlaced_coeffs));
accum15 = _mm256_add_epi32(accum15, _mm256_madd_epi16(_195D, interlaced_coeffs));
accum26 = _mm256_add_epi32(accum26, _mm256_madd_epi16(_2A6E, interlaced_coeffs));
accum37 = _mm256_add_epi32(accum37, _mm256_madd_epi16(_3B7F, interlaced_coeffs));
};
int i = 0;
// Rows in pairs: the two adjacent 16-bit coefficients are read as one 32-bit value
// and broadcast to every lane.
for (; i < filterLen/2*2; i += 2) {
convolve_16_pixels(_mm256_set1_epi32(*(const int32_t*)(filter+i)),
_mm256_loadu_si256((const __m256i*)(srcRows[i+0] + x*4)),
_mm256_loadu_si256((const __m256i*)(srcRows[i+1] + x*4)));
}
// Odd filter length: the last row is paired with an all-zero second row, so the upper
// 16 bits of each broadcast coefficient lane only ever multiply zero.
if (i < filterLen) {
convolve_16_pixels(_mm256_set1_epi32(*(const int16_t*)(filter+i)),
_mm256_loadu_si256((const __m256i*)(srcRows[i] + x*4)),
_mm256_setzero_si256());
}
// Trim the fractional parts off the accumulators.
// NOTE(review): the literal 14 presumably mirrors SkConvolutionFilter1D::kShiftBits,
// which cannot be named here because this file includes no Skia headers -- confirm.
accum04 = _mm256_srai_epi32(accum04, 14);
accum15 = _mm256_srai_epi32(accum15, 14);
accum26 = _mm256_srai_epi32(accum26, 14);
accum37 = _mm256_srai_epi32(accum37, 14);
// Pack back down to 8-bit channels.
auto pixels = _mm256_packus_epi16(_mm256_packs_epi32(accum04, accum15),
_mm256_packs_epi32(accum26, accum37));
if (hasAlpha) {
// Clamp alpha to the max of r,g,b to make sure we stay premultiplied.
__m256i max_rg = _mm256_max_epu8(pixels, _mm256_srli_epi32(pixels, 8)),
max_rgb = _mm256_max_epu8(max_rg, _mm256_srli_epi32(pixels, 16));
pixels = _mm256_max_epu8(pixels, _mm256_slli_epi32(max_rgb, 24));
} else {
// Force opaque.
pixels = _mm256_or_si256(pixels, _mm256_set1_epi32(0xff000000));
}
// Normal path to store 8 pixels.
if (x + 8 <= width) {
_mm256_storeu_si256((__m256i*)dst, pixels);
dst += 8;
continue;
}
// Store one pixel at a time on the last iteration.
// (This |i| shadows the row index declared above.) After each store the eight 32-bit
// lanes are rotated down by one so the next pixel sits in lane 0.
for (int i = x; i < width; i++) {
*dst++ = _mm_cvtsi128_si32(_mm256_castsi256_si128(pixels));
pixels = _mm256_permutevar8x32_epi32(pixels, _mm256_setr_epi32(1,2,3,4,5,6,7,0));
}
}
}
}
namespace SkOpts {
// See SkOpts.h, writing SkConvolutionFilter1D::ConvolutionFixed as the underlying type.
// The pointer is only declared here (extern) and is defined elsewhere; it is redeclared by
// hand because this file may not include Skia headers.
extern void (*convolve_vertically)(const int16_t* filter, int filterLen,
uint8_t* const* srcRows, int width,
uint8_t* out, bool hasAlpha);
// Points SkOpts::convolve_vertically at the AVX2 implementation above.
void Init_hsw() {
convolve_vertically = hsw::convolve_vertically;
}
}
#else // defined(__AVX2__) is not true...
// Built without AVX2: Init_hsw() still exists but installs nothing.
namespace SkOpts { void Init_hsw() {} }
#endif

View File

@ -7,7 +7,6 @@
#include "SkBitmapProcState_opts_SSE2.h" #include "SkBitmapProcState_opts_SSE2.h"
#include "SkBitmapProcState_opts_SSSE3.h" #include "SkBitmapProcState_opts_SSSE3.h"
#include "SkBitmapScaler.h"
#include "SkBlitMask.h" #include "SkBlitMask.h"
#include "SkBlitRow.h" #include "SkBlitRow.h"
#include "SkBlitRow_opts_SSE2.h" #include "SkBlitRow_opts_SSE2.h"

View File

@ -256,7 +256,7 @@ bool SkImageShader::onAppendStages(SkRasterPipeline* p, SkColorSpace* dstCS, SkA
auto quality = paint.getFilterQuality(); auto quality = paint.getFilterQuality();
SkBitmapProvider provider(fImage.get(), dstCS); SkBitmapProvider provider(fImage.get(), dstCS);
SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kYes); SkDefaultBitmapController controller;
std::unique_ptr<SkBitmapController::State> state { std::unique_ptr<SkBitmapController::State> state {
controller.requestBitmap(provider, matrix, quality) controller.requestBitmap(provider, matrix, quality)
}; };