Assume HQ is handled by pipeline, delete legacy code-path
CQ_INCLUDE_TRYBOTS=skia.primary:Test-Debian9-GCC-GCE-CPU-AVX2-x86_64-Release-SKNX_NO_SIMD Bug: skia: Change-Id: If6f0d0a57463bf99a66d674e65a62ce3931d0116 Reviewed-on: https://skia-review.googlesource.com/24644 Commit-Queue: Mike Reed <reed@google.com> Reviewed-by: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
3e583cba8a
commit
e32500f064
17
BUILD.gn
17
BUILD.gn
@ -297,22 +297,6 @@ opts("avx") {
|
||||
}
|
||||
}
|
||||
|
||||
opts("hsw") {
|
||||
enabled = is_x86
|
||||
sources = skia_opts.hsw_sources
|
||||
if (is_win) {
|
||||
cflags = [ "/arch:AVX2" ]
|
||||
} else {
|
||||
cflags = [
|
||||
"-mavx2",
|
||||
"-mbmi",
|
||||
"-mbmi2",
|
||||
"-mf16c",
|
||||
"-mfma",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
# Any feature of Skia that requires third-party code should be optional and use this template.
|
||||
template("optional") {
|
||||
if (invoker.enabled) {
|
||||
@ -668,7 +652,6 @@ component("skia") {
|
||||
":fontmgr_fontconfig",
|
||||
":fontmgr_fuchsia",
|
||||
":gpu",
|
||||
":hsw",
|
||||
":jpeg",
|
||||
":none",
|
||||
":pdf",
|
||||
|
@ -1,161 +0,0 @@
|
||||
/*
|
||||
* Copyright 2013 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "Benchmark.h"
|
||||
#include "SkBlurMask.h"
|
||||
#include "SkCanvas.h"
|
||||
#include "SkPaint.h"
|
||||
#include "SkRandom.h"
|
||||
#include "SkShader.h"
|
||||
#include "SkString.h"
|
||||
|
||||
// Abstract base for bitmap-scaling benchmarks. It owns a square input bitmap
// (fInputSize x fInputSize), a square output bitmap (fOutputSize x fOutputSize)
// and a scale matrix mapping one onto the other; subclasses provide the actual
// scaling work in doScaleImage().
class BitmapScaleBench: public Benchmark {
|
||||
// Assigned in the constructor; not read anywhere in this class.
int fLoopCount;
|
||||
int fInputSize;
|
||||
int fOutputSize;
|
||||
SkString fName;
|
||||
|
||||
public:
|
||||
// is: edge length of the input bitmap; os: edge length of the output bitmap.
BitmapScaleBench( int is, int os) {
|
||||
fInputSize = is;
|
||||
fOutputSize = os;
|
||||
|
||||
fLoopCount = 20;
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
SkBitmap fInputBitmap, fOutputBitmap;
|
||||
SkMatrix fMatrix;
|
||||
|
||||
const char* onGetName() override {
|
||||
return fName.c_str();
|
||||
}
|
||||
|
||||
int inputSize() const {
|
||||
return fInputSize;
|
||||
}
|
||||
|
||||
int outputSize() const {
|
||||
return fOutputSize;
|
||||
}
|
||||
|
||||
// Ratio output/input as a float: > 1 enlarges, < 1 shrinks.
float scale() const {
|
||||
return float(outputSize())/inputSize();
|
||||
}
|
||||
|
||||
SkIPoint onGetSize() override {
|
||||
return SkIPoint::Make( fOutputSize, fOutputSize );
|
||||
}
|
||||
|
||||
// Builds the benchmark name as "bitmap_scale_<name>_<input>_<output>".
void setName(const char * name) {
|
||||
fName.printf( "bitmap_scale_%s_%d_%d", name, fInputSize, fOutputSize );
|
||||
}
|
||||
|
||||
// Allocates both bitmaps, fills the input with white and sets fMatrix to a
// uniform scale by scale().
void onDelayedSetup() override {
|
||||
fInputBitmap.allocN32Pixels(fInputSize, fInputSize, true);
|
||||
fInputBitmap.eraseColor(SK_ColorWHITE);
|
||||
|
||||
fOutputBitmap.allocN32Pixels(fOutputSize, fOutputSize, true);
|
||||
|
||||
fMatrix.setScale( scale(), scale() );
|
||||
}
|
||||
|
||||
// Calls preBenchSetup() once, then doScaleImage() `loops` times. The canvas
// argument is ignored.
void onDraw(int loops, SkCanvas*) override {
|
||||
// NOTE(review): this paint is set up but not referenced again in this function.
SkPaint paint;
|
||||
this->setupPaint(&paint);
|
||||
|
||||
preBenchSetup();
|
||||
|
||||
for (int i = 0; i < loops; i++) {
|
||||
doScaleImage();
|
||||
}
|
||||
}
|
||||
|
||||
// The operation being timed; executed once per loop iteration.
virtual void doScaleImage() = 0;
|
||||
// Optional hook run once per onDraw() call before the loop; no-op by default.
virtual void preBenchSetup() {}
|
||||
private:
|
||||
typedef Benchmark INHERITED;
|
||||
};
|
||||
|
||||
// Scaling benchmark that draws the input bitmap through an SkCanvas with the
// paint's filter quality set to kHigh_SkFilterQuality. Named
// "bitmap_scale_filter_<input>_<output>".
class BitmapFilterScaleBench: public BitmapScaleBench {
|
||||
public:
|
||||
BitmapFilterScaleBench( int is, int os) : INHERITED(is, os) {
|
||||
setName( "filter" );
|
||||
}
|
||||
protected:
|
||||
// One timed iteration: wraps fOutputBitmap in a fresh canvas, applies the
// scale matrix and draws the input bitmap at the origin.
void doScaleImage() override {
|
||||
SkCanvas canvas( fOutputBitmap );
|
||||
SkPaint paint;
|
||||
|
||||
paint.setFilterQuality(kHigh_SkFilterQuality);
|
||||
// NOTE(review): presumably called so each iteration redoes the scaling
// instead of hitting a cached result -- confirm.
fInputBitmap.notifyPixelsChanged();
|
||||
canvas.concat(fMatrix);
|
||||
canvas.drawBitmap(fInputBitmap, 0, 0, &paint );
|
||||
}
|
||||
private:
|
||||
typedef BitmapScaleBench INHERITED;
|
||||
};
|
||||
|
||||
// Registered (input size, output size) pairs: enlargements, the 90 -> 90
// identity case, and reductions.
DEF_BENCH(return new BitmapFilterScaleBench(10, 90);)
|
||||
DEF_BENCH(return new BitmapFilterScaleBench(30, 90);)
|
||||
DEF_BENCH(return new BitmapFilterScaleBench(80, 90);)
|
||||
DEF_BENCH(return new BitmapFilterScaleBench(90, 90);)
|
||||
DEF_BENCH(return new BitmapFilterScaleBench(90, 80);)
|
||||
DEF_BENCH(return new BitmapFilterScaleBench(90, 30);)
|
||||
DEF_BENCH(return new BitmapFilterScaleBench(90, 10);)
|
||||
DEF_BENCH(return new BitmapFilterScaleBench(256, 64);)
|
||||
DEF_BENCH(return new BitmapFilterScaleBench(64, 256);)
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "SkBitmapScaler.h"
|
||||
|
||||
// Benchmarks SkBitmapScaler::Resize() directly on pixmaps, with one benchmark
// instance per resize method. Named "pixmapscaler_<suffix>".
class PixmapScalerBench: public Benchmark {
|
||||
SkBitmapScaler::ResizeMethod fMethod;
|
||||
SkString fName;
|
||||
SkBitmap fSrc, fDst;
|
||||
|
||||
public:
|
||||
// method: resize kernel to time; suffix: text appended to the benchmark name.
PixmapScalerBench(SkBitmapScaler::ResizeMethod method, const char suffix[]) : fMethod(method) {
|
||||
fName.printf("pixmapscaler_%s", suffix);
|
||||
}
|
||||
|
||||
protected:
|
||||
const char* onGetName() override {
|
||||
return fName.c_str();
|
||||
}
|
||||
|
||||
SkIPoint onGetSize() override { return{ 100, 100 }; }
|
||||
|
||||
// Only runs on the non-rendering backend; onDraw() never touches its canvas.
bool isSuitableFor(Backend backend) override {
|
||||
return backend == kNonRendering_Backend;
|
||||
}
|
||||
|
||||
// Allocates a 640x480 source filled with white and a 300x250 destination.
void onDelayedSetup() override {
|
||||
fSrc.allocN32Pixels(640, 480);
|
||||
fSrc.eraseColor(SK_ColorWHITE);
|
||||
fDst.allocN32Pixels(300, 250);
|
||||
}
|
||||
|
||||
// Resizes fSrc into fDst 16 times per requested loop. The results of
// peekPixels() and Resize() are not checked.
void onDraw(int loops, SkCanvas*) override {
|
||||
SkPixmap src, dst;
|
||||
fSrc.peekPixels(&src);
|
||||
fDst.peekPixels(&dst);
|
||||
for (int i = 0; i < loops * 16; i++) {
|
||||
SkBitmapScaler::Resize(dst, src, fMethod);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
typedef Benchmark INHERITED;
|
||||
};
|
||||
// One registration per resize method.
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_LANCZOS3, "lanczos"); )
|
||||
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_MITCHELL, "mitchell"); )
|
||||
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_HAMMING, "hamming"); )
|
||||
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_TRIANGLE, "triangle"); )
|
||||
DEF_BENCH( return new PixmapScalerBench(SkBitmapScaler::RESIZE_BOX, "box"); )
|
@ -10,7 +10,6 @@
|
||||
|
||||
#include "Resources.h"
|
||||
#include "SkBitmapProcState.h"
|
||||
#include "SkBitmapScaler.h"
|
||||
#include "SkGradientShader.h"
|
||||
#include "SkImageEncoder.h"
|
||||
#include "SkStream.h"
|
||||
@ -75,7 +74,7 @@ protected:
|
||||
}
|
||||
|
||||
SkISize onISize() override {
|
||||
return SkISize::Make(1024, 768);
|
||||
return SkISize::Make(680, 130);
|
||||
}
|
||||
|
||||
void onDraw(SkCanvas* canvas) override {
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include "sk_tool_utils.h"
|
||||
|
||||
#include "Resources.h"
|
||||
#include "SkBitmapScaler.h"
|
||||
#include "SkGradientShader.h"
|
||||
#include "SkTypeface.h"
|
||||
#include "SkStream.h"
|
||||
@ -114,9 +113,7 @@ protected:
|
||||
return str;
|
||||
}
|
||||
|
||||
SkISize onISize() override {
|
||||
return { 824, 862 };
|
||||
}
|
||||
SkISize onISize() override { return { 150, 862 }; }
|
||||
|
||||
static void DrawAndFrame(SkCanvas* canvas, const SkBitmap& orig, SkScalar x, SkScalar y) {
|
||||
SkBitmap bm;
|
||||
@ -169,25 +166,6 @@ protected:
|
||||
bm.installPixels(curr);
|
||||
return bm;
|
||||
});
|
||||
|
||||
const SkBitmapScaler::ResizeMethod methods[] = {
|
||||
SkBitmapScaler::RESIZE_BOX,
|
||||
SkBitmapScaler::RESIZE_TRIANGLE,
|
||||
SkBitmapScaler::RESIZE_LANCZOS3,
|
||||
SkBitmapScaler::RESIZE_HAMMING,
|
||||
SkBitmapScaler::RESIZE_MITCHELL,
|
||||
};
|
||||
|
||||
SkPixmap basePM;
|
||||
orig.peekPixels(&basePM);
|
||||
for (auto method : methods) {
|
||||
canvas->translate(orig.width()/2 + 8.0f, 0);
|
||||
drawLevels(canvas, orig, [method](const SkPixmap& prev, const SkPixmap& curr) {
|
||||
SkBitmap bm;
|
||||
SkBitmapScaler::Resize(&bm, prev, method, curr.width(), curr.height());
|
||||
return bm;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void onOnceBeforeDraw() override {
|
||||
|
@ -17,7 +17,6 @@ bench_sources = [
|
||||
"$_bench/BitmapBench.cpp",
|
||||
"$_bench/BitmapRectBench.cpp",
|
||||
"$_bench/BitmapRegionDecoderBench.cpp",
|
||||
"$_bench/BitmapScaleBench.cpp",
|
||||
"$_bench/BlendmodeBench.cpp",
|
||||
"$_bench/BlurBench.cpp",
|
||||
"$_bench/BlurImageFilterBench.cpp",
|
||||
|
@ -31,7 +31,6 @@ skia_core_sources = [
|
||||
"$_src/core/SkBitmapController.cpp",
|
||||
"$_src/core/SkBitmapDevice.cpp",
|
||||
"$_src/core/SkBitmapDevice.h",
|
||||
"$_src/core/SkBitmapFilter.h",
|
||||
"$_src/core/SkBitmapProcState.cpp",
|
||||
"$_src/core/SkBitmapProcState.h",
|
||||
"$_src/core/SkBitmapProcState_filter.h",
|
||||
@ -44,8 +43,6 @@ skia_core_sources = [
|
||||
"$_src/core/SkBitmapProcState_utils.h",
|
||||
"$_src/core/SkBitmapProvider.cpp",
|
||||
"$_src/core/SkBitmapProvider.h",
|
||||
"$_src/core/SkBitmapScaler.h",
|
||||
"$_src/core/SkBitmapScaler.cpp",
|
||||
"$_src/core/SkBlendMode.cpp",
|
||||
"$_src/core/SkBlitBWMaskTemplate.h",
|
||||
"$_src/core/SkBlitMask.h",
|
||||
@ -89,8 +86,6 @@ skia_core_sources = [
|
||||
"$_src/core/SkColorTable.cpp",
|
||||
"$_src/core/SkConvertPixels.cpp",
|
||||
"$_src/core/SkConvertPixels.h",
|
||||
"$_src/core/SkConvolver.cpp",
|
||||
"$_src/core/SkConvolver.h",
|
||||
"$_src/core/SkCoreBlitters.h",
|
||||
"$_src/core/SkCpu.cpp",
|
||||
"$_src/core/SkCpu.h",
|
||||
|
@ -294,8 +294,7 @@ with open('Android.bp', 'w') as f:
|
||||
defs['ssse3'] +
|
||||
defs['sse41'] +
|
||||
defs['sse42'] +
|
||||
defs['avx' ] +
|
||||
defs['hsw' ])),
|
||||
defs['avx' ])),
|
||||
|
||||
'tool_cflags' : bpfmt(8, tool_cflags),
|
||||
'tool_shared_libs' : bpfmt(8, tool_shared_libs),
|
||||
|
@ -51,4 +51,3 @@ ssse3 = [
|
||||
sse41 = [ "$_src/opts/SkOpts_sse41.cpp" ]
|
||||
sse42 = [ "$_src/opts/SkOpts_sse42.cpp" ]
|
||||
avx = [ "$_src/opts/SkOpts_avx.cpp" ]
|
||||
hsw = [ "$_src/opts/SkOpts_hsw.cpp" ]
|
||||
|
@ -24,7 +24,7 @@ skia_opts = {
|
||||
sse41_sources = sse41
|
||||
sse42_sources = sse42
|
||||
avx_sources = avx
|
||||
hsw_sources = hsw
|
||||
hsw_sources = [] # remove after we update Chrome
|
||||
}
|
||||
|
||||
# Skia Chromium defines. These flags will be defined in chromium If these
|
||||
|
@ -6,15 +6,13 @@
|
||||
*/
|
||||
|
||||
#include "SkBitmap.h"
|
||||
#include "SkBitmapCache.h"
|
||||
#include "SkBitmapController.h"
|
||||
#include "SkBitmapProvider.h"
|
||||
#include "SkMatrix.h"
|
||||
#include "SkPixelRef.h"
|
||||
#include "SkMipMap.h"
|
||||
#include "SkTemplates.h"
|
||||
|
||||
// RESIZE_LANCZOS3 is another good option, but chrome prefers mitchell at the moment
|
||||
#define kHQ_RESIZE_METHOD SkBitmapScaler::RESIZE_MITCHELL
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvider& provider,
|
||||
@ -33,70 +31,24 @@ SkBitmapController::State* SkBitmapController::requestBitmap(const SkBitmapProvi
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include "SkBitmapCache.h"
|
||||
#include "SkBitmapScaler.h"
|
||||
#include "SkMipMap.h"
|
||||
#include "SkResourceCache.h"
|
||||
|
||||
class SkDefaultBitmapControllerState : public SkBitmapController::State {
|
||||
public:
|
||||
SkDefaultBitmapControllerState(const SkBitmapProvider&,
|
||||
const SkMatrix& inv,
|
||||
SkFilterQuality,
|
||||
bool canShadeHQ);
|
||||
SkDefaultBitmapControllerState(const SkBitmapProvider&, const SkMatrix& inv, SkFilterQuality);
|
||||
|
||||
private:
|
||||
SkBitmap fResultBitmap;
|
||||
sk_sp<const SkMipMap> fCurrMip;
|
||||
bool fCanShadeHQ;
|
||||
SkBitmap fResultBitmap;
|
||||
sk_sp<const SkMipMap> fCurrMip;
|
||||
|
||||
bool processHQRequest(const SkBitmapProvider&);
|
||||
bool processHighRequest(const SkBitmapProvider&);
|
||||
bool processMediumRequest(const SkBitmapProvider&);
|
||||
};
|
||||
|
||||
// Check to see that the size of the bitmap that would be produced by
|
||||
// scaling by the given inverted matrix is less than the maximum allowed.
// The limit is whatever SkResourceCache::GetEffectiveSingleAllocationByteLimit()
// returns. Only the matrix's scale-X and scale-Y entries are consulted.
|
||||
static inline bool cache_size_okay(const SkBitmapProvider& provider, const SkMatrix& invMat) {
|
||||
size_t maximumAllocation = SkResourceCache::GetEffectiveSingleAllocationByteLimit();
|
||||
// A reported limit of 0 makes this check accept any size.
if (0 == maximumAllocation) {
|
||||
return true;
|
||||
}
|
||||
// float matrixScaleFactor = 1.0 / (invMat.scaleX * invMat.scaleY);
|
||||
// return ((origBitmapSize * matrixScaleFactor) < maximumAllocationSize);
|
||||
// Skip the division step:
|
||||
// i.e. compare size against limit * |scaleX * scaleY| rather than
// size / |scaleX * scaleY| against limit.
const size_t size = provider.info().getSafeSize(provider.info().minRowBytes());
|
||||
SkScalar invScaleSqr = invMat.getScaleX() * invMat.getScaleY();
|
||||
return size < (maximumAllocation * SkScalarAbs(invScaleSqr));
|
||||
}
|
||||
|
||||
/*
|
||||
* High quality is implemented by performing up-right scale-only filtering and then
|
||||
* using bilerp for any remaining transformations.
|
||||
*/
|
||||
bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& provider) {
|
||||
bool SkDefaultBitmapControllerState::processHighRequest(const SkBitmapProvider& provider) {
|
||||
if (fQuality != kHigh_SkFilterQuality) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Our default return state is to downgrade the request to Medium, w/ or w/o setting fBitmap
|
||||
// to a valid bitmap. If we succeed, we will set this to Low instead.
|
||||
fQuality = kMedium_SkFilterQuality;
|
||||
#ifdef SK_USE_MIP_FOR_DOWNSCALE_HQ
|
||||
return false;
|
||||
#endif
|
||||
|
||||
bool supported = false;
|
||||
switch (provider.info().colorType()) {
|
||||
case kRGBA_8888_SkColorType:
|
||||
case kBGRA_8888_SkColorType:
|
||||
supported = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (!supported || !cache_size_okay(provider, fInvMatrix) || fInvMatrix.hasPerspective()) {
|
||||
return false; // can't handle the request
|
||||
}
|
||||
|
||||
SkScalar invScaleX = fInvMatrix.getScaleX();
|
||||
SkScalar invScaleY = fInvMatrix.getScaleY();
|
||||
@ -111,68 +63,14 @@ bool SkDefaultBitmapControllerState::processHQRequest(const SkBitmapProvider& pr
|
||||
invScaleX = SkScalarAbs(invScaleX);
|
||||
invScaleY = SkScalarAbs(invScaleY);
|
||||
|
||||
if (SkScalarNearlyEqual(invScaleX, 1) && SkScalarNearlyEqual(invScaleY, 1)) {
|
||||
return false; // no need for HQ
|
||||
if (invScaleX >= 1 - SK_ScalarNearlyZero || invScaleY >= 1 - SK_ScalarNearlyZero) {
|
||||
// we're down-scaling so abort HQ
|
||||
return false;
|
||||
}
|
||||
|
||||
if (invScaleX > 1 || invScaleY > 1) {
|
||||
return false; // only use HQ when upsampling
|
||||
}
|
||||
|
||||
// If the shader can natively handle HQ filtering, let it do it.
|
||||
if (fCanShadeHQ) {
|
||||
fQuality = kHigh_SkFilterQuality;
|
||||
SkAssertResult(provider.asBitmap(&fResultBitmap));
|
||||
return true;
|
||||
}
|
||||
|
||||
const int dstW = SkScalarRoundToScalar(provider.width() / invScaleX);
|
||||
const int dstH = SkScalarRoundToScalar(provider.height() / invScaleY);
|
||||
const SkBitmapCacheDesc desc = provider.makeCacheDesc(dstW, dstH);
|
||||
|
||||
if (!SkBitmapCache::Find(desc, &fResultBitmap)) {
|
||||
SkBitmap orig;
|
||||
if (!provider.asBitmap(&orig)) {
|
||||
return false;
|
||||
}
|
||||
SkPixmap src;
|
||||
if (!orig.peekPixels(&src)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SkPixmap dst;
|
||||
SkBitmapCache::RecPtr rec;
|
||||
const SkImageInfo info = SkImageInfo::Make(desc.fScaledWidth, desc.fScaledHeight,
|
||||
src.colorType(), src.alphaType());
|
||||
if (provider.isVolatile()) {
|
||||
if (!fResultBitmap.tryAllocPixels(info)) {
|
||||
return false;
|
||||
}
|
||||
SkASSERT(fResultBitmap.getPixels());
|
||||
fResultBitmap.peekPixels(&dst);
|
||||
fResultBitmap.setImmutable(); // a little cheat, as we haven't resized yet, but ok
|
||||
} else {
|
||||
rec = SkBitmapCache::Alloc(desc, info, &dst);
|
||||
if (!rec) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!SkBitmapScaler::Resize(dst, src, kHQ_RESIZE_METHOD)) {
|
||||
return false; // we failed to create fScaledBitmap
|
||||
}
|
||||
if (rec) {
|
||||
SkBitmapCache::Add(std::move(rec), &fResultBitmap);
|
||||
SkASSERT(fResultBitmap.getPixels());
|
||||
provider.notifyAddedToCache();
|
||||
}
|
||||
}
|
||||
|
||||
SkASSERT(fResultBitmap.getPixels());
|
||||
SkASSERT(fResultBitmap.isImmutable());
|
||||
|
||||
fInvMatrix.postScale(SkIntToScalar(dstW) / provider.width(),
|
||||
SkIntToScalar(dstH) / provider.height());
|
||||
fQuality = kLow_SkFilterQuality;
|
||||
// Confirmed that we can use HQ (w/ rasterpipeline)
|
||||
fQuality = kHigh_SkFilterQuality;
|
||||
(void)provider.asBitmap(&fResultBitmap);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -235,20 +133,15 @@ bool SkDefaultBitmapControllerState::processMediumRequest(const SkBitmapProvider
|
||||
|
||||
SkDefaultBitmapControllerState::SkDefaultBitmapControllerState(const SkBitmapProvider& provider,
|
||||
const SkMatrix& inv,
|
||||
SkFilterQuality qual,
|
||||
bool canShadeHQ) {
|
||||
SkFilterQuality qual) {
|
||||
fInvMatrix = inv;
|
||||
fQuality = qual;
|
||||
fCanShadeHQ = canShadeHQ;
|
||||
|
||||
bool processed = this->processHQRequest(provider) || this->processMediumRequest(provider);
|
||||
|
||||
if (processed) {
|
||||
if (this->processHighRequest(provider) || this->processMediumRequest(provider)) {
|
||||
SkASSERT(fResultBitmap.getPixels());
|
||||
} else {
|
||||
(void)provider.asBitmap(&fResultBitmap);
|
||||
}
|
||||
SkASSERT(fCanShadeHQ || fQuality <= kLow_SkFilterQuality);
|
||||
|
||||
// fResultBitmap.getPixels() may be null, but our caller knows to check fPixmap.addr()
|
||||
// and will destroy us if it is nullptr.
|
||||
@ -259,6 +152,5 @@ SkBitmapController::State* SkDefaultBitmapController::onRequestBitmap(const SkBi
|
||||
const SkMatrix& inverse,
|
||||
SkFilterQuality quality,
|
||||
void* storage, size_t size) {
|
||||
return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size,
|
||||
bm, inverse, quality, fCanShadeHQ);
|
||||
return SkInPlaceNewCheck<SkDefaultBitmapControllerState>(storage, size, bm, inverse, quality);
|
||||
}
|
||||
|
@ -57,14 +57,11 @@ protected:
|
||||
|
||||
class SkDefaultBitmapController : public SkBitmapController {
|
||||
public:
|
||||
enum class CanShadeHQ { kNo, kYes };
|
||||
SkDefaultBitmapController(CanShadeHQ canShadeHQ)
|
||||
: fCanShadeHQ(canShadeHQ == CanShadeHQ::kYes) {}
|
||||
SkDefaultBitmapController() {}
|
||||
|
||||
protected:
|
||||
State* onRequestBitmap(const SkBitmapProvider&, const SkMatrix& inverse, SkFilterQuality,
|
||||
void* storage, size_t storageSize) override;
|
||||
bool fCanShadeHQ;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,209 +0,0 @@
|
||||
/*
|
||||
* Copyright 2013 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkBitmapFilter_DEFINED
|
||||
#define SkBitmapFilter_DEFINED
|
||||
|
||||
#include "SkFixed.h"
|
||||
#include "SkMath.h"
|
||||
#include "SkScalar.h"
|
||||
|
||||
#include "SkNx.h"
|
||||
|
||||
// size of the precomputed bitmap filter tables for high quality filtering.
|
||||
// Used to precompute the shape of the filter kernel.
|
||||
// Table size chosen from experiments to see where I could start to see a difference.
|
||||
|
||||
#define SKBITMAP_FILTER_TABLE_SIZE 128
|
||||
|
||||
// Base class for one-dimensional filter kernels. Subclasses implement
// evaluate(); this class stores the kernel width and a lazily built table of
// SKBITMAP_FILTER_TABLE_SIZE kernel samples used by lookupScalar().
class SkBitmapFilter {
|
||||
public:
|
||||
// width: the kernel's extent as reported by width(); its reciprocal is cached.
SkBitmapFilter(float width) : fWidth(width), fInvWidth(1.f/width) {
|
||||
fPrecomputed = false;
|
||||
fLookupMultiplier = this->invWidth() * (SKBITMAP_FILTER_TABLE_SIZE-1);
|
||||
}
|
||||
virtual ~SkBitmapFilter() {}
|
||||
|
||||
// Returns the tabulated kernel value for |x|, building the table on first
// use. The index is |x| / width * (table size - 1), truncated; it is only
// range-checked by SkASSERT, so callers are presumably expected to keep
// |x| <= width() -- TODO confirm.
SkScalar lookupScalar(float x) const {
|
||||
if (!fPrecomputed) {
|
||||
precomputeTable();
|
||||
}
|
||||
int filter_idx = int(sk_float_abs(x * fLookupMultiplier));
|
||||
SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE);
|
||||
return fFilterTableScalar[filter_idx];
|
||||
}
|
||||
|
||||
float width() const { return fWidth; }
|
||||
float invWidth() const { return fInvWidth; }
|
||||
// Kernel value at x; the only member subclasses must provide.
virtual float evaluate(float x) const = 0;
|
||||
|
||||
// Evaluates the kernel at `count` points val, val + diff, val + 2*diff, ...,
// writes each value to `output` in order and returns their sum.
virtual float evaluate_n(float val, float diff, int count, float* output) const {
|
||||
float sum = 0;
|
||||
for (int index = 0; index < count; index++) {
|
||||
float filterValue = evaluate(val);
|
||||
*output++ = filterValue;
|
||||
sum += filterValue;
|
||||
val += diff;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
protected:
|
||||
float fWidth;
|
||||
float fInvWidth;
|
||||
float fLookupMultiplier;
|
||||
|
||||
// mutable so that the const lookupScalar() can fill the table on first use.
mutable bool fPrecomputed;
|
||||
mutable SkScalar fFilterTableScalar[SKBITMAP_FILTER_TABLE_SIZE];
|
||||
|
||||
private:
|
||||
// Fills the table by sampling evaluate() at the centers of
// SKBITMAP_FILTER_TABLE_SIZE equal-sized bins covering [0, width()).
void precomputeTable() const {
|
||||
fPrecomputed = true;
|
||||
SkScalar *ftpScalar = fFilterTableScalar;
|
||||
for (int x = 0; x < SKBITMAP_FILTER_TABLE_SIZE; ++x) {
|
||||
float fx = ((float)x + .5f) * this->width() / SKBITMAP_FILTER_TABLE_SIZE;
|
||||
float filter_value = evaluate(fx);
|
||||
*ftpScalar++ = filter_value;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Mitchell-Netravali cubic kernel with B = C = 1/3 and a width of 2.
// fA1..fD1 are the cubic's coefficients for 1 < |x| <= 2, and fA2, fB2, fD2
// those for |x| <= 1; evaluate() scales both polynomials by 1/6.
class SkMitchellFilter final : public SkBitmapFilter {
|
||||
public:
|
||||
// The coefficient initializers read fB and fC, which works because those two
// members are declared (and therefore initialized) before the coefficients.
SkMitchellFilter()
|
||||
: INHERITED(2)
|
||||
, fB(1.f / 3.f)
|
||||
, fC(1.f / 3.f)
|
||||
, fA1(-fB - 6*fC)
|
||||
, fB1(6*fB + 30*fC)
|
||||
, fC1(-12*fB - 48*fC)
|
||||
, fD1(8*fB + 24*fC)
|
||||
, fA2(12 - 9*fB - 6*fC)
|
||||
, fB2(-18 + 12*fB + 6*fC)
|
||||
, fD2(6 - 2*fB)
|
||||
{}
|
||||
|
||||
// Scalar kernel value: 0 for |x| > 2, the outer cubic for 1 < |x| <= 2 and
// the inner cubic for |x| <= 1.
float evaluate(float x) const override {
|
||||
x = fabsf(x);
|
||||
if (x > 2.f) {
|
||||
return 0;
|
||||
} else if (x > 1.f) {
|
||||
return (((fA1 * x + fB1) * x + fC1) * x + fD1) * (1.f/6.f);
|
||||
} else {
|
||||
return ((fA2 * x + fB2) * x*x + fD2) * (1.f/6.f);
|
||||
}
|
||||
}
|
||||
|
||||
// Four-lane counterpart of evaluate(): both polynomials are computed for
// every lane and the result is picked per lane from the two comparisons.
Sk4f evalcore_n(const Sk4f& val) const {
|
||||
Sk4f x = val.abs();
|
||||
Sk4f over2 = x > Sk4f(2);
|
||||
Sk4f over1 = x > Sk4f(1);
|
||||
Sk4f poly1 = (((Sk4f(fA1) * x + Sk4f(fB1)) * x + Sk4f(fC1)) * x + Sk4f(fD1))
|
||||
* Sk4f(1.f/6.f);
|
||||
Sk4f poly0 = ((Sk4f(fA2) * x + Sk4f(fB2)) * x*x + Sk4f(fD2)) * Sk4f(1.f/6.f);
|
||||
return over2.thenElse(Sk4f(0), over1.thenElse(poly1, poly0));
|
||||
}
|
||||
|
||||
// Same contract as the base class: writes `count` kernel values starting at
// `val` with step `diff` and returns their sum. Samples are handled four at
// a time through evalcore_n(); the remaining (< 4) go to the base-class loop.
float evaluate_n(float val, float diff, int count, float* output) const override {
|
||||
Sk4f sum(0);
|
||||
while (count >= 4) {
|
||||
float v0 = val;
|
||||
float v1 = val += diff;
|
||||
float v2 = val += diff;
|
||||
float v3 = val += diff;
|
||||
val += diff;
|
||||
Sk4f filterValue = evalcore_n(Sk4f(v0, v1, v2, v3));
|
||||
filterValue.store(output);
|
||||
output += 4;
|
||||
sum = sum + filterValue;
|
||||
count -= 4;
|
||||
}
|
||||
// Collapse the four per-lane partial sums into one scalar.
float sums[4];
|
||||
sum.store(sums);
|
||||
float result = sums[0] + sums[1] + sums[2] + sums[3];
|
||||
result += INHERITED::evaluate_n(val, diff, count, output);
|
||||
return result;
|
||||
}
|
||||
|
||||
protected:
|
||||
float fB, fC;
|
||||
float fA1, fB1, fC1, fD1;
|
||||
float fA2, fB2, fD2;
|
||||
private:
|
||||
typedef SkBitmapFilter INHERITED;
|
||||
};
|
||||
|
||||
// Gaussian kernel exp(-a * x^2), shifted down by its value at x = width so
// that it reaches zero at the edge of the window, and clamped to be >= 0.
class SkGaussianFilter final : public SkBitmapFilter {
|
||||
// fAlpha: the exponent scale `a`; fExpWidth: exp(-a * width^2), the offset
// subtracted in evaluate().
float fAlpha, fExpWidth;
|
||||
|
||||
public:
|
||||
SkGaussianFilter(float a, float width = 2)
|
||||
: SkBitmapFilter(width)
|
||||
, fAlpha(a)
|
||||
, fExpWidth(expf(-a * width * width))
|
||||
{}
|
||||
|
||||
float evaluate(float x) const override {
|
||||
return SkTMax(0.f, float(expf(-fAlpha*x*x) - fExpWidth));
|
||||
}
|
||||
};
|
||||
|
||||
// Triangle (tent) kernel: fWidth - |x| inside the window, 0 outside it.
// The peak value at x = 0 is fWidth.
class SkTriangleFilter final : public SkBitmapFilter {
|
||||
public:
|
||||
SkTriangleFilter(float width = 1) : SkBitmapFilter(width) {}
|
||||
|
||||
float evaluate(float x) const override {
|
||||
return SkTMax(0.f, fWidth - fabsf(x));
|
||||
}
|
||||
};
|
||||
|
||||
// Box kernel: 1 on the half-open interval [-fWidth, fWidth), 0 elsewhere.
class SkBoxFilter final : public SkBitmapFilter {
|
||||
public:
|
||||
SkBoxFilter(float width = 0.5f) : SkBitmapFilter(width) {}
|
||||
|
||||
float evaluate(float x) const override {
|
||||
return (x >= -fWidth && x < fWidth) ? 1.0f : 0.0f;
|
||||
}
|
||||
};
|
||||
|
||||
// Hamming-windowed sinc kernel: sin(pi*x)/(pi*x) multiplied by
// 0.54 + 0.46 * cos(pi*x / fWidth), and 0 outside the open interval
// (-fWidth, fWidth).
class SkHammingFilter final : public SkBitmapFilter {
|
||||
public:
|
||||
SkHammingFilter(float width = 1) : SkBitmapFilter(width) {}
|
||||
|
||||
float evaluate(float x) const override {
|
||||
if (x <= -fWidth || x >= fWidth) {
|
||||
return 0.0f; // Outside of the window.
|
||||
}
|
||||
if (x > -FLT_EPSILON && x < FLT_EPSILON) {
|
||||
return 1.0f; // Special case the sinc discontinuity at the origin.
|
||||
}
|
||||
const float xpi = x * static_cast<float>(SK_ScalarPI);
|
||||
|
||||
return ((sk_float_sin(xpi) / xpi) * // sinc(x)
|
||||
(0.54f + 0.46f * sk_float_cos(xpi / fWidth))); // hamming(x)
|
||||
}
|
||||
};
|
||||
|
||||
// Lanczos kernel: sin(pi*x)/(pi*x) multiplied by the same sinc evaluated at
// x / fWidth, and 0 outside the open interval (-fWidth, fWidth). The default
// width of 3 gives the kernel selected by RESIZE_LANCZOS3.
class SkLanczosFilter final : public SkBitmapFilter {
|
||||
public:
|
||||
SkLanczosFilter(float width = 3.f) : SkBitmapFilter(width) {}
|
||||
|
||||
float evaluate(float x) const override {
|
||||
if (x <= -fWidth || x >= fWidth) {
|
||||
return 0.0f; // Outside of the window.
|
||||
}
|
||||
if (x > -FLT_EPSILON && x < FLT_EPSILON) {
|
||||
return 1.0f; // Special case the discontinuity at the origin.
|
||||
}
|
||||
float xpi = x * static_cast<float>(SK_ScalarPI);
|
||||
return (sk_float_sin(xpi) / xpi) * // sinc(x)
|
||||
sk_float_sin(xpi / fWidth) / (xpi / fWidth); // sinc(x/fWidth)
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#endif
|
@ -12,7 +12,6 @@
|
||||
#include "SkPaint.h"
|
||||
#include "SkShader.h" // for tilemodes
|
||||
#include "SkUtilsArm.h"
|
||||
#include "SkBitmapScaler.h"
|
||||
#include "SkMipMap.h"
|
||||
#include "SkPixelRef.h"
|
||||
#include "SkImageEncoder.h"
|
||||
@ -90,7 +89,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) {
|
||||
fInvMatrix = inv;
|
||||
fFilterQuality = paint.getFilterQuality();
|
||||
|
||||
SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kNo);
|
||||
SkDefaultBitmapController controller;
|
||||
fBMState = controller.requestBitmap(fProvider, inv, paint.getFilterQuality(),
|
||||
fBMStateStorage.get(), fBMStateStorage.size());
|
||||
// Note : we allow the controller to return an empty (zero-dimension) result. Should we?
|
||||
@ -102,6 +101,7 @@ bool SkBitmapProcInfo::init(const SkMatrix& inv, const SkPaint& paint) {
|
||||
fRealInvMatrix = fBMState->invMatrix();
|
||||
fPaintColor = paint.getColor();
|
||||
fFilterQuality = fBMState->quality();
|
||||
SkASSERT(fFilterQuality <= kLow_SkFilterQuality);
|
||||
SkASSERT(fPixmap.addr());
|
||||
|
||||
// Most of the scanline procs deal with "unit" texture coordinates, as this
|
||||
|
@ -10,8 +10,8 @@
|
||||
|
||||
#include "SkBitmap.h"
|
||||
#include "SkBitmapController.h"
|
||||
#include "SkBitmapFilter.h"
|
||||
#include "SkBitmapProvider.h"
|
||||
#include "SkFixed.h"
|
||||
#include "SkFloatBits.h"
|
||||
#include "SkMatrix.h"
|
||||
#include "SkMipMap.h"
|
||||
|
@ -1,254 +0,0 @@
|
||||
/*
|
||||
* Copyright 2015 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "SkBitmapScaler.h"
|
||||
#include "SkBitmapFilter.h"
|
||||
#include "SkConvolver.h"
|
||||
#include "SkImageInfo.h"
|
||||
#include "SkPixmap.h"
|
||||
#include "SkRect.h"
|
||||
#include "SkTArray.h"
|
||||
|
||||
// SkResizeFilter ----------------------------------------------------------------
|
||||
|
||||
// Encapsulates computation and storage of the filters required for one complete
|
||||
// resize operation.
|
||||
// Holds the kernel object chosen for a resize method together with the
// horizontal and vertical 1-D convolution filters built from it.
class SkResizeFilter {
|
||||
public:
|
||||
SkResizeFilter(SkBitmapScaler::ResizeMethod method,
|
||||
int srcFullWidth, int srcFullHeight,
|
||||
float destWidth, float destHeight,
|
||||
const SkRect& destSubset);
|
||||
// NOTE(review): fBitmapFilter is an owning raw pointer and no copy operations
// are declared here; copying an instance (if the members allow it) would
// delete the same kernel twice.
~SkResizeFilter() { delete fBitmapFilter; }
|
||||
|
||||
// Returns the filled filter values.
|
||||
const SkConvolutionFilter1D& xFilter() { return fXFilter; }
|
||||
const SkConvolutionFilter1D& yFilter() { return fYFilter; }
|
||||
|
||||
private:
|
||||
|
||||
// Kernel allocated by the constructor for the requested method; released in
// the destructor.
SkBitmapFilter* fBitmapFilter;
|
||||
|
||||
// Computes one set of filters either horizontally or vertically. The caller
|
||||
// will specify the "min" and "max" rather than the left/top and
|
||||
// right/bottom so that the same code can be re-used in each dimension.
|
||||
//
|
||||
// |srcDependLo| and |srcDependSize| give the range for the source
|
||||
// depend rectangle (horizontally or vertically at the caller's discretion
|
||||
// -- see above for what this means).
|
||||
//
|
||||
// Likewise, the range of destination values to compute and the scale factor
|
||||
// for the transform is also specified.
// NOTE(review): the declaration below has no |srcDependLo| / |srcDependSize|
// parameters; it takes srcSize, destSubsetLo, destSubsetSize, scale and
// output, so part of the comment above looks stale.
|
||||
|
||||
void computeFilters(int srcSize,
|
||||
float destSubsetLo, float destSubsetSize,
|
||||
float scale,
|
||||
SkConvolutionFilter1D* output);
|
||||
|
||||
SkConvolutionFilter1D fXFilter;
|
||||
SkConvolutionFilter1D fYFilter;
|
||||
};
|
||||
|
||||
// Allocates the kernel matching `method`, then builds the horizontal filter
// and either copies it or builds a separate vertical filter.
//   method                       - which kernel class to instantiate
//   srcFullWidth, srcFullHeight  - source dimensions
//   destWidth, destHeight        - destination dimensions; each scale factor
//                                  is dest / src
//   destSubset                   - destination region to compute filters for
SkResizeFilter::SkResizeFilter(SkBitmapScaler::ResizeMethod method,
|
||||
int srcFullWidth, int srcFullHeight,
|
||||
float destWidth, float destHeight,
|
||||
const SkRect& destSubset) {
|
||||
|
||||
SkASSERT(method >= SkBitmapScaler::RESIZE_FirstMethod &&
|
||||
method <= SkBitmapScaler::RESIZE_LastMethod);
|
||||
|
||||
// NOTE(review): the switch has no default case, so a value outside the listed
// methods leaves fBitmapFilter null; only the SkASSERT above guards that, and
// computeFilters() dereferences the pointer.
fBitmapFilter = nullptr;
|
||||
switch(method) {
|
||||
case SkBitmapScaler::RESIZE_BOX:
|
||||
fBitmapFilter = new SkBoxFilter;
|
||||
break;
|
||||
case SkBitmapScaler::RESIZE_TRIANGLE:
|
||||
fBitmapFilter = new SkTriangleFilter;
|
||||
break;
|
||||
case SkBitmapScaler::RESIZE_MITCHELL:
|
||||
fBitmapFilter = new SkMitchellFilter;
|
||||
break;
|
||||
case SkBitmapScaler::RESIZE_HAMMING:
|
||||
fBitmapFilter = new SkHammingFilter;
|
||||
break;
|
||||
case SkBitmapScaler::RESIZE_LANCZOS3:
|
||||
fBitmapFilter = new SkLanczosFilter;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
float scaleX = destWidth / srcFullWidth;
|
||||
float scaleY = destHeight / srcFullHeight;
|
||||
|
||||
this->computeFilters(srcFullWidth, destSubset.fLeft, destSubset.width(),
|
||||
scaleX, &fXFilter);
|
||||
// When every input to the vertical pass equals its horizontal counterpart,
// copy the X filter instead of computing the same values again.
if (srcFullWidth == srcFullHeight &&
|
||||
destSubset.fLeft == destSubset.fTop &&
|
||||
destSubset.width() == destSubset.height()&&
|
||||
scaleX == scaleY) {
|
||||
fYFilter = fXFilter;
|
||||
} else {
|
||||
this->computeFilters(srcFullHeight, destSubset.fTop, destSubset.height(),
|
||||
scaleY, &fYFilter);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(egouriou): Take advantage of periods in the convolution.
|
||||
// Practical resizing filters are periodic outside of the border area.
|
||||
// For Lanczos, a scaling by a (reduced) factor of p/q (q pixels in the
|
||||
// source become p pixels in the destination) will have a period of p.
|
||||
// A nice consequence is a period of 1 when downscaling by an integral
|
||||
// factor. Downscaling from typical display resolutions is also bound
|
||||
// to produce interesting periods as those are chosen to have multiple
|
||||
// small factors.
|
||||
// Small periods reduce computational load and improve cache usage if
|
||||
// the coefficients can be shared. For periods of 1 we can consider
|
||||
// loading the factors only once outside the borders.
|
||||
void SkResizeFilter::computeFilters(int srcSize,
|
||||
float destSubsetLo, float destSubsetSize,
|
||||
float scale,
|
||||
SkConvolutionFilter1D* output) {
|
||||
float destSubsetHi = destSubsetLo + destSubsetSize; // [lo, hi)
|
||||
|
||||
// When we're doing a magnification, the scale will be larger than one. This
|
||||
// means the destination pixels are much smaller than the source pixels, and
|
||||
// that the range covered by the filter won't necessarily cover any source
|
||||
// pixel boundaries. Therefore, we use these clamped values (max of 1) for
|
||||
// some computations.
|
||||
float clampedScale = SkTMin(1.0f, scale);
|
||||
|
||||
// This is how many source pixels from the center we need to count
|
||||
// to support the filtering function.
|
||||
float srcSupport = fBitmapFilter->width() / clampedScale;
|
||||
|
||||
float invScale = 1.0f / scale;
|
||||
|
||||
SkSTArray<64, float, true> filterValuesArray;
|
||||
SkSTArray<64, SkConvolutionFilter1D::ConvolutionFixed, true> fixedFilterValuesArray;
|
||||
|
||||
// Loop over all pixels in the output range. We will generate one set of
|
||||
// filter values for each one. Those values will tell us how to blend the
|
||||
// source pixels to compute the destination pixel.
|
||||
|
||||
// This is the pixel in the source directly under the pixel in the dest.
|
||||
// Note that we base computations on the "center" of the pixels. To see
|
||||
// why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
|
||||
// downscale should "cover" the pixels around the pixel with *its center*
|
||||
// at coordinates (2.5, 2.5) in the source, not those around (0, 0).
|
||||
// Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
|
||||
destSubsetLo = SkScalarFloorToScalar(destSubsetLo);
|
||||
destSubsetHi = SkScalarCeilToScalar(destSubsetHi);
|
||||
float srcPixel = (destSubsetLo + 0.5f) * invScale;
|
||||
int destLimit = SkScalarTruncToInt(destSubsetHi - destSubsetLo);
|
||||
output->reserveAdditional(destLimit, SkScalarCeilToInt(destLimit * srcSupport * 2));
|
||||
for (int destI = 0; destI < destLimit; srcPixel += invScale, destI++) {
|
||||
// Compute the (inclusive) range of source pixels the filter covers.
|
||||
float srcBegin = SkTMax(0.f, SkScalarFloorToScalar(srcPixel - srcSupport));
|
||||
float srcEnd = SkTMin(srcSize - 1.f, SkScalarCeilToScalar(srcPixel + srcSupport));
|
||||
|
||||
// Compute the unnormalized filter value at each location of the source
|
||||
// it covers.
|
||||
|
||||
// Sum of the filter values for normalizing.
|
||||
// Distance from the center of the filter, this is the filter coordinate
|
||||
// in source space. We also need to consider the center of the pixel
|
||||
// when comparing distance against 'srcPixel'. In the 5x downscale
|
||||
// example used above the distance from the center of the filter to
|
||||
// the pixel with coordinates (2, 2) should be 0, because its center
|
||||
// is at (2.5, 2.5).
|
||||
float destFilterDist = (srcBegin + 0.5f - srcPixel) * clampedScale;
|
||||
int filterCount = SkScalarTruncToInt(srcEnd - srcBegin) + 1;
|
||||
if (filterCount <= 0) {
|
||||
// true when srcSize is equal to srcPixel - srcSupport; this may be a bug
|
||||
return;
|
||||
}
|
||||
filterValuesArray.reset(filterCount);
|
||||
float filterSum = fBitmapFilter->evaluate_n(destFilterDist, clampedScale, filterCount,
|
||||
filterValuesArray.begin());
|
||||
|
||||
// The filter must be normalized so that we don't affect the brightness of
|
||||
// the image. Convert to normalized fixed point.
|
||||
int fixedSum = 0;
|
||||
fixedFilterValuesArray.reset(filterCount);
|
||||
const float* filterValues = filterValuesArray.begin();
|
||||
SkConvolutionFilter1D::ConvolutionFixed* fixedFilterValues = fixedFilterValuesArray.begin();
|
||||
float invFilterSum = 1 / filterSum;
|
||||
for (int fixedI = 0; fixedI < filterCount; fixedI++) {
|
||||
int curFixed = SkConvolutionFilter1D::FloatToFixed(filterValues[fixedI] * invFilterSum);
|
||||
fixedSum += curFixed;
|
||||
fixedFilterValues[fixedI] = SkToS16(curFixed);
|
||||
}
|
||||
SkASSERT(fixedSum <= 0x7FFF);
|
||||
|
||||
// The conversion to fixed point will leave some rounding errors, which
|
||||
// we add back in to avoid affecting the brightness of the image. We
|
||||
// arbitrarily add this to the center of the filter array (this won't always
|
||||
// be the center of the filter function since it could get clipped on the
|
||||
// edges, but it doesn't matter enough to worry about that case).
|
||||
int leftovers = SkConvolutionFilter1D::FloatToFixed(1) - fixedSum;
|
||||
fixedFilterValues[filterCount / 2] += leftovers;
|
||||
|
||||
// Now it's ready to go.
|
||||
output->AddFilter(SkScalarFloorToInt(srcBegin), fixedFilterValues, filterCount);
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
static bool valid_for_resize(const SkPixmap& source, int dstW, int dstH) {
|
||||
// TODO: Seems like we shouldn't care about the swizzle of source, just that it's 8888
|
||||
return source.addr() && source.colorType() == kN32_SkColorType &&
|
||||
source.width() >= 1 && source.height() >= 1 && dstW >= 1 && dstH >= 1;
|
||||
}
|
||||
|
||||
bool SkBitmapScaler::Resize(const SkPixmap& result, const SkPixmap& source, ResizeMethod method) {
|
||||
if (!valid_for_resize(source, result.width(), result.height())) {
|
||||
return false;
|
||||
}
|
||||
if (!result.addr() || result.colorType() != source.colorType()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SkRect destSubset = SkRect::MakeIWH(result.width(), result.height());
|
||||
|
||||
SkResizeFilter filter(method, source.width(), source.height(),
|
||||
result.width(), result.height(), destSubset);
|
||||
|
||||
// Get a subset encompassing this touched area. We construct the
|
||||
// offsets and row strides such that it looks like a new bitmap, while
|
||||
// referring to the old data.
|
||||
const uint8_t* sourceSubset = reinterpret_cast<const uint8_t*>(source.addr());
|
||||
|
||||
return BGRAConvolve2D(sourceSubset, static_cast<int>(source.rowBytes()),
|
||||
!source.isOpaque(), filter.xFilter(), filter.yFilter(),
|
||||
static_cast<int>(result.rowBytes()),
|
||||
static_cast<unsigned char*>(result.writable_addr()));
|
||||
}
|
||||
|
||||
// Allocates a destWidth x destHeight N32 bitmap (through |allocator|, which
// may be null), resamples |source| into it with |method| and, on success,
// assigns it to |*resultPtr|.
//
// Returns false, leaving |*resultPtr| untouched, when the inputs are invalid,
// when the destination pixels cannot be allocated, or when the resampling
// itself fails.
bool SkBitmapScaler::Resize(SkBitmap* resultPtr, const SkPixmap& source, ResizeMethod method,
                            int destWidth, int destHeight, SkBitmap::Allocator* allocator) {
    // Preflight some of the checks, to avoid allocating the result if we don't need it.
    if (!valid_for_resize(source, destWidth, destHeight)) {
        return false;
    }

    SkBitmap result;
    // Note: pass along the color space information even though this is not the
    // right answer, because this could be scaling in sRGB.
    result.setInfo(SkImageInfo::MakeN32(destWidth, destHeight, source.alphaType(),
                                        sk_ref_sp(source.info().colorSpace())));

    // This function reports failure through its return value, so use the
    // checked allocation and let the caller handle an allocation failure.
    if (!result.tryAllocPixels(allocator)) {
        return false;
    }

    SkPixmap resultPM;
    if (!result.peekPixels(&resultPM) || !Resize(resultPM, source, method)) {
        return false;
    }

    *resultPtr = result;
    SkASSERT(resultPtr->getPixels());
    return true;
}
|
@ -1,46 +0,0 @@
|
||||
/*
|
||||
* Copyright 2013 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkBitmapScaler_DEFINED
|
||||
#define SkBitmapScaler_DEFINED
|
||||
|
||||
#include "SkBitmap.h"
|
||||
#include "SkConvolver.h"
|
||||
|
||||
/** \class SkBitmapScaler
|
||||
|
||||
Provides the interface for high quality image resampling.
|
||||
*/
|
||||
|
||||
class SK_API SkBitmapScaler {
|
||||
public:
|
||||
enum ResizeMethod {
|
||||
RESIZE_BOX,
|
||||
RESIZE_TRIANGLE,
|
||||
RESIZE_LANCZOS3,
|
||||
RESIZE_HAMMING,
|
||||
RESIZE_MITCHELL,
|
||||
|
||||
RESIZE_FirstMethod = RESIZE_BOX,
|
||||
RESIZE_LastMethod = RESIZE_MITCHELL,
|
||||
};
|
||||
|
||||
/**
|
||||
* Given already-allocated src and dst pixmaps, this will scale the src pixels using the
|
||||
* specified resize-method and write the results into the pixels pointed to by dst.
|
||||
*/
|
||||
static bool Resize(const SkPixmap& dst, const SkPixmap& src, ResizeMethod method);
|
||||
|
||||
/**
|
||||
* Helper function that manages allocating a bitmap to hold the dst pixels, and then calls
|
||||
* the pixmap version of Resize.
|
||||
*/
|
||||
static bool Resize(SkBitmap* result, const SkPixmap& src, ResizeMethod method,
|
||||
int dest_width, int dest_height, SkBitmap::Allocator* = nullptr);
|
||||
};
|
||||
|
||||
#endif
|
@ -798,21 +798,16 @@ bool SkBlitter::UseRasterPipelineBlitter(const SkPixmap& device, const SkPaint&
|
||||
if (paint.getColorFilter()) {
|
||||
return true;
|
||||
}
|
||||
#ifndef SK_SUPPORT_LEGACY_HQ_SCALER
|
||||
if (paint.getFilterQuality() == kHigh_SkFilterQuality) {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
// ... unless the blend mode is complicated enough.
|
||||
if (paint.getBlendMode() > SkBlendMode::kLastSeparableMode) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// ... or unless we have to deal with perspective.
|
||||
if (matrix.hasPerspective()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// ... or unless the shader is raster pipeline-only.
|
||||
if (paint.getShader() && as_SB(paint.getShader())->isRasterPipelineOnly()) {
|
||||
return true;
|
||||
|
@ -1,272 +0,0 @@
|
||||
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#include "SkConvolver.h"
|
||||
#include "SkOpts.h"
|
||||
#include "SkTArray.h"
|
||||
|
||||
namespace {
|
||||
// Stores a list of rows in a circular buffer. The usage is you write into it
|
||||
// by calling AdvanceRow. It will keep track of which row in the buffer it
|
||||
// should use next, and the total number of rows added.
|
||||
class CircularRowBuffer {
|
||||
public:
|
||||
// The number of pixels in each row is given in |sourceRowPixelWidth|.
|
||||
// The maximum number of rows needed in the buffer is |maxYFilterSize|
|
||||
// (we only need to store enough rows for the biggest filter).
|
||||
//
|
||||
// We use the |firstInputRow| to compute the coordinates of all of the
|
||||
// following rows returned by Advance().
|
||||
CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
|
||||
int firstInputRow)
|
||||
: fRowByteWidth(destRowPixelWidth * 4),
|
||||
fNumRows(maxYFilterSize),
|
||||
fNextRow(0),
|
||||
fNextRowCoordinate(firstInputRow) {
|
||||
fBuffer.reset(fRowByteWidth * maxYFilterSize);
|
||||
fRowAddresses.reset(fNumRows);
|
||||
}
|
||||
|
||||
// Moves to the next row in the buffer, returning a pointer to the beginning
|
||||
// of it.
|
||||
unsigned char* advanceRow() {
|
||||
unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];
|
||||
fNextRowCoordinate++;
|
||||
|
||||
// Set the pointer to the next row to use, wrapping around if necessary.
|
||||
fNextRow++;
|
||||
if (fNextRow == fNumRows) {
|
||||
fNextRow = 0;
|
||||
}
|
||||
return row;
|
||||
}
|
||||
|
||||
// Returns a pointer to an "unrolled" array of rows. These rows will start
|
||||
// at the y coordinate placed into |*firstRowIndex| and will continue in
|
||||
// order for the maximum number of rows in this circular buffer.
|
||||
//
|
||||
// The |firstRowIndex_| may be negative. This means the circular buffer
|
||||
// starts before the top of the image (it hasn't been filled yet).
|
||||
unsigned char* const* GetRowAddresses(int* firstRowIndex) {
|
||||
// Example for a 4-element circular buffer holding coords 6-9.
|
||||
// Row 0 Coord 8
|
||||
// Row 1 Coord 9
|
||||
// Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10.
|
||||
// Row 3 Coord 7
|
||||
//
|
||||
// The "next" row is also the first (lowest) coordinate. This computation
|
||||
// may yield a negative value, but that's OK, the math will work out
|
||||
// since the user of this buffer will compute the offset relative
|
||||
// to the firstRowIndex and the negative rows will never be used.
|
||||
*firstRowIndex = fNextRowCoordinate - fNumRows;
|
||||
|
||||
int curRow = fNextRow;
|
||||
for (int i = 0; i < fNumRows; i++) {
|
||||
fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];
|
||||
|
||||
// Advance to the next row, wrapping if necessary.
|
||||
curRow++;
|
||||
if (curRow == fNumRows) {
|
||||
curRow = 0;
|
||||
}
|
||||
}
|
||||
return &fRowAddresses[0];
|
||||
}
|
||||
|
||||
private:
|
||||
// The buffer storing the rows. They are packed, each one fRowByteWidth.
|
||||
SkTArray<unsigned char> fBuffer;
|
||||
|
||||
// Number of bytes per row in the |buffer|.
|
||||
int fRowByteWidth;
|
||||
|
||||
// The number of rows available in the buffer.
|
||||
int fNumRows;
|
||||
|
||||
// The next row index we should write into. This wraps around as the
|
||||
// circular buffer is used.
|
||||
int fNextRow;
|
||||
|
||||
// The y coordinate of the |fNextRow|. This is incremented each time a
|
||||
// new row is appended and does not wrap.
|
||||
int fNextRowCoordinate;
|
||||
|
||||
// Buffer used by GetRowAddresses().
|
||||
SkTArray<unsigned char*> fRowAddresses;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// SkConvolutionFilter1D ---------------------------------------------------------
|
||||
|
||||
// Creates an empty filter list; no filter has been added yet, so the largest
// filter length seen so far is 0.
SkConvolutionFilter1D::SkConvolutionFilter1D() : fMaxFilter(0) {}

// Nothing to release explicitly; the member arrays clean up after themselves.
SkConvolutionFilter1D::~SkConvolutionFilter1D() = default;
|
||||
|
||||
void SkConvolutionFilter1D::AddFilter(int filterOffset,
|
||||
const ConvolutionFixed* filterValues,
|
||||
int filterLength) {
|
||||
// It is common for leading/trailing filter values to be zeros. In such
|
||||
// cases it is beneficial to only store the central factors.
|
||||
// For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
|
||||
// a 1080p image this optimization gives a ~10% speed improvement.
|
||||
int filterSize = filterLength;
|
||||
int firstNonZero = 0;
|
||||
while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
|
||||
firstNonZero++;
|
||||
}
|
||||
|
||||
if (firstNonZero < filterLength) {
|
||||
// Here we have at least one non-zero factor.
|
||||
int lastNonZero = filterLength - 1;
|
||||
while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
|
||||
lastNonZero--;
|
||||
}
|
||||
|
||||
filterOffset += firstNonZero;
|
||||
filterLength = lastNonZero + 1 - firstNonZero;
|
||||
SkASSERT(filterLength > 0);
|
||||
|
||||
fFilterValues.append(filterLength, &filterValues[firstNonZero]);
|
||||
} else {
|
||||
// Here all the factors were zeroes.
|
||||
filterLength = 0;
|
||||
}
|
||||
|
||||
FilterInstance instance;
|
||||
|
||||
// We pushed filterLength elements onto fFilterValues
|
||||
instance.fDataLocation = (static_cast<int>(fFilterValues.count()) -
|
||||
filterLength);
|
||||
instance.fOffset = filterOffset;
|
||||
instance.fTrimmedLength = filterLength;
|
||||
instance.fLength = filterSize;
|
||||
fFilters.push(instance);
|
||||
|
||||
fMaxFilter = SkTMax(fMaxFilter, filterLength);
|
||||
}
|
||||
|
||||
// Reports the first filter in the list (index 0): its offset, its trimmed
// length and the length it was originally specified with. Returns a pointer
// to its stored coefficients, or nullptr when the trimmed length is 0.
const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter(
        int* specifiedFilterlength,
        int* filterOffset,
        int* filterLength) const {
    const FilterInstance& only = fFilters[0];

    *filterOffset = only.fOffset;
    *filterLength = only.fTrimmedLength;
    *specifiedFilterlength = only.fLength;

    return only.fTrimmedLength == 0 ? nullptr : &fFilterValues[only.fDataLocation];
}
|
||||
|
||||
bool BGRAConvolve2D(const unsigned char* sourceData,
|
||||
int sourceByteRowStride,
|
||||
bool sourceHasAlpha,
|
||||
const SkConvolutionFilter1D& filterX,
|
||||
const SkConvolutionFilter1D& filterY,
|
||||
int outputByteRowStride,
|
||||
unsigned char* output) {
|
||||
|
||||
int maxYFilterSize = filterY.maxFilter();
|
||||
|
||||
// The next row in the input that we will generate a horizontally
|
||||
// convolved row for. If the filter doesn't start at the beginning of the
|
||||
// image (this is the case when we are only resizing a subset), then we
|
||||
// don't want to generate any output rows before that. Compute the starting
|
||||
// row for convolution as the first pixel for the first vertical filter.
|
||||
int filterOffset, filterLength;
|
||||
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
|
||||
filterY.FilterForValue(0, &filterOffset, &filterLength);
|
||||
int nextXRow = filterOffset;
|
||||
|
||||
// We loop over each row in the input doing a horizontal convolution. This
|
||||
// will result in a horizontally convolved image. We write the results into
|
||||
// a circular buffer of convolved rows and do vertical convolution as rows
|
||||
// are available. This prevents us from having to store the entire
|
||||
// intermediate image and helps cache coherency.
|
||||
// We will need four extra rows to allow horizontal convolution could be done
|
||||
// simultaneously. We also pad each row in row buffer to be aligned-up to
|
||||
// 32 bytes.
|
||||
// TODO(jiesun): We do not use aligned load from row buffer in vertical
|
||||
// convolution pass yet. Somehow Windows does not like it.
|
||||
int rowBufferWidth = (filterX.numValues() + 31) & ~0x1F;
|
||||
int rowBufferHeight = maxYFilterSize +
|
||||
(SkOpts::convolve_4_rows_horizontally != nullptr ? 4 : 0);
|
||||
|
||||
// check for too-big allocation requests : crbug.com/528628
|
||||
{
|
||||
int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight);
|
||||
// need some limit, to avoid over-committing success from malloc, but then
|
||||
// crashing when we try to actually use the memory.
|
||||
// 100meg seems big enough to allow "normal" zoom factors and image sizes through
|
||||
// while avoiding the crash seen by the bug (crbug.com/528628)
|
||||
if (size > 100 * 1024 * 1024) {
|
||||
// SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
CircularRowBuffer rowBuffer(rowBufferWidth,
|
||||
rowBufferHeight,
|
||||
filterOffset);
|
||||
|
||||
// Loop over every possible output row, processing just enough horizontal
|
||||
// convolutions to run each subsequent vertical convolution.
|
||||
SkASSERT(outputByteRowStride >= filterX.numValues() * 4);
|
||||
int numOutputRows = filterY.numValues();
|
||||
|
||||
// We need to check which is the last line to convolve before we advance 4
|
||||
// lines in one iteration.
|
||||
int lastFilterOffset, lastFilterLength;
|
||||
filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
|
||||
&lastFilterLength);
|
||||
|
||||
for (int outY = 0; outY < numOutputRows; outY++) {
|
||||
filterValues = filterY.FilterForValue(outY,
|
||||
&filterOffset, &filterLength);
|
||||
|
||||
// Generate output rows until we have enough to run the current filter.
|
||||
while (nextXRow < filterOffset + filterLength) {
|
||||
if (SkOpts::convolve_4_rows_horizontally != nullptr &&
|
||||
nextXRow + 3 < lastFilterOffset + lastFilterLength) {
|
||||
const unsigned char* src[4];
|
||||
unsigned char* outRow[4];
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];
|
||||
outRow[i] = rowBuffer.advanceRow();
|
||||
}
|
||||
SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*rowBufferWidth);
|
||||
nextXRow += 4;
|
||||
} else {
|
||||
SkOpts::convolve_horizontally(
|
||||
&sourceData[(uint64_t)nextXRow * sourceByteRowStride],
|
||||
filterX, rowBuffer.advanceRow(), sourceHasAlpha);
|
||||
nextXRow++;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute where in the output image this row of final data will go.
|
||||
unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStride];
|
||||
|
||||
// Get the list of rows that the circular buffer has, in order.
|
||||
int firstRowInCircularBuffer;
|
||||
unsigned char* const* rowsToConvolve =
|
||||
rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
|
||||
|
||||
// Now compute the start of the subset of those rows that the filter needs.
|
||||
unsigned char* const* firstRowForFilter =
|
||||
&rowsToConvolve[filterOffset - firstRowInCircularBuffer];
|
||||
|
||||
SkOpts::convolve_vertically(filterValues, filterLength,
|
||||
firstRowForFilter,
|
||||
filterX.numValues(), curOutputRow,
|
||||
sourceHasAlpha);
|
||||
}
|
||||
return true;
|
||||
}
|
@ -1,173 +0,0 @@
|
||||
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
#ifndef SK_CONVOLVER_H
|
||||
#define SK_CONVOLVER_H
|
||||
|
||||
#include "SkSize.h"
|
||||
#include "SkTDArray.h"
|
||||
|
||||
// avoid confusion with Mac OS X's math library (Carbon)
|
||||
#if defined(__APPLE__)
|
||||
#undef FloatToConvolutionFixed
|
||||
#undef ConvolutionFixedToFloat
|
||||
#undef FloatToFixed
|
||||
#undef FixedToFloat
|
||||
#endif
|
||||
|
||||
// Represents a filter in one dimension. Each output pixel has one entry in this
|
||||
// object for the filter values contributing to it. You build up the filter
|
||||
// list by calling AddFilter for each output pixel (in order).
|
||||
//
|
||||
// We do 2-dimensional convolution by first convolving each row by one
|
||||
// SkConvolutionFilter1D, then convolving each column by another one.
|
||||
//
|
||||
// Entries are stored in ConvolutionFixed point, shifted left by kShiftBits.
|
||||
class SkConvolutionFilter1D {
|
||||
public:
|
||||
typedef short ConvolutionFixed;
|
||||
|
||||
// The number of bits that ConvolutionFixed point values are shifted by.
|
||||
enum { kShiftBits = 14 };
|
||||
|
||||
SK_API SkConvolutionFilter1D();
|
||||
SK_API ~SkConvolutionFilter1D();
|
||||
|
||||
// Convert between floating point and our ConvolutionFixed point representation.
|
||||
static ConvolutionFixed FloatToFixed(float f) {
|
||||
return static_cast<ConvolutionFixed>(f * (1 << kShiftBits));
|
||||
}
|
||||
static unsigned char FixedToChar(ConvolutionFixed x) {
|
||||
return static_cast<unsigned char>(x >> kShiftBits);
|
||||
}
|
||||
static float FixedToFloat(ConvolutionFixed x) {
|
||||
// The cast relies on ConvolutionFixed being a short, implying that on
|
||||
// the platforms we care about all (16) bits will fit into
|
||||
// the mantissa of a (32-bit) float.
|
||||
static_assert(sizeof(ConvolutionFixed) == 2, "ConvolutionFixed_type_should_fit_in_float_mantissa");
|
||||
float raw = static_cast<float>(x);
|
||||
return ldexpf(raw, -kShiftBits);
|
||||
}
|
||||
|
||||
// Returns the maximum pixel span of a filter.
|
||||
int maxFilter() const { return fMaxFilter; }
|
||||
|
||||
// Returns the number of filters in this filter. This is the dimension of the
|
||||
// output image.
|
||||
int numValues() const { return static_cast<int>(fFilters.count()); }
|
||||
|
||||
void reserveAdditional(int filterCount, int filterValueCount) {
|
||||
fFilters.setReserve(fFilters.count() + filterCount);
|
||||
fFilterValues.setReserve(fFilterValues.count() + filterValueCount);
|
||||
}
|
||||
|
||||
// Appends the given list of scaling values for generating a given output
|
||||
// pixel. |filterOffset| is the distance from the edge of the image to where
|
||||
// the scaling factors start. The scaling factors apply to the source pixels
|
||||
// starting from this position, and going for the next |filterLength| pixels.
|
||||
//
|
||||
// You will probably want to make sure your input is normalized (that is,
|
||||
// all entries in |filterValuesg| sub to one) to prevent affecting the overall
|
||||
// brighness of the image.
|
||||
//
|
||||
// The filterLength must be > 0.
|
||||
void AddFilter(int filterOffset,
|
||||
const ConvolutionFixed* filterValues,
|
||||
int filterLength);
|
||||
|
||||
// Retrieves a filter for the given |valueOffset|, a position in the output
|
||||
// image in the direction we're convolving. The offset and length of the
|
||||
// filter values are put into the corresponding out arguments (see AddFilter
|
||||
// above for what these mean), and a pointer to the first scaling factor is
|
||||
// returned. There will be |filterLength| values in this array.
|
||||
inline const ConvolutionFixed* FilterForValue(int valueOffset,
|
||||
int* filterOffset,
|
||||
int* filterLength) const {
|
||||
const FilterInstance& filter = fFilters[valueOffset];
|
||||
*filterOffset = filter.fOffset;
|
||||
*filterLength = filter.fTrimmedLength;
|
||||
if (filter.fTrimmedLength == 0) {
|
||||
return nullptr;
|
||||
}
|
||||
return &fFilterValues[filter.fDataLocation];
|
||||
}
|
||||
|
||||
// Retrieves the filter for the offset 0, presumed to be the one and only.
|
||||
// The offset and length of the filter values are put into the corresponding
|
||||
// out arguments (see AddFilter). Note that |filterLegth| and
|
||||
// |specifiedFilterLength| may be different if leading/trailing zeros of the
|
||||
// original floating point form were clipped.
|
||||
// There will be |filterLength| values in the return array.
|
||||
// Returns nullptr if the filter is 0-length (for instance when all floating
|
||||
// point values passed to AddFilter were clipped to 0).
|
||||
SK_API const ConvolutionFixed* GetSingleFilter(int* specifiedFilterLength,
|
||||
int* filterOffset,
|
||||
int* filterLength) const;
|
||||
|
||||
// Add another value to the fFilterValues array -- useful for
|
||||
// SIMD padding which happens outside of this class.
|
||||
|
||||
void addFilterValue( ConvolutionFixed val ) {
|
||||
fFilterValues.push( val );
|
||||
}
|
||||
private:
|
||||
struct FilterInstance {
|
||||
// Offset within filterValues for this instance of the filter.
|
||||
int fDataLocation;
|
||||
|
||||
// Distance from the left of the filter to the center. IN PIXELS
|
||||
int fOffset;
|
||||
|
||||
// Number of values in this filter instance.
|
||||
int fTrimmedLength;
|
||||
|
||||
// Filter length as specified. Note that this may be different from
|
||||
// 'trimmed_length' if leading/trailing zeros of the original floating
|
||||
// point form were clipped differently on each tail.
|
||||
int fLength;
|
||||
};
|
||||
|
||||
// Stores the information for each filter added to this class.
|
||||
SkTDArray<FilterInstance> fFilters;
|
||||
|
||||
// We store all the filter values in this flat list, indexed by
|
||||
// |FilterInstance.data_location| to avoid the mallocs required for storing
|
||||
// each one separately.
|
||||
SkTDArray<ConvolutionFixed> fFilterValues;
|
||||
|
||||
// The maximum size of any filter we've added.
|
||||
int fMaxFilter;
|
||||
};
|
||||
|
||||
// Does a two-dimensional convolution on the given source image.
|
||||
//
|
||||
// It is assumed the source pixel offsets referenced in the input filters
|
||||
// reference only valid pixels, so the source image size is not required. Each
|
||||
// row of the source image starts |sourceByteRowStride| after the previous
|
||||
// one (this allows you to have rows with some padding at the end).
|
||||
//
|
||||
// The result will be put into the given output buffer. The destination image
|
||||
// size will be xfilter.numValues() * yfilter.numValues() pixels. It will be
|
||||
// in rows of exactly xfilter.numValues() * 4 bytes.
|
||||
//
|
||||
// |sourceHasAlpha| is a hint that allows us to avoid doing computations on
|
||||
// the alpha channel if the image is opaque. If you don't know, set this to
|
||||
// true and it will work properly, but setting this to false will be a few
|
||||
// percent faster if you know the image is opaque.
|
||||
//
|
||||
// The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
|
||||
// (this is ARGB when loaded into 32-bit words on a little-endian machine).
|
||||
/**
|
||||
* Returns false if it was unable to perform the convolution/rescale, in which case the contents
|
||||
* of the output buffer are undefined.
|
||||
*/
|
||||
SK_API bool BGRAConvolve2D(const unsigned char* sourceData,
|
||||
int sourceByteRowStride,
|
||||
bool sourceHasAlpha,
|
||||
const SkConvolutionFilter1D& xfilter,
|
||||
const SkConvolutionFilter1D& yfilter,
|
||||
int outputByteRowStride,
|
||||
unsigned char* output);
|
||||
|
||||
#endif // SK_CONVOLVER_H
|
@ -36,7 +36,6 @@
|
||||
#define SK_OPTS_NS portable
|
||||
#endif
|
||||
|
||||
#include "SkBitmapFilter_opts.h"
|
||||
#include "SkBlend_opts.h"
|
||||
#include "SkBlitMask_opts.h"
|
||||
#include "SkBlitRow_opts.h"
|
||||
@ -88,10 +87,6 @@ namespace SkOpts {
|
||||
|
||||
DEFINE_DEFAULT(hash_fn);
|
||||
|
||||
DEFINE_DEFAULT(convolve_vertically);
|
||||
DEFINE_DEFAULT(convolve_horizontally);
|
||||
DEFINE_DEFAULT(convolve_4_rows_horizontally);
|
||||
|
||||
#undef DEFINE_DEFAULT
|
||||
|
||||
// Each Init_foo() is defined in src/opts/SkOpts_foo.cpp.
|
||||
@ -99,7 +94,6 @@ namespace SkOpts {
|
||||
void Init_sse41();
|
||||
void Init_sse42();
|
||||
void Init_avx();
|
||||
void Init_hsw();
|
||||
void Init_crc32();
|
||||
|
||||
static void init() {
|
||||
@ -109,7 +103,6 @@ namespace SkOpts {
|
||||
if (SkCpu::Supports(SkCpu::SSE41)) { Init_sse41(); }
|
||||
if (SkCpu::Supports(SkCpu::SSE42)) { Init_sse42(); }
|
||||
if (SkCpu::Supports(SkCpu::AVX )) { Init_avx(); }
|
||||
if (SkCpu::Supports(SkCpu::HSW )) { Init_hsw(); }
|
||||
|
||||
#elif defined(SK_CPU_ARM64)
|
||||
if (SkCpu::Supports(SkCpu::CRC32)) { Init_crc32(); }
|
||||
|
@ -8,7 +8,6 @@
|
||||
#ifndef SkOpts_DEFINED
|
||||
#define SkOpts_DEFINED
|
||||
|
||||
#include "SkConvolver.h"
|
||||
#include "SkRasterPipeline.h"
|
||||
#include "SkTypes.h"
|
||||
#include "SkXfermodePriv.h"
|
||||
@ -62,15 +61,6 @@ namespace SkOpts {
|
||||
static inline uint32_t hash(const void* data, size_t bytes, uint32_t seed=0) {
|
||||
return hash_fn(data, bytes, seed);
|
||||
}
|
||||
|
||||
extern void (*convolve_vertically)(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
|
||||
int filter_length, unsigned char* const* source_data_rows,
|
||||
int pixel_width, unsigned char* out_row, bool has_alpha);
|
||||
extern void (*convolve_4_rows_horizontally)(const unsigned char* src_data[4],
|
||||
const SkConvolutionFilter1D& filter,
|
||||
unsigned char* out_row[4], size_t out_row_bytes);
|
||||
extern void (*convolve_horizontally)(const unsigned char* src_data, const SkConvolutionFilter1D& filter,
|
||||
unsigned char* out_row, bool has_alpha);
|
||||
}
|
||||
|
||||
#endif//SkOpts_DEFINED
|
||||
|
@ -1,940 +0,0 @@
|
||||
/*
|
||||
* Copyright 2016 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#ifndef SkBitmapFilter_opts_DEFINED
|
||||
#define SkBitmapFilter_opts_DEFINED
|
||||
|
||||
#include "SkConvolver.h"
|
||||
|
||||
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
|
||||
#include <immintrin.h>
|
||||
#elif defined(SK_ARM_HAS_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
namespace SK_OPTS_NS {
|
||||
|
||||
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
|
||||
|
||||
// Scalar tail of the horizontal convolution: multiplies the |r| pixels at
// |pixelsLeft| (4 bytes each) by the |r| coefficients at |filterValues|,
// sums the products per channel, and adds the four 32-bit channel sums to
// the matching lanes of |accum|.
static SK_ALWAYS_INLINE void AccumRemainder(const unsigned char* pixelsLeft,
        const SkConvolutionFilter1D::ConvolutionFixed* filterValues, __m128i& accum, int r) {
    int channelSums[4] = {0, 0, 0, 0};
    for (int tap = 0; tap < r; tap++) {
        const int weight = filterValues[tap];
        const unsigned char* pixel = pixelsLeft + tap * 4;
        for (int channel = 0; channel < 4; channel++) {
            channelSums[channel] += weight * pixel[channel];
        }
    }
    // Lane i of the vector receives channelSums[i].
    const __m128i tail = _mm_setr_epi32(channelSums[0], channelSums[1],
                                        channelSums[2], channelSums[3]);
    accum = _mm_add_epi32(accum, tail);
}
|
||||
|
||||
// Convolves horizontally along a single row. The row data is given in
|
||||
// |srcData| and continues for the numValues() of the filter.
|
||||
void convolve_horizontally(const unsigned char* srcData,
|
||||
const SkConvolutionFilter1D& filter,
|
||||
unsigned char* outRow,
|
||||
bool /*hasAlpha*/) {
|
||||
// Output one pixel each iteration, calculating all channels (RGBA) together.
|
||||
int numValues = filter.numValues();
|
||||
for (int outX = 0; outX < numValues; outX++) {
|
||||
// Get the filter that determines the current output pixel.
|
||||
int filterOffset, filterLength;
|
||||
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
|
||||
filter.FilterForValue(outX, &filterOffset, &filterLength);
|
||||
|
||||
// Compute the first pixel in this row that the filter affects. It will
|
||||
// touch |filterLength| pixels (4 bytes each) after this.
|
||||
const unsigned char* rowToFilter = &srcData[filterOffset * 4];
|
||||
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
__m128i accum = _mm_setzero_si128();
|
||||
|
||||
// We will load and accumulate with four coefficients per iteration.
|
||||
for (int filterX = 0; filterX < filterLength >> 2; filterX++) {
|
||||
// Load 4 coefficients => duplicate 1st and 2nd of them for all channels.
|
||||
__m128i coeff, coeff16;
|
||||
// [16] xx xx xx xx c3 c2 c1 c0
|
||||
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filterValues));
|
||||
// [16] xx xx xx xx c1 c1 c0 c0
|
||||
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
|
||||
// [16] c1 c1 c1 c1 c0 c0 c0 c0
|
||||
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
|
||||
|
||||
// Load four pixels => unpack the first two pixels to 16 bits =>
|
||||
// multiply with coefficients => accumulate the convolution result.
|
||||
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||
__m128i src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(rowToFilter));
|
||||
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
|
||||
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||
// [32] a0*c0 b0*c0 g0*c0 r0*c0
|
||||
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||
accum = _mm_add_epi32(accum, t);
|
||||
// [32] a1*c1 b1*c1 g1*c1 r1*c1
|
||||
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||
accum = _mm_add_epi32(accum, t);
|
||||
|
||||
// Duplicate 3rd and 4th coefficients for all channels =>
|
||||
// unpack the 3rd and 4th pixels to 16 bits => multiply with coefficients
|
||||
// => accumulate the convolution results.
|
||||
// [16] xx xx xx xx c3 c3 c2 c2
|
||||
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
|
||||
// [16] c3 c3 c3 c3 c2 c2 c2 c2
|
||||
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
|
||||
// [16] a3 g3 b3 r3 a2 g2 b2 r2
|
||||
src16 = _mm_unpackhi_epi8(src8, zero);
|
||||
mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||
mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||
// [32] a2*c2 b2*c2 g2*c2 r2*c2
|
||||
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||
accum = _mm_add_epi32(accum, t);
|
||||
// [32] a3*c3 b3*c3 g3*c3 r3*c3
|
||||
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||
accum = _mm_add_epi32(accum, t);
|
||||
|
||||
// Advance the pixel and coefficients pointers.
|
||||
rowToFilter += 16;
|
||||
filterValues += 4;
|
||||
}
|
||||
|
||||
// When |filterLength| is not divisible by 4, we accumulate the last 1 - 3
|
||||
// coefficients one at a time.
|
||||
int r = filterLength & 3;
|
||||
if (r) {
|
||||
int remainderOffset = (filterOffset + filterLength - r) * 4;
|
||||
AccumRemainder(srcData + remainderOffset, filterValues, accum, r);
|
||||
}
|
||||
|
||||
// Shift right for fixed point implementation.
|
||||
accum = _mm_srai_epi32(accum, SkConvolutionFilter1D::kShiftBits);
|
||||
|
||||
// Packing 32 bits |accum| to 16 bits per channel (signed saturation).
|
||||
accum = _mm_packs_epi32(accum, zero);
|
||||
// Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
|
||||
accum = _mm_packus_epi16(accum, zero);
|
||||
|
||||
// Store the pixel value of 32 bits.
|
||||
*(reinterpret_cast<int*>(outRow)) = _mm_cvtsi128_si32(accum);
|
||||
outRow += 4;
|
||||
}
|
||||
}
|
||||
|
||||
// Convolves horizontally along four rows. The row data is given in
|
||||
// |srcData| and continues for the numValues() of the filter.
|
||||
// The algorithm is almost same as |convolve_horizontally|. Please
|
||||
// refer to that function for detailed comments.
|
||||
void convolve_4_rows_horizontally(const unsigned char* srcData[4],
|
||||
const SkConvolutionFilter1D& filter,
|
||||
unsigned char* outRow[4],
|
||||
size_t outRowBytes) {
|
||||
SkDEBUGCODE(const unsigned char* out_row_0_start = outRow[0];)
|
||||
|
||||
// Output one pixel each iteration, calculating all channels (RGBA) together.
|
||||
int numValues = filter.numValues();
|
||||
for (int outX = 0; outX < numValues; outX++) {
|
||||
int filterOffset, filterLength;
|
||||
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
|
||||
filter.FilterForValue(outX, &filterOffset, &filterLength);
|
||||
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
// four pixels in a column per iteration.
|
||||
__m128i accum0 = _mm_setzero_si128();
|
||||
__m128i accum1 = _mm_setzero_si128();
|
||||
__m128i accum2 = _mm_setzero_si128();
|
||||
__m128i accum3 = _mm_setzero_si128();
|
||||
|
||||
int start = filterOffset * 4;
|
||||
// We will load and accumulate with four coefficients per iteration.
|
||||
for (int filterX = 0; filterX < (filterLength >> 2); filterX++) {
|
||||
__m128i coeff, coeff16lo, coeff16hi;
|
||||
// [16] xx xx xx xx c3 c2 c1 c0
|
||||
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filterValues));
|
||||
// [16] xx xx xx xx c1 c1 c0 c0
|
||||
coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
|
||||
// [16] c1 c1 c1 c1 c0 c0 c0 c0
|
||||
coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
|
||||
// [16] xx xx xx xx c3 c3 c2 c2
|
||||
coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
|
||||
// [16] c3 c3 c3 c3 c2 c2 c2 c2
|
||||
coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
|
||||
|
||||
__m128i src8, src16, mul_hi, mul_lo, t;
|
||||
|
||||
#define ITERATION(src, accum) \
|
||||
src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); \
|
||||
src16 = _mm_unpacklo_epi8(src8, zero); \
|
||||
mul_hi = _mm_mulhi_epi16(src16, coeff16lo); \
|
||||
mul_lo = _mm_mullo_epi16(src16, coeff16lo); \
|
||||
t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
|
||||
accum = _mm_add_epi32(accum, t); \
|
||||
t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
|
||||
accum = _mm_add_epi32(accum, t); \
|
||||
src16 = _mm_unpackhi_epi8(src8, zero); \
|
||||
mul_hi = _mm_mulhi_epi16(src16, coeff16hi); \
|
||||
mul_lo = _mm_mullo_epi16(src16, coeff16hi); \
|
||||
t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
|
||||
accum = _mm_add_epi32(accum, t); \
|
||||
t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
|
||||
accum = _mm_add_epi32(accum, t)
|
||||
|
||||
ITERATION(srcData[0] + start, accum0);
|
||||
ITERATION(srcData[1] + start, accum1);
|
||||
ITERATION(srcData[2] + start, accum2);
|
||||
ITERATION(srcData[3] + start, accum3);
|
||||
|
||||
start += 16;
|
||||
filterValues += 4;
|
||||
}
|
||||
|
||||
int r = filterLength & 3;
|
||||
if (r) {
|
||||
int remainderOffset = (filterOffset + filterLength - r) * 4;
|
||||
AccumRemainder(srcData[0] + remainderOffset, filterValues, accum0, r);
|
||||
AccumRemainder(srcData[1] + remainderOffset, filterValues, accum1, r);
|
||||
AccumRemainder(srcData[2] + remainderOffset, filterValues, accum2, r);
|
||||
AccumRemainder(srcData[3] + remainderOffset, filterValues, accum3, r);
|
||||
}
|
||||
|
||||
accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
|
||||
accum0 = _mm_packs_epi32(accum0, zero);
|
||||
accum0 = _mm_packus_epi16(accum0, zero);
|
||||
accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
|
||||
accum1 = _mm_packs_epi32(accum1, zero);
|
||||
accum1 = _mm_packus_epi16(accum1, zero);
|
||||
accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
|
||||
accum2 = _mm_packs_epi32(accum2, zero);
|
||||
accum2 = _mm_packus_epi16(accum2, zero);
|
||||
accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
|
||||
accum3 = _mm_packs_epi32(accum3, zero);
|
||||
accum3 = _mm_packus_epi16(accum3, zero);
|
||||
|
||||
// We seem to be running off the edge here (chromium:491660).
|
||||
SkASSERT(((size_t)outRow[0] - (size_t)out_row_0_start) < outRowBytes);
|
||||
|
||||
*(reinterpret_cast<int*>(outRow[0])) = _mm_cvtsi128_si32(accum0);
|
||||
*(reinterpret_cast<int*>(outRow[1])) = _mm_cvtsi128_si32(accum1);
|
||||
*(reinterpret_cast<int*>(outRow[2])) = _mm_cvtsi128_si32(accum2);
|
||||
*(reinterpret_cast<int*>(outRow[3])) = _mm_cvtsi128_si32(accum3);
|
||||
|
||||
outRow[0] += 4;
|
||||
outRow[1] += 4;
|
||||
outRow[2] += 4;
|
||||
outRow[3] += 4;
|
||||
}
|
||||
}
|
||||
|
||||
// Does vertical convolution to produce one output row. The filter values and
|
||||
// length are given in the first two parameters. These are applied to each
|
||||
// of the rows pointed to in the |sourceDataRows| array, with each row
|
||||
// being |pixelWidth| wide.
|
||||
//
|
||||
// The output must have room for |pixelWidth * 4| bytes.
|
||||
template<bool hasAlpha>
|
||||
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
|
||||
int filterLength,
|
||||
unsigned char* const* sourceDataRows,
|
||||
int pixelWidth,
|
||||
unsigned char* outRow) {
|
||||
// Output four pixels per iteration (16 bytes).
|
||||
int width = pixelWidth & ~3;
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
for (int outX = 0; outX < width; outX += 4) {
|
||||
// Accumulated result for each pixel. 32 bits per RGBA channel.
|
||||
__m128i accum0 = _mm_setzero_si128();
|
||||
__m128i accum1 = _mm_setzero_si128();
|
||||
__m128i accum2 = _mm_setzero_si128();
|
||||
__m128i accum3 = _mm_setzero_si128();
|
||||
|
||||
// Convolve with one filter coefficient per iteration.
|
||||
for (int filterY = 0; filterY < filterLength; filterY++) {
|
||||
|
||||
// Duplicate the filter coefficient 8 times.
|
||||
// [16] cj cj cj cj cj cj cj cj
|
||||
__m128i coeff16 = _mm_set1_epi16(filterValues[filterY]);
|
||||
|
||||
// Load four pixels (16 bytes) together.
|
||||
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||
const __m128i* src = reinterpret_cast<const __m128i*>(
|
||||
&sourceDataRows[filterY][outX << 2]);
|
||||
__m128i src8 = _mm_loadu_si128(src);
|
||||
|
||||
// Unpack 1st and 2nd pixels from 8 bits to 16 bits for each channels =>
|
||||
// multiply with current coefficient => accumulate the result.
|
||||
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
|
||||
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||
// [32] a0 b0 g0 r0
|
||||
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||
accum0 = _mm_add_epi32(accum0, t);
|
||||
// [32] a1 b1 g1 r1
|
||||
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||
accum1 = _mm_add_epi32(accum1, t);
|
||||
|
||||
// Unpack 3rd and 4th pixels from 8 bits to 16 bits for each channels =>
|
||||
// multiply with current coefficient => accumulate the result.
|
||||
// [16] a3 b3 g3 r3 a2 b2 g2 r2
|
||||
src16 = _mm_unpackhi_epi8(src8, zero);
|
||||
mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||
mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||
// [32] a2 b2 g2 r2
|
||||
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||
accum2 = _mm_add_epi32(accum2, t);
|
||||
// [32] a3 b3 g3 r3
|
||||
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||
accum3 = _mm_add_epi32(accum3, t);
|
||||
}
|
||||
|
||||
// Shift right for fixed point implementation.
|
||||
accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
|
||||
accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
|
||||
accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
|
||||
accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
|
||||
|
||||
// Packing 32 bits |accum| to 16 bits per channel (signed saturation).
|
||||
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||
accum0 = _mm_packs_epi32(accum0, accum1);
|
||||
// [16] a3 b3 g3 r3 a2 b2 g2 r2
|
||||
accum2 = _mm_packs_epi32(accum2, accum3);
|
||||
|
||||
// Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
|
||||
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||
accum0 = _mm_packus_epi16(accum0, accum2);
|
||||
|
||||
if (hasAlpha) {
|
||||
// Compute the max(ri, gi, bi) for each pixel.
|
||||
// [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
|
||||
__m128i a = _mm_srli_epi32(accum0, 8);
|
||||
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
|
||||
__m128i b = _mm_max_epu8(a, accum0); // Max of r and g.
|
||||
// [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
|
||||
a = _mm_srli_epi32(accum0, 16);
|
||||
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
|
||||
b = _mm_max_epu8(a, b); // Max of r and g and b.
|
||||
// [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
|
||||
b = _mm_slli_epi32(b, 24);
|
||||
|
||||
// Make sure the value of alpha channel is always larger than maximum
|
||||
// value of color channels.
|
||||
accum0 = _mm_max_epu8(b, accum0);
|
||||
} else {
|
||||
// Set value of alpha channels to 0xFF.
|
||||
__m128i mask = _mm_set1_epi32(0xff000000);
|
||||
accum0 = _mm_or_si128(accum0, mask);
|
||||
}
|
||||
|
||||
// Store the convolution result (16 bytes) and advance the pixel pointers.
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(outRow), accum0);
|
||||
outRow += 16;
|
||||
}
|
||||
|
||||
// When the width of the output is not divisible by 4, We need to save one
|
||||
// pixel (4 bytes) each time. And also the fourth pixel is always absent.
|
||||
int r = pixelWidth & 3;
|
||||
if (r) {
|
||||
__m128i accum0 = _mm_setzero_si128();
|
||||
__m128i accum1 = _mm_setzero_si128();
|
||||
__m128i accum2 = _mm_setzero_si128();
|
||||
for (int filterY = 0; filterY < filterLength; ++filterY) {
|
||||
__m128i coeff16 = _mm_set1_epi16(filterValues[filterY]);
|
||||
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||
const __m128i* src = reinterpret_cast<const __m128i*>(
|
||||
&sourceDataRows[filterY][width << 2]);
|
||||
__m128i src8 = _mm_loadu_si128(src);
|
||||
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
|
||||
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||
// [32] a0 b0 g0 r0
|
||||
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||
accum0 = _mm_add_epi32(accum0, t);
|
||||
// [32] a1 b1 g1 r1
|
||||
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||
accum1 = _mm_add_epi32(accum1, t);
|
||||
// [16] a3 b3 g3 r3 a2 b2 g2 r2
|
||||
src16 = _mm_unpackhi_epi8(src8, zero);
|
||||
mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||
mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||
// [32] a2 b2 g2 r2
|
||||
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||
accum2 = _mm_add_epi32(accum2, t);
|
||||
}
|
||||
|
||||
accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
|
||||
accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
|
||||
accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
|
||||
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||
accum0 = _mm_packs_epi32(accum0, accum1);
|
||||
// [16] a3 b3 g3 r3 a2 b2 g2 r2
|
||||
accum2 = _mm_packs_epi32(accum2, zero);
|
||||
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||
accum0 = _mm_packus_epi16(accum0, accum2);
|
||||
if (hasAlpha) {
|
||||
// [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
|
||||
__m128i a = _mm_srli_epi32(accum0, 8);
|
||||
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
|
||||
__m128i b = _mm_max_epu8(a, accum0); // Max of r and g.
|
||||
// [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
|
||||
a = _mm_srli_epi32(accum0, 16);
|
||||
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
|
||||
b = _mm_max_epu8(a, b); // Max of r and g and b.
|
||||
// [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
|
||||
b = _mm_slli_epi32(b, 24);
|
||||
accum0 = _mm_max_epu8(b, accum0);
|
||||
} else {
|
||||
__m128i mask = _mm_set1_epi32(0xff000000);
|
||||
accum0 = _mm_or_si128(accum0, mask);
|
||||
}
|
||||
|
||||
for (int i = 0; i < r; i++) {
|
||||
*(reinterpret_cast<int*>(outRow)) = _mm_cvtsi128_si32(accum0);
|
||||
accum0 = _mm_srli_si128(accum0, 4);
|
||||
outRow += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined(SK_ARM_HAS_NEON)
|
||||
|
||||
// Scalar tail for the NEON horizontal convolvers. Multiplies each of the |r|
// leftover coefficients in |filterValues| by the corresponding 4-byte pixel in
// |pixelsLeft| and adds the four per-channel sums to the 32-bit lanes of
// |accum|. Exactly r * 4 bytes of |pixelsLeft| are read.
static SK_ALWAYS_INLINE void AccumRemainder(const unsigned char* pixelsLeft,
        const SkConvolutionFilter1D::ConvolutionFixed* filterValues, int32x4_t& accum, int r) {
    int ch0 = 0;
    int ch1 = 0;
    int ch2 = 0;
    int ch3 = 0;

    const unsigned char* px = pixelsLeft;
    for (int tap = 0; tap < r; ++tap, px += 4) {
        const SkConvolutionFilter1D::ConvolutionFixed weight = filterValues[tap];
        ch0 += weight * px[0];
        ch1 += weight * px[1];
        ch2 += weight * px[2];
        ch3 += weight * px[3];
    }

    // Byte 0 of a pixel lands in lane 0, byte 3 in lane 3.
    const int32x4_t tailSums = {ch0, ch1, ch2, ch3};
    accum += tailSums;
}
|
||||
|
||||
// Horizontal convolution of a single row (NEON). |srcData| holds the source
// row as 4-byte pixels; one 4-byte output pixel is written to |outRow| for
// each of the filter's numValues() entries. The hasAlpha flag is ignored.
void convolve_horizontally(const unsigned char* srcData,
                           const SkConvolutionFilter1D& filter,
                           unsigned char* outRow,
                           bool /*hasAlpha*/) {
    // Byte-table indices for vtbl1_u8: pickK replicates the two bytes of
    // 16-bit coefficient K across all four 16-bit lanes.
    const uint8x8_t pick0 = vcreate_u8(0x0100010001000100);
    const uint8x8_t pick1 = vcreate_u8(0x0302030203020302);
    const uint8x8_t pick2 = vcreate_u8(0x0504050405040504);
    const uint8x8_t pick3 = vcreate_u8(0x0706070607060706);

    // One output pixel per pass.
    const int outputCount = filter.numValues();
    for (int outX = 0; outX < outputCount; ++outX, outRow += 4) {
        // Coefficients (and their source span) for this output pixel.
        int filterOffset, filterLength;
        const SkConvolutionFilter1D::ConvolutionFixed* taps =
            filter.FilterForValue(outX, &filterOffset, &filterLength);

        // First source pixel touched by the filter; |filterLength| pixels
        // (4 bytes each) follow from here.
        const unsigned char* srcPixels = srcData + filterOffset * 4;

        // Running per-channel sum for the destination pixel.
        int32x4_t sum = vdupq_n_s32(0);

        for (int group = filterLength >> 2; group > 0; --group) {
            // Load four coefficients and broadcast each across a vector.
            const uint8x8_t coeffBytes = vreinterpret_u8_s16(vld1_s16(taps));
            const int16x4_t w0 = vreinterpret_s16_u8(vtbl1_u8(coeffBytes, pick0));
            const int16x4_t w1 = vreinterpret_s16_u8(vtbl1_u8(coeffBytes, pick1));
            const int16x4_t w2 = vreinterpret_s16_u8(vtbl1_u8(coeffBytes, pick2));
            const int16x4_t w3 = vreinterpret_s16_u8(vtbl1_u8(coeffBytes, pick3));

            // Load four pixels and widen every channel to 16 bits.
            const uint8x16_t px8 = vld1q_u8(srcPixels);
            const int16x8_t px01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(px8)));
            const int16x8_t px23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(px8)));

            // Widening multiply of pixel k by coefficient k, then accumulate.
            sum += vmull_s16(vget_low_s16(px01), w0);
            sum += vmull_s16(vget_high_s16(px01), w1);
            sum += vmull_s16(vget_low_s16(px23), w2);
            sum += vmull_s16(vget_high_s16(px23), w3);

            // Step to the next four pixels / coefficients.
            srcPixels += 16;
            taps += 4;
        }

        // A filter length that is not a multiple of four leaves 1 - 3
        // coefficients, which are accumulated one at a time.
        const int tail = filterLength & 3;
        if (tail != 0) {
            const int tailOffset = (filterOffset + filterLength - tail) * 4;
            AccumRemainder(srcData + tailOffset, taps, sum, tail);
        }

        // Bring the value back in range: the coefficients are fixed point
        // with kShiftBits bits of fraction.
        sum = vshrq_n_s32(sum, SkConvolutionFilter1D::kShiftBits);

        // Saturate 32 -> 16 (signed) and 16 -> 8 (unsigned), then store the
        // low 32 bits as the new pixel.
        const int16x4_t sum16 = vqmovn_s32(sum);
        const uint8x8_t sum8 = vqmovun_s16(vcombine_s16(sum16, sum16));
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow), vreinterpret_u32_u8(sum8), 0);
    }
}
|
||||
|
||||
// Horizontal convolution of four rows at once (NEON). |srcData| holds the four
// source rows (4-byte pixels) and |outRow| the four destinations; each
// destination receives one pixel per filter value and each outRow[i] pointer
// is advanced as pixels are written. |outRowBytes| is not used by this
// implementation. The per-row arithmetic is the same as in
// |convolve_horizontally|; see that function for detailed comments.
void convolve_4_rows_horizontally(const unsigned char* srcData[4],
                                  const SkConvolutionFilter1D& filter,
                                  unsigned char* outRow[4],
                                  size_t outRowBytes) {
    // Byte-table indices for vtbl1_u8: coeff_maskK replicates the two bytes
    // of 16-bit coefficient K across all four 16-bit lanes.
    const uint8x8_t coeff_mask0 = vcreate_u8(0x0100010001000100);
    const uint8x8_t coeff_mask1 = vcreate_u8(0x0302030203020302);
    const uint8x8_t coeff_mask2 = vcreate_u8(0x0504050405040504);
    const uint8x8_t coeff_mask3 = vcreate_u8(0x0706070607060706);

    // Output one pixel (per row) each iteration, calculating all channels
    // (RGBA) together.
    const int numValues = filter.numValues();
    for (int outX = 0; outX < numValues; outX++) {
        int filterOffset, filterLength;
        const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
            filter.FilterForValue(outX, &filterOffset, &filterLength);

        // One 32-bit-per-channel accumulator for each of the four rows.
        int32x4_t accum0 = vdupq_n_s32(0);
        int32x4_t accum1 = vdupq_n_s32(0);
        int32x4_t accum2 = vdupq_n_s32(0);
        int32x4_t accum3 = vdupq_n_s32(0);

        // Byte offset of the first source pixel the filter touches.
        int start = filterOffset * 4;

        // We will load and accumulate with four coefficients per iteration.
        for (int filterX = 0; filterX < (filterLength >> 2); filterX++) {
            int16x4_t coeffs, coeff0, coeff1, coeff2, coeff3;

            coeffs = vld1_s16(filterValues);
            coeff0 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask0));
            coeff1 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask1));
            coeff2 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask2));
            coeff3 = vreinterpret_s16_u8(vtbl1_u8(vreinterpret_u8_s16(coeffs), coeff_mask3));

            // Scratch registers shared by every ITERATION expansion below.
            uint8x16_t pixels;
            int16x8_t p01_16, p23_16;
            int32x4_t p0, p1, p2, p3;

// Loads four pixels from |src|, multiplies pixel k by coefficient k and adds
// the four widened products to |accum|. Relies on the locals declared just
// above; undefined again right after use so the name does not leak out of
// this header.
#define ITERATION(src, accum)                                          \
    pixels = vld1q_u8(src);                                            \
    p01_16 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pixels)));     \
    p23_16 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pixels)));    \
    p0 = vmull_s16(vget_low_s16(p01_16), coeff0);                      \
    p1 = vmull_s16(vget_high_s16(p01_16), coeff1);                     \
    p2 = vmull_s16(vget_low_s16(p23_16), coeff2);                      \
    p3 = vmull_s16(vget_high_s16(p23_16), coeff3);                     \
    accum += p0;                                                       \
    accum += p1;                                                       \
    accum += p2;                                                       \
    accum += p3

            ITERATION(srcData[0] + start, accum0);
            ITERATION(srcData[1] + start, accum1);
            ITERATION(srcData[2] + start, accum2);
            ITERATION(srcData[3] + start, accum3);

#undef ITERATION

            start += 16;
            filterValues += 4;
        }

        // Handle the last 1 - 3 coefficients when |filterLength| is not a
        // multiple of four.
        int r = filterLength & 3;
        if (r) {
            int remainder_offset = (filterOffset + filterLength - r) * 4;
            AccumRemainder(srcData[0] + remainder_offset, filterValues, accum0, r);
            AccumRemainder(srcData[1] + remainder_offset, filterValues, accum1, r);
            AccumRemainder(srcData[2] + remainder_offset, filterValues, accum2, r);
            AccumRemainder(srcData[3] + remainder_offset, filterValues, accum3, r);
        }

        int16x4_t accum16;
        uint8x8_t res0, res1, res2, res3;

// Drops the fixed-point fraction from |accum| and saturates it 32 -> 16
// (signed) and 16 -> 8 (unsigned) into |res|. No trailing semicolon, so each
// use is a single statement; undefined again right after use.
#define PACK_RESULT(accum, res)                                        \
    accum = vshrq_n_s32(accum, SkConvolutionFilter1D::kShiftBits);     \
    accum16 = vqmovn_s32(accum);                                       \
    res = vqmovun_s16(vcombine_s16(accum16, accum16))

        PACK_RESULT(accum0, res0);
        PACK_RESULT(accum1, res1);
        PACK_RESULT(accum2, res2);
        PACK_RESULT(accum3, res3);

#undef PACK_RESULT

        // Store one 32-bit pixel per row and advance each output pointer.
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[0]), vreinterpret_u32_u8(res0), 0);
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[1]), vreinterpret_u32_u8(res1), 0);
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[2]), vreinterpret_u32_u8(res2), 0);
        vst1_lane_u32(reinterpret_cast<uint32_t*>(outRow[3]), vreinterpret_u32_u8(res3), 0);
        outRow[0] += 4;
        outRow[1] += 4;
        outRow[2] += 4;
        outRow[3] += 4;
    }
}
|
||||
|
||||
|
||||
// Vertical convolution producing one output row (NEON). Coefficient
// filterValues[i] (for i in [0, filterLength)) is applied to row
// sourceDataRows[i]; every row is |pixelWidth| 4-byte pixels wide.
//
// |outRow| must have room for |pixelWidth * 4| bytes.
//
// With hasAlpha, each output alpha is raised to at least the maximum of the
// pixel's three color channels; otherwise alpha is forced to 0xFF.
template<bool hasAlpha>
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
                        int filterLength,
                        unsigned char* const* sourceDataRows,
                        int pixelWidth,
                        unsigned char* outRow) {
    // Number of pixels that can be handled four at a time (16 bytes).
    const int width = pixelWidth & ~3;

    for (int outX = 0; outX < width; outX += 4, outRow += 16) {
        // One accumulator per output pixel, 32 bits per RGBA channel.
        int32x4_t sum0 = vdupq_n_s32(0);
        int32x4_t sum1 = vdupq_n_s32(0);
        int32x4_t sum2 = vdupq_n_s32(0);
        int32x4_t sum3 = vdupq_n_s32(0);

        // One filter coefficient (one source row) per pass.
        for (int row = 0; row < filterLength; ++row) {
            // [16] cj cj cj cj
            const int16x4_t coeff16 = vdup_n_s16(filterValues[row]);

            // Four pixels (16 bytes) of this source row.
            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
            const uint8x16_t px8 = vld1q_u8(&sourceDataRows[row][outX << 2]);

            // Widen every channel to 16 bits.
            const int16x8_t px01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(px8)));
            const int16x8_t px23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(px8)));

            // Widening multiply by the coefficient, one pixel per accumulator.
            sum0 += vmull_s16(vget_low_s16(px01), coeff16);
            sum1 += vmull_s16(vget_high_s16(px01), coeff16);
            sum2 += vmull_s16(vget_low_s16(px23), coeff16);
            sum3 += vmull_s16(vget_high_s16(px23), coeff16);
        }

        // Drop the fixed-point fractional bits.
        sum0 = vshrq_n_s32(sum0, SkConvolutionFilter1D::kShiftBits);
        sum1 = vshrq_n_s32(sum1, SkConvolutionFilter1D::kShiftBits);
        sum2 = vshrq_n_s32(sum2, SkConvolutionFilter1D::kShiftBits);
        sum3 = vshrq_n_s32(sum3, SkConvolutionFilter1D::kShiftBits);

        // 32 -> 16 bits per channel (signed saturation).
        // [16] a1 b1 g1 r1 a0 b0 g0 r0
        const int16x8_t lo16 = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1));
        // [16] a3 b3 g3 r3 a2 b2 g2 r2
        const int16x8_t hi16 = vcombine_s16(vqmovn_s32(sum2), vqmovn_s32(sum3));

        // 16 -> 8 bits per channel (unsigned saturation).
        // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
        uint8x16_t packed = vcombine_u8(vqmovun_s16(lo16), vqmovun_s16(hi16));

        if (hasAlpha) {
            // Compute max(ri, gi, bi) in the low byte of every pixel.
            // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
            uint8x16_t shifted =
                vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(packed), 8));
            // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
            uint8x16_t maxColor = vmaxq_u8(shifted, packed);  // Max of r and g.
            // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
            shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(packed), 16));
            // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
            maxColor = vmaxq_u8(shifted, maxColor);  // Max of r, g and b.
            // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
            maxColor = vreinterpretq_u8_u32(vshlq_n_u32(vreinterpretq_u32_u8(maxColor), 24));

            // Alpha must never be smaller than the largest color channel.
            packed = vmaxq_u8(maxColor, packed);
        } else {
            // Force every alpha byte to 0xFF.
            packed = vreinterpretq_u8_u32(vreinterpretq_u32_u8(packed) | vdupq_n_u32(0xFF000000));
        }

        // Store four finished pixels (16 bytes).
        vst1q_u8(outRow, packed);
    }

    // Leftover pixels when the width is not a multiple of four: at most 3.
    const int r = pixelWidth & 3;
    if (r) {
        int32x4_t sum0 = vdupq_n_s32(0);
        int32x4_t sum1 = vdupq_n_s32(0);
        int32x4_t sum2 = vdupq_n_s32(0);

        for (int row = 0; row < filterLength; ++row) {
            const int16x4_t coeff16 = vdup_n_s16(filterValues[row]);

            // NOTE(review): this is still a full 16-byte load although fewer
            // than four pixels remain -- presumably callers pad the source
            // rows; confirm against the row-buffer allocation.
            // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
            const uint8x16_t px8 = vld1q_u8(&sourceDataRows[row][width << 2]);

            const int16x8_t px01 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(px8)));
            const int16x8_t px23 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(px8)));

            sum0 += vmull_s16(vget_low_s16(px01), coeff16);
            sum1 += vmull_s16(vget_high_s16(px01), coeff16);
            sum2 += vmull_s16(vget_low_s16(px23), coeff16);
        }

        sum0 = vshrq_n_s32(sum0, SkConvolutionFilter1D::kShiftBits);
        sum1 = vshrq_n_s32(sum1, SkConvolutionFilter1D::kShiftBits);
        sum2 = vshrq_n_s32(sum2, SkConvolutionFilter1D::kShiftBits);

        // The third pixel is duplicated into the (unused) fourth slot.
        const int16x8_t lo16 = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1));
        const int16x8_t hi16 = vcombine_s16(vqmovn_s32(sum2), vqmovn_s32(sum2));

        uint8x16_t packed = vcombine_u8(vqmovun_s16(lo16), vqmovun_s16(hi16));

        if (hasAlpha) {
            // Same alpha >= max(r, g, b) fix-up as in the main loop.
            uint8x16_t shifted =
                vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(packed), 8));
            uint8x16_t maxColor = vmaxq_u8(shifted, packed);  // Max of r and g.
            shifted = vreinterpretq_u8_u32(vshrq_n_u32(vreinterpretq_u32_u8(packed), 16));
            maxColor = vmaxq_u8(shifted, maxColor);  // Max of r, g and b.
            maxColor = vreinterpretq_u8_u32(vshlq_n_u32(vreinterpretq_u32_u8(maxColor), 24));
            packed = vmaxq_u8(maxColor, packed);
        } else {
            // Force every alpha byte to 0xFF.
            packed = vreinterpretq_u8_u32(vreinterpretq_u32_u8(packed) | vdupq_n_u32(0xFF000000));
        }

        // Store exactly r pixels: lane 0 alone, lanes 0-1 as a pair, or
        // lanes 0-1 followed by lane 2.
        uint32_t* dst = reinterpret_cast<uint32_t*>(outRow);
        if (r == 1) {
            vst1q_lane_u32(dst, vreinterpretq_u32_u8(packed), 0);
        } else if (r == 2) {
            vst1_u32(dst, vreinterpret_u32_u8(vget_low_u8(packed)));
        } else if (r == 3) {
            vst1_u32(dst, vreinterpret_u32_u8(vget_low_u8(packed)));
            vst1q_lane_u32(reinterpret_cast<uint32_t*>(outRow + 8),
                           vreinterpretq_u32_u8(packed), 2);
        }
    }
}
|
||||
|
||||
#else
|
||||
|
||||
// Saturates |a| to the 8-bit unsigned range: values below 0 become 0, values
// above 255 become 255, everything else is returned unchanged.
inline unsigned char ClampTo8(int a) {
    // Viewed as unsigned, every negative value is larger than 255, so one
    // comparison separates the in-range (common) case from both extremes.
    const unsigned bits = static_cast<unsigned>(a);
    if (bits > 255u) {
        return a < 0 ? 0 : 255;
    }
    return static_cast<unsigned char>(bits);
}
|
||||
|
||||
// Convolves horizontally along a single row. The row data is given in
|
||||
// |srcData| and continues for the numValues() of the filter.
|
||||
template<bool hasAlpha>
|
||||
void ConvolveHorizontally(const unsigned char* srcData,
|
||||
const SkConvolutionFilter1D& filter,
|
||||
unsigned char* outRow) {
|
||||
// Loop over each pixel on this row in the output image.
|
||||
int numValues = filter.numValues();
|
||||
for (int outX = 0; outX < numValues; outX++) {
|
||||
// Get the filter that determines the current output pixel.
|
||||
int filterOffset, filterLength;
|
||||
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
|
||||
filter.FilterForValue(outX, &filterOffset, &filterLength);
|
||||
|
||||
// Compute the first pixel in this row that the filter affects. It will
|
||||
// touch |filterLength| pixels (4 bytes each) after this.
|
||||
const unsigned char* rowToFilter = &srcData[filterOffset * 4];
|
||||
|
||||
// Apply the filter to the row to get the destination pixel in |accum|.
|
||||
int accum[4] = {0};
|
||||
for (int filterX = 0; filterX < filterLength; filterX++) {
|
||||
SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
|
||||
accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
|
||||
accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
|
||||
accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
|
||||
if (hasAlpha) {
|
||||
accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
|
||||
}
|
||||
}
|
||||
|
||||
// Bring this value back in range. All of the filter scaling factors
|
||||
// are in fixed point with kShiftBits bits of fractional part.
|
||||
accum[0] >>= SkConvolutionFilter1D::kShiftBits;
|
||||
accum[1] >>= SkConvolutionFilter1D::kShiftBits;
|
||||
accum[2] >>= SkConvolutionFilter1D::kShiftBits;
|
||||
if (hasAlpha) {
|
||||
accum[3] >>= SkConvolutionFilter1D::kShiftBits;
|
||||
}
|
||||
|
||||
// Store the new pixel.
|
||||
outRow[outX * 4 + 0] = ClampTo8(accum[0]);
|
||||
outRow[outX * 4 + 1] = ClampTo8(accum[1]);
|
||||
outRow[outX * 4 + 2] = ClampTo8(accum[2]);
|
||||
if (hasAlpha) {
|
||||
outRow[outX * 4 + 3] = ClampTo8(accum[3]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Does vertical convolution to produce one output row. The filter values and
|
||||
// length are given in the first two parameters. These are applied to each
|
||||
// of the rows pointed to in the |sourceDataRows| array, with each row
|
||||
// being |pixelWidth| wide.
|
||||
//
|
||||
// The output must have room for |pixelWidth * 4| bytes.
|
||||
template<bool hasAlpha>
|
||||
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
|
||||
int filterLength,
|
||||
unsigned char* const* sourceDataRows,
|
||||
int pixelWidth,
|
||||
unsigned char* outRow) {
|
||||
// We go through each column in the output and do a vertical convolution,
|
||||
// generating one output pixel each time.
|
||||
for (int outX = 0; outX < pixelWidth; outX++) {
|
||||
// Compute the number of bytes over in each row that the current column
|
||||
// we're convolving starts at. The pixel will cover the next 4 bytes.
|
||||
int byteOffset = outX * 4;
|
||||
|
||||
// Apply the filter to one column of pixels.
|
||||
int accum[4] = {0};
|
||||
for (int filterY = 0; filterY < filterLength; filterY++) {
|
||||
SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
|
||||
accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
|
||||
accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
|
||||
accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
|
||||
if (hasAlpha) {
|
||||
accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
|
||||
}
|
||||
}
|
||||
|
||||
// Bring this value back in range. All of the filter scaling factors
|
||||
// are in fixed point with kShiftBits bits of precision.
|
||||
accum[0] >>= SkConvolutionFilter1D::kShiftBits;
|
||||
accum[1] >>= SkConvolutionFilter1D::kShiftBits;
|
||||
accum[2] >>= SkConvolutionFilter1D::kShiftBits;
|
||||
if (hasAlpha) {
|
||||
accum[3] >>= SkConvolutionFilter1D::kShiftBits;
|
||||
}
|
||||
|
||||
// Store the new pixel.
|
||||
outRow[byteOffset + 0] = ClampTo8(accum[0]);
|
||||
outRow[byteOffset + 1] = ClampTo8(accum[1]);
|
||||
outRow[byteOffset + 2] = ClampTo8(accum[2]);
|
||||
if (hasAlpha) {
|
||||
unsigned char alpha = ClampTo8(accum[3]);
|
||||
|
||||
// Make sure the alpha channel doesn't come out smaller than any of the
|
||||
// color channels. We use premultipled alpha channels, so this should
|
||||
// never happen, but rounding errors will cause this from time to time.
|
||||
// These "impossible" colors will cause overflows (and hence random pixel
|
||||
// values) when the resulting bitmap is drawn to the screen.
|
||||
//
|
||||
// We only need to do this when generating the final output row (here).
|
||||
int maxColorChannel = SkTMax(outRow[byteOffset + 0],
|
||||
SkTMax(outRow[byteOffset + 1],
|
||||
outRow[byteOffset + 2]));
|
||||
if (alpha < maxColorChannel) {
|
||||
outRow[byteOffset + 3] = maxColorChannel;
|
||||
} else {
|
||||
outRow[byteOffset + 3] = alpha;
|
||||
}
|
||||
} else {
|
||||
// No alpha channel, the image is opaque.
|
||||
outRow[byteOffset + 3] = 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// There's a bug somewhere here with GCC autovectorization (-ftree-vectorize). We originally
|
||||
// thought this was 32 bit only, but subsequent tests show that some 64 bit gcc compiles
|
||||
// suffer here too.
|
||||
//
|
||||
// Dropping to -O2 disables -ftree-vectorize. GCC 4.6 needs noinline. https://bug.skia.org/2575
|
||||
#if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE)
|
||||
#define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), noinline))
|
||||
#else
|
||||
#define SK_MAYBE_DISABLE_VECTORIZATION
|
||||
#endif
|
||||
|
||||
SK_MAYBE_DISABLE_VECTORIZATION
|
||||
void convolve_horizontally(const unsigned char* srcData,
|
||||
const SkConvolutionFilter1D& filter,
|
||||
unsigned char* outRow,
|
||||
bool hasAlpha) {
|
||||
if (hasAlpha) {
|
||||
ConvolveHorizontally<true>(srcData, filter, outRow);
|
||||
} else {
|
||||
ConvolveHorizontally<false>(srcData, filter, outRow);
|
||||
}
|
||||
}
|
||||
#undef SK_MAYBE_DISABLE_VECTORIZATION
|
||||
|
||||
void (*convolve_4_rows_horizontally)(const unsigned char* srcData[4],
|
||||
const SkConvolutionFilter1D& filter,
|
||||
unsigned char* outRow[4],
|
||||
size_t outRowBytes)
|
||||
= nullptr;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
// Runtime-dispatching wrapper: selects the ConvolveVertically instantiation
// matching |hasAlpha| and forwards the remaining arguments unchanged.
void convolve_vertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
                         int filterLength,
                         unsigned char* const* sourceDataRows,
                         int pixelWidth,
                         unsigned char* outRow,
                         bool hasAlpha) {
    if (hasAlpha) {
        ConvolveVertically<true>(filterValues, filterLength, sourceDataRows,
                                 pixelWidth, outRow);
    } else {
        ConvolveVertically<false>(filterValues, filterLength, sourceDataRows,
                                  pixelWidth, outRow);
    }
}
|
||||
|
||||
} // namespace SK_OPTS_NS
|
||||
|
||||
#endif//SkBitmapFilter_opts_DEFINED
|
@ -5,7 +5,6 @@
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
#include "SkBitmapScaler.h"
|
||||
#include "SkBitmapProcState.h"
|
||||
|
||||
/* A platform may optionally overwrite any of these with accelerated
|
||||
|
@ -1,118 +0,0 @@
|
||||
/*
|
||||
* Copyright 2016 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
* found in the LICENSE file.
|
||||
*/
|
||||
|
||||
// It is not safe to #include any header file here unless it has been vetted for ODR safety:
|
||||
// all symbols used must be file-scoped static or in an anonymous namespace. This applies
|
||||
// to _all_ header files: C standard library, C++ standard library, Skia... everything.
|
||||
|
||||
#include <immintrin.h> // ODR safe
|
||||
#include <stdint.h> // ODR safe
|
||||
|
||||
#if defined(__AVX2__)

namespace hsw {

    // AVX2 vertical convolution producing one output row of |width| 4-byte pixels.
    //
    //   filter    |filterLen| signed 16-bit coefficients; the sums are shifted right by 14
    //             bits afterwards, i.e. the coefficients carry 14 fractional bits.
    //   srcRows   |filterLen| row pointers, one per coefficient. Every iteration loads a full
    //             32 bytes from each row at byte offset x*4, even when fewer than 8 pixels
    //             remain, so each row must be readable up to the next multiple of 8 pixels.
    //   out       destination row; exactly |width| pixels are written.
    //   hasAlpha  true: alpha is raised to at least max(r,g,b) of the same pixel;
    //             false: alpha is forced to 0xff.
    void convolve_vertically(const int16_t* filter, int filterLen,
                             uint8_t* const* srcRows, int width,
                             uint8_t* out, bool hasAlpha) {
        // It's simpler to work with the output array in terms of 4-byte pixels.
        auto dst = (int*)out;

        // Output up to eight pixels per iteration.
        for (int x = 0; x < width; x += 8) {
            // Accumulated result for 4 (non-adjacent) pairs of pixels,
            // with each channel in signed 17.14 fixed point.
            auto accum04 = _mm256_setzero_si256(),
                 accum15 = _mm256_setzero_si256(),
                 accum26 = _mm256_setzero_si256(),
                 accum37 = _mm256_setzero_si256();

            // Convolve with the filter.  (This inner loop is where we spend ~all our time.)
            // While we can, we consume 2 filter coefficients and 2 rows of 8 pixels each at a time.
            auto convolve_16_pixels = [&](__m256i interlaced_coeffs,
                                          __m256i pixels_01234567, __m256i pixels_89ABCDEF) {
                // Interlaced R0R8 G0G8 B0B8 A0A8 R1R9 G1G9... 32 8-bit values each.
                auto _08194C5D = _mm256_unpacklo_epi8(pixels_01234567, pixels_89ABCDEF),
                     _2A3B6E7F = _mm256_unpackhi_epi8(pixels_01234567, pixels_89ABCDEF);

                // Still interlaced R0R8 G0G8... as above, each channel expanded to 16-bit lanes.
                auto _084C = _mm256_unpacklo_epi8(_08194C5D, _mm256_setzero_si256()),
                     _195D = _mm256_unpackhi_epi8(_08194C5D, _mm256_setzero_si256()),
                     _2A6E = _mm256_unpacklo_epi8(_2A3B6E7F, _mm256_setzero_si256()),
                     _3B7F = _mm256_unpackhi_epi8(_2A3B6E7F, _mm256_setzero_si256());

                // accum0_R += R0*coeff0 + R8*coeff1, etc.
                accum04 = _mm256_add_epi32(accum04, _mm256_madd_epi16(_084C, interlaced_coeffs));
                accum15 = _mm256_add_epi32(accum15, _mm256_madd_epi16(_195D, interlaced_coeffs));
                accum26 = _mm256_add_epi32(accum26, _mm256_madd_epi16(_2A6E, interlaced_coeffs));
                accum37 = _mm256_add_epi32(accum37, _mm256_madd_epi16(_3B7F, interlaced_coeffs));
            };

            int i = 0;
            for (; i < filterLen/2*2; i += 2) {
                // Pack two adjacent coefficients into one 32-bit lane: filter[i] in the low
                // 16 bits, filter[i+1] in the high 16 bits.  This is built from the values
                // instead of reading an int32_t through the int16_t pointer, which would
                // break the aliasing rules and could be a misaligned access.
                const uint32_t coeff_pair = ((uint32_t)(uint16_t)filter[i+0])
                                          | ((uint32_t)(uint16_t)filter[i+1] << 16);
                convolve_16_pixels(_mm256_set1_epi32((int32_t)coeff_pair),
                                   _mm256_loadu_si256((const __m256i*)(srcRows[i+0] + x*4)),
                                   _mm256_loadu_si256((const __m256i*)(srcRows[i+1] + x*4)));
            }
            if (i < filterLen) {
                // Odd coefficient left over: pair its row with all-zero pixels, so whatever
                // lands in the high half of the coefficient lane is multiplied by zero.
                convolve_16_pixels(_mm256_set1_epi32(filter[i]),
                                   _mm256_loadu_si256((const __m256i*)(srcRows[i] + x*4)),
                                   _mm256_setzero_si256());
            }

            // Trim the fractional parts off the accumulators.
            accum04 = _mm256_srai_epi32(accum04, 14);
            accum15 = _mm256_srai_epi32(accum15, 14);
            accum26 = _mm256_srai_epi32(accum26, 14);
            accum37 = _mm256_srai_epi32(accum37, 14);

            // Pack back down to 8-bit channels (with saturation).
            auto pixels = _mm256_packus_epi16(_mm256_packs_epi32(accum04, accum15),
                                              _mm256_packs_epi32(accum26, accum37));

            if (hasAlpha) {
                // Clamp alpha to the max of r,g,b to make sure we stay premultiplied.
                __m256i max_rg  = _mm256_max_epu8(pixels, _mm256_srli_epi32(pixels,  8)),
                        max_rgb = _mm256_max_epu8(max_rg, _mm256_srli_epi32(pixels, 16));
                pixels = _mm256_max_epu8(pixels, _mm256_slli_epi32(max_rgb, 24));
            } else {
                // Force opaque.
                pixels = _mm256_or_si256(pixels, _mm256_set1_epi32(0xff000000));
            }

            // Normal path to store 8 pixels.
            if (x + 8 <= width) {
                _mm256_storeu_si256((__m256i*)dst, pixels);
                dst += 8;
                continue;
            }

            // Store one pixel at a time on the last iteration: write the lowest 32-bit lane,
            // then rotate the next pixel down into it.
            for (int px = x; px < width; px++) {
                *dst++ = _mm_cvtsi128_si32(_mm256_castsi256_si128(pixels));
                pixels = _mm256_permutevar8x32_epi32(pixels, _mm256_setr_epi32(1,2,3,4,5,6,7,0));
            }
        }
    }

}

namespace SkOpts {
    // See SkOpts.h, writing SkConvolutionFilter1D::ConvolutionFixed as the underlying type.
    extern void (*convolve_vertically)(const int16_t* filter, int filterLen,
                                       uint8_t* const* srcRows, int width,
                                       uint8_t* out, bool hasAlpha);

    // Points SkOpts::convolve_vertically at the AVX2 implementation above.
    void Init_hsw() {
        convolve_vertically = hsw::convolve_vertically;
    }
}

#else  // defined(__AVX2__) is not true...

// Built without AVX2: there is nothing to install, so Init_hsw() is a no-op.
namespace SkOpts { void Init_hsw() {} }

#endif
|
@ -7,7 +7,6 @@
|
||||
|
||||
#include "SkBitmapProcState_opts_SSE2.h"
|
||||
#include "SkBitmapProcState_opts_SSSE3.h"
|
||||
#include "SkBitmapScaler.h"
|
||||
#include "SkBlitMask.h"
|
||||
#include "SkBlitRow.h"
|
||||
#include "SkBlitRow_opts_SSE2.h"
|
||||
|
@ -256,7 +256,7 @@ bool SkImageShader::onAppendStages(SkRasterPipeline* p, SkColorSpace* dstCS, SkA
|
||||
auto quality = paint.getFilterQuality();
|
||||
|
||||
SkBitmapProvider provider(fImage.get(), dstCS);
|
||||
SkDefaultBitmapController controller(SkDefaultBitmapController::CanShadeHQ::kYes);
|
||||
SkDefaultBitmapController controller;
|
||||
std::unique_ptr<SkBitmapController::State> state {
|
||||
controller.requestBitmap(provider, matrix, quality)
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user