// skia2/gm/imageblur.cpp (75 lines, 1.9 KiB, C++)

/*
* Copyright 2011 Google Inc.
*
* Use of this source code is governed by a BSD-style license that can be
* found in the LICENSE file.
*/
#include "gm.h"
#include "SkBlurImageFilter.h"
#include "SkRandom.h"
#define WIDTH 500
#define HEIGHT 500
namespace skiagm {
class ImageBlurGM : public GM {
public:
NEON fast path for box blur Calculate 8 channels in parallel by using 16-bits to store each channel. Due to the limitation of VQRDMULH, (int16 * int16 * 2 + 0x8000) >> 16, the fast path can only support kernelSize < 128. 8 significant bits are kept at least in each stage, the final error should less-equal than 1. Pre-fetching memory for X-direction read. In fact pre-fetching memory doesn't help much for Y direction read, since it is a waste to load a cache line for only read 8 bytes.(I left it there to keep the symmetry. pre-fetch is cheap :) ) bench data on Nexus 10 before: running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 25081.48 running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 25038.04 running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 25209.04 running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 24928.01 running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 17160.98 running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 17924.11 running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 14609.19 running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 14625.91 after: running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 14848.42 running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 16037.29 running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 14819.55 running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 14563.69 running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 11905.34 running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 11883.85 running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 9576.51 running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 9793.84 BUG= R=senorblanco@chromium.org, mtklein@google.com, reed@google.com, 
kevin.petit@arm.com, kevin.petit.arm@gmail.com Author: zheng.xu@arm.com Review URL: https://codereview.chromium.org/105893003 git-svn-id: http://skia.googlecode.com/svn/trunk@13036 2bbb7eff-a529-9590-31e7-b0007b416f81
2014-01-13 08:16:45 +00:00
ImageBlurGM(SkScalar sigmaX, SkScalar sigmaY, const char* suffix)
: fSigmaX(sigmaX), fSigmaY(sigmaY) {
this->setBGColor(0xFF000000);
NEON fast path for box blur Calculate 8 channels in parallel by using 16-bits to store each channel. Due to the limitation of VQRDMULH, (int16 * int16 * 2 + 0x8000) >> 16, the fast path can only support kernelSize < 128. 8 significant bits are kept at least in each stage, the final error should less-equal than 1. Pre-fetching memory for X-direction read. In fact pre-fetching memory doesn't help much for Y direction read, since it is a waste to load a cache line for only read 8 bytes.(I left it there to keep the symmetry. pre-fetch is cheap :) ) bench data on Nexus 10 before: running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 25081.48 running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 25038.04 running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 25209.04 running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 24928.01 running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 17160.98 running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 17924.11 running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 14609.19 running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 14625.91 after: running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 14848.42 running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 16037.29 running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 14819.55 running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 14563.69 running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 11905.34 running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 11883.85 running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 9576.51 running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 9793.84 BUG= R=senorblanco@chromium.org, mtklein@google.com, reed@google.com, 
kevin.petit@arm.com, kevin.petit.arm@gmail.com Author: zheng.xu@arm.com Review URL: https://codereview.chromium.org/105893003 git-svn-id: http://skia.googlecode.com/svn/trunk@13036 2bbb7eff-a529-9590-31e7-b0007b416f81
2014-01-13 08:16:45 +00:00
fName.printf("imageblur%s", suffix);
}
protected:
virtual uint32_t onGetFlags() const SK_OVERRIDE {
return kSkipTiled_Flag;
}
virtual SkString onShortName() {
NEON fast path for box blur Calculate 8 channels in parallel by using 16-bits to store each channel. Due to the limitation of VQRDMULH, (int16 * int16 * 2 + 0x8000) >> 16, the fast path can only support kernelSize < 128. 8 significant bits are kept at least in each stage, the final error should less-equal than 1. Pre-fetching memory for X-direction read. In fact pre-fetching memory doesn't help much for Y direction read, since it is a waste to load a cache line for only read 8 bytes.(I left it there to keep the symmetry. pre-fetch is cheap :) ) bench data on Nexus 10 before: running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 25081.48 running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 25038.04 running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 25209.04 running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 24928.01 running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 17160.98 running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 17924.11 running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 14609.19 running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 14625.91 after: running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 14848.42 running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 16037.29 running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 14819.55 running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 14563.69 running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 11905.34 running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 11883.85 running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 9576.51 running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 9793.84 BUG= R=senorblanco@chromium.org, mtklein@google.com, reed@google.com, 
kevin.petit@arm.com, kevin.petit.arm@gmail.com Author: zheng.xu@arm.com Review URL: https://codereview.chromium.org/105893003 git-svn-id: http://skia.googlecode.com/svn/trunk@13036 2bbb7eff-a529-9590-31e7-b0007b416f81
2014-01-13 08:16:45 +00:00
return fName;
}
virtual SkISize onISize() {
return make_isize(WIDTH, HEIGHT);
}
virtual void onDraw(SkCanvas* canvas) {
SkPaint paint;
paint.setImageFilter(SkBlurImageFilter::Create(fSigmaX, fSigmaY))->unref();
canvas->saveLayer(NULL, &paint);
const char* str = "The quick brown fox jumped over the lazy dog.";
SkRandom rand;
SkPaint textPaint;
textPaint.setAntiAlias(true);
for (int i = 0; i < 25; ++i) {
int x = rand.nextULessThan(WIDTH);
int y = rand.nextULessThan(HEIGHT);
textPaint.setColor(rand.nextBits(24) | 0xFF000000);
textPaint.setTextSize(rand.nextRangeScalar(0, 300));
canvas->drawText(str, strlen(str), SkIntToScalar(x),
SkIntToScalar(y), textPaint);
}
canvas->restore();
}
private:
NEON fast path for box blur Calculate 8 channels in parallel by using 16-bits to store each channel. Due to the limitation of VQRDMULH, (int16 * int16 * 2 + 0x8000) >> 16, the fast path can only support kernelSize < 128. 8 significant bits are kept at least in each stage, the final error should less-equal than 1. Pre-fetching memory for X-direction read. In fact pre-fetching memory doesn't help much for Y direction read, since it is a waste to load a cache line for only read 8 bytes.(I left it there to keep the symmetry. pre-fetch is cheap :) ) bench data on Nexus 10 before: running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 25081.48 running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 25038.04 running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 25209.04 running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 24928.01 running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 17160.98 running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 17924.11 running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 14609.19 running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 14625.91 after: running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 14848.42 running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 16037.29 running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 14819.55 running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 14563.69 running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 11905.34 running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 11883.85 running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 9576.51 running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 9793.84 BUG= R=senorblanco@chromium.org, mtklein@google.com, reed@google.com, 
kevin.petit@arm.com, kevin.petit.arm@gmail.com Author: zheng.xu@arm.com Review URL: https://codereview.chromium.org/105893003 git-svn-id: http://skia.googlecode.com/svn/trunk@13036 2bbb7eff-a529-9590-31e7-b0007b416f81
2014-01-13 08:16:45 +00:00
SkScalar fSigmaX;
SkScalar fSigmaY;
SkString fName;
typedef GM INHERITED;
};
//////////////////////////////////////////////////////////////////////////////
// Blame annotation carried over from the repository history view
// (2014-01-13 08:16:45 +00:00):
//
// NEON fast path for box blur
//
// Calculate 8 channels in parallel by using 16-bits to store each channel.
// Due to the limitation of VQRDMULH, (int16 * int16 * 2 + 0x8000) >> 16, the
// fast path can only support kernelSize < 128.
// At least 8 significant bits are kept in each stage, so the final error
// should be less than or equal to 1.
// Pre-fetching memory for X-direction read. In fact pre-fetching memory
// doesn't help much for Y direction read, since it is a waste to load a cache
// line to read only 8 bytes. (I left it there to keep the symmetry.
// pre-fetch is cheap :) )
//
// bench data on Nexus 10
//
// before:
// running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 25081.48
// running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 25038.04
// running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 25209.04
// running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 24928.01
// running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 17160.98
// running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 17924.11
// running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 14609.19
// running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 14625.91
//
// after:
// running bench [640 480] blur_image_filter_large_10.00_10.00 8888: cmsecs = 14848.42
// running bench [640 480] blur_image_filter_small_10.00_10.00 8888: cmsecs = 16037.29
// running bench [640 480] blur_image_filter_large_1.00_1.00 8888: cmsecs = 14819.55
// running bench [640 480] blur_image_filter_small_1.00_1.00 8888: cmsecs = 14563.69
// running bench [640 480] blur_image_filter_large_0.00_1.00 8888: cmsecs = 11905.34
// running bench [640 480] blur_image_filter_large_0.00_10.00 8888: cmsecs = 11883.85
// running bench [640 480] blur_image_filter_large_1.00_0.00 8888: cmsecs = 9576.51
// running bench [640 480] blur_image_filter_large_10.00_0.00 8888: cmsecs = 9793.84
//
// BUG=
// R=senorblanco@chromium.org, mtklein@google.com, reed@google.com,
//   kevin.petit@arm.com, kevin.petit.arm@gmail.com
// Author: zheng.xu@arm.com
// Review URL: https://codereview.chromium.org/105893003
// git-svn-id: http://skia.googlecode.com/svn/trunk@13036 2bbb7eff-a529-9590-31e7-b0007b416f81
// Builds the GM named "imageblur": sigma 24 in X, 0 in Y.
static GM* MyFactory1(void*) {
    return new ImageBlurGM(24.0f, 0.0f, "");
}
static GMRegistry reg1(MyFactory1);

// Builds the GM named "imageblur_large": sigma 80 in both X and Y.
static GM* MyFactory2(void*) {
    return new ImageBlurGM(80.0f, 80.0f, "_large");
}
static GMRegistry reg2(MyFactory2);
}