Add SK_PREFETCH and use in SkBlurImageFilter.
Relative speed is 1.2-1.6x on desktop, 1.0-1.2x on Nexus 4. (Division remains the bottleneck, now more so.)

BUG=
R=senorblanco@google.com, reed@google.com, senorblanco@chromium.org

Author: mtklein@google.com

Review URL: https://codereview.chromium.org/57823003

git-svn-id: http://skia.googlecode.com/svn/trunk@12129 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
ecdb6b326f
commit
fedf13d73a
@ -381,6 +381,15 @@
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
 *  Cache prefetch hints.
 *
 *  SK_PREFETCH(ptr)        hints that the memory at ptr will soon be read.
 *  SK_WRITE_PREFETCH(ptr)  hints that the memory at ptr will soon be written.
 *
 *  With clang or GCC these forward to __builtin_prefetch; the second argument
 *  of 1 selects the "prepare for write" variant. With any other compiler they
 *  are no-ops that do not evaluate ptr.
 *
 *  The no-op form is ((void)0) rather than an empty expansion so that the
 *  macros are a void expression on every compiler: `if (c) SK_PREFETCH(p);`
 *  does not degenerate into an empty controlled statement, and the macros can
 *  appear inside comma or conditional expressions.
 */
#if defined(__clang__) || defined(__GNUC__)
#define SK_PREFETCH(ptr)       __builtin_prefetch((ptr))
#define SK_WRITE_PREFETCH(ptr) __builtin_prefetch((ptr), 1)
#else
#define SK_PREFETCH(ptr)       ((void)0)
#define SK_WRITE_PREFETCH(ptr) ((void)0)
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
#ifndef SK_PRINTF_LIKE
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define SK_PRINTF_LIKE(A, B) __attribute__((format(printf, (A), (B))))
|
||||
|
@ -144,6 +144,8 @@ static void boxBlurY(const SkBitmap& src, SkBitmap* dst, int kernelSize,
|
||||
sumB += SkGetPackedB32(r);
|
||||
}
|
||||
sptr += srcStride;
|
||||
// The next leading pixel seems to be too hard to predict. Hint the fetch.
|
||||
SK_PREFETCH(sptr + (bottomOffset + 1) * srcStride);
|
||||
dptr += dstStride;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user