The image resampling code has been transplanted from Chrome; it's incredibly fast.
We've tested this CL plumbed into Chrome and benchmarked it with excellent results. This CL can land independently of any Chrome changes; it's completely internal to Skia. BUG= R=reed@google.com Review URL: https://codereview.chromium.org/19335002 git-svn-id: http://skia.googlecode.com/svn/trunk@10206 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
d322cf4939
commit
138ebc3e40
@ -75,7 +75,7 @@ protected:
|
|||||||
curWidth = (int) (fBM.width() * curScale + 2);
|
curWidth = (int) (fBM.width() * curScale + 2);
|
||||||
curX += curWidth;
|
curX += curWidth;
|
||||||
curScale *= 0.75f;
|
curScale *= 0.75f;
|
||||||
} while (curX < 4*fBM.width());
|
} while (curWidth >= 2 && curX < 4*fBM.width());
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -32,6 +32,8 @@
|
|||||||
'<(skia_src_path)/core/SkBitmapProcState_matrix.h',
|
'<(skia_src_path)/core/SkBitmapProcState_matrix.h',
|
||||||
'<(skia_src_path)/core/SkBitmapProcState_matrixProcs.cpp',
|
'<(skia_src_path)/core/SkBitmapProcState_matrixProcs.cpp',
|
||||||
'<(skia_src_path)/core/SkBitmapProcState_sample.h',
|
'<(skia_src_path)/core/SkBitmapProcState_sample.h',
|
||||||
|
'<(skia_src_path)/core/SkBitmapScaler.h',
|
||||||
|
'<(skia_src_path)/core/SkBitmapScaler.cpp',
|
||||||
'<(skia_src_path)/core/SkBitmapShader16BilerpTemplate.h',
|
'<(skia_src_path)/core/SkBitmapShader16BilerpTemplate.h',
|
||||||
'<(skia_src_path)/core/SkBitmapShaderTemplate.h',
|
'<(skia_src_path)/core/SkBitmapShaderTemplate.h',
|
||||||
'<(skia_src_path)/core/SkBitmap_scroll.cpp',
|
'<(skia_src_path)/core/SkBitmap_scroll.cpp',
|
||||||
@ -56,6 +58,8 @@
|
|||||||
'<(skia_src_path)/core/SkComposeShader.cpp',
|
'<(skia_src_path)/core/SkComposeShader.cpp',
|
||||||
'<(skia_src_path)/core/SkConfig8888.cpp',
|
'<(skia_src_path)/core/SkConfig8888.cpp',
|
||||||
'<(skia_src_path)/core/SkConfig8888.h',
|
'<(skia_src_path)/core/SkConfig8888.h',
|
||||||
|
'<(skia_src_path)/core/SkConvolver.cpp',
|
||||||
|
'<(skia_src_path)/core/SkConvolver.h',
|
||||||
'<(skia_src_path)/core/SkCordic.cpp',
|
'<(skia_src_path)/core/SkCordic.cpp',
|
||||||
'<(skia_src_path)/core/SkCordic.h',
|
'<(skia_src_path)/core/SkCordic.h',
|
||||||
'<(skia_src_path)/core/SkCoreBlitters.h',
|
'<(skia_src_path)/core/SkCoreBlitters.h',
|
||||||
|
@ -702,19 +702,7 @@ private:
|
|||||||
int extractMipLevel(SkBitmap* dst, SkFixed sx, SkFixed sy);
|
int extractMipLevel(SkBitmap* dst, SkFixed sx, SkFixed sy);
|
||||||
bool hasMipMap() const;
|
bool hasMipMap() const;
|
||||||
void freeMipMap();
|
void freeMipMap();
|
||||||
|
|
||||||
/** Make a scaled copy of this bitmap into the provided destination.
|
|
||||||
* The caller is responsible for having set the width and height of the
|
|
||||||
* provided destination bitmap, and also having allocated its pixel
|
|
||||||
* memory.
|
|
||||||
*
|
|
||||||
* This function is temporary and for testing purposes only; it will
|
|
||||||
* likely move once it has been properly plumbed into the bitmap
|
|
||||||
* shader infrastructure.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void scale(SkBitmap *dst) const;
|
|
||||||
|
|
||||||
friend struct SkBitmapProcState;
|
friend struct SkBitmapProcState;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -5,15 +5,23 @@
|
|||||||
* found in the LICENSE file.
|
* found in the LICENSE file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "SkErrorInternals.h"
|
||||||
|
#include "SkConvolver.h"
|
||||||
#include "SkBitmapProcState.h"
|
#include "SkBitmapProcState.h"
|
||||||
#include "SkBitmap.h"
|
#include "SkBitmap.h"
|
||||||
#include "SkColor.h"
|
#include "SkColor.h"
|
||||||
#include "SkColorPriv.h"
|
#include "SkColorPriv.h"
|
||||||
|
#include "SkConvolver.h"
|
||||||
#include "SkUnPreMultiply.h"
|
#include "SkUnPreMultiply.h"
|
||||||
#include "SkShader.h"
|
#include "SkShader.h"
|
||||||
#include "SkRTConf.h"
|
#include "SkRTConf.h"
|
||||||
#include "SkMath.h"
|
#include "SkMath.h"
|
||||||
|
|
||||||
|
// These are the per-scanline callbacks that are used when we must resort to
|
||||||
|
// resampling an image as it is blitted. Typically these are used only when
|
||||||
|
// the image is rotated or has some other complex transformation applied.
|
||||||
|
// Scaled images will usually be rescaled directly before rasterization.
|
||||||
|
|
||||||
void highQualityFilter(const SkBitmapProcState& s, int x, int y,
|
void highQualityFilter(const SkBitmapProcState& s, int x, int y,
|
||||||
SkPMColor* SK_RESTRICT colors, int count) {
|
SkPMColor* SK_RESTRICT colors, int count) {
|
||||||
|
|
||||||
@ -68,71 +76,15 @@ void highQualityFilter(const SkBitmapProcState& s, int x, int y,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void highQualityFilter_ScaleOnly(const SkBitmapProcState &s, int x, int y,
|
SK_CONF_DECLARE(const char *, c_bitmapFilter, "bitmap.filter", "mitchell", "Which scanline bitmap filter to use [mitchell, lanczos, hamming, gaussian, triangle, box]");
|
||||||
SkPMColor *SK_RESTRICT colors, int count) {
|
|
||||||
const int maxX = s.fBitmap->width() - 1;
|
|
||||||
const int maxY = s.fBitmap->height() - 1;
|
|
||||||
|
|
||||||
SkPoint srcPt;
|
SkBitmapFilter *SkBitmapFilter::Allocate() {
|
||||||
|
|
||||||
s.fInvProc(s.fInvMatrix, SkFloatToScalar(x + 0.5f),
|
|
||||||
SkFloatToScalar(y + 0.5f), &srcPt);
|
|
||||||
srcPt.fY -= SK_ScalarHalf;
|
|
||||||
int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY);
|
|
||||||
int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()), maxY);
|
|
||||||
|
|
||||||
while (count-- > 0) {
|
|
||||||
s.fInvProc(s.fInvMatrix, SkFloatToScalar(x + 0.5f),
|
|
||||||
SkFloatToScalar(y + 0.5f), &srcPt);
|
|
||||||
srcPt.fX -= SK_ScalarHalf;
|
|
||||||
srcPt.fY -= SK_ScalarHalf;
|
|
||||||
|
|
||||||
SkScalar weight = 0;
|
|
||||||
SkScalar fr = 0, fg = 0, fb = 0, fa = 0;
|
|
||||||
|
|
||||||
int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
|
|
||||||
int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width()), maxX);
|
|
||||||
|
|
||||||
for (int srcY = y0; srcY <= y1; srcY++) {
|
|
||||||
SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));
|
|
||||||
|
|
||||||
for (int srcX = x0; srcX <= x1 ; srcX++) {
|
|
||||||
SkScalar xWeight = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
|
|
||||||
|
|
||||||
SkScalar combined_weight = SkScalarMul(xWeight, yWeight);
|
|
||||||
|
|
||||||
SkPMColor c = *s.fBitmap->getAddr32(srcX, srcY);
|
|
||||||
fr += combined_weight * SkGetPackedR32(c);
|
|
||||||
fg += combined_weight * SkGetPackedG32(c);
|
|
||||||
fb += combined_weight * SkGetPackedB32(c);
|
|
||||||
fa += combined_weight * SkGetPackedA32(c);
|
|
||||||
weight += combined_weight;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fr = SkScalarDiv(fr, weight);
|
|
||||||
fg = SkScalarDiv(fg, weight);
|
|
||||||
fb = SkScalarDiv(fb, weight);
|
|
||||||
fa = SkScalarDiv(fa, weight);
|
|
||||||
|
|
||||||
int a = SkClampMax(SkScalarRoundToInt(fa), 255);
|
|
||||||
int r = SkClampMax(SkScalarRoundToInt(fr), a);
|
|
||||||
int g = SkClampMax(SkScalarRoundToInt(fg), a);
|
|
||||||
int b = SkClampMax(SkScalarRoundToInt(fb), a);
|
|
||||||
|
|
||||||
*colors++ = SkPackARGB32(a, r, g, b);
|
|
||||||
|
|
||||||
x++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
SK_CONF_DECLARE(const char *, c_bitmapFilter, "bitmap.filter", "mitchell", "Which bitmap filter to use [mitchell, sinc, gaussian, triangle, box]");
|
|
||||||
|
|
||||||
static SkBitmapFilter *allocateBitmapFilter() {
|
|
||||||
if (!strcmp(c_bitmapFilter, "mitchell")) {
|
if (!strcmp(c_bitmapFilter, "mitchell")) {
|
||||||
return SkNEW_ARGS(SkMitchellFilter,(1.f/3.f,1.f/3.f));
|
return SkNEW_ARGS(SkMitchellFilter,(1.f/3.f,1.f/3.f));
|
||||||
} else if (!strcmp(c_bitmapFilter, "sinc")) {
|
} else if (!strcmp(c_bitmapFilter, "lanczos")) {
|
||||||
return SkNEW_ARGS(SkSincFilter,(3));
|
return SkNEW(SkLanczosFilter);
|
||||||
|
} else if (!strcmp(c_bitmapFilter, "hamming")) {
|
||||||
|
return SkNEW(SkHammingFilter);
|
||||||
} else if (!strcmp(c_bitmapFilter, "gaussian")) {
|
} else if (!strcmp(c_bitmapFilter, "gaussian")) {
|
||||||
return SkNEW_ARGS(SkGaussianFilter,(2));
|
return SkNEW_ARGS(SkGaussianFilter,(2));
|
||||||
} else if (!strcmp(c_bitmapFilter, "triangle")) {
|
} else if (!strcmp(c_bitmapFilter, "triangle")) {
|
||||||
@ -168,159 +120,12 @@ SkBitmapProcState::chooseBitmapFilterProc() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (fInvType & (SkMatrix::kAffine_Mask | SkMatrix::kScale_Mask)) {
|
if (fInvType & (SkMatrix::kAffine_Mask | SkMatrix::kScale_Mask)) {
|
||||||
fBitmapFilter = allocateBitmapFilter();
|
fBitmapFilter = SkBitmapFilter::Allocate();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fInvType & SkMatrix::kAffine_Mask) {
|
if (fInvType & SkMatrix::kScale_Mask) {
|
||||||
return highQualityFilter;
|
return highQualityFilter;
|
||||||
} else if (fInvType & SkMatrix::kScale_Mask) {
|
|
||||||
return highQualityFilter_ScaleOnly;
|
|
||||||
} else {
|
} else {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void divideByWeights(SkScalar *sums, SkScalar *weights, SkBitmap *dst) {
|
|
||||||
for (int y = 0 ; y < dst->height() ; y++) {
|
|
||||||
for (int x = 0 ; x < dst->width() ; x++) {
|
|
||||||
SkScalar fr = SkScalarDiv(sums[4*(y*dst->width() + x) + 0], weights[y*dst->width() + x]);
|
|
||||||
SkScalar fg = SkScalarDiv(sums[4*(y*dst->width() + x) + 1], weights[y*dst->width() + x]);
|
|
||||||
SkScalar fb = SkScalarDiv(sums[4*(y*dst->width() + x) + 2], weights[y*dst->width() + x]);
|
|
||||||
SkScalar fa = SkScalarDiv(sums[4*(y*dst->width() + x) + 3], weights[y*dst->width() + x]);
|
|
||||||
int a = SkClampMax(SkScalarRoundToInt(fa), 255);
|
|
||||||
int r = SkClampMax(SkScalarRoundToInt(fr), a);
|
|
||||||
int g = SkClampMax(SkScalarRoundToInt(fg), a);
|
|
||||||
int b = SkClampMax(SkScalarRoundToInt(fb), a);
|
|
||||||
|
|
||||||
*dst->getAddr32(x,y) = SkPackARGB32(a, r, g, b);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void upScaleHorizTranspose(const SkBitmap *src, SkBitmap *dst, float scale, SkBitmapFilter *filter) {
|
|
||||||
for (int y = 0 ; y < dst->height() ; y++) {
|
|
||||||
for (int x = 0 ; x < dst->width() ; x++) {
|
|
||||||
float sx = (y + 0.5f) / scale - 0.5f;
|
|
||||||
int x0 = SkClampMax(sk_float_ceil2int(sx-filter->width()), src->width()-1);
|
|
||||||
int x1 = SkClampMax(sk_float_floor2int(sx+filter->width()), src->width()-1);
|
|
||||||
|
|
||||||
SkScalar totalWeight = 0;
|
|
||||||
SkScalar fr = 0, fg = 0, fb = 0, fa = 0;
|
|
||||||
|
|
||||||
for (int srcX = x0 ; srcX <= x1 ; srcX++) {
|
|
||||||
SkScalar weight = filter->lookupScalar(sx - srcX);
|
|
||||||
SkPMColor c = *src->getAddr32(srcX, x);
|
|
||||||
fr += SkScalarMul(weight,SkGetPackedR32(c));
|
|
||||||
fg += SkScalarMul(weight,SkGetPackedG32(c));
|
|
||||||
fb += SkScalarMul(weight,SkGetPackedB32(c));
|
|
||||||
fa += SkScalarMul(weight,SkGetPackedA32(c));
|
|
||||||
totalWeight += weight;
|
|
||||||
}
|
|
||||||
fr = SkScalarDiv(fr,totalWeight);
|
|
||||||
fg = SkScalarDiv(fg,totalWeight);
|
|
||||||
fb = SkScalarDiv(fb,totalWeight);
|
|
||||||
fa = SkScalarDiv(fa,totalWeight);
|
|
||||||
|
|
||||||
int a = SkClampMax(SkScalarRoundToInt(fa), 255);
|
|
||||||
int r = SkClampMax(SkScalarRoundToInt(fr), a);
|
|
||||||
int g = SkClampMax(SkScalarRoundToInt(fg), a);
|
|
||||||
int b = SkClampMax(SkScalarRoundToInt(fb), a);
|
|
||||||
|
|
||||||
*dst->getAddr32(x,y) = SkPackARGB32(a, r, g, b);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void downScaleHoriz(const SkBitmap *src, SkBitmap *dst, float scale, SkBitmapFilter *filter) {
|
|
||||||
SkScalar *sums = SkNEW_ARRAY(SkScalar, dst->width() * dst->height() * 4);
|
|
||||||
SkScalar *weights = SkNEW_ARRAY(SkScalar, dst->width() * dst->height());
|
|
||||||
|
|
||||||
SkAutoTDeleteArray<SkScalar> ada1(sums);
|
|
||||||
SkAutoTDeleteArray<SkScalar> ada2(weights);
|
|
||||||
|
|
||||||
memset(sums, 0, dst->width() * dst->height() * sizeof(SkScalar) * 4);
|
|
||||||
memset(weights, 0, dst->width() * dst->height() * sizeof(SkScalar));
|
|
||||||
|
|
||||||
for (int y = 0 ; y < src->height() ; y++) {
|
|
||||||
for (int x = 0 ; x < src->width() ; x++) {
|
|
||||||
// splat each source pixel into the destination image
|
|
||||||
float dx = (x + 0.5f) * scale - 0.5f;
|
|
||||||
int x0 = SkClampMax(sk_float_ceil2int(dx-filter->width()), dst->width()-1);
|
|
||||||
int x1 = SkClampMax(sk_float_floor2int(dx+filter->width()), dst->width()-1);
|
|
||||||
|
|
||||||
SkPMColor c = *src->getAddr32(x,y);
|
|
||||||
|
|
||||||
for (int dst_x = x0 ; dst_x <= x1 ; dst_x++) {
|
|
||||||
SkScalar weight = filter->lookup(dx - dst_x);
|
|
||||||
sums[4*(y*dst->width() + dst_x) + 0] += weight*SkGetPackedR32(c);
|
|
||||||
sums[4*(y*dst->width() + dst_x) + 1] += weight*SkGetPackedG32(c);
|
|
||||||
sums[4*(y*dst->width() + dst_x) + 2] += weight*SkGetPackedB32(c);
|
|
||||||
sums[4*(y*dst->width() + dst_x) + 3] += weight*SkGetPackedA32(c);
|
|
||||||
weights[y*dst->width() + dst_x] += weight;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
divideByWeights(sums, weights, dst);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void downScaleVert(const SkBitmap *src, SkBitmap *dst, float scale, SkBitmapFilter *filter) {
|
|
||||||
SkScalar *sums = SkNEW_ARRAY(SkScalar, dst->width() * dst->height() * 4);
|
|
||||||
SkScalar *weights = SkNEW_ARRAY(SkScalar, dst->width() * dst->height());
|
|
||||||
|
|
||||||
SkAutoTDeleteArray<SkScalar> ada1(sums);
|
|
||||||
SkAutoTDeleteArray<SkScalar> ada2(weights);
|
|
||||||
|
|
||||||
memset(sums, 0, dst->width() * dst->height() * sizeof(SkScalar) * 4);
|
|
||||||
memset(weights, 0, dst->width() * dst->height() * sizeof(SkScalar));
|
|
||||||
|
|
||||||
for (int y = 0 ; y < src->height() ; y++) {
|
|
||||||
for (int x = 0 ; x < src->width() ; x++) {
|
|
||||||
// splat each source pixel into the destination image
|
|
||||||
float dy = (y + 0.5f) * scale - 0.5f;
|
|
||||||
int y0 = SkClampMax(sk_float_ceil2int(dy-filter->width()), dst->height()-1);
|
|
||||||
int y1 = SkClampMax(sk_float_ceil2int(dy+filter->width()), dst->height()-1);
|
|
||||||
|
|
||||||
SkPMColor c = *src->getAddr32(x,y);
|
|
||||||
|
|
||||||
for (int dst_y = y0 ; dst_y <= y1 ; dst_y++) {
|
|
||||||
SkScalar weight = filter->lookupScalar(dy - dst_y);
|
|
||||||
sums[4*(dst_y*dst->width() + x) + 0] += weight*SkGetPackedR32(c);
|
|
||||||
sums[4*(dst_y*dst->width() + x) + 1] += weight*SkGetPackedG32(c);
|
|
||||||
sums[4*(dst_y*dst->width() + x) + 2] += weight*SkGetPackedB32(c);
|
|
||||||
sums[4*(dst_y*dst->width() + x) + 3] += weight*SkGetPackedA32(c);
|
|
||||||
weights[dst_y*dst->width() + x] += weight;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
divideByWeights(sums, weights, dst);
|
|
||||||
}
|
|
||||||
|
|
||||||
void SkBitmap::scale(SkBitmap *dst) const {
|
|
||||||
|
|
||||||
SkBitmap horizTemp;
|
|
||||||
|
|
||||||
horizTemp.setConfig(SkBitmap::kARGB_8888_Config, height(), dst->width());
|
|
||||||
horizTemp.allocPixels();
|
|
||||||
|
|
||||||
SkBitmapFilter *filter = allocateBitmapFilter();
|
|
||||||
|
|
||||||
float horizScale = float(dst->width()) / width();
|
|
||||||
|
|
||||||
if (horizScale >= 1) {
|
|
||||||
upScaleHorizTranspose(this, &horizTemp, horizScale, filter);
|
|
||||||
} else if (horizScale < 1) {
|
|
||||||
downScaleHoriz(this, &horizTemp, horizScale, filter);
|
|
||||||
}
|
|
||||||
|
|
||||||
float vertScale = float(dst->height()) / height();
|
|
||||||
|
|
||||||
if (vertScale >= 1) {
|
|
||||||
upScaleHorizTranspose(&horizTemp, dst, vertScale, filter);
|
|
||||||
} else if (vertScale < 1) {
|
|
||||||
downScaleVert(&horizTemp, dst, vertScale, filter);
|
|
||||||
}
|
|
||||||
|
|
||||||
SkDELETE(filter);
|
|
||||||
}
|
|
||||||
|
@ -26,28 +26,30 @@ class SkBitmapFilter {
|
|||||||
fLookupMultiplier = this->invWidth() * (SKBITMAP_FILTER_TABLE_SIZE-1);
|
fLookupMultiplier = this->invWidth() * (SKBITMAP_FILTER_TABLE_SIZE-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
SkFixed lookup( float x ) const {
|
SkFixed lookup(float x) const {
|
||||||
if (!fPrecomputed) {
|
if (!fPrecomputed) {
|
||||||
precomputeTable();
|
precomputeTable();
|
||||||
}
|
}
|
||||||
int filter_idx = int(sk_float_abs(x * fLookupMultiplier));
|
int filter_idx = int(sk_float_abs(x * fLookupMultiplier));
|
||||||
SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE);
|
SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE);
|
||||||
return fFilterTable[ filter_idx ];
|
return fFilterTable[filter_idx];
|
||||||
}
|
}
|
||||||
|
|
||||||
SkScalar lookupScalar( float x ) const {
|
SkScalar lookupScalar(float x) const {
|
||||||
if (!fPrecomputed) {
|
if (!fPrecomputed) {
|
||||||
precomputeTable();
|
precomputeTable();
|
||||||
}
|
}
|
||||||
int filter_idx = int(sk_float_abs(x * fLookupMultiplier));
|
int filter_idx = int(sk_float_abs(x * fLookupMultiplier));
|
||||||
SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE);
|
SkASSERT(filter_idx < SKBITMAP_FILTER_TABLE_SIZE);
|
||||||
return fFilterTableScalar[ filter_idx ];
|
return fFilterTableScalar[filter_idx];
|
||||||
}
|
}
|
||||||
|
|
||||||
float width() const { return fWidth; }
|
float width() const { return fWidth; }
|
||||||
float invWidth() const { return fInvWidth; }
|
float invWidth() const { return fInvWidth; }
|
||||||
virtual float evaluate(float x) const = 0;
|
virtual float evaluate(float x) const = 0;
|
||||||
virtual ~SkBitmapFilter() {}
|
virtual ~SkBitmapFilter() {}
|
||||||
|
|
||||||
|
static SkBitmapFilter* Allocate();
|
||||||
protected:
|
protected:
|
||||||
float fWidth;
|
float fWidth;
|
||||||
float fInvWidth;
|
float fInvWidth;
|
||||||
@ -126,29 +128,47 @@ class SkBoxFilter: public SkBitmapFilter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
virtual float evaluate(float x) const SK_OVERRIDE {
|
virtual float evaluate(float x) const SK_OVERRIDE {
|
||||||
return 1;
|
return (x >= -fWidth && x < fWidth) ? 1.0f : 0.0f;
|
||||||
}
|
}
|
||||||
protected:
|
protected:
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class SkHammingFilter: public SkBitmapFilter {
|
||||||
|
public:
|
||||||
|
SkHammingFilter(float width=1.f)
|
||||||
|
: SkBitmapFilter(width) {
|
||||||
|
}
|
||||||
|
virtual float evaluate(float x) const SK_OVERRIDE {
|
||||||
|
if (x <= -fWidth || x >= fWidth) {
|
||||||
|
return 0.0f; // Outside of the window.
|
||||||
|
}
|
||||||
|
if (x > -FLT_EPSILON && x < FLT_EPSILON) {
|
||||||
|
return 1.0f; // Special case the sinc discontinuity at the origin.
|
||||||
|
}
|
||||||
|
const float xpi = x * static_cast<float>(M_PI);
|
||||||
|
|
||||||
class SkSincFilter: public SkBitmapFilter {
|
return ((sk_float_sin(xpi) / xpi) * // sinc(x)
|
||||||
|
(0.54f + 0.46f * sk_float_cos(xpi / fWidth))); // hamming(x)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class SkLanczosFilter: public SkBitmapFilter {
|
||||||
public:
|
public:
|
||||||
SkSincFilter(float t, float width=3.f)
|
SkLanczosFilter(float width=3.f)
|
||||||
: SkBitmapFilter(width), tau(t) {
|
: SkBitmapFilter(width) {
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual float evaluate(float x) const SK_OVERRIDE {
|
virtual float evaluate(float x) const SK_OVERRIDE {
|
||||||
x = sk_float_abs(x * fInvWidth);
|
if (x <= -fWidth || x >= fWidth) {
|
||||||
if (x < 1e-5f) return 1.f;
|
return 0.0f; // Outside of the window.
|
||||||
if (x > 1.f) return 0.f;
|
}
|
||||||
x *= SK_ScalarPI;
|
if (x > -FLT_EPSILON && x < FLT_EPSILON) {
|
||||||
float sinc = sk_float_sin(x) / x;
|
return 1.0f; // Special case the discontinuity at the origin.
|
||||||
float lanczos = sk_float_sin(x * tau) / (x * tau);
|
}
|
||||||
return sinc * lanczos;
|
float xpi = x * static_cast<float>(M_PI);
|
||||||
}
|
return (sk_float_sin(xpi) / xpi) * // sinc(x)
|
||||||
protected:
|
sk_float_sin(xpi / fWidth) / (xpi / fWidth); // sinc(x/fWidth)
|
||||||
float tau;
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,6 +11,7 @@
|
|||||||
#include "SkPaint.h"
|
#include "SkPaint.h"
|
||||||
#include "SkShader.h" // for tilemodes
|
#include "SkShader.h" // for tilemodes
|
||||||
#include "SkUtilsArm.h"
|
#include "SkUtilsArm.h"
|
||||||
|
#include "SkBitmapScaler.h"
|
||||||
|
|
||||||
#if !SK_ARM_NEON_IS_NONE
|
#if !SK_ARM_NEON_IS_NONE
|
||||||
// These are defined in src/opts/SkBitmapProcState_arm_neon.cpp
|
// These are defined in src/opts/SkBitmapProcState_arm_neon.cpp
|
||||||
@ -99,23 +100,45 @@ void SkBitmapProcState::possiblyScaleImage() {
|
|||||||
if (fFilterQuality != kHQ_BitmapFilter) {
|
if (fFilterQuality != kHQ_BitmapFilter) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// see if our platform has any specialized convolution code.
|
||||||
|
|
||||||
|
|
||||||
|
// Set up a pointer to a local (instead of storing the structure in the
|
||||||
|
// proc state) to avoid introducing a header dependency; this makes
|
||||||
|
// recompiles a lot less painful.
|
||||||
|
|
||||||
|
SkConvolutionProcs simd;
|
||||||
|
fConvolutionProcs = &simd;
|
||||||
|
|
||||||
|
fConvolutionProcs->fExtraHorizontalReads = 0;
|
||||||
|
fConvolutionProcs->fConvolveVertically = NULL;
|
||||||
|
fConvolutionProcs->fConvolve4RowsHorizontally = NULL;
|
||||||
|
fConvolutionProcs->fConvolveHorizontally = NULL;
|
||||||
|
fConvolutionProcs->fApplySIMDPadding = NULL;
|
||||||
|
|
||||||
|
this->platformConvolutionProcs();
|
||||||
|
|
||||||
// STEP 1: UPSAMPLE?
|
// STEP 1: Highest quality direct scale?
|
||||||
|
|
||||||
// Check to see if the transformation matrix is scaling up, and if
|
// Check to see if the transformation matrix is simple, and if we're
|
||||||
// the matrix is simple, and if we're doing high quality scaling.
|
// doing high quality scaling. If so, do the bitmap scale here and
|
||||||
// If so, do the bitmap scale here and remove the scaling component from the matrix.
|
// remove the scaling component from the matrix.
|
||||||
|
|
||||||
if (fInvMatrix.getType() <= (SkMatrix::kScale_Mask | SkMatrix::kTranslate_Mask) &&
|
if (fFilterQuality == kHQ_BitmapFilter &&
|
||||||
(fInvMatrix.getScaleX() < 1 || fInvMatrix.getScaleY() < 1) &&
|
fInvMatrix.getType() <= (SkMatrix::kScale_Mask | SkMatrix::kTranslate_Mask) &&
|
||||||
fOrigBitmap.config() == SkBitmap::kARGB_8888_Config) {
|
fOrigBitmap.config() == SkBitmap::kARGB_8888_Config) {
|
||||||
|
|
||||||
|
int dest_width = SkScalarCeilToInt(fOrigBitmap.width() / fInvMatrix.getScaleX());
|
||||||
|
int dest_height = SkScalarCeilToInt(fOrigBitmap.height() / fInvMatrix.getScaleY());
|
||||||
|
|
||||||
// All the criteria are met; let's make a new bitmap.
|
// All the criteria are met; let's make a new bitmap.
|
||||||
fScaledBitmap.setConfig(SkBitmap::kARGB_8888_Config,
|
|
||||||
(int)(fOrigBitmap.width() / fInvMatrix.getScaleX()),
|
fScaledBitmap = SkBitmapScaler::Resize( fOrigBitmap, SkBitmapScaler::RESIZE_BEST,
|
||||||
(int)(fOrigBitmap.height() / fInvMatrix.getScaleY()));
|
dest_width, dest_height, fConvolutionProcs );
|
||||||
fScaledBitmap.allocPixels();
|
|
||||||
fOrigBitmap.scale(&fScaledBitmap);
|
fScaledBitmap.lockPixels();
|
||||||
|
|
||||||
fBitmap = &fScaledBitmap;
|
fBitmap = &fScaledBitmap;
|
||||||
|
|
||||||
// set the inv matrix type to translate-only;
|
// set the inv matrix type to translate-only;
|
||||||
@ -130,9 +153,9 @@ void SkBitmapProcState::possiblyScaleImage() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!fOrigBitmap.hasMipMap()) {
|
if (!fOrigBitmap.hasMipMap() && fFilterQuality != kNone_BitmapFilter) {
|
||||||
|
|
||||||
// STEP 2: DOWNSAMPLE
|
// STEP 2: MIPMAP DOWNSAMPLE?
|
||||||
|
|
||||||
// Check to see if the transformation matrix is scaling *down*.
|
// Check to see if the transformation matrix is scaling *down*.
|
||||||
// If so, automatically build mipmaps.
|
// If so, automatically build mipmaps.
|
||||||
|
@ -31,6 +31,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
class SkPaint;
|
class SkPaint;
|
||||||
|
class SkConvolutionProcs;
|
||||||
|
|
||||||
struct SkBitmapProcState {
|
struct SkBitmapProcState {
|
||||||
|
|
||||||
@ -59,7 +60,7 @@ struct SkBitmapProcState {
|
|||||||
const uint32_t[],
|
const uint32_t[],
|
||||||
int count,
|
int count,
|
||||||
uint16_t colors[]);
|
uint16_t colors[]);
|
||||||
|
|
||||||
typedef U16CPU (*FixedTileProc)(SkFixed); // returns 0..0xFFFF
|
typedef U16CPU (*FixedTileProc)(SkFixed); // returns 0..0xFFFF
|
||||||
typedef U16CPU (*FixedTileLowBitsProc)(SkFixed, int); // returns 0..0xF
|
typedef U16CPU (*FixedTileLowBitsProc)(SkFixed, int); // returns 0..0xF
|
||||||
typedef U16CPU (*IntTileProc)(int value, int count); // returns 0..count-1
|
typedef U16CPU (*IntTileProc)(int value, int count); // returns 0..count-1
|
||||||
@ -78,6 +79,8 @@ struct SkBitmapProcState {
|
|||||||
IntTileProc fIntTileProcY; // chooseProcs
|
IntTileProc fIntTileProcY; // chooseProcs
|
||||||
SkFixed fFilterOneX;
|
SkFixed fFilterOneX;
|
||||||
SkFixed fFilterOneY;
|
SkFixed fFilterOneY;
|
||||||
|
|
||||||
|
SkConvolutionProcs* fConvolutionProcs; // possiblyScaleImage
|
||||||
|
|
||||||
SkPMColor fPaintPMColor; // chooseProcs - A8 config
|
SkPMColor fPaintPMColor; // chooseProcs - A8 config
|
||||||
SkFixed fInvSx; // chooseProcs
|
SkFixed fInvSx; // chooseProcs
|
||||||
@ -113,7 +116,12 @@ struct SkBitmapProcState {
|
|||||||
implementation can do nothing (see SkBitmapProcState_opts_none.cpp)
|
implementation can do nothing (see SkBitmapProcState_opts_none.cpp)
|
||||||
*/
|
*/
|
||||||
void platformProcs();
|
void platformProcs();
|
||||||
|
|
||||||
|
/** Platforms can also optionally overwrite the convolution functions
|
||||||
|
if we have SIMD versions of them.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void platformConvolutionProcs();
|
||||||
|
|
||||||
/** Given the byte size of the index buffer to be passed to the matrix proc,
|
/** Given the byte size of the index buffer to be passed to the matrix proc,
|
||||||
return the maximum number of resulting pixels that can be computed
|
return the maximum number of resulting pixels that can be computed
|
||||||
@ -160,7 +168,7 @@ private:
|
|||||||
|
|
||||||
void possiblyScaleImage();
|
void possiblyScaleImage();
|
||||||
|
|
||||||
SkBitmapFilter *fBitmapFilter;
|
SkBitmapFilter* fBitmapFilter;
|
||||||
|
|
||||||
ShaderProc32 chooseBitmapFilterProc();
|
ShaderProc32 chooseBitmapFilterProc();
|
||||||
|
|
||||||
@ -218,8 +226,6 @@ void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s,
|
|||||||
void S32_D16_filter_DX(const SkBitmapProcState& s,
|
void S32_D16_filter_DX(const SkBitmapProcState& s,
|
||||||
const uint32_t* xy, int count, uint16_t* colors);
|
const uint32_t* xy, int count, uint16_t* colors);
|
||||||
|
|
||||||
void highQualityFilter_ScaleOnly(const SkBitmapProcState &s, int x, int y,
|
|
||||||
SkPMColor *SK_RESTRICT colors, int count);
|
|
||||||
void highQualityFilter(const SkBitmapProcState &s, int x, int y,
|
void highQualityFilter(const SkBitmapProcState &s, int x, int y,
|
||||||
SkPMColor *SK_RESTRICT colors, int count);
|
SkPMColor *SK_RESTRICT colors, int count);
|
||||||
|
|
||||||
|
315
src/core/SkBitmapScaler.cpp
Normal file
315
src/core/SkBitmapScaler.cpp
Normal file
@ -0,0 +1,315 @@
|
|||||||
|
#include "SkBitmapScaler.h"
|
||||||
|
#include "SkBitmapFilter.h"
|
||||||
|
#include "SkRect.h"
|
||||||
|
#include "SkTArray.h"
|
||||||
|
#include "SkErrorInternals.h"
|
||||||
|
#include "SkConvolver.h"
|
||||||
|
|
||||||
|
// SkResizeFilter ----------------------------------------------------------------
|
||||||
|
|
||||||
|
// Encapsulates computation and storage of the filters required for one complete
|
||||||
|
// resize operation.
|
||||||
|
class SkResizeFilter {
|
||||||
|
public:
|
||||||
|
SkResizeFilter(SkBitmapScaler::ResizeMethod method,
|
||||||
|
int srcFullWidth, int srcFullHeight,
|
||||||
|
int destWidth, int destHeight,
|
||||||
|
const SkIRect& destSubset,
|
||||||
|
SkConvolutionProcs* convolveProcs);
|
||||||
|
~SkResizeFilter() {
|
||||||
|
SkDELETE( fBitmapFilter );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the filled filter values.
|
||||||
|
const SkConvolutionFilter1D& xFilter() { return fXFilter; }
|
||||||
|
const SkConvolutionFilter1D& yFilter() { return fYFilter; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
SkBitmapFilter* fBitmapFilter;
|
||||||
|
|
||||||
|
// Computes one set of filters either horizontally or vertically. The caller
|
||||||
|
// will specify the "min" and "max" rather than the left/right and
|
||||||
|
// top/bottom so that the same code can be re-used in each dimension.
|
||||||
|
//
|
||||||
|
// |srcDependLo| and |srcDependSize| give the range for the source
|
||||||
|
// depend rectangle (horizontally or vertically at the caller's discretion
|
||||||
|
// -- see above for what this means).
|
||||||
|
//
|
||||||
|
// Likewise, the range of destination values to compute and the scale factor
|
||||||
|
// for the transform are also specified.
|
||||||
|
|
||||||
|
void computeFilters(int srcSize,
|
||||||
|
int destSubsetLo, int destSubsetSize,
|
||||||
|
float scale,
|
||||||
|
SkConvolutionFilter1D* output,
|
||||||
|
SkConvolutionProcs* convolveProcs);
|
||||||
|
|
||||||
|
// Subset of scaled destination bitmap to compute.
|
||||||
|
SkIRect fOutBounds;
|
||||||
|
|
||||||
|
SkConvolutionFilter1D fXFilter;
|
||||||
|
SkConvolutionFilter1D fYFilter;
|
||||||
|
};
|
||||||
|
|
||||||
|
SkResizeFilter::SkResizeFilter(SkBitmapScaler::ResizeMethod method,
|
||||||
|
int srcFullWidth, int srcFullHeight,
|
||||||
|
int destWidth, int destHeight,
|
||||||
|
const SkIRect& destSubset,
|
||||||
|
SkConvolutionProcs* convolveProcs)
|
||||||
|
: fOutBounds(destSubset) {
|
||||||
|
|
||||||
|
// method will only ever refer to an "algorithm method".
|
||||||
|
SkASSERT((SkBitmapScaler::RESIZE_FIRST_ALGORITHM_METHOD <= method) &&
|
||||||
|
(method <= SkBitmapScaler::RESIZE_LAST_ALGORITHM_METHOD));
|
||||||
|
|
||||||
|
switch(method) {
|
||||||
|
case SkBitmapScaler::RESIZE_BOX:
|
||||||
|
fBitmapFilter = SkNEW(SkBoxFilter);
|
||||||
|
break;
|
||||||
|
case SkBitmapScaler::RESIZE_TRIANGLE:
|
||||||
|
fBitmapFilter = SkNEW(SkTriangleFilter);
|
||||||
|
break;
|
||||||
|
case SkBitmapScaler::RESIZE_MITCHELL:
|
||||||
|
fBitmapFilter = SkNEW_ARGS(SkMitchellFilter, (1.f/3.f, 1.f/3.f));
|
||||||
|
break;
|
||||||
|
case SkBitmapScaler::RESIZE_HAMMING:
|
||||||
|
fBitmapFilter = SkNEW(SkHammingFilter);
|
||||||
|
break;
|
||||||
|
case SkBitmapScaler::RESIZE_LANCZOS3:
|
||||||
|
fBitmapFilter = SkNEW(SkLanczosFilter);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// NOTREACHED:
|
||||||
|
fBitmapFilter = SkNEW_ARGS(SkMitchellFilter, (1.f/3.f, 1.f/3.f));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
float scaleX = static_cast<float>(destWidth) /
|
||||||
|
static_cast<float>(srcFullWidth);
|
||||||
|
float scaleY = static_cast<float>(destHeight) /
|
||||||
|
static_cast<float>(srcFullHeight);
|
||||||
|
|
||||||
|
this->computeFilters(srcFullWidth, destSubset.fLeft, destSubset.width(),
|
||||||
|
scaleX, &fXFilter, convolveProcs);
|
||||||
|
this->computeFilters(srcFullHeight, destSubset.fTop, destSubset.height(),
|
||||||
|
scaleY, &fYFilter, convolveProcs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO(egouriou): Take advantage of periods in the convolution.
|
||||||
|
// Practical resizing filters are periodic outside of the border area.
|
||||||
|
// For Lanczos, a scaling by a (reduced) factor of p/q (q pixels in the
|
||||||
|
// source become p pixels in the destination) will have a period of p.
|
||||||
|
// A nice consequence is a period of 1 when downscaling by an integral
|
||||||
|
// factor. Downscaling from typical display resolutions is also bound
|
||||||
|
// to produce interesting periods as those are chosen to have multiple
|
||||||
|
// small factors.
|
||||||
|
// Small periods reduce computational load and improve cache usage if
|
||||||
|
// the coefficients can be shared. For periods of 1 we can consider
|
||||||
|
// loading the factors only once outside the borders.
|
||||||
|
void SkResizeFilter::computeFilters(int srcSize,
|
||||||
|
int destSubsetLo, int destSubsetSize,
|
||||||
|
float scale,
|
||||||
|
SkConvolutionFilter1D* output,
|
||||||
|
SkConvolutionProcs* convolveProcs) {
|
||||||
|
int destSubsetHi = destSubsetLo + destSubsetSize; // [lo, hi)
|
||||||
|
|
||||||
|
// When we're doing a magnification, the scale will be larger than one. This
|
||||||
|
// means the destination pixels are much smaller than the source pixels, and
|
||||||
|
// that the range covered by the filter won't necessarily cover any source
|
||||||
|
// pixel boundaries. Therefore, we use these clamped values (max of 1) for
|
||||||
|
// some computations.
|
||||||
|
float clampedScale = SkTMin(1.0f, scale);
|
||||||
|
|
||||||
|
// This is how many source pixels from the center we need to count
|
||||||
|
// to support the filtering function.
|
||||||
|
float srcSupport = fBitmapFilter->width() / clampedScale;
|
||||||
|
|
||||||
|
// Speed up the divisions below by turning them into multiplies.
|
||||||
|
float invScale = 1.0f / scale;
|
||||||
|
|
||||||
|
SkTArray<float> filterValues(64);
|
||||||
|
SkTArray<short> fixedFilterValues(64);
|
||||||
|
|
||||||
|
// Loop over all pixels in the output range. We will generate one set of
|
||||||
|
// filter values for each one. Those values will tell us how to blend the
|
||||||
|
// source pixels to compute the destination pixel.
|
||||||
|
for (int destSubsetI = destSubsetLo; destSubsetI < destSubsetHi;
|
||||||
|
destSubsetI++) {
|
||||||
|
// Reset the arrays. We don't declare them inside so they can re-use the
|
||||||
|
// same malloc-ed buffer.
|
||||||
|
filterValues.reset();
|
||||||
|
fixedFilterValues.reset();
|
||||||
|
|
||||||
|
// This is the pixel in the source directly under the pixel in the dest.
|
||||||
|
// Note that we base computations on the "center" of the pixels. To see
|
||||||
|
// why, observe that the destination pixel at coordinates (0, 0) in a 5.0x
|
||||||
|
// downscale should "cover" the pixels around the pixel with *its center*
|
||||||
|
// at coordinates (2.5, 2.5) in the source, not those around (0, 0).
|
||||||
|
// Hence we need to scale coordinates (0.5, 0.5), not (0, 0).
|
||||||
|
float srcPixel = (static_cast<float>(destSubsetI) + 0.5f) * invScale;
|
||||||
|
|
||||||
|
// Compute the (inclusive) range of source pixels the filter covers.
|
||||||
|
int srcBegin = SkTMax(0, SkScalarFloorToInt(srcPixel - srcSupport));
|
||||||
|
int srcEnd = SkTMin(srcSize - 1, SkScalarCeilToInt(srcPixel + srcSupport));
|
||||||
|
|
||||||
|
// Compute the unnormalized filter value at each location of the source
|
||||||
|
// it covers.
|
||||||
|
float filterSum = 0.0f; // Sub of the filter values for normalizing.
|
||||||
|
for (int curFilterPixel = srcBegin; curFilterPixel <= srcEnd;
|
||||||
|
curFilterPixel++) {
|
||||||
|
// Distance from the center of the filter, this is the filter coordinate
|
||||||
|
// in source space. We also need to consider the center of the pixel
|
||||||
|
// when comparing distance against 'srcPixel'. In the 5x downscale
|
||||||
|
// example used above the distance from the center of the filter to
|
||||||
|
// the pixel with coordinates (2, 2) should be 0, because its center
|
||||||
|
// is at (2.5, 2.5).
|
||||||
|
float srcFilterDist =
|
||||||
|
((static_cast<float>(curFilterPixel) + 0.5f) - srcPixel);
|
||||||
|
|
||||||
|
// Since the filter really exists in dest space, map it there.
|
||||||
|
float destFilterDist = srcFilterDist * clampedScale;
|
||||||
|
|
||||||
|
// Compute the filter value at that location.
|
||||||
|
float filterValue = fBitmapFilter->evaluate(destFilterDist);
|
||||||
|
filterValues.push_back(filterValue);
|
||||||
|
|
||||||
|
filterSum += filterValue;
|
||||||
|
}
|
||||||
|
SkASSERT(!filterValues.empty());
|
||||||
|
|
||||||
|
// The filter must be normalized so that we don't affect the brightness of
|
||||||
|
// the image. Convert to normalized fixed point.
|
||||||
|
short fixedSum = 0;
|
||||||
|
for (int i = 0; i < filterValues.count(); i++) {
|
||||||
|
short curFixed = output->FloatToFixed(filterValues[i] / filterSum);
|
||||||
|
fixedSum += curFixed;
|
||||||
|
fixedFilterValues.push_back(curFixed);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The conversion to fixed point will leave some rounding errors, which
|
||||||
|
// we add back in to avoid affecting the brightness of the image. We
|
||||||
|
// arbitrarily add this to the center of the filter array (this won't always
|
||||||
|
// be the center of the filter function since it could get clipped on the
|
||||||
|
// edges, but it doesn't matter enough to worry about that case).
|
||||||
|
short leftovers = output->FloatToFixed(1.0f) - fixedSum;
|
||||||
|
fixedFilterValues[fixedFilterValues.count() / 2] += leftovers;
|
||||||
|
|
||||||
|
// Now it's ready to go.
|
||||||
|
output->AddFilter(srcBegin, &fixedFilterValues[0],
|
||||||
|
static_cast<int>(fixedFilterValues.count()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (convolveProcs->fApplySIMDPadding) {
|
||||||
|
convolveProcs->fApplySIMDPadding( output );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Translates a "quality" resize method into the concrete software algorithm
// used to honor it. Methods that already name an algorithm pass through
// unchanged.
static SkBitmapScaler::ResizeMethod ResizeMethodToAlgorithmMethod(
                                    SkBitmapScaler::ResizeMethod method) {
    const bool namesAnAlgorithm =
        SkBitmapScaler::RESIZE_FIRST_ALGORITHM_METHOD <= method &&
        method <= SkBitmapScaler::RESIZE_LAST_ALGORITHM_METHOD;
    if (namesAnAlgorithm) {
        return method;
    }

    // RESIZE_GOOD callers accept a large quality loss in exchange for speed
    // (even plain linear resampling), so any of the "good" software filters
    // is adequate; a triangle filter is used.
    if (SkBitmapScaler::RESIZE_GOOD == method) {
        return SkBitmapScaler::RESIZE_TRIANGLE;
    }

    // RESIZE_BETTER callers give up some quality for performance but must not
    // degrade to linear resampling. Hamming-1 looks worse than Lanczos-2 in
    // visual tests, yet it is about 40% faster (and Lanczos-2 is itself about
    // 30% faster than Lanczos-3), which was judged an acceptable trade-off.
    if (SkBitmapScaler::RESIZE_BETTER == method) {
        return SkBitmapScaler::RESIZE_HAMMING;
    }

    // Everything else (RESIZE_BEST included) gets the Mitchell filter.
    return SkBitmapScaler::RESIZE_MITCHELL;
}
|
||||||
|
|
||||||
|
// static
|
||||||
|
SkBitmap SkBitmapScaler::Resize(const SkBitmap& source,
|
||||||
|
ResizeMethod method,
|
||||||
|
int destWidth, int destHeight,
|
||||||
|
const SkIRect& destSubset,
|
||||||
|
SkConvolutionProcs* convolveProcs,
|
||||||
|
SkBitmap::Allocator* allocator) {
|
||||||
|
// Ensure that the ResizeMethod enumeration is sound.
|
||||||
|
SkASSERT(((RESIZE_FIRST_QUALITY_METHOD <= method) &&
|
||||||
|
(method <= RESIZE_LAST_QUALITY_METHOD)) ||
|
||||||
|
((RESIZE_FIRST_ALGORITHM_METHOD <= method) &&
|
||||||
|
(method <= RESIZE_LAST_ALGORITHM_METHOD)));
|
||||||
|
|
||||||
|
SkIRect dest = { 0, 0, destWidth, destHeight };
|
||||||
|
if (!dest.contains(destSubset)) {
|
||||||
|
SkErrorInternals::SetError( kInvalidArgument_SkError,
|
||||||
|
"Sorry, you passed me a bitmap resize "
|
||||||
|
" method I have never heard of: %d",
|
||||||
|
method );
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the size of source or destination is 0, i.e. 0x0, 0xN or Nx0, just
|
||||||
|
// return empty.
|
||||||
|
if (source.width() < 1 || source.height() < 1 ||
|
||||||
|
destWidth < 1 || destHeight < 1) {
|
||||||
|
return SkBitmap();
|
||||||
|
}
|
||||||
|
|
||||||
|
method = ResizeMethodToAlgorithmMethod(method);
|
||||||
|
|
||||||
|
// Check that we deal with an "algorithm methods" from this point onward.
|
||||||
|
SkASSERT((SkBitmapScaler::RESIZE_FIRST_ALGORITHM_METHOD <= method) &&
|
||||||
|
(method <= SkBitmapScaler::RESIZE_LAST_ALGORITHM_METHOD));
|
||||||
|
|
||||||
|
SkAutoLockPixels locker(source);
|
||||||
|
if (!source.readyToDraw() || source.config() != SkBitmap::kARGB_8888_Config)
|
||||||
|
return SkBitmap();
|
||||||
|
|
||||||
|
SkResizeFilter filter(method, source.width(), source.height(),
|
||||||
|
destWidth, destHeight, destSubset, convolveProcs);
|
||||||
|
|
||||||
|
// Get a source bitmap encompassing this touched area. We construct the
|
||||||
|
// offsets and row strides such that it looks like a new bitmap, while
|
||||||
|
// referring to the old data.
|
||||||
|
const unsigned char* sourceSubset =
|
||||||
|
reinterpret_cast<const unsigned char*>(source.getPixels());
|
||||||
|
|
||||||
|
// Convolve into the result.
|
||||||
|
SkBitmap result;
|
||||||
|
result.setConfig(SkBitmap::kARGB_8888_Config,
|
||||||
|
destSubset.width(), destSubset.height());
|
||||||
|
result.allocPixels(allocator, NULL);
|
||||||
|
if (!result.readyToDraw())
|
||||||
|
return SkBitmap();
|
||||||
|
|
||||||
|
BGRAConvolve2D(sourceSubset, static_cast<int>(source.rowBytes()),
|
||||||
|
!source.isOpaque(), filter.xFilter(), filter.yFilter(),
|
||||||
|
static_cast<int>(result.rowBytes()),
|
||||||
|
static_cast<unsigned char*>(result.getPixels()),
|
||||||
|
convolveProcs, true);
|
||||||
|
|
||||||
|
// Preserve the "opaque" flag for use as an optimization later.
|
||||||
|
result.setIsOpaque(source.isOpaque());
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// static
|
||||||
|
SkBitmap SkBitmapScaler::Resize(const SkBitmap& source,
|
||||||
|
ResizeMethod method,
|
||||||
|
int destWidth, int destHeight,
|
||||||
|
SkConvolutionProcs* convolveProcs,
|
||||||
|
SkBitmap::Allocator* allocator) {
|
||||||
|
SkIRect destSubset = { 0, 0, destWidth, destHeight };
|
||||||
|
return Resize(source, method, destWidth, destHeight, destSubset,
|
||||||
|
convolveProcs, allocator);
|
||||||
|
}
|
106
src/core/SkBitmapScaler.h
Normal file
106
src/core/SkBitmapScaler.h
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2013 Google Inc.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license that can be
|
||||||
|
* found in the LICENSE file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SkBitmapScaler_DEFINED
|
||||||
|
#define SkBitmapScaler_DEFINED
|
||||||
|
|
||||||
|
#include "SkBitmap.h"
|
||||||
|
#include "SkConvolver.h"
|
||||||
|
|
||||||
|
/** \class SkBitmapScaler

    Provides the interface for high quality image resampling.

    All entry points are static; a caller picks a ResizeMethod (either a
    quality level or a specific filter algorithm) and receives a new bitmap.
*/

class SK_API SkBitmapScaler {
public:
    enum ResizeMethod {
        // Quality Methods
        //
        // Those enumeration values express a desired quality/speed tradeoff.
        // They are translated into an algorithm-specific method that depends
        // on the capabilities (CPU, GPU) of the underlying platform.
        // It is possible for all three methods to be mapped to the same
        // algorithm on a given platform.

        // Good quality resizing. Fastest resizing with acceptable visual quality.
        // This is typically intended for use during interactive layouts
        // where slower platforms may want to trade image quality for large
        // increase in resizing performance.
        //
        // For example the resizing implementation may devolve to linear
        // filtering if this enables GPU acceleration to be used.
        //
        // Note that the underlying resizing method may be determined
        // on the fly based on the parameters for a given resize call.
        // For example an implementation using a GPU-based linear filter
        // in the common case may still use a higher-quality software-based
        // filter in cases where using the GPU would actually be slower - due
        // to too much latency - or impossible - due to image format or size
        // constraints.
        //
        // The software implementation maps this to RESIZE_TRIANGLE.
        RESIZE_GOOD,

        // Medium quality resizing. Close to high quality resizing (better
        // than linear interpolation) with potentially some quality being
        // traded-off for additional speed compared to RESIZE_BEST.
        //
        // This is intended, for example, for generation of large thumbnails
        // (hundreds of pixels in each dimension) from large sources, where
        // a linear filter would produce too many artifacts but where
        // a RESIZE_BEST might be too costly time-wise.
        //
        // The software implementation maps this to RESIZE_HAMMING.
        RESIZE_BETTER,

        // High quality resizing. The algorithm is picked to favor image quality.
        //
        // The software implementation maps this to RESIZE_MITCHELL.
        RESIZE_BEST,

        //
        // Algorithm-specific enumerations
        //

        // Box filter. This is a weighted average of all of the pixels touching
        // the destination pixel. For enlargement, this is nearest neighbor.
        //
        // You probably don't want this, it is here for testing since it is easy to
        // compute. Use RESIZE_LANCZOS3 instead.
        RESIZE_BOX,
        // Triangle filter (implemented by SkTriangleFilter).
        RESIZE_TRIANGLE,
        // Lanczos filter (implemented by SkLanczosFilter).
        RESIZE_LANCZOS3,
        // Hamming filter (implemented by SkHammingFilter).
        RESIZE_HAMMING,
        // Mitchell filter (SkMitchellFilter constructed with 1/3, 1/3).
        RESIZE_MITCHELL,

        // enum aliases for first and last methods by algorithm or by quality.
        // The implementation range-checks methods against these, so new
        // enumerators must be added inside the matching range.
        RESIZE_FIRST_QUALITY_METHOD = RESIZE_GOOD,
        RESIZE_LAST_QUALITY_METHOD = RESIZE_BEST,
        RESIZE_FIRST_ALGORITHM_METHOD = RESIZE_BOX,
        RESIZE_LAST_ALGORITHM_METHOD = RESIZE_MITCHELL,
    };

    // Resizes the given source bitmap using the specified resize method, so that
    // the entire image is (dest_width, dest_height) big. The dest_subset is the
    // rectangle in this destination image that should actually be returned.
    //
    // The output image will be (dest_subset.width(), dest_subset.height()). This
    // will save work if you do not need the entire bitmap.
    //
    // The destination subset must be contained within the destination image.
    //
    // An empty bitmap is returned when the source or destination has a zero
    // dimension, when the source is not an ARGB_8888 bitmap that is ready to
    // draw, or when the result pixels cannot be allocated.
    //
    // NOTE(review): convolveProcs defaults to NULL here, but the implementation
    // reads its fields without a NULL check - callers should pass a valid
    // SkConvolutionProcs until that is confirmed safe.
    static SkBitmap Resize(const SkBitmap& source,
                           ResizeMethod method,
                           int dest_width, int dest_height,
                           const SkIRect& dest_subset,
                           SkConvolutionProcs *convolveProcs = NULL,
                           SkBitmap::Allocator* allocator = NULL);

    // Alternate version for resizing and returning the entire bitmap rather than
    // a subset. Equivalent to calling the overload above with a dest_subset of
    // (0, 0, dest_width, dest_height).
    static SkBitmap Resize(const SkBitmap& source,
                           ResizeMethod method,
                           int dest_width, int dest_height,
                           SkConvolutionProcs *convolveProcs = NULL,
                           SkBitmap::Allocator* allocator = NULL);
};
|
||||||
|
|
||||||
|
#endif
|
473
src/core/SkConvolver.cpp
Normal file
473
src/core/SkConvolver.cpp
Normal file
@ -0,0 +1,473 @@
|
|||||||
|
// Copyright (c) 2011 The Chromium Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
#include "SkConvolver.h"
|
||||||
|
#include "SkSize.h"
|
||||||
|
#include "SkTypes.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Saturates |a| into the 8-bit range: values below 0 become 0, values above
// 255 become 255, everything else is returned unchanged.
inline unsigned char ClampTo8(int a) {
    // Reinterpreting as unsigned turns every negative input into a huge value,
    // so a single comparison detects both kinds of out-of-range input.
    const bool outOfRange = static_cast<unsigned>(a) > 255u;
    if (outOfRange) {
        return (a < 0) ? 0 : 255;
    }
    return a;
}
|
||||||
|
|
||||||
|
// Takes the value produced by accumulating element-wise product of image with
|
||||||
|
// a kernel and brings it back into range.
|
||||||
|
// All of the filter scaling factors are in fixed point with kShiftBits bits of
|
||||||
|
// fractional part.
|
||||||
|
inline unsigned char BringBackTo8(int a, bool takeAbsolute) {
|
||||||
|
a >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
if (takeAbsolute) {
|
||||||
|
a = abs(a);
|
||||||
|
}
|
||||||
|
return ClampTo8(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores a list of rows in a circular buffer. The usage is you write into it
|
||||||
|
// by calling AdvanceRow. It will keep track of which row in the buffer it
|
||||||
|
// should use next, and the total number of rows added.
|
||||||
|
class CircularRowBuffer {
|
||||||
|
public:
|
||||||
|
// The number of pixels in each row is given in |sourceRowPixelWidth|.
|
||||||
|
// The maximum number of rows needed in the buffer is |maxYFilterSize|
|
||||||
|
// (we only need to store enough rows for the biggest filter).
|
||||||
|
//
|
||||||
|
// We use the |firstInputRow| to compute the coordinates of all of the
|
||||||
|
// following rows returned by Advance().
|
||||||
|
CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,
|
||||||
|
int firstInputRow)
|
||||||
|
: fRowByteWidth(destRowPixelWidth * 4),
|
||||||
|
fNumRows(maxYFilterSize),
|
||||||
|
fNextRow(0),
|
||||||
|
fNextRowCoordinate(firstInputRow) {
|
||||||
|
fBuffer.reset(fRowByteWidth * maxYFilterSize);
|
||||||
|
fRowAddresses.reset(fNumRows);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Moves to the next row in the buffer, returning a pointer to the beginning
|
||||||
|
// of it.
|
||||||
|
unsigned char* advanceRow() {
|
||||||
|
unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];
|
||||||
|
fNextRowCoordinate++;
|
||||||
|
|
||||||
|
// Set the pointer to the next row to use, wrapping around if necessary.
|
||||||
|
fNextRow++;
|
||||||
|
if (fNextRow == fNumRows) {
|
||||||
|
fNextRow = 0;
|
||||||
|
}
|
||||||
|
return row;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a pointer to an "unrolled" array of rows. These rows will start
|
||||||
|
// at the y coordinate placed into |*firstRowIndex| and will continue in
|
||||||
|
// order for the maximum number of rows in this circular buffer.
|
||||||
|
//
|
||||||
|
// The |firstRowIndex_| may be negative. This means the circular buffer
|
||||||
|
// starts before the top of the image (it hasn't been filled yet).
|
||||||
|
unsigned char* const* GetRowAddresses(int* firstRowIndex) {
|
||||||
|
// Example for a 4-element circular buffer holding coords 6-9.
|
||||||
|
// Row 0 Coord 8
|
||||||
|
// Row 1 Coord 9
|
||||||
|
// Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10.
|
||||||
|
// Row 3 Coord 7
|
||||||
|
//
|
||||||
|
// The "next" row is also the first (lowest) coordinate. This computation
|
||||||
|
// may yield a negative value, but that's OK, the math will work out
|
||||||
|
// since the user of this buffer will compute the offset relative
|
||||||
|
// to the firstRowIndex and the negative rows will never be used.
|
||||||
|
*firstRowIndex = fNextRowCoordinate - fNumRows;
|
||||||
|
|
||||||
|
int curRow = fNextRow;
|
||||||
|
for (int i = 0; i < fNumRows; i++) {
|
||||||
|
fRowAddresses[i] = &fBuffer[curRow * fRowByteWidth];
|
||||||
|
|
||||||
|
// Advance to the next row, wrapping if necessary.
|
||||||
|
curRow++;
|
||||||
|
if (curRow == fNumRows) {
|
||||||
|
curRow = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return &fRowAddresses[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// The buffer storing the rows. They are packed, each one fRowByteWidth.
|
||||||
|
SkTArray<unsigned char> fBuffer;
|
||||||
|
|
||||||
|
// Number of bytes per row in the |buffer|.
|
||||||
|
int fRowByteWidth;
|
||||||
|
|
||||||
|
// The number of rows available in the buffer.
|
||||||
|
int fNumRows;
|
||||||
|
|
||||||
|
// The next row index we should write into. This wraps around as the
|
||||||
|
// circular buffer is used.
|
||||||
|
int fNextRow;
|
||||||
|
|
||||||
|
// The y coordinate of the |fNextRow|. This is incremented each time a
|
||||||
|
// new row is appended and does not wrap.
|
||||||
|
int fNextRowCoordinate;
|
||||||
|
|
||||||
|
// Buffer used by GetRowAddresses().
|
||||||
|
SkTArray<unsigned char*> fRowAddresses;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Convolves horizontally along a single row. The row data is given in
|
||||||
|
// |srcData| and continues for the numValues() of the filter.
|
||||||
|
template<bool hasAlpha>
|
||||||
|
void ConvolveHorizontally(const unsigned char* srcData,
|
||||||
|
const SkConvolutionFilter1D& filter,
|
||||||
|
unsigned char* outRow) {
|
||||||
|
// Loop over each pixel on this row in the output image.
|
||||||
|
int numValues = filter.numValues();
|
||||||
|
for (int outX = 0; outX < numValues; outX++) {
|
||||||
|
// Get the filter that determines the current output pixel.
|
||||||
|
int filterOffset, filterLength;
|
||||||
|
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
|
||||||
|
filter.FilterForValue(outX, &filterOffset, &filterLength);
|
||||||
|
|
||||||
|
// Compute the first pixel in this row that the filter affects. It will
|
||||||
|
// touch |filterLength| pixels (4 bytes each) after this.
|
||||||
|
const unsigned char* rowToFilter = &srcData[filterOffset * 4];
|
||||||
|
|
||||||
|
// Apply the filter to the row to get the destination pixel in |accum|.
|
||||||
|
int accum[4] = {0};
|
||||||
|
for (int filterX = 0; filterX < filterLength; filterX++) {
|
||||||
|
SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterX];
|
||||||
|
accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
|
||||||
|
accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
|
||||||
|
accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
|
||||||
|
if (hasAlpha) {
|
||||||
|
accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bring this value back in range. All of the filter scaling factors
|
||||||
|
// are in fixed point with kShiftBits bits of fractional part.
|
||||||
|
accum[0] >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
accum[1] >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
accum[2] >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
if (hasAlpha) {
|
||||||
|
accum[3] >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the new pixel.
|
||||||
|
outRow[outX * 4 + 0] = ClampTo8(accum[0]);
|
||||||
|
outRow[outX * 4 + 1] = ClampTo8(accum[1]);
|
||||||
|
outRow[outX * 4 + 2] = ClampTo8(accum[2]);
|
||||||
|
if (hasAlpha) {
|
||||||
|
outRow[outX * 4 + 3] = ClampTo8(accum[3]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Does vertical convolution to produce one output row. The filter values and
|
||||||
|
// length are given in the first two parameters. These are applied to each
|
||||||
|
// of the rows pointed to in the |sourceDataRows| array, with each row
|
||||||
|
// being |pixelWidth| wide.
|
||||||
|
//
|
||||||
|
// The output must have room for |pixelWidth * 4| bytes.
|
||||||
|
template<bool hasAlpha>
|
||||||
|
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
|
||||||
|
int filterLength,
|
||||||
|
unsigned char* const* sourceDataRows,
|
||||||
|
int pixelWidth,
|
||||||
|
unsigned char* outRow) {
|
||||||
|
// We go through each column in the output and do a vertical convolution,
|
||||||
|
// generating one output pixel each time.
|
||||||
|
for (int outX = 0; outX < pixelWidth; outX++) {
|
||||||
|
// Compute the number of bytes over in each row that the current column
|
||||||
|
// we're convolving starts at. The pixel will cover the next 4 bytes.
|
||||||
|
int byteOffset = outX * 4;
|
||||||
|
|
||||||
|
// Apply the filter to one column of pixels.
|
||||||
|
int accum[4] = {0};
|
||||||
|
for (int filterY = 0; filterY < filterLength; filterY++) {
|
||||||
|
SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues[filterY];
|
||||||
|
accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
|
||||||
|
accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
|
||||||
|
accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
|
||||||
|
if (hasAlpha) {
|
||||||
|
accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bring this value back in range. All of the filter scaling factors
|
||||||
|
// are in fixed point with kShiftBits bits of precision.
|
||||||
|
accum[0] >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
accum[1] >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
accum[2] >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
if (hasAlpha) {
|
||||||
|
accum[3] >>= SkConvolutionFilter1D::kShiftBits;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the new pixel.
|
||||||
|
outRow[byteOffset + 0] = ClampTo8(accum[0]);
|
||||||
|
outRow[byteOffset + 1] = ClampTo8(accum[1]);
|
||||||
|
outRow[byteOffset + 2] = ClampTo8(accum[2]);
|
||||||
|
if (hasAlpha) {
|
||||||
|
unsigned char alpha = ClampTo8(accum[3]);
|
||||||
|
|
||||||
|
// Make sure the alpha channel doesn't come out smaller than any of the
|
||||||
|
// color channels. We use premultipled alpha channels, so this should
|
||||||
|
// never happen, but rounding errors will cause this from time to time.
|
||||||
|
// These "impossible" colors will cause overflows (and hence random pixel
|
||||||
|
// values) when the resulting bitmap is drawn to the screen.
|
||||||
|
//
|
||||||
|
// We only need to do this when generating the final output row (here).
|
||||||
|
int maxColorChannel = SkTMax(outRow[byteOffset + 0],
|
||||||
|
SkTMax(outRow[byteOffset + 1],
|
||||||
|
outRow[byteOffset + 2]));
|
||||||
|
if (alpha < maxColorChannel) {
|
||||||
|
outRow[byteOffset + 3] = maxColorChannel;
|
||||||
|
} else {
|
||||||
|
outRow[byteOffset + 3] = alpha;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No alpha channel, the image is opaque.
|
||||||
|
outRow[byteOffset + 3] = 0xff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runtime front end for the templated vertical convolution: selects the
// hasAlpha instantiation that matches |sourceHasAlpha| and forwards all other
// arguments unchanged.
void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
                        int filterLength,
                        unsigned char* const* sourceDataRows,
                        int pixelWidth,
                        unsigned char* outRow,
                        bool sourceHasAlpha) {
    if (!sourceHasAlpha) {
        ConvolveVertically<false>(filterValues, filterLength,
                                  sourceDataRows, pixelWidth,
                                  outRow);
        return;
    }
    ConvolveVertically<true>(filterValues, filterLength,
                             sourceDataRows, pixelWidth,
                             outRow);
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// SkConvolutionFilter1D ---------------------------------------------------------
|
||||||
|
|
||||||
|
// Creates an empty filter list. fMaxFilter tracks the longest trimmed filter
// added so far (it is updated by AddFilter), so it starts at 0.
SkConvolutionFilter1D::SkConvolutionFilter1D()
: fMaxFilter(0) {
}

// Nothing to release explicitly; the members clean up after themselves.
SkConvolutionFilter1D::~SkConvolutionFilter1D() {
}
|
||||||
|
|
||||||
|
void SkConvolutionFilter1D::AddFilter(int filterOffset,
|
||||||
|
const float* filterValues,
|
||||||
|
int filterLength) {
|
||||||
|
SkASSERT(filterLength > 0);
|
||||||
|
|
||||||
|
SkTArray<ConvolutionFixed> fixedValues;
|
||||||
|
fixedValues.reset(filterLength);
|
||||||
|
|
||||||
|
for (int i = 0; i < filterLength; ++i) {
|
||||||
|
fixedValues.push_back(FloatToFixed(filterValues[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
AddFilter(filterOffset, &fixedValues[0], filterLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SkConvolutionFilter1D::AddFilter(int filterOffset,
|
||||||
|
const ConvolutionFixed* filterValues,
|
||||||
|
int filterLength) {
|
||||||
|
// It is common for leading/trailing filter values to be zeros. In such
|
||||||
|
// cases it is beneficial to only store the central factors.
|
||||||
|
// For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
|
||||||
|
// a 1080p image this optimization gives a ~10% speed improvement.
|
||||||
|
int filterSize = filterLength;
|
||||||
|
int firstNonZero = 0;
|
||||||
|
while (firstNonZero < filterLength && filterValues[firstNonZero] == 0) {
|
||||||
|
firstNonZero++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstNonZero < filterLength) {
|
||||||
|
// Here we have at least one non-zero factor.
|
||||||
|
int lastNonZero = filterLength - 1;
|
||||||
|
while (lastNonZero >= 0 && filterValues[lastNonZero] == 0) {
|
||||||
|
lastNonZero--;
|
||||||
|
}
|
||||||
|
|
||||||
|
filterOffset += firstNonZero;
|
||||||
|
filterLength = lastNonZero + 1 - firstNonZero;
|
||||||
|
SkASSERT(filterLength > 0);
|
||||||
|
|
||||||
|
for (int i = firstNonZero; i <= lastNonZero; i++) {
|
||||||
|
fFilterValues.push_back(filterValues[i]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Here all the factors were zeroes.
|
||||||
|
filterLength = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
FilterInstance instance;
|
||||||
|
|
||||||
|
// We pushed filterLength elements onto fFilterValues
|
||||||
|
instance.fDataLocation = (static_cast<int>(fFilterValues.count()) -
|
||||||
|
filterLength);
|
||||||
|
instance.fOffset = filterOffset;
|
||||||
|
instance.fTrimmedLength = filterLength;
|
||||||
|
instance.fLength = filterSize;
|
||||||
|
fFilters.push_back(instance);
|
||||||
|
|
||||||
|
fMaxFilter = SkTMax(fMaxFilter, filterLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Describes the first filter in the list.
//   *specifiedFilterlength  length originally passed to AddFilter().
//   *filterOffset           source offset of the first stored coefficient.
//   *filterLength           number of stored (zero-trimmed) coefficients.
// Returns a pointer to the stored coefficients, or NULL when the trimmed
// filter is empty.
const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleFilter(
                                        int* specifiedFilterlength,
                                        int* filterOffset,
                                        int* filterLength) const {
    const FilterInstance& first = fFilters[0];

    *filterOffset = first.fOffset;
    *filterLength = first.fTrimmedLength;
    *specifiedFilterlength = first.fLength;

    return (0 == first.fTrimmedLength) ? NULL
                                       : &fFilterValues[first.fDataLocation];
}
|
||||||
|
|
||||||
|
void BGRAConvolve2D(const unsigned char* sourceData,
|
||||||
|
int sourceByteRowStride,
|
||||||
|
bool sourceHasAlpha,
|
||||||
|
const SkConvolutionFilter1D& filterX,
|
||||||
|
const SkConvolutionFilter1D& filterY,
|
||||||
|
int outputByteRowStride,
|
||||||
|
unsigned char* output,
|
||||||
|
SkConvolutionProcs* convolveProcs,
|
||||||
|
bool useSimdIfPossible) {
|
||||||
|
|
||||||
|
int maxYFilterSize = filterY.maxFilter();
|
||||||
|
|
||||||
|
// The next row in the input that we will generate a horizontally
|
||||||
|
// convolved row for. If the filter doesn't start at the beginning of the
|
||||||
|
// image (this is the case when we are only resizing a subset), then we
|
||||||
|
// don't want to generate any output rows before that. Compute the starting
|
||||||
|
// row for convolution as the first pixel for the first vertical filter.
|
||||||
|
int filterOffset, filterLength;
|
||||||
|
const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
|
||||||
|
filterY.FilterForValue(0, &filterOffset, &filterLength);
|
||||||
|
int nextXRow = filterOffset;
|
||||||
|
|
||||||
|
// We loop over each row in the input doing a horizontal convolution. This
|
||||||
|
// will result in a horizontally convolved image. We write the results into
|
||||||
|
// a circular buffer of convolved rows and do vertical convolution as rows
|
||||||
|
// are available. This prevents us from having to store the entire
|
||||||
|
// intermediate image and helps cache coherency.
|
||||||
|
// We will need four extra rows to allow horizontal convolution could be done
|
||||||
|
// simultaneously. We also pad each row in row buffer to be aligned-up to
|
||||||
|
// 16 bytes.
|
||||||
|
// TODO(jiesun): We do not use aligned load from row buffer in vertical
|
||||||
|
// convolution pass yet. Somehow Windows does not like it.
|
||||||
|
int rowBufferWidth = (filterX.numValues() + 15) & ~0xF;
|
||||||
|
int rowBufferHeight = maxYFilterSize +
|
||||||
|
(convolveProcs->fConvolve4RowsHorizontally ? 4 : 0);
|
||||||
|
CircularRowBuffer rowBuffer(rowBufferWidth,
|
||||||
|
rowBufferHeight,
|
||||||
|
filterOffset);
|
||||||
|
|
||||||
|
// Loop over every possible output row, processing just enough horizontal
|
||||||
|
// convolutions to run each subsequent vertical convolution.
|
||||||
|
SkASSERT(outputByteRowStride >= filterX.numValues() * 4);
|
||||||
|
int numOutputRows = filterY.numValues();
|
||||||
|
|
||||||
|
// We need to check which is the last line to convolve before we advance 4
|
||||||
|
// lines in one iteration.
|
||||||
|
int lastFilterOffset, lastFilterLength;
|
||||||
|
|
||||||
|
// SSE2 can access up to 3 extra pixels past the end of the
|
||||||
|
// buffer. At the bottom of the image, we have to be careful
|
||||||
|
// not to access data past the end of the buffer. Normally
|
||||||
|
// we fall back to the C++ implementation for the last row.
|
||||||
|
// If the last row is less than 3 pixels wide, we may have to fall
|
||||||
|
// back to the C++ version for more rows. Compute how many
|
||||||
|
// rows we need to avoid the SSE implementation for here.
|
||||||
|
filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset,
|
||||||
|
&lastFilterLength);
|
||||||
|
int avoidSimdRows = 1 + convolveProcs->fExtraHorizontalReads /
|
||||||
|
(lastFilterOffset + lastFilterLength);
|
||||||
|
|
||||||
|
filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
|
||||||
|
&lastFilterLength);
|
||||||
|
|
||||||
|
for (int outY = 0; outY < numOutputRows; outY++) {
|
||||||
|
filterValues = filterY.FilterForValue(outY,
|
||||||
|
&filterOffset, &filterLength);
|
||||||
|
|
||||||
|
// Generate output rows until we have enough to run the current filter.
|
||||||
|
while (nextXRow < filterOffset + filterLength) {
|
||||||
|
if (convolveProcs->fConvolve4RowsHorizontally &&
|
||||||
|
nextXRow + 3 < lastFilterOffset + lastFilterLength -
|
||||||
|
avoidSimdRows) {
|
||||||
|
const unsigned char* src[4];
|
||||||
|
unsigned char* outRow[4];
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
src[i] = &sourceData[(nextXRow + i) * sourceByteRowStride];
|
||||||
|
outRow[i] = rowBuffer.advanceRow();
|
||||||
|
}
|
||||||
|
convolveProcs->fConvolve4RowsHorizontally(src, filterX, outRow);
|
||||||
|
nextXRow += 4;
|
||||||
|
} else {
|
||||||
|
// Check if we need to avoid SSE2 for this row.
|
||||||
|
if (convolveProcs->fConvolveHorizontally &&
|
||||||
|
nextXRow < lastFilterOffset + lastFilterLength -
|
||||||
|
avoidSimdRows) {
|
||||||
|
convolveProcs->fConvolveHorizontally(
|
||||||
|
&sourceData[nextXRow * sourceByteRowStride],
|
||||||
|
filterX, rowBuffer.advanceRow(), sourceHasAlpha);
|
||||||
|
} else {
|
||||||
|
if (sourceHasAlpha) {
|
||||||
|
ConvolveHorizontally<true>(
|
||||||
|
&sourceData[nextXRow * sourceByteRowStride],
|
||||||
|
filterX, rowBuffer.advanceRow());
|
||||||
|
} else {
|
||||||
|
ConvolveHorizontally<false>(
|
||||||
|
&sourceData[nextXRow * sourceByteRowStride],
|
||||||
|
filterX, rowBuffer.advanceRow());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nextXRow++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute where in the output image this row of final data will go.
|
||||||
|
unsigned char* curOutputRow = &output[outY * outputByteRowStride];
|
||||||
|
|
||||||
|
// Get the list of rows that the circular buffer has, in order.
|
||||||
|
int firstRowInCircularBuffer;
|
||||||
|
unsigned char* const* rowsToConvolve =
|
||||||
|
rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
|
||||||
|
|
||||||
|
// Now compute the start of the subset of those rows that the filter
|
||||||
|
// needs.
|
||||||
|
unsigned char* const* firstRowForFilter =
|
||||||
|
&rowsToConvolve[filterOffset - firstRowInCircularBuffer];
|
||||||
|
|
||||||
|
if (convolveProcs->fConvolveVertically) {
|
||||||
|
convolveProcs->fConvolveVertically(filterValues, filterLength,
|
||||||
|
firstRowForFilter,
|
||||||
|
filterX.numValues(), curOutputRow,
|
||||||
|
sourceHasAlpha);
|
||||||
|
} else {
|
||||||
|
ConvolveVertically(filterValues, filterLength,
|
||||||
|
firstRowForFilter,
|
||||||
|
filterX.numValues(), curOutputRow,
|
||||||
|
sourceHasAlpha);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
203
src/core/SkConvolver.h
Normal file
203
src/core/SkConvolver.h
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file.
|
||||||
|
|
||||||
|
#ifndef SK_CONVOLVER_H
|
||||||
|
#define SK_CONVOLVER_H
|
||||||
|
|
||||||
|
#include "SkSize.h"
|
||||||
|
#include "SkTypes.h"
|
||||||
|
#include "SkTArray.h"
|
||||||
|
|
||||||
|
// avoid confusion with Mac OS X's math library (Carbon)
|
||||||
|
#if defined(__APPLE__)
|
||||||
|
#undef FloatToConvolutionFixed
|
||||||
|
#undef ConvolutionFixedToFloat
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Represents a filter in one dimension. Each output pixel has one entry in this
|
||||||
|
// object for the filter values contributing to it. You build up the filter
|
||||||
|
// list by calling AddFilter for each output pixel (in order).
|
||||||
|
//
|
||||||
|
// We do 2-dimensional convolution by first convolving each row by one
|
||||||
|
// SkConvolutionFilter1D, then convolving each column by another one.
|
||||||
|
//
|
||||||
|
// Entries are stored in ConvolutionFixed point, shifted left by kShiftBits.
|
||||||
|
class SkConvolutionFilter1D {
|
||||||
|
public:
|
||||||
|
typedef short ConvolutionFixed;
|
||||||
|
|
||||||
|
// The number of bits that ConvolutionFixed point values are shifted by.
|
||||||
|
enum { kShiftBits = 14 };
|
||||||
|
|
||||||
|
SK_API SkConvolutionFilter1D();
|
||||||
|
SK_API ~SkConvolutionFilter1D();
|
||||||
|
|
||||||
|
// Convert between floating point and our ConvolutionFixed point representation.
|
||||||
|
static ConvolutionFixed FloatToFixed(float f) {
|
||||||
|
return static_cast<ConvolutionFixed>(f * (1 << kShiftBits));
|
||||||
|
}
|
||||||
|
static unsigned char FixedToChar(ConvolutionFixed x) {
|
||||||
|
return static_cast<unsigned char>(x >> kShiftBits);
|
||||||
|
}
|
||||||
|
static float FixedToFloat(ConvolutionFixed x) {
|
||||||
|
// The cast relies on ConvolutionFixed being a short, implying that on
|
||||||
|
// the platforms we care about all (16) bits will fit into
|
||||||
|
// the mantissa of a (32-bit) float.
|
||||||
|
SK_COMPILE_ASSERT(sizeof(ConvolutionFixed) == 2, ConvolutionFixed_type_should_fit_in_float_mantissa);
|
||||||
|
float raw = static_cast<float>(x);
|
||||||
|
return ldexpf(raw, -kShiftBits);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the maximum pixel span of a filter.
|
||||||
|
int maxFilter() const { return fMaxFilter; }
|
||||||
|
|
||||||
|
// Returns the number of filters in this filter. This is the dimension of the
|
||||||
|
// output image.
|
||||||
|
int numValues() const { return static_cast<int>(fFilters.count()); }
|
||||||
|
|
||||||
|
// Appends the given list of scaling values for generating a given output
|
||||||
|
// pixel. |filterOffset| is the distance from the edge of the image to where
|
||||||
|
// the scaling factors start. The scaling factors apply to the source pixels
|
||||||
|
// starting from this position, and going for the next |filterLength| pixels.
|
||||||
|
//
|
||||||
|
// You will probably want to make sure your input is normalized (that is,
|
||||||
|
// all entries in |filterValuesg| sub to one) to prevent affecting the overall
|
||||||
|
// brighness of the image.
|
||||||
|
//
|
||||||
|
// The filterLength must be > 0.
|
||||||
|
//
|
||||||
|
// This version will automatically convert your input to ConvolutionFixed point.
|
||||||
|
SK_API void AddFilter(int filterOffset,
|
||||||
|
const float* filterValues,
|
||||||
|
int filterLength);
|
||||||
|
|
||||||
|
// Same as the above version, but the input is already ConvolutionFixed point.
|
||||||
|
void AddFilter(int filterOffset,
|
||||||
|
const ConvolutionFixed* filterValues,
|
||||||
|
int filterLength);
|
||||||
|
|
||||||
|
// Retrieves a filter for the given |valueOffset|, a position in the output
|
||||||
|
// image in the direction we're convolving. The offset and length of the
|
||||||
|
// filter values are put into the corresponding out arguments (see AddFilter
|
||||||
|
// above for what these mean), and a pointer to the first scaling factor is
|
||||||
|
// returned. There will be |filterLength| values in this array.
|
||||||
|
inline const ConvolutionFixed* FilterForValue(int valueOffset,
|
||||||
|
int* filterOffset,
|
||||||
|
int* filterLength) const {
|
||||||
|
const FilterInstance& filter = fFilters[valueOffset];
|
||||||
|
*filterOffset = filter.fOffset;
|
||||||
|
*filterLength = filter.fTrimmedLength;
|
||||||
|
if (filter.fTrimmedLength == 0) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
return &fFilterValues[filter.fDataLocation];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retrieves the filter for the offset 0, presumed to be the one and only.
|
||||||
|
// The offset and length of the filter values are put into the corresponding
|
||||||
|
// out arguments (see AddFilter). Note that |filterLegth| and
|
||||||
|
// |specifiedFilterLength| may be different if leading/trailing zeros of the
|
||||||
|
// original floating point form were clipped.
|
||||||
|
// There will be |filterLength| values in the return array.
|
||||||
|
// Returns NULL if the filter is 0-length (for instance when all floating
|
||||||
|
// point values passed to AddFilter were clipped to 0).
|
||||||
|
SK_API const ConvolutionFixed* GetSingleFilter(int* specifiedFilterLength,
|
||||||
|
int* filterOffset,
|
||||||
|
int* filterLength) const;
|
||||||
|
|
||||||
|
// Add another value to the fFilterValues array -- useful for
|
||||||
|
// SIMD padding which happens outside of this class.
|
||||||
|
|
||||||
|
void addFilterValue( ConvolutionFixed val ) {
|
||||||
|
fFilterValues.push_back( val );
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
struct FilterInstance {
|
||||||
|
// Offset within filterValues for this instance of the filter.
|
||||||
|
int fDataLocation;
|
||||||
|
|
||||||
|
// Distance from the left of the filter to the center. IN PIXELS
|
||||||
|
int fOffset;
|
||||||
|
|
||||||
|
// Number of values in this filter instance.
|
||||||
|
int fTrimmedLength;
|
||||||
|
|
||||||
|
// Filter length as specified. Note that this may be different from
|
||||||
|
// 'trimmed_length' if leading/trailing zeros of the original floating
|
||||||
|
// point form were clipped differently on each tail.
|
||||||
|
int fLength;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Stores the information for each filter added to this class.
|
||||||
|
SkTArray<FilterInstance> fFilters;
|
||||||
|
|
||||||
|
// We store all the filter values in this flat list, indexed by
|
||||||
|
// |FilterInstance.data_location| to avoid the mallocs required for storing
|
||||||
|
// each one separately.
|
||||||
|
SkTArray<ConvolutionFixed> fFilterValues;
|
||||||
|
|
||||||
|
// The maximum size of any filter we've added.
|
||||||
|
int fMaxFilter;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef void (*SkConvolveVertically_pointer)(
|
||||||
|
const SkConvolutionFilter1D::ConvolutionFixed* filterValues,
|
||||||
|
int filterLength,
|
||||||
|
unsigned char* const* sourceDataRows,
|
||||||
|
int pixelWidth,
|
||||||
|
unsigned char* outRow,
|
||||||
|
bool hasAlpha);
|
||||||
|
typedef void (*SkConvolve4RowsHorizontally_pointer)(
|
||||||
|
const unsigned char* srcData[4],
|
||||||
|
const SkConvolutionFilter1D& filter,
|
||||||
|
unsigned char* outRow[4]);
|
||||||
|
typedef void (*SkConvolveHorizontally_pointer)(
|
||||||
|
const unsigned char* srcData,
|
||||||
|
const SkConvolutionFilter1D& filter,
|
||||||
|
unsigned char* outRow,
|
||||||
|
bool hasAlpha);
|
||||||
|
typedef void (*SkConvolveFilterPadding_pointer)(
|
||||||
|
SkConvolutionFilter1D* filter);
|
||||||
|
|
||||||
|
struct SkConvolutionProcs {
|
||||||
|
// This is how many extra pixels may be read by the
|
||||||
|
// conolve*horizontally functions.
|
||||||
|
int fExtraHorizontalReads;
|
||||||
|
SkConvolveVertically_pointer fConvolveVertically;
|
||||||
|
SkConvolve4RowsHorizontally_pointer fConvolve4RowsHorizontally;
|
||||||
|
SkConvolveHorizontally_pointer fConvolveHorizontally;
|
||||||
|
SkConvolveFilterPadding_pointer fApplySIMDPadding;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Does a two-dimensional convolution on the given source image.
|
||||||
|
//
|
||||||
|
// It is assumed the source pixel offsets referenced in the input filters
|
||||||
|
// reference only valid pixels, so the source image size is not required. Each
|
||||||
|
// row of the source image starts |sourceByteRowStride| after the previous
|
||||||
|
// one (this allows you to have rows with some padding at the end).
|
||||||
|
//
|
||||||
|
// The result will be put into the given output buffer. The destination image
|
||||||
|
// size will be xfilter.numValues() * yfilter.numValues() pixels. Output rows
|
||||||
|
// start |outputByteRowStride| bytes apart (at least xfilter.numValues() * 4).
|
||||||
|
//
|
||||||
|
// |sourceHasAlpha| is a hint that allows us to avoid doing computations on
|
||||||
|
// the alpha channel if the image is opaque. If you don't know, set this to
|
||||||
|
// true and it will work properly, but setting this to false will be a few
|
||||||
|
// percent faster if you know the image is opaque.
|
||||||
|
//
|
||||||
|
// The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order
|
||||||
|
// (this is ARGB when loaded into 32-bit words on a little-endian machine).
|
||||||
|
SK_API void BGRAConvolve2D(const unsigned char* sourceData,
|
||||||
|
int sourceByteRowStride,
|
||||||
|
bool sourceHasAlpha,
|
||||||
|
const SkConvolutionFilter1D& xfilter,
|
||||||
|
const SkConvolutionFilter1D& yfilter,
|
||||||
|
int outputByteRowStride,
|
||||||
|
unsigned char* output,
|
||||||
|
SkConvolutionProcs* convolveProcs,
|
||||||
|
bool useSimdIfPossible);
|
||||||
|
|
||||||
|
#endif // SK_CONVOLVER_H
|
@ -11,6 +11,7 @@
|
|||||||
#include "SkColorPriv.h"
|
#include "SkColorPriv.h"
|
||||||
#include "SkUnPreMultiply.h"
|
#include "SkUnPreMultiply.h"
|
||||||
#include "SkShader.h"
|
#include "SkShader.h"
|
||||||
|
#include "SkConvolver.h"
|
||||||
|
|
||||||
#include "SkBitmapFilter_opts_SSE2.h"
|
#include "SkBitmapFilter_opts_SSE2.h"
|
||||||
|
|
||||||
@ -180,3 +181,456 @@ void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y,
|
|||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convolves horizontally along a single row. The row data is given in
|
||||||
|
// |src_data| and continues for the num_values() of the filter.
|
||||||
|
void convolveHorizontally_SSE2(const unsigned char* src_data,
|
||||||
|
const SkConvolutionFilter1D& filter,
|
||||||
|
unsigned char* out_row,
|
||||||
|
bool /*has_alpha*/) {
|
||||||
|
int num_values = filter.numValues();
|
||||||
|
|
||||||
|
int filter_offset, filter_length;
|
||||||
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
__m128i mask[4];
|
||||||
|
// |mask| will be used to decimate all extra filter coefficients that are
|
||||||
|
// loaded by SIMD when |filter_length| is not divisible by 4.
|
||||||
|
// mask[0] is not used in following algorithm.
|
||||||
|
mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
|
||||||
|
mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
|
||||||
|
mask[3] = _mm_set_epi16(0, 0, 0, 0, 0, -1, -1, -1);
|
||||||
|
|
||||||
|
// Output one pixel each iteration, calculating all channels (RGBA) together.
|
||||||
|
for (int out_x = 0; out_x < num_values; out_x++) {
|
||||||
|
const SkConvolutionFilter1D::ConvolutionFixed* filter_values =
|
||||||
|
filter.FilterForValue(out_x, &filter_offset, &filter_length);
|
||||||
|
|
||||||
|
__m128i accum = _mm_setzero_si128();
|
||||||
|
|
||||||
|
// Compute the first pixel in this row that the filter affects. It will
|
||||||
|
// touch |filter_length| pixels (4 bytes each) after this.
|
||||||
|
const __m128i* row_to_filter =
|
||||||
|
reinterpret_cast<const __m128i*>(&src_data[filter_offset << 2]);
|
||||||
|
|
||||||
|
// We will load and accumulate with four coefficients per iteration.
|
||||||
|
for (int filter_x = 0; filter_x < filter_length >> 2; filter_x++) {
|
||||||
|
|
||||||
|
// Load 4 coefficients => duplicate 1st and 2nd of them for all channels.
|
||||||
|
__m128i coeff, coeff16;
|
||||||
|
// [16] xx xx xx xx c3 c2 c1 c0
|
||||||
|
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
|
||||||
|
// [16] xx xx xx xx c1 c1 c0 c0
|
||||||
|
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
|
||||||
|
// [16] c1 c1 c1 c1 c0 c0 c0 c0
|
||||||
|
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
|
||||||
|
|
||||||
|
// Load four pixels => unpack the first two pixels to 16 bits =>
|
||||||
|
// multiply with coefficients => accumulate the convolution result.
|
||||||
|
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
__m128i src8 = _mm_loadu_si128(row_to_filter);
|
||||||
|
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
|
||||||
|
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||||
|
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||||
|
// [32] a0*c0 b0*c0 g0*c0 r0*c0
|
||||||
|
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||||
|
accum = _mm_add_epi32(accum, t);
|
||||||
|
// [32] a1*c1 b1*c1 g1*c1 r1*c1
|
||||||
|
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||||
|
accum = _mm_add_epi32(accum, t);
|
||||||
|
|
||||||
|
// Duplicate 3rd and 4th coefficients for all channels =>
|
||||||
|
// unpack the 3rd and 4th pixels to 16 bits => multiply with coefficients
|
||||||
|
// => accumulate the convolution results.
|
||||||
|
// [16] xx xx xx xx c3 c3 c2 c2
|
||||||
|
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
|
||||||
|
// [16] c3 c3 c3 c3 c2 c2 c2 c2
|
||||||
|
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
|
||||||
|
// [16] a3 g3 b3 r3 a2 g2 b2 r2
|
||||||
|
src16 = _mm_unpackhi_epi8(src8, zero);
|
||||||
|
mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||||
|
mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||||
|
// [32] a2*c2 b2*c2 g2*c2 r2*c2
|
||||||
|
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||||
|
accum = _mm_add_epi32(accum, t);
|
||||||
|
// [32] a3*c3 b3*c3 g3*c3 r3*c3
|
||||||
|
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||||
|
accum = _mm_add_epi32(accum, t);
|
||||||
|
|
||||||
|
// Advance the pixel and coefficients pointers.
|
||||||
|
row_to_filter += 1;
|
||||||
|
filter_values += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// When |filter_length| is not divisible by 4, we need to decimate some of
|
||||||
|
// the filter coefficient that was loaded incorrectly to zero; Other than
|
||||||
|
// that the algorithm is same with above, exceot that the 4th pixel will be
|
||||||
|
// always absent.
|
||||||
|
int r = filter_length&3;
|
||||||
|
if (r) {
|
||||||
|
// Note: filter_values must be padded to align_up(filter_offset, 8).
|
||||||
|
__m128i coeff, coeff16;
|
||||||
|
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
|
||||||
|
// Mask out extra filter taps.
|
||||||
|
coeff = _mm_and_si128(coeff, mask[r]);
|
||||||
|
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
|
||||||
|
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
|
||||||
|
|
||||||
|
// Note: line buffer must be padded to align_up(filter_offset, 16).
|
||||||
|
// We resolve this by use C-version for the last horizontal line.
|
||||||
|
__m128i src8 = _mm_loadu_si128(row_to_filter);
|
||||||
|
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
|
||||||
|
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||||
|
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||||
|
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||||
|
accum = _mm_add_epi32(accum, t);
|
||||||
|
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||||
|
accum = _mm_add_epi32(accum, t);
|
||||||
|
|
||||||
|
src16 = _mm_unpackhi_epi8(src8, zero);
|
||||||
|
coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
|
||||||
|
coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
|
||||||
|
mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||||
|
mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||||
|
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||||
|
accum = _mm_add_epi32(accum, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shift right for fixed point implementation.
|
||||||
|
accum = _mm_srai_epi32(accum, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
|
||||||
|
// Packing 32 bits |accum| to 16 bits per channel (signed saturation).
|
||||||
|
accum = _mm_packs_epi32(accum, zero);
|
||||||
|
// Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
|
||||||
|
accum = _mm_packus_epi16(accum, zero);
|
||||||
|
|
||||||
|
// Store the pixel value of 32 bits.
|
||||||
|
*(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum);
|
||||||
|
out_row += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convolves horizontally along four rows. The row data is given in
|
||||||
|
// |src_data| and continues for the num_values() of the filter.
|
||||||
|
// The algorithm is almost same as |ConvolveHorizontally_SSE2|. Please
|
||||||
|
// refer to that function for detailed comments.
|
||||||
|
void convolve4RowsHorizontally_SSE2(const unsigned char* src_data[4],
|
||||||
|
const SkConvolutionFilter1D& filter,
|
||||||
|
unsigned char* out_row[4]) {
|
||||||
|
int num_values = filter.numValues();
|
||||||
|
|
||||||
|
int filter_offset, filter_length;
|
||||||
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
__m128i mask[4];
|
||||||
|
// |mask| will be used to decimate all extra filter coefficients that are
|
||||||
|
// loaded by SIMD when |filter_length| is not divisible by 4.
|
||||||
|
// mask[0] is not used in following algorithm.
|
||||||
|
mask[1] = _mm_set_epi16(0, 0, 0, 0, 0, 0, 0, -1);
|
||||||
|
mask[2] = _mm_set_epi16(0, 0, 0, 0, 0, 0, -1, -1);
|
||||||
|
mask[3] = _mm_set_epi16(0, 0, 0, 0, 0, -1, -1, -1);
|
||||||
|
|
||||||
|
// Output one pixel each iteration, calculating all channels (RGBA) together.
|
||||||
|
for (int out_x = 0; out_x < num_values; out_x++) {
|
||||||
|
const SkConvolutionFilter1D::ConvolutionFixed* filter_values =
|
||||||
|
filter.FilterForValue(out_x, &filter_offset, &filter_length);
|
||||||
|
|
||||||
|
// four pixels in a column per iteration.
|
||||||
|
__m128i accum0 = _mm_setzero_si128();
|
||||||
|
__m128i accum1 = _mm_setzero_si128();
|
||||||
|
__m128i accum2 = _mm_setzero_si128();
|
||||||
|
__m128i accum3 = _mm_setzero_si128();
|
||||||
|
int start = (filter_offset<<2);
|
||||||
|
// We will load and accumulate with four coefficients per iteration.
|
||||||
|
for (int filter_x = 0; filter_x < (filter_length >> 2); filter_x++) {
|
||||||
|
__m128i coeff, coeff16lo, coeff16hi;
|
||||||
|
// [16] xx xx xx xx c3 c2 c1 c0
|
||||||
|
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
|
||||||
|
// [16] xx xx xx xx c1 c1 c0 c0
|
||||||
|
coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
|
||||||
|
// [16] c1 c1 c1 c1 c0 c0 c0 c0
|
||||||
|
coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
|
||||||
|
// [16] xx xx xx xx c3 c3 c2 c2
|
||||||
|
coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
|
||||||
|
// [16] c3 c3 c3 c3 c2 c2 c2 c2
|
||||||
|
coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
|
||||||
|
|
||||||
|
__m128i src8, src16, mul_hi, mul_lo, t;
|
||||||
|
|
||||||
|
#define ITERATION(src, accum) \
|
||||||
|
src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); \
|
||||||
|
src16 = _mm_unpacklo_epi8(src8, zero); \
|
||||||
|
mul_hi = _mm_mulhi_epi16(src16, coeff16lo); \
|
||||||
|
mul_lo = _mm_mullo_epi16(src16, coeff16lo); \
|
||||||
|
t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
|
||||||
|
accum = _mm_add_epi32(accum, t); \
|
||||||
|
t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
|
||||||
|
accum = _mm_add_epi32(accum, t); \
|
||||||
|
src16 = _mm_unpackhi_epi8(src8, zero); \
|
||||||
|
mul_hi = _mm_mulhi_epi16(src16, coeff16hi); \
|
||||||
|
mul_lo = _mm_mullo_epi16(src16, coeff16hi); \
|
||||||
|
t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
|
||||||
|
accum = _mm_add_epi32(accum, t); \
|
||||||
|
t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
|
||||||
|
accum = _mm_add_epi32(accum, t)
|
||||||
|
|
||||||
|
ITERATION(src_data[0] + start, accum0);
|
||||||
|
ITERATION(src_data[1] + start, accum1);
|
||||||
|
ITERATION(src_data[2] + start, accum2);
|
||||||
|
ITERATION(src_data[3] + start, accum3);
|
||||||
|
|
||||||
|
start += 16;
|
||||||
|
filter_values += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
int r = filter_length & 3;
|
||||||
|
if (r) {
|
||||||
|
// Note: filter_values must be padded to align_up(filter_offset, 8);
|
||||||
|
__m128i coeff;
|
||||||
|
coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_values));
|
||||||
|
// Mask out extra filter taps.
|
||||||
|
coeff = _mm_and_si128(coeff, mask[r]);
|
||||||
|
|
||||||
|
__m128i coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
|
||||||
|
/* c1 c1 c1 c1 c0 c0 c0 c0 */
|
||||||
|
coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
|
||||||
|
__m128i coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
|
||||||
|
coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
|
||||||
|
|
||||||
|
__m128i src8, src16, mul_hi, mul_lo, t;
|
||||||
|
|
||||||
|
ITERATION(src_data[0] + start, accum0);
|
||||||
|
ITERATION(src_data[1] + start, accum1);
|
||||||
|
ITERATION(src_data[2] + start, accum2);
|
||||||
|
ITERATION(src_data[3] + start, accum3);
|
||||||
|
}
|
||||||
|
|
||||||
|
accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum0 = _mm_packs_epi32(accum0, zero);
|
||||||
|
accum0 = _mm_packus_epi16(accum0, zero);
|
||||||
|
accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum1 = _mm_packs_epi32(accum1, zero);
|
||||||
|
accum1 = _mm_packus_epi16(accum1, zero);
|
||||||
|
accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum2 = _mm_packs_epi32(accum2, zero);
|
||||||
|
accum2 = _mm_packus_epi16(accum2, zero);
|
||||||
|
accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum3 = _mm_packs_epi32(accum3, zero);
|
||||||
|
accum3 = _mm_packus_epi16(accum3, zero);
|
||||||
|
|
||||||
|
*(reinterpret_cast<int*>(out_row[0])) = _mm_cvtsi128_si32(accum0);
|
||||||
|
*(reinterpret_cast<int*>(out_row[1])) = _mm_cvtsi128_si32(accum1);
|
||||||
|
*(reinterpret_cast<int*>(out_row[2])) = _mm_cvtsi128_si32(accum2);
|
||||||
|
*(reinterpret_cast<int*>(out_row[3])) = _mm_cvtsi128_si32(accum3);
|
||||||
|
|
||||||
|
out_row[0] += 4;
|
||||||
|
out_row[1] += 4;
|
||||||
|
out_row[2] += 4;
|
||||||
|
out_row[3] += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Does vertical convolution to produce one output row. The filter values and
|
||||||
|
// length are given in the first two parameters. These are applied to each
|
||||||
|
// of the rows pointed to in the |source_data_rows| array, with each row
|
||||||
|
// being |pixel_width| wide.
|
||||||
|
//
|
||||||
|
// The output must have room for |pixel_width * 4| bytes.
|
||||||
|
template<bool has_alpha>
|
||||||
|
void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
|
||||||
|
int filter_length,
|
||||||
|
unsigned char* const* source_data_rows,
|
||||||
|
int pixel_width,
|
||||||
|
unsigned char* out_row) {
|
||||||
|
int width = pixel_width & ~3;
|
||||||
|
|
||||||
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
__m128i accum0, accum1, accum2, accum3, coeff16;
|
||||||
|
const __m128i* src;
|
||||||
|
// Output four pixels per iteration (16 bytes).
|
||||||
|
for (int out_x = 0; out_x < width; out_x += 4) {
|
||||||
|
|
||||||
|
// Accumulated result for each pixel. 32 bits per RGBA channel.
|
||||||
|
accum0 = _mm_setzero_si128();
|
||||||
|
accum1 = _mm_setzero_si128();
|
||||||
|
accum2 = _mm_setzero_si128();
|
||||||
|
accum3 = _mm_setzero_si128();
|
||||||
|
|
||||||
|
// Convolve with one filter coefficient per iteration.
|
||||||
|
for (int filter_y = 0; filter_y < filter_length; filter_y++) {
|
||||||
|
|
||||||
|
// Duplicate the filter coefficient 8 times.
|
||||||
|
// [16] cj cj cj cj cj cj cj cj
|
||||||
|
coeff16 = _mm_set1_epi16(filter_values[filter_y]);
|
||||||
|
|
||||||
|
// Load four pixels (16 bytes) together.
|
||||||
|
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
src = reinterpret_cast<const __m128i*>(
|
||||||
|
&source_data_rows[filter_y][out_x << 2]);
|
||||||
|
__m128i src8 = _mm_loadu_si128(src);
|
||||||
|
|
||||||
|
// Unpack 1st and 2nd pixels from 8 bits to 16 bits for each channels =>
|
||||||
|
// multiply with current coefficient => accumulate the result.
|
||||||
|
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
|
||||||
|
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||||
|
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||||
|
// [32] a0 b0 g0 r0
|
||||||
|
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||||
|
accum0 = _mm_add_epi32(accum0, t);
|
||||||
|
// [32] a1 b1 g1 r1
|
||||||
|
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||||
|
accum1 = _mm_add_epi32(accum1, t);
|
||||||
|
|
||||||
|
// Unpack 3rd and 4th pixels from 8 bits to 16 bits for each channels =>
|
||||||
|
// multiply with current coefficient => accumulate the result.
|
||||||
|
// [16] a3 b3 g3 r3 a2 b2 g2 r2
|
||||||
|
src16 = _mm_unpackhi_epi8(src8, zero);
|
||||||
|
mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||||
|
mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||||
|
// [32] a2 b2 g2 r2
|
||||||
|
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||||
|
accum2 = _mm_add_epi32(accum2, t);
|
||||||
|
// [32] a3 b3 g3 r3
|
||||||
|
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||||
|
accum3 = _mm_add_epi32(accum3, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shift right for fixed point implementation.
|
||||||
|
accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
|
||||||
|
// Packing 32 bits |accum| to 16 bits per channel (signed saturation).
|
||||||
|
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
accum0 = _mm_packs_epi32(accum0, accum1);
|
||||||
|
// [16] a3 b3 g3 r3 a2 b2 g2 r2
|
||||||
|
accum2 = _mm_packs_epi32(accum2, accum3);
|
||||||
|
|
||||||
|
// Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
|
||||||
|
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
accum0 = _mm_packus_epi16(accum0, accum2);
|
||||||
|
|
||||||
|
if (has_alpha) {
|
||||||
|
// Compute the max(ri, gi, bi) for each pixel.
|
||||||
|
// [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
|
||||||
|
__m128i a = _mm_srli_epi32(accum0, 8);
|
||||||
|
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
|
||||||
|
__m128i b = _mm_max_epu8(a, accum0); // Max of r and g.
|
||||||
|
// [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
|
||||||
|
a = _mm_srli_epi32(accum0, 16);
|
||||||
|
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
|
||||||
|
b = _mm_max_epu8(a, b); // Max of r and g and b.
|
||||||
|
// [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
|
||||||
|
b = _mm_slli_epi32(b, 24);
|
||||||
|
|
||||||
|
// Make sure the value of alpha channel is always larger than maximum
|
||||||
|
// value of color channels.
|
||||||
|
accum0 = _mm_max_epu8(b, accum0);
|
||||||
|
} else {
|
||||||
|
// Set value of alpha channels to 0xFF.
|
||||||
|
__m128i mask = _mm_set1_epi32(0xff000000);
|
||||||
|
accum0 = _mm_or_si128(accum0, mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store the convolution result (16 bytes) and advance the pixel pointers.
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i*>(out_row), accum0);
|
||||||
|
out_row += 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
// When the width of the output is not divisible by 4, We need to save one
|
||||||
|
// pixel (4 bytes) each time. And also the fourth pixel is always absent.
|
||||||
|
if (pixel_width & 3) {
|
||||||
|
accum0 = _mm_setzero_si128();
|
||||||
|
accum1 = _mm_setzero_si128();
|
||||||
|
accum2 = _mm_setzero_si128();
|
||||||
|
for (int filter_y = 0; filter_y < filter_length; ++filter_y) {
|
||||||
|
coeff16 = _mm_set1_epi16(filter_values[filter_y]);
|
||||||
|
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
src = reinterpret_cast<const __m128i*>(
|
||||||
|
&source_data_rows[filter_y][width<<2]);
|
||||||
|
__m128i src8 = _mm_loadu_si128(src);
|
||||||
|
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
__m128i src16 = _mm_unpacklo_epi8(src8, zero);
|
||||||
|
__m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||||
|
__m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||||
|
// [32] a0 b0 g0 r0
|
||||||
|
__m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||||
|
accum0 = _mm_add_epi32(accum0, t);
|
||||||
|
// [32] a1 b1 g1 r1
|
||||||
|
t = _mm_unpackhi_epi16(mul_lo, mul_hi);
|
||||||
|
accum1 = _mm_add_epi32(accum1, t);
|
||||||
|
// [16] a3 b3 g3 r3 a2 b2 g2 r2
|
||||||
|
src16 = _mm_unpackhi_epi8(src8, zero);
|
||||||
|
mul_hi = _mm_mulhi_epi16(src16, coeff16);
|
||||||
|
mul_lo = _mm_mullo_epi16(src16, coeff16);
|
||||||
|
// [32] a2 b2 g2 r2
|
||||||
|
t = _mm_unpacklo_epi16(mul_lo, mul_hi);
|
||||||
|
accum2 = _mm_add_epi32(accum2, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
|
||||||
|
// [16] a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
accum0 = _mm_packs_epi32(accum0, accum1);
|
||||||
|
// [16] a3 b3 g3 r3 a2 b2 g2 r2
|
||||||
|
accum2 = _mm_packs_epi32(accum2, zero);
|
||||||
|
// [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
|
||||||
|
accum0 = _mm_packus_epi16(accum0, accum2);
|
||||||
|
if (has_alpha) {
|
||||||
|
// [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
|
||||||
|
__m128i a = _mm_srli_epi32(accum0, 8);
|
||||||
|
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
|
||||||
|
__m128i b = _mm_max_epu8(a, accum0); // Max of r and g.
|
||||||
|
// [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
|
||||||
|
a = _mm_srli_epi32(accum0, 16);
|
||||||
|
// [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
|
||||||
|
b = _mm_max_epu8(a, b); // Max of r and g and b.
|
||||||
|
// [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
|
||||||
|
b = _mm_slli_epi32(b, 24);
|
||||||
|
accum0 = _mm_max_epu8(b, accum0);
|
||||||
|
} else {
|
||||||
|
__m128i mask = _mm_set1_epi32(0xff000000);
|
||||||
|
accum0 = _mm_or_si128(accum0, mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int out_x = width; out_x < pixel_width; out_x++) {
|
||||||
|
*(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum0);
|
||||||
|
accum0 = _mm_srli_si128(accum0, 4);
|
||||||
|
out_row += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Non-template entry point for the SSE2 vertical convolution. The alpha
// handling is selected at compile time inside the templated worker, so this
// wrapper only turns the runtime |has_alpha| flag into the matching
// instantiation and forwards every other argument untouched.
void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
                             int filter_length,
                             unsigned char* const* source_data_rows,
                             int pixel_width,
                             unsigned char* out_row,
                             bool has_alpha) {
    if (!has_alpha) {
        convolveVertically_SSE2<false>(filter_values, filter_length,
                                       source_data_rows, pixel_width, out_row);
        return;
    }

    convolveVertically_SSE2<true>(filter_values, filter_length,
                                  source_data_rows, pixel_width, out_row);
}
|
||||||
|
|
||||||
|
// Appends eight zero-valued dummy coefficients behind the coefficients of the
// last filter stored in |filter|. SIMD code loads 8 or 16 bytes of
// coefficients at a time; the extra zeros keep those wide loads from touching
// invalid memory past the end of the real data. No attempt is made to align
// the coefficients, because the <vector> implementation is opaque.
//
// Must be called only after every |AddFilter| call has been made.
void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) {
    const SkConvolutionFilter1D::ConvolutionFixed zeroCoeff =
            static_cast<SkConvolutionFilter1D::ConvolutionFixed>(0);

    int remaining = 8;
    while (remaining-- > 0) {
        filter->addFilterValue(zeroCoeff);
    }
}
|
||||||
|
@ -11,10 +11,27 @@
|
|||||||
#define SkBitmapFilter_opts_sse2_DEFINED
|
#define SkBitmapFilter_opts_sse2_DEFINED
|
||||||
|
|
||||||
#include "SkBitmapProcState.h"
|
#include "SkBitmapProcState.h"
|
||||||
|
#include "SkConvolver.h"
|
||||||
|
|
||||||
void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y,
|
void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y,
|
||||||
SkPMColor *SK_RESTRICT colors, int count);
|
SkPMColor *SK_RESTRICT colors, int count);
|
||||||
void highQualityFilter_SSE2(const SkBitmapProcState &s, int x, int y,
|
void highQualityFilter_SSE2(const SkBitmapProcState &s, int x, int y,
|
||||||
SkPMColor *SK_RESTRICT colors, int count);
|
SkPMColor *SK_RESTRICT colors, int count);
|
||||||
|
|
||||||
|
|
||||||
|
// SSE2 vertical convolution entry point. The definition picks the
// convolveVertically_SSE2<true> or <false> template instantiation based on
// |has_alpha| and forwards the remaining five arguments unchanged.
// SkBitmapProcState::platformConvolutionProcs() installs this function as
// fConvolveVertically when cachedHasSSE2() is true.
void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filter_values,
|
||||||
|
int filter_length,
|
||||||
|
unsigned char* const* source_data_rows,
|
||||||
|
int pixel_width,
|
||||||
|
unsigned char* out_row,
|
||||||
|
bool has_alpha);
|
||||||
|
// SSE2 horizontal convolution taking four source rows and four output rows.
// SkBitmapProcState::platformConvolutionProcs() installs this function as
// fConvolve4RowsHorizontally when cachedHasSSE2() is true.
// NOTE(review): the definition is not visible from this header; presumably
// all four rows are filtered with the same |filter| in one pass -- confirm.
void convolve4RowsHorizontally_SSE2(const unsigned char* src_data[4],
|
||||||
|
const SkConvolutionFilter1D& filter,
|
||||||
|
unsigned char* out_row[4]);
|
||||||
|
// SSE2 horizontal convolution of a single row.
// SkBitmapProcState::platformConvolutionProcs() installs this function as
// fConvolveHorizontally when cachedHasSSE2() is true.
// NOTE(review): the definition is not visible from this header; |has_alpha|
// presumably selects the same alpha handling as the vertical pass -- confirm.
void convolveHorizontally_SSE2(const unsigned char* src_data,
|
||||||
|
const SkConvolutionFilter1D& filter,
|
||||||
|
unsigned char* out_row,
|
||||||
|
bool has_alpha);
|
||||||
|
// Appends 8 zero-valued coefficients to |filter| so that SIMD loads of 8 or
// 16 bytes cannot run past the stored coefficients. Call after all filters
// have been added.
void applySIMDPadding_SSE2(SkConvolutionFilter1D* filter);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -21,3 +21,6 @@
|
|||||||
|
|
||||||
// empty implementation just uses default supplied function pointers
|
// empty implementation just uses default supplied function pointers
|
||||||
void SkBitmapProcState::platformProcs() {}
|
void SkBitmapProcState::platformProcs() {}
|
||||||
|
|
||||||
|
// empty implementation just uses default supplied function pointers
|
||||||
|
void SkBitmapProcState::platformScaleProc() {}
|
||||||
|
@ -107,6 +107,16 @@ static bool cachedHasSSSE3() {
|
|||||||
|
|
||||||
SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
|
SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
|
||||||
|
|
||||||
|
void SkBitmapProcState::platformConvolutionProcs() {
|
||||||
|
if (cachedHasSSE2()) {
|
||||||
|
fConvolutionProcs->fExtraHorizontalReads = 3;
|
||||||
|
fConvolutionProcs->fConvolveVertically = &convolveVertically_SSE2;
|
||||||
|
fConvolutionProcs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
|
||||||
|
fConvolutionProcs->fConvolveHorizontally = &convolveHorizontally_SSE2;
|
||||||
|
fConvolutionProcs->fApplySIMDPadding = &applySIMDPadding_SSE2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void SkBitmapProcState::platformProcs() {
|
void SkBitmapProcState::platformProcs() {
|
||||||
if (cachedHasSSSE3()) {
|
if (cachedHasSSSE3()) {
|
||||||
#if !defined(SK_BUILD_FOR_ANDROID)
|
#if !defined(SK_BUILD_FOR_ANDROID)
|
||||||
@ -151,9 +161,6 @@ void SkBitmapProcState::platformProcs() {
|
|||||||
if (fShaderProc32 == highQualityFilter) {
|
if (fShaderProc32 == highQualityFilter) {
|
||||||
fShaderProc32 = highQualityFilter_SSE2;
|
fShaderProc32 = highQualityFilter_SSE2;
|
||||||
}
|
}
|
||||||
if (fShaderProc32 == highQualityFilter_ScaleOnly) {
|
|
||||||
fShaderProc32 = highQualityFilter_ScaleOnly_SSE2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user