Patches on top of Radu's latest.

patch from issue 1273033005 at patchset 120001 (http://crrev.com/1273033005#ps120001)

BUG=skia:

Review URL: https://codereview.chromium.org/1288323004
This commit is contained in:
mtklein 2015-08-18 09:43:28 -07:00 committed by Commit bot
parent f05271581f
commit 2d141ba2df
10 changed files with 157 additions and 31 deletions

View File

@ -11,6 +11,7 @@
#define SK_OPTS_NS portable
#include "SkBlitMask_opts.h"
#include "SkBlurImageFilter_opts.h"
#include "SkColorCubeFilter_opts.h"
#include "SkFloatingPoint_opts.h"
#include "SkMorphologyImageFilter_opts.h"
#include "SkTextureCompressor_opts.h"
@ -38,6 +39,7 @@ namespace SkOpts {
// Definitions of the SkOpts function-pointer hooks. Each one starts out bound to the
// implementation compiled in this file (the `portable` namespace, or SkCreate4pxXfermode);
// the CPU-specific Init_*() functions may later overwrite individual hooks.
decltype(memset16) memset16 = portable::memset16;
decltype(memset32) memset32 = portable::memset32;
decltype(create_xfermode) create_xfermode = SkCreate4pxXfermode;
decltype(color_cube_filter_span) color_cube_filter_span = portable::color_cube_filter_span;
decltype(box_blur_xx) box_blur_xx = portable::box_blur_xx;
decltype(box_blur_xy) box_blur_xy = portable::box_blur_xy;

View File

@ -44,6 +44,15 @@ namespace SkOpts {
extern bool (*fill_block_dimensions)(SkTextureCompressor::Format, int* x, int* y);
extern void (*blit_mask_d32_a8)(SkPMColor*, size_t, const SkAlpha*, size_t, SkColor, int, int);
// Optimized implementation of SkColorCubeFilter::filterSpan; filterSpan calls through
// this pointer. Arguments, in order (names as used by the implementations):
//   src            - input pixels (premultiplied SkPMColor)
//   count          - number of pixels to process
//   dst            - output pixels (premultiplied SkPMColor)
//   colorToIndex   - two tables, indexed by an 8-bit channel value, giving the cube
//                    coordinates of the two entries blended along that axis
//   colorToFactors - two tables, indexed the same way, giving the matching blend weights
//   dim            - number of cube entries along each axis
//   colorCube      - the cube's entries, stored as SkColor
extern void (*color_cube_filter_span)(const SkPMColor[],
int,
SkPMColor[],
const int * [2],
const SkScalar * [2],
int,
const SkColor*);
}
#endif//SkOpts_DEFINED

View File

@ -26,6 +26,7 @@ class SkPMFloat : public Sk4f {
public:
static SkPMFloat FromPMColor(SkPMColor c) { return SkPMFloat(c); }
static SkPMFloat FromARGB(float a, float r, float g, float b) { return SkPMFloat(a,r,g,b); }
static SkPMFloat FromBGRx(SkColor c); // Ignores c's alpha, instead forcing it to 1.
Sk4f alphas() const; // argb -> aaaa, generally faster than the equivalent Sk4f(this->a()).

View File

@ -8,6 +8,7 @@
#include "SkColorCubeFilter.h"
#include "SkColorPriv.h"
#include "SkOnce.h"
#include "SkOpts.h"
#include "SkReadBuffer.h"
#include "SkUnPreMultiply.h"
#include "SkWriteBuffer.h"
@ -128,36 +129,9 @@ void SkColorCubeFilter::filterSpan(const SkPMColor src[], int count, SkPMColor d
const SkScalar* colorToScalar;
fCache.getProcessingLuts(&colorToIndex, &colorToFactors, &colorToScalar);
const int dim = fCache.cubeDimension();
SkColor* colorCube = (SkColor*)fCubeData->data();
for (int i = 0; i < count; ++i) {
SkColor inputColor = SkUnPreMultiply::PMColorToColor(src[i]);
uint8_t r = SkColorGetR(inputColor);
uint8_t g = SkColorGetG(inputColor);
uint8_t b = SkColorGetB(inputColor);
uint8_t a = SkColorGetA(inputColor);
SkScalar rOut(0), gOut(0), bOut(0);
for (int x = 0; x < 2; ++x) {
for (int y = 0; y < 2; ++y) {
for (int z = 0; z < 2; ++z) {
SkColor lutColor = colorCube[colorToIndex[x][r] +
(colorToIndex[y][g] +
colorToIndex[z][b] * dim) * dim];
SkScalar factor = colorToFactors[x][r] *
colorToFactors[y][g] *
colorToFactors[z][b];
rOut += colorToScalar[SkColorGetR(lutColor)] * factor;
gOut += colorToScalar[SkColorGetG(lutColor)] * factor;
bOut += colorToScalar[SkColorGetB(lutColor)] * factor;
}
}
}
const SkScalar aOut = SkIntToScalar(a);
dst[i] = SkPackARGB32(a,
SkScalarRoundToInt(rOut * aOut),
SkScalarRoundToInt(gOut * aOut),
SkScalarRoundToInt(bOut * aOut));
}
SkOpts::color_cube_filter_span(src, count, dst, colorToIndex,
colorToFactors, fCache.cubeDimension(),
(SkColor*)fCubeData->data());
}
SkFlattenable* SkColorCubeFilter::CreateProc(SkReadBuffer& buffer) {

View File

@ -0,0 +1,85 @@
// Copyright 2015 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef SkColorCubeFilter_opts_DEFINED
#define SkColorCubeFilter_opts_DEFINED
#include "SkColor.h"
#include "SkPMFloat.h"
#include "SkUnPreMultiply.h"
namespace SK_OPTS_NS {
void color_cube_filter_span(const SkPMColor src[],
int count,
SkPMColor dst[],
const int* colorToIndex[2],
const SkScalar* colorToFactors[2],
int dim,
const SkColor* colorCube) {
uint8_t* ptr_dst = reinterpret_cast<uint8_t*>(dst);
uint8_t r, g, b, a;
for (int i = 0; i < count; ++i) {
const SkPMColor input = src[i];
a = input >> SK_A32_SHIFT;
if (a != 255) {
const SkColor source = SkUnPreMultiply::PMColorToColor(input);
r = SkColorGetR(source);
g = SkColorGetG(source);
b = SkColorGetB(source);
} else {
r = SkGetPackedR32(input);
g = SkGetPackedG32(input);
b = SkGetPackedB32(input);
}
const SkScalar g0 = colorToFactors[0][g],
g1 = colorToFactors[1][g],
b0 = colorToFactors[0][b],
b1 = colorToFactors[1][b];
const Sk4f g0b0(g0*b0),
g0b1(g0*b1),
g1b0(g1*b0),
g1b1(g1*b1);
const int i00 = (colorToIndex[0][g] + colorToIndex[0][b] * dim) * dim;
const int i01 = (colorToIndex[0][g] + colorToIndex[1][b] * dim) * dim;
const int i10 = (colorToIndex[1][g] + colorToIndex[0][b] * dim) * dim;
const int i11 = (colorToIndex[1][g] + colorToIndex[1][b] * dim) * dim;
SkPMFloat color(0);
for (int x = 0; x < 2; ++x) {
const int ix = colorToIndex[x][r];
const SkColor lutColor00 = colorCube[ix + i00];
const SkColor lutColor01 = colorCube[ix + i01];
const SkColor lutColor10 = colorCube[ix + i10];
const SkColor lutColor11 = colorCube[ix + i11];
Sk4f sum = SkPMFloat::FromBGRx(lutColor00) * g0b0;
sum = sum + SkPMFloat::FromBGRx(lutColor01) * g0b1;
sum = sum + SkPMFloat::FromBGRx(lutColor10) * g1b0;
sum = sum + SkPMFloat::FromBGRx(lutColor11) * g1b1;
color = color + sum * Sk4f((float)colorToFactors[x][r]);
}
if (a != 255) {
color = color * Sk4f(((float)a) / 255);
}
dst[i] = color.round();
ptr_dst[SK_A32_SHIFT / 8] = a;
ptr_dst += 4;
}
}
} // namespace SK_OPTS NS
#endif // SkColorCubeFilter_opts_DEFINED

View File

@ -10,6 +10,7 @@
#define SK_OPTS_NS neon
#include "SkBlitMask_opts.h"
#include "SkBlurImageFilter_opts.h"
#include "SkColorCubeFilter_opts.h"
#include "SkFloatingPoint_opts.h"
#include "SkMorphologyImageFilter_opts.h"
#include "SkTextureCompressor_opts.h"
@ -36,5 +37,7 @@ namespace SkOpts {
fill_block_dimensions = neon::fill_block_dimensions;
blit_mask_d32_a8 = neon::blit_mask_d32_a8;
color_cube_filter_span = neon::color_cube_filter_span;
}
}

View File

@ -8,11 +8,13 @@
#include "SkOpts.h"
#define SK_OPTS_NS ssse3
#include "SkBlitMask_opts.h"
#include "SkColorCubeFilter_opts.h"
#include "SkXfermode_opts.h"
namespace SkOpts {
    // Rebinds the SkOpts hooks that have a version compiled in this file
    // (SK_OPTS_NS is `ssse3` here) to those versions.
    void Init_ssse3() {
        blit_mask_d32_a8       = ssse3::blit_mask_d32_a8;
        color_cube_filter_span = ssse3::color_cube_filter_span;
        create_xfermode        = SkCreate4pxXfermode;
    }
}

View File

@ -7,6 +7,8 @@
namespace { // See SkPMFloat.h
static_assert(SK_A32_SHIFT == 24, "This file assumes little-endian.");
inline SkPMFloat::SkPMFloat(SkPMColor c) {
SkPMColorAssert(c);
uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c);
@ -28,8 +30,21 @@ inline SkPMColor SkPMFloat::round() const {
}
// Returns a vector with this color's alpha replicated into every lane (argb -> aaaa).
inline Sk4f SkPMFloat::alphas() const {
    static_assert(SK_A32_SHIFT == 24, "Assuming little-endian.");
    // Alpha is lane 3, i.e. lane 1 of the upper 64-bit half.
    const float32x2_t upperHalf = vget_high_f32(fVec);
    return vdupq_lane_f32(upperHalf, 1);
}
// Converts an SkColor to an SkPMFloat with channels scaled to [0,1].
// The alpha in `c` is discarded and replaced with 1.
inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
    uint8x8_t bytes = (uint8x8_t)vdup_n_u32(c);
#if defined(SK_PMCOLOR_IS_RGBA)
    // Table lookup that exchanges bytes 0 and 2 of each 32-bit group, i.e. swaps R and B.
    const uint8x8_t swapRB = vcreate_u8(0x0300010203000102ULL);
    bytes = vtbl1_u8(bytes, swapRB);
#endif
    // Widen 8 -> 16 -> 32 bits so each channel occupies its own lane.
    const uint16x8_t wide16 = vmovl_u8(bytes);
    uint32x4_t lanes = vmovl_u16(vget_low_u16(wide16));
    lanes = vsetq_lane_u32(0xFF, lanes, 3);  // Alpha lane becomes 255, i.e. 1.0 after scaling.

    const float32x4_t unit = vmulq_f32(vcvtq_f32_u32(lanes), vdupq_n_f32(1.0f/255));
    SkPMFloat pmf = Sk4f(unit);
    SkASSERT(pmf.isValid());
    return pmf;
}
} // namespace

View File

@ -34,4 +34,14 @@ inline Sk4f SkPMFloat::alphas() const {
return Sk4f(this->a());
}
// Converts an SkColor to an SkPMFloat with channels scaled to [0,1].
// The alpha in `c` is discarded and replaced with 1.
inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
    // `c` is an SkColor, not an SkPMColor, so its channels must be read with the
    // SkColorGet* accessors. The SkGetPacked*32 accessors use the SkPMColor shifts,
    // which would exchange red and blue when SK_PMCOLOR_IS_RGBA is defined (the SIMD
    // versions of this function swap R and B explicitly in that configuration).
    const float inv255 = 1.0f / 255;
    SkPMFloat pmf = SkPMFloat::FromARGB(1.0f,
                                        SkColorGetR(c) * inv255,
                                        SkColorGetG(c) * inv255,
                                        SkColorGetB(c) * inv255);
    SkASSERT(pmf.isValid());
    return pmf;
}
} // namespace

View File

@ -38,4 +38,29 @@ inline Sk4f SkPMFloat::alphas() const {
return _mm_shuffle_ps(fVec, fVec, 0xff); // Read as 11 11 11 11, copying lane 3 to all lanes.
}
// Converts an SkColor to an SkPMFloat with channels scaled to [0,1].
// The alpha in `c` is discarded and replaced with 1.
inline SkPMFloat SkPMFloat::FromBGRx(SkColor c) {
    const __m128i packed = _mm_cvtsi32_si128((int)c);

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
    // One byte shuffle both widens each channel to a 32-bit lane and puts the channels
    // in SkPMColor order. Lane 3 (alpha) is zeroed here and filled in below.
    const char _ = ~0;  // Zero these bytes.
  #if defined(SK_PMCOLOR_IS_BGRA)
    const __m128i spread = _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, _,_,_,_);
  #else
    const __m128i spread = _mm_setr_epi8(2,_,_,_, 1,_,_,_, 0,_,_,_, _,_,_,_);
  #endif
    __m128i lanes = _mm_shuffle_epi8(packed, spread);
#else
    // Without pshufb: widen 8 -> 16 -> 32 bits by interleaving with zero.
    const __m128i zero = _mm_setzero_si128();
    __m128i lanes = _mm_unpacklo_epi16(_mm_unpacklo_epi8(packed, zero), zero);
  #if defined(SK_PMCOLOR_IS_RGBA)
    lanes = _mm_shuffle_epi32(lanes, 0xC6);  // C6 == 11 00 01 10, i.e swap lanes 0 and 2.
  #endif
#endif

    // Force alpha to 1: lane 3 becomes 0xFF whatever it held before.
    lanes = _mm_or_si128(lanes, _mm_set_epi32(0xFF,0,0,0));

    const __m128 unit = _mm_mul_ps(_mm_cvtepi32_ps(lanes), _mm_set1_ps(1.0f/255));
    SkPMFloat pmf = Sk4f(unit);
    SkASSERT(pmf.isValid());
    return pmf;
}
} // namespace