SSE2 version of blit_lcd16, courtesy of Jin Yang.
Yields a 25-30% speedup on Windows (32-bit) and 4-7% on Linux (64-bit, where there is less register pressure); it is not invoked on Mac, where LCD text is 32-bit instead of 16-bit. Follow-up: GDI system settings on Windows can suppress LCD text for small fonts, which interferes with our benchmarks (http://code.google.com/p/skia/issues/detail?id=483). http://codereview.appspot.com/5617058/ git-svn-id: http://skia.googlecode.com/svn/trunk@3189 2bbb7eff-a529-9590-31e7-b0007b416f81
This commit is contained in:
parent
02f90e8ac1
commit
d6770e69e0
@ -714,5 +714,116 @@ static inline uint32_t SkExpand32_4444(SkPMColor c) {
|
|||||||
// used for cheap 2x2 dithering when the colors are opaque
|
// used for cheap 2x2 dithering when the colors are opaque
|
||||||
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
|
void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
/**
 *  Stretch a 5-bit value (0..31) onto the range 0..32 by adding its top bit
 *  back in: inputs 0..15 are returned unchanged, inputs 16..31 are returned
 *  incremented by one.
 */
static inline int SkUpscale31To32(int value) {
    SkASSERT((unsigned)value <= 31);
    // Bit 4 is set exactly for the upper half of the range.
    const int topBit = value >> 4;
    return value + topBit;
}
|
||||||
|
|
||||||
|
/**
 *  Interpolate one 8-bit component from dst toward src.
 *
 *  @param src    Component to blend toward, 0..255.
 *  @param dst    Component to blend from, 0..255.
 *  @param scale  Blend weight, 0..32 (0 returns dst, 32 returns src).
 *  @return dst + (src - dst) * scale / 32, computed with a right shift.
 */
static inline int SkBlend32(int src, int dst, int scale) {
    SkASSERT((unsigned)src <= 0xFF);
    SkASSERT((unsigned)dst <= 0xFF);
    SkASSERT((unsigned)scale <= 32);
    // The difference may be negative; the shift is applied to the signed
    // product, exactly as in the single-expression form.
    const int weightedDelta = (src - dst) * scale >> 5;
    return dst + weightedDelta;
}
|
||||||
|
|
||||||
|
/**
 *  Blend one dst pixel toward a source color through a 16-bit LCD coverage
 *  mask, with the coverage further attenuated by the source alpha.
 *
 *  @param srcA  Source alpha, already upscaled by the caller to 0..256.
 *  @param srcR  Source red, 0..255.
 *  @param srcG  Source green, 0..255.
 *  @param srcB  Source blue, 0..255.
 *  @param dst   Destination pixel.
 *  @param mask  Packed 16-bit per-channel coverage.
 *  @return dst unchanged when mask is 0; otherwise the blended pixel with
 *          its alpha set to 0xFF.
 */
static SkPMColor SkBlendLCD16(int srcA, int srcR, int srcG, int srcB,
                              SkPMColor dst, uint16_t mask) {
    // No coverage in any channel: leave the destination alone.
    if (0 == mask) {
        return dst;
    }

    // Bring every coverage channel down to 5 bits (only green may be wider),
    // then widen 0..31 to 0..32 so SkBlend32 can be used.
    const int covR = SkUpscale31To32(SkGetPackedR16(mask) >> (SK_R16_BITS - 5));
    const int covG = SkUpscale31To32(SkGetPackedG16(mask) >> (SK_G16_BITS - 5));
    const int covB = SkUpscale31To32(SkGetPackedB16(mask) >> (SK_B16_BITS - 5));

    // Fold the source alpha (0..256) into each channel's coverage.
    const int scaleR = covR * srcA >> 8;
    const int scaleG = covG * srcA >> 8;
    const int scaleB = covB * srcA >> 8;

    // LCD blitting is only supported when the dst is known/required to be
    // opaque, hence the hard-coded 0xFF alpha.
    return SkPackARGB32(0xFF,
                        SkBlend32(srcR, SkGetPackedR32(dst), scaleR),
                        SkBlend32(srcG, SkGetPackedG32(dst), scaleG),
                        SkBlend32(srcB, SkGetPackedB32(dst), scaleB));
}
|
||||||
|
|
||||||
|
/**
 *  Blend one dst pixel toward an opaque source color through a 16-bit LCD
 *  coverage mask.
 *
 *  @param srcR       Source red, 0..255.
 *  @param srcG       Source green, 0..255.
 *  @param srcB       Source blue, 0..255.
 *  @param dst        Destination pixel.
 *  @param mask       Packed 16-bit per-channel coverage.
 *  @param opaqueDst  Pixel to return verbatim when mask is 0xFFFF.
 *  @return dst when mask is 0, opaqueDst when mask is 0xFFFF, otherwise the
 *          blended pixel with its alpha set to 0xFF.
 */
static SkPMColor SkBlendLCD16Opaque(int srcR, int srcG, int srcB,
                                    SkPMColor dst, uint16_t mask,
                                    SkPMColor opaqueDst) {
    // Fast exits for the two extreme mask values.
    if (0 == mask) {
        return dst;
    }
    if (mask == 0xFFFF) {
        return opaqueDst;
    }

    // Bring every coverage channel down to 5 bits (only green may be wider),
    // then widen 0..31 to 0..32 so SkBlend32 can be used.
    const int covR = SkUpscale31To32(SkGetPackedR16(mask) >> (SK_R16_BITS - 5));
    const int covG = SkUpscale31To32(SkGetPackedG16(mask) >> (SK_G16_BITS - 5));
    const int covB = SkUpscale31To32(SkGetPackedB16(mask) >> (SK_B16_BITS - 5));

    // LCD blitting is only supported when the dst is known/required to be
    // opaque, hence the hard-coded 0xFF alpha.
    return SkPackARGB32(0xFF,
                        SkBlend32(srcR, SkGetPackedR32(dst), covR),
                        SkBlend32(srcG, SkGetPackedG32(dst), covG),
                        SkBlend32(srcB, SkGetPackedB32(dst), covB));
}
|
||||||
|
|
||||||
|
/**
 *  Portable row blitter for LCD16 masks with a (possibly) translucent color.
 *  Each of the first width pixels of dst is replaced by SkBlendLCD16 of
 *  itself with the matching entry of src. The trailing SkPMColor argument
 *  is unused.
 */
static void SkBlitLCD16Row(SkPMColor dst[], const uint16_t src[],
                           SkColor color, int width, SkPMColor) {
    const int srcR = SkColorGetR(color);
    const int srcG = SkColorGetG(color);
    const int srcB = SkColorGetB(color);
    // SkBlendLCD16 expects the alpha pre-converted by SkAlpha255To256.
    const int srcA = SkAlpha255To256(SkColorGetA(color));

    for (int remaining = width; remaining > 0; --remaining, ++dst, ++src) {
        *dst = SkBlendLCD16(srcA, srcR, srcG, srcB, *dst, *src);
    }
}
|
||||||
|
|
||||||
|
/**
 *  Portable row blitter for LCD16 masks with an opaque color. Each of the
 *  first width pixels of dst is replaced by SkBlendLCD16Opaque of itself
 *  with the matching entry of src; opaqueDst is forwarded to that helper.
 */
static void SkBlitLCD16OpaqueRow(SkPMColor dst[], const uint16_t src[],
                                 SkColor color, int width,
                                 SkPMColor opaqueDst) {
    const int srcR = SkColorGetR(color);
    const int srcG = SkColorGetG(color);
    const int srcB = SkColorGetB(color);

    for (int remaining = width; remaining > 0; --remaining, ++dst, ++src) {
        *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *src, opaqueDst);
    }
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -30,6 +30,15 @@ public:
|
|||||||
const void* mask, size_t maskRB,
|
const void* mask, size_t maskRB,
|
||||||
SkColor color, int width, int height);
|
SkColor color, int width, int height);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function pointer that blits a row of mask(lcd16) into a row of dst
|
||||||
|
* colorized by a single color. The number of pixels to blit is specified
|
||||||
|
* by width.
|
||||||
|
*/
|
||||||
|
typedef void (*BlitLCD16RowProc)(SkPMColor dst[], const uint16_t src[],
|
||||||
|
SkColor color, int width,
|
||||||
|
SkPMColor opaqueDst);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Function pointer that blits a row of src colors through a row of a mask
|
* Function pointer that blits a row of src colors through a row of a mask
|
||||||
* onto a row of dst colors. The RowFactory that returns this function ptr
|
* onto a row of dst colors. The RowFactory that returns this function ptr
|
||||||
@ -50,6 +59,17 @@ public:
|
|||||||
*/
|
*/
|
||||||
static ColorProc PlatformColorProcs(SkBitmap::Config, SkMask::Format, SkColor);
|
static ColorProc PlatformColorProcs(SkBitmap::Config, SkMask::Format, SkColor);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Public entry-point to return a blitcolor BlitLCD16RowProc.
|
||||||
|
*/
|
||||||
|
static BlitLCD16RowProc BlitLCD16RowFactory(bool isOpaque);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return either platform specific optimized blitcolor BlitLCD16RowProc,
|
||||||
|
* or NULL if no optimized routine is available.
|
||||||
|
*/
|
||||||
|
static BlitLCD16RowProc PlatformBlitRowProcs16(bool isOpaque);
|
||||||
|
|
||||||
enum RowFlags {
|
enum RowFlags {
|
||||||
kSrcIsOpaque_RowFlag = 1 << 0
|
kSrcIsOpaque_RowFlag = 1 << 0
|
||||||
};
|
};
|
||||||
|
@ -64,106 +64,16 @@ static void D32_A8_Black(void* SK_RESTRICT dst, size_t dstRB,
|
|||||||
} while (--height != 0);
|
} while (--height != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
// Returns the LCD16 row blitter to use: the platform-specific routine when
// PlatformBlitRowProcs16 supplies one, otherwise the portable implementation
// matching isOpaque.
SkBlitMask::BlitLCD16RowProc SkBlitMask::BlitLCD16RowFactory(bool isOpaque) {
    BlitLCD16RowProc platformProc = PlatformBlitRowProcs16(isOpaque);
    if (platformProc) {
        return platformProc;
    }

    // No optimized routine available: fall back to the portable blitters.
    return isOpaque ? SkBlitLCD16OpaqueRow : SkBlitLCD16Row;
}
|
||||||
|
|
||||||
@ -175,13 +85,14 @@ static void D32_LCD16_Proc(void* SK_RESTRICT dst, size_t dstRB,
|
|||||||
const uint16_t* srcRow = (const uint16_t*)mask;
|
const uint16_t* srcRow = (const uint16_t*)mask;
|
||||||
SkPMColor opaqueDst;
|
SkPMColor opaqueDst;
|
||||||
|
|
||||||
void (*proc)(SkPMColor dst[], const uint16_t src[],
|
SkBlitMask::BlitLCD16RowProc proc = NULL;
|
||||||
SkColor color, int width, SkPMColor);
|
bool isOpaque = (0xFF == SkColorGetA(color));
|
||||||
if (0xFF == SkColorGetA(color)) {
|
proc = SkBlitMask::BlitLCD16RowFactory(isOpaque);
|
||||||
proc = blit_lcd16_opaque_row;
|
SkASSERT(proc != NULL);
|
||||||
|
|
||||||
|
if (isOpaque) {
|
||||||
opaqueDst = SkPreMultiplyColor(color);
|
opaqueDst = SkPreMultiplyColor(color);
|
||||||
} else {
|
} else {
|
||||||
proc = blit_lcd16_row;
|
|
||||||
opaqueDst = 0; // ignored
|
opaqueDst = 0; // ignored
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -546,9 +457,9 @@ static void LCD16_RowProc_Opaque(SkPMColor* SK_RESTRICT dst,
|
|||||||
int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
|
int maskB = SkGetPackedB16(m) >> (SK_B16_BITS - 5);
|
||||||
|
|
||||||
// Now upscale them to 0..32, so we can use blend32
|
// Now upscale them to 0..32, so we can use blend32
|
||||||
maskR = upscale31To32(maskR);
|
maskR = SkUpscale31To32(maskR);
|
||||||
maskG = upscale31To32(maskG);
|
maskG = SkUpscale31To32(maskG);
|
||||||
maskB = upscale31To32(maskB);
|
maskB = SkUpscale31To32(maskB);
|
||||||
|
|
||||||
int dstR = SkGetPackedR32(d);
|
int dstR = SkGetPackedR32(d);
|
||||||
int dstG = SkGetPackedG32(d);
|
int dstG = SkGetPackedG32(d);
|
||||||
@ -557,9 +468,9 @@ static void LCD16_RowProc_Opaque(SkPMColor* SK_RESTRICT dst,
|
|||||||
// LCD blitting is only supported if the dst is known/required
|
// LCD blitting is only supported if the dst is known/required
|
||||||
// to be opaque
|
// to be opaque
|
||||||
dst[i] = SkPackARGB32(0xFF,
|
dst[i] = SkPackARGB32(0xFF,
|
||||||
blend32(srcR, dstR, maskR),
|
SkBlend32(srcR, dstR, maskR),
|
||||||
blend32(srcG, dstG, maskG),
|
SkBlend32(srcG, dstG, maskG),
|
||||||
blend32(srcB, dstB, maskB));
|
SkBlend32(srcB, dstB, maskB));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -386,8 +386,7 @@ void Color32_SSE2(SkPMColor dst[], const SkPMColor src[], int count,
|
|||||||
|
|
||||||
void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
|
void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
|
||||||
size_t maskRB, SkColor origColor,
|
size_t maskRB, SkColor origColor,
|
||||||
int width, int height)
|
int width, int height) {
|
||||||
{
|
|
||||||
SkPMColor color = SkPreMultiplyColor(origColor);
|
SkPMColor color = SkPreMultiplyColor(origColor);
|
||||||
size_t dstOffset = dstRB - (width << 2);
|
size_t dstOffset = dstRB - (width << 2);
|
||||||
size_t maskOffset = maskRB - width;
|
size_t maskOffset = maskRB - width;
|
||||||
@ -482,3 +481,226 @@ void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* maskPtr,
|
|||||||
mask += maskOffset;
|
mask += maskOffset;
|
||||||
} while (--height != 0);
|
} while (--height != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static __m128i SkBlendLCD16_SSE2(__m128i &srci, __m128i &dst,
|
||||||
|
__m128i &mask, __m128i &scale) {
|
||||||
|
// Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
|
||||||
|
__m128i r = _mm_and_si128(_mm_slli_epi32(mask,
|
||||||
|
16-SK_R16_SHIFT-(SK_R16_BITS-5)),
|
||||||
|
_mm_set1_epi32(0x001F0000));
|
||||||
|
|
||||||
|
__m128i g = _mm_and_si128(_mm_slli_epi32(mask,
|
||||||
|
8-SK_G16_SHIFT-(SK_G16_BITS-5)),
|
||||||
|
_mm_set1_epi32(0x00001F00));
|
||||||
|
|
||||||
|
__m128i b = _mm_and_si128(_mm_slli_epi32(mask,
|
||||||
|
SK_B16_BITS-5),
|
||||||
|
_mm_set1_epi32(0x0000001F));
|
||||||
|
|
||||||
|
// Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
|
||||||
|
mask = _mm_or_si128(_mm_or_si128(r, g), b);
|
||||||
|
|
||||||
|
// Interleave R,G,B into the lower byte of word.
|
||||||
|
__m128i maskLo, maskHi;
|
||||||
|
maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
|
||||||
|
maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());
|
||||||
|
|
||||||
|
// Upscale to 0..32
|
||||||
|
maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
|
||||||
|
maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
|
||||||
|
|
||||||
|
maskLo = _mm_mullo_epi16(maskLo, scale);
|
||||||
|
maskHi = _mm_mullo_epi16(maskHi, scale);
|
||||||
|
|
||||||
|
maskLo = _mm_srli_epi16(maskLo, 8);
|
||||||
|
maskHi = _mm_srli_epi16(maskHi, 8);
|
||||||
|
|
||||||
|
// Interleave R,G,B into the lower byte of the word.
|
||||||
|
__m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
|
||||||
|
__m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());
|
||||||
|
|
||||||
|
maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(srci, dstLo));
|
||||||
|
maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(srci, dstHi));
|
||||||
|
|
||||||
|
maskLo = _mm_srai_epi16(maskLo, 5);
|
||||||
|
maskHi = _mm_srai_epi16(maskHi, 5);
|
||||||
|
|
||||||
|
// Add two pixels into result.
|
||||||
|
__m128i resultLo = _mm_add_epi16(dstLo, maskLo);
|
||||||
|
__m128i resultHi = _mm_add_epi16(dstHi, maskHi);
|
||||||
|
|
||||||
|
// Pack into 4 32bit dst pixels
|
||||||
|
return _mm_packus_epi16(resultLo, resultHi);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __m128i SkBlendLCD16Opaque_SSE2(__m128i &srci, __m128i &dst,
|
||||||
|
__m128i &mask) {
|
||||||
|
// Get the R,G,B of each 16bit mask pixel, we want all of them in 5 bits.
|
||||||
|
__m128i r = _mm_and_si128(_mm_slli_epi32(mask,
|
||||||
|
16-SK_R16_SHIFT-(SK_R16_BITS-5)),
|
||||||
|
_mm_set1_epi32(0x001F0000));
|
||||||
|
|
||||||
|
__m128i g = _mm_and_si128(_mm_slli_epi32(mask,
|
||||||
|
8-SK_G16_SHIFT-(SK_G16_BITS-5)),
|
||||||
|
_mm_set1_epi32(0x00001F00));
|
||||||
|
|
||||||
|
__m128i b = _mm_and_si128(_mm_slli_epi32(mask, SK_B16_BITS-5),
|
||||||
|
_mm_set1_epi32(0x0000001F));
|
||||||
|
|
||||||
|
// Pack the 4 16bit mask pixels into 4 32bit pixels, (p0, p1, p2, p3)
|
||||||
|
mask = _mm_or_si128(_mm_or_si128(r, g), b);
|
||||||
|
|
||||||
|
// Interleave R,G,B into the lower byte of word.
|
||||||
|
__m128i maskLo, maskHi;
|
||||||
|
maskLo = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
|
||||||
|
maskHi = _mm_unpackhi_epi8(mask, _mm_setzero_si128());
|
||||||
|
|
||||||
|
// Upscale to 0..32
|
||||||
|
maskLo = _mm_add_epi16(maskLo, _mm_srli_epi16(maskLo, 4));
|
||||||
|
maskHi = _mm_add_epi16(maskHi, _mm_srli_epi16(maskHi, 4));
|
||||||
|
|
||||||
|
// Interleave R,G,B into the lower byte of the word.
|
||||||
|
__m128i dstLo = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
|
||||||
|
__m128i dstHi = _mm_unpackhi_epi8(dst, _mm_setzero_si128());
|
||||||
|
|
||||||
|
maskLo = _mm_mullo_epi16(maskLo, _mm_sub_epi16(srci, dstLo));
|
||||||
|
maskHi = _mm_mullo_epi16(maskHi, _mm_sub_epi16(srci, dstHi));
|
||||||
|
|
||||||
|
maskLo = _mm_srai_epi16(maskLo, 5);
|
||||||
|
maskHi = _mm_srai_epi16(maskHi, 5);
|
||||||
|
|
||||||
|
// Add two pixels into result.
|
||||||
|
__m128i resultLo = _mm_add_epi16(dstLo, maskLo);
|
||||||
|
__m128i resultHi = _mm_add_epi16(dstHi, maskHi);
|
||||||
|
|
||||||
|
// Pack into 4 32bit dst pixels
|
||||||
|
return _mm_packus_epi16(resultLo, resultHi);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 *  SSE2 row blitter for LCD16 masks with a (possibly) translucent color.
 *
 *  Rows of at least four pixels are handled in three phases: a scalar
 *  prologue that advances dst to a 16-byte boundary, a vector loop that
 *  blends four pixels per iteration, and a scalar epilogue for whatever is
 *  left. Shorter rows use the scalar loop only. The trailing SkPMColor
 *  argument is unused.
 */
void SkBlitLCD16Row_SSE2(SkPMColor dst[], const uint16_t src[],
                         SkColor color, int width, SkPMColor) {
    if (width <= 0) {
        return;
    }

    const int srcR = SkColorGetR(color);
    const int srcG = SkColorGetG(color);
    const int srcB = SkColorGetB(color);
    // The blend helpers expect alpha pre-converted by SkAlpha255To256.
    const int srcA = SkAlpha255To256(SkColorGetA(color));

    if (width >= 4) {
        SkASSERT(((size_t)dst & 0x03) == 0);

        // Scalar prologue: step one pixel at a time until dst is 16-byte
        // aligned, as required by the aligned load/store below.
        for (; ((size_t)dst & 0x0F) != 0; ++src, ++dst, --width) {
            *dst = SkBlendLCD16(srcA, srcR, srcG, srcB, *dst, *src);
        }

        const __m128i zero = _mm_setzero_si128();
        __m128i* quad = reinterpret_cast<__m128i*>(dst);

        // Source color with every byte widened to 16 bits, and the alpha
        // scale replicated into every 16-bit lane.
        __m128i srcWide = _mm_unpacklo_epi8(
                _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB)), zero);
        __m128i alphaScale = _mm_set1_epi16(srcA);

        for (; width >= 4; width -= 4, src += 4, ++quad) {
            __m128i dstQuad = _mm_load_si128(quad);
            __m128i coverage = _mm_loadl_epi64(
                    reinterpret_cast<const __m128i*>(src));

            // Compare each 16-bit mask with zero and gather the top bit of
            // every result byte; 0xFFFF means all four masks are zero.
            const int zeroBits = _mm_movemask_epi8(
                    _mm_cmpeq_epi16(coverage, zero));
            if (0xFFFF == zeroBits) {
                continue;  // nothing to blend for these four pixels
            }

            // Spread the four 16-bit masks to (p0, 0, p1, 0, p2, 0, p3, 0).
            coverage = _mm_unpacklo_epi16(coverage, zero);

            // Blend and write back four 32-bit dst pixels.
            _mm_store_si128(quad, SkBlendLCD16_SSE2(srcWide, dstQuad,
                                                    coverage, alphaScale));
        }

        dst = reinterpret_cast<SkPMColor*>(quad);
    }

    // Scalar epilogue (or the entire row when it is shorter than four).
    for (; width > 0; --width, ++src, ++dst) {
        *dst = SkBlendLCD16(srcA, srcR, srcG, srcB, *dst, *src);
    }
}
|
||||||
|
|
||||||
|
/**
 *  SSE2 row blitter for LCD16 masks with an opaque color.
 *
 *  Rows of at least four pixels are handled in three phases: a scalar
 *  prologue that advances dst to a 16-byte boundary, a vector loop that
 *  blends four pixels per iteration, and a scalar epilogue for whatever is
 *  left. Shorter rows use the scalar loop only. opaqueDst is forwarded to
 *  the scalar helper SkBlendLCD16Opaque.
 */
void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t src[],
                               SkColor color, int width, SkPMColor opaqueDst) {
    if (width <= 0) {
        return;
    }

    const int srcR = SkColorGetR(color);
    const int srcG = SkColorGetG(color);
    const int srcB = SkColorGetB(color);

    if (width >= 4) {
        SkASSERT(((size_t)dst & 0x03) == 0);

        // Scalar prologue: step one pixel at a time until dst is 16-byte
        // aligned, as required by the aligned load/store below.
        for (; ((size_t)dst & 0x0F) != 0; ++src, ++dst, --width) {
            *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *src, opaqueDst);
        }

        const __m128i zero = _mm_setzero_si128();
        __m128i* quad = reinterpret_cast<__m128i*>(dst);

        // Source color with every byte widened to 16 bits.
        __m128i srcWide = _mm_unpacklo_epi8(
                _mm_set1_epi32(SkPackARGB32(0xFF, srcR, srcG, srcB)), zero);

        for (; width >= 4; width -= 4, src += 4, ++quad) {
            __m128i dstQuad = _mm_load_si128(quad);
            __m128i coverage = _mm_loadl_epi64(
                    reinterpret_cast<const __m128i*>(src));

            // Compare each 16-bit mask with zero and gather the top bit of
            // every result byte; 0xFFFF means all four masks are zero.
            const int zeroBits = _mm_movemask_epi8(
                    _mm_cmpeq_epi16(coverage, zero));
            if (0xFFFF == zeroBits) {
                continue;  // nothing to blend for these four pixels
            }

            // Spread the four 16-bit masks to (p0, 0, p1, 0, p2, 0, p3, 0).
            coverage = _mm_unpacklo_epi16(coverage, zero);

            // Blend and write back four 32-bit dst pixels.
            _mm_store_si128(quad, SkBlendLCD16Opaque_SSE2(srcWide, dstQuad,
                                                          coverage));
        }

        dst = reinterpret_cast<SkPMColor*>(quad);
    }

    // Scalar epilogue (or the entire row when it is shorter than four).
    for (; width > 0; --width, ++src, ++dst) {
        *dst = SkBlendLCD16Opaque(srcR, srcG, srcB, *dst, *src, opaqueDst);
    }
}
|
||||||
|
@ -23,3 +23,8 @@ void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst,
|
|||||||
void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* mask,
|
void SkARGB32_A8_BlitMask_SSE2(void* device, size_t dstRB, const void* mask,
|
||||||
size_t maskRB, SkColor color,
|
size_t maskRB, SkColor color,
|
||||||
int width, int height);
|
int width, int height);
|
||||||
|
|
||||||
|
void SkBlitLCD16Row_SSE2(SkPMColor dst[], const uint16_t src[],
|
||||||
|
SkColor color, int width, SkPMColor);
|
||||||
|
void SkBlitLCD16OpaqueRow_SSE2(SkPMColor dst[], const uint16_t src[],
|
||||||
|
SkColor color, int width, SkPMColor opaqueDst);
|
||||||
|
@ -1313,6 +1313,10 @@ SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This build supplies no optimized LCD16 row blitter: always returns NULL,
// regardless of isOpaque.
SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    return NULL;
}
|
||||||
|
|
||||||
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
|
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
|
||||||
SkMask::Format maskFormat,
|
SkMask::Format maskFormat,
|
||||||
RowFlags flags) {
|
RowFlags flags) {
|
||||||
|
@ -31,7 +31,11 @@ SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
|
|||||||
SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
|
SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
|
||||||
SkMask::Format maskFormat,
|
SkMask::Format maskFormat,
|
||||||
SkColor color) {
|
SkColor color) {
|
||||||
return NULL;
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This build supplies no optimized LCD16 row blitter: always returns NULL,
// regardless of isOpaque.
SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    return NULL;
}
|
||||||
|
|
||||||
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
|
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
|
||||||
|
@ -126,6 +126,18 @@ SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
|
|||||||
return proc;
|
return proc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Selects the SSE2 LCD16 row blitter matching isOpaque when cachedHasSSE2()
// reports SSE2 support; returns NULL otherwise.
SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    if (!cachedHasSSE2()) {
        return NULL;
    }
    return isOpaque ? SkBlitLCD16OpaqueRow_SSE2 : SkBlitLCD16Row_SSE2;
}
|
||||||
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
|
SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
|
||||||
SkMask::Format maskFormat,
|
SkMask::Format maskFormat,
|
||||||
RowFlags flags) {
|
RowFlags flags) {
|
||||||
|
Loading…
Reference in New Issue
Block a user