MIPS DSP composition functions optimizations.

List of optimized routines:
- comp_func_DestinationOver
- comp_func_SourceIn
- comp_func_DestinationIn
- comp_func_DestinationOut
- comp_func_SourceAtop
- comp_func_DestinationAtop
- comp_func_XOR
- comp_func_SourceOut
- comp_func_solid_SourceOver
- comp_func_solid_DestinationOver
- comp_func_solid_SourceIn
- comp_func_solid_DestinationIn
- comp_func_solid_SourceAtop
- comp_func_solid_DestinationAtop
- comp_func_solid_XOR
- comp_func_solid_SourceOut

Previously optimized routines qt_blend_argb32_on_argb32_mips_dsp and
comp_func_Source_mips_dsp are redesigned and rewritten.

Overall improvement by running tst_bench_blendbench benchmark app
from tests/benchmarks/gui/image/blendbench/ is 27%.

Change-Id: I6ab09b17cac10f4aded59787074ab4c89e72ccac
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
This commit is contained in:
Damir Tatalovic 2012-06-22 18:13:02 +02:00 committed by Qt by Nokia
parent 60829b4a56
commit 614422c2e7
6 changed files with 1770 additions and 159 deletions

View File

@ -5971,6 +5971,23 @@ void qInitDrawhelperAsm()
#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP)
functionForMode_C[QPainter::CompositionMode_SourceOver] = comp_func_SourceOver_asm_mips_dsp;
functionForMode_C[QPainter::CompositionMode_Source] = comp_func_Source_mips_dsp;
functionForMode_C[QPainter::CompositionMode_DestinationOver] = comp_func_DestinationOver_mips_dsp;
functionForMode_C[QPainter::CompositionMode_SourceIn] = comp_func_SourceIn_mips_dsp;
functionForMode_C[QPainter::CompositionMode_DestinationIn] = comp_func_DestinationIn_mips_dsp;
functionForMode_C[QPainter::CompositionMode_DestinationOut] = comp_func_DestinationOut_mips_dsp;
functionForMode_C[QPainter::CompositionMode_SourceAtop] = comp_func_SourceAtop_mips_dsp;
functionForMode_C[QPainter::CompositionMode_DestinationAtop] = comp_func_DestinationAtop_mips_dsp;
functionForMode_C[QPainter::CompositionMode_Xor] = comp_func_XOR_mips_dsp;
functionForMode_C[QPainter::CompositionMode_SourceOut] = comp_func_SourceOut_mips_dsp;
functionForModeSolid_C[QPainter::CompositionMode_SourceOver] = comp_func_solid_SourceOver_mips_dsp;
functionForModeSolid_C[QPainter::CompositionMode_DestinationOver] = comp_func_solid_DestinationOver_mips_dsp;
functionForModeSolid_C[QPainter::CompositionMode_SourceIn] = comp_func_solid_SourceIn_mips_dsp;
functionForModeSolid_C[QPainter::CompositionMode_DestinationIn] = comp_func_solid_DestinationIn_mips_dsp;
functionForModeSolid_C[QPainter::CompositionMode_SourceAtop] = comp_func_solid_SourceAtop_mips_dsp;
functionForModeSolid_C[QPainter::CompositionMode_DestinationAtop] = comp_func_solid_DestinationAtop_mips_dsp;
functionForModeSolid_C[QPainter::CompositionMode_Xor] = comp_func_solid_XOR_mips_dsp;
functionForModeSolid_C[QPainter::CompositionMode_SourceOut] = comp_func_solid_SourceOut_mips_dsp;
qt_memfill32 = qt_memfill32_asm_mips_dsp;

View File

@ -45,24 +45,6 @@
QT_BEGIN_NAMESPACE
#if defined(QT_COMPILER_SUPPORTS_MIPS_DSP)
extern "C" uint INTERPOLATE_PIXEL_255_asm_mips_dsp(uint x, uint a, uint y, uint b);
extern "C" uint BYTE_MUL_asm_mips_dsp(uint x, uint a);
extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
#if defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
extern "C" uint INTERPOLATE_PIXEL_255_asm_mips_dspr2(uint x, uint a, uint y, uint b);
extern "C" uint BYTE_MUL_asm_mips_dspr2(uint x, uint a);
#endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2
void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
@ -80,32 +62,21 @@ void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
uint *dst = (uint *) destPixels;
if (const_alpha == 256) {
for (int y=0; y<h; ++y) {
for (int x=0; x<w; ++x) {
uint s = src[x];
if (s >= 0xff000000)
dst[x] = s;
else if (s != 0)
#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
dst[x] = s + BYTE_MUL_asm_mips_dsp(dst[x], qAlpha(~s));
#else
dst[x] = s + BYTE_MUL_asm_mips_dspr2(dst[x], qAlpha(~s));
#endif
}
qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(dst, src, w);
dst = (quint32 *)(((uchar *) dst) + dbpl);
src = (const quint32 *)(((const uchar *) src) + sbpl);
}
} else if (const_alpha != 0) {
const_alpha = (const_alpha * 255) >> 8;
for (int y=0; y<h; ++y) {
for (int x=0; x<w; ++x) {
#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
uint s = BYTE_MUL_asm_mips_dsp(src[x], const_alpha);
dst[x] = s + BYTE_MUL_asm_mips_dsp(dst[x], qAlpha(~s));
#else
uint s = BYTE_MUL_asm_mips_dspr2(src[x], const_alpha);
dst[x] = s + BYTE_MUL_asm_mips_dspr2(dst[x], qAlpha(~s));
#endif
if (h%2 > 0) {
uint s = BYTE_MUL(src[0], const_alpha);
dst[0] = s + BYTE_MUL(dst[0], qAlpha(~s));
h--;
dst++;
src++;
}
qt_blend_argb32_on_argb32_mips_dsp_asm_x2(dst, src, h, const_alpha);
dst = (quint32 *)(((uchar *) dst) + dbpl);
src = (const quint32 *)(((const uchar *) src) + sbpl);
}
@ -145,13 +116,13 @@ void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint con
::memcpy(dest, src, length * sizeof(uint));
} else {
int ialpha = 255 - const_alpha;
for (int i = 0; i < length; ++i) {
#if !defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
dest[i] = INTERPOLATE_PIXEL_255_asm_mips_dsp(src[i], const_alpha, dest[i], ialpha);
#else
dest[i] = INTERPOLATE_PIXEL_255_asm_mips_dspr2(src[i], const_alpha, dest[i], ialpha);
#endif
if (length%2 > 0) {
dest[0] = INTERPOLATE_PIXEL_255(src[0], const_alpha, dest[0], ialpha);
length--;
dest++;
src++;
}
comp_func_Source_dsp_asm_x2(dest, src, length, const_alpha);
}
}
@ -171,6 +142,285 @@ void QT_FASTCALL qt_destStoreARGB32_mips_dsp(QRasterBuffer *rasterBuffer, int x,
qt_destStoreARGB32_asm_mips_dsp(data, buffer, length);
}
#endif // QT_COMPILER_SUPPORTS_MIPS_DSP
// Solid-colour SourceOver.
// The colour is pre-scaled by const_alpha (unless it is 255). If length is odd,
// the first pixel is blended here as color + BYTE_MUL(dest, qAlpha(~color));
// the remaining even count goes to the assembly routine (the "_x2" suffix
// presumably means it works on pixel pairs).
void QT_FASTCALL comp_func_solid_SourceOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    if (const_alpha != 255)
        color = BYTE_MUL(color, const_alpha);

    if (length % 2 == 1) {
        *dest = color + BYTE_MUL(*dest, qAlpha(~color));
        ++dest;
        --length;
    }

    comp_func_solid_Source_dsp_asm_x2(dest, length, color, qAlpha(~color));
}
// Solid-colour DestinationOver.
// The colour is pre-scaled by const_alpha (unless it is 255). An odd leading
// pixel is computed here as d + BYTE_MUL(color, qAlpha(~d)); the rest of the
// span (even count) is passed to the assembly routine.
void QT_FASTCALL comp_func_solid_DestinationOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    if (const_alpha != 255)
        color = BYTE_MUL(color, const_alpha);

    if (length % 2 == 1) {
        const uint pixel = *dest;
        *dest = pixel + BYTE_MUL(color, qAlpha(~pixel));
        ++dest;
        --length;
    }

    comp_func_solid_DestinationOver_dsp_asm_x2(dest, length, color);
}
// DestinationOver with a per-pixel source.
// If length is odd, the first pixel is handled here: the source pixel is
// scaled by const_alpha when const_alpha != 255, then the result is
// d + BYTE_MUL(s, qAlpha(~d)). The remaining even count is passed to the
// assembly routine together with const_alpha.
void QT_FASTCALL comp_func_DestinationOver_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (length % 2 == 1) {
        const uint dstPixel = *dest;
        const uint srcPixel = (const_alpha == 255) ? *src
                                                   : BYTE_MUL(*src, const_alpha);
        *dest = dstPixel + BYTE_MUL(srcPixel, qAlpha(~dstPixel));
        ++dest;
        ++src;
        --length;
    }

    comp_func_DestinationOver_dsp_asm_x2(dest, src, length, const_alpha);
}
// Solid-colour SourceIn.
// An odd leading pixel is computed here; the even remainder is passed to the
// assembly routine with the unscaled colour and const_alpha.
//   const_alpha == 255: dest = BYTE_MUL(color, qAlpha(dest))
//   otherwise:          dest = INTERPOLATE_PIXEL_255(color * const_alpha,
//                                                    qAlpha(d), d, 255 - const_alpha)
void QT_FASTCALL comp_func_solid_SourceIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    if (length % 2 == 1) {
        const uint d = *dest;
        if (const_alpha != 255) {
            const uint scaledColor = BYTE_MUL(color, const_alpha);
            const uint cia = 255 - const_alpha;
            *dest = INTERPOLATE_PIXEL_255(scaledColor, qAlpha(d), d, cia);
        } else {
            *dest = BYTE_MUL(color, qAlpha(d));
        }
        ++dest;
        --length;
    }

    comp_func_solid_SourceIn_dsp_asm_x2(dest, length, color, const_alpha);
}
// SourceIn with a per-pixel source.
// An odd leading pixel is computed here; the even remainder is passed to the
// assembly routine.
//   const_alpha == 255: dest = BYTE_MUL(src, qAlpha(dest))
//   otherwise:          dest = INTERPOLATE_PIXEL_255(src * const_alpha,
//                                                    qAlpha(d), d, 255 - const_alpha)
void QT_FASTCALL comp_func_SourceIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (length % 2 == 1) {
        const uint d = *dest;
        if (const_alpha != 255) {
            const uint cia = 255 - const_alpha;
            const uint scaledSrc = BYTE_MUL(*src, const_alpha);
            *dest = INTERPOLATE_PIXEL_255(scaledSrc, qAlpha(d), d, cia);
        } else {
            *dest = BYTE_MUL(*src, qAlpha(d));
        }
        ++dest;
        ++src;
        --length;
    }

    comp_func_SourceIn_dsp_asm_x2(dest, src, length, const_alpha);
}
// Solid-colour DestinationIn.
// Every destination pixel is multiplied by one factor: the colour's alpha,
// or BYTE_MUL(alpha, const_alpha) + 255 - const_alpha when const_alpha != 255.
// An odd leading pixel is scaled here; the even remainder is passed to the
// assembly routine with the same factor.
void QT_FASTCALL comp_func_solid_DestinationIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    uint factor = qAlpha(color);
    if (const_alpha != 255)
        factor = BYTE_MUL(factor, const_alpha) + 255 - const_alpha;

    if (length % 2 == 1) {
        *dest = BYTE_MUL(*dest, factor);
        ++dest;
        --length;
    }

    comp_func_solid_DestinationIn_dsp_asm_x2(dest, length, factor);
}
// DestinationIn with a per-pixel source.
// For an odd leading pixel the destination is multiplied by the source alpha,
// or by BYTE_MUL(source alpha, const_alpha) + (255 - const_alpha) when
// const_alpha != 255. The even remainder is passed to the assembly routine.
void QT_FASTCALL comp_func_DestinationIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (length % 2 == 1) {
        uint factor = qAlpha(*src);
        if (const_alpha != 255) {
            const int cia = 255 - const_alpha;
            factor = BYTE_MUL(factor, const_alpha) + cia;
        }
        *dest = BYTE_MUL(*dest, factor);
        ++src;
        ++dest;
        --length;
    }

    comp_func_DestinationIn_dsp_asm_x2(dest, src, length, const_alpha);
}
// Solid-colour DestinationOut.
// Same shape as the solid DestinationIn path, but the factor starts from the
// inverted colour's alpha, qAlpha(~color). Because only the factor differs,
// the DestinationIn assembly routine is reused for the even remainder.
void QT_FASTCALL comp_func_solid_DestinationOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    uint factor = qAlpha(~color);
    if (const_alpha != 255)
        factor = BYTE_MUL(factor, const_alpha) + 255 - const_alpha;

    if (length % 2 == 1) {
        *dest = BYTE_MUL(*dest, factor);
        ++dest;
        --length;
    }

    comp_func_solid_DestinationIn_dsp_asm_x2(dest, length, factor);
}
// DestinationOut with a per-pixel source.
// For an odd leading pixel the destination is multiplied by qAlpha(~src),
// or by BYTE_MUL(qAlpha(~src), const_alpha) + (255 - const_alpha) when
// const_alpha != 255. The even remainder is passed to the assembly routine.
void QT_FASTCALL comp_func_DestinationOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (length % 2 == 1) {
        uint sia = qAlpha(~*src);
        if (const_alpha != 255) {
            const int cia = 255 - const_alpha;
            sia = BYTE_MUL(sia, const_alpha) + cia;
        }
        *dest = BYTE_MUL(*dest, sia);
        ++dest;
        ++src;
        --length;
    }

    comp_func_DestinationOut_dsp_asm_x2(dest, src, length, const_alpha);
}
// Solid-colour SourceAtop.
// The colour is pre-scaled by const_alpha (unless it is 255). An odd leading
// pixel is computed as INTERPOLATE_PIXEL_255(color, qAlpha(d), d, qAlpha(~color));
// the even remainder is passed to the assembly routine with the scaled colour
// and its inverse alpha.
void QT_FASTCALL comp_func_solid_SourceAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    if (const_alpha != 255)
        color = BYTE_MUL(color, const_alpha);

    const uint inverseColorAlpha = qAlpha(~color);

    if (length % 2 == 1) {
        const uint d = *dest;
        *dest = INTERPOLATE_PIXEL_255(color, qAlpha(d), d, inverseColorAlpha);
        ++dest;
        --length;
    }

    comp_func_solid_SourceAtop_dsp_asm_x2(dest, length, color, inverseColorAlpha);
}
// SourceAtop with a per-pixel source.
// For an odd leading pixel the source is scaled by const_alpha when
// const_alpha != 255, then the result is
// INTERPOLATE_PIXEL_255(s, qAlpha(d), d, qAlpha(~s)).
// The even remainder is passed to the assembly routine.
void QT_FASTCALL comp_func_SourceAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (length % 2 == 1) {
        const uint srcPixel = (const_alpha == 255) ? *src
                                                   : BYTE_MUL(*src, const_alpha);
        const uint dstPixel = *dest;
        *dest = INTERPOLATE_PIXEL_255(srcPixel, qAlpha(dstPixel), dstPixel, qAlpha(~srcPixel));
        ++dest;
        ++src;
        --length;
    }

    comp_func_SourceAtop_dsp_asm_x2(dest, src, length, const_alpha);
}
// Solid-colour DestinationAtop.
// The destination weight is the colour's alpha; when const_alpha != 255 the
// colour is scaled first and the weight becomes
// qAlpha(scaled colour) + 255 - const_alpha.
// An odd leading pixel is computed as
// INTERPOLATE_PIXEL_255(d, weight, color, qAlpha(~d)); the even remainder is
// passed to the assembly routine.
void QT_FASTCALL comp_func_solid_DestinationAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    uint weight = qAlpha(color);
    if (const_alpha != 255) {
        color = BYTE_MUL(color, const_alpha);
        weight = qAlpha(color) + 255 - const_alpha;
    }

    if (length % 2 == 1) {
        const uint pixel = *dest;
        *dest = INTERPOLATE_PIXEL_255(pixel, weight, color, qAlpha(~pixel));
        ++dest;
        --length;
    }

    comp_func_solid_DestinationAtop_dsp_asm_x2(dest, length, color, weight);
}
// DestinationAtop with a per-pixel source.
// For an odd leading pixel:
//   const_alpha == 255: weight = qAlpha(s)
//   otherwise:          s is scaled by const_alpha and
//                       weight = qAlpha(s) + (255 - const_alpha)
// The result is INTERPOLATE_PIXEL_255(d, weight, s, qAlpha(~d)).
// The even remainder is passed to the assembly routine.
void QT_FASTCALL comp_func_DestinationAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (length % 2 == 1) {
        uint srcPixel = *src;
        uint weight;
        if (const_alpha == 255) {
            weight = qAlpha(srcPixel);
        } else {
            const int cia = 255 - const_alpha;
            srcPixel = BYTE_MUL(srcPixel, const_alpha);
            weight = qAlpha(srcPixel) + cia;
        }
        const uint dstPixel = *dest;
        *dest = INTERPOLATE_PIXEL_255(dstPixel, weight, srcPixel, qAlpha(~dstPixel));
        ++dest;
        ++src;
        --length;
    }

    comp_func_DestinationAtop_dsp_asm_x2(dest, src, length, const_alpha);
}
// Solid-colour XOR.
// The colour is pre-scaled by const_alpha (unless it is 255). An odd leading
// pixel is computed as
// INTERPOLATE_PIXEL_255(color, qAlpha(~d), d, qAlpha(~color)); the even
// remainder is passed to the assembly routine.
void QT_FASTCALL comp_func_solid_XOR_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    if (const_alpha != 255)
        color = BYTE_MUL(color, const_alpha);

    const uint inverseColorAlpha = qAlpha(~color);

    if (length % 2 == 1) {
        const uint pixel = *dest;
        *dest = INTERPOLATE_PIXEL_255(color, qAlpha(~pixel), pixel, inverseColorAlpha);
        ++dest;
        --length;
    }

    comp_func_solid_XOR_dsp_asm_x2(dest, length, color, inverseColorAlpha);
}
// XOR with a per-pixel source.
// For an odd leading pixel the source is scaled by const_alpha when
// const_alpha != 255, then the result is
// INTERPOLATE_PIXEL_255(s, qAlpha(~d), d, qAlpha(~s)).
// The even remainder is passed to the assembly routine.
void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (length % 2 == 1) {
        const uint dstPixel = *dest;
        const uint srcPixel = (const_alpha == 255) ? *src
                                                   : BYTE_MUL(*src, const_alpha);
        *dest = INTERPOLATE_PIXEL_255(srcPixel, qAlpha(~dstPixel), dstPixel, qAlpha(~srcPixel));
        ++dest;
        ++src;
        --length;
    }

    comp_func_XOR_dsp_asm_x2(dest, src, length, const_alpha);
}
// Solid-colour SourceOut.
// An odd leading pixel is computed here; the even remainder is passed to the
// assembly routine with the unscaled colour and const_alpha.
//   const_alpha == 255: dest = BYTE_MUL(color, qAlpha(~dest))
//   otherwise:          dest = INTERPOLATE_PIXEL_255(color * const_alpha,
//                                                    qAlpha(~d), d, 255 - const_alpha)
void QT_FASTCALL comp_func_solid_SourceOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha)
{
    if (length % 2 == 1) {
        const uint d = *dest;
        if (const_alpha != 255) {
            const uint scaledColor = BYTE_MUL(color, const_alpha);
            const int cia = 255 - const_alpha;
            *dest = INTERPOLATE_PIXEL_255(scaledColor, qAlpha(~d), d, cia);
        } else {
            *dest = BYTE_MUL(color, qAlpha(~d));
        }
        ++dest;
        --length;
    }

    comp_func_solid_SourceOut_dsp_asm_x2(dest, length, color, const_alpha);
}
// SourceOut with a per-pixel source.
// An odd leading pixel is computed here; the even remainder is passed to the
// assembly routine.
//   const_alpha == 255: dest = BYTE_MUL(src, qAlpha(~dest))
//   otherwise:          dest = INTERPOLATE_PIXEL_255(src * const_alpha,
//                                                    qAlpha(~d), d, 255 - const_alpha)
void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
    if (length % 2 == 1) {
        const uint d = *dest;
        if (const_alpha != 255) {
            const int cia = 255 - const_alpha;
            const uint scaledSrc = BYTE_MUL(*src, const_alpha);
            *dest = INTERPOLATE_PIXEL_255(scaledSrc, qAlpha(~d), d, cia);
        } else {
            *dest = BYTE_MUL(*src, qAlpha(~d));
        }
        ++dest;
        ++src;
        --length;
    }

    comp_func_SourceOut_dsp_asm_x2(dest, src, length, const_alpha);
}
QT_END_NAMESPACE

File diff suppressed because it is too large Load Diff

View File

@ -52,6 +52,48 @@ extern "C" void qt_memfill32_asm_mips_dsp(quint32 *dest, quint32 value, int coun
extern "C" void comp_func_SourceOver_asm_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_solid_DestinationOver_dsp_asm_x2(uint *dest, int length, uint color);
extern "C" void comp_func_solid_Source_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
extern "C" void comp_func_DestinationOver_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_solid_SourceIn_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
extern "C" void comp_func_SourceIn_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_solid_DestinationIn_dsp_asm_x2(uint *dest, int length, uint a);
extern "C" void comp_func_DestinationIn_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_DestinationOut_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_solid_SourceAtop_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
extern "C" void comp_func_SourceAtop_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_solid_DestinationAtop_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
extern "C" void comp_func_DestinationAtop_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_solid_XOR_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
extern "C" void comp_func_XOR_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_solid_SourceOut_dsp_asm_x2(uint *dest, int length, uint color, uint const_alpha);
extern "C" void comp_func_SourceOut_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void comp_func_Source_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void qt_blend_argb32_on_argb32_mips_dsp_asm_x2(uint *dest, const uint *src, int length, uint const_alpha);
extern "C" void qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(uint *dest, const uint *src, int length);
extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
@ -71,6 +113,46 @@ uint * QT_FASTCALL qt_destFetchARGB32_mips_dsp(uint *buffer,
void QT_FASTCALL qt_destStoreARGB32_mips_dsp(QRasterBuffer *rasterBuffer, int x, int y,
const uint *buffer, int length);
void QT_FASTCALL comp_func_solid_Source_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_solid_SourceOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_solid_DestinationOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_solid_SourceOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_solid_DestinationOver_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_DestinationOver_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
void QT_FASTCALL comp_func_solid_SourceIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_SourceIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
void QT_FASTCALL comp_func_solid_DestinationIn_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_DestinationIn_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
void QT_FASTCALL comp_func_solid_DestinationOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_DestinationOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
void QT_FASTCALL comp_func_solid_SourceAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_SourceAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
void QT_FASTCALL comp_func_solid_DestinationAtop_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_DestinationAtop_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
void QT_FASTCALL comp_func_solid_XOR_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_solid_SourceOut_mips_dsp(uint *dest, int length, uint color, uint const_alpha);
void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
#endif // QT_COMPILER_SUPPORTS_MIPS_DSP

View File

@ -41,59 +41,6 @@
#include "qt_mips_asm_dsp.h"
LEAF_MIPS_DSPR2(INTERPOLATE_PIXEL_255_asm_mips_dspr2)
/*
* Per-byte weighted sum of two packed 32-bit values: each byte of x is
* multiplied by a, each byte of y by b, the products are added, and every
* 16-bit sum t is reduced to a byte as (t + (t >> 8) + 0x80) >> 8.
* The packed result is returned in v0.
*
* a0 - uint x (First value to multiply)
* a1 - uint a (Multiplicator byte for first value)
* a2 - uint y (Second value to multiply)
* a3 - uint b (Multiplicator byte for second value)
*/
.set reorder
/* Replicate each multiplier into both halfwords: | 00 | a | 00 | a | */
replv.ph a1, a1
replv.ph a3, a3
/* Rounding constant: 8388736 == 0x00800080 (0x80 in each halfword) */
li t8, 8388736
/* qbl = products of the two left-most bytes, qbr = of the two right-most */
muleu_s.ph.qbl t0, a0, a1
muleu_s.ph.qbl t1, a2, a3
muleu_s.ph.qbr t2, a0, a1
muleu_s.ph.qbr t3, a2, a3
/* t4 / t5 = x*a + y*b for the left / right byte pair */
addu.ph t4, t0, t1
addu.ph t5, t2, t3
/* Left pair: t1 = t4 + (high byte of each halfword of t4) + rounding */
preceu.ph.qbla t0, t4
addu t1, t0, t8
addu t1, t4, t1
/* Right pair: t7 = t5 + (high byte of each halfword of t5) + rounding */
preceu.ph.qbla t6, t5
addu t7, t6, t8
addu t7, t5, t7
/* Keep the high byte of each of the four halfwords and pack them into a word */
precrq.qb.ph t2, t1, t7
move v0, t2
j ra
END(INTERPOLATE_PIXEL_255_asm_mips_dspr2)
LEAF_MIPS_DSPR2(BYTE_MUL_asm_mips_dspr2)
/*
* Multiplies every byte of x by the byte a; each 16-bit product t is reduced
* to a byte as (t + (t >> 8) + 0x80) >> 8. The packed result is returned in v0.
*
* a0 - uint x (Value to multiply)
* a1 - uint a (Multiplicator byte)
*/
.set reorder
replv.ph a1, a1 /* a1 = | 00 | a | 00 | a | (a replicated into both halfwords) */
li t4, 8388736 /* t4 = 0x00800080 (rounding term, 0x80 per halfword) */
/* t0 = products of the two left-most bytes, t2 = of the two right-most */
muleu_s.ph.qbl t0, a0, a1
muleu_s.ph.qbr t2, a0, a1
/* Left pair: t0 += (high byte of each halfword of t0), then add rounding */
preceu.ph.qbla t1, t0
addu t0, t0, t1
addu t0, t0, t4
/* Right pair: same steps on t2 */
preceu.ph.qbla t3, t2
addu t2, t2, t3
addu t2, t2, t4
/* Keep the high byte of each halfword; t4 is reused as the result register */
precrq.qb.ph t4, t0, t2
move v0, t4
j ra
END(BYTE_MUL_asm_mips_dspr2)
LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2)
/*
* a0 - dst (a8r8g8b8)

View File

@ -110,4 +110,140 @@ LEAF_MIPS32R2(symbol) \
.end function; \
.size function,.-function
/*
* BYTE_MUL operation on two pixels (in_1 and in_2) with two
* multiplicator bytes, repl_a1 and repl_a2, which should be
* prepared with:
* replv.ph repl_a1, a1
* replv.ph repl_a2, a2
* so that they become:
* repl_a1 = | 00 | a1 | 00 | a1 |
* repl_a2 = | 00 | a2 | 00 | a2 |
*
* rounding_factor must have the following value:
* li rounding_factor, 0x00800080
*
* scratch(n) - temporary registers
*
* Results: out_1 = in_1 * a1, out_2 = in_2 * a2 (per byte).
*
* in_const: 1 -> (default) in_1 and in_2 remain unchanged
* after usage; repl_a1 and repl_a2 are
* overwritten instead
* 0 -> (or anything other than 1) repl_a1 and
* repl_a2 remain unchanged after usage;
* in_1 and in_2 are overwritten instead
*/
.macro BYTE_MUL_x2 in_1, in_2, out_1, out_2 \
repl_a1, repl_a2, rounding_factor, \
scratch1, scratch2, scratch3, scratch4, \
in_const = 1
/* scratch1/scratch2: left/right byte-pair products of in_1 */
/* scratch3/scratch4: left/right byte-pair products of in_2 */
muleu_s.ph.qbl \scratch1, \in_1, \repl_a1
muleu_s.ph.qbr \scratch2, \in_1, \repl_a1
muleu_s.ph.qbl \scratch3, \in_2, \repl_a2
muleu_s.ph.qbr \scratch4, \in_2, \repl_a2
/* Add the high byte of every halfword product back onto the product. */
/* The in_1 pair needs two extra registers: the multipliers or the inputs. */
.if \in_const == 1
preceu.ph.qbla \repl_a1, \scratch1
preceu.ph.qbla \repl_a2, \scratch2
preceu.ph.qbla \out_1, \scratch3
preceu.ph.qbla \out_2, \scratch4
addu \scratch1, \repl_a1, \scratch1
addu \scratch2, \repl_a2, \scratch2
.else
preceu.ph.qbla \in_1, \scratch1
preceu.ph.qbla \in_2, \scratch2
preceu.ph.qbla \out_1, \scratch3
preceu.ph.qbla \out_2, \scratch4
addu \scratch1, \in_1, \scratch1
addu \scratch2, \in_2, \scratch2
.endif
addu \out_1, \out_1, \scratch3
addu \out_2, \out_2, \scratch4
/* Add the rounding term to all four partial results */
addu \scratch1, \scratch1, \rounding_factor
addu \scratch2, \scratch2, \rounding_factor
addu \scratch3, \out_1, \rounding_factor
addu \scratch4, \out_2, \rounding_factor
/* Pack the high byte of each halfword: out_1 from in_1, out_2 from in_2 */
precrq.qb.ph \out_1, \scratch1, \scratch2
precrq.qb.ph \out_2, \scratch3, \scratch4
.endm
/*
* BYTE_MUL operation on one pixel (in_1) with the
* multiplicator byte repl_a1, which should be
* prepared with:
* replv.ph repl_a1, a1
* so that it becomes:
* repl_a1 = | 00 | a1 | 00 | a1 |
*
* rounding_factor must have the following value:
* li rounding_factor, 0x00800080
*
* scratch(n) - temporary registers
*
* The result is written to out_1; in_1, repl_a1 and
* rounding_factor are only read.
*/
.macro BYTE_MUL in_1, out_1, \
repl_a1, rounding_factor, \
scratch1, scratch2, scratch3, scratch4
/* scratch1/scratch2: products of the left/right byte pair of in_1 */
muleu_s.ph.qbl \scratch1, \in_1, \repl_a1
muleu_s.ph.qbr \scratch2, \in_1, \repl_a1
/* scratch3/scratch4: high byte of each halfword product */
preceu.ph.qbla \scratch3, \scratch1
preceu.ph.qbla \scratch4, \scratch2
/* t + (t >> 8) + 0x80 for each halfword */
addu \scratch1, \scratch1, \scratch3
addu \scratch1, \scratch1, \rounding_factor
addu \scratch2, \scratch2, \scratch4
addu \scratch2, \scratch2, \rounding_factor
/* Pack the high byte of each of the four halfwords into out_1 */
precrq.qb.ph \out_1, \scratch1, \scratch2
.endm
/*
* macro for INTERPOLATE_PIXEL_255 operation
* in_1 - First value to multiply
* mul_1 - Multiplicator byte for first value
* in_2 - Second value to multiply
* mul_2 - Multiplicator byte for second value
* out_1 - Receives the packed result
* rounding_factor and andi_factor should be prepared
* as:
* li rounding_factor, 0x00800080
* li andi_factor, 0xff00ff00
* scratch(n) - temporary registers
*
* The "x part" handles bytes 0 and 2 of the inputs and the
* "x>>8 part" handles bytes 1 and 3. mul_1 and mul_2 are used
* with the plain mul instruction, so they are presumably passed
* as plain byte values, not replicated with replv.ph - confirm
* against the callers.
*/
.macro INTERPOLATE_PIXEL_255 in_1, mul_1, \
in_2, mul_2, \
out_1, \
rounding_factor, andi_factor \
scratch1, scratch2, scratch3, scratch4
# x part
/* Expand bytes 0 and 2 to halfwords and multiply by the weights */
preceu.ph.qbra \scratch1, \in_1
preceu.ph.qbra \scratch2, \in_2
mul \scratch1, \scratch1, \mul_1
mul \scratch2, \scratch2, \mul_2
# x>>8 part
/* Expand bytes 1 and 3 to halfwords and multiply by the weights */
preceu.ph.qbla \scratch3, \in_1
preceu.ph.qbla \scratch4, \in_2
mul \scratch3, \scratch3, \mul_1
mul \scratch4, \scratch4, \mul_2
# x part
/* t = sum; t + (t >> 8) + 0x80; then move the high bytes down to bytes 0 and 2 */
addu \scratch1, \scratch1, \scratch2
preceu.ph.qbla \scratch2, \scratch1
addu \scratch1, \scratch1, \scratch2
addu \scratch1, \scratch1, \rounding_factor
preceu.ph.qbla \scratch1, \scratch1
# x>>8 part
/* Same rounding; the high bytes are kept in place (bytes 1 and 3) by the mask */
addu \scratch3, \scratch3, \scratch4
preceu.ph.qbla \scratch4, \scratch3
addu \scratch3, \scratch3, \scratch4
addu \scratch3, \scratch3, \rounding_factor
and \scratch3, \scratch3, \andi_factor
/* Merge both halves into the final pixel */
or \out_1, \scratch1, \scratch3
.endm
#endif //QT_MIPS_DSP_H__