MIPS DSP build system fix and additional optimizations.

Changed the MIPS DSP portion of the mkspecs/features/simd.prf file in order
to fix the broken build system for MIPS platforms.

List of the additionally optimized functions

from file src/gui/painting/qdrawhelper.cpp:
- qt_blend_rgb16_on_rgb16
- qt_fetchUntransformed_888
- qt_fetchUntransformed_444
- qt_fetchUntransformed_argb8565

from file src/gui/image/qimage.cpp:
- convert_ARGB_to_ARGB_PM_inplace

from file src/corelib/qstring.cpp:
- ucstrncmp
- toLatin1_helper
- fromLatin1_helper

Change-Id: I5c47a69784917eee29a8dbd2718828a390b27c93
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
lpapuga 2013-11-20 17:09:57 +01:00 committed by The Qt Project
parent 48caaee170
commit f10356ead1
15 changed files with 2141 additions and 10 deletions

View File

@ -174,10 +174,19 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
mips_dsp_assembler.name = assembling[mips_dsp] ${QMAKE_FILE_IN}
silent:mips_dsp_assembler.commands = @echo assembling[mips_dsp] ${QMAKE_FILE_IN} && $$mips_dsp_assembler.commands
QMAKE_EXTRA_COMPILERS += mips_dsp_compiler
QMAKE_EXTRA_COMPILERS += mips_dsp_assembler
}
mips_dspr2 {
HEADERS += $$MIPS_DSP_HEADERS
mips_dspr2_compiler.commands = $$QMAKE_CXX -c
mips_dspr2_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
mips_dspr2_compiler.dependency_type = TYPE_C
mips_dspr2_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
mips_dspr2_compiler.input = MIPS_DSPR2_SOURCES
mips_dspr2_compiler.variable_out = OBJECTS
mips_dspr2_compiler.name = compiling[mips_dspr2] ${QMAKE_FILE_IN}
silent:mips_dspr2_compiler.commands = @echo compiling[mips_dspr2] ${QMAKE_FILE_IN} && $$mips_dspr2_compiler.commands
mips_dspr2_assembler.commands = $$QMAKE_CC -c
mips_dspr2_assembler.commands += $(CFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
mips_dspr2_assembler.dependency_type = TYPE_C
@ -186,6 +195,7 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
mips_dspr2_assembler.variable_out = OBJECTS
mips_dspr2_assembler.name = assembling[mips_dspr2] ${QMAKE_FILE_IN}
silent:mips_dspr2_assembler.commands = @echo assembling[mips_dspr2] ${QMAKE_FILE_IN} && $$mips_dspr2_assembler.commands
QMAKE_EXTRA_COMPILERS += mips_dspr2_compiler
QMAKE_EXTRA_COMPILERS += mips_dspr2_assembler
}
} else:win32-msvc*|winrt {
@ -297,7 +307,7 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
$$AVX_SOURCES $$AVX2_SOURCES \
$$NEON_SOURCES $$NEON_ASM \
$$IWMMXT_SOURCES \
$$MIPS_DSP_SOURCES $$MIPS_DSP_ASM $$MIPS_DSPR2_ASM
$$MIPS_DSP_SOURCES $$MIPS_DSPR2_SOURCES $$MIPS_DSP_ASM $$MIPS_DSPR2_ASM
# Headers are already done in the above sections.
}

View File

@ -111,3 +111,17 @@ ctest_qt5_module_files.files += $$ctest_macros_file.output $$cmake_extras_mkspec
ctest_qt5_module_files.path = $$[QT_INSTALL_LIBS]/cmake/Qt5Core
INSTALLS += ctest_qt5_module_files cmake_qt5_umbrella_module_files
# Extra compiler for the MIPS DSP assembly sources of this project.
# Active only when the mips_dsp CONFIG option is set and a g++ mkspec is used.
# Every file listed in MIPS_DSP_ASM is built with $$QMAKE_CXX using the regular
# C++ flags plus -mips32r2 -mdsp, and the resulting object is added to OBJECTS.
mips_dsp:*-g++* {
HEADERS += $$MIPS_DSP_HEADERS
mips_dsp_corelib_assembler.commands = $$QMAKE_CXX -c
mips_dsp_corelib_assembler.commands += $(CXXFLAGS) $(INCPATH) -mips32r2 -mdsp ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
mips_dsp_corelib_assembler.dependency_type = TYPE_C
# One object file per input, named after the input's base name.
mips_dsp_corelib_assembler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
mips_dsp_corelib_assembler.input = MIPS_DSP_ASM
mips_dsp_corelib_assembler.variable_out = OBJECTS
mips_dsp_corelib_assembler.name = assembling[mips_dsp] ${QMAKE_FILE_IN}
# In silent builds, echo a short status line before running the real command.
silent:mips_dsp_corelib_assembler.commands = @echo assembling[mips_dsp] ${QMAKE_FILE_IN} && $$mips_dsp_corelib_assembler.commands
QMAKE_EXTRA_COMPILERS += mips_dsp_corelib_assembler
}

View File

@ -188,9 +188,23 @@ static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b, const uc
return 1;
}
#if defined(__mips_dsp)
// From qstring_mips_dsp_asm.S
extern "C" int qt_ucstrncmp_mips_dsp_asm(const ushort *a,
const ushort *b,
unsigned len);
#endif
// Unicode case-sensitive compare two same-sized strings
static int ucstrncmp(const QChar *a, const QChar *b, int l)
{
#if defined(__mips_dsp)
if (l >= 8) {
return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast<const ushort*>(a),
reinterpret_cast<const ushort*>(b),
l);
}
#endif // __mips_dsp
while (l-- && *a == *b)
a++,b++;
if (l==-1)
@ -3937,6 +3951,10 @@ static inline __m128i mergeQuestionMarks(__m128i chunk)
}
#endif
#if defined(__mips_dsp)
extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length);
#endif
static QByteArray toLatin1_helper(const QChar *data, int length)
{
QByteArray ba;
@ -3989,10 +4007,14 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
length = length % 8;
}
#endif
#if defined(__mips_dsp)
qt_toLatin1_mips_dsp_asm(dst, src, length);
#else
while (length--) {
*dst++ = (*src>0xff) ? '?' : (uchar) *src;
++src;
}
#endif
}
return ba;
}
@ -4104,6 +4126,12 @@ QVector<uint> QString::toUcs4() const
return v;
}
#if defined(__mips_dsp)
// From qstring_mips_dsp_asm.S
extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint);
extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint);
#endif
QString::Data *QString::fromLatin1_helper(const char *str, int size)
{
Data *d;
@ -4144,8 +4172,15 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
size = size % 16;
}
#endif
#if defined(__mips_dsp)
if (size > 20)
qt_fromlatin1_mips_asm_unroll8(dst, str, size);
else
qt_fromlatin1_mips_asm_unroll4(dst, str, size);
#else
while (size--)
*dst++ = (uchar)*str++;
#endif
}
return d;
}

View File

@ -0,0 +1,449 @@
/****************************************************************************
**
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://qt.digia.com/licensing. For further information
** use the contact form at http://qt.digia.com/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "../../gui/painting/qt_mips_asm_dsp_p.h"
/*
 * EXTRACT_HALVES_x2 src1, src2, hi_1, hi_2
 *
 * Splits two 32-bit registers, each holding a pair of 16-bit values, into
 * four registers holding one zero-extended 16-bit value each. The two
 * source registers are overwritten with one half, hi_1/hi_2 receive the
 * other half. Which half goes where depends on the byte order: on
 * little-endian targets src keeps bits 0..15 and hi gets bits 16..31, on
 * big-endian targets it is the other way round.
 */
.macro EXTRACT_HALVES_x2 src1, src2, hi_1, hi_2
#if defined(__MIPSEL) && __MIPSEL
srl \hi_1, \src1, 16
ext \src1, \src1, 0, 16
srl \hi_2, \src2, 16
ext \src2, \src2, 0, 16
#else
ext \hi_1, \src1, 0, 16
srl \src1, \src1, 16
ext \hi_2, \src2, 0, 16
srl \src2, \src2, 16
#endif
.endm
LEAF_MIPS_DSP(qt_ucstrncmp_mips_dsp_asm)
/*
* Compares two arrays of 16-bit units of the same length, element by
* element, and stops at the first pair that differs.
*
* Arguments:
* a0 - string_a (uint16_t*)
* a1 - string_b (uint16_t*)
* a2 - length (uint32_t)
*
* Result:
* v0 - zero if all compared elements are equal, otherwise the
*      difference (a - b) of the first pair that differs.
*
* Register usage:
* t0 - batches (groups of eight elements) in the unrolled loops
*
* The low two address bits of both pointers select one of three paths:
* - both word-aligned: label 5 (word loads, subq.ph)
* - both unaligned: compare one element, then continue at label 5
* - exactly one unaligned: label 6 (unrolled halfword loads)
*
* NOTE: the loop at label 6 processes one full batch before it tests
* the batch counter, and the tail loop at label 9 loads a pair before
* it tests the remaining length. The C++ caller shown in qstring.cpp
* only calls this routine when the length is at least 8.
*/
move v0, zero /* result = 0 */
andi t0, a0, 0x3 /* t0 = string_a % 4 */
andi t1, a1, 0x3 /* t1 = string_b % 4 */
or t2, t0, t1 /* t2 = t0 | t1 */
beqz t2, 5f /* both aligned */
and t2, t0 ,t1 /* (delay slot) t2 = t0 & t1 */
beqz t2, 6f /* one aligned */
nop
/*
* Both strings are unaligned: read 1 halfword from each,
* then fall through to continue with the both-aligned case.
*/
lhu t0, 0 (a0)
lhu t1, 0 (a1)
addiu a2, a2, -1 /* len-- */
sub v0, t0, t1 /* v0 = t0-t1 */
addiu a0, a0, 2 /* string_a++ */
bnez v0, 0f /* if (t0-t1): return */
addiu a1, a1, 2 /* string_b++ */
beqz a2, 0f /* if !len: return */
/* next instruction (srl) fills the branch delay slot */
5: /* Both string pointers are aligned */
srl t0, a2, 3 /* batches = length / 8 */
beqz t0, 9f /* if !batches: tail */
andi a2, a2, 0x7 /* length = length % 8 */
SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
1: lw t1, 0 (a0) /* [a0 a1] */
lw t3, 4 (a0) /* [a2 a3] */
lw t5, 8 (a0) /* [a4 a5] */
lw t7, 12 (a0) /* [a6 a7] */
lw t2, 0 (a1) /* [b0 b1] */
lw t4, 4 (a1) /* [b2 b3] */
lw t6, 8 (a1) /* [b4 b5] */
lw t8, 12 (a1) /* [b6 b7] */
/*
* Subtract elements one by one, if the result is zero
* both halves of the registers (shorts) are equal.
* The instructions after each branch sit in its delay slot and are
* executed whether or not the branch is taken.
*/
subq.ph s0, t1, t2 /* [a0-b0 a1-b1] */
subq.ph s1, t3, t4 /* [a2-b2 a3-b3] */
bnez s0, 1f
subq.ph s2, t5, t6 /* [a4-b4 a5-b5] */
bnez s1, 2f
subq.ph s3, t7, t8 /* [a6-b6 a7-b7] */
bnez s2, 3f
addiu t0, t0, -1 /* batches-- */
bnez s3, 4f
addiu a0, a0, 8*2 /* string_a += 8 */
bnez t0, 1b /* if batches: loop */
addiu a1, a1, 8*2 /* string_b += 8 */
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
bnez a2, 9f /* if length: tail */
nop
jr ra /* all equal: v0 is still zero */
nop
/*
* Mismatch handlers for the aligned loop: split the offending word
* pair into single elements and return the first non-zero difference.
*/
1: /* Check t1 [a0 a1] vs. t2 [b0 b1] */
EXTRACT_HALVES_x2 t1, t2, t3, t4 /* a0, b0, a1, b1 */
sub v0, t1, t2
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
bnez v0, 0f /* if (a0-b0): return */
nop
jr ra
sub v0, t3, t4 /* return a1-b1 */
2: /* Check t3 [a2 a3] vs. t4 [b2 b3] */
EXTRACT_HALVES_x2 t3, t4, t1, t2 /* a2, b2, a3, b3 */
sub v0, t3, t4
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
bnez v0, 0f /* if (a2-b2): return */
nop
jr ra
sub v0, t1, t2 /* return a3-b3 */
3: /* Check t5 [a4 a5] vs. t6 [b4 b5] */
EXTRACT_HALVES_x2 t5, t6, t1, t2 /* a4, b4, a5, b5 */
sub v0, t5, t6
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
bnez v0, 0f /* if (a4-b4): return */
nop
jr ra
sub v0, t1, t2 /* return a5-b5 */
4: /* Check t7 [a6 a7] vs. t8 [b6 b7] */
EXTRACT_HALVES_x2 t7, t8, t1, t2 /* a6, b6, a7, b7 */
sub v0, t7, t8
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
bnez v0, 0f /* if (a6-b6): return */
nop
jr ra
sub v0, t1, t2 /* return a7-b7 */
/* Process remaining tail items. */
9: lhu t1, 0 (a0) /* a */
lhu t2, 0 (a1) /* b */
addiu a2, a2, -1 /* length-- */
sub v0, t1, t2 /* result = (a - b) */
bnez v0, 0f /* if (a - b): return */
addiu a0, a0, 2 /* string_a++ */
bnez a2, 9b /* if length: loop */
addiu a1, a1, 2 /* string_b++ */
0: jr ra
nop
/* One of the inputs is unaligned, do unrolled half-word loads */
6: srl t0, a2, 3 /* batches = length / 8 */
andi a2, a2, 0x7 /* length = length % 8 */
1: lhu t1, 0 (a0) /* first four element pairs of the batch */
lhu t2, 0 (a1)
lhu t3, 2 (a0)
lhu t4, 2 (a1)
lhu t5, 4 (a0)
lhu t6, 4 (a1)
lhu t7, 6 (a0)
lhu t8, 6 (a1)
sub v0, t1, t2 /* differences end up in v0, t1, t2, t3 */
sub t1, t3, t4
bnez v0, 0f
sub t2, t5, t6
bnez t1, 2f
sub t3, t7, t8
bnez t2, 3f
lhu t1, 8 (a0) /* (delay slot) start loading the second half */
bnez t3, 4f
lhu t2, 8 (a1)
lhu t3, 10 (a0)
lhu t4, 10 (a1)
lhu t5, 12 (a0)
lhu t6, 12 (a1)
lhu t7, 14 (a0)
lhu t8, 14 (a1)
sub v0, t1, t2
sub t1, t3, t4
bnez v0, 0f
sub t2, t5, t6
bnez t1, 2f
sub t3, t7, t8
bnez t2, 3f
addiu t0, t0, -1 /* batches-- */
bnez t3, 4f
addiu a0, a0, 8*2 /* string_a += 8 */
bnez t0, 1b /* if batches: loop */
addiu a1, a1, 8*2 /* string_b += 8 */
bnez a2, 9b /* if length: tail */
nop
0: jr ra
nop
/* Return the difference that was found in t1, t2 or t3. */
2: jr ra
move v0, t1
3: jr ra
move v0, t2
4: jr ra
move v0, t3
END(qt_ucstrncmp_mips_dsp_asm)
#if defined(__mips_dspr2)
LEAF_MIPS_DSPR2(qt_fromlatin1_mips_asm_unroll8)
#else
LEAF_MIPS_DSP(qt_fromlatin1_mips_asm_unroll8)
#endif
/*
* Widens bytes to 16-bit units: dst[i] = (uint16_t) src[i].
*
* Arguments:
* a0 - dst (16-bit elements)
* a1 - src (bytes)
* a2 - len (number of elements, must not be zero)
*
* Steps:
* 1. Copy single elements until src is word-aligned.
* 2. If dst is word-aligned too: eight elements per iteration with
*    word loads and word stores (first loop after label 9).
* 3. If dst is only halfword-aligned: eight elements per iteration,
*    stored as halfword + three words + halfword (label 7).
* 4. Copy the remaining (len % 8) elements one by one (label 8).
*
* NOTE(review): the loop at label 7 picks src[0] with "andi 0xff" and
* has no big-endian variant, unlike the aligned loop; it looks
* little-endian only -- confirm before enabling on big-endian targets.
*/
andi t0, a1, 0x3
beqz t0, 9f /* check that src is aligned */
nop
1: lbu t1, 0 (a1)
addiu a1, a1, 1
addiu a2, a2, -1
sh t1, 0 (a0)
beqz a2, 0f /* nothing left: return */
andi t0, a1, 0x3
bnez t0, 1b /* src still unaligned: copy another element */
addiu a0, a0, 2
9: /* source pointer is aligned: do batches of 8 elements */
andi t0, a0, 3 /* check if dst is aligned */
bnez t0, 6f
srl t0, a2, 3 /* batches = len / 8 */
andi a2, a2, 0x7 /* tail = len % 8 */
beqz t0, 8f /* if !batches: tail */
nop
1: lw t1, 0 (a1)
lw t2, 4 (a1)
addiu a1, a1, 8*1
addiu t0, t0, -1
preceu.ph.qbl t3, t1 /* upper two bytes of t1 -> two halfwords */
preceu.ph.qbr t1, t1 /* lower two bytes of t1 -> two halfwords */
preceu.ph.qbl t4, t2
preceu.ph.qbr t2, t2
#if defined(__MIPSEL) && __MIPSEL
sw t1, 0 (a0)
sw t3, 4 (a0)
sw t2, 8 (a0)
sw t4, 12 (a0)
#else
sw t3, 0 (a0)
sw t1, 4 (a0)
sw t4, 8 (a0)
sw t2, 12 (a0)
#endif
bnez t0, 1b
addiu a0, a0, 8*2
8: /* process tail items */
beqz a2, 0f
nop
1: lbu t1, 0 (a1)
addiu a2, a2, -1
sh t1, 0 (a0)
addiu a1, a1, 1
bnez a2, 1b
addiu a0, a0, 2
0: jr ra
nop
/* dst is not word-aligned; t0 already holds the batch count */
6: beqz t0, 8b /* if !batches: tail */
andi a2, a2, 7 /* tail = len % 8 */
7: lw t1, 0(a1) /* src[0..3] */
lw t2, 4(a1) /* src[4..7] */
addiu t0, t0, -1
addiu a1, a1, 8
andi t3, t1, 0xff /* t3 = src[0] */
/* t1 = src[1..4]: drop the low byte of t1, take the low byte of t2 */
#if defined(__mips_dspr2)
prepend t1, t2, 8
#else
sll t4, t2, 24 /* low byte of t2 moved to the top byte */
srl t1, t1, 8
or t1, t1, t4
#endif
srl t2, t2, 8 /* t2 = src[5..7] */
preceu.ph.qbr t4, t1 /* src[1], src[2] */
preceu.ph.qbl t1, t1 /* src[3], src[4] */
preceu.ph.qbr t5, t2 /* src[5], src[6] */
srl t2, t2, 16 /* src[7] */
sh t3, 0(a0)
sw t4, 2(a0) /* dst + 2 is word-aligned on this path */
sw t1, 6(a0)
sw t5, 10(a0)
sh t2, 14(a0)
bnez t0, 7b
addiu a0, a0, 16
bnez a2, 1b /* remaining elements: tail loop */
nop
jr ra
nop
END(qt_fromlatin1_mips_asm_unroll8)
LEAF_MIPS_DSP(qt_fromlatin1_mips_asm_unroll4)
/*
* Widens bytes to 16-bit units, dst[i] = src[i], using only byte loads
* and halfword stores: four elements per iteration, then a tail loop.
*
* Arguments:
* a0 - dst (uint16_t*)
* a1 - src (const char*)
* a2 - len (unsigned int)
*/
/*
* QString::fromLatin1_helper() already handles the len==0
* case: assume that len is never zero. (With fewer than four elements
* the code jumps straight into the tail loop at label 9, which copies
* one element before it tests the counter.)
*/
srl t0, a2, 2 /* t0 = len / 4 */
beqz t0, 9f /* if !t0: tail */
andi a2, a2, 0x3 /* len = len % 4 */
1: lbu t1, 0(a1)
lbu t2, 1(a1)
lbu t3, 2(a1)
lbu t4, 3(a1)
sh t1, 0(a0)
sh t2, 2(a0)
sh t3, 4(a0)
sh t4, 6(a0)
addiu t0, t0, -1
addiu a1, a1, 4 /* src += 4 */
bnez t0, 1b
addiu a0, a0, 8 /* dst += 4 */
8: beqz a2, 0f /* no tail items: return */
nop
9: lbu t1, 0(a1)
addiu a2, a2, -1 /* len-- */
addiu a1, a1, 1 /* src++ */
sh t1, 0(a0)
bnez a2, 9b /* if len: loop */
addiu a0, a0, 2 /* dst++ */
0: jr ra
nop
END(qt_fromlatin1_mips_asm_unroll4)
LEAF_MIPS_DSP(qt_toLatin1_mips_dsp_asm)
/*
* Narrows 16-bit units to bytes. A unit whose upper byte is non-zero
* (value above 0xff) is replaced by 0x3f ('?'), otherwise its low byte
* is stored. Four units per iteration, then a one-by-one tail loop.
* A length of zero is handled (both loops are skipped).
*
* a0 - dst
* a1 - src
* a2 - length
*/
addiu t9, zero, 0x3f /* t9 = '?' replacement character */
srl t8, a2, 2 /* t8 = length / 4 */
beqz t8, 2f /* if !t8: tail */
andi a2, a2, 3 /* length = length % 4 */
1:
lhu t0, 0(a1)
lhu t1, 2(a1)
lhu t2, 4(a1)
lhu t3, 6(a1)
srl t4, t0, 8 /* upper byte of each unit */
srl t5, t1, 8
srl t6, t2, 8
srl t7, t3, 8
movn t0, t9, t4 /* if upper byte != 0: unit = '?' */
movn t1, t9, t5
movn t2, t9, t6
movn t3, t9, t7
addiu a1, a1, 8 /* src += 4 */
addiu t8, t8, -1
sb t0, 0(a0)
sb t1, 1(a0)
sb t2, 2(a0)
sb t3, 3(a0)
bgtz t8, 1b
addiu a0, a0, 4 /* dst += 4 */
2: beqz a2, 4f /* no tail items: return */
nop
3:
lhu t0, 0(a1)
addiu a1, a1, 2 /* src++ */
addiu a2, a2, -1 /* length-- */
srl t1, t0, 8
movn t0, t9, t1 /* if upper byte != 0: unit = '?' */
sb t0, 0(a0)
bgtz a2, 3b
addiu a0, a0, 1 /* dst++ */
4:
jr ra
nop
END(qt_toLatin1_mips_dsp_asm)

View File

@ -192,3 +192,7 @@ INCLUDEPATH += ../3rdparty/md5 \
!macx-icc:!vxworks:unix:LIBS_PRIVATE += -lm
TR_EXCLUDE += ../3rdparty/*
# MIPS DSP
MIPS_DSP_ASM += tools/qstring_mips_dsp_asm.S
MIPS_DSP_HEADERS += ../gui/painting/qt_mips_asm_dsp_p.h

View File

@ -78,3 +78,5 @@ NEON_SOURCES += image/qimage_neon.cpp
SSE2_SOURCES += image/qimage_sse2.cpp
SSSE3_SOURCES += image/qimage_ssse3.cpp
AVX_SOURCES += image/qimage_avx.cpp
MIPS_DSPR2_SOURCES += image/qimage_mips_dspr2.cpp
MIPS_DSPR2_ASM += image/qimage_mips_dspr2_asm.S

View File

@ -3960,6 +3960,12 @@ void qInitImageConversions()
return;
}
#endif
#ifdef QT_COMPILER_SUPPORTS_MIPS_DSPR2
extern bool convert_ARGB_to_ARGB_PM_inplace_mips_dspr2(QImageData *data, Qt::ImageConversionFlags);
inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_mips_dspr2;
return;
#endif
}
extern const uchar *qt_pow_rgb_gamma();

View File

@ -0,0 +1,69 @@
/****************************************************************************
**
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://qt.digia.com/licensing. For further information
** use the contact form at http://qt.digia.com/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "qimage.h"
#include <private/qimage_p.h>
QT_BEGIN_NAMESPACE
// Defined in qimage_mips_dspr2_asm.S
//
extern "C" void premultiply_argb_inplace_mips_asm(void*, unsigned, unsigned, int);
// In-place conversion of an ARGB32 image to ARGB32_Premultiplied using the
// MIPS DSPr2 assembly routine. Always reports success; an image without any
// pixels is left exactly as it was (including its format tag).
bool convert_ARGB_to_ARGB_PM_inplace_mips_dspr2(QImageData *data, Qt::ImageConversionFlags)
{
    Q_ASSERT(data->format == QImage::Format_ARGB32);

    const bool hasPixels = data->width && data->height;
    if (hasPixels) {
        // Bytes between the last pixel of one scanline and the first pixel of
        // the next one (four bytes per pixel).
        const int linePadding = data->bytes_per_line - (data->width << 2);
        Q_ASSERT(linePadding >= 0);

        premultiply_argb_inplace_mips_asm(data->data, data->height, data->width, linePadding);
        data->format = QImage::Format_ARGB32_Premultiplied;
    }
    return true;
}
QT_END_NAMESPACE

View File

@ -0,0 +1,207 @@
/****************************************************************************
**
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtGui module of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:LGPL$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and Digia. For licensing terms and
** conditions see http://qt.digia.com/licensing. For further information
** use the contact form at http://qt.digia.com/contact-us.
**
** GNU Lesser General Public License Usage
** Alternatively, this file may be used under the terms of the GNU Lesser
** General Public License version 2.1 as published by the Free Software
** Foundation and appearing in the file LICENSE.LGPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU Lesser General Public License version 2.1 requirements
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
**
** In addition, as a special exception, Digia gives you certain additional
** rights. These rights are described in the Digia Qt LGPL Exception
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3.0 as published by the Free Software
** Foundation and appearing in the file LICENSE.GPL included in the
** packaging of this file. Please review the following information to
** ensure the GNU General Public License version 3.0 requirements will be
** met: http://www.gnu.org/copyleft/gpl.html.
**
**
** $QT_END_LICENSE$
**
****************************************************************************/
#include "../painting/qt_mips_asm_dsp_p.h"
LEAF_MIPS_DSPR2(premultiply_argb_inplace_mips_asm)
/*
* Premultiplies 32-bit ARGB pixels in place, line by line.
*
* Arguments (names taken from the comments below):
* a0 - data: first pixel of the first line
* a1 - numlines
* a2 - linelen: pixels per line
* a3 - srclineskip: bytes to skip after the last pixel of each line
*
* Per pixel, each byte is multiplied by the pixel's alpha (top byte),
* the product's own upper byte is added, and the sum is shifted right
* by 8 with rounding. The top byte of the result is the original alpha
* (see the "append" instructions).
*
* Neither the line loop (label 3) nor the tail loop (label 1) tests
* its counter before the first iteration; the C++ wrapper returns
* early when width or height is zero.
*/
SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
3: srl v1, a2, 3 /* v1 = linelen / 8 */
addiu a1, a1, -1 /* numlines-- */
beqz v1, 1f /* if (!(linelen / 8)): tail */
andi v0, a2, 0x7 /* v0 = linelen % 8 */
pref 5, 0 (a0) /* cache-hint: store-streamed */
/* unrolled loop, handles (v1 = len / 8) batches of 8 pixels */
2: addiu v1, v1, -1
pref 5, 0(a0)
pref 5, 32(a0)
/* pixels 1..4 of the batch */
lw t0, 0(a0)
lw t1, 4(a0)
lw t2, 8(a0)
lw t3, 12(a0)
srl t4, t0, 24 /* 00|00|00|A1 */
replv.ph t5, t4 /* 00|A1|00|A1 */
srl t6, t1, 24 /* 00|00|00|A2 */
replv.ph t7, t6 /* 00|A2|00|A2 */
muleu_s.ph.qbl t8, t0, t5 /* A1*A1|A1*R1 */
muleu_s.ph.qbr t0, t0, t5 /* A1*G1|A1*B1 */
muleu_s.ph.qbl t9, t1, t7 /* A2*A2|A2*R2 */
muleu_s.ph.qbr t1, t1, t7 /* A2*G2|A2*B2 */
srl t5, t2, 24 /* 00|00|00|A3 */
replv.ph s0, t5 /* 00|A3|00|A3 */
srl t7, t3, 24 /* 00|00|00|A4 */
replv.ph s1, t7 /* 00|A4|00|A4 */
muleu_s.ph.qbl s2, t2, s0 /* A3*A3|A3*R3 */
muleu_s.ph.qbr t2, t2, s0 /* A3*G3|A3*B3 */
muleu_s.ph.qbl s0, t3, s1 /* A4*A4|A4*R4 */
muleu_s.ph.qbr t3, t3, s1 /* A4*G4|A4*B4 */
/* product += (product >> 8), per halfword */
preceu.ph.qbla s1, t8
preceu.ph.qbla s3, t0
addu.ph t8, t8, s1
addu.ph t0, t0, s3
preceu.ph.qbla s1, t9
preceu.ph.qbla s3, t1
addu.ph t9, t9, s1
addu.ph t1, t1, s3
preceu.ph.qbla s1, s2
preceu.ph.qbla s3, t2
addu.ph s2, s2, s1
addu.ph t2, t2, s3
preceu.ph.qbla s1, s0
preceu.ph.qbla s3, t3
addu.ph s0, s0, s1
addu.ph t3, t3, s3
/* rounding shift right by 8, per halfword */
shra_r.ph t8, t8, 8 /* xxAA1|xxRR1 */
shra_r.ph t0, t0, 8 /* xxGG1|xxBB1 */
shra_r.ph t9, t9, 8
shra_r.ph t1, t1, 8
shra_r.ph s2, s2, 8
shra_r.ph t2, t2, 8
shra_r.ph s0, s0, 8
shra_r.ph t3, t3, 8
/* pack the four low bytes back into one word per pixel */
precr.qb.ph t0, t8, t0
precr.qb.ph t1, t9, t1
precr.qb.ph t2, s2, t2
precr.qb.ph t3, s0, t3
/* original alpha (t4/t6/t5/t7) in the top byte, new R|G|B below */
append t4, t0, 24
append t6, t1, 24
append t5, t2, 24
append t7, t3, 24
sw t4, 0(a0)
sw t6, 4(a0)
sw t5, 8(a0)
sw t7, 12(a0)
/* pixels 5..8 of the batch, same sequence as above */
lw t0, 16(a0)
lw t1, 20(a0)
lw t2, 24(a0)
lw t3, 28(a0)
srl t4, t0, 24 /* 00|00|00|A5 */
replv.ph t5, t4 /* 00|A5|00|A5 */
srl t6, t1, 24 /* 00|00|00|A6 */
replv.ph t7, t6 /* 00|A6|00|A6 */
muleu_s.ph.qbl t8, t0, t5 /* A5*A5|A5*R5 */
muleu_s.ph.qbr t0, t0, t5 /* A5*G5|A5*B5 */
muleu_s.ph.qbl t9, t1, t7 /* A6*A6|A6*R6 */
muleu_s.ph.qbr t1, t1, t7 /* A6*G6|A6*B6 */
srl t5, t2, 24 /* 00|00|00|A7 */
replv.ph s0, t5 /* 00|A7|00|A7 */
srl t7, t3, 24 /* 00|00|00|A8 */
replv.ph s1, t7 /* 00|A8|00|A8 */
muleu_s.ph.qbl s2, t2, s0 /* A7*A7|A7*R7 */
muleu_s.ph.qbr t2, t2, s0 /* A7*G7|A7*B7 */
muleu_s.ph.qbl s0, t3, s1 /* A8*A8|A8*R8 */
muleu_s.ph.qbr t3, t3, s1 /* A8*G8|A8*B8 */
preceu.ph.qbla s1, t8
preceu.ph.qbla s3, t0
addu.ph t8, t8, s1
addu.ph t0, t0, s3
preceu.ph.qbla s1, t9
preceu.ph.qbla s3, t1
addu.ph t9, t9, s1
addu.ph t1, t1, s3
preceu.ph.qbla s1, s2
preceu.ph.qbla s3, t2
addu.ph s2, s2, s1
addu.ph t2, t2, s3
preceu.ph.qbla s1, s0
preceu.ph.qbla s3, t3
addu.ph s0, s0, s1
addu.ph t3, t3, s3
shra_r.ph t8, t8, 8 /* xxAA5|xxRR5 */
shra_r.ph t0, t0, 8 /* xxGG5|xxBB5 */
shra_r.ph t9, t9, 8
shra_r.ph t1, t1, 8
shra_r.ph s2, s2, 8
shra_r.ph t2, t2, 8
shra_r.ph s0, s0, 8
shra_r.ph t3, t3, 8
precr.qb.ph t0, t8, t0
precr.qb.ph t1, t9, t1
precr.qb.ph t2, s2, t2
precr.qb.ph t3, s0, t3
append t4, t0, 24
append t6, t1, 24
append t5, t2, 24
append t7, t3, 24
sw t4, 16(a0)
sw t6, 20(a0)
sw t5, 24(a0)
sw t7, 28(a0)
bgtz v1, 2b /* if (v1): unrolled loop */
addiu a0, a0, 32 /* data += 8 */
beqz v0, 4f /* if (!v0): skip tail loop */
nop
/* tail loop, handles (len < 8), one pixel at a time */
1: lw t1, 0 (a0)
addiu v0, v0, -1 /* len-- */
srl t2, t1, 24 /* t2 = alpha */
replv.ph t3, t2
muleu_s.ph.qbl t4, t1, t3
muleu_s.ph.qbr t1, t1, t3
preceu.ph.qbla t3, t4
preceu.ph.qbla t5, t1
addu.ph t4, t4, t3
addu.ph t1, t1, t5
shra_r.ph t4, t4, 8
shra_r.ph t1, t1, 8
precr.qb.ph t1, t4, t1
append t2, t1, 24
sw t2, 0(a0)
bgtz v0, 1b
addiu a0, a0, 4 /* data++ */
4: bnez a1, 3b /* if (numlines): loop */
addu a0, a0, a3 /* data += srclineskip */
0: /* return */
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
jr ra
nop
END(premultiply_argb_inplace_mips_asm)

View File

@ -1790,7 +1790,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
return buffer;
}
static const SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
static SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
// Untransformed
{
0, // Invalid
@ -6402,6 +6402,21 @@ void qInitDrawhelperAsm()
destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
sourceFetch[BlendUntransformed][QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
sourceFetch[BlendTiled][QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
sourceFetch[BlendUntransformed][QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
sourceFetch[BlendTiled][QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
sourceFetch[BlendUntransformed][QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
sourceFetch[BlendTiled][QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
#if defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
#else
qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
#endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2
#endif // QT_COMPILER_SUPPORTS_MIPS_DSP
if (functionForModeSolidAsm) {
const int destinationMode = QPainter::CompositionMode_Destination;

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtGui module of the Qt Toolkit.
@ -110,6 +110,78 @@ void qt_blend_rgb32_on_rgb32_mips_dsp(uchar *destPixels, int dbpl,
}
}
#if defined QT_COMPILER_SUPPORTS_MIPS_DSPR2
// Blends an RGB16 source rectangle of w x h pixels onto an RGB16 destination
// (MIPS DSPr2 build). dbpl/sbpl are the byte strides between scanlines of the
// destination and the source; const_alpha is the opacity, where 256 is handled
// as a plain copy and 0 leaves the destination untouched.
void qt_blend_rgb16_on_rgb16_mips_dspr2(uchar *destPixels, int dbpl,
                                        const uchar *srcPixels, int sbpl,
                                        int w, int h,
                                        int const_alpha)
{
    if (const_alpha == 0)
        return;

    if (const_alpha != 256) {
        // Partial opacity: blend every scanline with the DSPr2 routine.
        for (int row = 0; row < h; ++row) {
            qt_blend_rgb16_on_rgb16_mips_dspr2_asm(reinterpret_cast<quint16 *>(destPixels),
                                                   reinterpret_cast<const quint16 *>(srcPixels),
                                                   w, const_alpha);
            destPixels += dbpl;
            srcPixels += sbpl;
        }
        return;
    }

    if (w >= 256) {
        // Wide scanlines are copied with memcpy; RGB16 uses two bytes per pixel.
        const int bytesPerLine = w << 1;
        for (int remaining = h; remaining != 0; --remaining) {
            memcpy(destPixels, srcPixels, bytesPerLine);
            destPixels += dbpl;
            srcPixels += sbpl;
        }
        return;
    }

    // Narrow scanlines are copied by the assembly routine.
    for (int row = 0; row < h; ++row) {
        qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm(reinterpret_cast<quint16 *>(destPixels),
                                                             reinterpret_cast<const quint16 *>(srcPixels),
                                                             w);
        destPixels += dbpl;
        srcPixels += sbpl;
    }
}
#else
// Blends an RGB16 source rectangle of w x h pixels onto an RGB16 destination
// (MIPS DSP build without DSPr2). dbpl/sbpl are the byte strides between
// scanlines of the destination and the source; const_alpha is the opacity,
// where 256 is handled as a plain copy and 0 leaves the destination untouched.
void qt_blend_rgb16_on_rgb16_mips_dsp(uchar *destPixels, int dbpl,
                                      const uchar *srcPixels, int sbpl,
                                      int w, int h,
                                      int const_alpha)
{
    if (const_alpha == 0)
        return;

    if (const_alpha != 256) {
        // Partial opacity: blend every scanline with the DSP routine.
        for (int row = 0; row < h; ++row) {
            qt_blend_rgb16_on_rgb16_mips_dsp_asm(reinterpret_cast<quint16 *>(destPixels),
                                                 reinterpret_cast<const quint16 *>(srcPixels),
                                                 w, const_alpha);
            destPixels += dbpl;
            srcPixels += sbpl;
        }
        return;
    }

    if (w >= 256) {
        // Wide scanlines are copied with memcpy; RGB16 uses two bytes per pixel.
        const int bytesPerLine = w << 1;
        for (int remaining = h; remaining != 0; --remaining) {
            memcpy(destPixels, srcPixels, bytesPerLine);
            destPixels += dbpl;
            srcPixels += sbpl;
        }
        return;
    }

    // Narrow scanlines are copied by the assembly routine.
    for (int row = 0; row < h; ++row) {
        qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm(reinterpret_cast<quint16 *>(destPixels),
                                                             reinterpret_cast<const quint16 *>(srcPixels),
                                                             w);
        destPixels += dbpl;
        srcPixels += sbpl;
    }
}
#endif
void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
{
if (const_alpha == 255) {
@ -422,5 +494,28 @@ void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int l
comp_func_SourceOut_dsp_asm_x2(dest, src, length, const_alpha);
}
// Fetches `length` pixels of an RGB888 texture, starting at pixel column x of
// scanline y, into `buffer` as 32-bit values and returns `buffer`.
const uint * QT_FASTCALL qt_fetchUntransformed_888_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
                                                             int y, int x, int length)
{
    // x counts pixels, not bytes: an RGB888 pixel occupies three bytes, so
    // the first requested pixel starts 3 * x bytes into the scanline.
    // The cast is kept because the prototype of the assembly routine is not
    // visible here.
    uchar *line = (uchar *)data->texture.scanLine(y) + x * 3;
    fetchUntransformed_888_asm_mips_dsp(buffer, line, length);
    return buffer;
}
// Fetches `length` pixels of an RGB444 texture, starting at pixel column x of
// scanline y, into `buffer` as 32-bit values and returns `buffer`.
const uint * QT_FASTCALL qt_fetchUntransformed_444_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
                                                             int y, int x, int length)
{
    // x counts pixels, not bytes: QImage::Format_RGB444 is a 16-bit format,
    // so the first requested pixel starts 2 * x bytes into the scanline.
    // The cast is kept because the prototype of the assembly routine is not
    // visible here.
    uchar *line = (uchar *)data->texture.scanLine(y) + x * 2;
    fetchUntransformed_444_asm_mips_dsp(buffer, line, length);
    return buffer;
}
// Fetches `length` pixels of an ARGB8565_Premultiplied texture, starting at
// pixel column x of scanline y, into `buffer` as 32-bit values and returns
// `buffer`.
const uint * QT_FASTCALL qt_fetchUntransformed_argb8565_premultiplied_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
                                                                                int y, int x, int length)
{
    // x counts pixels, not bytes: QImage::Format_ARGB8565_Premultiplied is a
    // 24-bit format, so the first requested pixel starts 3 * x bytes into
    // the scanline. The cast is kept because the prototype of the assembly
    // routine is not visible here.
    uchar *line = (uchar *)data->texture.scanLine(y) + x * 3;
    fetchUntransformed_argb8565_premultiplied_asm_mips_dsp(buffer, line, length);
    return buffer;
}
QT_END_NAMESPACE

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtGui module of the Qt Toolkit.
@ -1601,3 +1601,479 @@ LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
nop
END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
/*
 * Byte-order dependent helpers for the unaligned RGB16 copy below.
 *   PACK(r, s, t)  - packrl.ph with the two source operands swapped on
 *                    big-endian targets
 *   SWHI / SWLO    - partial word stores (swl/swr, swapped on big-endian)
 *   LDHI / LDLO    - partial word loads  (lwl/lwr, swapped on big-endian)
 * o is a byte offset and b the base register; note the fixed +1 / +0 / +2
 * adjustments that are added to o.
 */
#if defined(__MIPSEL) && __MIPSEL
# define PACK(r, s, t) packrl.ph r, s, t
# define SWHI(r, o, b) swl r, o + 1 (b)
# define SWLO(r, o, b) swr r, o + 0 (b)
# define LDHI(r, o, b) lwl r, o + 1 (b)
# define LDLO(r, o, b) lwr r, o + 2 (b)
#else
# define PACK(r, s, t) packrl.ph r, t, s
# define SWHI(r, o, b) swr r, o + 1 (b)
# define SWLO(r, o, b) swl r, o + 0 (b)
# define LDHI(r, o, b) lwr r, o + 1 (b)
# define LDLO(r, o, b) lwl r, o + 2 (b)
#endif
LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)
/*
* Copies len 16-bit pixels from src to dst (blending with full
* opacity is a plain copy).
*
* a0 - dst (*r5g6b5)
* a1 - src (const *r5g6b5)
* a2 - len (unsigned int)
*
* Register usage:
* t0-3 - Scratch registers
* t4 - Number of iterations to do in unrolled loops
* t5-7 - Auxiliary scratch registers.
*
* Check if base addresses of src/dst are aligned, cases:
* a) Both aligned.
* b) Both unaligned:
* 1. Copy a halfword
* 2. Use aligned case.
* c) dst aligned, src unaligned:
* 1. Read a word from dst, halfword from src.
* 2. Continue reading words from both.
* d) dst unaligned, src aligned:
* 1. Read a word from src, halfword from dst.
* 2. Continue reading words from both.
*/
beqz a2, 0f /* if (a2:len == 0): return */
andi t0, a0, 0x3 /* t0 = a0:dst % 4 */
andi t1, a1, 0x3 /* t1 = a1:src % 4 */
or t2, t0, t1 /* t2 = t0 | t1 */
beqz t2, 4f /* both aligned */
nop
beqz t0, 3f /* dst aligned, src unaligned */
nop
beqz t1, 2f /* src aligned, dst unaligned */
nop
/*
* Both src/dst are unaligned: read 1 halfword from each,
* then fall through to continue with word-aligned copy.
*/
lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */
addiu a1, a1, 2 /* src++ */
addiu a2, a2,-1 /* len-- */
sh t0, 0 (a0) /* t0 -> ((uint16_t*) dst)[0] */
addiu a0, a0, 2 /* dst++ */
/*
* Both src/dst pointers are word-aligned, process eight
* items at a time in an unrolled loop.
*/
4: beqz a2, 0f /* if (len == 0): return */
srl t4, a2, 3 /* t4 = len / 8 */
beqz t4, 5f /* if (t4 == 0): tail */
andi a2, a2, 0x07 /* len = len % 8 */
1: lw t0, 0 (a1)
lw t1, 4 (a1)
lw t2, 8 (a1)
lw t3, 12 (a1)
addiu t4, t4, -1 /* t4-- */
addiu a1, a1, 16 /* src += 8 */
sw t0, 0 (a0)
sw t1, 4 (a0)
sw t2, 8 (a0)
sw t3, 12 (a0)
bnez t4, 1b
addiu a0, a0, 16 /* dst += 8 */
b 5f
nop
/*
* dst pointer is unaligned: load four aligned words from src, store
* the first and last halfword with partial stores and the six
* halfwords in between as three repacked words at dst + 2.
*/
2: beqz a2, 0f /* if (len == 0): return */
srl t4, a2, 3 /* t4 = len / 8 */
beqz t4, 5f /* if (t4 == 0): tail */
andi a2, a2, 0x07 /* len = len % 8 */
1: lw t0, 0 (a1)
lw t1, 4 (a1)
lw t2, 8 (a1)
lw t3, 12 (a1)
addiu t4, t4, -1 /* t4-- */
addiu a1, a1, 16 /* src += 8 */
SWLO (t0, 0, a0)
PACK (t5, t1, t0)
PACK (t6, t2, t1)
PACK (t7, t3, t2)
SWHI (t3, 14, a0)
sw t5, 2 (a0)
sw t6, 6 (a0)
sw t7, 10 (a0)
bnez t4, 1b
addiu a0, a0, 16 /* dst += 8 */
b 5f
nop
/*
* src pointer is unaligned: the first and last load are partial word
* loads, the three in between are aligned word loads at src + 2; the
* halfwords are then repacked into four words for aligned stores.
*/
3: beqz a2, 0f /* if (len == 0): return */
srl t4, a2, 3 /* t4 = len / 8 */
beqz t4, 5f /* if (t4 == 0): tail */
andi a2, a2, 0x07 /* len = len % 8 */
1: LDHI (t0, 0, a1)
lw t1, 2 (a1)
lw t2, 6 (a1)
lw t3, 10 (a1)
LDLO (t5, 12, a1)
addiu t4, t4, -1 /* t4-- */
addiu a1, a1, 16 /* src += 8 */
PACK (t0, t1, t0)
PACK (t6, t2, t1)
PACK (t7, t3, t2)
sw t0, 0 (a0)
PACK (t0, t5, t3)
sw t6, 4 (a0)
sw t7, 8 (a0)
sw t0, 12 (a0)
bnez t4, 1b
addiu a0, a0, 16 /* dst += 8 */
5: /* Process remaining items (a2:len < 8), one at a time */
beqz a2, 0f
nop
1: lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */
addiu a2, a2,-1 /* len-- */
addiu a1, a1, 2 /* src++ */
sh t0, 0 (a0) /* t0 -> ((uint16_t*) dst)[0] */
bnez a2, 1b /* if (len != 0): loop */
addiu a0, a0, 2 /* dst++ */
0: jr ra
nop
END(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)
#undef LDHI
#undef LDLO
#undef PACK
#undef SWHI
#undef SWLO
LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_mips_dsp_asm)
/*
 * Blends a run of 16-bit r5g6b5 source pixels onto r5g6b5 destination
 * pixels with a constant alpha, one pixel per loop iteration:
 *     dst = scale(src, alpha) + scale(dst, ialpha)
 * The green field (mask 0x07e0) is multiplied by the full factor and
 * shifted right by 8; the red and blue fields (mask 0xf81f) are multiplied
 * together by (factor >> 2) and shifted right by 6.
 *
 * a0 - dst (*r5g6b5)
 * a1 - src (const *r5g6b5)
 * a2 - len (unsigned int) - batch length
 * a3 - alpha (int)
 *
 * Register usage:
 * t0-t3 - scratch (src/dst pixel fields)
 * t4 - alpha >> 2
 * t5 - inverse alpha >> 2
 * t8 - scratch used while adjusting alpha
 * t9 - inverse alpha
 */
beqz a2, 2f /* if (len == 0): return */
li t9, 255 /* (delay slot) t9 = 255 */
sll t8, a3, 8 /* t8 = alpha << 8 */
subu a3, t8, a3 /* alpha = alpha * 255 */
srl a3, a3, 8 /* alpha >>= 8 */
subu t9, t9, a3 /* ialpha = 255 - alpha */
addiu a3, a3, 1 /* alpha++ */
srl t4, a3, 2 /* t4 = alpha >> 2 */
addiu t9, t9, 1 /* ialpha++ */
srl t5, t9, 2 /* t5 = ialpha >> 2 */
1:
lhu t0, 0(a1) /* t0 = src pixel */
lhu t1, 0(a0) /* t1 = dst pixel */
addiu a2, a2, -1 /* len-- */
andi t2, t0, 0x07e0 /* t2 = src green field */
andi t0, t0, 0xf81f /* t0 = src red and blue fields */
mul t2, t2, a3
mul t0, t0, t4
andi t3, t1, 0x07e0 /* t3 = dst green field */
andi t1, t1, 0xf81f /* t1 = dst red and blue fields */
mul t3, t3, t9
mul t1, t1, t5
addiu a1, a1, 2 /* src++ */
srl t2, t2, 8
srl t0, t0, 6
andi t2, t2, 0x07e0 /* drop bits that spilled out of the fields */
andi t0, t0, 0xf81f
or t0, t0, t2 /* t0 = scaled src pixel */
srl t3, t3, 8
srl t1, t1, 6
andi t3, t3, 0x07e0
andi t1, t1, 0xf81f
or t1, t1, t3 /* t1 = scaled dst pixel */
addu t0, t0, t1 /* src * alpha + dst * ialpha */
sh t0, 0(a0)
bgtz a2, 1b /* if (len > 0): loop */
addiu a0, a0, 2 /* (delay slot) dst++ */
2:
jr ra
nop
END(qt_blend_rgb16_on_rgb16_mips_dsp_asm)
LEAF_MIPS_DSP(fetchUntransformed_888_asm_mips_dsp)
/*
 * Expands packed 3-byte pixels into 32-bit words. For the source bytes
 * b0, b1, b2 of a pixel the single-pixel path stores
 * 0xff000000 | (b0 << 16) | (b1 << 8) | b2.
 * An odd pixel is handled first; the remaining pixels are processed in
 * pairs, with two loop variants chosen by the low bit of the src address
 * so that every halfword load is 2-byte aligned.
 *
 * a0 - dst address (address of 32-bit aRGB value)
 * a1 - src address
 * a2 - length
 *
 * NOTE(review): the register diagrams in the pair loops place the first
 * byte loaded by lhu in the low bits, i.e. they describe a little-endian
 * target; big-endian behaviour is not evident from this code - confirm.
 * NOTE(review): v0 is never written here although the C prototype
 * returns uint *; callers presumably ignore the return value - confirm.
 */
beqz a2, 4f /* if (length == 0): return */
lui t8, 0xff00 /* (delay slot) t8 = 0xff000000, opaque alpha */
andi t0, a2, 0x1
beqz t0, 1f /* even length: go straight to the pair loops */
nop
/* case for one pixel */
lbu t1, 0(a1)
lbu v1, 2(a1)
lbu t0, 1(a1)
addiu a1, a1, 3
addiu a2, a2, -1
sll t1, t1, 0x10
or v1, v1, t8
sll t0, t0, 0x8
or v1, v1, t1
or v1, v1, t0
sw v1, 0(a0)
addiu a0, a0, 4
beqz a2, 4f /* only one pixel is present (length = 1) */
nop
1:
andi t0, a1, 0x1
beqz t0, 3f /* src address is even: use loop 3 */
nop
/* src address is odd: byte first, then an aligned halfword */
2:
lbu t0, 0(a1) /* t0 = | 0 | 0 | 0 | R1 | */
lhu t1, 1(a1) /* t1 = | 0 | 0 | B1 | G1 | */
addiu a1, a1, 3
lhu t2, 0(a1) /* t2 = | 0 | 0 | G2 | R2 | */
lbu t3, 2(a1) /* t3 = | 0 | 0 | 0 | B2 | */
sll t0, t0, 16
or t0, t0, t8 /* t0 = | ff | R1 | 0 | 0 | */
shll.ph t4, t1, 8 /* t4 = | 0 | 0 | G1 | 0 | */
srl t5, t1, 8 /* t5 = | 0 | 0 | 0 | B1 | */
or t4, t4, t5 /* t4 = | 0 | 0 | G1 | B1 | */
or t0, t0, t4 /* t0 = | ff | R1 | G1 | B1 | */
shll.ph t4, t2, 8 /* t4 = | 0 | 0 | R2 | 0 | */
srl t5, t2, 8 /* t5 = | 0 | 0 | 0 | G2 | */
or t4, t4, t5 /* t4 = | 0 | 0 | R2 | G2 | */
sll t4, t4, 8 /* t4 = | 0 | R2 | G2 | 0 | */
or t5, t3, t8 /* t5 = | ff | 0 | 0 | B2 | */
or t2, t4, t5 /* t2 = | ff | R2 | G2 | B2 | */
sw t0, 0(a0)
addiu a1, a1, 3
sw t2, 4(a0)
addiu a2, a2, -2 /* two pixels consumed */
bnez a2, 2b
addiu a0, a0, 8 /* (delay slot) dst += 2 */
b 4f
nop
/* src address is even: aligned halfword first, then a byte */
3:
lhu t0, 0(a1) /* t0 = | 0 | 0 | G1 | R1 | */
lbu t1, 2(a1) /* t1 = | 0 | 0 | 0 | B1 | */
addiu a1, a1, 3
lbu t2, 0(a1) /* t2 = | 0 | 0 | 0 | R2 | */
lhu t3, 1(a1) /* t3 = | 0 | 0 | B2 | G2 | */
srl t4, t0, 8 /* t4 = | 0 | 0 | 0 | G1 | */
shll.ph t5, t0, 8 /* t5 = | 0 | 0 | R1 | 0 | */
or t0, t4, t5 /* t0 = | 0 | 0 | R1 | G1 | */
sll t6, t0, 8 /* t6 = | 0 | R1 | G1 | 0 | */
or t4, t1, t8 /* t4 = | ff | 0 | 0 | B1 | */
or t0, t6, t4 /* t0 = | ff | R1 | G1 | B1 | */
sll t2, t2, 16 /* t2 = | 0 | R2 | 0 | 0 | */
srl t4, t3, 8 /* t4 = | 0 | 0 | 0 | B2 | */
shll.ph t5, t3, 8 /* t5 = | 0 | 0 | G2 | 0 | */
or t3, t4, t5 /* t3 = | 0 | 0 | G2 | B2 | */
or t2, t2, t3
or t2, t2, t8 /* t2 = | ff | R2 | G2 | B2 | */
sw t0, 0(a0)
addiu a1, a1, 3
sw t2, 4(a0)
addiu a2, a2, -2 /* two pixels consumed */
bnez a2, 3b
addiu a0, a0, 8 /* (delay slot) dst += 2 */
4:
jr ra
nop
END(fetchUntransformed_888_asm_mips_dsp)
LEAF_MIPS_DSP(fetchUntransformed_444_asm_mips_dsp)
/*
 * Expands 16-bit pixels carrying 4-bit R, G and B nibbles (bits 11-8,
 * 7-4 and 3-0) into 32-bit words. Each nibble is replicated into both
 * halves of its 8-bit channel and the top byte is forced to 0xff; the
 * top nibble of the source pixel is ignored.
 *
 * Pixels are handled one at a time until the src address is word
 * aligned, then two at a time from a single word load (loop 3), with one
 * optional trailing pixel (label 4).
 *
 * a0 - dst address (address of 32-bit aRGB value)
 * a1 - src address
 * a2 - length
 *
 * NOTE(review): v0 is initialised to the buffer address but is then
 * reused as a scratch register by the single-pixel paths, so it does not
 * hold the buffer address on return for those paths - confirm that
 * callers ignore the returned value.
 * NOTE(review): the pair loop takes the first pixel from the low
 * halfword of the loaded word, which matches a little-endian target -
 * confirm big-endian behaviour.
 */
lui t8, 0xff00 /* t8 = 0xff000000, opaque alpha */
li t4, 0x1 /* t4 = 1, compared against the remaining length */
beqz a2, 5f /* if (length == 0): return */
move v0, a0 /* (delay slot) v0 = buffer address; overwritten
 * below whenever a single pixel is processed */
andi t0, a2, 0x1
beqz t0, 2f /* even length: skip the single pixel and
 * check src memory alignment (word) */
nop
1:
/* process a single pixel */
lhu v0, 0(a1)
addiu a1, a1, 2
addiu a2, a2, -1
andi t0, v0, 0xf00 /* t0 = R << 8 */
andi v1, v0, 0xf /* v1 = B */
andi v0, v0, 0xf0 /* v0 = G << 4 */
sra t3, t0, 0x4 /* t3 = R << 4 */
sra t1, v0, 0x4 /* t1 = G */
sra t0, t0, 0x8 /* t0 = R */
sll t2, v1, 0x4 /* t2 = B << 4 */
or t0, t0, t3 /* t0 = RR */
or v0, t1, v0 /* v0 = GG */
lui t1, 0xff00
or v1, t2, v1 /* v1 = BB */
sll t0, t0, 0x10
or v1, v1, t1
sll v0, v0, 0x8
or v1, v1, t0
or v0, v1, v0 /* v0 = | ff | RR | GG | BB | */
sw v0, 0(a0)
addiu a0, a0, 4
beqz a2, 5f /* no more pixels for processing */
nop
beq a2, t4, 4f /* only one more pixel remained */
nop
/* check if src memory address is word aligned */
2:
andi t0, a1, 0x3
beqz t0, 3f /* memory is word aligned */
andi a3, a2, 0x1 /* (delay slot) a3 is the loop-end value for
 * the unrolled loop
 * (1 if length is odd, 0 if even) */
b 1b /* not word aligned,
 * go another turn with
 * just one pixel processing */
nop
3:
/* two pixels per iteration from one aligned word */
lw t0, 0(a1)
addiu a2, a2, -2
preceu.ph.qbr t1, t0 /* t1 = | 0 | aR1 | 0 | G1B1 | */
preceu.ph.qbl t2, t0 /* t2 = | 0 | aR2 | 0 | G2B2 | */
shll.qb t3, t1, 4 /* t3 = | 0 | R1 0 | 0 | B1 0 | */
srl t4, t3, 4
or t0, t3, t4 /* t0 = | 0 | R1R1 | 0 | B1B1 | */
andi t3, t1, 0xf0 /* t3 = G1 << 4 */
sll t3, t3, 8
srl t4, t3, 4
or t1, t3, t4 /* t1 = | 0 | 0 | G1G1 | 0 | */
or t0, t0, t1 /* t0 = | 0 | R1R1 | G1G1 | B1B1 | */
or t0, t0, t8 /* t0 = | ff | R1R1 | G1G1 | B1B1 | */
shll.qb t3, t2, 4 /* t3 = | 0 | R2 0 | 0 | B2 0 | */
srl t4, t3, 4
or t7, t3, t4 /* t7 = | 0 | R2R2 | 0 | B2B2 | */
andi t3, t2, 0xf0 /* t3 = G2 << 4 */
sll t3, t3, 8
srl t4, t3, 4
or t1, t3, t4 /* t1 = | 0 | 0 | G2G2 | 0 | */
or t2, t7, t1 /* t2 = | 0 | R2R2 | G2G2 | B2B2 | */
or t2, t2, t8 /* t2 = | ff | R2R2 | G2G2 | B2B2 | */
sw t0, 0(a0)
addiu a1, a1, 4
sw t2, 4(a0)
bne a2, a3, 3b /* loop until only the trailing pixel (if any) is left */
addiu a0, a0, 8 /* (delay slot) dst += 2 */
beqz a2, 5f /* no more pixels for processing */
nop
4:
/* one more pixel remained (after loop unrolling process finished) */
lhu v0, 0(a1)
addiu a1, a1, 2
addiu a2, a2, -1
andi t0, v0, 0xf00 /* t0 = R << 8 */
andi v1, v0, 0xf /* v1 = B */
andi v0, v0, 0xf0 /* v0 = G << 4 */
sra t3, t0, 0x4
sra t1, v0, 0x4
sra t0, t0, 0x8
sll t2, v1, 0x4
or t0, t0, t3 /* t0 = RR */
or v0, t1, v0 /* v0 = GG */
lui t1, 0xff00
or v1, t2, v1 /* v1 = BB */
sll t0, t0, 0x10
or v1, v1, t1
sll v0, v0, 0x8
or v1, v1, t0
or v0, v1, v0 /* v0 = | ff | RR | GG | BB | */
sw v0, 0(a0)
addiu a0, a0, 4
5:
jr ra
nop
END(fetchUntransformed_444_asm_mips_dsp)
LEAF_MIPS_DSP(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp)
/*
 * Expands 3-byte pixels into 32-bit words, one pixel per iteration.
 * Each pixel is read as a 16-bit colour value at offset 0 (byte-swapped
 * with wsbh before use) and one byte at offset 2 that is inserted into
 * bits 31-24 of the result as alpha. The 5/6/5-bit colour fields are
 * widened to 8 bits by copying their top bits into the low bits.
 *
 * a0 - dst address
 * a1 - src address
 * a2 - length
 *
 * NOTE(review): the replv.qb / cmpu.lt.qb / pick.qb sequence appears to
 * clamp every byte of the result to the alpha value (alpha is picked
 * wherever alpha < byte) - confirm against the DSP ASE manual.
 * NOTE(review): v0 is never written here although the C prototype
 * returns uint * - confirm that callers ignore the returned value.
 */
beqz a2, 2f /* if (length == 0): return */
nop
1:
ulh t1, 0(a1) /* t1 = 16-bit colour value (unaligned load) */
lbu t2, 2(a1) /* t2 = alpha byte */
addiu a2, a2, -1 /* length-- */
wsbh t1, t1 /* swap the two bytes of the colour halfword */
sll t0, t1, 8 /* t0 = 00000000rrrrrggggggbbbbb00000000 */
ins t0, t1, 3, 16 /* t0 = 00000000rrrrrrrrrrggggggbbbbb000 */
ins t0, t1, 5, 11 /* t0 = 00000000rrrrrrrrggggggbbbbbbb000 */
srl t4, t1, 9 /* t4 = 0000000000000000000000000rrrrrgg */
replv.qb t3, t2 /* t3 = alpha replicated into all four bytes */
ins t0, t4, 8, 2 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */
ins t0, t1, 3, 5 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */
srl t4, t1, 2 /* t4 = 000000000000000000rrrrrggggggbbb */
ins t0, t4, 0, 3 /* t0 = 00000000rrrrrrrrggggggggbbbbbbbb */
ins t0, t2, 24, 8 /* t0 =aaaaaaaarrrrrrrrggggggggbbbbbbbb */
cmpu.lt.qb t3, t0 /* per byte: alpha < byte of t0 ? */
pick.qb t0, t3, t0 /* per byte: take alpha where the compare held */
addiu a1, a1, 3 /* src += 3 bytes */
sw t0, 0(a0)
bgtz a2, 1b /* if (length > 0): loop */
addiu a0, a0, 4 /* (delay slot) dst++ */
2:
jr ra
nop
END(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp)

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtGui module of the Qt Toolkit.
@ -101,19 +101,34 @@ extern "C" void qt_blend_argb32_on_argb32_mips_dsp_asm_x2(uint *dest, const uint
// Prototypes of the hand-written MIPS DSP assembly routines. The symbols
// are defined in the assembly sources, hence the C linkage.
extern "C" void qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(uint *dest, const uint *src, int length);
extern "C" void qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm(quint16 *dest, const quint16 *src, int length);
// Blends `length` r5g6b5 pixels from src onto dest using const_alpha.
extern "C" void qt_blend_rgb16_on_rgb16_mips_dsp_asm(quint16 *dest, const quint16 *src, int length, uint const_alpha);
extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
// Untransformed span fetchers: expand `length` packed source pixels starting
// at `line` into 32-bit values written to `buffer`.
// NOTE(review): the assembly bodies of these three routines do not leave
// `buffer` in the return register on every path, so the returned pointer
// should not be relied upon - confirm against the callers.
extern "C" uint * fetchUntransformed_888_asm_mips_dsp(uint *buffer, const uchar *line, int length);
extern "C" uint * fetchUntransformed_444_asm_mips_dsp(uint *buffer, const uchar *line, int length);
extern "C" uint * fetchUntransformed_argb8565_premultiplied_asm_mips_dsp(uint *buffer, const uchar *line, int length);
void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
void qt_blend_rgb32_on_rgb32_mips_dsp(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
void qt_blend_rgb16_on_rgb16_mips_dsp(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
@ -164,6 +179,21 @@ void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int l
void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
// MIPS DSP span fetch functions for untransformed sources, one per packed
// source format (888, 444, argb8565 premultiplied). Each takes the output
// buffer, an Operator, the span data and the y/x start position plus the
// number of pixels to fetch.
// NOTE(review): presumably thin wrappers around the *_asm_mips_dsp routines
// of the matching name - confirm in the implementation file.
const uint * QT_FASTCALL qt_fetchUntransformed_888_mips_dsp (uint *buffer,
const Operator *,
const QSpanData *data,
int y, int x, int length);
const uint * QT_FASTCALL qt_fetchUntransformed_444_mips_dsp (uint *buffer,
const Operator *,
const QSpanData *data,
int y, int x, int length);
const uint * QT_FASTCALL qt_fetchUntransformed_argb8565_premultiplied_mips_dsp (uint *buffer,
const Operator *,
const QSpanData *data,
int y, int x, int length);
#endif // QT_COMPILER_SUPPORTS_MIPS_DSP
@ -171,6 +201,13 @@ void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length,
// Prototypes of the MIPS DSPr2 assembly routines (C linkage).
extern "C" void qConvertRgb16To32_asm_mips_dspr2(quint32 *dest, const quint16 *src, int length);
// Blends `length` r5g6b5 pixels from src onto dest using const_alpha.
extern "C" void qt_blend_rgb16_on_rgb16_mips_dspr2_asm(quint16 *dest, const quint16 *src, int length, uint const_alpha);
// Rectangle-level RGB16-on-RGB16 blend for DSPr2: pixel pointers, bytes per
// line (dbpl/sbpl), width, height and constant alpha.
void qt_blend_rgb16_on_rgb16_mips_dspr2(uchar *destPixels, int dbpl,
const uchar *srcPixels, int sbpl,
int w, int h,
int const_alpha);
#endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2
QT_END_NAMESPACE

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtGui module of the Qt Toolkit.
@ -122,3 +122,551 @@ LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2)
nop
END(qConvertRgb16To32_asm_mips_dspr2)
/*
 * Endian-dependent helpers used by the blend routine below when the source
 * pointer is not word aligned:
 *   PACK(r, s, t) - packrl.ph with the two source operands swapped on
 *                   big-endian targets; combines halfwords of two
 *                   neighbouring loads into one register.
 *   LDHI(r, o, b) - partial word load (lwl/lwr) at offset o + 1 from b.
 *   LDLO(r, o, b) - partial word load (lwr/lwl) at offset o + 2 from b.
 */
#if defined(__MIPSEL) && __MIPSEL
# define PACK(r, s, t) packrl.ph r, s, t
# define LDHI(r, o, b) lwl r, o + 1 (b)
# define LDLO(r, o, b) lwr r, o + 2 (b)
#else
# define PACK(r, s, t) packrl.ph r, t, s
# define LDHI(r, o, b) lwr r, o + 1 (b)
# define LDLO(r, o, b) lwl r, o + 2 (b)
#endif
LEAF_MIPS_DSPR2(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)
/*
 * Blends r5g6b5 source pixels onto r5g6b5 destination pixels with a
 * constant alpha. After an optional single pixel that brings dst to a
 * word boundary, eight pixels are processed per iteration of an unrolled
 * loop (one variant for word-aligned src, one for unaligned src); the
 * remaining pixels are processed one at a time.
 *
 * a0 - dst (*r5g6b5)
 * a1 - src (const *r5g6b5)
 * a2 - len (unsigned int) - batch length
 * a3 - alpha (int)
 *
 * Register usage:
 * t0-3 - Scratch registers
 * t4 - Number of iterations to do in unrolled loops
 * t5 - Inverse alpha
 * t6 - Alpha >> 2
 * t7 - Inverse alpha >> 2
 * t8 - magic1 (0x07e007e0)
 * t9 - magic2 (0xf81ff81f)
 * s0-s4, v0, v1 - extra scratch in the unrolled loops (saved and
 * restored on the stack around them)
 *
 * NOTE:
 * Cannot use DSP instructions for the multiplication of two
 * 16-bit values: overflow would be always rounded or saturated.
 */
beqz a2, 0f /* if (len == 0): return */
andi t0, a0, 0x3 /* (delay slot) t0 = dst & 0x3 */
andi t1, a1, 0x3 /* t1 = src & 0x3 */
/* Adjust alpha value, and calculate inverse alpha value */
li t5, 255
or t2, t0, t1 /* t2 = (dst & 0x3) | (src & 0x3) */
sll t8, a3, 8
subu a3, t8, a3 /* alpha = alpha * 255 */
li t8, 0x07e007e0 /* magic1 */
srl a3, a3, 8 /* alpha >>= 8 */
li t9, 0xf81ff81f /* magic2 */
subu t5, t5, a3 /* ialpha = 255 - alpha */
addiu a3, a3, 1 /* alpha++ */
addiu t5, t5, 1 /* ialpha++ */
srl t6, a3, 2 /* ashift = alpha >> 2 */
beqz t2, 4f /* both aligned */
srl t7, t5, 2 /* iashift = ialpha >> 2 */
beqz t1, 2f /* src aligned, dst unaligned */
nop
beqz t0, 3f /* dst aligned, src unaligned */
nop
/*
 * Both src/dst are unaligned: read 1 halfword from each, then
 * fall-off to continue with word-aligned operation.
 */
lhu t1, 0 (a1)
lhu t0, 0 (a0)
addiu a2, a2, -1 /* len-- */
andi t2, t1, 0x07e0
andi t1, t1, 0xf81f
mul t2, t2, a3
mul t1, t1, t6
andi t3, t0, 0x07e0
andi t0, t0, 0xf81f
mul t3, t3, t5
mul t0, t0, t7
addiu a1, a1, 2 /* src++ */
srl t2, t2, 8
srl t1, t1, 6
andi t2, t2, 0x07e0
andi t1, t1, 0xf81f
or t1, t1, t2
srl t3, t3, 8
srl t0, t0, 6
andi t3, t3, 0x07e0
andi t0, t0, 0xf81f
or t0, t0, t3
addu t0, t0, t1 /* src * alpha + dst * ialpha */
sh t0, 0 (a0)
addiu a0, a0, 2 /* dst++ */
/*
 * Both src/dst pointers are word-aligned, process eight
 * items at a time in an unrolled loop.
 *
 * Each loaded word holds two pixels. For every word the green fields
 * (magic1) and the red/blue fields (magic2) of both halfwords are
 * multiplied separately, shifted, re-joined with append and masked.
 * The four src words are scaled by alpha first (results in t1, v1,
 * s1, s3), then the four dst words by ialpha (results in t0, v0, s0,
 * s2); the pairs are summed with addu.ph just before the stores.
 */
4: beqz a2, 0f
srl t4, a2, 3 /* t4 = len / 8 */
beqz t4, 5f
andi a2, a2, 0x7 /* len = len % 8 */
SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1
1: lw t1, 0 (a1) /* [s0, s1] */
lw v1, 4 (a1) /* [s2, s3] */
lw s1, 8 (a1) /* [s4, s5] */
lw s3, 12 (a1) /* [s6, s7] */
lw t0, 0 (a0) /* [d0, d1] */
lw v0, 4 (a0) /* [d2, d3] */
lw s0, 8 (a0) /* [d4, d5] */
lw s2, 12 (a0) /* [d6, d7] */
pref 4, 16 (a1)
pref 5, 16 (a0)
and t2, t1, t8
ext t3, t2, 0, 16
srl t2, t2, 16
mul t3, t3, a3
mul t2, t2, a3
and t1, t1, t9
ext s4, t1, 0, 16
mul s4, s4, t6
srl t1, t1, 16
mul t1, t1, t6
srl t3, t3, 8
srl t2, t2, 8
append t2, t3, 16
and t2, t2, t8
srl s4, s4, 6
and t3, v1, t8
srl t1, t1, 6
append t1, s4, 16
and t1, t1, t9
or t1, t1, t2
ext t2, t3, 0, 16
srl t3, t3, 16
mul t2, t2, a3
mul t3, t3, a3
and v1, v1, t9
ext s4, v1, 0, 16
mul s4, s4, t6
srl v1, v1, 16
mul v1, v1, t6
srl t2, t2, 8
srl t3, t3, 8
append t3, t2, 16
and t3, t3, t8
srl s4, s4, 6
and t2, s1, t8
srl v1, v1, 6
append v1, s4, 16
and v1, v1, t9
or v1, v1, t3
ext t3, t2, 0, 16
srl t2, t2, 16
mul t3, t3, a3
mul t2, t2, a3
and s1, s1, t9
ext s4, s1, 0, 16
mul s4, s4, t6
srl s1, s1, 16
mul s1, s1, t6
srl t3, t3, 8
srl t2, t2, 8
append t2, t3, 16
and t2, t2, t8
srl s4, s4, 6
and t3, s3, t8
srl s1, s1, 6
append s1, s4, 16
and s1, s1, t9
or s1, s1, t2
ext t2, t3, 0, 16
srl t3, t3, 16
mul t2, t2, a3
mul t3, t3, a3
and s3, s3, t9
ext s4, s3, 0, 16
mul s4, s4, t6
srl s3, s3, 16
mul s3, s3, t6
srl t2, t2, 8
srl t3, t3, 8
append t3, t2, 16
and t3, t3, t8
srl s4, s4, 6
and t2, t0, t8
srl s3, s3, 6
append s3, s4, 16
and s3, s3, t9
or s3, s3, t3
ext t3, t2, 0, 16
srl t2, t2, 16
mul t3, t3, t5
mul t2, t2, t5
and t0, t0, t9
ext s4, t0, 0, 16
mul s4, s4, t7
srl t0, t0, 16
mul t0, t0, t7
srl t3, t3, 8
srl t2, t2, 8
append t2, t3, 16
and t2, t2, t8
srl s4, s4, 6
and t3, v0, t8
srl t0, t0, 6
append t0, s4, 16
and t0, t0, t9
or t0, t0, t2
ext t2, t3, 0, 16
srl t3, t3, 16
mul t2, t2, t5
mul t3, t3, t5
and v0, v0, t9
ext s4, v0, 0, 16
mul s4, s4, t7
srl v0, v0, 16
mul v0, v0, t7
srl t2, t2, 8
srl t3, t3, 8
append t3, t2, 16
and t3, t3, t8
srl s4, s4, 6
and t2, s0, t8
srl v0, v0, 6
append v0, s4, 16
and v0, v0, t9
or v0, v0, t3
ext t3, t2, 0, 16
srl t2, t2, 16
mul t3, t3, t5
mul t2, t2, t5
and s0, s0, t9
ext s4, s0, 0, 16
mul s4, s4, t7
srl s0, s0, 16
mul s0, s0, t7
srl t3, t3, 8
srl t2, t2, 8
append t2, t3, 16
and t2, t2, t8
srl s4, s4, 6
and t3, s2, t8
srl s0, s0, 6
append s0, s4, 16
and s0, s0, t9
or s0, s0, t2
ext t2, t3, 0, 16
srl t3, t3, 16
mul t2, t2, t5
mul t3, t3, t5
and s2, s2, t9
ext s4, s2, 0, 16
mul s4, s4, t7
srl s2, s2, 16
mul s2, s2, t7
srl t2, t2, 8
srl t3, t3, 8
append t3, t2, 16
and t3, t3, t8
srl s4, s4, 6
addu.ph t0, t0, t1 /* t0 = [S0 + D0, S1 + D1] */
srl s2, s2, 6
append s2, s4, 16
and s2, s2, t9
or s2, s2, t3
addu.ph v0, v0, v1 /* v0 = [S2 + D2, S3 + D3] */
addu.ph s0, s0, s1 /* s0 = [S4 + D4, S5 + D5] */
addu.ph s2, s2, s3 /* s2 = [S6 + D6, S7 + D7] */
sw t0, 0 (a0) /* [SS0, SS1] */
sw v0, 4 (a0) /* [SS2, SS3] */
sw s0, 8 (a0) /* [SS4, SS5] */
sw s2, 12 (a0) /* [SS6, SS7] */
addiu t4, t4, -1 /* t4-- */
addiu a1, a1, 16 /* src += 8 */
bnez t4, 1b
addiu a0, a0, 16 /* dst += 8 */
RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1
b 5f /* continue with the one-at-a-time tail */
nop
/* dst unaligned: do one item and fall down to the src unaligned case */
2: lhu t1, 0 (a1)
lhu t0, 0 (a0)
addiu a2, a2, -1 /* len-- */
andi t2, t1, 0x07e0
andi t1, t1, 0xf81f
mul t2, t2, a3
mul t1, t1, t6
andi t3, t0, 0x07e0
andi t0, t0, 0xf81f
mul t3, t3, t5
mul t0, t0, t7
addiu a1, a1, 2 /* src++ */
srl t2, t2, 8
srl t1, t1, 6
andi t2, t2, 0x07e0
andi t1, t1, 0xf81f
or t1, t1, t2
srl t3, t3, 8
srl t0, t0, 6
andi t3, t3, 0x07e0
andi t0, t0, 0xf81f
or t0, t0, t3
addu t0, t0, t1 /* src * alpha + dst * ialpha */
sh t0, 0 (a0)
addiu a0, a0, 2 /* dst++ */
/*
 * src unaligned: dst is loaded with plain word loads, while the eight
 * src halfwords are gathered with LDHI, three word loads at offsets
 * 2, 6 and 10, and LDLO, then re-paired into [s0, s1] .. [s6, s7]
 * with PACK. The arithmetic is the same as in the aligned loop.
 */
3: beqz a2, 0f
srl t4, a2, 3 /* t4 = len / 8 */
beqz t4, 5f
andi a2, a2, 0x7 /* len = len % 8 */
SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1
1: lw t0, 0 (a0) /* [d0, d1] */
lw v0, 4 (a0) /* [d2, d3] */
lw s0, 8 (a0) /* [d4, d5] */
lw s2, 12 (a0) /* [d6, d7] */
LDHI (t1, 0, a1) /* [s0, __] */
lw v1, 2 (a1) /* [s1, s2] */
lw s1, 6 (a1) /* [s3, s4] */
lw s3, 10 (a1) /* [s5, s6] */
LDLO (s4, 12, a1) /* [__, s7] */
pref 4, 14 (a1)
pref 5, 16 (a0)
PACK (t1, v1, t1) /* [s0, s1] */
PACK (v1, s1, v1) /* [s2, s3] */
PACK (s1, s3, s1) /* [s4, s5] */
PACK (s3, s4, s3) /* [s6, s7] */
and t2, t1, t8
ext t3, t2, 0, 16
srl t2, t2, 16
mul t3, t3, a3
mul t2, t2, a3
and t1, t1, t9
ext s4, t1, 0, 16
mul s4, s4, t6
srl t1, t1, 16
mul t1, t1, t6
srl t3, t3, 8
srl t2, t2, 8
append t2, t3, 16
and t2, t2, t8
srl s4, s4, 6
and t3, v1, t8
srl t1, t1, 6
append t1, s4, 16
and t1, t1, t9
or t1, t1, t2
ext t2, t3, 0, 16
srl t3, t3, 16
mul t2, t2, a3
mul t3, t3, a3
and v1, v1, t9
ext s4, v1, 0, 16
mul s4, s4, t6
srl v1, v1, 16
mul v1, v1, t6
srl t2, t2, 8
srl t3, t3, 8
append t3, t2, 16
and t3, t3, t8
srl s4, s4, 6
and t2, s1, t8
srl v1, v1, 6
append v1, s4, 16
and v1, v1, t9
or v1, v1, t3
ext t3, t2, 0, 16
srl t2, t2, 16
mul t3, t3, a3
mul t2, t2, a3
and s1, s1, t9
ext s4, s1, 0, 16
mul s4, s4, t6
srl s1, s1, 16
mul s1, s1, t6
srl t3, t3, 8
srl t2, t2, 8
append t2, t3, 16
and t2, t2, t8
srl s4, s4, 6
and t3, s3, t8
srl s1, s1, 6
append s1, s4, 16
and s1, s1, t9
or s1, s1, t2
ext t2, t3, 0, 16
srl t3, t3, 16
mul t2, t2, a3
mul t3, t3, a3
and s3, s3, t9
ext s4, s3, 0, 16
mul s4, s4, t6
srl s3, s3, 16
mul s3, s3, t6
srl t2, t2, 8
srl t3, t3, 8
append t3, t2, 16
and t3, t3, t8
srl s4, s4, 6
and t2, t0, t8
srl s3, s3, 6
append s3, s4, 16
and s3, s3, t9
or s3, s3, t3
ext t3, t2, 0, 16
srl t2, t2, 16
mul t3, t3, t5
mul t2, t2, t5
and t0, t0, t9
ext s4, t0, 0, 16
mul s4, s4, t7
srl t0, t0, 16
mul t0, t0, t7
srl t3, t3, 8
srl t2, t2, 8
append t2, t3, 16
and t2, t2, t8
srl s4, s4, 6
and t3, v0, t8
srl t0, t0, 6
append t0, s4, 16
and t0, t0, t9
or t0, t0, t2
ext t2, t3, 0, 16
srl t3, t3, 16
mul t2, t2, t5
mul t3, t3, t5
and v0, v0, t9
ext s4, v0, 0, 16
mul s4, s4, t7
srl v0, v0, 16
mul v0, v0, t7
srl t2, t2, 8
srl t3, t3, 8
append t3, t2, 16
and t3, t3, t8
srl s4, s4, 6
and t2, s0, t8
srl v0, v0, 6
append v0, s4, 16
and v0, v0, t9
or v0, v0, t3
ext t3, t2, 0, 16
srl t2, t2, 16
mul t3, t3, t5
mul t2, t2, t5
and s0, s0, t9
ext s4, s0, 0, 16
mul s4, s4, t7
srl s0, s0, 16
mul s0, s0, t7
srl t3, t3, 8
srl t2, t2, 8
append t2, t3, 16
and t2, t2, t8
srl s4, s4, 6
and t3, s2, t8
srl s0, s0, 6
append s0, s4, 16
and s0, s0, t9
or s0, s0, t2
ext t2, t3, 0, 16
srl t3, t3, 16
mul t2, t2, t5
mul t3, t3, t5
and s2, s2, t9
ext s4, s2, 0, 16
mul s4, s4, t7
srl s2, s2, 16
mul s2, s2, t7
srl t2, t2, 8
srl t3, t3, 8
append t3, t2, 16
and t3, t3, t8
srl s4, s4, 6
addu.ph t0, t0, t1 /* t0 = [S0 + D0, S1 + D1] */
srl s2, s2, 6
append s2, s4, 16
and s2, s2, t9
or s2, s2, t3
addu.ph v0, v0, v1 /* v0 = [S2 + D2, S3 + D3] */
addu.ph s0, s0, s1 /* s0 = [S4 + D4, S5 + D5] */
addu.ph s2, s2, s3 /* s2 = [S6 + D6, S7 + D7] */
sw t0, 0 (a0) /* [SS0, SS1] */
sw v0, 4 (a0) /* [SS2, SS3] */
sw s0, 8 (a0) /* [SS4, SS5] */
sw s2, 12 (a0) /* [SS6, SS7] */
addiu t4, t4, -1 /* t4-- */
addiu a1, a1, 16 /* src += 8 */
bnez t4, 1b
addiu a0, a0, 16 /* dst += 8 */
RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1
5: /* Process remaining items (len < 8), one at a time */
beqz a2, 0f
nop
1: lhu t1, 0 (a1)
lhu t0, 0 (a0)
addiu a1, a1, 2 /* src++ */
andi t2, t1, 0x07e0
andi t1, t1, 0xf81f
mul t2, t2, a3
mul t1, t1, t6
andi t3, t0, 0x07e0
andi t0, t0, 0xf81f
mul t3, t3, t5
mul t0, t0, t7
addiu a2, a2, -1 /* len-- */
srl t2, t2, 8
srl t1, t1, 6
andi t2, t2, 0x07e0
andi t1, t1, 0xf81f
or t1, t1, t2
srl t3, t3, 8
srl t0, t0, 6
andi t3, t3, 0x07e0
andi t0, t0, 0xf81f
or t0, t0, t3
addu t0, t0, t1 /* src*alpha + dst*ialpha */
sh t0, 0 (a0)
bnez a2, 1b
addiu a0, a0, 2 /* dst++ */
0: jr ra
nop
END(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)
#undef PACK
#undef LDHI
#undef LDLO

View File

@ -1,6 +1,6 @@
/****************************************************************************
**
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtGui module of the Qt Toolkit.
@ -261,4 +261,168 @@ LEAF_MIPS32R2(symbol) \
or \out_1, \scratch1, \scratch3
.endm
/*
 * Checks if stack offset is big enough for storing/restoring regs_num
 * number of registers to/from stack. Stack offset must be greater than
 * or equal to the number of bytes needed for storing registers (regs_num*4)
 * minus 16.
 * Since the MIPS ABI allows usage of the first 16 bytes of the stack frame
 * (this space is reserved for the input arguments of the function, which
 * are already held in a0-a3), the required stack size is reduced by
 * utilizing this space.
 * Raises an assembly-time error when the offset is too small.
 */
.macro CHECK_STACK_OFFSET regs_num, stack_offset
.if \stack_offset < \regs_num * 4 - 16
.error "Stack offset too small."
.endif
.endm
/*
 * Saves a set of registers on the stack. The maximum number of registers
 * that can be saved is limited to 14 (a0-a3, v0-v1 and s0-s7).
 * Stack offset is the number of bytes subtracted from the stack pointer
 * (sp) before the registers are stored, in order to provide enough space
 * on the stack (offset must be a multiple of 4, and must be big enough, as
 * described by the CHECK_STACK_OFFSET macro). Register rN is stored at
 * offset (N - 1) * 4 from the adjusted sp; arguments left at their default
 * value of 0 are skipped. This macro is intended to be used in
 * combination with the RESTORE_REGS_FROM_STACK macro. Example:
 * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
 * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
r2 = 0, r3 = 0, r4 = 0, \
r5 = 0, r6 = 0, r7 = 0, \
r8 = 0, r9 = 0, r10 = 0, \
r11 = 0, r12 = 0, r13 = 0, \
r14 = 0
/* reject negative offsets and offsets that are not a multiple of 4 */
.if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
.error "Stack offset must be positive and multiple of 4."
.endif
.if \stack_offset != 0
addiu sp, sp, -\stack_offset
.endif
/* the first four slots need no size check (see CHECK_STACK_OFFSET) */
sw \r1, 0(sp)
.if \r2 != 0
sw \r2, 4(sp)
.endif
.if \r3 != 0
sw \r3, 8(sp)
.endif
.if \r4 != 0
sw \r4, 12(sp)
.endif
.if \r5 != 0
CHECK_STACK_OFFSET 5, \stack_offset
sw \r5, 16(sp)
.endif
.if \r6 != 0
CHECK_STACK_OFFSET 6, \stack_offset
sw \r6, 20(sp)
.endif
.if \r7 != 0
CHECK_STACK_OFFSET 7, \stack_offset
sw \r7, 24(sp)
.endif
.if \r8 != 0
CHECK_STACK_OFFSET 8, \stack_offset
sw \r8, 28(sp)
.endif
.if \r9 != 0
CHECK_STACK_OFFSET 9, \stack_offset
sw \r9, 32(sp)
.endif
.if \r10 != 0
CHECK_STACK_OFFSET 10, \stack_offset
sw \r10, 36(sp)
.endif
.if \r11 != 0
CHECK_STACK_OFFSET 11, \stack_offset
sw \r11, 40(sp)
.endif
.if \r12 != 0
CHECK_STACK_OFFSET 12, \stack_offset
sw \r12, 44(sp)
.endif
.if \r13 != 0
CHECK_STACK_OFFSET 13, \stack_offset
sw \r13, 48(sp)
.endif
.if \r14 != 0
CHECK_STACK_OFFSET 14, \stack_offset
sw \r14, 52(sp)
.endif
.endm
/*
 * Restores a set of registers from the stack. The maximum number of
 * registers that can be restored is limited to 14 (a0-a3, v0-v1 and
 * s0-s7). Stack offset is the number of bytes added to the stack pointer
 * (sp) after the registers are restored (offset must be a multiple of 4,
 * and must be big enough, as described by the CHECK_STACK_OFFSET macro).
 * Register rN is loaded from offset (N - 1) * 4 from sp; arguments left at
 * their default value of 0 are skipped. This macro is intended to be used
 * in combination with the SAVE_REGS_ON_STACK macro, with the same offset
 * and the same register list.
 * Example:
 * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
 * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
 */
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
r2 = 0, r3 = 0, r4 = 0, \
r5 = 0, r6 = 0, r7 = 0, \
r8 = 0, r9 = 0, r10 = 0, \
r11 = 0, r12 = 0, r13 = 0, \
r14 = 0
/* reject negative offsets and offsets that are not a multiple of 4 */
.if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
.error "Stack offset must be positive and multiple of 4."
.endif
/* the first four slots need no size check (see CHECK_STACK_OFFSET) */
lw \r1, 0(sp)
.if \r2 != 0
lw \r2, 4(sp)
.endif
.if \r3 != 0
lw \r3, 8(sp)
.endif
.if \r4 != 0
lw \r4, 12(sp)
.endif
.if \r5 != 0
CHECK_STACK_OFFSET 5, \stack_offset
lw \r5, 16(sp)
.endif
.if \r6 != 0
CHECK_STACK_OFFSET 6, \stack_offset
lw \r6, 20(sp)
.endif
.if \r7 != 0
CHECK_STACK_OFFSET 7, \stack_offset
lw \r7, 24(sp)
.endif
.if \r8 != 0
CHECK_STACK_OFFSET 8, \stack_offset
lw \r8, 28(sp)
.endif
.if \r9 != 0
CHECK_STACK_OFFSET 9, \stack_offset
lw \r9, 32(sp)
.endif
.if \r10 != 0
CHECK_STACK_OFFSET 10, \stack_offset
lw \r10, 36(sp)
.endif
.if \r11 != 0
CHECK_STACK_OFFSET 11, \stack_offset
lw \r11, 40(sp)
.endif
.if \r12 != 0
CHECK_STACK_OFFSET 12, \stack_offset
lw \r12, 44(sp)
.endif
.if \r13 != 0
CHECK_STACK_OFFSET 13, \stack_offset
lw \r13, 48(sp)
.endif
.if \r14 != 0
CHECK_STACK_OFFSET 14, \stack_offset
lw \r14, 52(sp)
.endif
/* release the space reserved by SAVE_REGS_ON_STACK */
.if \stack_offset != 0
addiu sp, sp, \stack_offset
.endif
.endm
#endif // QT_MIPS_ASM_DSP_H