MIPS DSP build system fix and additional optimizations.
Changed MIPS DSP portion of the mkspecs/features/simd.prf file in order to fix the corrupted build system for MIPS platforms.

List of the additionally optimized functions:

from file src/gui/painting/qdrawhelper.cpp:
- qt_blend_rgb16_on_rgb16
- qt_fetchUntransformed_888
- qt_fetchUntransformed_444
- qt_fetchUntransformed_argb8565

from file src/gui/image/qimage.cpp:
- convert_ARGB_to_ARGB_PM_inplace

from file src/corelib/qstring.cpp:
- ucstrncmp
- toLatin1_helper
- fromLatin1_helper

Change-Id: I5c47a69784917eee29a8dbd2718828a390b27c93
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
parent
48caaee170
commit
f10356ead1
@ -174,10 +174,19 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
|
||||
mips_dsp_assembler.name = assembling[mips_dsp] ${QMAKE_FILE_IN}
|
||||
silent:mips_dsp_assembler.commands = @echo assembling[mips_dsp] ${QMAKE_FILE_IN} && $$mips_dsp_assembler.commands
|
||||
QMAKE_EXTRA_COMPILERS += mips_dsp_compiler
|
||||
QMAKE_EXTRA_COMPILERS += mips_dsp_assembler
|
||||
}
|
||||
mips_dspr2 {
|
||||
HEADERS += $$MIPS_DSP_HEADERS
|
||||
|
||||
mips_dspr2_compiler.commands = $$QMAKE_CXX -c
|
||||
mips_dspr2_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
|
||||
mips_dspr2_compiler.dependency_type = TYPE_C
|
||||
mips_dspr2_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
|
||||
mips_dspr2_compiler.input = MIPS_DSPR2_SOURCES
|
||||
mips_dspr2_compiler.variable_out = OBJECTS
|
||||
mips_dspr2_compiler.name = compiling[mips_dspr2] ${QMAKE_FILE_IN}
|
||||
silent:mips_dspr2_compiler.commands = @echo compiling[mips_dspr2] ${QMAKE_FILE_IN} && $$mips_dspr2_compiler.commands
|
||||
mips_dspr2_assembler.commands = $$QMAKE_CC -c
|
||||
mips_dspr2_assembler.commands += $(CFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
|
||||
mips_dspr2_assembler.dependency_type = TYPE_C
|
||||
@ -186,6 +195,7 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
|
||||
mips_dspr2_assembler.variable_out = OBJECTS
|
||||
mips_dspr2_assembler.name = assembling[mips_dspr2] ${QMAKE_FILE_IN}
|
||||
silent:mips_dspr2_assembler.commands = @echo assembling[mips_dspr2] ${QMAKE_FILE_IN} && $$mips_dspr2_assembler.commands
|
||||
QMAKE_EXTRA_COMPILERS += mips_dspr2_compiler
|
||||
QMAKE_EXTRA_COMPILERS += mips_dspr2_assembler
|
||||
}
|
||||
} else:win32-msvc*|winrt {
|
||||
@ -297,7 +307,7 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
|
||||
$$AVX_SOURCES $$AVX2_SOURCES \
|
||||
$$NEON_SOURCES $$NEON_ASM \
|
||||
$$IWMMXT_SOURCES \
|
||||
$$MIPS_DSP_SOURCES $$MIPS_DSP_ASM $$MIPS_DSPR2_ASM
|
||||
$$MIPS_DSP_SOURCES $$MIPS_DSPR2_SOURCES $$MIPS_DSP_ASM $$MIPS_DSPR2_ASM
|
||||
|
||||
# Headers are already done in the above sections.
|
||||
}
|
||||
|
@ -111,3 +111,17 @@ ctest_qt5_module_files.files += $$ctest_macros_file.output $$cmake_extras_mkspec
|
||||
ctest_qt5_module_files.path = $$[QT_INSTALL_LIBS]/cmake/Qt5Core
|
||||
|
||||
INSTALLS += ctest_qt5_module_files cmake_qt5_umbrella_module_files
|
||||
|
||||
# Applies only when both the mips_dsp and *-g++* scopes match.
# Registers an extra compiler, mips_dsp_corelib_assembler, that turns every file
# listed in MIPS_DSP_ASM into an object file and appends it to OBJECTS.
mips_dsp:*-g++* {
|
||||
HEADERS += $$MIPS_DSP_HEADERS
|
||||
|
||||
# NOTE(review): the sources are driven through $$QMAKE_CXX with $(CXXFLAGS), whereas the
# mips_dspr2_assembler rule shown earlier in this change uses $$QMAKE_CC with $(CFLAGS) - confirm this is intended.
mips_dsp_corelib_assembler.commands = $$QMAKE_CXX -c
|
||||
# -mips32r2 -mdsp are passed explicitly on this command line.
mips_dsp_corelib_assembler.commands += $(CXXFLAGS) $(INCPATH) -mips32r2 -mdsp ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
|
||||
mips_dsp_corelib_assembler.dependency_type = TYPE_C
|
||||
# One object per input, named after the input's base name, placed in the objects directory.
mips_dsp_corelib_assembler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
|
||||
mips_dsp_corelib_assembler.input = MIPS_DSP_ASM
|
||||
mips_dsp_corelib_assembler.variable_out = OBJECTS
|
||||
mips_dsp_corelib_assembler.name = assembling[mips_dsp] ${QMAKE_FILE_IN}
|
||||
# In silent builds, replace the command with a short echo followed by the command built above;
# this line must therefore stay after the two .commands assignments.
silent:mips_dsp_corelib_assembler.commands = @echo assembling[mips_dsp] ${QMAKE_FILE_IN} && $$mips_dsp_corelib_assembler.commands
|
||||
QMAKE_EXTRA_COMPILERS += mips_dsp_corelib_assembler
|
||||
}
|
||||
|
@ -188,9 +188,23 @@ static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b, const uc
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if defined(__mips_dsp)
|
||||
// From qstring_mips_dsp_asm.S
|
||||
extern "C" int qt_ucstrncmp_mips_dsp_asm(const ushort *a,
|
||||
const ushort *b,
|
||||
unsigned len);
|
||||
#endif
|
||||
|
||||
// Unicode case-sensitive compare two same-sized strings
|
||||
static int ucstrncmp(const QChar *a, const QChar *b, int l)
|
||||
{
|
||||
#if defined(__mips_dsp)
|
||||
if (l >= 8) {
|
||||
return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast<const ushort*>(a),
|
||||
reinterpret_cast<const ushort*>(b),
|
||||
l);
|
||||
}
|
||||
#endif // __mips_dsp
|
||||
while (l-- && *a == *b)
|
||||
a++,b++;
|
||||
if (l==-1)
|
||||
@ -3937,6 +3951,10 @@ static inline __m128i mergeQuestionMarks(__m128i chunk)
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__mips_dsp)
|
||||
extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length);
|
||||
#endif
|
||||
|
||||
static QByteArray toLatin1_helper(const QChar *data, int length)
|
||||
{
|
||||
QByteArray ba;
|
||||
@ -3989,10 +4007,14 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
|
||||
length = length % 8;
|
||||
}
|
||||
#endif
|
||||
#if defined(__mips_dsp)
|
||||
qt_toLatin1_mips_dsp_asm(dst, src, length);
|
||||
#else
|
||||
while (length--) {
|
||||
*dst++ = (*src>0xff) ? '?' : (uchar) *src;
|
||||
++src;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return ba;
|
||||
}
|
||||
@ -4104,6 +4126,12 @@ QVector<uint> QString::toUcs4() const
|
||||
return v;
|
||||
}
|
||||
|
||||
#if defined(__mips_dsp)
|
||||
// From qstring_mips_dsp_asm.S
|
||||
extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint);
|
||||
extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint);
|
||||
#endif
|
||||
|
||||
QString::Data *QString::fromLatin1_helper(const char *str, int size)
|
||||
{
|
||||
Data *d;
|
||||
@ -4144,8 +4172,15 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
|
||||
size = size % 16;
|
||||
}
|
||||
#endif
|
||||
#if defined(__mips_dsp)
|
||||
if (size > 20)
|
||||
qt_fromlatin1_mips_asm_unroll8(dst, str, size);
|
||||
else
|
||||
qt_fromlatin1_mips_asm_unroll4(dst, str, size);
|
||||
#else
|
||||
while (size--)
|
||||
*dst++ = (uchar)*str++;
|
||||
#endif
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
449
src/corelib/tools/qstring_mips_dsp_asm.S
Normal file
449
src/corelib/tools/qstring_mips_dsp_asm.S
Normal file
@ -0,0 +1,449 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
|
||||
** Contact: http://www.qt-project.org/legal
|
||||
**
|
||||
** This file is part of the QtCore module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and Digia. For licensing terms and
|
||||
** conditions see http://qt.digia.com/licensing. For further information
|
||||
** use the contact form at http://qt.digia.com/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.LGPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU Lesser General Public License version 2.1 requirements
|
||||
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** In addition, as a special exception, Digia gives you certain additional
|
||||
** rights. These rights are described in the Digia Qt LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "../../gui/painting/qt_mips_asm_dsp_p.h"
|
||||
|
||||
/*
 * EXTRACT_HALVES_x2 src1, src2, hi_1, hi_2
 *
 * Splits each of the two 32-bit registers src1/src2 into its 16-bit halves.
 * For a register that was filled by a word load of two consecutive halfwords,
 * the result is the same on either endianness:
 *   srcN  <- the halfword that came from the lower address
 *   hi_N  <- the halfword that came from the higher address
 * Both halves are zero-extended. srcN is overwritten in place.
 */
.macro EXTRACT_HALVES_x2 src1, src2, hi_1, hi_2
|
||||
#if defined(__MIPSEL) && __MIPSEL
|
||||
/* little-endian: the first element sits in bits 15..0 */
srl \hi_1, \src1, 16
|
||||
ext \src1, \src1, 0, 16
|
||||
srl \hi_2, \src2, 16
|
||||
ext \src2, \src2, 0, 16
|
||||
#else
|
||||
/* big-endian: the first element sits in bits 31..16 */
ext \hi_1, \src1, 0, 16
|
||||
srl \src1, \src1, 16
|
||||
ext \hi_2, \src2, 0, 16
|
||||
srl \src2, \src2, 16
|
||||
#endif
|
||||
.endm
|
||||
|
||||
|
||||
LEAF_MIPS_DSP(qt_ucstrncmp_mips_dsp_asm)
|
||||
/*
 * Compares two arrays of 16-bit code units, element by element.
 * Returns in v0 the difference (a[i] - b[i]) of the first pair that differs,
 * or 0 when all compared elements are equal.
 *
 * Three code paths are selected from the low two bits of the pointers:
 *   5: both pointers word-aligned      - word loads, 8 elements per batch
 *   6: exactly one pointer aligned     - halfword loads, 8 elements per batch
 *   fall-through: both unaligned       - compare one element, then continue at 5
 *
 * Path 6 runs one full batch before testing the batch counter, so it relies on
 * length >= 8; the C++ caller in this change only dispatches here for l >= 8.
 */
/*
|
||||
* Arguments:
|
||||
* a0 - string_a (uint16_t*)
|
||||
* a1 - string_b (uint16_t*)
|
||||
* a2 - length (uint32_t)
|
||||
*
|
||||
* Register usage:
|
||||
* t0 - batches
|
||||
*/
|
||||
|
||||
move v0, zero /* result = 0 */
|
||||
andi t0, a0, 0x3 /* t0 = string_a % 4 */
|
||||
andi t1, a1, 0x3 /* t1 = string_b % 4 */
|
||||
or t2, t0, t1 /* t2 = t0 | t1 */
|
||||
|
||||
beqz t2, 5f /* both aligned */
|
||||
and t2, t0 ,t1 /* (delay slot) t2 = t0 & t1 */
|
||||
beqz t2, 6f /* one aligned */
|
||||
nop
|
||||
|
||||
/*
|
||||
* Both strings are unaligned: read 1 halfword from each,
|
||||
* then fall-off to continue with the both-aligned case.
|
||||
*/
|
||||
lhu t0, 0 (a0)
|
||||
lhu t1, 0 (a1)
|
||||
addiu a2, a2, -1 /* len-- */
|
||||
sub v0, t0, t1 /* v0 = t0-t1 */
|
||||
addiu a0, a0, 2 /* string_a++ */
|
||||
bnez v0, 0f /* if (t0-t1): return */
|
||||
addiu a1, a1, 2 /* string_b++ */
|
||||
beqz a2, 0f /* if !len: return */
|
||||
/* next instruction (srl) fills delay branch slot */
|
||||
|
||||
5: /* Both string pointers are aligned */
|
||||
srl t0, a2, 3 /* batches = length / 8 */
|
||||
beqz t0, 9f /* if !batches: tail */
|
||||
andi a2, a2, 0x7 /* length = length % 8 */
|
||||
|
||||
SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
|
||||
|
||||
1: lw t1, 0 (a0) /* [a0 a1] */
|
||||
lw t3, 4 (a0) /* [a2 a3] */
|
||||
lw t5, 8 (a0) /* [a4 a5] */
|
||||
lw t7, 12 (a0) /* [a6 a7] */
|
||||
|
||||
lw t2, 0 (a1) /* [b0 b1] */
|
||||
lw t4, 4 (a1) /* [b2 b3] */
|
||||
lw t6, 8 (a1) /* [b4 b5] */
|
||||
lw t8, 12 (a1) /* [b6 b7] */
|
||||
|
||||
/*
|
||||
* Subtract elements one by one, if the result is zero
|
||||
* both halves of the registers (shorts) are equal.
|
||||
*/
|
||||
subq.ph s0, t1, t2 /* [a0-b0 a1-b1] */
|
||||
subq.ph s1, t3, t4 /* [a2-b2 a3-b3] */
|
||||
|
||||
bnez s0, 1f
|
||||
subq.ph s2, t5, t6 /* [a4-b4 a5-b5] */
|
||||
bnez s1, 2f
|
||||
subq.ph s3, t7, t8 /* [a6-b6 a7-b7] */
|
||||
bnez s2, 3f
|
||||
addiu t0, t0, -1 /* batches-- */
|
||||
bnez s3, 4f
|
||||
addiu a0, a0, 8*2 /* string_a += 8 */
|
||||
|
||||
bnez t0, 1b /* if batches: loop */
|
||||
addiu a1, a1, 8*2 /* string_b += 8 */
|
||||
|
||||
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
|
||||
|
||||
bnez a2, 9f /* if length: tail */
|
||||
nop
|
||||
jr ra
|
||||
nop
|
||||
|
||||
|
||||
/* Mismatch handlers for the aligned loop: locate the differing element inside the word pair. */
1: /* Check t1 [a0 a1] vs. t2 [b0 b1] */
|
||||
EXTRACT_HALVES_x2 t1, t2, t3, t4 /* a0, b0, a1, b1 */
|
||||
sub v0, t1, t2
|
||||
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
|
||||
bnez v0, 0f /* if (a0-b0): return */
|
||||
nop
|
||||
jr ra
|
||||
sub v0, t3, t4 /* return a1-b1 */
|
||||
|
||||
2: /* Check t3 [a2 a3] vs. t4 [b2 b3] */
|
||||
EXTRACT_HALVES_x2 t3, t4, t1, t2 /* a2, b2, a3, b3 */
|
||||
sub v0, t3, t4
|
||||
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
|
||||
bnez v0, 0f /* if (a2-b2): return */
|
||||
nop
|
||||
jr ra
|
||||
sub v0, t1, t2 /* return a3-b3 */
|
||||
|
||||
3: /* Check t5 [a4 a5] vs. t6 [b4 b5] */
|
||||
EXTRACT_HALVES_x2 t5, t6, t1, t2 /* a4, b4, a5, b5 */
|
||||
sub v0, t5, t6
|
||||
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
|
||||
bnez v0, 0f /* if (a4-b4): return */
|
||||
nop
|
||||
jr ra
|
||||
sub v0, t1, t2 /* return a5-b5 */
|
||||
|
||||
4: /* Check t7 [a6 a7] vs. t8 [b6 b7] */
|
||||
EXTRACT_HALVES_x2 t7, t8, t1, t2 /* a6, b6, a7, b7 */
|
||||
sub v0, t7, t8
|
||||
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
|
||||
bnez v0, 0f /* if (a6-b6): return */
|
||||
nop
|
||||
jr ra
|
||||
sub v0, t1, t2 /* return a7-b7 */
|
||||
|
||||
|
||||
/* Process remaining tail items. */
|
||||
9: lhu t1, 0 (a0) /* a */
|
||||
lhu t2, 0 (a1) /* b */
|
||||
addiu a2, a2, -1 /* length-- */
|
||||
sub v0, t1, t2 /* result = (a - b) */
|
||||
|
||||
bnez v0, 0f /* if (a - b): return */
|
||||
addiu a0, a0, 2 /* string_a++ */
|
||||
|
||||
bnez a2, 9b /* if length: loop */
|
||||
addiu a1, a1, 2 /* string_b++ */
|
||||
|
||||
0: jr ra
|
||||
nop
|
||||
|
||||
|
||||
/* One of the inputs is unaligned, do unrolled half-word loads */
|
||||
6: srl t0, a2, 3 /* batches = length / 8 */
|
||||
andi a2, a2, 0x7 /* length = length % 8 */
|
||||
|
||||
/* first four elements of the batch */
1: lhu t1, 0 (a0)
|
||||
lhu t2, 0 (a1)
|
||||
lhu t3, 2 (a0)
|
||||
lhu t4, 2 (a1)
|
||||
lhu t5, 4 (a0)
|
||||
lhu t6, 4 (a1)
|
||||
lhu t7, 6 (a0)
|
||||
lhu t8, 6 (a1)
|
||||
|
||||
sub v0, t1, t2
|
||||
sub t1, t3, t4
|
||||
|
||||
bnez v0, 0f
|
||||
sub t2, t5, t6
|
||||
bnez t1, 2f
|
||||
sub t3, t7, t8
|
||||
bnez t2, 3f
|
||||
lhu t1, 8 (a0) /* (delay slot) start loading the second four elements */
|
||||
bnez t3, 4f
|
||||
lhu t2, 8 (a1)
|
||||
|
||||
lhu t3, 10 (a0)
|
||||
lhu t4, 10 (a1)
|
||||
lhu t5, 12 (a0)
|
||||
lhu t6, 12 (a1)
|
||||
lhu t7, 14 (a0)
|
||||
lhu t8, 14 (a1)
|
||||
|
||||
sub v0, t1, t2
|
||||
sub t1, t3, t4
|
||||
|
||||
bnez v0, 0f
|
||||
sub t2, t5, t6
|
||||
bnez t1, 2f
|
||||
sub t3, t7, t8
|
||||
bnez t2, 3f
|
||||
addiu t0, t0, -1 /* batches-- */
|
||||
bnez t3, 4f
|
||||
addiu a0, a0, 8*2 /* string_a += 8 */
|
||||
|
||||
bnez t0, 1b
|
||||
addiu a1, a1, 8*2 /* string_b += 8 */
|
||||
|
||||
bnez a2, 9b /* if length: tail */
|
||||
nop
|
||||
|
||||
/* Exits for the half-word path: v0, t1, t2 or t3 holds the first non-zero difference. */
0: jr ra
|
||||
nop
|
||||
2: jr ra
|
||||
move v0, t1
|
||||
3: jr ra
|
||||
move v0, t2
|
||||
4: jr ra
|
||||
move v0, t3
|
||||
|
||||
END(qt_ucstrncmp_mips_dsp_asm)
|
||||
|
||||
|
||||
#if defined(__mips_dspr2)
LEAF_MIPS_DSPR2(qt_fromlatin1_mips_asm_unroll8)
#else
LEAF_MIPS_DSP(qt_fromlatin1_mips_asm_unroll8)
#endif
/*
 * Widens Latin-1 bytes to 16-bit code units, 8 characters per batch.
 *
 * Arguments:
 *   a0 - dst (uint16_t*)
 *   a1 - src (const char*)
 *   a2 - len (unsigned int), must be non-zero when src is not word-aligned
 *        (the head loop stores one character before testing len)
 *
 * Flow:
 *   head loop  - copy single characters until src is word-aligned
 *   9:         - src aligned; pick the batch loop by dst alignment
 *   1: (batch) - dst word-aligned: two word loads, four word stores
 *   7: (batch) - dst only halfword-aligned: sh / sw / sw / sw / sh
 *   8:         - tail, one character at a time
 */

    andi      t0, a1, 0x3
    beqz      t0, 9f                /* check that src is aligned */
     nop

1:  lbu       t1, 0 (a1)
    addiu     a1, a1, 1
    addiu     a2, a2, -1
    sh        t1, 0 (a0)
    beqz      a2, 0f
     andi     t0, a1, 0x3
    bnez      t0, 1b
     addiu    a0, a0, 2

9:  /* source pointer is aligned: do batches of 8 elements */
    andi      t0, a0, 3             /* check if dst is aligned */
    bnez      t0, 6f
     srl      t0, a2, 3             /* batches = len / 8 */
    andi      a2, a2, 0x7           /* tail = len % 8 */

    beqz      t0, 8f                /* if !batches: tail */
     nop

1:  lw        t1, 0 (a1)
    lw        t2, 4 (a1)

    addiu     a1, a1, 8*1
    addiu     t0, t0, -1

    /* zero-extend the four bytes of each word into two halfword pairs */
    preceu.ph.qbl t3, t1
    preceu.ph.qbr t1, t1
    preceu.ph.qbl t4, t2
    preceu.ph.qbr t2, t2

#if defined(__MIPSEL) && __MIPSEL
    sw        t1, 0 (a0)
    sw        t3, 4 (a0)
    sw        t2, 8 (a0)
    sw        t4, 12 (a0)
#else
    sw        t3, 0 (a0)
    sw        t1, 4 (a0)
    sw        t4, 8 (a0)
    sw        t2, 12 (a0)
#endif

    bnez      t0, 1b
     addiu    a0, a0, 8*2

8:  /* process tail items */
    beqz      a2, 0f
     nop

1:  lbu       t1, 0 (a1)
    addiu     a2, a2, -1
    sh        t1, 0 (a0)
    addiu     a1, a1, 1
    bnez      a2, 1b
     addiu    a0, a0, 2

0:  jr        ra
     nop

    /*
     * dst is not word-aligned: emit the first character as a halfword so the
     * following three word stores land on aligned addresses.
     * NOTE(review): this path takes the first character from bits 7..0 of the
     * loaded word, i.e. it assumes little-endian byte order - confirm for
     * big-endian targets.
     */
6:  beqz      t0, 8b
     andi     a2, a2, 7
7:  lw        t1, 0(a1)
    lw        t2, 4(a1)
    addiu     t0, t0, -1
    addiu     a1, a1, 8
    andi      t3, t1, 0xff          /* t3 = char 0 */
#if defined(__mips_dspr2)
    prepend   t1, t2, 8             /* t1 = (t2 << 24) | (t1 >> 8): chars 1..4 */
#else
    /*
     * DSP r1 replacement for the prepend above. The shifted-in byte must come
     * from t2 (char 4); t4 is a scratch register that holds no input here.
     */
    sll       t4, t2, 24
    srl       t1, t1, 8
    or        t1, t1, t4
#endif
    srl       t2, t2, 8             /* t2 = chars 5..7 */
    preceu.ph.qbr t4, t1            /* chars 1, 2 */
    preceu.ph.qbl t1, t1            /* chars 3, 4 */
    preceu.ph.qbr t5, t2            /* chars 5, 6 */
    srl       t2, t2, 16            /* char 7 */
    sh        t3, 0(a0)
    sw        t4, 2(a0)
    sw        t1, 6(a0)
    sw        t5, 10(a0)
    sh        t2, 14(a0)
    bnez      t0, 7b
     addiu    a0, a0, 16
    bnez      a2, 1b                /* remaining items: tail loop above */
     nop

    jr        ra
     nop

END(qt_fromlatin1_mips_asm_unroll8)
|
||||
|
||||
|
||||
LEAF_MIPS_DSP(qt_fromlatin1_mips_asm_unroll4)
/*
 * Widens Latin-1 bytes to 16-bit code units using byte loads and halfword
 * stores only, 4 characters per batch, so it has no alignment requirement
 * beyond halfword alignment of dst.
 *
 * Arguments:
 *   a0 - dst (uint16_t*)
 *   a1 - src (const char*)
 *   a2 - len (unsigned int)
 */

/*
 * QString::fromLatin1_helper() already handles the len==0 case, but the
 * no-batch branch goes through the tail guard at 8: anyway, so that a zero
 * length returns immediately instead of wrapping the tail counter.
 */
    srl       t0, a2, 2             /* batches = len / 4 */
    beqz      t0, 8f                /* if !batches: tail (guarded) */
     andi     a2, a2, 0x3           /* tail = len % 4 */

1:  lbu       t1, 0(a1)
    lbu       t2, 1(a1)
    lbu       t3, 2(a1)
    lbu       t4, 3(a1)
    sh        t1, 0(a0)
    sh        t2, 2(a0)
    sh        t3, 4(a0)
    sh        t4, 6(a0)
    addiu     t0, t0, -1
    addiu     a1, a1, 4
    bnez      t0, 1b
     addiu    a0, a0, 8

8:  beqz      a2, 0f                /* nothing left: return */
     nop

9:  lbu       t1, 0(a1)
    addiu     a2, a2, -1
    addiu     a1, a1, 1
    sh        t1, 0(a0)
    bnez      a2, 9b
     addiu    a0, a0, 2

0:  jr        ra
     nop

END(qt_fromlatin1_mips_asm_unroll4)
|
||||
|
||||
|
||||
LEAF_MIPS_DSP(qt_toLatin1_mips_dsp_asm)
|
||||
/*
 * Narrows 16-bit code units to bytes. Any unit with a non-zero high byte
 * (value > 0xff) is written as 0x3f ('?'). Processes 4 units per batch, then
 * a one-at-a-time tail. A length of zero returns without touching memory.
 * Declared in this change as (uchar *dst, const ushort *src, int length).
 */
/*
|
||||
* a0 - dst
|
||||
* a1 - src
|
||||
* a2 - length
|
||||
*/
|
||||
|
||||
addiu t9, zero, 0x3f /* t9 = '?' replacement character */
|
||||
srl t8, a2, 2 /* t8 = batches = length / 4 */
|
||||
beqz t8, 2f /* if !batches: tail */
|
||||
andi a2, a2, 3 /* (delay slot) a2 = length % 4 */
|
||||
1:
|
||||
lhu t0, 0(a1)
|
||||
lhu t1, 2(a1)
|
||||
lhu t2, 4(a1)
|
||||
lhu t3, 6(a1)
|
||||
srl t4, t0, 8 /* t4..t7 = high bytes of the four units */
|
||||
srl t5, t1, 8
|
||||
srl t6, t2, 8
|
||||
srl t7, t3, 8
|
||||
movn t0, t9, t4 /* if the high byte is non-zero: unit = '?' */
|
||||
movn t1, t9, t5
|
||||
movn t2, t9, t6
|
||||
movn t3, t9, t7
|
||||
addiu a1, a1, 8 /* src += 4 */
|
||||
addiu t8, t8, -1 /* batches-- */
|
||||
sb t0, 0(a0)
|
||||
sb t1, 1(a0)
|
||||
sb t2, 2(a0)
|
||||
sb t3, 3(a0)
|
||||
bgtz t8, 1b
|
||||
addiu a0, a0, 4 /* (delay slot) dst += 4 */
|
||||
2: beqz a2, 4f /* no tail items: return */
|
||||
nop
|
||||
3:
|
||||
lhu t0, 0(a1)
|
||||
addiu a1, a1, 2 /* src++ */
|
||||
addiu a2, a2, -1 /* length-- */
|
||||
srl t1, t0, 8
|
||||
movn t0, t9, t1 /* unit > 0xff: '?' */
|
||||
sb t0, 0(a0)
|
||||
bgtz a2, 3b
|
||||
addiu a0, a0, 1 /* (delay slot) dst++ */
|
||||
4:
|
||||
jr ra
|
||||
nop
|
||||
|
||||
END(qt_toLatin1_mips_dsp_asm)
|
||||
|
@ -192,3 +192,7 @@ INCLUDEPATH += ../3rdparty/md5 \
|
||||
!macx-icc:!vxworks:unix:LIBS_PRIVATE += -lm
|
||||
|
||||
TR_EXCLUDE += ../3rdparty/*
|
||||
|
||||
# MIPS DSP
|
||||
MIPS_DSP_ASM += tools/qstring_mips_dsp_asm.S
|
||||
MIPS_DSP_HEADERS += ../gui/painting/qt_mips_asm_dsp_p.h
|
||||
|
@ -78,3 +78,5 @@ NEON_SOURCES += image/qimage_neon.cpp
|
||||
SSE2_SOURCES += image/qimage_sse2.cpp
|
||||
SSSE3_SOURCES += image/qimage_ssse3.cpp
|
||||
AVX_SOURCES += image/qimage_avx.cpp
|
||||
MIPS_DSPR2_SOURCES += image/qimage_mips_dspr2.cpp
|
||||
MIPS_DSPR2_ASM += image/qimage_mips_dspr2_asm.S
|
||||
|
@ -3960,6 +3960,12 @@ void qInitImageConversions()
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef QT_COMPILER_SUPPORTS_MIPS_DSPR2
|
||||
extern bool convert_ARGB_to_ARGB_PM_inplace_mips_dspr2(QImageData *data, Qt::ImageConversionFlags);
|
||||
inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_mips_dspr2;
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
extern const uchar *qt_pow_rgb_gamma();
|
||||
|
69
src/gui/image/qimage_mips_dspr2.cpp
Normal file
69
src/gui/image/qimage_mips_dspr2.cpp
Normal file
@ -0,0 +1,69 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
|
||||
** Contact: http://www.qt-project.org/legal
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and Digia. For licensing terms and
|
||||
** conditions see http://qt.digia.com/licensing. For further information
|
||||
** use the contact form at http://qt.digia.com/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.LGPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU Lesser General Public License version 2.1 requirements
|
||||
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** In addition, as a special exception, Digia gives you certain additional
|
||||
** rights. These rights are described in the Digia Qt LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "qimage.h"
|
||||
#include <private/qimage_p.h>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
|
||||
// Defined in qimage_mips_dspr2_asm.S
|
||||
//
|
||||
extern "C" void premultiply_argb_inplace_mips_asm(void*, unsigned, unsigned, int);
|
||||
|
||||
// In-place ARGB32 -> ARGB32_Premultiplied conversion backed by the MIPS DSPr2
// assembly routine premultiply_argb_inplace_mips_asm().
//
// Always returns true. An image with zero width or height is left untouched
// (its format is not changed); otherwise the pixel data is premultiplied and
// the format field is updated to Format_ARGB32_Premultiplied.
bool convert_ARGB_to_ARGB_PM_inplace_mips_dspr2(QImageData *data, Qt::ImageConversionFlags)
{
    Q_ASSERT(data->format == QImage::Format_ARGB32);

    if (data->width && data->height) {
        // A scanline holds width 4-byte pixels; whatever remains of
        // bytes_per_line is padding the assembly routine skips after each row.
        Q_ASSERT((data->bytes_per_line - (data->width << 2)) >= 0);

        premultiply_argb_inplace_mips_asm(data->data, data->height, data->width,
                                          data->bytes_per_line - (data->width << 2));

        data->format = QImage::Format_ARGB32_Premultiplied;
    }

    return true;
}
|
||||
|
||||
QT_END_NAMESPACE
|
207
src/gui/image/qimage_mips_dspr2_asm.S
Normal file
207
src/gui/image/qimage_mips_dspr2_asm.S
Normal file
@ -0,0 +1,207 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
|
||||
** Contact: http://www.qt-project.org/legal
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
**
|
||||
** $QT_BEGIN_LICENSE:LGPL$
|
||||
** Commercial License Usage
|
||||
** Licensees holding valid commercial Qt licenses may use this file in
|
||||
** accordance with the commercial license agreement provided with the
|
||||
** Software or, alternatively, in accordance with the terms contained in
|
||||
** a written agreement between you and Digia. For licensing terms and
|
||||
** conditions see http://qt.digia.com/licensing. For further information
|
||||
** use the contact form at http://qt.digia.com/contact-us.
|
||||
**
|
||||
** GNU Lesser General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU Lesser
|
||||
** General Public License version 2.1 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.LGPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU Lesser General Public License version 2.1 requirements
|
||||
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
||||
**
|
||||
** In addition, as a special exception, Digia gives you certain additional
|
||||
** rights. These rights are described in the Digia Qt LGPL Exception
|
||||
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
||||
**
|
||||
** GNU General Public License Usage
|
||||
** Alternatively, this file may be used under the terms of the GNU
|
||||
** General Public License version 3.0 as published by the Free Software
|
||||
** Foundation and appearing in the file LICENSE.GPL included in the
|
||||
** packaging of this file. Please review the following information to
|
||||
** ensure the GNU General Public License version 3.0 requirements will be
|
||||
** met: http://www.gnu.org/copyleft/gpl.html.
|
||||
**
|
||||
**
|
||||
** $QT_END_LICENSE$
|
||||
**
|
||||
****************************************************************************/
|
||||
|
||||
#include "../painting/qt_mips_asm_dsp_p.h"
|
||||
|
||||
LEAF_MIPS_DSPR2(premultiply_argb_inplace_mips_asm)
|
||||
|
||||
/*
 * Premultiplies 32-bit pixels in place: the three low bytes of every pixel are
 * scaled by the pixel's top byte (alpha), and the top byte is written back
 * unchanged.
 *
 * Arguments (names follow the comments in the body):
 *   a0 - pixel data, advanced as pixels are processed
 *   a1 - numlines
 *   a2 - linelen, in pixels
 *   a3 - srclineskip, bytes added to a0 after each line
 *
 * Each line is processed as (linelen / 8) unrolled batches of 8 pixels followed
 * by a one-pixel-at-a-time tail. numlines is decremented before it is tested,
 * and a line with fewer than 8 pixels enters the tail loop without a zero
 * check, so callers must pass numlines >= 1 and linelen >= 1.
 */
SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
|
||||
|
||||
3: srl v1, a2, 3 /* v1 = linelen / 8 */
|
||||
addiu a1, a1, -1 /* numlines-- */
|
||||
beqz v1, 1f /* if (!(linelen / 8)): tail */
|
||||
andi v0, a2, 0x7 /* v0 = linelen % 8 */
|
||||
pref 5, 0 (a0) /* cache-hint: store-streamed */
|
||||
|
||||
/* unrolled loop, handles (v1 = len / 8) batches of 8 pixels */
|
||||
2: addiu v1, v1, -1
|
||||
pref 5, 0(a0)
|
||||
pref 5, 32(a0)
|
||||
|
||||
/* pixels 0..3 of the batch */
lw t0, 0(a0)
|
||||
lw t1, 4(a0)
|
||||
lw t2, 8(a0)
|
||||
lw t3, 12(a0)
|
||||
srl t4, t0, 24 /* 00|00|00|A1 */
|
||||
replv.ph t5, t4 /* 00|A1|00|A1 */
|
||||
srl t6, t1, 24 /* 00|00|00|A2 */
|
||||
replv.ph t7, t6 /* 00|A2|00|A2 */
|
||||
muleu_s.ph.qbl t8, t0, t5 /* A1*A1|A1*R1 */
|
||||
muleu_s.ph.qbr t0, t0, t5 /* A1*G1|A1*B1 */
|
||||
muleu_s.ph.qbl t9, t1, t7 /* A2*A2|A2*R2 */
|
||||
muleu_s.ph.qbr t1, t1, t7 /* A2*G2|A2*B2 */
|
||||
srl t5, t2, 24 /* 00|00|00|A3 */
|
||||
replv.ph s0, t5 /* 00|A3|00|A3 */
|
||||
srl t7, t3, 24 /* 00|00|00|A4 */
|
||||
replv.ph s1, t7 /* 00|A4|00|A4 */
|
||||
muleu_s.ph.qbl s2, t2, s0 /* A3*A3|A3*R3 */
|
||||
muleu_s.ph.qbr t2, t2, s0 /* A3*G3|A3*B3 */
|
||||
muleu_s.ph.qbl s0, t3, s1 /* A4*A4|A4*R4 */
|
||||
muleu_s.ph.qbr t3, t3, s1 /* A4*G4|A4*B4 */
|
||||
/* add the high byte of each 16-bit product to the product itself (x += x >> 8) */
preceu.ph.qbla s1, t8
|
||||
preceu.ph.qbla s3, t0
|
||||
addu.ph t8, t8, s1
|
||||
addu.ph t0, t0, s3
|
||||
preceu.ph.qbla s1, t9
|
||||
preceu.ph.qbla s3, t1
|
||||
addu.ph t9, t9, s1
|
||||
addu.ph t1, t1, s3
|
||||
preceu.ph.qbla s1, s2
|
||||
preceu.ph.qbla s3, t2
|
||||
addu.ph s2, s2, s1
|
||||
addu.ph t2, t2, s3
|
||||
preceu.ph.qbla s1, s0
|
||||
preceu.ph.qbla s3, t3
|
||||
addu.ph s0, s0, s1
|
||||
addu.ph t3, t3, s3
|
||||
shra_r.ph t8, t8, 8 /* xxAA1|xxRR1 */
|
||||
shra_r.ph t0, t0, 8 /* xxGG1|xxBB1 */
|
||||
shra_r.ph t9, t9, 8
|
||||
shra_r.ph t1, t1, 8
|
||||
shra_r.ph s2, s2, 8
|
||||
shra_r.ph t2, t2, 8
|
||||
shra_r.ph s0, s0, 8
|
||||
shra_r.ph t3, t3, 8
|
||||
/* pack the low byte of each halfword back into one word per pixel */
precr.qb.ph t0, t8, t0
|
||||
precr.qb.ph t1, t9, t1
|
||||
precr.qb.ph t2, s2, t2
|
||||
precr.qb.ph t3, s0, t3
|
||||
/* t4/t6/t5/t7 still hold the original alphas: result = alpha << 24 | low 24 bits of the packed word */
append t4, t0, 24
|
||||
append t6, t1, 24
|
||||
append t5, t2, 24
|
||||
append t7, t3, 24
|
||||
sw t4, 0(a0)
|
||||
sw t6, 4(a0)
|
||||
sw t5, 8(a0)
|
||||
sw t7, 12(a0)
|
||||
|
||||
/* pixels 4..7 of the batch: same sequence at offsets 16..28 */
lw t0, 16(a0)
|
||||
lw t1, 20(a0)
|
||||
lw t2, 24(a0)
|
||||
lw t3, 28(a0)
|
||||
srl t4, t0, 24 /* 00|00|00|A1 */
|
||||
replv.ph t5, t4 /* 00|A1|00|A1 */
|
||||
srl t6, t1, 24 /* 00|00|00|A2 */
|
||||
replv.ph t7, t6 /* 00|A2|00|A2 */
|
||||
muleu_s.ph.qbl t8, t0, t5 /* A1*A1|A1*R1 */
|
||||
muleu_s.ph.qbr t0, t0, t5 /* A1*G1|A1*B1 */
|
||||
muleu_s.ph.qbl t9, t1, t7 /* A2*A2|A2*R2 */
|
||||
muleu_s.ph.qbr t1, t1, t7 /* A2*G2|A2*B2 */
|
||||
srl t5, t2, 24 /* 00|00|00|A3 */
|
||||
replv.ph s0, t5 /* 00|A3|00|A3 */
|
||||
srl t7, t3, 24 /* 00|00|00|A4 */
|
||||
replv.ph s1, t7 /* 00|A4|00|A4 */
|
||||
muleu_s.ph.qbl s2, t2, s0 /* A3*A3|A3*R3 */
|
||||
muleu_s.ph.qbr t2, t2, s0 /* A3*G3|A3*B3 */
|
||||
muleu_s.ph.qbl s0, t3, s1 /* A4*A4|A4*R4 */
|
||||
muleu_s.ph.qbr t3, t3, s1 /* A4*G4|A4*B4 */
|
||||
preceu.ph.qbla s1, t8
|
||||
preceu.ph.qbla s3, t0
|
||||
addu.ph t8, t8, s1
|
||||
addu.ph t0, t0, s3
|
||||
preceu.ph.qbla s1, t9
|
||||
preceu.ph.qbla s3, t1
|
||||
addu.ph t9, t9, s1
|
||||
addu.ph t1, t1, s3
|
||||
preceu.ph.qbla s1, s2
|
||||
preceu.ph.qbla s3, t2
|
||||
addu.ph s2, s2, s1
|
||||
addu.ph t2, t2, s3
|
||||
preceu.ph.qbla s1, s0
|
||||
preceu.ph.qbla s3, t3
|
||||
addu.ph s0, s0, s1
|
||||
addu.ph t3, t3, s3
|
||||
shra_r.ph t8, t8, 8 /* xxAA1|xxRR1 */
|
||||
shra_r.ph t0, t0, 8 /* xxGG1|xxBB1 */
|
||||
shra_r.ph t9, t9, 8
|
||||
shra_r.ph t1, t1, 8
|
||||
shra_r.ph s2, s2, 8
|
||||
shra_r.ph t2, t2, 8
|
||||
shra_r.ph s0, s0, 8
|
||||
shra_r.ph t3, t3, 8
|
||||
precr.qb.ph t0, t8, t0
|
||||
precr.qb.ph t1, t9, t1
|
||||
precr.qb.ph t2, s2, t2
|
||||
precr.qb.ph t3, s0, t3
|
||||
append t4, t0, 24
|
||||
append t6, t1, 24
|
||||
append t5, t2, 24
|
||||
append t7, t3, 24
|
||||
sw t4, 16(a0)
|
||||
sw t6, 20(a0)
|
||||
sw t5, 24(a0)
|
||||
sw t7, 28(a0)
|
||||
bgtz v1, 2b /* if (v1): unrolled loop */
|
||||
addiu a0, a0, 32 /* data += 8 */
|
||||
|
||||
beqz v0, 4f /* if (!v0): skip tail loop */
|
||||
nop
|
||||
|
||||
/* tail loop, handles (len < 8), one pixel at a time */
|
||||
1: lw t1, 0 (a0)
|
||||
addiu v0, v0, -1 /* len-- */
|
||||
srl t2, t1, 24 /* t2 = alpha */
|
||||
replv.ph t3, t2
|
||||
muleu_s.ph.qbl t4, t1, t3
|
||||
muleu_s.ph.qbr t1, t1, t3
|
||||
preceu.ph.qbla t3, t4
|
||||
preceu.ph.qbla t5, t1
|
||||
addu.ph t4, t4, t3
|
||||
addu.ph t1, t1, t5
|
||||
shra_r.ph t4, t4, 8
|
||||
shra_r.ph t1, t1, 8
|
||||
precr.qb.ph t1, t4, t1
|
||||
append t2, t1, 24
|
||||
sw t2, 0(a0)
|
||||
bgtz v0, 1b
|
||||
addiu a0, a0, 4 /* src++ */
|
||||
|
||||
4: bnez a1, 3b /* if (numlines): loop */
|
||||
addu a0, a0, a3 /* src += srclineskip */
|
||||
|
||||
0: /* return */
|
||||
RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
|
||||
|
||||
jr ra
|
||||
nop
|
||||
|
||||
END(premultiply_argb_inplace_mips_asm)
|
||||
|
@ -1790,7 +1790,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
|
||||
return buffer;
|
||||
}
|
||||
|
||||
static const SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
|
||||
static SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
|
||||
// Untransformed
|
||||
{
|
||||
0, // Invalid
|
||||
@ -6402,6 +6402,21 @@ void qInitDrawhelperAsm()
|
||||
|
||||
destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;
|
||||
|
||||
sourceFetch[BlendUntransformed][QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
|
||||
sourceFetch[BlendTiled][QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
|
||||
|
||||
sourceFetch[BlendUntransformed][QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
|
||||
sourceFetch[BlendTiled][QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
|
||||
|
||||
sourceFetch[BlendUntransformed][QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
|
||||
sourceFetch[BlendTiled][QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
|
||||
|
||||
#if defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
|
||||
qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
|
||||
#else
|
||||
qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
|
||||
#endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2
|
||||
|
||||
#endif // QT_COMPILER_SUPPORTS_MIPS_DSP
|
||||
if (functionForModeSolidAsm) {
|
||||
const int destinationMode = QPainter::CompositionMode_Destination;
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
|
||||
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
|
||||
** Contact: http://www.qt-project.org/legal
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
@ -110,6 +110,78 @@ void qt_blend_rgb32_on_rgb32_mips_dsp(uchar *destPixels, int dbpl,
|
||||
}
|
||||
}
|
||||
|
||||
#if defined QT_COMPILER_SUPPORTS_MIPS_DSPR2
|
||||
/*
 * Blends a w x h rectangle of RGB16 pixels from srcPixels onto destPixels
 * (DSPr2 build). dbpl / sbpl are the byte strides of the destination and
 * source rows.
 *
 *  - const_alpha == 0:   nothing is written.
 *  - const_alpha == 256: rows are copied; rows narrower than 256 pixels go
 *                        through the assembly copy routine, wider rows use
 *                        memcpy.
 *  - anything else:      every row is blended by the DSPr2 assembly routine.
 */
void qt_blend_rgb16_on_rgb16_mips_dspr2(uchar *destPixels, int dbpl,
                                        const uchar *srcPixels, int sbpl,
                                        int w, int h,
                                        int const_alpha)
{
    if (const_alpha == 0)
        return;

    if (const_alpha != 256) {
        // Partial opacity: blend row by row.
        for (int row = 0; row < h; ++row) {
            qt_blend_rgb16_on_rgb16_mips_dspr2_asm(reinterpret_cast<quint16 *>(destPixels),
                                                   reinterpret_cast<const quint16 *>(srcPixels),
                                                   w, const_alpha);
            destPixels += dbpl;
            srcPixels += sbpl;
        }
        return;
    }

    // Fully opaque from here on: a plain copy.
    if (w >= 256) {
        const int bytesPerRow = w << 1;   // two bytes per RGB16 pixel
        for (; h != 0; --h) {
            memcpy(destPixels, srcPixels, bytesPerRow);
            destPixels += dbpl;
            srcPixels += sbpl;
        }
        return;
    }

    for (int row = 0; row < h; ++row) {
        qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm(reinterpret_cast<quint16 *>(destPixels),
                                                             reinterpret_cast<const quint16 *>(srcPixels),
                                                             w);
        destPixels += dbpl;
        srcPixels += sbpl;
    }
}
|
||||
#else
|
||||
/*
 * Blends a w x h rectangle of RGB16 pixels from srcPixels onto destPixels
 * (plain DSP build). dbpl / sbpl are the byte strides of the destination and
 * source rows.
 *
 *  - const_alpha == 0:   nothing is written.
 *  - const_alpha == 256: rows are copied; rows narrower than 256 pixels go
 *                        through the assembly copy routine, wider rows use
 *                        memcpy.
 *  - anything else:      every row is blended by the DSP assembly routine.
 */
void qt_blend_rgb16_on_rgb16_mips_dsp(uchar *destPixels, int dbpl,
                                      const uchar *srcPixels, int sbpl,
                                      int w, int h,
                                      int const_alpha)
{
    if (const_alpha == 0)
        return;

    if (const_alpha != 256) {
        // Partial opacity: blend row by row.
        for (int row = 0; row < h; ++row) {
            qt_blend_rgb16_on_rgb16_mips_dsp_asm(reinterpret_cast<quint16 *>(destPixels),
                                                 reinterpret_cast<const quint16 *>(srcPixels),
                                                 w, const_alpha);
            destPixels += dbpl;
            srcPixels += sbpl;
        }
        return;
    }

    // Fully opaque from here on: a plain copy.
    if (w >= 256) {
        const int bytesPerRow = w << 1;   // two bytes per RGB16 pixel
        for (; h != 0; --h) {
            memcpy(destPixels, srcPixels, bytesPerRow);
            destPixels += dbpl;
            srcPixels += sbpl;
        }
        return;
    }

    for (int row = 0; row < h; ++row) {
        qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm(reinterpret_cast<quint16 *>(destPixels),
                                                             reinterpret_cast<const quint16 *>(srcPixels),
                                                             w);
        destPixels += dbpl;
        srcPixels += sbpl;
    }
}
|
||||
#endif
|
||||
|
||||
void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
|
||||
{
|
||||
if (const_alpha == 255) {
|
||||
@ -422,5 +494,28 @@ void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int l
|
||||
comp_func_SourceOut_dsp_asm_x2(dest, src, length, const_alpha);
|
||||
}
|
||||
|
||||
/*
 * Source-fetch function for untransformed Format_RGB888 textures.
 *
 * Converts `length` pixels of texture row `y`, starting at pixel column `x`,
 * into 32-bit values written to `buffer`, and returns `buffer`.
 */
const uint * QT_FASTCALL qt_fetchUntransformed_888_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
                                             int y, int x, int length)
{
    // The assembly routine consumes 3 source bytes per pixel, so pixel column
    // x starts 3 * x bytes into the scan line (not x bytes).
    const uchar *line = static_cast<const uchar *>(data->texture.scanLine(y)) + x * 3;
    fetchUntransformed_888_asm_mips_dsp(buffer, line, length);
    return buffer;
}
|
||||
|
||||
/*
 * Source-fetch function for untransformed Format_RGB444 textures.
 *
 * Converts `length` pixels of texture row `y`, starting at pixel column `x`,
 * into 32-bit values written to `buffer`, and returns `buffer`.
 */
const uint * QT_FASTCALL qt_fetchUntransformed_444_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
                                             int y, int x, int length)
{
    // The assembly routine consumes 2 source bytes per pixel, so pixel column
    // x starts 2 * x bytes into the scan line (not x bytes).
    const uchar *line = static_cast<const uchar *>(data->texture.scanLine(y)) + x * 2;
    fetchUntransformed_444_asm_mips_dsp(buffer, line, length);
    return buffer;
}
|
||||
|
||||
/*
 * Source-fetch function for untransformed Format_ARGB8565_Premultiplied
 * textures.
 *
 * Converts `length` pixels of texture row `y`, starting at pixel column `x`,
 * into 32-bit values written to `buffer`, and returns `buffer`.
 */
const uint * QT_FASTCALL qt_fetchUntransformed_argb8565_premultiplied_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
                                             int y, int x, int length)
{
    // The assembly routine consumes 3 source bytes per pixel, so pixel column
    // x starts 3 * x bytes into the scan line (not x bytes).
    const uchar *line = static_cast<const uchar *>(data->texture.scanLine(y)) + x * 3;
    fetchUntransformed_argb8565_premultiplied_asm_mips_dsp(buffer, line, length);
    return buffer;
}
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
|
||||
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
|
||||
** Contact: http://www.qt-project.org/legal
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
@ -1601,3 +1601,479 @@ LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
|
||||
nop
|
||||
|
||||
END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
|
||||
|
||||
|
||||
#if defined(__MIPSEL) && __MIPSEL
|
||||
# define PACK(r, s, t) packrl.ph r, s, t
|
||||
# define SWHI(r, o, b) swl r, o + 1 (b)
|
||||
# define SWLO(r, o, b) swr r, o + 0 (b)
|
||||
# define LDHI(r, o, b) lwl r, o + 1 (b)
|
||||
# define LDLO(r, o, b) lwr r, o + 2 (b)
|
||||
#else
|
||||
# define PACK(r, s, t) packrl.ph r, t, s
|
||||
# define SWHI(r, o, b) swr r, o + 1 (b)
|
||||
# define SWLO(r, o, b) swl r, o + 0 (b)
|
||||
# define LDHI(r, o, b) lwr r, o + 1 (b)
|
||||
# define LDLO(r, o, b) lwl r, o + 2 (b)
|
||||
#endif
|
||||
|
||||
LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)
|
||||
/*
|
||||
* Copies len 16-bit (r5g6b5) items from src to dst unchanged.
|
||||
*
|
||||
* a0 - dst (*r5g6b5)
|
||||
* a1 - src (const *r5g6b5)
|
||||
* a2 - len (unsigned int)
|
||||
*
|
||||
* Register usage:
|
||||
* t0-3 - Scratch registers
|
||||
* t4 - Number of iterations to do in unrolled loops
|
||||
* t5-7 - Auxiliary scratch registers.
|
||||
*
|
||||
* Check if base addresses of src/dst are aligned, cases:
|
||||
* a) Both aligned.
|
||||
* b) Both unaligned:
|
||||
* 1. Copy a halfword
|
||||
* 2. Use aligned case.
|
||||
* c) dst aligned, src unaligned:
|
||||
* 1. Load src with LDHI, lw at offsets 2/6/10, and LDLO.
|
||||
* 2. Re-pack with PACK and store aligned words to dst.
|
||||
* d) dst unaligned, src aligned:
|
||||
* 1. Load aligned words from src.
|
||||
* 2. Store with SWLO, PACK-ed words at offsets 2/6/10, and SWHI.
|
||||
*
|
||||
* NOTE(review): the instruction written directly after each branch is
|
||||
* used as its delay slot (presumably noreorder mode is set by the LEAF
|
||||
* macro - confirm).
|
||||
*/
|
||||
|
||||
beqz a2, 0f /* if (a2:len == 0): return */
|
||||
andi t0, a0, 0x3 /* t0 = a0:dst % 4 */
|
||||
andi t1, a1, 0x3 /* t1 = a1:src % 4 */
|
||||
or t2, t0, t1 /* t2 = t0 | t1 */
|
||||
|
||||
beqz t2, 4f /* both aligned */
|
||||
nop
|
||||
beqz t0, 3f /* dst aligned, src unaligned */
|
||||
nop
|
||||
beqz t1, 2f /* src aligned, dst unaligned */
|
||||
nop
|
||||
|
||||
/*
|
||||
* Both src/dst are unaligned: copy 1 halfword from src to dst,
|
||||
* then fall through to continue with the word-aligned copy.
|
||||
*/
|
||||
lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */
|
||||
addiu a1, a1, 2 /* src++ */
|
||||
addiu a2, a2,-1 /* len-- */
|
||||
sh t0, 0 (a0) /* t0 -> ((uint16_t*) dst)[0] */
|
||||
addiu a0, a0, 2 /* dst++ */
|
||||
|
||||
/*
|
||||
* Both src/dst pointers are word-aligned, process eight
|
||||
* items at a time in an unrolled loop.
|
||||
*/
|
||||
4: beqz a2, 0f /* if (len == 0): return */
|
||||
srl t4, a2, 3 /* t4 = len / 8 */
|
||||
|
||||
beqz t4, 5f /* if (t4 == 0): tail */
|
||||
andi a2, a2, 0x07 /* len = len % 8 */
|
||||
|
||||
1: lw t0, 0 (a1)
|
||||
lw t1, 4 (a1)
|
||||
lw t2, 8 (a1)
|
||||
lw t3, 12 (a1)
|
||||
|
||||
addiu t4, t4, -1 /* t4-- */
|
||||
addiu a1, a1, 16 /* src += 8 */
|
||||
|
||||
sw t0, 0 (a0)
|
||||
sw t1, 4 (a0)
|
||||
sw t2, 8 (a0)
|
||||
sw t3, 12 (a0)
|
||||
|
||||
bnez t4, 1b
|
||||
addiu a0, a0, 16 /* dst += 8 */
|
||||
|
||||
b 5f
|
||||
nop
|
||||
|
||||
|
||||
/*
|
||||
* dst pointer is unaligned (src is word-aligned)
|
||||
*/
|
||||
2: beqz a2, 0f /* if (len == 0): return */
|
||||
srl t4, a2, 3 /* t4 = len / 8 */
|
||||
beqz t4, 5f /* if (t4 == 0): tail */
|
||||
andi a2, a2, 0x07 /* len = len % 8 */
|
||||
|
||||
1: lw t0, 0 (a1)
|
||||
lw t1, 4 (a1)
|
||||
lw t2, 8 (a1)
|
||||
lw t3, 12 (a1)
|
||||
|
||||
addiu t4, t4, -1 /* t4-- */
|
||||
addiu a1, a1, 16 /* src += 8 */
|
||||
|
||||
SWLO (t0, 0, a0) /* partial store of the first item */
|
||||
PACK (t5, t1, t0) /* t5-t7: words re-packed from neighbouring halves */
|
||||
PACK (t6, t2, t1)
|
||||
PACK (t7, t3, t2)
|
||||
SWHI (t3, 14, a0) /* partial store of the last item */
|
||||
sw t5, 2 (a0)
|
||||
sw t6, 6 (a0)
|
||||
sw t7, 10 (a0)
|
||||
|
||||
bnez t4, 1b
|
||||
addiu a0, a0, 16 /* dst += 8 */
|
||||
|
||||
b 5f
|
||||
nop
|
||||
|
||||
/*
|
||||
* src pointer is unaligned (dst is word-aligned)
|
||||
*/
|
||||
3: beqz a2, 0f /* if (len == 0): return */
|
||||
srl t4, a2, 3 /* t4 = len / 8 */
|
||||
beqz t4, 5f /* if (t4 == 0): tail */
|
||||
andi a2, a2, 0x07 /* len = len % 8 */
|
||||
|
||||
1: LDHI (t0, 0, a1) /* partial load holding the first item */
|
||||
lw t1, 2 (a1)
|
||||
lw t2, 6 (a1)
|
||||
lw t3, 10 (a1)
|
||||
LDLO (t5, 12, a1) /* partial load holding the last item */
|
||||
|
||||
addiu t4, t4, -1 /* t4-- */
|
||||
addiu a1, a1, 16 /* src += 8 */
|
||||
|
||||
PACK (t0, t1, t0)
|
||||
PACK (t6, t2, t1)
|
||||
PACK (t7, t3, t2)
|
||||
sw t0, 0 (a0)
|
||||
PACK (t0, t5, t3) /* t0 is reused once its first word is stored */
|
||||
sw t6, 4 (a0)
|
||||
sw t7, 8 (a0)
|
||||
sw t0, 12 (a0)
|
||||
|
||||
bnez t4, 1b
|
||||
addiu a0, a0, 16 /* dst += 8 */
|
||||
|
||||
|
||||
5: /* Process remaining items (a2:len < 8), one at a time */
|
||||
beqz a2, 0f
|
||||
nop
|
||||
|
||||
1: lhu t0, 0 (a1) /* t0 <- ((uint16_t*) src)[0] */
|
||||
addiu a2, a2,-1 /* len-- */
|
||||
addiu a1, a1, 2 /* src++ */
|
||||
sh t0, 0 (a0) /* t0 -> ((uint16_t*) dst)[0] */
|
||||
bnez a2, 1b /* if (len != 0): loop */
|
||||
addiu a0, a0, 2 /* dst++ */
|
||||
|
||||
0: jr ra
|
||||
nop
|
||||
|
||||
END(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)
|
||||
|
||||
|
||||
#undef LDHI
|
||||
#undef LDLO
|
||||
#undef PACK
|
||||
#undef SWHI
|
||||
#undef SWLO
|
||||
|
||||
|
||||
LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_mips_dsp_asm)
|
||||
/*
|
||||
* Blends len r5g6b5 pixels from src onto dst, one pixel per iteration:
|
||||
* dst = src * alpha + dst * inverse alpha, computed separately on the
|
||||
* 0x07e0 field and on the 0xf81f fields of each pixel.
|
||||
*
|
||||
* a0 - dst (*r5g6b5)
|
||||
* a1 - src (const *r5g6b5)
|
||||
* a2 - len (unsigned int) - batch length
|
||||
* a3 - alpha (int)
|
||||
*
|
||||
* NOTE(review): the instruction written directly after each branch is
|
||||
* used as its delay slot (presumably noreorder mode - confirm).
|
||||
*/
|
||||
|
||||
beqz a2, 2f /* if (len == 0): return */
|
||||
li t9, 255
|
||||
sll t8, a3, 8
|
||||
subu a3, t8, a3 /* a3 = alpha * 255 */
|
||||
srl a3, a3, 8 /* a3 = (alpha * 255) >> 8 */
|
||||
subu t9, t9, a3 /* t9 = 255 - a3 (inverse alpha) */
|
||||
addiu a3, a3, 1 /* alpha++ */
|
||||
srl t4, a3, 2 /* t4 = alpha >> 2 */
|
||||
addiu t9, t9, 1 /* ialpha++ */
|
||||
srl t5, t9, 2 /* t5 = ialpha >> 2 */
|
||||
1:
|
||||
lhu t0, 0(a1) /* t0 = src pixel */
|
||||
lhu t1, 0(a0) /* t1 = dst pixel */
|
||||
addiu a2, a2, -1 /* len-- */
|
||||
andi t2, t0, 0x07e0 /* t2 = src & 0x07e0 */
|
||||
andi t0, t0, 0xf81f /* t0 = src & 0xf81f */
|
||||
mul t2, t2, a3 /* scaled by alpha, shifted back by 8 below */
|
||||
mul t0, t0, t4 /* scaled by alpha >> 2, shifted back by 6 below */
|
||||
andi t3, t1, 0x07e0 /* t3 = dst & 0x07e0 */
|
||||
andi t1, t1, 0xf81f /* t1 = dst & 0xf81f */
|
||||
mul t3, t3, t9 /* scaled by ialpha */
|
||||
mul t1, t1, t5 /* scaled by ialpha >> 2 */
|
||||
addiu a1, a1, 2 /* src++ */
|
||||
srl t2, t2, 8
|
||||
srl t0, t0, 6
|
||||
andi t2, t2, 0x07e0 /* drop bits that left the field */
|
||||
andi t0, t0, 0xf81f
|
||||
or t0, t0, t2 /* t0 = weighted src pixel */
|
||||
srl t3, t3, 8
|
||||
srl t1, t1, 6
|
||||
andi t3, t3, 0x07e0
|
||||
andi t1, t1, 0xf81f
|
||||
or t1, t1, t3 /* t1 = weighted dst pixel */
|
||||
addu t0, t0, t1 /* src * alpha + dst * ialpha */
|
||||
sh t0, 0(a0)
|
||||
bgtz a2, 1b /* if (len > 0): loop */
|
||||
addiu a0, a0, 2 /* dst++ */
|
||||
2:
|
||||
jr ra
|
||||
nop
|
||||
|
||||
END(qt_blend_rgb16_on_rgb16_mips_dsp_asm)
|
||||
|
||||
|
||||
LEAF_MIPS_DSP(fetchUntransformed_888_asm_mips_dsp)
|
||||
/*
|
||||
* Expands packed 3-byte pixels into 32-bit values with the top byte
|
||||
* set to 0xff. An odd length is handled by converting one pixel first;
|
||||
* the rest is converted two pixels per iteration, in one of two loops
|
||||
* chosen by the lowest bit of the src address.
|
||||
*
|
||||
* a0 - dst address (address of 32-bit aRGB value)
|
||||
* a1 - src address
|
||||
* a2 - length
|
||||
*
|
||||
* NOTE(review): v0 is never written here although the C prototype
|
||||
* declares a return value - callers should not rely on it.
|
||||
* NOTE(review): the byte-lane comments on the lhu loads look like they
|
||||
* assume little-endian byte order - confirm.
|
||||
*/
|
||||
|
||||
beqz a2, 4f /* if (length == 0): return */
|
||||
lui t8, 0xff00 /* t8 = 0xff000000 */
|
||||
andi t0, a2, 0x1
|
||||
beqz t0, 1f /* even length: no single pixel step */
|
||||
nop
|
||||
/* case for one pixel */
|
||||
lbu t1, 0(a1)
|
||||
lbu v1, 2(a1)
|
||||
lbu t0, 1(a1)
|
||||
addiu a1, a1, 3 /* src += 3 bytes (one pixel) */
|
||||
addiu a2, a2, -1
|
||||
sll t1, t1, 0x10 /* byte 0 -> bits 16-23 */
|
||||
or v1, v1, t8 /* byte 2 stays in bits 0-7, add 0xff on top */
|
||||
sll t0, t0, 0x8 /* byte 1 -> bits 8-15 */
|
||||
or v1, v1, t1
|
||||
or v1, v1, t0
|
||||
sw v1, 0(a0)
|
||||
addiu a0, a0, 4
|
||||
|
||||
beqz a2, 4f /* only one pixel is present (length = 1) */
|
||||
nop
|
||||
1:
|
||||
andi t0, a1, 0x1
|
||||
beqz t0, 3f /* src address is even: use loop 3 */
|
||||
nop
|
||||
2: /* src address is odd: the halfword load is at 1(a1) */
|
||||
lbu t0, 0(a1) /* t0 = | 0 | 0 | 0 | R1 | */
|
||||
lhu t1, 1(a1) /* t1 = | 0 | 0 | B1 | G1 | */
|
||||
addiu a1, a1, 3
|
||||
lhu t2, 0(a1) /* t2 = | 0 | 0 | G2 | R2 | */
|
||||
lbu t3, 2(a1) /* t3 = | 0 | 0 | 0 | B2 | */
|
||||
|
||||
sll t0, t0, 16
|
||||
or t0, t0, t8 /* t0 = | ff | R1 | 0 | 0 | */
|
||||
shll.ph t4, t1, 8 /* t4 = | 0 | 0 | G1 | 0 | */
|
||||
srl t5, t1, 8 /* t5 = | 0 | 0 | 0 | B1 | */
|
||||
or t4, t4, t5 /* t4 = | 0 | 0 | G1 | B1 | */
|
||||
or t0, t0, t4 /* t0 = | ff | R1 | G1 | B1 | */
|
||||
|
||||
shll.ph t4, t2, 8 /* t4 = | 0 | 0 | R2 | 0 | */
|
||||
srl t5, t2, 8 /* t5 = | 0 | 0 | 0 | G2 | */
|
||||
or t4, t4, t5 /* t4 = | 0 | 0 | R2 | G2 | */
|
||||
sll t4, t4, 8 /* t4 = | 0 | R2 | G2 | 0 | */
|
||||
or t5, t3, t8 /* t5 = | ff | 0 | 0 | B2 | */
|
||||
or t2, t4, t5 /* t2 = | ff | R2 | G2 | B2 | */
|
||||
|
||||
sw t0, 0(a0)
|
||||
addiu a1, a1, 3
|
||||
sw t2, 4(a0)
|
||||
addiu a2, a2, -2 /* two pixels done */
|
||||
bnez a2, 2b
|
||||
addiu a0, a0, 8
|
||||
b 4f
|
||||
nop
|
||||
3: /* src address is even: the halfword load is at 0(a1) */
|
||||
lhu t0, 0(a1) /* t0 = | 0 | 0 | G1 | R1 | */
|
||||
lbu t1, 2(a1) /* t1 = | 0 | 0 | 0 | B1 | */
|
||||
addiu a1, a1, 3
|
||||
lbu t2, 0(a1) /* t2 = | 0 | 0 | 0 | R2 | */
|
||||
lhu t3, 1(a1) /* t3 = | 0 | 0 | B2 | G2 | */
|
||||
|
||||
srl t4, t0, 8 /* t4 = | 0 | 0 | 0 | G1 | */
|
||||
shll.ph t5, t0, 8 /* t5 = | 0 | 0 | R1 | 0 | */
|
||||
or t0, t4, t5 /* t0 = | 0 | 0 | R1 | G1 | */
|
||||
sll t6, t0, 8 /* t6 = | 0 | R1 | G1 | 0 | */
|
||||
or t4, t1, t8 /* t4 = | ff | 0 | 0 | B1 | */
|
||||
or t0, t6, t4 /* t0 = | ff | R1 | G1 | B1 | */
|
||||
|
||||
sll t2, t2, 16 /* t2 = | 0 | R2 | 0 | 0 | */
|
||||
srl t4, t3, 8 /* t4 = | 0 | 0 | 0 | B2 | */
|
||||
shll.ph t5, t3, 8 /* t5 = | 0 | 0 | G2 | 0 | */
|
||||
or t3, t4, t5 /* t3 = | 0 | 0 | G2 | B2 | */
|
||||
or t2, t2, t3
|
||||
or t2, t2, t8 /* t2 = | ff | R2 | G2 | B2 | */
|
||||
|
||||
sw t0, 0(a0)
|
||||
addiu a1, a1, 3
|
||||
sw t2, 4(a0)
|
||||
addiu a2, a2, -2 /* two pixels done */
|
||||
bnez a2, 3b
|
||||
addiu a0, a0, 8
|
||||
4:
|
||||
jr ra
|
||||
nop
|
||||
|
||||
END(fetchUntransformed_888_asm_mips_dsp)
|
||||
|
||||
|
||||
LEAF_MIPS_DSP(fetchUntransformed_444_asm_mips_dsp)
|
||||
/*
|
||||
* Expands 16-bit pixels with three 4-bit fields into 32-bit values:
|
||||
* each nibble is duplicated into a full byte and the top byte is set
|
||||
* to 0xff. Pixels are converted one at a time until the count is even
|
||||
* and src is word-aligned, then two at a time from one word load.
|
||||
*
|
||||
* a0 - dst address (address of 32-bit aRGB value)
|
||||
* a1 - src address
|
||||
* a2 - length
|
||||
*
|
||||
* NOTE(review): v0 is set to a0 below but is reused as a scratch
|
||||
* register by the single-pixel paths, so the value returned in v0 is
|
||||
* not reliably the buffer address.
|
||||
*/
|
||||
|
||||
lui t8, 0xff00 /* t8 = 0xff000000 */
|
||||
li t4, 0x1 /* t4 = 1, compared with a2 after the single-pixel step */
|
||||
|
||||
beqz a2, 5f
|
||||
move v0, a0 /* just return the address of buffer
|
||||
* for storing returning values */
|
||||
andi t0, a2, 0x1
|
||||
beqz t0, 2f /* length is even (more than one pixel):
|
||||
* (check src memory alignment (word)) */
|
||||
nop
|
||||
1: /* convert a single pixel */
|
||||
lhu v0, 0(a1)
|
||||
addiu a1, a1, 2
|
||||
addiu a2, a2, -1
|
||||
andi t0, v0, 0xf00 /* t0 = nibble in bits 8-11 */
|
||||
andi v1, v0, 0xf /* v1 = nibble in bits 0-3 */
|
||||
andi v0, v0, 0xf0 /* v0 = nibble in bits 4-7 */
|
||||
sra t3, t0, 0x4
|
||||
sra t1, v0, 0x4
|
||||
sra t0, t0, 0x8
|
||||
sll t2, v1, 0x4
|
||||
or t0, t0, t3 /* t0 = bits 8-11 nibble, duplicated into one byte */
|
||||
or v0, t1, v0 /* v0 = bits 4-7 nibble, duplicated into one byte */
|
||||
lui t1, 0xff00
|
||||
or v1, t2, v1 /* v1 = bits 0-3 nibble, duplicated into one byte */
|
||||
sll t0, t0, 0x10
|
||||
or v1, v1, t1
|
||||
sll v0, v0, 0x8
|
||||
or v1, v1, t0
|
||||
or v0, v1, v0 /* v0 = 0xff | t0 byte | v0 byte | v1 byte */
|
||||
sw v0, 0(a0)
|
||||
addiu a0, a0, 4
|
||||
beqz a2, 5f /* no more pixels for processing */
|
||||
nop
|
||||
beq a2, t4, 4f /* only one more pixel remained */
|
||||
nop
|
||||
/* check if src memory address is word aligned */
|
||||
2:
|
||||
andi t0, a1, 0x3
|
||||
beqz t0, 3f /* memory is word aligned */
|
||||
andi a3, a2, 0x1 /* set the a3 register as the comparison value
|
||||
* for ending the unrolled loop
|
||||
* (1 if odd, 0 if even) */
|
||||
b 1b /* not word aligned,
|
||||
* go another turn with
|
||||
* just one pixel processing */
|
||||
nop
|
||||
3: /* two pixels per iteration from one aligned word */
|
||||
lw t0, 0(a1)
|
||||
addiu a2, a2, -2
|
||||
preceu.ph.qbr t1, t0 /* t1 = | 0 | aR1 | 0 | G1B1 | */
|
||||
preceu.ph.qbl t2, t0 /* t2 = | 0 | aR2 | 0 | G2B2 | */
|
||||
shll.qb t3, t1, 4 /* t3 = | 0 | R1 0 | 0 | B1 0 | */
|
||||
srl t4, t3, 4 /* (t4 is scratch from here on) */
|
||||
or t0, t3, t4 /* t0 = | 0 | R1R1 | 0 | B1B1 | */
|
||||
andi t3, t1, 0xf0
|
||||
sll t3, t3, 8
|
||||
srl t4, t3, 4
|
||||
or t1, t3, t4 /* t1 = | 0 | 0 | G1G1 | 0 | */
|
||||
or t0, t0, t1 /* t0 = | 0 | R1R1 | G1G1 | B1B1 | */
|
||||
or t0, t0, t8 /* t0 = | ff | R1R1 | G1G1 | B1B1 | */
|
||||
|
||||
shll.qb t3, t2, 4 /* t3 = | 0 | R2 0 | 0 | B2 0 | */
|
||||
srl t4, t3, 4
|
||||
or t7, t3, t4 /* t7 = | 0 | R2R2 | 0 | B2B2 | */
|
||||
andi t3, t2, 0xf0
|
||||
sll t3, t3, 8
|
||||
srl t4, t3, 4
|
||||
or t1, t3, t4 /* t1 = | 0 | 0 | G2G2 | 0 | */
|
||||
or t2, t7, t1 /* t2 = | 0 | R2R2 | G2G2 | B2B2 | */
|
||||
or t2, t2, t8 /* t2 = | ff | R2R2 | G2G2 | B2B2 | */
|
||||
|
||||
sw t0, 0(a0)
|
||||
addiu a1, a1, 4
|
||||
sw t2, 4(a0)
|
||||
bne a2, a3, 3b /* loop until only the odd remainder is left */
|
||||
addiu a0, a0, 8
|
||||
beqz a2, 5f /* no more pixels for processing */
|
||||
nop
|
||||
4:
|
||||
/* one more pixel remained (after loop unrolling process finished) */
|
||||
lhu v0, 0(a1)
|
||||
addiu a1, a1, 2
|
||||
addiu a2, a2, -1
|
||||
andi t0, v0, 0xf00 /* same single-pixel conversion as at label 1 */
|
||||
andi v1, v0, 0xf
|
||||
andi v0, v0, 0xf0
|
||||
sra t3, t0, 0x4
|
||||
sra t1, v0, 0x4
|
||||
sra t0, t0, 0x8
|
||||
sll t2, v1, 0x4
|
||||
or t0, t0, t3
|
||||
or v0, t1, v0
|
||||
lui t1, 0xff00
|
||||
or v1, t2, v1
|
||||
sll t0, t0, 0x10
|
||||
or v1, v1, t1
|
||||
sll v0, v0, 0x8
|
||||
or v1, v1, t0
|
||||
or v0, v1, v0
|
||||
sw v0, 0(a0)
|
||||
addiu a0, a0, 4
|
||||
5:
|
||||
jr ra
|
||||
nop
|
||||
|
||||
END(fetchUntransformed_444_asm_mips_dsp)
|
||||
|
||||
|
||||
LEAF_MIPS_DSP(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp)
|
||||
/*
|
||||
* Expands 3-byte source pixels (a 16-bit 5-6-5 colour part plus one
|
||||
* alpha byte) into 32-bit aRGB values, one pixel per iteration, and
|
||||
* clamps every colour byte of the result to at most the alpha byte.
|
||||
*
|
||||
* a0 - dst address
|
||||
* a1 - src address
|
||||
* a2 - length
|
||||
*
|
||||
* NOTE(review): the instruction written directly after each branch is
|
||||
* used as its delay slot (presumably noreorder mode - confirm).
|
||||
*/
|
||||
|
||||
beqz a2, 2f /* if (length == 0): return */
|
||||
nop
|
||||
|
||||
1:
|
||||
ulh t1, 0(a1) /* t1 = first two src bytes (unaligned halfword load) */
|
||||
lbu t2, 2(a1) /* t2 = third src byte, inserted below as alpha */
|
||||
addiu a2, a2, -1 /* length-- */
|
||||
wsbh t1, t1 /* swap the two bytes of the halfword */
|
||||
sll t0, t1, 8 /* t0 = 00000000rrrrrggggggbbbbb00000000 */
|
||||
ins t0, t1, 3, 16 /* t0 = 00000000rrrrrrrrrrggggggbbbbb000 */
|
||||
ins t0, t1, 5, 11 /* t0 = 00000000rrrrrrrrggggggbbbbbbb000 */
|
||||
srl t4, t1, 9 /* t4 = 0000000000000000000000000rrrrrgg */
|
||||
replv.qb t3, t2 /* t3 = alpha byte replicated into all four bytes */
|
||||
ins t0, t4, 8, 2 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */
|
||||
ins t0, t1, 3, 5 /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */
|
||||
srl t4, t1, 2 /* t4 = 000000000000000000rrrrrggggggbbb */
|
||||
ins t0, t4, 0, 3 /* t0 = 00000000rrrrrrrrggggggggbbbbbbbb */
|
||||
ins t0, t2, 24, 8 /* t0 =aaaaaaaarrrrrrrrggggggggbbbbbbbb */
|
||||
cmpu.lt.qb t3, t0 /* per byte: alpha < value ? */
|
||||
pick.qb t0, t3, t0 /* where it is, take alpha: each byte <= alpha */
|
||||
addiu a1, a1, 3 /* src += 3 bytes (one pixel) */
|
||||
sw t0, 0(a0)
|
||||
bgtz a2, 1b /* if (length > 0): loop */
|
||||
addiu a0, a0, 4 /* dst++ */
|
||||
2:
|
||||
jr ra
|
||||
nop
|
||||
|
||||
END(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
|
||||
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
|
||||
** Contact: http://www.qt-project.org/legal
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
@ -101,19 +101,34 @@ extern "C" void qt_blend_argb32_on_argb32_mips_dsp_asm_x2(uint *dest, const uint
|
||||
|
||||
extern "C" void qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(uint *dest, const uint *src, int length);
|
||||
|
||||
extern "C" void qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm(quint16 *dest, const quint16 *src, int length);
|
||||
|
||||
extern "C" void qt_blend_rgb16_on_rgb16_mips_dsp_asm(quint16 *dest, const quint16 *src, int length, uint const_alpha);
|
||||
|
||||
extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
|
||||
|
||||
extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);
|
||||
|
||||
extern "C" uint * fetchUntransformed_888_asm_mips_dsp(uint *buffer, const uchar *line, int length);
|
||||
|
||||
extern "C" uint * fetchUntransformed_444_asm_mips_dsp(uint *buffer, const uchar *line, int length);
|
||||
|
||||
extern "C" uint * fetchUntransformed_argb8565_premultiplied_asm_mips_dsp(uint *buffer, const uchar *line, int length);
|
||||
|
||||
void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
void qt_blend_rgb32_on_rgb32_mips_dsp(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
void qt_blend_rgb16_on_rgb16_mips_dsp(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
|
||||
|
||||
@ -164,6 +179,21 @@ void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int l
|
||||
|
||||
void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);
|
||||
|
||||
const uint * QT_FASTCALL qt_fetchUntransformed_888_mips_dsp (uint *buffer,
|
||||
const Operator *,
|
||||
const QSpanData *data,
|
||||
int y, int x, int length);
|
||||
|
||||
const uint * QT_FASTCALL qt_fetchUntransformed_444_mips_dsp (uint *buffer,
|
||||
const Operator *,
|
||||
const QSpanData *data,
|
||||
int y, int x, int length);
|
||||
|
||||
const uint * QT_FASTCALL qt_fetchUntransformed_argb8565_premultiplied_mips_dsp (uint *buffer,
|
||||
const Operator *,
|
||||
const QSpanData *data,
|
||||
int y, int x, int length);
|
||||
|
||||
#endif // QT_COMPILER_SUPPORTS_MIPS_DSP
|
||||
|
||||
|
||||
@ -171,6 +201,13 @@ void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length,
|
||||
|
||||
extern "C" void qConvertRgb16To32_asm_mips_dspr2(quint32 *dest, const quint16 *src, int length);
|
||||
|
||||
extern "C" void qt_blend_rgb16_on_rgb16_mips_dspr2_asm(quint16 *dest, const quint16 *src, int length, uint const_alpha);
|
||||
|
||||
void qt_blend_rgb16_on_rgb16_mips_dspr2(uchar *destPixels, int dbpl,
|
||||
const uchar *srcPixels, int sbpl,
|
||||
int w, int h,
|
||||
int const_alpha);
|
||||
|
||||
#endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2
|
||||
|
||||
QT_END_NAMESPACE
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
|
||||
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
|
||||
** Contact: http://www.qt-project.org/legal
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
@ -122,3 +122,551 @@ LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2)
|
||||
nop
|
||||
|
||||
END(qConvertRgb16To32_asm_mips_dspr2)
|
||||
|
||||
|
||||
#if defined(__MIPSEL) && __MIPSEL
|
||||
# define PACK(r, s, t) packrl.ph r, s, t
|
||||
# define LDHI(r, o, b) lwl r, o + 1 (b)
|
||||
# define LDLO(r, o, b) lwr r, o + 2 (b)
|
||||
#else
|
||||
# define PACK(r, s, t) packrl.ph r, t, s
|
||||
# define LDHI(r, o, b) lwr r, o + 1 (b)
|
||||
# define LDLO(r, o, b) lwl r, o + 2 (b)
|
||||
#endif
|
||||
|
||||
|
||||
LEAF_MIPS_DSPR2(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)
|
||||
/*
|
||||
* a0 - dst (*r5g6b5)
|
||||
* a1 - src (const *r5g6b5)
|
||||
* a2 - len (unsigned int) - batch length
|
||||
* a3 - alpha (int)
|
||||
*
|
||||
* Register usage:
|
||||
* t0-3 - Scratch registers
|
||||
* t4 - Number of iterations to do in unrolled loops
|
||||
* t5 - Inverse alpha
|
||||
* t6 - Alpha >> 2
|
||||
* t7 - Inverse alpha >> 2
|
||||
* t8 - magic1 (0x07e007e0)
|
||||
* t9 - magic2 (0xf81ff81f)
|
||||
*
|
||||
* NOTE:
|
||||
* Cannot use DSP instructions for the multiplication of two
|
||||
* 16-bit values: overflow would be always rounded or saturated.
|
||||
*/
|
||||
|
||||
beqz a2, 0f
|
||||
andi t0, a0, 0x3
|
||||
andi t1, a1, 0x3
|
||||
/* Adjust alpha value, and calculate inverse alpha value */
|
||||
li t5, 255
|
||||
or t2, t0, t1 /* t0 = (dst & 0x3) | (src & 0x3) */
|
||||
sll t8, a3, 8
|
||||
subu a3, t8, a3
|
||||
li t8, 0x07e007e0 /* magic1 */
|
||||
srl a3, a3, 8 /* alpha >>= 8 */
|
||||
li t9, 0xf81ff81f /* magic2 */
|
||||
subu t5, t5, a3 /* ialpha = 255 - alpha */
|
||||
addiu a3, a3, 1 /* alpha++ */
|
||||
addiu t5, t5, 1 /* ialpha++ */
|
||||
srl t6, a3, 2 /* ashift = alpha >> 2 */
|
||||
|
||||
beqz t2, 4f /* both aligned */
|
||||
srl t7, t5, 2 /* iashift = ialpha >> 2 */
|
||||
|
||||
beqz t1, 2f /* src aligned, dst unaligned */
|
||||
nop
|
||||
|
||||
beqz t0, 3f /* dst aligned, src unaligned */
|
||||
nop
|
||||
|
||||
/*
|
||||
* Both src/dst are unaligned: read 1 halfword from each, then
|
||||
* fall-off to continue with word-aligned operation.
|
||||
*/
|
||||
lhu t1, 0 (a1)
|
||||
lhu t0, 0 (a0)
|
||||
addiu a2, a2, -1 /* len-- */
|
||||
andi t2, t1, 0x07e0
|
||||
andi t1, t1, 0xf81f
|
||||
mul t2, t2, a3
|
||||
mul t1, t1, t6
|
||||
andi t3, t0, 0x07e0
|
||||
andi t0, t0, 0xf81f
|
||||
mul t3, t3, t5
|
||||
mul t0, t0, t7
|
||||
addiu a1, a1, 2 /* src++ */
|
||||
srl t2, t2, 8
|
||||
srl t1, t1, 6
|
||||
andi t2, t2, 0x07e0
|
||||
andi t1, t1, 0xf81f
|
||||
or t1, t1, t2
|
||||
srl t3, t3, 8
|
||||
srl t0, t0, 6
|
||||
andi t3, t3, 0x07e0
|
||||
andi t0, t0, 0xf81f
|
||||
or t0, t0, t3
|
||||
addu t0, t0, t1 /* src * alpha + dst * ialpha */
|
||||
sh t0, 0 (a0)
|
||||
addiu a0, a0, 2 /* dst++ */
|
||||
|
||||
/*
|
||||
* Both src/dst pointers are word-aligned, process eight
|
||||
* items at a time in an unrolled loop.
|
||||
*/
|
||||
4: beqz a2, 0f
|
||||
srl t4, a2, 3 /* t4 = len / 8 */
|
||||
beqz t4, 5f
|
||||
andi a2, a2, 0x7 /* len = len % 8 */
|
||||
SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1
|
||||
|
||||
1: lw t1, 0 (a1) /* [s0, s1] */
|
||||
lw v1, 4 (a1) /* [s2, s3] */
|
||||
lw s1, 8 (a1) /* [s4, s5] */
|
||||
lw s3, 12 (a1) /* [s6, s7] */
|
||||
|
||||
lw t0, 0 (a0) /* [d0, d1] */
|
||||
lw v0, 4 (a0) /* [d2, d3] */
|
||||
lw s0, 8 (a0) /* [d4, d5] */
|
||||
lw s2, 12 (a0) /* [d6, d7] */
|
||||
|
||||
pref 4, 16 (a1)
|
||||
pref 5, 16 (a0)
|
||||
|
||||
and t2, t1, t8
|
||||
ext t3, t2, 0, 16
|
||||
srl t2, t2, 16
|
||||
mul t3, t3, a3
|
||||
mul t2, t2, a3
|
||||
and t1, t1, t9
|
||||
ext s4, t1, 0, 16
|
||||
mul s4, s4, t6
|
||||
srl t1, t1, 16
|
||||
mul t1, t1, t6
|
||||
srl t3, t3, 8
|
||||
srl t2, t2, 8
|
||||
append t2, t3, 16
|
||||
and t2, t2, t8
|
||||
srl s4, s4, 6
|
||||
and t3, v1, t8
|
||||
srl t1, t1, 6
|
||||
append t1, s4, 16
|
||||
and t1, t1, t9
|
||||
or t1, t1, t2
|
||||
ext t2, t3, 0, 16
|
||||
srl t3, t3, 16
|
||||
mul t2, t2, a3
|
||||
mul t3, t3, a3
|
||||
and v1, v1, t9
|
||||
ext s4, v1, 0, 16
|
||||
mul s4, s4, t6
|
||||
srl v1, v1, 16
|
||||
mul v1, v1, t6
|
||||
srl t2, t2, 8
|
||||
srl t3, t3, 8
|
||||
append t3, t2, 16
|
||||
and t3, t3, t8
|
||||
srl s4, s4, 6
|
||||
and t2, s1, t8
|
||||
srl v1, v1, 6
|
||||
append v1, s4, 16
|
||||
and v1, v1, t9
|
||||
or v1, v1, t3
|
||||
ext t3, t2, 0, 16
|
||||
srl t2, t2, 16
|
||||
mul t3, t3, a3
|
||||
mul t2, t2, a3
|
||||
and s1, s1, t9
|
||||
ext s4, s1, 0, 16
|
||||
mul s4, s4, t6
|
||||
srl s1, s1, 16
|
||||
mul s1, s1, t6
|
||||
srl t3, t3, 8
|
||||
srl t2, t2, 8
|
||||
append t2, t3, 16
|
||||
and t2, t2, t8
|
||||
srl s4, s4, 6
|
||||
and t3, s3, t8
|
||||
srl s1, s1, 6
|
||||
append s1, s4, 16
|
||||
and s1, s1, t9
|
||||
or s1, s1, t2
|
||||
ext t2, t3, 0, 16
|
||||
srl t3, t3, 16
|
||||
mul t2, t2, a3
|
||||
mul t3, t3, a3
|
||||
and s3, s3, t9
|
||||
ext s4, s3, 0, 16
|
||||
mul s4, s4, t6
|
||||
srl s3, s3, 16
|
||||
mul s3, s3, t6
|
||||
srl t2, t2, 8
|
||||
srl t3, t3, 8
|
||||
append t3, t2, 16
|
||||
and t3, t3, t8
|
||||
srl s4, s4, 6
|
||||
and t2, t0, t8
|
||||
srl s3, s3, 6
|
||||
append s3, s4, 16
|
||||
and s3, s3, t9
|
||||
or s3, s3, t3
|
||||
ext t3, t2, 0, 16
|
||||
srl t2, t2, 16
|
||||
mul t3, t3, t5
|
||||
mul t2, t2, t5
|
||||
and t0, t0, t9
|
||||
ext s4, t0, 0, 16
|
||||
mul s4, s4, t7
|
||||
srl t0, t0, 16
|
||||
mul t0, t0, t7
|
||||
srl t3, t3, 8
|
||||
srl t2, t2, 8
|
||||
append t2, t3, 16
|
||||
and t2, t2, t8
|
||||
srl s4, s4, 6
|
||||
and t3, v0, t8
|
||||
srl t0, t0, 6
|
||||
append t0, s4, 16
|
||||
and t0, t0, t9
|
||||
or t0, t0, t2
|
||||
ext t2, t3, 0, 16
|
||||
srl t3, t3, 16
|
||||
mul t2, t2, t5
|
||||
mul t3, t3, t5
|
||||
and v0, v0, t9
|
||||
ext s4, v0, 0, 16
|
||||
mul s4, s4, t7
|
||||
srl v0, v0, 16
|
||||
mul v0, v0, t7
|
||||
srl t2, t2, 8
|
||||
srl t3, t3, 8
|
||||
append t3, t2, 16
|
||||
and t3, t3, t8
|
||||
srl s4, s4, 6
|
||||
and t2, s0, t8
|
||||
srl v0, v0, 6
|
||||
append v0, s4, 16
|
||||
and v0, v0, t9
|
||||
or v0, v0, t3
|
||||
ext t3, t2, 0, 16
|
||||
srl t2, t2, 16
|
||||
mul t3, t3, t5
|
||||
mul t2, t2, t5
|
||||
and s0, s0, t9
|
||||
ext s4, s0, 0, 16
|
||||
mul s4, s4, t7
|
||||
srl s0, s0, 16
|
||||
mul s0, s0, t7
|
||||
srl t3, t3, 8
|
||||
srl t2, t2, 8
|
||||
append t2, t3, 16
|
||||
and t2, t2, t8
|
||||
srl s4, s4, 6
|
||||
and t3, s2, t8
|
||||
srl s0, s0, 6
|
||||
append s0, s4, 16
|
||||
and s0, s0, t9
|
||||
or s0, s0, t2
|
||||
ext t2, t3, 0, 16
|
||||
srl t3, t3, 16
|
||||
mul t2, t2, t5
|
||||
mul t3, t3, t5
|
||||
and s2, s2, t9
|
||||
ext s4, s2, 0, 16
|
||||
mul s4, s4, t7
|
||||
srl s2, s2, 16
|
||||
mul s2, s2, t7
|
||||
srl t2, t2, 8
|
||||
srl t3, t3, 8
|
||||
append t3, t2, 16
|
||||
and t3, t3, t8
|
||||
srl s4, s4, 6
|
||||
addu.ph t0, t0, t1
|
||||
srl s2, s2, 6
|
||||
append s2, s4, 16
|
||||
and s2, s2, t9
|
||||
or s2, s2, t3
|
||||
addu.ph v0, v0, v1 /* v0 = [S2 + D2, S3 + D3] */
|
||||
addu.ph s0, s0, s1 /* s0 = [S4 + D4, S5 + D5] */
|
||||
addu.ph s2, s2, s3 /* s2 = [S6 + D6, S7 + D7] */
|
||||
|
||||
sw t0, 0 (a0) /* [SS0, SS1] */
|
||||
sw v0, 4 (a0) /* [SS2, SS3] */
|
||||
sw s0, 8 (a0) /* [SS4, SS5] */
|
||||
sw s2, 12 (a0) /* [SS6, SS7] */
|
||||
|
||||
addiu t4, t4, -1 /* t4-- */
|
||||
addiu a1, a1, 16 /* src += 8 */
|
||||
|
||||
bnez t4, 1b
|
||||
addiu a0, a0, 16 /* dst += 8 */
|
||||
|
||||
RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1
|
||||
b 5f
|
||||
nop
|
||||
|
||||
|
||||
/* dst unaligned: do one item and fall down to the src unaligned case */
|
||||
2: lhu t1, 0 (a1)
|
||||
lhu t0, 0 (a0)
|
||||
addiu a2, a2, -1 /* len-- */
|
||||
andi t2, t1, 0x07e0
|
||||
andi t1, t1, 0xf81f
|
||||
mul t2, t2, a3
|
||||
mul t1, t1, t6
|
||||
andi t3, t0, 0x07e0
|
||||
andi t0, t0, 0xf81f
|
||||
mul t3, t3, t5
|
||||
mul t0, t0, t7
|
||||
addiu a1, a1, 2 /* src++ */
|
||||
srl t2, t2, 8
|
||||
srl t1, t1, 6
|
||||
andi t2, t2, 0x07e0
|
||||
andi t1, t1, 0xf81f
|
||||
or t1, t1, t2
|
||||
srl t3, t3, 8
|
||||
srl t0, t0, 6
|
||||
andi t3, t3, 0x07e0
|
||||
andi t0, t0, 0xf81f
|
||||
or t0, t0, t3
|
||||
addu t0, t0, t1 /* src * alpha + dst * ialpha */
|
||||
sh t0, 0 (a0)
|
||||
addiu a0, a0, 2 /* dst++ */
|
||||
|
||||
/* src unaligned */
|
||||
3: beqz a2, 0f
|
||||
srl t4, a2, 3 /* t4 = len / 8 */
|
||||
beqz t4, 5f
|
||||
andi a2, a2, 0x7 /* len = len % 8 */
|
||||
SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1
|
||||
|
||||
1: lw t0, 0 (a0) /* [d0, d1] */
|
||||
lw v0, 4 (a0) /* [d2, d3] */
|
||||
lw s0, 8 (a0) /* [d4, d5] */
|
||||
lw s2, 12 (a0) /* [d6, d7] */
|
||||
|
||||
LDHI (t1, 0, a1) /* [s0, __] */
|
||||
lw v1, 2 (a1) /* [s1, s2] */
|
||||
lw s1, 6 (a1) /* [s3, s4] */
|
||||
lw s3, 10 (a1) /* [s5, s6] */
|
||||
LDLO (s4, 12, a1) /* [__, s7] */
|
||||
|
||||
pref 4, 14 (a1)
|
||||
pref 5, 16 (a0)
|
||||
|
||||
PACK (t1, v1, t1) /* [s0, s1] */
|
||||
PACK (v1, s1, v1) /* [s2, s3] */
|
||||
PACK (s1, s3, s1) /* [s4, s5] */
|
||||
PACK (s3, s4, s3) /* [s6, s7] */
|
||||
|
||||
and t2, t1, t8
|
||||
ext t3, t2, 0, 16
|
||||
srl t2, t2, 16
|
||||
mul t3, t3, a3
|
||||
mul t2, t2, a3
|
||||
and t1, t1, t9
|
||||
ext s4, t1, 0, 16
|
||||
mul s4, s4, t6
|
||||
srl t1, t1, 16
|
||||
mul t1, t1, t6
|
||||
srl t3, t3, 8
|
||||
srl t2, t2, 8
|
||||
append t2, t3, 16
|
||||
and t2, t2, t8
|
||||
srl s4, s4, 6
|
||||
and t3, v1, t8
|
||||
srl t1, t1, 6
|
||||
append t1, s4, 16
|
||||
and t1, t1, t9
|
||||
or t1, t1, t2
|
||||
ext t2, t3, 0, 16
|
||||
srl t3, t3, 16
|
||||
mul t2, t2, a3
|
||||
mul t3, t3, a3
|
||||
and v1, v1, t9
|
||||
ext s4, v1, 0, 16
|
||||
mul s4, s4, t6
|
||||
srl v1, v1, 16
|
||||
mul v1, v1, t6
|
||||
srl t2, t2, 8
|
||||
srl t3, t3, 8
|
||||
append t3, t2, 16
|
||||
and t3, t3, t8
|
||||
srl s4, s4, 6
|
||||
and t2, s1, t8
|
||||
srl v1, v1, 6
|
||||
append v1, s4, 16
|
||||
and v1, v1, t9
|
||||
or v1, v1, t3
|
||||
ext t3, t2, 0, 16
|
||||
srl t2, t2, 16
|
||||
mul t3, t3, a3
|
||||
mul t2, t2, a3
|
||||
and s1, s1, t9
|
||||
ext s4, s1, 0, 16
|
||||
mul s4, s4, t6
|
||||
srl s1, s1, 16
|
||||
mul s1, s1, t6
|
||||
srl t3, t3, 8
|
||||
srl t2, t2, 8
|
||||
append t2, t3, 16
|
||||
and t2, t2, t8
|
||||
srl s4, s4, 6
|
||||
and t3, s3, t8
|
||||
srl s1, s1, 6
|
||||
append s1, s4, 16
|
||||
and s1, s1, t9
|
||||
or s1, s1, t2
|
||||
ext t2, t3, 0, 16
|
||||
srl t3, t3, 16
|
||||
mul t2, t2, a3
|
||||
mul t3, t3, a3
|
||||
and s3, s3, t9
|
||||
ext s4, s3, 0, 16
|
||||
mul s4, s4, t6
|
||||
srl s3, s3, 16
|
||||
mul s3, s3, t6
|
||||
srl t2, t2, 8
|
||||
srl t3, t3, 8
|
||||
append t3, t2, 16
|
||||
and t3, t3, t8
|
||||
srl s4, s4, 6
|
||||
and t2, t0, t8
|
||||
srl s3, s3, 6
|
||||
append s3, s4, 16
|
||||
and s3, s3, t9
|
||||
or s3, s3, t3
|
||||
ext t3, t2, 0, 16
|
||||
srl t2, t2, 16
|
||||
mul t3, t3, t5
|
||||
mul t2, t2, t5
|
||||
and t0, t0, t9
|
||||
ext s4, t0, 0, 16
|
||||
mul s4, s4, t7
|
||||
srl t0, t0, 16
|
||||
mul t0, t0, t7
|
||||
srl t3, t3, 8
|
||||
srl t2, t2, 8
|
||||
append t2, t3, 16
|
||||
and t2, t2, t8
|
||||
srl s4, s4, 6
|
||||
and t3, v0, t8
|
||||
srl t0, t0, 6
|
||||
append t0, s4, 16
|
||||
and t0, t0, t9
|
||||
or t0, t0, t2
|
||||
ext t2, t3, 0, 16
|
||||
srl t3, t3, 16
|
||||
mul t2, t2, t5
|
||||
mul t3, t3, t5
|
||||
and v0, v0, t9
|
||||
ext s4, v0, 0, 16
|
||||
mul s4, s4, t7
|
||||
srl v0, v0, 16
|
||||
mul v0, v0, t7
|
||||
srl t2, t2, 8
|
||||
srl t3, t3, 8
|
||||
append t3, t2, 16
|
||||
and t3, t3, t8
|
||||
srl s4, s4, 6
|
||||
and t2, s0, t8
|
||||
srl v0, v0, 6
|
||||
append v0, s4, 16
|
||||
and v0, v0, t9
|
||||
or v0, v0, t3
|
||||
ext t3, t2, 0, 16
|
||||
srl t2, t2, 16
|
||||
mul t3, t3, t5
|
||||
mul t2, t2, t5
|
||||
and s0, s0, t9
|
||||
ext s4, s0, 0, 16
|
||||
mul s4, s4, t7
|
||||
srl s0, s0, 16
|
||||
mul s0, s0, t7
|
||||
srl t3, t3, 8
|
||||
srl t2, t2, 8
|
||||
append t2, t3, 16
|
||||
and t2, t2, t8
|
||||
srl s4, s4, 6
|
||||
and t3, s2, t8
|
||||
srl s0, s0, 6
|
||||
append s0, s4, 16
|
||||
and s0, s0, t9
|
||||
or s0, s0, t2
|
||||
ext t2, t3, 0, 16
|
||||
srl t3, t3, 16
|
||||
mul t2, t2, t5
|
||||
mul t3, t3, t5
|
||||
and s2, s2, t9
|
||||
ext s4, s2, 0, 16
|
||||
mul s4, s4, t7
|
||||
srl s2, s2, 16
|
||||
mul s2, s2, t7
|
||||
srl t2, t2, 8
|
||||
srl t3, t3, 8
|
||||
append t3, t2, 16
|
||||
and t3, t3, t8
|
||||
srl s4, s4, 6
|
||||
addu.ph t0, t0, t1
|
||||
srl s2, s2, 6
|
||||
append s2, s4, 16
|
||||
and s2, s2, t9
|
||||
or s2, s2, t3
|
||||
addu.ph v0, v0, v1 /* v0 = [S2 + D2, S3 + D3] */
|
||||
addu.ph s0, s0, s1 /* s0 = [S4 + D4, S5 + D5] */
|
||||
addu.ph s2, s2, s3 /* s2 = [S6 + D6, S7 + D7] */
|
||||
|
||||
sw t0, 0 (a0) /* [SS0, SS1] */
|
||||
sw v0, 4 (a0) /* [SS2, SS3] */
|
||||
sw s0, 8 (a0) /* [SS4, SS5] */
|
||||
sw s2, 12 (a0) /* [SS6, SS7] */
|
||||
|
||||
addiu t4, t4, -1 /* t4-- */
|
||||
addiu a1, a1, 16 /* src += 8 */
|
||||
|
||||
bnez t4, 1b
|
||||
addiu a0, a0, 16 /* dst += 8 */
|
||||
|
||||
RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1
|
||||
|
||||
5: /* Process remaining items (len < 8), one at a time */
|
||||
beqz a2, 0f
|
||||
nop
|
||||
|
||||
1: lhu t1, 0 (a1)
|
||||
lhu t0, 0 (a0)
|
||||
addiu a1, a1, 2 /* src++ */
|
||||
andi t2, t1, 0x07e0
|
||||
andi t1, t1, 0xf81f
|
||||
mul t2, t2, a3
|
||||
mul t1, t1, t6
|
||||
andi t3, t0, 0x07e0
|
||||
andi t0, t0, 0xf81f
|
||||
mul t3, t3, t5
|
||||
mul t0, t0, t7
|
||||
addiu a2, a2, -1 /* len-- */
|
||||
srl t2, t2, 8
|
||||
srl t1, t1, 6
|
||||
andi t2, t2, 0x07e0
|
||||
andi t1, t1, 0xf81f
|
||||
or t1, t1, t2
|
||||
srl t3, t3, 8
|
||||
srl t0, t0, 6
|
||||
andi t3, t3, 0x07e0
|
||||
andi t0, t0, 0xf81f
|
||||
or t0, t0, t3
|
||||
|
||||
addu t0, t0, t1 /* src*alpha + dst*ialpha */
|
||||
sh t0, 0 (a0)
|
||||
bnez a2, 1b
|
||||
addiu a0, a0, 2 /* dst++ */
|
||||
|
||||
0: jr ra
|
||||
nop
|
||||
|
||||
END(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)
|
||||
|
||||
/* Remove the PACK, LDHI and LDLO preprocessor macros so that they are no longer defined past this point (their definitions lie outside the visible part of this file). */
#undef PACK
|
||||
#undef LDHI
|
||||
#undef LDLO
|
||||
|
@ -1,6 +1,6 @@
|
||||
/****************************************************************************
|
||||
**
|
||||
** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
|
||||
** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
|
||||
** Contact: http://www.qt-project.org/legal
|
||||
**
|
||||
** This file is part of the QtGui module of the Qt Toolkit.
|
||||
@ -261,4 +261,168 @@ LEAF_MIPS32R2(symbol) \
|
||||
or \out_1, \scratch1, \scratch3
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Checks if stack offset is big enough for storing/restoring regs_num
|
||||
* number of registers to/from stack. Stack offset must be greater than
|
||||
* or equal to the number of bytes needed for storing registers (regs_num*4).
|
||||
* Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
|
||||
* reserved for input arguments of the functions, already stored in a0-a3),
|
||||
* stack size can be further optimized by utilizing this space.
|
||||
*/
|
||||
.macro CHECK_STACK_OFFSET regs_num, stack_offset
|
||||
/* regs_num: count of registers to be stored or loaded; stack_offset: bytes reserved on the stack. */
/* Each register takes 4 bytes; 16 bytes are subtracted from the requirement before comparing, so the check fails only when stack_offset is below regs_num * 4 - 16. */
.if \stack_offset < \regs_num * 4 - 16
|
||||
/* Stop the assembly with a diagnostic instead of emitting accesses past the allowed area. */
.error "Stack offset too small."
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Saves set of registers on stack. Maximum number of registers that
|
||||
* can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
|
||||
* Stack offset is number of bytes that are subtracted from stack pointer (sp)
|
||||
* before registers are pushed in order to provide enough space on stack
|
||||
* (offset must be multiple of 4, and must be big enough, as described by
|
||||
* CHECK_STACK_OFFSET macro). This macro is intended to be used in
|
||||
* combination with RESTORE_REGS_FROM_STACK macro. Example:
|
||||
* SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
|
||||
* RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
|
||||
*/
|
||||
.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
|
||||
r2 = 0, r3 = 0, r4 = 0, \
|
||||
r5 = 0, r6 = 0, r7 = 0, \
|
||||
r8 = 0, r9 = 0, r10 = 0, \
|
||||
r11 = 0, r12 = 0, r13 = 0, \
|
||||
r14 = 0
|
||||
/* Arguments: stack_offset is the byte count taken off sp (default 0); r1 has no default and is always stored; r2 to r14 default to 0, which marks the argument as not supplied. */
/* Reject a negative offset, or one whose remainder after division by 4 is non-zero. */
.if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
|
||||
.error "Stack offset must be positive and multiple of 4."
|
||||
.endif
|
||||
/* Lower sp by stack_offset bytes; no instruction is emitted for a zero offset. */
.if \stack_offset != 0
|
||||
addiu sp, sp, -\stack_offset
|
||||
.endif
|
||||
/* Registers are written to consecutive 4-byte slots starting at 0(sp), in argument order. */
sw \r1, 0(sp)
|
||||
.if \r2 != 0
|
||||
sw \r2, 4(sp)
|
||||
.endif
|
||||
.if \r3 != 0
|
||||
sw \r3, 8(sp)
|
||||
.endif
|
||||
.if \r4 != 0
|
||||
sw \r4, 12(sp)
|
||||
.endif
|
||||
/* From the fifth register on, every store is guarded by CHECK_STACK_OFFSET, which aborts assembly when stack_offset is below the register count times 4 minus 16. The first four slots (16 bytes) need no such check. */
.if \r5 != 0
|
||||
CHECK_STACK_OFFSET 5, \stack_offset
|
||||
sw \r5, 16(sp)
|
||||
.endif
|
||||
.if \r6 != 0
|
||||
CHECK_STACK_OFFSET 6, \stack_offset
|
||||
sw \r6, 20(sp)
|
||||
.endif
|
||||
.if \r7 != 0
|
||||
CHECK_STACK_OFFSET 7, \stack_offset
|
||||
sw \r7, 24(sp)
|
||||
.endif
|
||||
.if \r8 != 0
|
||||
CHECK_STACK_OFFSET 8, \stack_offset
|
||||
sw \r8, 28(sp)
|
||||
.endif
|
||||
.if \r9 != 0
|
||||
CHECK_STACK_OFFSET 9, \stack_offset
|
||||
sw \r9, 32(sp)
|
||||
.endif
|
||||
.if \r10 != 0
|
||||
CHECK_STACK_OFFSET 10, \stack_offset
|
||||
sw \r10, 36(sp)
|
||||
.endif
|
||||
.if \r11 != 0
|
||||
CHECK_STACK_OFFSET 11, \stack_offset
|
||||
sw \r11, 40(sp)
|
||||
.endif
|
||||
.if \r12 != 0
|
||||
CHECK_STACK_OFFSET 12, \stack_offset
|
||||
sw \r12, 44(sp)
|
||||
.endif
|
||||
.if \r13 != 0
|
||||
CHECK_STACK_OFFSET 13, \stack_offset
|
||||
sw \r13, 48(sp)
|
||||
.endif
|
||||
.if \r14 != 0
|
||||
CHECK_STACK_OFFSET 14, \stack_offset
|
||||
sw \r14, 52(sp)
|
||||
.endif
|
||||
.endm
|
||||
|
||||
/*
|
||||
* Restores set of registers from stack. Maximum number of registers that
|
||||
* can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
|
||||
* Stack offset is number of bytes that are added to stack pointer (sp)
|
||||
* after registers are restored (offset must be multiple of 4, and must
|
||||
* be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
|
||||
* intended to be used in combination with SAVE_REGS_ON_STACK macro.
|
||||
* Example:
|
||||
* SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
|
||||
* RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
|
||||
*/
|
||||
.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
|
||||
r2 = 0, r3 = 0, r4 = 0, \
|
||||
r5 = 0, r6 = 0, r7 = 0, \
|
||||
r8 = 0, r9 = 0, r10 = 0, \
|
||||
r11 = 0, r12 = 0, r13 = 0, \
|
||||
r14 = 0
|
||||
/* Arguments: stack_offset is the byte count added back to sp after the loads (default 0); r1 has no default and is always loaded; r2 to r14 default to 0, which marks the argument as not supplied. */
/* Reject a negative offset, or one whose remainder after division by 4 is non-zero. */
.if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
|
||||
.error "Stack offset must be positive and multiple of 4."
|
||||
.endif
|
||||
/* Registers are read from consecutive 4-byte slots starting at 0(sp), in argument order, while sp is still at its lowered value. */
lw \r1, 0(sp)
|
||||
.if \r2 != 0
|
||||
lw \r2, 4(sp)
|
||||
.endif
|
||||
.if \r3 != 0
|
||||
lw \r3, 8(sp)
|
||||
.endif
|
||||
.if \r4 != 0
|
||||
lw \r4, 12(sp)
|
||||
.endif
|
||||
/* From the fifth register on, every load is guarded by CHECK_STACK_OFFSET, which aborts assembly when stack_offset is below the register count times 4 minus 16. The first four slots (16 bytes) need no such check. */
.if \r5 != 0
|
||||
CHECK_STACK_OFFSET 5, \stack_offset
|
||||
lw \r5, 16(sp)
|
||||
.endif
|
||||
.if \r6 != 0
|
||||
CHECK_STACK_OFFSET 6, \stack_offset
|
||||
lw \r6, 20(sp)
|
||||
.endif
|
||||
.if \r7 != 0
|
||||
CHECK_STACK_OFFSET 7, \stack_offset
|
||||
lw \r7, 24(sp)
|
||||
.endif
|
||||
.if \r8 != 0
|
||||
CHECK_STACK_OFFSET 8, \stack_offset
|
||||
lw \r8, 28(sp)
|
||||
.endif
|
||||
.if \r9 != 0
|
||||
CHECK_STACK_OFFSET 9, \stack_offset
|
||||
lw \r9, 32(sp)
|
||||
.endif
|
||||
.if \r10 != 0
|
||||
CHECK_STACK_OFFSET 10, \stack_offset
|
||||
lw \r10, 36(sp)
|
||||
.endif
|
||||
.if \r11 != 0
|
||||
CHECK_STACK_OFFSET 11, \stack_offset
|
||||
lw \r11, 40(sp)
|
||||
.endif
|
||||
.if \r12 != 0
|
||||
CHECK_STACK_OFFSET 12, \stack_offset
|
||||
lw \r12, 44(sp)
|
||||
.endif
|
||||
.if \r13 != 0
|
||||
CHECK_STACK_OFFSET 13, \stack_offset
|
||||
lw \r13, 48(sp)
|
||||
.endif
|
||||
.if \r14 != 0
|
||||
CHECK_STACK_OFFSET 14, \stack_offset
|
||||
lw \r14, 52(sp)
|
||||
.endif
|
||||
/* Raise sp by stack_offset bytes once all loads are issued; no instruction is emitted for a zero offset. */
.if \stack_offset != 0
|
||||
addiu sp, sp, \stack_offset
|
||||
.endif
|
||||
.endm
|
||||
|
||||
#endif // QT_MIPS_ASM_DSP_H
|
||||
|
Loading…
Reference in New Issue
Block a user