MIPS DSP build system fix and additional optimizations.

Changed the MIPS DSP portion of the mkspecs/features/simd.prf file in order to fix the broken build system for MIPS platforms.

List of the additionally optimized functions:

from file src/gui/painting/qdrawhelper.cpp:
- qt_blend_rgb16_on_rgb16
- qt_fetchUntransformed_888
- qt_fetchUntransformed_444
- qt_fetchUntransformed_argb8565

from file src/gui/image/qimage.cpp:
- convert_ARGB_to_ARGB_PM_inplace

from file src/corelib/tools/qstring.cpp:
- ucstrncmp
- toLatin1_helper
- fromLatin1_helper

Change-Id: I5c47a69784917eee29a8dbd2718828a390b27c93
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
2013-11-20 17:09:57 +01:00 · 2013-11-20 17:09:57 +01:00 · f10356ead1
commit f10356ead1
parent 48caaee170
15 changed files with 2141 additions and 10 deletions
--- a/mkspecs/features/simd.prf
+++ b/mkspecs/features/simd.prf
@ -174,10 +174,19 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
        mips_dsp_assembler.name = assembling[mips_dsp] ${QMAKE_FILE_IN}
        silent:mips_dsp_assembler.commands = @echo assembling[mips_dsp] ${QMAKE_FILE_IN} && $$mips_dsp_assembler.commands
        QMAKE_EXTRA_COMPILERS += mips_dsp_compiler
+        QMAKE_EXTRA_COMPILERS += mips_dsp_assembler
    }
    mips_dspr2 {
        HEADERS += $$MIPS_DSP_HEADERS

+        mips_dspr2_compiler.commands = $$QMAKE_CXX -c
+        mips_dspr2_compiler.commands += $(CXXFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+        mips_dspr2_compiler.dependency_type = TYPE_C
+        mips_dspr2_compiler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+        mips_dspr2_compiler.input = MIPS_DSPR2_SOURCES
+        mips_dspr2_compiler.variable_out = OBJECTS
+        mips_dspr2_compiler.name = compiling[mips_dspr2] ${QMAKE_FILE_IN}
+        silent:mips_dspr2_compiler.commands = @echo compiling[mips_dspr2] ${QMAKE_FILE_IN} && $$mips_dspr2_compiler.commands
        mips_dspr2_assembler.commands = $$QMAKE_CC -c
        mips_dspr2_assembler.commands += $(CFLAGS) $(INCPATH) ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
        mips_dspr2_assembler.dependency_type = TYPE_C
@ -186,6 +195,7 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
        mips_dspr2_assembler.variable_out = OBJECTS
        mips_dspr2_assembler.name = assembling[mips_dspr2] ${QMAKE_FILE_IN}
        silent:mips_dspr2_assembler.commands = @echo assembling[mips_dspr2] ${QMAKE_FILE_IN} && $$mips_dspr2_assembler.commands
+        QMAKE_EXTRA_COMPILERS += mips_dspr2_compiler
        QMAKE_EXTRA_COMPILERS += mips_dspr2_assembler
    }
 } else:win32-msvc*|winrt {
@ -297,7 +307,7 @@ QT_CPU_FEATURES = $$eval(QT_CPU_FEATURES.$$QT_ARCH)
        $$AVX_SOURCES $$AVX2_SOURCES \
        $$NEON_SOURCES $$NEON_ASM \
        $$IWMMXT_SOURCES \
-        $$MIPS_DSP_SOURCES $$MIPS_DSP_ASM $$MIPS_DSPR2_ASM
+        $$MIPS_DSP_SOURCES $$MIPS_DSPR2_SOURCES $$MIPS_DSP_ASM $$MIPS_DSPR2_ASM

    # Headers are already done in the above sections.
 }
--- a/src/corelib/corelib.pro
+++ b/src/corelib/corelib.pro
@ -111,3 +111,17 @@ ctest_qt5_module_files.files += $$ctest_macros_file.output $$cmake_extras_mkspec
 ctest_qt5_module_files.path = $$[QT_INSTALL_LIBS]/cmake/Qt5Core

 INSTALLS += ctest_qt5_module_files cmake_qt5_umbrella_module_files
+
+# MIPS DSP: when the mips_dsp feature is enabled and the mkspec matches
+# *-g++*, register an extra compiler that turns every file listed in
+# MIPS_DSP_ASM into an object file that is appended to OBJECTS.
+mips_dsp:*-g++* {
+    HEADERS += $$MIPS_DSP_HEADERS
+
+    # The sources are run through the C++ compiler driver with
+    # -mips32r2 -mdsp added to the regular CXXFLAGS/INCPATH.
+    mips_dsp_corelib_assembler.commands = $$QMAKE_CXX -c
+    mips_dsp_corelib_assembler.commands += $(CXXFLAGS) $(INCPATH) -mips32r2 -mdsp ${QMAKE_FILE_IN} -o ${QMAKE_FILE_OUT}
+    mips_dsp_corelib_assembler.dependency_type = TYPE_C
+    # One object per input file, placed in the objects directory.
+    mips_dsp_corelib_assembler.output = ${QMAKE_VAR_OBJECTS_DIR}${QMAKE_FILE_BASE}$${first(QMAKE_EXT_OBJ)}
+    mips_dsp_corelib_assembler.input = MIPS_DSP_ASM
+    mips_dsp_corelib_assembler.variable_out = OBJECTS
+    mips_dsp_corelib_assembler.name = assembling[mips_dsp] ${QMAKE_FILE_IN}
+    # In silent builds only a short "assembling[...]" line is echoed.
+    silent:mips_dsp_corelib_assembler.commands = @echo assembling[mips_dsp] ${QMAKE_FILE_IN} && $$mips_dsp_corelib_assembler.commands
+    QMAKE_EXTRA_COMPILERS += mips_dsp_corelib_assembler
+}
--- a/src/corelib/tools/qstring.cpp
+++ b/src/corelib/tools/qstring.cpp
@ -188,9 +188,23 @@ static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b, const uc
    return 1;
 }

+#if defined(__mips_dsp)
+// From qstring_mips_dsp_asm.S
+extern "C" int qt_ucstrncmp_mips_dsp_asm(const ushort *a,
+                                         const ushort *b,
+                                         unsigned len);
+#endif
+
 // Unicode case-sensitive compare two same-sized strings
 static int ucstrncmp(const QChar *a, const QChar *b, int l)
 {
+#if defined(__mips_dsp)
+    if (l >= 8) {
+        return qt_ucstrncmp_mips_dsp_asm(reinterpret_cast<const ushort*>(a),
+                                         reinterpret_cast<const ushort*>(b),
+                                         l);
+    }
+#endif // __mips_dsp
    while (l-- && *a == *b)
        a++,b++;
    if (l==-1)
@ -3937,6 +3951,10 @@ static inline __m128i mergeQuestionMarks(__m128i chunk)
 }
 #endif

+#if defined(__mips_dsp)
+// From qstring_mips_dsp_asm.S
+extern "C" void qt_toLatin1_mips_dsp_asm(uchar *dst, const ushort *src, int length);
+#endif
+
 static QByteArray toLatin1_helper(const QChar *data, int length)
 {
    QByteArray ba;
@ -3989,10 +4007,14 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
            length = length % 8;
        }
 #endif
+#if defined(__mips_dsp)
+        qt_toLatin1_mips_dsp_asm(dst, src, length);
+#else
        while (length--) {
            *dst++ = (*src>0xff) ? '?' : (uchar) *src;
            ++src;
        }
+#endif
    }
    return ba;
 }
@ -4104,6 +4126,12 @@ QVector<uint> QString::toUcs4() const
    return v;
 }

+#if defined(__mips_dsp)
+// From qstring_mips_dsp_asm.S
+extern "C" void qt_fromlatin1_mips_asm_unroll4 (ushort*, const char*, uint);
+extern "C" void qt_fromlatin1_mips_asm_unroll8 (ushort*, const char*, uint);
+#endif
+
 QString::Data *QString::fromLatin1_helper(const char *str, int size)
 {
    Data *d;
@ -4144,8 +4172,15 @@ QString::Data *QString::fromLatin1_helper(const char *str, int size)
            size = size % 16;
        }
 #endif
+#if defined(__mips_dsp)
+        if (size > 20)
+            qt_fromlatin1_mips_asm_unroll8(dst, str, size);
+        else
+            qt_fromlatin1_mips_asm_unroll4(dst, str, size);
+#else
        while (size--)
            *dst++ = (uchar)*str++;
+#endif
    }
    return d;
 }
--- a/src/corelib/tools/qstring_mips_dsp_asm.S
+++ b/src/corelib/tools/qstring_mips_dsp_asm.S
@ -0,0 +1,449 @@
+/****************************************************************************
+**
+** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
+** Contact: http://www.qt-project.org/legal
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and Digia.  For licensing terms and
+** conditions see http://qt.digia.com/licensing.  For further information
+** use the contact form at http://qt.digia.com/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Digia gives you certain additional
+** rights.  These rights are described in the Digia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "../../gui/painting/qt_mips_asm_dsp_p.h"
+
+/*
+ * EXTRACT_HALVES_x2 src1, src2, hi_1, hi_2
+ *
+ * Splits each of the 32-bit registers src1 and src2 into its two
+ * zero-extended 16-bit halves. src1/src2 are overwritten with one half and
+ * hi_1/hi_2 receive the other half:
+ *   - little-endian (__MIPSEL): srcN = bits 15..0, hi_N = bits 31..16
+ *   - otherwise:                srcN = bits 31..16, hi_N = bits 15..0
+ * The callers in this file use it on words loaded with lw and treat srcN as
+ * the first halfword of the pair and hi_N as the second one.
+ */
+.macro EXTRACT_HALVES_x2 src1, src2, hi_1, hi_2
+#if defined(__MIPSEL) && __MIPSEL
+    srl  \hi_1, \src1, 16
+    ext  \src1, \src1, 0, 16
+    srl  \hi_2, \src2, 16
+    ext  \src2, \src2, 0, 16
+#else
+    ext  \hi_1, \src1, 0, 16
+    srl  \src1, \src1, 16
+    ext  \hi_2, \src2, 0, 16
+    srl  \src2, \src2, 16
+#endif
+.endm
+
+
+LEAF_MIPS_DSP(qt_ucstrncmp_mips_dsp_asm)
+/*
+ * Compares two arrays of 16-bit code units of the same length.
+ *
+ * Arguments:
+ *   a0 - string_a (uint16_t*)
+ *   a1 - string_b (uint16_t*)
+ *   a2 - length   (uint32_t)
+ *
+ * Returns (v0):
+ *   0 if all compared elements are equal, otherwise the difference
+ *   (a - b) of the first pair of elements that differ.
+ *
+ * Register usage:
+ *   t0 - batches
+ *
+ * NOTE: the "one input unaligned" path (label 6) enters its unrolled loop
+ * without checking for zero batches, so it needs length >= 8. The C++
+ * caller in qstring.cpp only calls this routine when l >= 8.
+ */
+
+    move  v0, zero      /* result = 0 */
+    andi   t0, a0, 0x3  /* t0 = string_a % 4 */
+    andi   t1, a1, 0x3  /* t1 = string_b % 4 */
+    or     t2, t0, t1   /* t2 = t0 | t1 */
+
+    beqz   t2, 5f       /* both aligned */
+     and   t2, t0 ,t1
+    beqz   t2, 6f       /* one aligned */
+     nop
+
+    /*
+     * Both strings are unaligned: read 1 halfword from each,
+     * then fall-off to continue with the both-aligned case.
+     */
+    lhu    t0, 0 (a0)
+    lhu    t1, 0 (a1)
+    addiu  a2, a2, -1   /* len-- */
+    sub    v0, t0, t1   /* v0 = t0-t1 */
+    addiu  a0, a0, 2    /* string_a++ */
+    bnez   v0, 0f       /* if (t0-t1): return */
+     addiu a1, a1, 2    /* string_b++ */
+    beqz   a2, 0f       /* if !len: return */
+     /* next instruction (srl) fills delay branch slot */
+
+5:  /* Both string pointers are aligned */
+    srl    t0, a2, 3    /* batches = length / 8 */
+    beqz   t0, 9f       /* if !batches: tail */
+     andi  a2, a2, 0x7  /* length = length % 8 */
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
+
+1:  lw     t1,  0 (a0)  /* [a0 a1] */
+    lw     t3,  4 (a0)  /* [a2 a3] */
+    lw     t5,  8 (a0)  /* [a4 a5] */
+    lw     t7, 12 (a0)  /* [a6 a7] */
+
+    lw     t2,  0 (a1)  /* [b0 b1] */
+    lw     t4,  4 (a1)  /* [b2 b3] */
+    lw     t6,  8 (a1)  /* [b4 b5] */
+    lw     t8, 12 (a1)  /* [b6 b7] */
+
+    /*
+     * Subtract elements one by one, if the result is zero
+     * both halves of the registers (shorts) are equal.
+     */
+    subq.ph s0, t1, t2  /* [a0-b0 a1-b1] */
+    subq.ph s1, t3, t4  /* [a2-b2 a3-b3] */
+
+    bnez   s0, 1f
+     subq.ph s2, t5, t6 /* [a4-b4 a5-b5] */
+    bnez   s1, 2f
+     subq.ph s3, t7, t8 /* [a6-b6 a7-b7] */
+    bnez   s2, 3f
+     addiu t0, t0, -1   /* batches-- */
+    bnez   s3, 4f
+     addiu a0, a0, 8*2  /* string_a += 8 */
+
+    bnez   t0, 1b       /* if batches: loop */
+     addiu a1, a1, 8*2  /* string_b += 8 */
+
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+
+    bnez   a2, 9f       /* if length: tail */
+     nop
+    jr ra
+     nop
+
+
+ 1: /* Check t1 [a0 a1] vs. t2 [b0 b1] */
+    EXTRACT_HALVES_x2  t1, t2, t3, t4  /* a0, b0, a1, b1 */
+    sub    v0, t1, t2
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+    bnez   v0, 0f       /* if (a0-b0): return */
+     nop
+    jr     ra
+     sub   v0, t3, t4   /* return a1-b1 */
+
+ 2: /* Check t3 [a2 a3] vs. t4 [b2 b3] */
+    EXTRACT_HALVES_x2  t3, t4, t1, t2  /* a2, b2, a3, b3 */
+    sub    v0, t3, t4
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+    bnez   v0, 0f       /* if (a2-b2): return */
+     nop
+    jr     ra
+     sub   v0, t1, t2   /* return a3-b3 */
+
+ 3: /* Check t5 [a4 a5] vs. t6 [b4 b5] */
+    EXTRACT_HALVES_x2  t5, t6, t1, t2  /* a4, b4, a5, b5 */
+    sub    v0, t5, t6
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+    bnez   v0, 0f       /* if (a4-b4): return */
+     nop
+    jr     ra
+     sub   v0, t1, t2   /* return a5-b5 */
+
+ 4: /* Check t7 [a6 a7] vs. t8 [b6 b7] */
+    EXTRACT_HALVES_x2  t7, t8, t1, t2  /* a6, b6, a7, b7 */
+    sub    v0, t7, t8
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+    bnez   v0, 0f       /* if (a6-b6): return */
+     nop
+    jr     ra
+     sub   v0, t1, t2   /* return a7-b7 */
+
+
+    /* Process remaining tail items. */
+9:  lhu    t1, 0 (a0)   /* a */
+    lhu    t2, 0 (a1)   /* b */
+    addiu  a2, a2, -1   /* length-- */
+    sub    v0, t1, t2   /* result = (a - b) */
+
+    bnez   v0, 0f       /* if (a - b): return */
+     addiu a0, a0, 2    /* string_a++ */
+
+    bnez   a2, 9b       /* if length: loop */
+     addiu a1, a1, 2    /* string_b++ */
+
+0:  jr     ra
+     nop
+
+
+    /* One of the inputs is unaligned, do unrolled half-word loads */
+6:  srl    t0, a2, 3    /* batches = length / 8 */
+    andi  a2, a2, 0x7   /* length = length % 8 */
+
+    /* First four elements of the batch */
+1:  lhu    t1,  0 (a0)
+    lhu    t2,  0 (a1)
+    lhu    t3,  2 (a0)
+    lhu    t4,  2 (a1)
+    lhu    t5,  4 (a0)
+    lhu    t6,  4 (a1)
+    lhu    t7,  6 (a0)
+    lhu    t8,  6 (a1)
+
+    sub    v0, t1, t2
+    sub    t1, t3, t4
+
+    bnez   v0, 0f
+     sub   t2, t5, t6
+    bnez   t1, 2f
+     sub   t3, t7, t8
+    bnez   t2, 3f
+     lhu   t1,  8 (a0)
+    bnez   t3, 4f
+     lhu    t2,  8 (a1)
+
+    /* Last four elements of the batch (element 4 was loaded in the delay slots above) */
+    lhu    t3, 10 (a0)
+    lhu    t4, 10 (a1)
+    lhu    t5, 12 (a0)
+    lhu    t6, 12 (a1)
+    lhu    t7, 14 (a0)
+    lhu    t8, 14 (a1)
+
+    sub    v0, t1, t2
+    sub    t1, t3, t4
+
+    bnez   v0, 0f
+     sub   t2, t5, t6
+    bnez   t1, 2f
+     sub   t3, t7, t8
+    bnez   t2, 3f
+     addiu t0, t0, -1   /* batches-- */
+    bnez   t3, 4f
+     addiu a0, a0, 8*2  /* string_a += 8 */
+
+    bnez   t0, 1b
+     addiu a1, a1, 8*2  /* string_b += 8 */
+
+    bnez   a2, 9b       /* if length: tail */
+     nop
+
+    /* Return paths: the difference to return is in v0, t1, t2 or t3 */
+0:  jr     ra
+     nop
+2:  jr     ra
+     move  v0, t1
+3:  jr     ra
+     move  v0, t2
+4:  jr     ra
+     move  v0, t3
+
+END(qt_ucstrncmp_mips_dsp_asm)
+
+
+#if defined(__mips_dspr2)
+LEAF_MIPS_DSPR2(qt_fromlatin1_mips_asm_unroll8)
+#else
+LEAF_MIPS_DSP(qt_fromlatin1_mips_asm_unroll8)
+#endif
+/*
+ * Widens Latin-1 bytes to 16-bit code units, eight elements per batch.
+ *
+ * Arguments:
+ *   a0 - dst (uint16_t*)
+ *   a1 - src (const char*)
+ *   a2 - len (unsigned int)
+ *
+ * Layout:
+ *   - leading bytes are copied one by one until src is word aligned
+ *   - label 1 (after 9): batches of 8 with dst word aligned
+ *   - label 7 (after 6): batches of 8 with dst only halfword aligned
+ *   - label 8: tail loop for the remaining (len % 8) elements
+ *
+ * The first copy loop stores one element before testing len, so len must
+ * not be zero when src is unaligned. The caller in
+ * QString::fromLatin1_helper() only uses this routine for size > 20.
+ */
+
+    andi   t0, a1, 0x3
+    beqz   t0, 9f       /* check that src is aligned */
+     nop
+
+1:  lbu    t1, 0 (a1)
+    addiu  a1, a1,  1
+    addiu  a2, a2, -1
+    sh     t1, 0 (a0)
+    beqz   a2, 0f
+     andi  t0, a1, 0x3
+    bnez   t0, 1b
+     addiu a0, a0,  2
+
+9:  /* source pointer is aligned: do batches of 8 elements */
+    andi   t0, a0, 3    /* check if dst is aligned */
+    bnez   t0, 6f
+     srl   t0, a2, 3    /* batches = len / 8 */
+    andi   a2, a2, 0x7  /* tail    = len % 8 */
+
+    beqz   t0, 8f       /* if !batches: tail */
+     nop
+
+1:  lw     t1, 0 (a1)
+    lw     t2, 4 (a1)
+
+    addiu  a1, a1, 8*1
+    addiu  t0, t0, -1
+
+    /* zero-extend each byte to a halfword: left pair / right pair */
+    preceu.ph.qbl t3, t1
+    preceu.ph.qbr t1, t1
+    preceu.ph.qbl t4, t2
+    preceu.ph.qbr t2, t2
+
+#if defined(__MIPSEL) && __MIPSEL
+    sw     t1,  0 (a0)
+    sw     t3,  4 (a0)
+    sw     t2,  8 (a0)
+    sw     t4, 12 (a0)
+#else
+    sw     t3,  0 (a0)
+    sw     t1,  4 (a0)
+    sw     t4,  8 (a0)
+    sw     t2, 12 (a0)
+#endif
+
+    bnez   t0, 1b
+     addiu a0, a0, 8*2
+
+8:  /* process tail items */
+    beqz   a2, 0f
+     nop
+
+1:  lbu    t1, 0 (a1)
+    addiu  a2, a2, -1
+    sh     t1, 0 (a0)
+    addiu  a1, a1, 1
+    bnez   a2, 1b
+     addiu a0, a0, 2
+
+0:  jr     ra
+     nop
+
+    /*
+     * dst is not word aligned: store the first and the last element of
+     * each batch as halfwords and the six in between as three words.
+     *
+     * NOTE(review): unlike the aligned loop above, this path has no
+     * big-endian variant; the byte extraction matches a little-endian
+     * lw. Confirm before relying on it on big-endian targets.
+     */
+6:  beqz   t0, 8b
+     andi  a2, a2, 7
+7:  lw     t1, 0(a1)
+    lw     t2, 4(a1)
+    addiu  t0, t0, -1
+    addiu  a1, a1, 8
+    andi   t3, t1, 0xff /* first element of the batch */
+    /* t1 = (t1 >> 8) | (t2 << 24): drop byte 0, pull in the low byte of t2 */
+#if defined(__mips_dspr2)
+    prepend t1, t2, 8
+#else
+    sll    t4, t2, 24   /* must shift t2 (not t4) to match prepend */
+    srl    t1, t1, 8
+    or     t1, t1, t4
+#endif
+    srl    t2, t2, 8
+    preceu.ph.qbr t4, t1
+    preceu.ph.qbl t1, t1
+    preceu.ph.qbr t5, t2
+    srl    t2, t2, 16   /* last element of the batch */
+    sh     t3, 0(a0)
+    sw     t4, 2(a0)
+    sw     t1, 6(a0)
+    sw     t5, 10(a0)
+    sh     t2, 14(a0)
+    bnez   t0, 7b
+     addiu a0, a0, 16
+    bnez   a2, 1b       /* if tail: reuse the tail loop above */
+     nop
+
+    jr     ra
+     nop
+
+END(qt_fromlatin1_mips_asm_unroll8)
+
+
+LEAF_MIPS_DSP(qt_fromlatin1_mips_asm_unroll4)
+/*
+ * Widens Latin-1 bytes to 16-bit code units, four elements per batch,
+ * using byte loads and halfword stores only.
+ *
+ * Arguments:
+ *   a0 - dst (uint16_t*)
+ *   a1 - src (const char*)
+ *   a2 - len (unsigned int)
+ */
+
+    /*
+     * QString::fromLatin1_helper() already handles the len==0
+     * case: assume that len is never zero.
+     */
+    srl    t0, a2, 2    /* batches = len / 4 */
+    beqz   t0, 9f       /* if !batches: tail (len != 0, so tail != 0) */
+     andi  a2, a2, 0x3  /* tail    = len % 4 */
+
+1:  lbu    t1, 0(a1)
+    lbu    t2, 1(a1)
+    lbu    t3, 2(a1)
+    lbu    t4, 3(a1)
+    sh     t1, 0(a0)
+    sh     t2, 2(a0)
+    sh     t3, 4(a0)
+    sh     t4, 6(a0)
+    addiu  t0, t0, -1
+    addiu  a1, a1, 4
+    bnez   t0, 1b
+     addiu a0, a0, 8
+
+8:  beqz   a2, 0f       /* no tail left: return */
+     nop
+
+    /* tail loop, one element at a time */
+9:  lbu    t1, 0(a1)
+    addiu  a2, a2, -1
+    addiu  a1, a1, 1
+    sh     t1, 0(a0)
+    bnez   a2, 9b
+     addiu a0, a0, 2
+
+0:  jr     ra
+     nop
+
+END(qt_fromlatin1_mips_asm_unroll4)
+
+
+LEAF_MIPS_DSP(qt_toLatin1_mips_dsp_asm)
+  /*
+   * Narrows 16-bit code units to bytes; every unit whose upper byte is
+   * non-zero (value > 0xff) is stored as 0x3f ('?').
+   *
+   * a0 - dst
+   * a1 - src
+   * a2 - length
+   *
+   * A length of zero falls through both loops and returns immediately.
+   */
+
+    addiu  t9, zero, 0x3f   /* t9 = '?' replacement character */
+    srl    t8, a2, 2        /* t8 = batches of 4 */
+    beqz   t8, 2f
+     andi  a2, a2, 3        /* a2 = tail (length % 4) */
+1:
+    lhu    t0, 0(a1)
+    lhu    t1, 2(a1)
+    lhu    t2, 4(a1)
+    lhu    t3, 6(a1)
+    srl    t4, t0, 8        /* upper byte of each unit */
+    srl    t5, t1, 8
+    srl    t6, t2, 8
+    srl    t7, t3, 8
+    movn   t0, t9, t4       /* if upper byte != 0: unit = '?' */
+    movn   t1, t9, t5
+    movn   t2, t9, t6
+    movn   t3, t9, t7
+    addiu  a1, a1, 8
+    addiu  t8, t8, -1
+    sb     t0, 0(a0)
+    sb     t1, 1(a0)
+    sb     t2, 2(a0)
+    sb     t3, 3(a0)
+    bgtz   t8, 1b
+     addiu a0, a0, 4
+2:  beqz   a2, 4f           /* no tail: return */
+     nop
+3:
+    /* tail loop, one unit at a time */
+    lhu    t0, 0(a1)
+    addiu  a1, a1, 2
+    addiu  a2, a2, -1
+    srl    t1, t0, 8
+    movn   t0, t9, t1
+    sb     t0, 0(a0)
+    bgtz   a2, 3b
+     addiu a0, a0, 1
+4:
+    jr     ra
+     nop
+
+END(qt_toLatin1_mips_dsp_asm)
+
--- a/src/corelib/tools/tools.pri
+++ b/src/corelib/tools/tools.pri
@ -192,3 +192,7 @@ INCLUDEPATH += ../3rdparty/md5 \
 !macx-icc:!vxworks:unix:LIBS_PRIVATE += -lm

 TR_EXCLUDE += ../3rdparty/*
+
+# MIPS DSP
+MIPS_DSP_ASM += tools/qstring_mips_dsp_asm.S
+MIPS_DSP_HEADERS += ../gui/painting/qt_mips_asm_dsp_p.h
--- a/src/gui/image/image.pri
+++ b/src/gui/image/image.pri
@ -78,3 +78,5 @@ NEON_SOURCES += image/qimage_neon.cpp
 SSE2_SOURCES += image/qimage_sse2.cpp
 SSSE3_SOURCES += image/qimage_ssse3.cpp
 AVX_SOURCES += image/qimage_avx.cpp
+MIPS_DSPR2_SOURCES += image/qimage_mips_dspr2.cpp
+MIPS_DSPR2_ASM += image/qimage_mips_dspr2_asm.S
--- a/src/gui/image/qimage.cpp
+++ b/src/gui/image/qimage.cpp
@ -3960,6 +3960,12 @@ void qInitImageConversions()
        return;
    }
 #endif
+
+#ifdef QT_COMPILER_SUPPORTS_MIPS_DSPR2
+    extern bool convert_ARGB_to_ARGB_PM_inplace_mips_dspr2(QImageData *data, Qt::ImageConversionFlags);
+    inplace_converter_map[QImage::Format_ARGB32][QImage::Format_ARGB32_Premultiplied] = convert_ARGB_to_ARGB_PM_inplace_mips_dspr2;
+    return;
+#endif
 }

 extern const uchar *qt_pow_rgb_gamma();
--- a/src/gui/image/qimage_mips_dspr2.cpp
+++ b/src/gui/image/qimage_mips_dspr2.cpp
@ -0,0 +1,69 @@
+/****************************************************************************
+**
+** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
+** Contact: http://www.qt-project.org/legal
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and Digia.  For licensing terms and
+** conditions see http://qt.digia.com/licensing.  For further information
+** use the contact form at http://qt.digia.com/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Digia gives you certain additional
+** rights.  These rights are described in the Digia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qimage.h"
+#include <private/qimage_p.h>
+
+QT_BEGIN_NAMESPACE
+
+// Defined in qimage_mips_dspr2_asm.S
+//
+extern "C" void premultiply_argb_inplace_mips_asm(void*, unsigned, unsigned, int);
+
+// In-place ARGB32 -> ARGB32_Premultiplied converter backed by the MIPS DSPr2
+// assembly routine. Premultiplies every pixel of data->data, switches
+// data->format to Format_ARGB32_Premultiplied and returns true.
+bool convert_ARGB_to_ARGB_PM_inplace_mips_dspr2(QImageData *data, Qt::ImageConversionFlags)
+{
+    Q_ASSERT(data->format == QImage::Format_ARGB32);
+
+    // An image without pixels has nothing to premultiply, but the format
+    // must still be updated: this function is registered as the
+    // ARGB32 -> ARGB32_Premultiplied converter and reports success, so the
+    // reported format has to match afterwards.
+    if (data->width && data->height) {
+        // Bytes between the end of one row's pixels and the start of the next row.
+        const int lineSkip = data->bytes_per_line - (data->width << 2);
+        Q_ASSERT(lineSkip >= 0);
+
+        premultiply_argb_inplace_mips_asm(data->data,
+                                          data->height,
+                                          data->width,
+                                          lineSkip);
+    }
+
+    data->format = QImage::Format_ARGB32_Premultiplied;
+    return true;
+}
+
+QT_END_NAMESPACE
--- a/src/gui/image/qimage_mips_dspr2_asm.S
+++ b/src/gui/image/qimage_mips_dspr2_asm.S
@ -0,0 +1,207 @@
+/****************************************************************************
+**
+** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
+** Contact: http://www.qt-project.org/legal
+**
+** This file is part of the QtGui module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and Digia.  For licensing terms and
+** conditions see http://qt.digia.com/licensing.  For further information
+** use the contact form at http://qt.digia.com/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Digia gives you certain additional
+** rights.  These rights are described in the Digia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file.  Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "../painting/qt_mips_asm_dsp_p.h"
+
+LEAF_MIPS_DSPR2(premultiply_argb_inplace_mips_asm)
+/*
+ * Premultiplies the colour channels of 32-bit ARGB pixels by their alpha,
+ * in place. The alpha byte (bits 31..24) of each pixel is kept unchanged.
+ *
+ * Arguments (as passed by convert_ARGB_to_ARGB_PM_inplace_mips_dspr2):
+ *   a0 - data     (pointer to the first pixel)
+ *   a1 - numlines (image height)
+ *   a2 - linelen  (pixels per line)
+ *   a3 - lineskip (bytes to skip after the pixels of each line)
+ *
+ * Register usage:
+ *   v1 - remaining batches of 8 pixels in the current line
+ *   v0 - remaining tail pixels in the current line
+ *
+ * The line counter is decremented before it is tested, so numlines must
+ * not be zero; the C++ caller returns early for empty images.
+ */
+
+    SAVE_REGS_ON_STACK  0, s0, s1, s2, s3
+
+3:  srl    v1, a2, 3    /* v1 = linelen / 8 */
+    addiu  a1, a1, -1   /* numlines-- */
+    beqz   v1, 1f       /* if (!(linelen / 8)): tail */
+     andi  v0, a2, 0x7  /* v0 = linelen % 8 */
+    pref    5,  0 (a0)  /* cache-hint: store-streamed */
+
+    /* unrolled loop, handles (v1 = len / 8) batches of 8 pixels */
+2:  addiu  v1, v1, -1
+    pref    5, 0(a0)
+    pref    5, 32(a0)
+
+    /* pixels 0..3 of the batch */
+    lw     t0, 0(a0)
+    lw     t1, 4(a0)
+    lw     t2, 8(a0)
+    lw     t3, 12(a0)
+    srl    t4, t0, 24  /*  00|00|00|A1 */
+    replv.ph t5, t4    /*  00|A1|00|A1 */
+    srl    t6, t1, 24  /*  00|00|00|A2 */
+    replv.ph t7, t6    /*  00|A2|00|A2 */
+    muleu_s.ph.qbl t8, t0, t5 /* A1*A1|A1*R1 */
+    muleu_s.ph.qbr t0, t0, t5 /* A1*G1|A1*B1 */
+    muleu_s.ph.qbl t9, t1, t7 /* A2*A2|A2*R2 */
+    muleu_s.ph.qbr t1, t1, t7 /* A2*G2|A2*B2 */
+    srl    t5, t2, 24  /*  00|00|00|A3 */
+    replv.ph s0, t5    /*  00|A3|00|A3 */
+    srl    t7, t3, 24  /*  00|00|00|A4 */
+    replv.ph s1, t7    /*  00|A4|00|A4 */
+    muleu_s.ph.qbl s2, t2, s0 /* A3*A3|A3*R3 */
+    muleu_s.ph.qbr t2, t2, s0 /* A3*G3|A3*B3 */
+    muleu_s.ph.qbl s0, t3, s1 /* A4*A4|A4*R4 */
+    muleu_s.ph.qbr t3, t3, s1 /* A4*G4|A4*B4 */
+    /* per 16-bit product p: p += (p >> 8) */
+    preceu.ph.qbla s1, t8
+    preceu.ph.qbla s3, t0
+    addu.ph t8, t8, s1
+    addu.ph t0, t0, s3
+    preceu.ph.qbla s1, t9
+    preceu.ph.qbla s3, t1
+    addu.ph t9, t9, s1
+    addu.ph t1, t1, s3
+    preceu.ph.qbla s1, s2
+    preceu.ph.qbla s3, t2
+    addu.ph s2, s2, s1
+    addu.ph t2, t2, s3
+    preceu.ph.qbla s1, s0
+    preceu.ph.qbla s3, t3
+    addu.ph s0, s0, s1
+    addu.ph t3, t3, s3
+    /* rounding shift right by 8 of every halfword */
+    shra_r.ph t8, t8, 8 /*  xxAA1|xxRR1 */
+    shra_r.ph t0, t0, 8 /*  xxGG1|xxBB1 */
+    shra_r.ph t9, t9, 8
+    shra_r.ph t1, t1, 8
+    shra_r.ph s2, s2, 8
+    shra_r.ph t2, t2, 8
+    shra_r.ph s0, s0, 8
+    shra_r.ph t3, t3, 8
+    /* pack the low bytes of the four halfwords back into one word */
+    precr.qb.ph t0, t8, t0
+    precr.qb.ph t1, t9, t1
+    precr.qb.ph t2, s2, t2
+    precr.qb.ph t3, s0, t3
+    /* original alpha (t4..t7) becomes the top byte, low 24 bits from the packed word */
+    append t4, t0, 24
+    append t6, t1, 24
+    append t5, t2, 24
+    append t7, t3, 24
+    sw    t4, 0(a0)
+    sw    t6, 4(a0)
+    sw    t5, 8(a0)
+    sw    t7, 12(a0)
+
+    /* pixels 4..7 of the batch, same sequence as above */
+    lw     t0, 16(a0)
+    lw     t1, 20(a0)
+    lw     t2, 24(a0)
+    lw     t3, 28(a0)
+    srl    t4, t0, 24  /*  00|00|00|A1 */
+    replv.ph t5, t4    /*  00|A1|00|A1 */
+    srl    t6, t1, 24  /*  00|00|00|A2 */
+    replv.ph t7, t6    /*  00|A2|00|A2 */
+    muleu_s.ph.qbl t8, t0, t5 /* A1*A1|A1*R1 */
+    muleu_s.ph.qbr t0, t0, t5 /* A1*G1|A1*B1 */
+    muleu_s.ph.qbl t9, t1, t7 /* A2*A2|A2*R2 */
+    muleu_s.ph.qbr t1, t1, t7 /* A2*G2|A2*B2 */
+    srl    t5, t2, 24  /*  00|00|00|A3 */
+    replv.ph s0, t5    /*  00|A3|00|A3 */
+    srl    t7, t3, 24  /*  00|00|00|A4 */
+    replv.ph s1, t7    /*  00|A4|00|A4 */
+    muleu_s.ph.qbl s2, t2, s0 /* A3*A3|A3*R3 */
+    muleu_s.ph.qbr t2, t2, s0 /* A3*G3|A3*B3 */
+    muleu_s.ph.qbl s0, t3, s1 /* A4*A4|A4*R4 */
+    muleu_s.ph.qbr t3, t3, s1 /* A4*G4|A4*B4 */
+    preceu.ph.qbla s1, t8
+    preceu.ph.qbla s3, t0
+    addu.ph t8, t8, s1
+    addu.ph t0, t0, s3
+    preceu.ph.qbla s1, t9
+    preceu.ph.qbla s3, t1
+    addu.ph t9, t9, s1
+    addu.ph t1, t1, s3
+    preceu.ph.qbla s1, s2
+    preceu.ph.qbla s3, t2
+    addu.ph s2, s2, s1
+    addu.ph t2, t2, s3
+    preceu.ph.qbla s1, s0
+    preceu.ph.qbla s3, t3
+    addu.ph s0, s0, s1
+    addu.ph t3, t3, s3
+    shra_r.ph t8, t8, 8 /*  xxAA1|xxRR1 */
+    shra_r.ph t0, t0, 8 /*  xxGG1|xxBB1 */
+    shra_r.ph t9, t9, 8
+    shra_r.ph t1, t1, 8
+    shra_r.ph s2, s2, 8
+    shra_r.ph t2, t2, 8
+    shra_r.ph s0, s0, 8
+    shra_r.ph t3, t3, 8
+    precr.qb.ph t0, t8, t0
+    precr.qb.ph t1, t9, t1
+    precr.qb.ph t2, s2, t2
+    precr.qb.ph t3, s0, t3
+    append t4, t0, 24
+    append t6, t1, 24
+    append t5, t2, 24
+    append t7, t3, 24
+    sw    t4, 16(a0)
+    sw    t6, 20(a0)
+    sw    t5, 24(a0)
+    sw    t7, 28(a0)
+    bgtz  v1, 2b       /* if (v1): unrolled loop */
+     addiu a0, a0, 32  /* data += 8 pixels */
+
+    beqz   v0, 4f      /* if (!v0): skip tail loop */
+     nop
+
+    /* tail loop, handles (len < 8), one pixel at a time */
+1:  lw     t1, 0 (a0)
+    addiu  v0, v0, -1  /* len-- */
+    srl    t2, t1, 24  /* t2 = alpha */
+    replv.ph t3, t2
+    muleu_s.ph.qbl t4, t1, t3
+    muleu_s.ph.qbr t1, t1, t3
+    preceu.ph.qbla t3, t4
+    preceu.ph.qbla t5, t1
+    addu.ph   t4, t4, t3
+    addu.ph   t1, t1, t5
+    shra_r.ph t4, t4, 8
+    shra_r.ph t1, t1, 8
+    precr.qb.ph t1, t4, t1
+    append   t2, t1, 24
+    sw     t2, 0(a0)
+    bgtz   v0, 1b
+     addiu a0, a0,  4  /* data++ (one pixel) */
+
+4:  bnez   a1, 3b      /* if (numlines): loop */
+     addu  a0, a0, a3  /* data += lineskip */
+
+0:  /* return */
+    RESTORE_REGS_FROM_STACK  0, s0, s1, s2, s3
+
+    jr ra
+     nop
+
+END(premultiply_argb_inplace_mips_asm)
+
--- a/src/gui/painting/qdrawhelper.cpp
+++ b/src/gui/painting/qdrawhelper.cpp
@ -1790,7 +1790,7 @@ static const uint *QT_FASTCALL fetchTransformedBilinear(uint *buffer, const Oper
    return buffer;
 }

-static const SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
+static SourceFetchProc sourceFetch[NBlendTypes][QImage::NImageFormats] = {
    // Untransformed
    {
        0, // Invalid
@ -6402,6 +6402,21 @@ void qInitDrawhelperAsm()

        destStoreProc[QImage::Format_ARGB32] = qt_destStoreARGB32_mips_dsp;

+        sourceFetch[BlendUntransformed][QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
+        sourceFetch[BlendTiled][QImage::Format_RGB888] = qt_fetchUntransformed_888_mips_dsp;
+
+        sourceFetch[BlendUntransformed][QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
+        sourceFetch[BlendTiled][QImage::Format_RGB444] = qt_fetchUntransformed_444_mips_dsp;
+
+        sourceFetch[BlendUntransformed][QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
+        sourceFetch[BlendTiled][QImage::Format_ARGB8565_Premultiplied] = qt_fetchUntransformed_argb8565_premultiplied_mips_dsp;
+
+#if defined(QT_COMPILER_SUPPORTS_MIPS_DSPR2)
+        qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dspr2;
+#else
+        qBlendFunctions[QImage::Format_RGB16][QImage::Format_RGB16] = qt_blend_rgb16_on_rgb16_mips_dsp;
+#endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2
+
 #endif // QT_COMPILER_SUPPORTS_MIPS_DSP
    if (functionForModeSolidAsm) {
        const int destinationMode = QPainter::CompositionMode_Destination;
--- a/src/gui/painting/qdrawhelper_mips_dsp.cpp
+++ b/src/gui/painting/qdrawhelper_mips_dsp.cpp
@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
+** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
 ** Contact: http://www.qt-project.org/legal
 **
 ** This file is part of the QtGui module of the Qt Toolkit.
@ -110,6 +110,78 @@ void qt_blend_rgb32_on_rgb32_mips_dsp(uchar *destPixels, int dbpl,
    }
 }

+#if defined QT_COMPILER_SUPPORTS_MIPS_DSPR2
+/*
+ * Blends a w x h block of RGB16 pixels from srcPixels onto destPixels.
+ * dbpl and sbpl are the byte strides between consecutive scanlines of the
+ * destination and of the source.
+ *
+ *  - const_alpha == 0:   the destination is left untouched.
+ *  - const_alpha == 256: the source replaces the destination (assembly copy
+ *                        for rows narrower than 256 pixels, memcpy otherwise).
+ *  - any other value:    every row is blended by the DSPr2 assembly routine.
+ */
+void qt_blend_rgb16_on_rgb16_mips_dspr2(uchar *destPixels, int dbpl,
+                                        const uchar *srcPixels, int sbpl,
+                                        int w, int h,
+                                        int const_alpha)
+{
+    if (const_alpha == 0)
+        return;
+
+    if (const_alpha != 256) {
+        // Partially transparent source: blend scanline by scanline.
+        for (int row = 0; row < h; ++row) {
+            qt_blend_rgb16_on_rgb16_mips_dspr2_asm((quint16 *) destPixels,
+                                                   (const quint16 *) srcPixels,
+                                                   w, const_alpha);
+            destPixels += dbpl;
+            srcPixels += sbpl;
+        }
+        return;
+    }
+
+    // Opaque source: every scanline is a plain copy.
+    if (w >= 256) {
+        const int bytesPerRow = w << 1;   // two bytes per RGB16 pixel
+        while (h--) {
+            memcpy(destPixels, srcPixels, bytesPerRow);
+            destPixels += dbpl;
+            srcPixels += sbpl;
+        }
+        return;
+    }
+
+    for (int row = 0; row < h; ++row) {
+        qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm((quint16 *) destPixels,
+                                                             (const quint16 *) srcPixels,
+                                                             w);
+        destPixels += dbpl;
+        srcPixels += sbpl;
+    }
+}
+#else
+/*
+ * Blends a w x h block of RGB16 pixels from srcPixels onto destPixels.
+ * dbpl and sbpl are the byte strides between consecutive scanlines of the
+ * destination and of the source.
+ *
+ *  - const_alpha == 0:   the destination is left untouched.
+ *  - const_alpha == 256: the source replaces the destination (assembly copy
+ *                        for rows narrower than 256 pixels, memcpy otherwise).
+ *  - any other value:    every row is blended by the DSP assembly routine.
+ */
+void qt_blend_rgb16_on_rgb16_mips_dsp(uchar *destPixels, int dbpl,
+                                      const uchar *srcPixels, int sbpl,
+                                      int w, int h,
+                                      int const_alpha)
+{
+    if (const_alpha == 0)
+        return;
+
+    if (const_alpha != 256) {
+        // Partially transparent source: blend scanline by scanline.
+        for (int row = 0; row < h; ++row) {
+            qt_blend_rgb16_on_rgb16_mips_dsp_asm((quint16 *) destPixels,
+                                                 (const quint16 *) srcPixels,
+                                                 w, const_alpha);
+            destPixels += dbpl;
+            srcPixels += sbpl;
+        }
+        return;
+    }
+
+    // Opaque source: every scanline is a plain copy.
+    if (w >= 256) {
+        const int bytesPerRow = w << 1;   // two bytes per RGB16 pixel
+        while (h--) {
+            memcpy(destPixels, srcPixels, bytesPerRow);
+            destPixels += dbpl;
+            srcPixels += sbpl;
+        }
+        return;
+    }
+
+    for (int row = 0; row < h; ++row) {
+        qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm((quint16 *) destPixels,
+                                                             (const quint16 *) srcPixels,
+                                                             w);
+        destPixels += dbpl;
+        srcPixels += sbpl;
+    }
+}
+#endif
+
 void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha)
 {
    if (const_alpha == 255) {
@ -422,5 +494,28 @@ void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int l
    comp_func_SourceOut_dsp_asm_x2(dest, src, length, const_alpha);
 }

+/*
+ * Fetches `length` RGB888 pixels of texture scanline `y`, starting at pixel
+ * `x`, and converts them into `buffer` with the MIPS DSP assembly routine.
+ * Returns `buffer`.
+ *
+ * `x` is a pixel index while the scanline is addressed in bytes; the assembly
+ * routine consumes three bytes per pixel, so the index is scaled by three.
+ */
+const uint * QT_FASTCALL qt_fetchUntransformed_888_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
+                                             int y, int x, int length)
+{
+    const uchar *line = (const uchar *)data->texture.scanLine(y) + x * 3;
+    fetchUntransformed_888_asm_mips_dsp(buffer, line, length);
+    return buffer;
+}
+
+/*
+ * Fetches `length` RGB444 pixels of texture scanline `y`, starting at pixel
+ * `x`, and converts them into `buffer` with the MIPS DSP assembly routine.
+ * Returns `buffer`.
+ *
+ * `x` is a pixel index while the scanline is addressed in bytes; the assembly
+ * routine consumes two bytes per pixel (and loads them as halfwords), so the
+ * index is scaled by two, which also keeps the address halfword-aligned.
+ */
+const uint * QT_FASTCALL qt_fetchUntransformed_444_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
+                                             int y, int x, int length)
+{
+    const uchar *line = (const uchar *)data->texture.scanLine(y) + x * 2;
+    fetchUntransformed_444_asm_mips_dsp(buffer, line, length);
+    return buffer;
+}
+
+/*
+ * Fetches `length` ARGB8565 (premultiplied) pixels of texture scanline `y`,
+ * starting at pixel `x`, and converts them into `buffer` with the MIPS DSP
+ * assembly routine. Returns `buffer`.
+ *
+ * `x` is a pixel index while the scanline is addressed in bytes; the assembly
+ * routine consumes three bytes per pixel, so the index is scaled by three.
+ */
+const uint * QT_FASTCALL qt_fetchUntransformed_argb8565_premultiplied_mips_dsp (uint *buffer, const Operator *, const QSpanData *data,
+                                             int y, int x, int length)
+{
+    const uchar *line = (const uchar *)data->texture.scanLine(y) + x * 3;
+    fetchUntransformed_argb8565_premultiplied_asm_mips_dsp(buffer, line, length);
+    return buffer;
+}

 QT_END_NAMESPACE
--- a/src/gui/painting/qdrawhelper_mips_dsp_asm.S
+++ b/src/gui/painting/qdrawhelper_mips_dsp_asm.S
@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
+** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
 ** Contact: http://www.qt-project.org/legal
 **
 ** This file is part of the QtGui module of the Qt Toolkit.
@ -1601,3 +1601,479 @@ LEAF_MIPS_DSP(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
     nop

 END(qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm)
+
+
+#if defined(__MIPSEL) && __MIPSEL  /* little-endian target; helpers for the one-pointer-unaligned copy loops below */
+# define PACK(r, s, t)  packrl.ph r, s, t  /* r = low halfword of s : high halfword of t */
+# define SWHI(r, o, b)  swl r, o + 1 (b)  /* partial store, used for the last pixel of a group */
+# define SWLO(r, o, b)  swr r, o + 0 (b)  /* partial store, used for the first pixel of a group */
+# define LDHI(r, o, b)  lwl r, o + 1 (b)  /* partial load, used for the first pixel of a group */
+# define LDLO(r, o, b)  lwr r, o + 2 (b)  /* partial load, used for the last pixel of a group */
+#else  /* otherwise: PACK operands and the left/right load/store variants are swapped */
+# define PACK(r, s, t)  packrl.ph r, t, s  /* r = low halfword of t : high halfword of s */
+# define SWHI(r, o, b)  swr r, o + 1 (b)
+# define SWLO(r, o, b)  swl r, o + 0 (b)
+# define LDHI(r, o, b)  lwr r, o + 1 (b)
+# define LDLO(r, o, b)  lwl r, o + 2 (b)
+#endif
+
+LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)
+/*
+ * Copies len RGB16 pixels from src to dst without modifying them: pixels are
+ * only loaded from src and stored to dst, eight per iteration in the
+ * unrolled loops and one at a time for the remainder.
+ *
+ * a0 - dst (*r5g6b5)
+ * a1 - src (const *r5g6b5)
+ * a2 - len (unsigned int)
+ *
+ * Register usage:
+ *  t0-3 - Scratch registers
+ *  t4   - Number of iterations to do in unrolled loops
+ *  t5-7 - Auxiliary scratch registers.
+ *
+ * Check if base addresses of src/dst are aligned, cases:
+ *  a) Both aligned.
+ *  b) Both unaligned:
+ *      1. Copy a halfword
+ *      2. Use aligned case.
+ *  c) dst aligned, src unaligned:
+ *      1. Read a word from dst, halfword from src.
+ *      2. Continue reading words from both.
+ *  d) dst unaligned, src aligned:
+ *      1. Read a word from src, halfword from dst.
+ *      2. Continue reading words from both.
+ *
+ * "Unaligned" here means halfword-aligned but not word-aligned; only the
+ * low two address bits are tested.
+ */
+
+    beqz   a2, 0f       /* if (a2:len == 0): return */
+     andi  t0, a0, 0x3  /* t0 = a0:dst % 4 */
+    andi   t1, a1, 0x3  /* t1 = a1:src % 4 */
+    or     t2, t0, t1   /* t2 = t0 | t1 */
+
+    beqz   t2, 4f       /* both aligned */
+     nop
+    beqz   t0, 3f       /* dst aligned, src unaligned */
+     nop
+    beqz   t1, 2f       /* src aligned, dst unaligned */
+     nop
+
+    /*
+     * Both src/dst are unaligned: read 1 halfword from each,
+     * then fall-off to continue with word-aligned copy.
+     */
+    lhu    t0, 0 (a1)    /* t0 <- ((uint16_t*) src)[0] */
+    addiu  a1, a1, 2     /* src++ */
+    addiu  a2, a2,-1     /* len-- */
+    sh     t0, 0 (a0)    /* t0 -> ((uint16_t*) dst)[0] */
+    addiu  a0, a0, 2     /* dst++ */
+
+    /*
+     * Both src/dst pointers are word-aligned, process eight
+     * items at a time in an unrolled loop.
+     */
+4:  beqz   a2, 0f        /* if (len == 0): return */
+     srl   t4, a2, 3     /* t4 = len / 8 */
+
+    beqz   t4, 5f        /* if (t4 == 0): tail */
+     andi  a2, a2, 0x07  /* len = len % 8 */
+
+1:  lw     t0,  0 (a1)
+    lw     t1,  4 (a1)
+    lw     t2,  8 (a1)
+    lw     t3, 12 (a1)
+
+    addiu  t4, t4, -1     /* t4-- */
+    addiu  a1, a1, 16     /* src += 8 */
+
+    sw     t0,  0 (a0)
+    sw     t1,  4 (a0)
+    sw     t2,  8 (a0)
+    sw     t3, 12 (a0)
+
+    bnez   t4, 1b
+     addiu a0, a0, 16     /* dst += 8 */
+
+    b 5f
+    nop
+
+
+    /*
+     * dst pointer is unaligned: load four aligned words from src, store the
+     * first and last pixel with partial stores (SWLO/SWHI) and the six
+     * pixels in between as three words re-packed with PACK.
+     */
+2:  beqz   a2, 0f        /* if (len == 0): return */
+     srl   t4, a2, 3     /* t4 = len / 8 */
+    beqz   t4, 5f        /* if (t4 == 0): tail */
+     andi  a2, a2, 0x07  /* len = len % 8 */
+
+1:  lw     t0,  0 (a1)
+   lw     t1,  4 (a1)
+    lw     t2,  8 (a1)
+    lw     t3, 12 (a1)
+
+    addiu  t4, t4, -1    /* t4-- */
+    addiu  a1, a1, 16    /* src += 8 */
+
+    SWLO  (t0,  0, a0)
+    PACK  (t5, t1, t0)
+    PACK  (t6, t2, t1)
+    PACK  (t7, t3, t2)
+    SWHI  (t3, 14, a0)
+    sw     t5,  2 (a0)
+    sw     t6,  6 (a0)
+    sw     t7, 10 (a0)
+
+    bnez   t4, 1b
+     addiu a0, a0, 16    /* dst += 8 */
+
+    b 5f
+     nop
+
+    /*
+     * src pointer is unaligned: load the first and last pixel with partial
+     * loads (LDHI/LDLO) and the six pixels in between as three words, then
+     * re-pack them with PACK into four aligned words for dst.
+     * NOTE(review): the LDHI/LDLO loads appear to read the aligned words
+     * around the group, i.e. also the halfword just before/after the eight
+     * source pixels - confirm this is acceptable at buffer edges.
+     */
+3:  beqz   a2, 0f        /* if (len == 0): return */
+     srl   t4, a2, 3     /* t4 = len / 8 */
+    beqz   t4, 5f        /* if (t4 == 0): tail */
+     andi  a2, a2, 0x07  /* len = len % 8 */
+
+1:  LDHI  (t0,  0, a1)
+    lw     t1,  2 (a1)
+    lw     t2,  6 (a1)
+    lw     t3, 10 (a1)
+    LDLO  (t5, 12, a1)
+
+    addiu  t4, t4, -1    /* t4-- */
+    addiu  a1, a1, 16    /* src += 8 */
+
+    PACK  (t0, t1, t0)
+    PACK  (t6, t2, t1)
+    PACK  (t7, t3, t2)
+    sw     t0,  0 (a0)
+    PACK  (t0, t5, t3)
+    sw     t6,  4 (a0)
+    sw     t7,  8 (a0)
+    sw     t0, 12 (a0)
+
+    bnez   t4, 1b
+     addiu a0, a0, 16    /* dst += 8 */
+
+
+5:  /* Process remaining items (a2:len < 8), one at a time */
+    beqz   a2, 0f
+     nop
+
+1:  lhu    t0, 0 (a1)  /* t0 <- ((uint16_t*) src)[0] */
+    addiu  a2, a2,-1   /* len-- */
+    addiu  a1, a1, 2   /* src++ */
+    sh     t0, 0 (a0)  /* t0 -> ((uint16_t*) dst)[0] */
+    bnez   a2, 1b      /* if (len != 0): loop */
+     addiu a0, a0, 2   /* dst++ */
+
+0:  jr ra
+     nop
+
+END(qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm)
+
+
+#undef LDHI
+#undef LDLO
+#undef PACK
+#undef SWHI
+#undef SWLO
+
+
+LEAF_MIPS_DSP(qt_blend_rgb16_on_rgb16_mips_dsp_asm)
+/*
+ * Blends len RGB16 pixels of src onto dst with a constant alpha, one pixel
+ * per iteration: dst = src * alpha + dst * (255 - alpha), computed
+ * separately on the green field (0x07e0) and on the red/blue fields (0xf81f).
+ *
+ * a0 - dst (*r5g6b5)
+ * a1 - src (const *r5g6b5)
+ * a2 - len (unsigned int) - batch length
+ * a3 - alpha (int)
+ *
+ * Register usage inside the loop:
+ *  a3 - alpha + 1            (multiplier for the src green field)
+ *  t4 - (alpha + 1) >> 2     (multiplier for the src red/blue fields)
+ *  t9 - ialpha + 1           (multiplier for the dst green field)
+ *  t5 - (ialpha + 1) >> 2    (multiplier for the dst red/blue fields)
+ *  t0-t3 - scratch
+ * where alpha has first been rescaled to (alpha * 255) >> 8 and
+ * ialpha = 255 - alpha.
+ */
+
+    beqz    a2, 2f          /* if (len == 0): return */
+     li     t9, 255
+    sll     t8, a3, 8       /* t8 = alpha * 256 */
+    subu    a3, t8, a3      /* a3 = alpha * 255 */
+    srl     a3, a3, 8       /* alpha = (alpha * 255) >> 8 */
+    subu    t9, t9, a3      /* ialpha = 255 - alpha */
+    addiu   a3, a3, 1       /* alpha++ */
+    srl     t4, a3, 2       /* t4 = alpha >> 2 */
+    addiu   t9, t9, 1       /* ialpha++ */
+    srl     t5, t9, 2       /* t5 = ialpha >> 2 */
+1:
+    lhu     t0, 0(a1)       /* t0 = src pixel */
+    lhu     t1, 0(a0)       /* t1 = dst pixel */
+    addiu   a2, a2, -1      /* len-- */
+    andi    t2, t0, 0x07e0  /* t2 = src green field */
+    andi    t0, t0, 0xf81f  /* t0 = src red/blue fields */
+    mul     t2, t2, a3      /* src green * alpha */
+    mul     t0, t0, t4      /* src red/blue * (alpha >> 2) */
+    andi    t3, t1, 0x07e0  /* t3 = dst green field */
+    andi    t1, t1, 0xf81f  /* t1 = dst red/blue fields */
+    mul     t3, t3, t9      /* dst green * ialpha */
+    mul     t1, t1, t5      /* dst red/blue * (ialpha >> 2) */
+    addiu   a1, a1, 2       /* src++ */
+    srl     t2, t2, 8       /* scale the products back down ... */
+    srl     t0, t0, 6
+    andi    t2, t2, 0x07e0  /* ... and drop bits that spilled out of each field */
+    andi    t0, t0, 0xf81f
+    or      t0, t0, t2      /* t0 = src * alpha */
+    srl     t3, t3, 8
+    srl     t1, t1, 6
+    andi    t3, t3, 0x07e0
+    andi    t1, t1, 0xf81f
+    or      t1, t1, t3      /* t1 = dst * ialpha */
+    addu    t0, t0, t1      /* src * alpha + dst * ialpha */
+    sh      t0, 0(a0)
+    bgtz    a2, 1b          /* loop while len > 0 */
+     addiu  a0, a0, 2       /* dst++ */
+2:
+    jr      ra
+     nop
+
+END(qt_blend_rgb16_on_rgb16_mips_dsp_asm)
+
+
+LEAF_MIPS_DSP(fetchUntransformed_888_asm_mips_dsp)
+/*
+ * Expands `length` 3-byte pixels (R, G, B in memory order) into 32-bit
+ * values of the form | ff | R | G | B |.
+ *
+ * a0 - dst address (address of 32-bit aRGB value)
+ * a1 - src address
+ * a2 - length
+ *
+ * If length is odd, one pixel is converted with byte loads first. The
+ * remaining pixels are handled two per iteration; the loop variant (2: or
+ * 3:) is chosen by the parity of the src address so that every lhu hits an
+ * even address.
+ * t8 holds 0xff000000, the alpha byte OR-ed into every output pixel.
+ * v0 is never written here, so no meaningful value is returned.
+ * NOTE(review): the register diagrams for the lhu results assume
+ * little-endian byte order - confirm before using on big-endian targets.
+ */
+
+    beqz       a2, 4f        /* if (length == 0): return */
+     lui       t8, 0xff00    /* t8 = 0xff000000 */
+    andi       t0, a2, 0x1
+    beqz       t0, 1f        /* even length: no leading single pixel */
+     nop
+/* case for one pixel */
+    lbu        t1, 0(a1)     /* t1 = R */
+    lbu        v1, 2(a1)     /* v1 = B */
+    lbu        t0, 1(a1)     /* t0 = G */
+    addiu      a1, a1, 3
+    addiu      a2, a2, -1
+    sll        t1, t1, 0x10
+    or         v1, v1, t8
+    sll        t0, t0, 0x8
+    or         v1, v1, t1
+    or         v1, v1, t0    /* v1 = | ff | R | G | B | */
+    sw         v1, 0(a0)
+    addiu      a0, a0, 4
+
+    beqz       a2, 4f        /* only one pixel is present (length = 1) */
+     nop
+1:
+    andi       t0, a1, 0x1
+    beqz       t0, 3f        /* src address is even: use loop 3 */
+     nop
+2:                           /* src address is odd */
+    lbu        t0, 0(a1)     /* t0 = | 0 | 0 | 0 | R1 | */
+    lhu        t1, 1(a1)     /* t1 = | 0 | 0 | B1 | G1 | */
+    addiu      a1, a1, 3
+    lhu        t2, 0(a1)     /* t2 = | 0 | 0 | G2 | R2 | */
+    lbu        t3, 2(a1)     /* t3 = | 0 | 0 | 0 | B2 | */
+
+    sll        t0, t0, 16
+    or         t0, t0, t8    /* t0 = | ff | R1 | 0 | 0 | */
+    shll.ph    t4, t1, 8     /* t4 = | 0 | 0 | G1 | 0 | */
+    srl        t5, t1, 8
+    or         t4, t4, t5    /* t4 = | 0 | 0 | G1 | B1 | */
+    or         t0, t0, t4    /* t0 = | ff | R1 | G1 | B1 | */
+
+    shll.ph    t4, t2, 8     /* t4 = | 0 | 0 | R2 | 0 | */
+    srl        t5, t2, 8     /* t5 = | 0 | 0 | 0 | G2 | */
+    or         t4, t4, t5
+    sll        t4, t4, 8     /* t4 = | 0 | R2 | G2 | 0 | */
+    or         t5, t3, t8
+    or         t2, t4, t5    /* t2 = | ff | R2 | G2 | B2 | */
+
+    sw         t0, 0(a0)
+    addiu      a1, a1, 3
+    sw         t2, 4(a0)
+    addiu      a2, a2, -2
+    bnez       a2, 2b
+     addiu     a0, a0, 8
+    b          4f
+     nop
+3:                           /* src address is even */
+    lhu        t0, 0(a1)     /* t0 = | 0 | 0 | G1 | R1 | */
+    lbu        t1, 2(a1)     /* t1 = | 0 | 0 | 0 | B1 | */
+    addiu      a1, a1, 3
+    lbu        t2, 0(a1)     /* t2 = | 0 | 0 | 0 | R2 | */
+    lhu        t3, 1(a1)     /* t3 = | 0 | 0 | B2 | G2 | */
+
+    srl        t4, t0, 8     /* t4 = | 0 | 0 | 0 | G1 | */
+    shll.ph    t5, t0, 8     /* t5 = | 0 | 0 | R1 | 0 | */
+    or         t0, t4, t5
+    sll        t6, t0, 8     /* t6 = | 0 | R1 | G1 | 0 | */
+    or         t4, t1, t8    /* t4 = | ff | 0 | 0 | B1 | */
+    or         t0, t6, t4    /* t0 = | ff | R1 | G1 | B1 | */
+
+    sll        t2, t2, 16    /* t2 = | 0 | R2 | 0 | 0 | */
+    srl        t4, t3, 8     /* t4 = | 0 | 0 | 0 | B2 | */
+    shll.ph    t5, t3, 8     /* t5 = | 0 | 0 | G2 | 0 | */
+    or         t3, t4, t5    /* t3 = | 0 | 0 | G2 | B2 | */
+    or         t2, t2, t3
+    or         t2, t2, t8    /* t2 = | ff | R2 | G2 | B2 | */
+
+    sw         t0, 0(a0)
+    addiu      a1, a1, 3
+    sw         t2, 4(a0)
+    addiu      a2, a2, -2
+    bnez       a2, 3b
+     addiu     a0, a0, 8
+4:
+    jr         ra
+     nop
+
+END(fetchUntransformed_888_asm_mips_dsp)
+
+
+LEAF_MIPS_DSP(fetchUntransformed_444_asm_mips_dsp)
+/*
+ * Expands `length` 16-bit pixels holding 4-bit R, G and B fields (masks
+ * 0xf00, 0xf0, 0xf) into 32-bit values | ff | RR | GG | BB |, where each
+ * nibble is duplicated to fill its byte.
+ *
+ * a0 - dst address (address of 32-bit aRGB value)
+ * a1 - src address
+ * a2 - length
+ *
+ * Pixels are converted one at a time until src is word-aligned and the
+ * remaining count allows it, then two at a time from a single word load,
+ * with an optional single trailing pixel.
+ * t8 holds 0xff000000; t4 holds 1 until the unrolled loop reuses it as
+ * scratch; a3 is the loop-exit count (remaining length & 1).
+ * NOTE: v0 is reused as scratch in the single-pixel paths, so the value
+ * left in v0 on return is not reliably the buffer address.
+ */
+
+    lui              t8, 0xff00
+    li               t4, 0x1
+
+    beqz             a2, 5f
+     move            v0, a0         /* just return the address of buffer
+                                     * for storing returning values */
+    andi             t0, a2, 0x1
+    beqz             t0, 2f         /* length is even, so there is more than
+                                     * one pixel (check src memory alignment (word)) */
+     nop
+1:
+    lhu              v0, 0(a1)
+    addiu            a1, a1, 2
+    addiu            a2, a2, -1
+    andi             t0, v0, 0xf00  /* t0 = R nibble */
+    andi             v1, v0, 0xf    /* v1 = B nibble */
+    andi             v0, v0, 0xf0   /* v0 = G nibble */
+    sra              t3, t0, 0x4
+    sra              t1, v0, 0x4
+    sra              t0, t0, 0x8
+    sll              t2, v1, 0x4
+    or               t0, t0, t3     /* t0 = RR */
+    or               v0, t1, v0     /* v0 = GG */
+    lui              t1, 0xff00
+    or               v1, t2, v1     /* v1 = BB */
+    sll              t0, t0, 0x10
+    or               v1, v1, t1
+    sll              v0, v0, 0x8
+    or               v1, v1, t0
+    or               v0, v1, v0     /* v0 = | ff | RR | GG | BB | */
+    sw               v0, 0(a0)
+    addiu            a0, a0, 4
+    beqz             a2, 5f         /* no more pixels for processing */
+     nop
+    beq              a2, t4, 4f     /* only one more pixel remained */
+     nop
+/* check if src memory address is word aligned */
+2:
+    andi             t0, a1, 0x3
+    beqz             t0, 3f         /* memory is word aligned */
+     andi            a3, a2, 0x1    /* set the a3 register as the comparison
+                                     * for ending the unrolled loop
+                                     * (1 if odd, 0 if even) */
+    b                1b             /* not word aligned,
+                                     * go another turn with
+                                     * just one pixel processing */
+     nop
+3:
+    lw               t0, 0(a1)
+    addiu            a2, a2, -2
+    preceu.ph.qbr    t1, t0         /* t1 = | 0 | aR1 | 0 | G1B1 | */
+    preceu.ph.qbl    t2, t0         /* t2 = | 0 | aR2 | 0 | G2B2 | */
+    shll.qb          t3, t1, 4      /* t3 = | 0 | R1 0 | 0 | B1 0 | */
+    srl              t4, t3, 4
+    or               t0, t3, t4     /* t0 = | 0 | R1R1 | 0 | B1B1 | */
+    andi             t3, t1, 0xf0
+    sll              t3, t3, 8
+    srl              t4, t3, 4
+    or               t1, t3, t4
+    or               t0, t0, t1     /* t0 = | 0 | R1R1 | G1G1 | B1B1 | */
+    or               t0, t0, t8     /* t0 = | ff | R1R1 | G1G1 | B1B1 | */
+
+    shll.qb          t3, t2, 4      /* t3 = | 0 | R2 0 | 0 | B2 0 | */
+    srl              t4, t3, 4
+    or               t7, t3, t4     /* t7 = | 0 | R2R2 | 0 | B2B2 | */
+    andi             t3, t2, 0xf0
+    sll              t3, t3, 8
+    srl              t4, t3, 4
+    or               t1, t3, t4
+    or               t2, t7, t1     /* t2 = | 0 | R2R2 | G2G2 | B2B2 | */
+    or               t2, t2, t8     /* t2 = | ff | R2R2 | G2G2 | B2B2 | */
+
+    sw               t0, 0(a0)
+    addiu            a1, a1, 4
+    sw               t2, 4(a0)
+    bne              a2, a3, 3b
+     addiu           a0, a0, 8
+    beqz             a2, 5f         /* no more pixels for processing */
+     nop
+4:
+/* one more pixel remained (after loop unrolling process finished) */
+    lhu              v0, 0(a1)
+    addiu            a1, a1, 2
+    addiu            a2, a2, -1
+    andi             t0, v0, 0xf00  /* t0 = R nibble */
+    andi             v1, v0, 0xf    /* v1 = B nibble */
+    andi             v0, v0, 0xf0   /* v0 = G nibble */
+    sra              t3, t0, 0x4
+    sra              t1, v0, 0x4
+    sra              t0, t0, 0x8
+    sll              t2, v1, 0x4
+    or               t0, t0, t3     /* t0 = RR */
+    or               v0, t1, v0     /* v0 = GG */
+    lui              t1, 0xff00
+    or               v1, t2, v1     /* v1 = BB */
+    sll              t0, t0, 0x10
+    or               v1, v1, t1
+    sll              v0, v0, 0x8
+    or               v1, v1, t0
+    or               v0, v1, v0     /* v0 = | ff | RR | GG | BB | */
+    sw               v0, 0(a0)
+    addiu            a0, a0, 4
+5:
+    jr               ra
+     nop
+
+END(fetchUntransformed_444_asm_mips_dsp)
+
+
+LEAF_MIPS_DSP(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp)
+/*
+ * Expands `length` 3-byte pixels (a byte-swapped 16-bit r5g6b5 value in
+ * bytes 0-1, alpha in byte 2) into 32-bit aarrggbb values, one pixel per
+ * iteration. After expansion every byte of the result is clamped so that it
+ * does not exceed the alpha byte.
+ *
+ * a0 - dst address
+ * a1 - src address
+ * a2 - length
+ *
+ * v0 is never written here, so no meaningful value is returned.
+ */
+
+    beqz      a2, 2f          /* if (length == 0): return */
+     nop
+
+1:
+    ulh       t1, 0(a1)       /* unaligned halfword load of bytes 0-1 */
+    lbu       t2, 2(a1)       /* t2 = byte 2 (alpha) */
+    addiu     a2, a2, -1
+    wsbh      t1, t1          /* swap the two bytes of the halfword */
+    sll       t0, t1, 8       /* t0 = 00000000rrrrrggggggbbbbb00000000 */
+    ins       t0, t1, 3, 16   /* t0 = 00000000rrrrrrrrrrggggggbbbbb000 */
+    ins       t0, t1, 5, 11   /* t0 = 00000000rrrrrrrrggggggbbbbbbb000 */
+    srl       t4, t1, 9       /* t4 = 0000000000000000000000000rrrrrgg */
+    replv.qb  t3, t2          /* t3 = alpha replicated into all four bytes */
+    ins       t0, t4, 8, 2    /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */
+    ins       t0, t1, 3, 5    /* t0 = 00000000rrrrrrrrggggggggbbbbb000 */
+    srl       t4, t1, 2       /* t4 = 000000000000000000rrrrrggggggbbb */
+    ins       t0, t4, 0, 3    /* t0 = 00000000rrrrrrrrggggggggbbbbbbbb */
+    ins       t0, t2, 24, 8   /* t0 =aaaaaaaarrrrrrrrggggggggbbbbbbbb */
+    cmpu.lt.qb t3, t0         /* per byte: is alpha < the byte in t0? */
+    pick.qb   t0, t3, t0      /* where it is, take alpha instead */
+    addiu     a1, a1, 3       /* src += 3 bytes (one pixel) */
+    sw        t0, 0(a0)
+    bgtz      a2, 1b          /* loop while length > 0 */
+     addiu    a0, a0, 4       /* dst++ */
+2:
+    jr        ra
+     nop
+
+END(fetchUntransformed_argb8565_premultiplied_asm_mips_dsp)
--- a/src/gui/painting/qdrawhelper_mips_dsp_p.h
+++ b/src/gui/painting/qdrawhelper_mips_dsp_p.h
@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
+** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
 ** Contact: http://www.qt-project.org/legal
 **
 ** This file is part of the QtGui module of the Qt Toolkit.
@ -101,19 +101,34 @@ extern "C" void qt_blend_argb32_on_argb32_mips_dsp_asm_x2(uint *dest, const uint

 extern "C" void qt_blend_argb32_on_argb32_const_alpha_256_mips_dsp_asm(uint *dest, const uint *src, int length);

+extern "C" void qt_blend_rgb16_on_rgb16_const_alpha_256_mips_dsp_asm(quint16 *dest, const quint16 *src, int length);
+
+extern "C" void qt_blend_rgb16_on_rgb16_mips_dsp_asm(quint16 *dest, const quint16 *src, int length, uint const_alpha);
+
 extern "C" uint * destfetchARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);

 extern "C" uint * qt_destStoreARGB32_asm_mips_dsp(uint *buffer, const uint *data, int length);

+extern "C" uint * fetchUntransformed_888_asm_mips_dsp(uint *buffer, const uchar *line, int length);
+
+extern "C" uint * fetchUntransformed_444_asm_mips_dsp(uint *buffer, const uchar *line, int length);
+
+extern "C" uint * fetchUntransformed_argb8565_premultiplied_asm_mips_dsp(uint *buffer, const uchar *line, int length);
+
 void qt_blend_argb32_on_argb32_mips_dsp(uchar *destPixels, int dbpl,
                                      const uchar *srcPixels, int sbpl,
                                      int w, int h,
                                      int const_alpha);

 void qt_blend_rgb32_on_rgb32_mips_dsp(uchar *destPixels, int dbpl,
-                                    const uchar *srcPixels, int sbpl,
-                                    int w, int h,
-                                    int const_alpha);
+                                      const uchar *srcPixels, int sbpl,
+                                      int w, int h,
+                                      int const_alpha);
+
+void qt_blend_rgb16_on_rgb16_mips_dsp(uchar *destPixels, int dbpl,
+                                      const uchar *srcPixels, int sbpl,
+                                      int w, int h,
+                                      int const_alpha);

 void comp_func_Source_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);

@ -164,6 +179,21 @@ void QT_FASTCALL comp_func_SourceOut_mips_dsp(uint *dest, const uint *src, int l

 void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length, uint const_alpha);

+const uint * QT_FASTCALL qt_fetchUntransformed_888_mips_dsp (uint *buffer,
+                                                             const Operator *,
+                                                             const QSpanData *data,
+                                                             int y, int x, int length);
+
+const uint * QT_FASTCALL qt_fetchUntransformed_444_mips_dsp (uint *buffer,
+                                                             const Operator *,
+                                                             const QSpanData *data,
+                                                             int y, int x, int length);
+
+const uint * QT_FASTCALL qt_fetchUntransformed_argb8565_premultiplied_mips_dsp (uint *buffer,
+                                                                                const Operator *,
+                                                                                const QSpanData *data,
+                                                                                int y, int x, int length);
+
 #endif // QT_COMPILER_SUPPORTS_MIPS_DSP


@ -171,6 +201,13 @@ void QT_FASTCALL comp_func_XOR_mips_dsp(uint *dest, const uint *src, int length,

 extern "C" void  qConvertRgb16To32_asm_mips_dspr2(quint32 *dest, const quint16 *src, int length);

+extern "C" void qt_blend_rgb16_on_rgb16_mips_dspr2_asm(quint16 *dest, const quint16 *src, int length, uint const_alpha);
+
+void qt_blend_rgb16_on_rgb16_mips_dspr2(uchar *destPixels, int dbpl,
+                                        const uchar *srcPixels, int sbpl,
+                                        int w, int h,
+                                        int const_alpha);
+
 #endif // QT_COMPILER_SUPPORTS_MIPS_DSPR2

 QT_END_NAMESPACE
--- a/src/gui/painting/qdrawhelper_mips_dspr2_asm.S
+++ b/src/gui/painting/qdrawhelper_mips_dspr2_asm.S
@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
+** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
 ** Contact: http://www.qt-project.org/legal
 **
 ** This file is part of the QtGui module of the Qt Toolkit.
@ -122,3 +122,551 @@ LEAF_MIPS_DSPR2(qConvertRgb16To32_asm_mips_dspr2)
     nop

 END(qConvertRgb16To32_asm_mips_dspr2)
+
+
+#if defined(__MIPSEL) && __MIPSEL  /* little-endian target; helpers for the src-unaligned loop below */
+# define PACK(r, s, t)  packrl.ph r, s, t  /* r = low halfword of s : high halfword of t */
+# define LDHI(r, o, b)  lwl r, o + 1 (b)  /* partial load, used for the first pixel of a group */
+# define LDLO(r, o, b)  lwr r, o + 2 (b)  /* partial load, used for the last pixel of a group */
+#else  /* otherwise: PACK operands and the left/right load variants are swapped */
+# define PACK(r, s, t)  packrl.ph r, t, s  /* r = low halfword of t : high halfword of s */
+# define LDHI(r, o, b)  lwr r, o + 1 (b)
+# define LDLO(r, o, b)  lwl r, o + 2 (b)
+#endif
+
+
+LEAF_MIPS_DSPR2(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)
+/*
+ * a0 - dst (*r5g6b5)
+ * a1 - src (const *r5g6b5)
+ * a2 - len (unsigned int) - batch length
+ * a3 - alpha (int)
+ *
+ * Register usage:
+ *  t0-3 - Scratch registers
+ *  t4   - Number of iterations to do in unrolled loops
+ *  t5   - Inverse alpha
+ *  t6   - Alpha >> 2
+ *  t7   - Inverse alpha >> 2
+ *  t8   - magic1 (0x07e007e0)
+ *  t9   - magic2 (0xf81ff81f)
+ *
+ * NOTE:
+ *   Cannot use DSP instructions for the multiplication of two
+ *   16-bit values: overflow would be always rounded or saturated.
+ */
+
+    beqz  a2, 0f
+     andi t0, a0, 0x3
+    andi  t1, a1, 0x3
+    /* Adjust alpha value, and calculate inverse alpha value */
+    li    t5, 255
+    or    t2, t0, t1      /* t2 = (dst & 0x3) | (src & 0x3) */
+    sll   t8, a3, 8
+    subu  a3, t8, a3
+    li    t8, 0x07e007e0  /* magic1 */
+    srl   a3, a3, 8       /* alpha >>= 8  */
+    li    t9, 0xf81ff81f  /* magic2 */
+    subu  t5, t5, a3      /* ialpha = 255 - alpha */
+    addiu a3, a3, 1       /* alpha++ */
+    addiu t5, t5, 1       /* ialpha++ */
+    srl   t6, a3, 2       /* ashift = alpha >> 2 */
+
+    beqz  t2, 4f /* both aligned */
+     srl  t7, t5, 2       /* iashift = ialpha >> 2 */
+
+    beqz  t1, 2f /* src aligned, dst unaligned */
+     nop
+
+    beqz  t0, 3f /* dst aligned, src unaligned */
+     nop
+
+    /*
+     * Both src/dst are unaligned: read 1 halfword from each, then
+     * fall-off to continue with word-aligned operation.
+     */
+    lhu t1, 0 (a1)
+    lhu t0, 0 (a0)
+    addiu a2, a2, -1  /* len-- */
+    andi  t2, t1, 0x07e0
+    andi  t1, t1, 0xf81f
+    mul   t2, t2, a3
+    mul   t1, t1, t6
+    andi  t3, t0, 0x07e0
+    andi  t0, t0, 0xf81f
+    mul   t3, t3, t5
+    mul   t0, t0, t7
+    addiu a1, a1,  2  /* src++ */
+    srl   t2, t2, 8
+    srl   t1, t1, 6
+    andi  t2, t2, 0x07e0
+    andi  t1, t1, 0xf81f
+    or    t1, t1, t2
+    srl   t3, t3, 8
+    srl   t0, t0, 6
+    andi  t3, t3, 0x07e0
+    andi  t0, t0, 0xf81f
+    or    t0, t0, t3
+    addu  t0, t0, t1  /* src * alpha + dst * ialpha */
+    sh    t0,  0 (a0)
+    addiu a0, a0,  2  /* dst++ */
+
+    /*
+     * Both src/dst pointers are word-aligned, process eight
+     * items at a time in an unrolled loop.
+     */
+4:  beqz   a2, 0f
+     srl   t4, a2, 3   /* t4 = len / 8 */
+    beqz   t4, 5f
+     andi  a2, a2, 0x7 /* len = len % 8 */
+    SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1
+
+1:  lw  t1,  0 (a1)  /* [s0, s1] */
+    lw  v1,  4 (a1)  /* [s2, s3] */
+    lw  s1,  8 (a1)  /* [s4, s5] */
+    lw  s3, 12 (a1)  /* [s6, s7] */
+
+    lw  t0,  0 (a0)  /* [d0, d1] */
+    lw  v0,  4 (a0)  /* [d2, d3] */
+    lw  s0,  8 (a0)  /* [d4, d5] */
+    lw  s2, 12 (a0)  /* [d6, d7] */
+
+    pref 4, 16 (a1)
+    pref 5, 16 (a0)
+
+    and     t2, t1, t8
+    ext     t3, t2, 0, 16
+    srl     t2, t2, 16
+    mul     t3, t3, a3
+    mul     t2, t2, a3
+    and     t1, t1, t9
+    ext     s4, t1, 0, 16
+    mul     s4, s4, t6
+    srl     t1, t1, 16
+    mul     t1, t1, t6
+    srl     t3, t3, 8
+    srl     t2, t2, 8
+    append  t2, t3, 16
+    and     t2, t2, t8
+    srl     s4, s4, 6
+    and     t3, v1, t8
+    srl     t1, t1, 6
+    append  t1, s4, 16
+    and     t1, t1, t9
+    or      t1, t1, t2
+    ext     t2, t3, 0, 16
+    srl     t3, t3, 16
+    mul     t2, t2, a3
+    mul     t3, t3, a3
+    and     v1, v1, t9
+    ext     s4, v1, 0, 16
+    mul     s4, s4, t6
+    srl     v1, v1, 16
+    mul     v1, v1, t6
+    srl     t2, t2, 8
+    srl     t3, t3, 8
+    append  t3, t2, 16
+    and     t3, t3, t8
+    srl     s4, s4, 6
+    and     t2, s1, t8
+    srl     v1, v1, 6
+    append  v1, s4, 16
+    and     v1, v1, t9
+    or      v1, v1, t3
+    ext     t3, t2, 0, 16
+    srl     t2, t2, 16
+    mul     t3, t3, a3
+    mul     t2, t2, a3
+    and     s1, s1, t9
+    ext     s4, s1, 0, 16
+    mul     s4, s4, t6
+    srl     s1, s1, 16
+    mul     s1, s1, t6
+    srl     t3, t3, 8
+    srl     t2, t2, 8
+    append  t2, t3, 16
+    and     t2, t2, t8
+    srl     s4, s4, 6
+    and     t3, s3, t8
+    srl     s1, s1, 6
+    append  s1, s4, 16
+    and     s1, s1, t9
+    or      s1, s1, t2
+    ext     t2, t3, 0, 16
+    srl     t3, t3, 16
+    mul     t2, t2, a3
+    mul     t3, t3, a3
+    and     s3, s3, t9
+    ext     s4, s3, 0, 16
+    mul     s4, s4, t6
+    srl     s3, s3, 16
+    mul     s3, s3, t6
+    srl     t2, t2, 8
+    srl     t3, t3, 8
+    append  t3, t2, 16
+    and     t3, t3, t8
+    srl     s4, s4, 6
+    and     t2, t0, t8
+    srl     s3, s3, 6
+    append  s3, s4, 16
+    and     s3, s3, t9
+    or      s3, s3, t3
+    ext     t3, t2, 0, 16
+    srl     t2, t2, 16
+    mul     t3, t3, t5
+    mul     t2, t2, t5
+    and     t0, t0, t9
+    ext     s4, t0, 0, 16
+    mul     s4, s4, t7
+    srl     t0, t0, 16
+    mul     t0, t0, t7
+    srl     t3, t3, 8
+    srl     t2, t2, 8
+    append  t2, t3, 16
+    and     t2, t2, t8
+    srl     s4, s4, 6
+    and     t3, v0, t8
+    srl     t0, t0, 6
+    append  t0, s4, 16
+    and     t0, t0, t9
+    or      t0, t0, t2
+    ext     t2, t3, 0, 16
+    srl     t3, t3, 16
+    mul     t2, t2, t5
+    mul     t3, t3, t5
+    and     v0, v0, t9
+    ext     s4, v0, 0, 16
+    mul     s4, s4, t7
+    srl     v0, v0, 16
+    mul     v0, v0, t7
+    srl     t2, t2, 8
+    srl     t3, t3, 8
+    append  t3, t2, 16
+    and     t3, t3, t8
+    srl     s4, s4, 6
+    and     t2, s0, t8
+    srl     v0, v0, 6
+    append  v0, s4, 16
+    and     v0, v0, t9
+    or      v0, v0, t3
+    ext     t3, t2, 0, 16
+    srl     t2, t2, 16
+    mul     t3, t3, t5
+    mul     t2, t2, t5
+    and     s0, s0, t9
+    ext     s4, s0, 0, 16
+    mul     s4, s4, t7
+    srl     s0, s0, 16
+    mul     s0, s0, t7
+    srl     t3, t3, 8
+    srl     t2, t2, 8
+    append  t2, t3, 16
+    and     t2, t2, t8
+    srl     s4, s4, 6
+    and     t3, s2, t8
+    srl     s0, s0, 6
+    append  s0, s4, 16
+    and     s0, s0, t9
+    or      s0, s0, t2
+    ext     t2, t3, 0, 16
+    srl     t3, t3, 16
+    mul     t2, t2, t5
+    mul     t3, t3, t5
+    and     s2, s2, t9
+    ext     s4, s2, 0, 16
+    mul     s4, s4, t7
+    srl     s2, s2, 16
+    mul     s2, s2, t7
+    srl     t2, t2, 8
+    srl     t3, t3, 8
+    append  t3, t2, 16
+    and     t3, t3, t8
+    srl     s4, s4, 6
+    addu.ph t0, t0, t1
+    srl     s2, s2, 6
+    append  s2, s4, 16
+    and     s2, s2, t9
+    or      s2, s2, t3
+    addu.ph v0, v0, v1  /* v0 = [S2 + D2, S3 + D3] */
+    addu.ph s0, s0, s1  /* s0 = [S4 + D4, S5 + D5] */
+    addu.ph s2, s2, s3  /* s2 = [S6 + D6, S7 + D7] */
+
+    sw      t0,  0 (a0) /* [SS0, SS1] */
+    sw      v0,  4 (a0) /* [SS2, SS3] */
+    sw      s0,  8 (a0) /* [SS4, SS5] */
+    sw      s2, 12 (a0) /* [SS6, SS7] */
+
+    addiu   t4, t4, -1   /* t4-- */
+    addiu   a1, a1, 16   /* src += 8 */
+
+    bnez    t4, 1b
+     addiu  a0, a0, 16   /* dst += 8 */
+
+    RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1
+    b 5f
+     nop
+
+
+    /* dst unaligned: do one item and fall down to the src unaligned case */
+2:  lhu t1, 0 (a1)
+    lhu t0, 0 (a0)
+    addiu a2, a2, -1  /* len-- */
+    andi  t2, t1, 0x07e0
+    andi  t1, t1, 0xf81f
+    mul   t2, t2, a3
+    mul   t1, t1, t6
+    andi  t3, t0, 0x07e0
+    andi  t0, t0, 0xf81f
+    mul   t3, t3, t5
+    mul   t0, t0, t7
+    addiu a1, a1,  2  /* src++ */
+    srl   t2, t2, 8
+    srl   t1, t1, 6
+    andi  t2, t2, 0x07e0
+    andi  t1, t1, 0xf81f
+    or    t1, t1, t2
+    srl   t3, t3, 8
+    srl   t0, t0, 6
+    andi  t3, t3, 0x07e0
+    andi  t0, t0, 0xf81f
+    or    t0, t0, t3
+    addu  t0, t0, t1  /* src * alpha + dst * ialpha */
+    sh    t0,  0 (a0)
+    addiu a0, a0,  2  /* dst++ */
+
+    /* src unaligned */
+3:  beqz   a2, 0f
+     srl   t4, a2, 3   /* t4 = len / 8 */
+    beqz   t4, 5f
+     andi  a2, a2, 0x7 /* len = len % 8 */
+    SAVE_REGS_ON_STACK 12, s0, s1, s2, s3, s4, v0, v1
+
+1:  lw     t0,  0 (a0) /* [d0, d1] */
+    lw     v0,  4 (a0) /* [d2, d3] */
+    lw     s0,  8 (a0) /* [d4, d5] */
+    lw     s2, 12 (a0) /* [d6, d7] */
+
+    LDHI  (t1,  0, a1) /* [s0, __] */
+    lw     v1,  2 (a1) /* [s1, s2] */
+    lw     s1,  6 (a1) /* [s3, s4] */
+    lw     s3, 10 (a1) /* [s5, s6] */
+    LDLO  (s4, 12, a1) /* [__, s7] */
+
+    pref    4, 14 (a1)
+    pref    5, 16 (a0)
+
+    PACK  (t1, v1, t1) /* [s0, s1] */
+    PACK  (v1, s1, v1) /* [s2, s3] */
+    PACK  (s1, s3, s1) /* [s4, s5] */
+    PACK  (s3, s4, s3) /* [s6, s7] */
+
+    and     t2, t1, t8
+    ext     t3, t2, 0, 16
+    srl     t2, t2, 16
+    mul     t3, t3, a3
+    mul     t2, t2, a3
+    and     t1, t1, t9
+    ext     s4, t1, 0, 16
+    mul     s4, s4, t6
+    srl     t1, t1, 16
+    mul     t1, t1, t6
+    srl     t3, t3, 8
+    srl     t2, t2, 8
+    append  t2, t3, 16
+    and     t2, t2, t8
+    srl     s4, s4, 6
+    and     t3, v1, t8
+    srl     t1, t1, 6
+    append  t1, s4, 16
+    and     t1, t1, t9
+    or      t1, t1, t2
+    ext     t2, t3, 0, 16
+    srl     t3, t3, 16
+    mul     t2, t2, a3
+    mul     t3, t3, a3
+    and     v1, v1, t9
+    ext     s4, v1, 0, 16
+    mul     s4, s4, t6
+    srl     v1, v1, 16
+    mul     v1, v1, t6
+    srl     t2, t2, 8
+    srl     t3, t3, 8
+    append  t3, t2, 16
+    and     t3, t3, t8
+    srl     s4, s4, 6
+    and     t2, s1, t8
+    srl     v1, v1, 6
+    append  v1, s4, 16
+    and     v1, v1, t9
+    or      v1, v1, t3
+    ext     t3, t2, 0, 16
+    srl     t2, t2, 16
+    mul     t3, t3, a3
+    mul     t2, t2, a3
+    and     s1, s1, t9
+    ext     s4, s1, 0, 16
+    mul     s4, s4, t6
+    srl     s1, s1, 16
+    mul     s1, s1, t6
+    srl     t3, t3, 8
+    srl     t2, t2, 8
+    append  t2, t3, 16
+    and     t2, t2, t8
+    srl     s4, s4, 6
+    and     t3, s3, t8
+    srl     s1, s1, 6
+    append  s1, s4, 16
+    and     s1, s1, t9
+    or      s1, s1, t2
+    ext     t2, t3, 0, 16
+    srl     t3, t3, 16
+    mul     t2, t2, a3
+    mul     t3, t3, a3
+    and     s3, s3, t9
+    ext     s4, s3, 0, 16
+    mul     s4, s4, t6
+    srl     s3, s3, 16
+    mul     s3, s3, t6
+    srl     t2, t2, 8
+    srl     t3, t3, 8
+    append  t3, t2, 16
+    and     t3, t3, t8
+    srl     s4, s4, 6
+    and     t2, t0, t8
+    srl     s3, s3, 6
+    append  s3, s4, 16
+    and     s3, s3, t9
+    or      s3, s3, t3
+    ext     t3, t2, 0, 16
+    srl     t2, t2, 16
+    mul     t3, t3, t5
+    mul     t2, t2, t5
+    and     t0, t0, t9
+    ext     s4, t0, 0, 16
+    mul     s4, s4, t7
+    srl     t0, t0, 16
+    mul     t0, t0, t7
+    srl     t3, t3, 8
+    srl     t2, t2, 8
+    append  t2, t3, 16
+    and     t2, t2, t8
+    srl     s4, s4, 6
+    and     t3, v0, t8
+    srl     t0, t0, 6
+    append  t0, s4, 16
+    and     t0, t0, t9
+    or      t0, t0, t2
+    ext     t2, t3, 0, 16
+    srl     t3, t3, 16
+    mul     t2, t2, t5
+    mul     t3, t3, t5
+    and     v0, v0, t9
+    ext     s4, v0, 0, 16
+    mul     s4, s4, t7
+    srl     v0, v0, 16
+    mul     v0, v0, t7
+    srl     t2, t2, 8
+    srl     t3, t3, 8
+    append  t3, t2, 16
+    and     t3, t3, t8
+    srl     s4, s4, 6
+    and     t2, s0, t8
+    srl     v0, v0, 6
+    append  v0, s4, 16
+    and     v0, v0, t9
+    or      v0, v0, t3
+    ext     t3, t2, 0, 16
+    srl     t2, t2, 16
+    mul     t3, t3, t5
+    mul     t2, t2, t5
+    and     s0, s0, t9
+    ext     s4, s0, 0, 16
+    mul     s4, s4, t7
+    srl     s0, s0, 16
+    mul     s0, s0, t7
+    srl     t3, t3, 8
+    srl     t2, t2, 8
+    append  t2, t3, 16
+    and     t2, t2, t8
+    srl     s4, s4, 6
+    and     t3, s2, t8
+    srl     s0, s0, 6
+    append  s0, s4, 16
+    and     s0, s0, t9
+    or      s0, s0, t2
+    ext     t2, t3, 0, 16
+    srl     t3, t3, 16
+    mul     t2, t2, t5
+    mul     t3, t3, t5
+    and     s2, s2, t9
+    ext     s4, s2, 0, 16
+    mul     s4, s4, t7
+    srl     s2, s2, 16
+    mul     s2, s2, t7
+    srl     t2, t2, 8
+    srl     t3, t3, 8
+    append  t3, t2, 16
+    and     t3, t3, t8
+    srl     s4, s4, 6
+    addu.ph t0, t0, t1
+    srl     s2, s2, 6
+    append  s2, s4, 16
+    and     s2, s2, t9
+    or      s2, s2, t3
+    addu.ph v0, v0, v1  /* v0 = [S2 + D2, S3 + D3] */
+    addu.ph s0, s0, s1  /* s0 = [S4 + D4, S5 + D5] */
+    addu.ph s2, s2, s3  /* s2 = [S6 + D6, S7 + D7] */
+
+    sw      t0,  0 (a0) /* [SS0, SS1] */
+    sw      v0,  4 (a0) /* [SS2, SS3] */
+    sw      s0,  8 (a0) /* [SS4, SS5] */
+    sw      s2, 12 (a0) /* [SS6, SS7] */
+
+    addiu   t4, t4, -1   /* t4-- */
+    addiu   a1, a1, 16   /* src += 8 */
+
+    bnez    t4, 1b
+     addiu  a0, a0, 16   /* dst += 8 */
+
+    RESTORE_REGS_FROM_STACK 12, s0, s1, s2, s3, s4, v0, v1
+
+5:  /* Process remaining items (len < 8), one at a time */
+    beqz  a2, 0f
+     nop
+
+1:  lhu    t1, 0 (a1)
+    lhu    t0, 0 (a0)
+    addiu  a1, a1, 2   /* src++ */
+    andi  t2, t1, 0x07e0
+    andi  t1, t1, 0xf81f
+    mul   t2, t2, a3
+    mul   t1, t1, t6
+    andi  t3, t0, 0x07e0
+    andi  t0, t0, 0xf81f
+    mul   t3, t3, t5
+    mul   t0, t0, t7
+    addiu a2, a2,  -1   /* len-- */
+    srl   t2, t2, 8
+    srl   t1, t1, 6
+    andi  t2, t2, 0x07e0
+    andi  t1, t1, 0xf81f
+    or    t1, t1, t2
+    srl   t3, t3, 8
+    srl   t0, t0, 6
+    andi  t3, t3, 0x07e0
+    andi  t0, t0, 0xf81f
+    or    t0, t0, t3
+
+    addu   t0, t0, t1  /* src*alpha + dst*ialpha */
+    sh     t0, 0 (a0)
+    bnez   a2, 1b
+     addiu a0, a0, 2   /* dst++ */
+
+0:  jr ra
+     nop
+
+END(qt_blend_rgb16_on_rgb16_mips_dspr2_asm)
+
+#undef PACK
+#undef LDHI
+#undef LDLO
--- a/src/gui/painting/qt_mips_asm_dsp_p.h
+++ b/src/gui/painting/qt_mips_asm_dsp_p.h
@ -1,6 +1,6 @@
 /****************************************************************************
 **
-** Copyright (C) 2012 MIPS Technologies, www.mips.com, author Damir Tatalovic <dtatalovic@mips.com>
+** Copyright (C) 2013 Imagination Technologies Limited, www.imgtec.com
 ** Contact: http://www.qt-project.org/legal
 **
 ** This file is part of the QtGui module of the Qt Toolkit.
@ -261,4 +261,168 @@ LEAF_MIPS32R2(symbol)                                   \
    or                \out_1,    \scratch1, \scratch3
 .endm

+/*
+ * Assembly-time check that stack_offset is big enough for storing/restoring
+ * regs_num registers to/from the stack. Assembling stops with the error
+ * "Stack offset too small." when it is not.
+ * Storing the registers takes regs_num*4 bytes. Since MIPS ABI allows usage
+ * of first 16 bytes of stack frame (this is reserved for input arguments of
+ * the functions, already stored in a0-a3), stack size can be further
+ * optimized by utilizing this space, so the condition that is actually
+ * enforced is:
+ *      stack_offset >= regs_num*4 - 16
+ * Parameters:
+ *  regs_num     - number of registers that are stored/restored
+ *  stack_offset - number of bytes reserved on the stack for them
+ */
+.macro CHECK_STACK_OFFSET regs_num, stack_offset
+.if \stack_offset < \regs_num * 4 - 16
+.error "Stack offset too small."
+.endif
+.endm
+
+/*
+ * Saves set of registers on stack. Maximum number of registers that
+ * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
+ * Stack offset is number of bytes that are subtracted from stack pointer
+ * (sp) before registers are pushed in order to provide enough space on
+ * stack (offset must be non-negative multiple of 4, and must be big enough,
+ * as described by CHECK_STACK_OFFSET macro; offset 0 leaves sp untouched).
+ * Register r1 is mandatory and is stored at 0(sp); every following register
+ * rN that is passed is stored at (N-1)*4(sp). Omitted registers default to 0
+ * and are skipped. The size of the stack offset is verified only for the
+ * fifth and later registers (r5-r14); r1-r4 are stored without that check.
+ * The divisibility test is written as
+ * stack_offset - (stack_offset / 4) * 4, i.e. the remainder of the division
+ * by 4, which is non-zero for an offset that is not a multiple of 4.
+ * This macro is intended to be used in combination with
+ * RESTORE_REGS_FROM_STACK macro. Example:
+ *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
+ *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
+                          r2  = 0, r3  = 0, r4  = 0, \
+                          r5  = 0, r6  = 0, r7  = 0, \
+                          r8  = 0, r9  = 0, r10 = 0, \
+                          r11 = 0, r12 = 0, r13 = 0, \
+                          r14 = 0
+    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
+    .error "Stack offset must be positive and multiple of 4."
+    .endif
+    .if \stack_offset != 0
+    addiu           sp, sp, -\stack_offset
+    .endif
+    sw              \r1, 0(sp)
+    .if \r2 != 0
+    sw              \r2, 4(sp)
+    .endif
+    .if \r3 != 0
+    sw              \r3, 8(sp)
+    .endif
+    .if \r4 != 0
+    sw              \r4, 12(sp)
+    .endif
+    .if \r5 != 0
+    CHECK_STACK_OFFSET 5, \stack_offset
+    sw              \r5, 16(sp)
+    .endif
+    .if \r6 != 0
+    CHECK_STACK_OFFSET 6, \stack_offset
+    sw              \r6, 20(sp)
+    .endif
+    .if \r7 != 0
+    CHECK_STACK_OFFSET 7, \stack_offset
+    sw              \r7, 24(sp)
+    .endif
+    .if \r8 != 0
+    CHECK_STACK_OFFSET 8, \stack_offset
+    sw              \r8, 28(sp)
+    .endif
+    .if \r9 != 0
+    CHECK_STACK_OFFSET 9, \stack_offset
+    sw              \r9, 32(sp)
+    .endif
+    .if \r10 != 0
+    CHECK_STACK_OFFSET 10, \stack_offset
+    sw              \r10, 36(sp)
+    .endif
+    .if \r11 != 0
+    CHECK_STACK_OFFSET 11, \stack_offset
+    sw              \r11, 40(sp)
+    .endif
+    .if \r12 != 0
+    CHECK_STACK_OFFSET 12, \stack_offset
+    sw              \r12, 44(sp)
+    .endif
+    .if \r13 != 0
+    CHECK_STACK_OFFSET 13, \stack_offset
+    sw              \r13, 48(sp)
+    .endif
+    .if \r14 != 0
+    CHECK_STACK_OFFSET 14, \stack_offset
+    sw              \r14, 52(sp)
+    .endif
+.endm
+
+/*
+ * Restores set of registers from stack. Maximum number of registers that
+ * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
+ * Stack offset is number of bytes that are added to stack pointer (sp)
+ * after registers are restored (offset must be non-negative multiple of 4,
+ * and must be big enough, as described by CHECK_STACK_OFFSET macro; offset 0
+ * leaves sp untouched).
+ * Register r1 is mandatory and is loaded from 0(sp); every following
+ * register rN that is passed is loaded from (N-1)*4(sp). Omitted registers
+ * default to 0 and are skipped. The size of the stack offset is verified
+ * only for the fifth and later registers (r5-r14).
+ * This macro is intended to be used in combination with SAVE_REGS_ON_STACK
+ * macro, invoked with the same stack offset and the same register list in
+ * the same order, so that every register is read back from the slot it was
+ * written to.
+ * Example:
+ *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
+ *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
+                               r2  = 0, r3  = 0, r4  = 0, \
+                               r5  = 0, r6  = 0, r7  = 0, \
+                               r8  = 0, r9  = 0, r10 = 0, \
+                               r11 = 0, r12 = 0, r13 = 0, \
+                               r14 = 0
+    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
+    .error "Stack offset must be positive and multiple of 4."
+    .endif
+    lw              \r1, 0(sp)
+    .if \r2 != 0
+    lw              \r2, 4(sp)
+    .endif
+    .if \r3 != 0
+    lw              \r3, 8(sp)
+    .endif
+    .if \r4 != 0
+    lw              \r4, 12(sp)
+    .endif
+    .if \r5 != 0
+    CHECK_STACK_OFFSET 5, \stack_offset
+    lw              \r5, 16(sp)
+    .endif
+    .if \r6 != 0
+    CHECK_STACK_OFFSET 6, \stack_offset
+    lw              \r6, 20(sp)
+    .endif
+    .if \r7 != 0
+    CHECK_STACK_OFFSET 7, \stack_offset
+    lw              \r7, 24(sp)
+    .endif
+    .if \r8 != 0
+    CHECK_STACK_OFFSET 8, \stack_offset
+    lw              \r8, 28(sp)
+    .endif
+    .if \r9 != 0
+    CHECK_STACK_OFFSET 9, \stack_offset
+    lw              \r9, 32(sp)
+    .endif
+    .if \r10 != 0
+    CHECK_STACK_OFFSET 10, \stack_offset
+    lw              \r10, 36(sp)
+    .endif
+    .if \r11 != 0
+    CHECK_STACK_OFFSET 11, \stack_offset
+    lw              \r11, 40(sp)
+    .endif
+    .if \r12 != 0
+    CHECK_STACK_OFFSET 12, \stack_offset
+    lw              \r12, 44(sp)
+    .endif
+    .if \r13 != 0
+    CHECK_STACK_OFFSET 13, \stack_offset
+    lw              \r13, 48(sp)
+    .endif
+    .if \r14 != 0
+    CHECK_STACK_OFFSET 14, \stack_offset
+    lw              \r14, 52(sp)
+    .endif
+    .if \stack_offset != 0
+    addiu           sp, sp, \stack_offset
+    .endif
+.endm
+
 #endif // QT_MIPS_ASM_DSP_H