Create a function that merges the SSE common code
Reviewed-by: Samuel Rødal (cherry picked from commit bb3bd601560132df769c32808ae0b36c56d1caab) Change-Id: Icd7f661785a793effcd4d8cd08ffa8bb5a592cd9 Reviewed-on: http://codereview.qt-project.org/4467 Reviewed-by: Qt Sanity Bot <qt_sanity_bot@ovi.com> Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@nokia.com> Reviewed-by: Samuel Rødal <samuel.rodal@nokia.com>
This commit is contained in:
parent
85f963b2f1
commit
526c851902
@ -3535,6 +3535,38 @@ bool QString::endsWith(const QChar &c, Qt::CaseSensitivity cs) const
|
||||
}
|
||||
|
||||
|
||||
#if defined(QT_ALWAYS_HAVE_SSE2)
|
||||
// Returns a copy of chunk (eight 16-bit elements) in which every element whose
// unsigned value is greater than 0xff, i.e. every non-Latin 1 character, has
// been replaced by '?'. Elements in the range 0x00-0xff are left untouched.
static inline __m128i mergeQuestionMarks(__m128i chunk)
{
    const __m128i questionMark = _mm_set1_epi16('?');

    // SSE has no compare instruction for unsigned comparison.
    // Both operands are therefore shifted by 0x8000 so that the signed
    // comparison below orders them the way an unsigned comparison would.
    // _mm_set1_epi16() takes a short and neither constant fits in its
    // positive range, so the wrap-around is requested with an explicit cast
    // instead of an implicit (warning-prone) int -> short truncation.
    const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
    const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));

    // each 16 bits of offLimitMask are all ones if the source element is
    // outside Latin 1 (> 0xff) and all zeros otherwise
    const __m128i signedChunk = _mm_add_epi16(chunk, signedBitOffset);
    const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);

# ifdef __SSE4_1__
    // replace the non-Latin 1 characters in the chunk with question marks
    chunk = _mm_blendv_epi8(chunk, questionMark, offLimitMask);
# else
    // offLimitQuestionMark contains '?' for each 16 bits that were off-limit,
    // the 16 bits that were correct contain zeros
    const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);

    // correctBytes contains the elements that were within the limit,
    // the 16 bits that were off-limit contain zeros
    const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk);

    // merge offLimitQuestionMark and correctBytes to get the result
    chunk = _mm_or_si128(correctBytes, offLimitQuestionMark);
# endif
    return chunk;
}
|
||||
#endif
|
||||
|
||||
static QByteArray toLatin1_helper(const QChar *data, int length)
|
||||
{
|
||||
QByteArray ba;
|
||||
@ -3545,50 +3577,15 @@ static QByteArray toLatin1_helper(const QChar *data, int length)
|
||||
#if defined(QT_ALWAYS_HAVE_SSE2)
|
||||
if (length >= 16) {
|
||||
const int chunkCount = length >> 4; // divided by 16
|
||||
const __m128i questionMark = _mm_set1_epi16('?');
|
||||
// SSE has no compare instruction for unsigned comparison.
|
||||
// The variables must be shiffted + 0x8000 to be compared
|
||||
const __m128i signedBitOffset = _mm_set1_epi16(short(0x8000));
|
||||
const __m128i thresholdMask = _mm_set1_epi16(short(0xff + 0x8000));
|
||||
|
||||
for (int i = 0; i < chunkCount; ++i) {
|
||||
__m128i chunk1 = _mm_loadu_si128((__m128i*)src); // load
|
||||
chunk1 = mergeQuestionMarks(chunk1);
|
||||
src += 8;
|
||||
{
|
||||
// each 16 bit is equal to 0xFF if the source is outside latin 1 (>0xff)
|
||||
const __m128i signedChunk = _mm_add_epi16(chunk1, signedBitOffset);
|
||||
const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
chunk1 = _mm_blendv_epi8(chunk1, questionMark, offLimitMask);
|
||||
#else
|
||||
|
||||
// offLimitQuestionMark contains '?' for each 16 bits that was off-limit
|
||||
// the 16 bits that were correct contains zeros
|
||||
const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
|
||||
|
||||
// correctBytes contains the bytes that were in limit
|
||||
// the 16 bits that were off limits contains zeros
|
||||
const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk1);
|
||||
|
||||
// merge offLimitQuestionMark and correctBytes to have the result
|
||||
chunk1 = _mm_or_si128(correctBytes, offLimitQuestionMark);
|
||||
#endif
|
||||
}
|
||||
|
||||
__m128i chunk2 = _mm_loadu_si128((__m128i*)src); // load
|
||||
chunk2 = mergeQuestionMarks(chunk2);
|
||||
src += 8;
|
||||
{
|
||||
// exactly the same operations as for the previous chunk of data
|
||||
const __m128i signedChunk = _mm_add_epi16(chunk2, signedBitOffset);
|
||||
const __m128i offLimitMask = _mm_cmpgt_epi16(signedChunk, thresholdMask);
|
||||
#ifdef __SSE4_1__
|
||||
chunk2 = _mm_blendv_epi8(chunk2, questionMark, offLimitMask);
|
||||
#else
|
||||
const __m128i offLimitQuestionMark = _mm_and_si128(offLimitMask, questionMark);
|
||||
const __m128i correctBytes = _mm_andnot_si128(offLimitMask, chunk2);
|
||||
chunk2 = _mm_or_si128(correctBytes, offLimitQuestionMark);
|
||||
#endif
|
||||
}
|
||||
|
||||
// pack the two vector to 16 x 8bits elements
|
||||
const __m128i result = _mm_packus_epi16(chunk1, chunk2);
|
||||
|
Loading…
Reference in New Issue
Block a user