QPixelFormat: don't use bit fields

GCC at least is generating horrible code for them, so do the bit twiddling
ourselves. This is not premature optimization, because it might be the seed
for a QBitField class, as suggested by Andrei Alexandrescu at GoingNative 2013 (GN2013).

The assembler code of default and copy ctors is unchanged between the old
and new versions. Accessors such as redSize() are virtually identical (with
an `and` and a `mov` instruction exchanging places). The interesting thing is what happens in
operator== and the set-all-fields ctor. These are the disassemblies of
functions wrapping op== and the ctor, resp.:

bool pf_comparison(QPixelFormat, QPixelFormat):
        .cfi_startproc                                   |        .cfi_startproc
        movl    %esi, %edx                               |        cmpq    %rsi, %rdi
        xorl    %eax, %eax                               |        sete    %al
        xorl    %edi, %edx                               |        ret
        andl    $15, %edx                                |        .cfi_endproc
        jne     .L53                                     |
        movl    %esi, %edx                               |
        xorl    %edi, %edx                               |
        testw   $1008, %dx                               |
        jne     .L53                                     |
        movl    %esi, %edx                               |
        pushq   %rbx                                     |
        .cfi_def_cfa_offset 16                           |
        .cfi_offset 3, -16                               |
        movl    %edi, %ebx                               |
        movzbl  %dh, %ecx                                |
        movzbl  %bh, %edx                                |
        xorl    %ecx, %edx                               |
        andl    $252, %edx                               |
        jne     .L40                                     |
        movq    %rdi, %rcx                               |
        movq    %rsi, %rdx                               |
        shrq    $16, %rcx                                |
        shrq    $16, %rdx                                |
        movl    %ecx, %ebx                               |
        xorl    %edx, %ebx                               |
        movl    %ebx, %r8d                               |
        andl    $63, %r8d                                |
        jne     .L40                                     |
        xorl    %ecx, %edx                               |
        testw   $4032, %dx                               |
        jne     .L40                                     |
        movq    %rsi, %rcx                               |
        movabsq $16911433728, %rdx                       |
        xorq    %rdi, %rcx                               |
        testq   %rdx, %rcx                               |
        jne     .L40                                     |
        movq    %rdi, %rdx                               |
        movq    %rsi, %rcx                               |
        shrq    $32, %rdx                                |
        shrq    $32, %rcx                                |
        movl    %edx, %ebx                               |
        xorl    %ecx, %ebx                               |
        movl    %ebx, %r8d                               |
        andl    $252, %r8d                               |
        jne     .L40                                     |
        movq    %rsi, %r9                                |
        movq    %rdi, %r8                                |
        shrq    $40, %r9                                 |
        shrq    $40, %r8                                 |
        xorl    %r9d, %r8d                               |
        andl    $127, %r8d                               |
        jne     .L40                                     |
        xorl    %ecx, %edx                               |
        andl    $98304, %edx                             |
        jne     .L40                                     |
        movq    %rsi, %rcx                               |
        movq    %rdi, %rdx                               |
        shrq    $48, %rcx                                |
        shrq    $48, %rdx                                |
        xorl    %ecx, %edx                               |
        andl    $126, %edx                               |
        jne     .L40                                     |
        shrq    $48, %rdi                                |
        movq    %rcx, %rax                               |
        xorl    %edi, %eax                               |
        testw   $-128, %ax                               |
        sete    %al                                      |
        .p2align 4,,10                                   |
        .p2align 3                                       |
.L40:                                                    |
        popq    %rbx                                     |
        .cfi_restore 3                                   |
        .cfi_def_cfa_offset 8                            |
.L53:                                                    |
        rep                                              |
        ret                                              |
        .cfi_endproc                                     |

That one is pretty obvious. Hint: the right one is the new version.

QPixelFormat pf_unwieldy_ctor(QPixelFormat::ColorModel, ...):
        .cfi_startproc                                   |        .cfi_startproc
        movq    %rbp, -32(%rsp)                          |        movq    %rbx, -40(%rsp)
        movq    %r12, -24(%rsp)                          |        .cfi_offset 3, -48
        andl    $15, %edi                                |        movzbl  8(%rsp), %ebx
        movq    %r13, -16(%rsp)                          |        andl    $63, %esi
        movq    %r14, -8(%rsp)                           |        movq    %rbp, -32(%rsp)
        andl    $63, %esi                                |        movq    %r12, -24(%rsp)
        movl    48(%rsp), %r11d                          |        andl    $63, %edx
        movzbl  8(%rsp), %r10d                           |        .cfi_offset 6, -40
        andl    $63, %edx                                |        .cfi_offset 12, -32
        .cfi_offset 6, -40                               |        movl    16(%rsp), %ebp
        .cfi_offset 12, -32                              |        movl    32(%rsp), %r11d
        .cfi_offset 13, -24                              |        andl    $63, %ecx
        .cfi_offset 14, -16                              |        movl    40(%rsp), %r10d
        movzbl  16(%rsp), %r14d                          |        movq    %r13, -16(%rsp)
        movzbl  24(%rsp), %r13d                          |        andl    $63, %r8d
        andl    $63, %ecx                                |        andl    $63, %ebx
        movzbl  32(%rsp), %r12d                          |        .cfi_offset 13, -24
        movzbl  40(%rsp), %ebp                           |        movl    48(%rsp), %r13d
        andl    $63, %r8d                                |        andl    $63, %r9d
        movl    %r11d, %eax                              |        movq    %rbx, %r12
        movq    %rbx, -40(%rsp)                          |        movl    24(%rsp), %ebx
        .cfi_offset 3, -48                               |        andl    $1, %ebp
        andl    $63, %r9d                                |        andl    $1, %r11d
        andl    $3, %eax                                 |        andl    $15, %r10d
        andl    $63, %r10d                               |        movq    %rdi, %rax
        xorl    %ebx, %ebx                               |        andl    $15, %eax
        andl    $1, %r14d                                |        salq    $4, %rsi
        andl    $1, %r13d                                |        salq    $10, %rdx
        andl    $1, %r12d                                |        andl    $1, %ebx
        andl    $15, %ebp                                |        salq    $16, %rcx
        cmpl    $2, %r11d                                |        salq    $22, %r8
        cmovne  %eax, %ebx                               |        salq    $28, %r9
        andl    $63, %esi                                |        salq    $34, %r12
        movq    %rdi, %rax                               |        salq    $40, %rbp
        salq    $4, %rsi                                 |        salq    $41, %rbx
        andl    $15, %eax                                |        salq    $42, %r11
        andl    $63, %edx                                |        salq    $43, %r10
        salq    $10, %rdx                                |        cmpl    $2, %r13d
        orq     %rsi, %rax                               |        movq    %r14, -8(%rsp)
        andl    $63, %ecx                                |        .cfi_offset 14, -16
        salq    $16, %rcx                                |        movzbl  56(%rsp), %r14d
        orq     %rdx, %rax                               |        je      .L45
        andl    $63, %r8d                                |        andl    $3, %r13d
        orq     %rcx, %rax                               |        salq    $47, %r13
        salq    $22, %r8                                 |.L44:
        andl    $63, %r9d                                |        orq     %rsi, %rax
        salq    $28, %r9                                 |        orq     %rdx, %rax
        orq     %r8, %rax                                |        movq    %r14, %rdx
        andl    $63, %r10d                               |        movq    -8(%rsp), %r14
        movq    %r14, %rcx                               |        orq     %rcx, %rax
        salq    $34, %r10                                |        andl    $63, %edx
        orq     %r9, %rax                                |        orq     %r8, %rax
        andl    $1, %ecx                                 |        salq    $49, %rdx
        movq    %r13, %rdx                               |        orq     %r9, %rax
        orq     %r10, %rax                               |        orq     %r12, %rax
        salq    $40, %rcx                                |        movq    -24(%rsp), %r12
        andl    $1, %edx                                 |        orq     %rbp, %rax
        movq    -16(%rsp), %r13                          |        movq    -32(%rsp), %rbp
        salq    $41, %rdx                                |        orq     %rbx, %rax
        orq     %rcx, %rax                               |        movq    -40(%rsp), %rbx
        movq    %r12, %rcx                               |        orq     %r11, %rax
        orq     %rdx, %rax                               |        orq     %r10, %rax
        andl    $1, %ecx                                 |        orq     %rdx, %rax
        movq    %rbp, %rdx                               |        orq     %r13, %rax
        salq    $42, %rcx                                |        movq    -16(%rsp), %r13
        andl    $15, %edx                                |        ret
        movq    -32(%rsp), %rbp                          |        .p2align 4,,10
        salq    $43, %rdx                                |        .p2align 3
        orq     %rcx, %rax                               |.L45:
        movq    %rbx, %rcx                               |        xorl    %r13d, %r13d
        orq     %rdx, %rax                               |        jmp     .L44
        movzbl  56(%rsp), %edx                           |        .cfi_endproc
        andl    $3, %ecx                                 |
        salq    $47, %rcx                                |
        movq    -40(%rsp), %rbx                          |
        movq    -24(%rsp), %r12                          |
        movq    -8(%rsp), %r14                           |
        andl    $63, %edx                                |
        orq     %rcx, %rax                               |
        salq    $49, %rdx                                |
        orq     %rdx, %rax                               |
        ret                                              |
        .cfi_endproc                                     |

Without bothering to understand the details, they look pretty similar, with the new
version being slightly shorter. But that may not mean anything.

Change-Id: I31e84c9109ccd0c7282351b2e2802407a9b360b4
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
This commit is contained in:
Marc Mutz 2014-08-03 01:17:11 +02:00 committed by Oswald Buddenhagen
parent 35105f173e
commit 3469c262fc

View File

@ -48,6 +48,58 @@ QT_BEGIN_NAMESPACE
class QPixelFormat
{
// QPixelFormat basically is a glorified quint64, split into several fields.
// We could use bit-fields, but GCC at least generates horrible, horrible code for them,
// so we do the bit-twiddling ourselves.
// Width, in bits, of each field packed into the 64-bit `data` member.
// The second through fifth channel fields and the alpha field reuse
// FirstFieldWidth, so all six size fields are 6 bits wide.
enum FieldWidth {
ModelFieldWidth = 4,
FirstFieldWidth = 6,
SecondFieldWidth = FirstFieldWidth,
ThirdFieldWidth = FirstFieldWidth,
FourthFieldWidth = FirstFieldWidth,
FifthFieldWidth = FirstFieldWidth,
AlphaFieldWidth = FirstFieldWidth,
AlphaUsageFieldWidth = 1,
AlphaPositionFieldWidth = 1,
PremulFieldWidth = 1,
TypeInterpretationFieldWidth = 4,
ByteOrderFieldWidth = 2,
SubEnumFieldWidth = 6,
// Padding bits that bring the total up to 64.
UnusedFieldWidth = 9,
// Sum of all widths above (4 + 6*6 + 1 + 1 + 1 + 4 + 2 + 6 + 9 = 64);
// cross-checked against the offset-derived total by Q_STATIC_ASSERT.
TotalFieldWidthByWidths = ModelFieldWidth + FirstFieldWidth + SecondFieldWidth + ThirdFieldWidth +
FourthFieldWidth + FifthFieldWidth + AlphaFieldWidth + AlphaUsageFieldWidth +
AlphaPositionFieldWidth + PremulFieldWidth + TypeInterpretationFieldWidth +
ByteOrderFieldWidth + SubEnumFieldWidth + UnusedFieldWidth
};
// Bit offset of each field within `data`, i.e. the position of the field's
// least significant bit. Every offset is defined as the previous field's
// offset plus that field's width, so the fields are packed back to back
// starting at bit 0 with no gaps.
enum Field {
ModelField = 0,
FirstField = ModelField + ModelFieldWidth,
SecondField = FirstField + FirstFieldWidth,
ThirdField = SecondField + SecondFieldWidth,
FourthField = ThirdField + ThirdFieldWidth,
FifthField = FourthField + FourthFieldWidth,
AlphaField = FifthField + FifthFieldWidth,
AlphaUsageField = AlphaField + AlphaFieldWidth,
AlphaPositionField = AlphaUsageField + AlphaUsageFieldWidth,
PremulField = AlphaPositionField + AlphaPositionFieldWidth,
TypeInterpretationField = PremulField + PremulFieldWidth,
ByteOrderField = TypeInterpretationField + TypeInterpretationFieldWidth,
SubEnumField = ByteOrderField + ByteOrderFieldWidth,
UnusedField = SubEnumField + SubEnumFieldWidth,
// One past the last used bit; must agree with TotalFieldWidthByWidths.
TotalFieldWidthByOffsets = UnusedField + UnusedFieldWidth
};
Q_STATIC_ASSERT(uint(TotalFieldWidthByWidths) == uint(TotalFieldWidthByOffsets));
Q_STATIC_ASSERT(uint(TotalFieldWidthByWidths) == 8 * sizeof(quint64));
// Reads one field out of `data`: shifts `data` right by `offset` bits, keeps the
// low `width` bits via the mask (1 << width) - 1, and converts the result to uchar.
// NOTE(review): assuming uchar is an 8-bit type, a field wider than 8 bits (only
// UnusedFieldWidth in this class) would be truncated by the conversion -- confirm
// before reading such a field through this helper.
Q_DECL_CONSTEXPR inline uchar get(Field offset, FieldWidth width) const Q_DECL_NOTHROW
{ return uchar((data >> uint(offset)) & ((Q_UINT64_C(1) << uint(width)) - Q_UINT64_C(1))); }
// Computes the contribution of a single field to the packed 64-bit word: widens
// `value` to quint64, masks it to its low `width` bits, and shifts it left to
// `offset`. Static and side-effect free -- it does not modify `data`; the
// set-all-fields constructor ORs the results for all fields together.
Q_DECL_CONSTEXPR static inline quint64 set(Field offset, FieldWidth width, uchar value)
{ return (quint64(value) & ((Q_UINT64_C(1) << uint(width)) - Q_UINT64_C(1))) << uint(offset); }
public:
enum ColorModel {
RGB,
@ -122,61 +174,52 @@ public:
ByteOrder byteOrder = CurrentSystemEndian,
uchar subEnum = 0) Q_DECL_NOTHROW;
Q_DECL_CONSTEXPR inline ColorModel colorModel() const Q_DECL_NOTHROW { return ColorModel(model); }
Q_DECL_CONSTEXPR inline uchar channelCount() const Q_DECL_NOTHROW { return (first > 0) +
(second > 0) +
(third > 0) +
(fourth > 0) +
(fifth > 0) +
(alpha > 0); }
Q_DECL_CONSTEXPR inline ColorModel colorModel() const Q_DECL_NOTHROW { return ColorModel(get(ModelField, ModelFieldWidth)); }
Q_DECL_CONSTEXPR inline uchar channelCount() const Q_DECL_NOTHROW { return (get(FirstField, FirstFieldWidth) > 0) +
(get(SecondField, SecondFieldWidth) > 0) +
(get(ThirdField, ThirdFieldWidth) > 0) +
(get(FourthField, FourthFieldWidth) > 0) +
(get(FifthField, FifthFieldWidth) > 0) +
(get(AlphaField, AlphaFieldWidth) > 0); }
Q_DECL_CONSTEXPR inline uchar redSize() const Q_DECL_NOTHROW { return first; }
Q_DECL_CONSTEXPR inline uchar greenSize() const Q_DECL_NOTHROW { return second; }
Q_DECL_CONSTEXPR inline uchar blueSize() const Q_DECL_NOTHROW { return third; }
Q_DECL_CONSTEXPR inline uchar redSize() const Q_DECL_NOTHROW { return get(FirstField, FirstFieldWidth); }
Q_DECL_CONSTEXPR inline uchar greenSize() const Q_DECL_NOTHROW { return get(SecondField, SecondFieldWidth); }
Q_DECL_CONSTEXPR inline uchar blueSize() const Q_DECL_NOTHROW { return get(ThirdField, ThirdFieldWidth); }
Q_DECL_CONSTEXPR inline uchar cyanSize() const Q_DECL_NOTHROW { return first; }
Q_DECL_CONSTEXPR inline uchar magentaSize() const Q_DECL_NOTHROW { return second; }
Q_DECL_CONSTEXPR inline uchar yellowSize() const Q_DECL_NOTHROW { return third; }
Q_DECL_CONSTEXPR inline uchar blackSize() const Q_DECL_NOTHROW { return fourth; }
Q_DECL_CONSTEXPR inline uchar cyanSize() const Q_DECL_NOTHROW { return get(FirstField, FirstFieldWidth); }
Q_DECL_CONSTEXPR inline uchar magentaSize() const Q_DECL_NOTHROW { return get(SecondField, SecondFieldWidth); }
Q_DECL_CONSTEXPR inline uchar yellowSize() const Q_DECL_NOTHROW { return get(ThirdField, ThirdFieldWidth); }
Q_DECL_CONSTEXPR inline uchar blackSize() const Q_DECL_NOTHROW { return get(FourthField, FourthFieldWidth); }
Q_DECL_CONSTEXPR inline uchar hueSize() const Q_DECL_NOTHROW { return first; }
Q_DECL_CONSTEXPR inline uchar saturationSize() const Q_DECL_NOTHROW { return second; }
Q_DECL_CONSTEXPR inline uchar lightnessSize() const Q_DECL_NOTHROW { return third; }
Q_DECL_CONSTEXPR inline uchar brightnessSize() const Q_DECL_NOTHROW { return third; }
Q_DECL_CONSTEXPR inline uchar hueSize() const Q_DECL_NOTHROW { return get(FirstField, FirstFieldWidth); }
Q_DECL_CONSTEXPR inline uchar saturationSize() const Q_DECL_NOTHROW { return get(SecondField, SecondFieldWidth); }
Q_DECL_CONSTEXPR inline uchar lightnessSize() const Q_DECL_NOTHROW { return get(ThirdField, ThirdFieldWidth); }
Q_DECL_CONSTEXPR inline uchar brightnessSize() const Q_DECL_NOTHROW { return get(ThirdField, ThirdFieldWidth); }
Q_DECL_CONSTEXPR inline uchar alphaSize() const Q_DECL_NOTHROW { return alpha; }
Q_DECL_CONSTEXPR inline uchar alphaSize() const Q_DECL_NOTHROW { return get(AlphaField, AlphaFieldWidth); }
Q_DECL_CONSTEXPR inline uchar bitsPerPixel() const Q_DECL_NOTHROW { return first +
second +
third +
fourth +
fifth +
alpha; }
Q_DECL_CONSTEXPR inline uchar bitsPerPixel() const Q_DECL_NOTHROW { return get(FirstField, FirstFieldWidth) +
get(SecondField, SecondFieldWidth) +
get(ThirdField, ThirdFieldWidth) +
get(FourthField, FourthFieldWidth) +
get(FifthField, FifthFieldWidth) +
get(AlphaField, AlphaFieldWidth); }
Q_DECL_CONSTEXPR inline AlphaUsage alphaUsage() const Q_DECL_NOTHROW { return AlphaUsage(alpha_usage); }
Q_DECL_CONSTEXPR inline AlphaPosition alphaPosition() const Q_DECL_NOTHROW { return AlphaPosition(alpha_position); }
Q_DECL_CONSTEXPR inline AlphaPremultiplied premultiplied() const Q_DECL_NOTHROW { return AlphaPremultiplied(premul); }
Q_DECL_CONSTEXPR inline TypeInterpretation typeInterpretation() const Q_DECL_NOTHROW { return TypeInterpretation(type_interpretation); }
Q_DECL_CONSTEXPR inline ByteOrder byteOrder() const Q_DECL_NOTHROW { return ByteOrder(byte_order); }
Q_DECL_CONSTEXPR inline AlphaUsage alphaUsage() const Q_DECL_NOTHROW { return AlphaUsage(get(AlphaUsageField, AlphaUsageFieldWidth)); }
Q_DECL_CONSTEXPR inline AlphaPosition alphaPosition() const Q_DECL_NOTHROW { return AlphaPosition(get(AlphaPositionField, AlphaPositionFieldWidth)); }
Q_DECL_CONSTEXPR inline AlphaPremultiplied premultiplied() const Q_DECL_NOTHROW { return AlphaPremultiplied(get(PremulField, PremulFieldWidth)); }
Q_DECL_CONSTEXPR inline TypeInterpretation typeInterpretation() const Q_DECL_NOTHROW { return TypeInterpretation(get(TypeInterpretationField, TypeInterpretationFieldWidth)); }
Q_DECL_CONSTEXPR inline ByteOrder byteOrder() const Q_DECL_NOTHROW { return ByteOrder(get(ByteOrderField, ByteOrderFieldWidth)); }
Q_DECL_CONSTEXPR inline YUVLayout yuvLayout() const Q_DECL_NOTHROW { return YUVLayout(sub_enum); }
Q_DECL_CONSTEXPR inline uchar subEnum() const Q_DECL_NOTHROW { return sub_enum; }
Q_DECL_CONSTEXPR inline YUVLayout yuvLayout() const Q_DECL_NOTHROW { return YUVLayout(get(SubEnumField, SubEnumFieldWidth)); }
Q_DECL_CONSTEXPR inline uchar subEnum() const Q_DECL_NOTHROW { return get(SubEnumField, SubEnumFieldWidth); }
private:
quint64 model : 4;
quint64 first : 6;
quint64 second : 6;
quint64 third : 6;
quint64 fourth : 6;
quint64 fifth : 6;
quint64 alpha : 6;
quint64 alpha_usage : 1;
quint64 alpha_position : 1;
quint64 premul: 1;
quint64 type_interpretation : 4;
quint64 byte_order : 2;
quint64 sub_enum : 6;
quint64 unused : 9;
// Replaces the CurrentSystemEndian placeholder with a concrete byte order:
// LittleEndian when Q_BYTE_ORDER == Q_LITTLE_ENDIAN, BigEndian otherwise.
// Any other ByteOrder value is returned unchanged.
Q_DECL_CONSTEXPR static inline ByteOrder resolveByteOrder(ByteOrder bo)
{ return bo == CurrentSystemEndian ? Q_BYTE_ORDER == Q_LITTLE_ENDIAN ? LittleEndian : BigEndian : bo ; }
private:
quint64 data;
friend Q_DECL_CONST_FUNCTION Q_DECL_CONSTEXPR inline bool operator==(const QPixelFormat &fmt1, const QPixelFormat &fmt2);
friend Q_DECL_CONST_FUNCTION Q_DECL_CONSTEXPR inline bool operator!=(const QPixelFormat &fmt1, const QPixelFormat &fmt2);
@ -263,20 +306,7 @@ public:
Q_DECL_CONSTEXPR
QPixelFormat::QPixelFormat() Q_DECL_NOTHROW
: model(0)
, first(0)
, second(0)
, third(0)
, fourth(0)
, fifth(0)
, alpha(0)
, alpha_usage(0)
, alpha_position(0)
, premul(0)
, type_interpretation(0)
, byte_order(0)
, sub_enum(0)
, unused(0)
: data(0)
{
}
@ -294,44 +324,25 @@ QPixelFormat::QPixelFormat(ColorModel mdl,
TypeInterpretation typeInterp,
ByteOrder b_order,
uchar s_enum) Q_DECL_NOTHROW
: model(mdl)
, first(firstSize)
, second(secondSize)
, third(thirdSize)
, fourth(fourthSize)
, fifth(fifthSize)
, alpha(alfa)
, alpha_usage(usage)
, alpha_position(position)
, premul(premult)
, type_interpretation(typeInterp)
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
, byte_order(b_order == CurrentSystemEndian ? LittleEndian : b_order)
#else
, byte_order(b_order == CurrentSystemEndian ? BigEndian : b_order)
#endif
, sub_enum(s_enum)
, unused(0)
: data(set(ModelField, ModelFieldWidth, uchar(mdl)) |
set(FirstField, FirstFieldWidth, firstSize) |
set(SecondField, SecondFieldWidth, secondSize) |
set(ThirdField, ThirdFieldWidth, thirdSize) |
set(FourthField, FourthFieldWidth, fourthSize) |
set(FifthField, FifthFieldWidth, fifthSize) |
set(AlphaField, AlphaFieldWidth, alfa) |
set(AlphaUsageField, AlphaUsageFieldWidth, uchar(usage)) |
set(AlphaPositionField, AlphaPositionFieldWidth, uchar(position)) |
set(PremulField, PremulFieldWidth, uchar(premult)) |
set(TypeInterpretationField, TypeInterpretationFieldWidth, uchar(typeInterp)) |
set(ByteOrderField, ByteOrderFieldWidth, uchar(resolveByteOrder(b_order))) |
set(SubEnumField, SubEnumFieldWidth, s_enum) |
set(UnusedField, UnusedFieldWidth, 0))
{
}
Q_DECL_CONST_FUNCTION Q_DECL_CONSTEXPR inline bool operator==(const QPixelFormat &fmt1, const QPixelFormat &fmt2)
{
return fmt1.model == fmt2.model
&& fmt1.first == fmt2.first
&& fmt1.second == fmt2.second
&& fmt1.third == fmt2.third
&& fmt1.fourth == fmt2.fourth
&& fmt1.fifth == fmt2.fifth
&& fmt1.alpha == fmt2.alpha
&& fmt1.alpha_usage == fmt2.alpha_usage
&& fmt1.alpha_position == fmt2.alpha_position
&& fmt1.premul == fmt2.premul
&& fmt1.type_interpretation == fmt2.type_interpretation
&& fmt1.byte_order == fmt2.byte_order
&& fmt1.sub_enum == fmt2.sub_enum
&& fmt1.unused == fmt2.unused;
}
{ return fmt1.data == fmt2.data; }
Q_DECL_CONST_FUNCTION Q_DECL_CONSTEXPR inline bool operator!=(const QPixelFormat &fmt1, const QPixelFormat &fmt2)
{ return !(fmt1 == fmt2); }