lowp: add constant_color, swap, move_dst_src

This is enough for us to do some really simple draws.
Also add some debug tools to help prioritize porting.

Change-Id: I334f8fd2133be1aeec3f3406371a81aa6c184776
Reviewed-on: https://skia-review.googlesource.com/18597
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Mike Klein 2017-06-05 12:20:56 -04:00 committed by Skia Commit-Bot
parent 0e022297fe
commit 727b09c898
4 changed files with 240 additions and 55 deletions

View File

@ -26,6 +26,37 @@ static K kConstants = {
{0,1,2,3,4,5,6,7},
};
// Count the stock stages: M turns every entry of SK_RASTER_PIPELINE_STAGES into "+1",
// so the expanded list is an integer expression equal to the number of entries.
#define M(st) +1
static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
#undef M
#if !__has_feature(memory_sanitizer) && (defined(__x86_64__) || defined(_M_X64))
#if 0
#include <atomic>
// Stringified stage names, one per SK_RASTER_PIPELINE_STAGES entry, in list order.
#define M(st) #st,
static const char* kStageNames[] = { SK_RASTER_PIPELINE_STAGES(M) };
#undef M
// One counter per stage; log_missing() increments the entry indexed by the stage value.
static std::atomic<int> gMissingStageCounters[kNumStages];
static void log_missing(SkRasterPipeline::StockStage st) {
static SkOnce once;
once([] { atexit([] {
for (int i = 0; i < kNumStages; i++) {
if (int count = gMissingStageCounters[i].load()) {
SkDebugf("%7d\t%s\n", count, kStageNames[i]);
}
}
}); });
gMissingStageCounters[st]++;
}
#else
// Logging disabled: same signature as the counting version above, but does nothing.
static void log_missing(SkRasterPipeline::StockStage) {}
#endif
#endif
// We can't express the real types of most stage functions portably, so we use a stand-in.
// We'll only ever call start_pipeline(), which then chains into the rest for us.
using StageFn = void(void);
@ -38,6 +69,17 @@ using StartPipelineFn = void(size_t,size_t,size_t,void**,K*);
#define ASM(name, suffix) _sk_##name##_##suffix
#endif
// Some stages have low-precision (~15 bit) versions from SkJumper_stages_lowp.cpp.
// LOWP_STAGES(M) applies the caller-supplied macro M to the name of each such stage,
// so declarations and dispatch code can be generated from this single list.
#define LOWP_STAGES(M) \
M(constant_color) \
M(load_8888) \
M(store_8888) \
M(swap_rb) \
M(swap) \
M(move_src_dst) \
M(move_dst_src) \
M(srcover)
extern "C" {
#if __has_feature(memory_sanitizer)
@ -83,11 +125,9 @@ extern "C" {
SK_RASTER_PIPELINE_STAGES(M)
#undef M
StageFn ASM(load_8888, ssse3_lowp),
ASM(store_8888, ssse3_lowp),
ASM(swap_rb, ssse3_lowp),
ASM(move_src_dst, ssse3_lowp),
ASM(srcover, ssse3_lowp);
#define M(st) StageFn ASM(st,ssse3_lowp);
LOWP_STAGES(M)
#undef M
#endif
// Portable, single-pixel stages.
@ -98,10 +138,6 @@ extern "C" {
#undef M
}
#define M(st) +1
static const int kNumStages = SK_RASTER_PIPELINE_STAGES(M);
#undef M
// Engines comprise everything we need to run SkRasterPipelines.
struct SkJumper_Engine {
StageFn* stages[kNumStages];
@ -188,13 +224,11 @@ StartPipelineFn* SkRasterPipeline::build_pipeline(void** ip) const {
for (const StageList* st = fStages; st; st = st->prev) {
StageFn* fn = nullptr;
switch (st->stage) {
case SkRasterPipeline::load_8888: fn = ASM(load_8888, ssse3_lowp); break;
case SkRasterPipeline::store_8888: fn = ASM(store_8888, ssse3_lowp); break;
case SkRasterPipeline::swap_rb: fn = ASM(swap_rb, ssse3_lowp); break;
case SkRasterPipeline::move_src_dst: fn = ASM(move_src_dst, ssse3_lowp); break;
case SkRasterPipeline::srcover: fn = ASM(srcover, ssse3_lowp); break;
#define M(st) case SkRasterPipeline::st: fn = ASM(st, ssse3_lowp); break;
LOWP_STAGES(M)
#undef M
default:
//SkDebugf("can't %d\n", st->stage);
log_missing(st->stage);
ip = reset_point;
}
if (ip == reset_point) {

View File

@ -36943,6 +36943,38 @@ FUNCTION(_sk_just_return_ssse3_lowp)
_sk_just_return_ssse3_lowp:
.byte 195 // retq
// constant_color, SSSE3 low-precision version (generated code).
// Per the disassembly below: for each of the four floats at the context pointer, multiply by
// the scalar loaded from 0x372(%rip), truncate to an integer, and replicate its low 16 bits
// into all eight 16-bit lanes of xmm0..xmm3 (presumably r,g,b,a in that order).
// NOTE(review): the scale constant looks like 32768.0f (the 0,0,0,71 bytes emitted after
// BALIGN4 at the end of this section) -- confirm against the generator.
HIDDEN _sk_constant_color_ssse3_lowp
.globl _sk_constant_color_ssse3_lowp
FUNCTION(_sk_constant_color_ssse3_lowp)
_sk_constant_color_ssse3_lowp:
// Fetch this stage's context pointer from the program at (%rsi) and advance %rsi.
.byte 72,173 // lods %ds:(%rsi),%rax
// xmm3 holds the scale factor until the last channel consumes it.
.byte 243,15,16,29,114,3,0,0 // movss 0x372(%rip),%xmm3 # 428 <_sk_srcover_ssse3_lowp+0x65>
// Channel 0 -> xmm0.
.byte 243,15,16,0 // movss (%rax),%xmm0
.byte 243,15,89,195 // mulss %xmm3,%xmm0
.byte 243,68,15,44,200 // cvttss2si %xmm0,%r9d
.byte 102,65,15,110,193 // movd %r9d,%xmm0
.byte 242,15,112,192,0 // pshuflw $0x0,%xmm0,%xmm0
.byte 102,15,112,192,80 // pshufd $0x50,%xmm0,%xmm0
// Channel 1 -> xmm1.
.byte 243,15,16,72,4 // movss 0x4(%rax),%xmm1
.byte 243,15,89,203 // mulss %xmm3,%xmm1
.byte 243,68,15,44,201 // cvttss2si %xmm1,%r9d
.byte 102,65,15,110,201 // movd %r9d,%xmm1
.byte 242,15,112,201,0 // pshuflw $0x0,%xmm1,%xmm1
.byte 102,15,112,201,80 // pshufd $0x50,%xmm1,%xmm1
// Channel 2 -> xmm2.
.byte 243,15,16,80,8 // movss 0x8(%rax),%xmm2
.byte 243,15,89,211 // mulss %xmm3,%xmm2
.byte 243,68,15,44,202 // cvttss2si %xmm2,%r9d
.byte 102,65,15,110,209 // movd %r9d,%xmm2
.byte 242,15,112,210,0 // pshuflw $0x0,%xmm2,%xmm2
.byte 102,15,112,210,80 // pshufd $0x50,%xmm2,%xmm2
// Channel 3 -> xmm3; this is the last use of the context pointer, so %eax can be reused.
.byte 243,15,89,88,12 // mulss 0xc(%rax),%xmm3
.byte 243,15,44,195 // cvttss2si %xmm3,%eax
.byte 102,15,110,216 // movd %eax,%xmm3
.byte 242,15,112,219,0 // pshuflw $0x0,%xmm3,%xmm3
.byte 102,15,112,219,80 // pshufd $0x50,%xmm3,%xmm3
// Load the next stage's address from the program and tail-jump to it.
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
HIDDEN _sk_load_8888_ssse3_lowp
.globl _sk_load_8888_ssse3_lowp
FUNCTION(_sk_load_8888_ssse3_lowp)
@ -36950,10 +36982,10 @@ _sk_load_8888_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 76,139,24 // mov (%rax),%r11
.byte 77,133,192 // test %r8,%r8
.byte 117,113 // jne 127 <_sk_load_8888_ssse3_lowp+0x7b>
.byte 117,113 // jne 1a2 <_sk_load_8888_ssse3_lowp+0x7b>
.byte 69,15,16,76,147,16 // movups 0x10(%r11,%rdx,4),%xmm9
.byte 69,15,16,4,147 // movups (%r11,%rdx,4),%xmm8
.byte 102,15,111,5,167,2,0,0 // movdqa 0x2a7(%rip),%xmm0 # 370 <_sk_srcover_ssse3_lowp+0x65>
.byte 102,15,111,5,236,2,0,0 // movdqa 0x2ec(%rip),%xmm0 # 430 <_sk_srcover_ssse3_lowp+0x6d>
.byte 102,68,15,56,0,192 // pshufb %xmm0,%xmm8
.byte 102,68,15,56,0,200 // pshufb %xmm0,%xmm9
.byte 102,65,15,111,208 // movdqa %xmm8,%xmm2
@ -36967,7 +36999,7 @@ _sk_load_8888_ssse3_lowp:
.byte 102,15,239,210 // pxor %xmm2,%xmm2
.byte 102,65,15,96,208 // punpcklbw %xmm8,%xmm2
.byte 102,65,15,104,216 // punpckhbw %xmm8,%xmm3
.byte 102,68,15,111,5,113,2,0,0 // movdqa 0x271(%rip),%xmm8 # 380 <_sk_srcover_ssse3_lowp+0x75>
.byte 102,68,15,111,5,182,2,0,0 // movdqa 0x2b6(%rip),%xmm8 # 440 <_sk_srcover_ssse3_lowp+0x7d>
.byte 102,65,15,228,192 // pmulhuw %xmm8,%xmm0
.byte 102,65,15,228,200 // pmulhuw %xmm8,%xmm1
.byte 102,65,15,228,208 // pmulhuw %xmm8,%xmm2
@ -36980,9 +37012,9 @@ _sk_load_8888_ssse3_lowp:
.byte 69,15,87,192 // xorps %xmm8,%xmm8
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
.byte 119,129 // ja c1 <_sk_load_8888_ssse3_lowp+0x15>
.byte 119,129 // ja 13c <_sk_load_8888_ssse3_lowp+0x15>
.byte 69,15,182,201 // movzbl %r9b,%r9d
.byte 76,141,21,133,0,0,0 // lea 0x85(%rip),%r10 # 1d0 <_sk_load_8888_ssse3_lowp+0x124>
.byte 76,141,21,130,0,0,0 // lea 0x82(%rip),%r10 # 248 <_sk_load_8888_ssse3_lowp+0x121>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@ -37007,21 +37039,20 @@ _sk_load_8888_ssse3_lowp:
.byte 68,15,40,192 // movaps %xmm0,%xmm8
.byte 243,65,15,16,4,147 // movss (%r11,%rdx,4),%xmm0
.byte 243,68,15,16,192 // movss %xmm0,%xmm8
.byte 233,244,254,255,255 // jmpq c1 <_sk_load_8888_ssse3_lowp+0x15>
.byte 15,31,0 // nopl (%rax)
.byte 237 // in (%dx),%eax
.byte 233,244,254,255,255 // jmpq 13c <_sk_load_8888_ssse3_lowp+0x15>
.byte 240,255 // lock (bad)
.byte 255 // (bad)
.byte 255 // (bad)
.byte 219,255 // (bad)
.byte 255 // (bad)
.byte 255,202 // dec %edx
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // (bad)
.byte 216,255 // fdivr %st(7),%st
.byte 255 // (bad)
.byte 255,199 // inc %edi
.byte 185,255,255,255,173 // mov $0xadffffff,%ecx
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,182,255,255,255,170 // pushq -0x55000001(%rsi)
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255,149,255,255,255,132 // callq *-0x7b000001(%rbp)
.byte 255,152,255,255,255,135 // lcall *-0x78000001(%rax)
.byte 255 // (bad)
.byte 255 // (bad)
.byte 255 // .byte 0xff
@ -37049,7 +37080,7 @@ _sk_store_8888_ssse3_lowp:
.byte 102,69,15,97,194 // punpcklwd %xmm10,%xmm8
.byte 102,69,15,105,202 // punpckhwd %xmm10,%xmm9
.byte 77,133,192 // test %r8,%r8
.byte 117,17 // jne 25b <_sk_store_8888_ssse3_lowp+0x6f>
.byte 117,17 // jne 2d3 <_sk_store_8888_ssse3_lowp+0x6f>
.byte 243,69,15,127,76,147,16 // movdqu %xmm9,0x10(%r11,%rdx,4)
.byte 243,69,15,127,4,147 // movdqu %xmm8,(%r11,%rdx,4)
.byte 72,173 // lods %ds:(%rsi),%rax
@ -37058,9 +37089,9 @@ _sk_store_8888_ssse3_lowp:
.byte 65,128,225,7 // and $0x7,%r9b
.byte 65,254,201 // dec %r9b
.byte 65,128,249,6 // cmp $0x6,%r9b
.byte 119,236 // ja 257 <_sk_store_8888_ssse3_lowp+0x6b>
.byte 119,236 // ja 2cf <_sk_store_8888_ssse3_lowp+0x6b>
.byte 69,15,182,201 // movzbl %r9b,%r9d
.byte 76,141,21,90,0,0,0 // lea 0x5a(%rip),%r10 # 2d0 <_sk_store_8888_ssse3_lowp+0xe4>
.byte 76,141,21,90,0,0,0 // lea 0x5a(%rip),%r10 # 348 <_sk_store_8888_ssse3_lowp+0xe4>
.byte 75,99,4,138 // movslq (%r10,%r9,4),%rax
.byte 76,1,208 // add %r10,%rax
.byte 255,224 // jmpq *%rax
@ -37076,7 +37107,7 @@ _sk_store_8888_ssse3_lowp:
.byte 102,69,15,112,200,229 // pshufd $0xe5,%xmm8,%xmm9
.byte 102,69,15,126,76,147,4 // movd %xmm9,0x4(%r11,%rdx,4)
.byte 102,69,15,126,4,147 // movd %xmm8,(%r11,%rdx,4)
.byte 235,136 // jmp 257 <_sk_store_8888_ssse3_lowp+0x6b>
.byte 235,136 // jmp 2cf <_sk_store_8888_ssse3_lowp+0x6b>
.byte 144 // nop
.byte 247,255 // idiv %edi
.byte 255 // (bad)
@ -37109,6 +37140,25 @@ _sk_swap_rb_ssse3_lowp:
.byte 65,15,40,208 // movaps %xmm8,%xmm2
.byte 255,224 // jmpq *%rax
// swap, SSSE3 low-precision version (generated code).
// Exchanges xmm0..xmm3 with xmm4..xmm7, using xmm8..xmm11 as scratch, then jumps to the
// next stage whose address is loaded from (%rsi).
HIDDEN _sk_swap_ssse3_lowp
.globl _sk_swap_ssse3_lowp
FUNCTION(_sk_swap_ssse3_lowp)
_sk_swap_ssse3_lowp:
// Stash xmm0..xmm3 in xmm11..xmm8.
.byte 68,15,40,195 // movaps %xmm3,%xmm8
.byte 68,15,40,202 // movaps %xmm2,%xmm9
.byte 68,15,40,209 // movaps %xmm1,%xmm10
.byte 68,15,40,216 // movaps %xmm0,%xmm11
// Next stage address.
.byte 72,173 // lods %ds:(%rsi),%rax
// xmm4..xmm7 -> xmm0..xmm3.
.byte 15,40,196 // movaps %xmm4,%xmm0
.byte 15,40,205 // movaps %xmm5,%xmm1
.byte 15,40,214 // movaps %xmm6,%xmm2
.byte 15,40,223 // movaps %xmm7,%xmm3
// Stashed values -> xmm4..xmm7.
.byte 65,15,40,227 // movaps %xmm11,%xmm4
.byte 65,15,40,234 // movaps %xmm10,%xmm5
.byte 65,15,40,241 // movaps %xmm9,%xmm6
.byte 65,15,40,248 // movaps %xmm8,%xmm7
.byte 255,224 // jmpq *%rax
HIDDEN _sk_move_src_dst_ssse3_lowp
.globl _sk_move_src_dst_ssse3_lowp
FUNCTION(_sk_move_src_dst_ssse3_lowp)
@ -37120,11 +37170,22 @@ _sk_move_src_dst_ssse3_lowp:
.byte 15,40,251 // movaps %xmm3,%xmm7
.byte 255,224 // jmpq *%rax
// move_dst_src, SSSE3 low-precision version (generated code).
// Copies xmm4..xmm7 into xmm0..xmm3, then jumps to the next stage whose address is
// loaded from (%rsi).
HIDDEN _sk_move_dst_src_ssse3_lowp
.globl _sk_move_dst_src_ssse3_lowp
FUNCTION(_sk_move_dst_src_ssse3_lowp)
_sk_move_dst_src_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 15,40,196 // movaps %xmm4,%xmm0
.byte 15,40,205 // movaps %xmm5,%xmm1
.byte 15,40,214 // movaps %xmm6,%xmm2
.byte 15,40,223 // movaps %xmm7,%xmm3
.byte 255,224 // jmpq *%rax
HIDDEN _sk_srcover_ssse3_lowp
.globl _sk_srcover_ssse3_lowp
FUNCTION(_sk_srcover_ssse3_lowp)
_sk_srcover_ssse3_lowp:
.byte 102,68,15,111,5,124,0,0,0 // movdqa 0x7c(%rip),%xmm8 # 390 <_sk_srcover_ssse3_lowp+0x85>
.byte 102,68,15,111,5,132,0,0,0 // movdqa 0x84(%rip),%xmm8 # 450 <_sk_srcover_ssse3_lowp+0x8d>
.byte 102,68,15,249,195 // psubw %xmm3,%xmm8
.byte 102,68,15,111,204 // movdqa %xmm4,%xmm9
.byte 102,69,15,56,11,200 // pmulhrsw %xmm8,%xmm9
@ -37144,6 +37205,11 @@ _sk_srcover_ssse3_lowp:
.byte 72,173 // lods %ds:(%rsi),%rax
.byte 255,224 // jmpq *%rax
BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 0 // .byte 0x0
.byte 71 // rex.RXB
BALIGN16
.byte 0,4,8 // add %al,(%rax,%rcx,1)
.byte 12,1 // or $0x1,%al

View File

@ -26358,15 +26358,45 @@ PUBLIC _sk_just_return_ssse3_lowp
_sk_just_return_ssse3_lowp LABEL PROC
DB 195 ; retq
; constant_color, SSSE3 low-precision version (generated code, MASM flavour).
; Per the disassembly below: for each of the four floats at the context pointer, multiply by
; the scalar loaded from 0x372(%rip), truncate to an integer, and replicate its low 16 bits
; into all eight 16-bit lanes of xmm0..xmm3 (presumably r,g,b,a in that order).
; NOTE(review): the scale constant looks like 32768.0f (the 0,0,0,71 bytes emitted after
; ALIGN 4 at the end of this section) -- confirm against the generator.
PUBLIC _sk_constant_color_ssse3_lowp
_sk_constant_color_ssse3_lowp LABEL PROC
; Fetch this stage's context pointer from the program at (%rsi) and advance %rsi.
DB 72,173 ; lods %ds:(%rsi),%rax
; xmm3 holds the scale factor until the last channel consumes it.
DB 243,15,16,29,114,3,0,0 ; movss 0x372(%rip),%xmm3 # 4b8 <_sk_srcover_ssse3_lowp+0x65>
; Channel 0 -> xmm0.
DB 243,15,16,0 ; movss (%rax),%xmm0
DB 243,15,89,195 ; mulss %xmm3,%xmm0
DB 243,68,15,44,200 ; cvttss2si %xmm0,%r9d
DB 102,65,15,110,193 ; movd %r9d,%xmm0
DB 242,15,112,192,0 ; pshuflw $0x0,%xmm0,%xmm0
DB 102,15,112,192,80 ; pshufd $0x50,%xmm0,%xmm0
; Channel 1 -> xmm1.
DB 243,15,16,72,4 ; movss 0x4(%rax),%xmm1
DB 243,15,89,203 ; mulss %xmm3,%xmm1
DB 243,68,15,44,201 ; cvttss2si %xmm1,%r9d
DB 102,65,15,110,201 ; movd %r9d,%xmm1
DB 242,15,112,201,0 ; pshuflw $0x0,%xmm1,%xmm1
DB 102,15,112,201,80 ; pshufd $0x50,%xmm1,%xmm1
; Channel 2 -> xmm2.
DB 243,15,16,80,8 ; movss 0x8(%rax),%xmm2
DB 243,15,89,211 ; mulss %xmm3,%xmm2
DB 243,68,15,44,202 ; cvttss2si %xmm2,%r9d
DB 102,65,15,110,209 ; movd %r9d,%xmm2
DB 242,15,112,210,0 ; pshuflw $0x0,%xmm2,%xmm2
DB 102,15,112,210,80 ; pshufd $0x50,%xmm2,%xmm2
; Channel 3 -> xmm3; this is the last use of the context pointer, so %eax can be reused.
DB 243,15,89,88,12 ; mulss 0xc(%rax),%xmm3
DB 243,15,44,195 ; cvttss2si %xmm3,%eax
DB 102,15,110,216 ; movd %eax,%xmm3
DB 242,15,112,219,0 ; pshuflw $0x0,%xmm3,%xmm3
DB 102,15,112,219,80 ; pshufd $0x50,%xmm3,%xmm3
; Load the next stage's address from the program and tail-jump to it.
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
PUBLIC _sk_load_8888_ssse3_lowp
_sk_load_8888_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 76,139,24 ; mov (%rax),%r11
DB 77,133,192 ; test %r8,%r8
DB 117,113 ; jne 1b7 <_sk_load_8888_ssse3_lowp+0x7b>
DB 117,113 ; jne 232 <_sk_load_8888_ssse3_lowp+0x7b>
DB 69,15,16,76,147,16 ; movups 0x10(%r11,%rdx,4),%xmm9
DB 69,15,16,4,147 ; movups (%r11,%rdx,4),%xmm8
DB 102,15,111,5,167,2,0,0 ; movdqa 0x2a7(%rip),%xmm0 # 400 <_sk_srcover_ssse3_lowp+0x65>
DB 102,15,111,5,236,2,0,0 ; movdqa 0x2ec(%rip),%xmm0 # 4c0 <_sk_srcover_ssse3_lowp+0x6d>
DB 102,68,15,56,0,192 ; pshufb %xmm0,%xmm8
DB 102,68,15,56,0,200 ; pshufb %xmm0,%xmm9
DB 102,65,15,111,208 ; movdqa %xmm8,%xmm2
@ -26380,7 +26410,7 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 102,15,239,210 ; pxor %xmm2,%xmm2
DB 102,65,15,96,208 ; punpcklbw %xmm8,%xmm2
DB 102,65,15,104,216 ; punpckhbw %xmm8,%xmm3
DB 102,68,15,111,5,113,2,0,0 ; movdqa 0x271(%rip),%xmm8 # 410 <_sk_srcover_ssse3_lowp+0x75>
DB 102,68,15,111,5,182,2,0,0 ; movdqa 0x2b6(%rip),%xmm8 # 4d0 <_sk_srcover_ssse3_lowp+0x7d>
DB 102,65,15,228,192 ; pmulhuw %xmm8,%xmm0
DB 102,65,15,228,200 ; pmulhuw %xmm8,%xmm1
DB 102,65,15,228,208 ; pmulhuw %xmm8,%xmm2
@ -26393,9 +26423,9 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 69,15,87,192 ; xorps %xmm8,%xmm8
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
DB 119,129 ; ja 151 <_sk_load_8888_ssse3_lowp+0x15>
DB 119,129 ; ja 1cc <_sk_load_8888_ssse3_lowp+0x15>
DB 69,15,182,201 ; movzbl %r9b,%r9d
DB 76,141,21,133,0,0,0 ; lea 0x85(%rip),%r10 # 260 <_sk_load_8888_ssse3_lowp+0x124>
DB 76,141,21,130,0,0,0 ; lea 0x82(%rip),%r10 # 2d8 <_sk_load_8888_ssse3_lowp+0x121>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@ -26420,21 +26450,20 @@ _sk_load_8888_ssse3_lowp LABEL PROC
DB 68,15,40,192 ; movaps %xmm0,%xmm8
DB 243,65,15,16,4,147 ; movss (%r11,%rdx,4),%xmm0
DB 243,68,15,16,192 ; movss %xmm0,%xmm8
DB 233,244,254,255,255 ; jmpq 151 <_sk_load_8888_ssse3_lowp+0x15>
DB 15,31,0 ; nopl (%rax)
DB 237 ; in (%dx),%eax
DB 233,244,254,255,255 ; jmpq 1cc <_sk_load_8888_ssse3_lowp+0x15>
DB 240,255 ; lock (bad)
DB 255 ; (bad)
DB 255 ; (bad)
DB 219,255 ; (bad)
DB 255 ; (bad)
DB 255,202 ; dec %edx
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; (bad)
DB 216,255 ; fdivr %st(7),%st
DB 255 ; (bad)
DB 255,199 ; inc %edi
DB 185,255,255,255,173 ; mov $0xadffffff,%ecx
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,182,255,255,255,170 ; pushq -0x55000001(%rsi)
DB 255 ; (bad)
DB 255 ; (bad)
DB 255,149,255,255,255,132 ; callq *-0x7b000001(%rbp)
DB 255,152,255,255,255,135 ; lcall *-0x78000001(%rax)
DB 255 ; (bad)
DB 255 ; (bad)
DB 255 ; .byte 0xff
@ -26460,7 +26489,7 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 102,69,15,97,194 ; punpcklwd %xmm10,%xmm8
DB 102,69,15,105,202 ; punpckhwd %xmm10,%xmm9
DB 77,133,192 ; test %r8,%r8
DB 117,17 ; jne 2eb <_sk_store_8888_ssse3_lowp+0x6f>
DB 117,17 ; jne 363 <_sk_store_8888_ssse3_lowp+0x6f>
DB 243,69,15,127,76,147,16 ; movdqu %xmm9,0x10(%r11,%rdx,4)
DB 243,69,15,127,4,147 ; movdqu %xmm8,(%r11,%rdx,4)
DB 72,173 ; lods %ds:(%rsi),%rax
@ -26469,9 +26498,9 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 65,128,225,7 ; and $0x7,%r9b
DB 65,254,201 ; dec %r9b
DB 65,128,249,6 ; cmp $0x6,%r9b
DB 119,236 ; ja 2e7 <_sk_store_8888_ssse3_lowp+0x6b>
DB 119,236 ; ja 35f <_sk_store_8888_ssse3_lowp+0x6b>
DB 69,15,182,201 ; movzbl %r9b,%r9d
DB 76,141,21,90,0,0,0 ; lea 0x5a(%rip),%r10 # 360 <_sk_store_8888_ssse3_lowp+0xe4>
DB 76,141,21,90,0,0,0 ; lea 0x5a(%rip),%r10 # 3d8 <_sk_store_8888_ssse3_lowp+0xe4>
DB 75,99,4,138 ; movslq (%r10,%r9,4),%rax
DB 76,1,208 ; add %r10,%rax
DB 255,224 ; jmpq *%rax
@ -26487,7 +26516,7 @@ _sk_store_8888_ssse3_lowp LABEL PROC
DB 102,69,15,112,200,229 ; pshufd $0xe5,%xmm8,%xmm9
DB 102,69,15,126,76,147,4 ; movd %xmm9,0x4(%r11,%rdx,4)
DB 102,69,15,126,4,147 ; movd %xmm8,(%r11,%rdx,4)
DB 235,136 ; jmp 2e7 <_sk_store_8888_ssse3_lowp+0x6b>
DB 235,136 ; jmp 35f <_sk_store_8888_ssse3_lowp+0x6b>
DB 144 ; nop
DB 247,255 ; idiv %edi
DB 255 ; (bad)
@ -26518,6 +26547,23 @@ _sk_swap_rb_ssse3_lowp LABEL PROC
DB 65,15,40,208 ; movaps %xmm8,%xmm2
DB 255,224 ; jmpq *%rax
; swap, SSSE3 low-precision version (generated code, MASM flavour).
; Exchanges xmm0..xmm3 with xmm4..xmm7, using xmm8..xmm11 as scratch, then jumps to the
; next stage whose address is loaded from (%rsi).
PUBLIC _sk_swap_ssse3_lowp
_sk_swap_ssse3_lowp LABEL PROC
; Stash xmm0..xmm3 in xmm11..xmm8.
DB 68,15,40,195 ; movaps %xmm3,%xmm8
DB 68,15,40,202 ; movaps %xmm2,%xmm9
DB 68,15,40,209 ; movaps %xmm1,%xmm10
DB 68,15,40,216 ; movaps %xmm0,%xmm11
; Next stage address.
DB 72,173 ; lods %ds:(%rsi),%rax
; xmm4..xmm7 -> xmm0..xmm3.
DB 15,40,196 ; movaps %xmm4,%xmm0
DB 15,40,205 ; movaps %xmm5,%xmm1
DB 15,40,214 ; movaps %xmm6,%xmm2
DB 15,40,223 ; movaps %xmm7,%xmm3
; Stashed values -> xmm4..xmm7.
DB 65,15,40,227 ; movaps %xmm11,%xmm4
DB 65,15,40,234 ; movaps %xmm10,%xmm5
DB 65,15,40,241 ; movaps %xmm9,%xmm6
DB 65,15,40,248 ; movaps %xmm8,%xmm7
DB 255,224 ; jmpq *%rax
PUBLIC _sk_move_src_dst_ssse3_lowp
_sk_move_src_dst_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
@ -26527,9 +26573,18 @@ _sk_move_src_dst_ssse3_lowp LABEL PROC
DB 15,40,251 ; movaps %xmm3,%xmm7
DB 255,224 ; jmpq *%rax
; move_dst_src, SSSE3 low-precision version (generated code, MASM flavour).
; Copies xmm4..xmm7 into xmm0..xmm3, then jumps to the next stage whose address is
; loaded from (%rsi).
PUBLIC _sk_move_dst_src_ssse3_lowp
_sk_move_dst_src_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 15,40,196 ; movaps %xmm4,%xmm0
DB 15,40,205 ; movaps %xmm5,%xmm1
DB 15,40,214 ; movaps %xmm6,%xmm2
DB 15,40,223 ; movaps %xmm7,%xmm3
DB 255,224 ; jmpq *%rax
PUBLIC _sk_srcover_ssse3_lowp
_sk_srcover_ssse3_lowp LABEL PROC
DB 102,68,15,111,5,124,0,0,0 ; movdqa 0x7c(%rip),%xmm8 # 420 <_sk_srcover_ssse3_lowp+0x85>
DB 102,68,15,111,5,132,0,0,0 ; movdqa 0x84(%rip),%xmm8 # 4e0 <_sk_srcover_ssse3_lowp+0x8d>
DB 102,68,15,249,195 ; psubw %xmm3,%xmm8
DB 102,68,15,111,204 ; movdqa %xmm4,%xmm9
DB 102,69,15,56,11,200 ; pmulhrsw %xmm8,%xmm9
@ -26549,6 +26604,11 @@ _sk_srcover_ssse3_lowp LABEL PROC
DB 72,173 ; lods %ds:(%rsi),%rax
DB 255,224 ; jmpq *%rax
ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 0 ; .byte 0x0
DB 71 ; rex.RXB
ALIGN 16
DB 0,4,8 ; add %al,(%rax,%rcx,1)
DB 12,1 ; or $0x1,%al

View File

@ -165,6 +165,14 @@ SI U32 to_8888(F r, F g, F b, F a) {
// Stages!
// Sets the source color from the stage context: ctx is read as an array of four
// floats, assigned to r, g, b and a in that order.
STAGE(constant_color) {
    const float* color = (const float*)ctx;
    r = color[0];  g = color[1];
    b = color[2];  a = color[3];
}
STAGE(load_8888) {
auto ptr = *(const uint32_t**)ctx + x;
from_8888(load<U32>(ptr, tail), &r,&g,&b,&a);
@ -180,12 +188,29 @@ STAGE(swap_rb) {
b = tmp;
}
// Exchanges the (r,g,b,a) registers with the (dr,dg,db,da) registers, channel by channel.
STAGE(swap) {
    // Stash r,g,b,a so they survive being overwritten below.
    F saved_r = r;
    F saved_g = g;
    F saved_b = b;
    F saved_a = a;

    // dr,dg,db,da -> r,g,b,a
    r = dr;
    g = dg;
    b = db;
    a = da;

    // stashed r,g,b,a -> dr,dg,db,da
    dr = saved_r;
    dg = saved_g;
    db = saved_b;
    da = saved_a;
}
// Copies r,g,b,a into dr,dg,db,da; r,g,b,a themselves are left untouched.
STAGE(move_src_dst) {
    dr = r;  dg = g;
    db = b;  da = a;
}
// Copies dr,dg,db,da into r,g,b,a; dr,dg,db,da themselves are left untouched.
STAGE(move_dst_src) {
    r = dr;  g = dg;
    b = db;  a = da;
}
// Most blend modes apply the same logic to each channel.
#define BLEND_MODE(name) \