jumper, rework callback a bit, use it for color_lookup_table
Looks like the color-space images have this well tested (even without lab_to_xyz) and the diffs look like rounding/FMA. The old plan to keep loads and stores outside the callback was: 1) awkward, with too many pointers and pointers to pointers to track; 2) misguided... load and store stages march ahead by x, working at ptr+0, ptr+8, ptr+16, etc. while the callback always wants to be working at the same spot in the buffer. I spent a frustrating day in lldb to understand 2). :/ So now the stage always store4's its pixels to a buffer in the context before the callback, and when the callback returns it load4's them back from a pointer in the context, defaulting to that same buffer. Instead of passing a void* into the callback, we pass the context itself. This lets us subclass the context and add our own data... C-compatible object-oriented programming. Change-Id: I7a03439b3abd2efb000a6973631a9336452e9a43 Reviewed-on: https://skia-review.googlesource.com/13985 Reviewed-by: Herb Derby <herb@google.com> Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
8f7dc9f6ca
commit
c17dc24fa9
@ -16,6 +16,7 @@
|
||||
#include "SkNx.h"
|
||||
#include "SkSRGB.h"
|
||||
#include "SkTypes.h"
|
||||
#include "../jumper/SkJumper.h"
|
||||
|
||||
bool SkColorSpaceXform_A2B::onApply(ColorFormat dstFormat, void* dst, ColorFormat srcFormat,
|
||||
const void* src, int count, SkAlphaType alphaType) const {
|
||||
@ -183,8 +184,27 @@ SkColorSpaceXform_A2B::SkColorSpaceXform_A2B(SkColorSpace_A2B* srcSpace,
|
||||
case SkColorSpace_A2B::Element::Type::kCLUT: {
|
||||
SkCSXformPrintf("CLUT (%d -> %d) stage added\n", e.colorLUT().inputChannels(),
|
||||
e.colorLUT().outputChannels());
|
||||
auto clut = this->copy(sk_ref_sp(&e.colorLUT()));
|
||||
fElementsPipeline.append(SkRasterPipeline::color_lookup_table, clut->get());
|
||||
struct CallbackCtx : SkJumper_CallbackCtx {
|
||||
sk_sp<const SkColorLookUpTable> clut;
|
||||
// clut->interp() can't always safely alias its arguments,
|
||||
// so we allocate a second buffer to hold our results.
|
||||
float results[4*SkJumper_kMaxStride];
|
||||
};
|
||||
auto cb = fAlloc.make<CallbackCtx>();
|
||||
cb->clut = sk_ref_sp(&e.colorLUT());
|
||||
cb->read_from = cb->results;
|
||||
cb->fn = [](SkJumper_CallbackCtx* ctx, int active_pixels) {
|
||||
auto c = (CallbackCtx*)ctx;
|
||||
for (int i = 0; i < active_pixels; i++) {
|
||||
// Look up red, green, and blue for this pixel using 3-4 values from rgba.
|
||||
c->clut->interp(c->results+4*i, c->rgba+4*i);
|
||||
|
||||
// If we used 3 inputs (rgb) preserve the fourth as alpha.
|
||||
// If we used 4 inputs (cmyk) force alpha to 1.
|
||||
c->results[4*i+3] = (3 == c->clut->inputChannels()) ? c->rgba[4*i+3] : 1.0f;
|
||||
}
|
||||
};
|
||||
fElementsPipeline.append(SkRasterPipeline::callback, cb);
|
||||
break;
|
||||
}
|
||||
case SkColorSpace_A2B::Element::Type::kMatrix:
|
||||
|
@ -87,7 +87,7 @@
|
||||
M(parametric_r) M(parametric_g) M(parametric_b) \
|
||||
M(parametric_a) \
|
||||
M(table_r) M(table_g) M(table_b) M(table_a) \
|
||||
M(color_lookup_table) M(lab_to_xyz) \
|
||||
M(lab_to_xyz) \
|
||||
M(clamp_x) M(mirror_x) M(repeat_x) \
|
||||
M(clamp_y) M(mirror_y) M(repeat_y) \
|
||||
M(gather_a8) M(gather_g8) M(gather_i8) \
|
||||
|
@ -82,8 +82,12 @@ struct SkJumper_SamplerCtx {
|
||||
};
|
||||
|
||||
struct SkJumper_CallbackCtx {
|
||||
MAYBE_MSABI void (*fn)(void* arg, int active_pixels/*<= SkJumper_kMaxStride*/);
|
||||
void* arg;
|
||||
MAYBE_MSABI void (*fn)(SkJumper_CallbackCtx* self, int active_pixels/*<= SkJumper_kMaxStride*/);
|
||||
|
||||
// When called, fn() will have our active pixels available in rgba.
|
||||
// When fn() returns, the pipeline will read back those active pixels from read_from.
|
||||
float rgba[4*SkJumper_kMaxStride];
|
||||
float* read_from = rgba;
|
||||
};
|
||||
|
||||
struct SkJumper_LoadTablesCtx {
|
||||
|
@ -3587,32 +3587,34 @@ HIDDEN _sk_callback_aarch64
|
||||
.globl _sk_callback_aarch64
|
||||
FUNCTION(_sk_callback_aarch64)
|
||||
_sk_callback_aarch64:
|
||||
.long 0xd10283ff // sub sp, sp, #0xa0
|
||||
.long 0xa90853f5 // stp x21, x20, [sp, #128]
|
||||
.long 0xa9097bf3 // stp x19, x30, [sp, #144]
|
||||
.long 0xad031fe6 // stp q6, q7, [sp, #96]
|
||||
.long 0xad0217e4 // stp q4, q5, [sp, #64]
|
||||
.long 0xad010fe2 // stp q2, q3, [sp, #32]
|
||||
.long 0xad0007e0 // stp q0, q1, [sp]
|
||||
.long 0xd101c3ff // sub sp, sp, #0x70
|
||||
.long 0xf90023f6 // str x22, [sp, #64]
|
||||
.long 0xa90553f5 // stp x21, x20, [sp, #80]
|
||||
.long 0xa9067bf3 // stp x19, x30, [sp, #96]
|
||||
.long 0xad011fe6 // stp q6, q7, [sp, #32]
|
||||
.long 0xad0017e4 // stp q4, q5, [sp]
|
||||
.long 0xaa0103f4 // mov x20, x1
|
||||
.long 0xf9400288 // ldr x8, [x20]
|
||||
.long 0xaa0003f5 // mov x21, x0
|
||||
.long 0xf9400295 // ldr x21, [x20]
|
||||
.long 0xaa0003f6 // mov x22, x0
|
||||
.long 0x321e03e1 // orr w1, wzr, #0x4
|
||||
.long 0xaa0203f3 // mov x19, x2
|
||||
.long 0xa9402109 // ldp x9, x8, [x8]
|
||||
.long 0xaa0803e0 // mov x0, x8
|
||||
.long 0xd63f0120 // blr x9
|
||||
.long 0x910022a8 // add x8, x21, #0x8
|
||||
.long 0x4c000900 // st4 {v0.4s-v3.4s}, [x8]
|
||||
.long 0xf94002a8 // ldr x8, [x21]
|
||||
.long 0xaa1503e0 // mov x0, x21
|
||||
.long 0xd63f0100 // blr x8
|
||||
.long 0xf94046a8 // ldr x8, [x21, #136]
|
||||
.long 0xf9400683 // ldr x3, [x20, #8]
|
||||
.long 0x91004281 // add x1, x20, #0x10
|
||||
.long 0xaa1503e0 // mov x0, x21
|
||||
.long 0xaa1603e0 // mov x0, x22
|
||||
.long 0x4c400900 // ld4 {v0.4s-v3.4s}, [x8]
|
||||
.long 0xaa1303e2 // mov x2, x19
|
||||
.long 0xad4007e0 // ldp q0, q1, [sp]
|
||||
.long 0xad410fe2 // ldp q2, q3, [sp, #32]
|
||||
.long 0xad4217e4 // ldp q4, q5, [sp, #64]
|
||||
.long 0xad431fe6 // ldp q6, q7, [sp, #96]
|
||||
.long 0xa9497bf3 // ldp x19, x30, [sp, #144]
|
||||
.long 0xa94853f5 // ldp x21, x20, [sp, #128]
|
||||
.long 0x910283ff // add sp, sp, #0xa0
|
||||
.long 0xad4017e4 // ldp q4, q5, [sp]
|
||||
.long 0xad411fe6 // ldp q6, q7, [sp, #32]
|
||||
.long 0xa9467bf3 // ldp x19, x30, [sp, #96]
|
||||
.long 0xa94553f5 // ldp x21, x20, [sp, #80]
|
||||
.long 0xf94023f6 // ldr x22, [sp, #64]
|
||||
.long 0x9101c3ff // add sp, sp, #0x70
|
||||
.long 0xd61f0060 // br x3
|
||||
#elif defined(__arm__)
|
||||
.balign 4
|
||||
@ -7506,38 +7508,34 @@ HIDDEN _sk_callback_vfp4
|
||||
.globl _sk_callback_vfp4
|
||||
FUNCTION(_sk_callback_vfp4)
|
||||
_sk_callback_vfp4:
|
||||
.long 0xe92d4070 // push {r4, r5, r6, lr}
|
||||
.long 0xed2d8b10 // vpush {d8-d15}
|
||||
.long 0xe92d48f0 // push {r4, r5, r6, r7, fp, lr}
|
||||
.long 0xed2d8b08 // vpush {d8-d11}
|
||||
.long 0xe1a05001 // mov r5, r1
|
||||
.long 0xe1a06000 // mov r6, r0
|
||||
.long 0xe5950000 // ldr r0, [r5]
|
||||
.long 0xe5957000 // ldr r7, [r5]
|
||||
.long 0xe1a04002 // mov r4, r2
|
||||
.long 0xe3a01002 // mov r1, #2
|
||||
.long 0xeeb08b47 // vmov.f64 d8, d7
|
||||
.long 0xe5902000 // ldr r2, [r0]
|
||||
.long 0xe5900004 // ldr r0, [r0, #4]
|
||||
.long 0xe2870004 // add r0, r7, #4
|
||||
.long 0xf400008f // vst4.32 {d0-d3}, [r0]
|
||||
.long 0xe1a00007 // mov r0, r7
|
||||
.long 0xe5972000 // ldr r2, [r7]
|
||||
.long 0xeeb09b46 // vmov.f64 d9, d6
|
||||
.long 0xeeb0ab45 // vmov.f64 d10, d5
|
||||
.long 0xeeb0bb44 // vmov.f64 d11, d4
|
||||
.long 0xeeb0cb43 // vmov.f64 d12, d3
|
||||
.long 0xeeb0db42 // vmov.f64 d13, d2
|
||||
.long 0xeeb0eb41 // vmov.f64 d14, d1
|
||||
.long 0xeeb0fb40 // vmov.f64 d15, d0
|
||||
.long 0xe12fff32 // blx r2
|
||||
.long 0xe5970084 // ldr r0, [r7, #132]
|
||||
.long 0xe2851008 // add r1, r5, #8
|
||||
.long 0xe5953004 // ldr r3, [r5, #4]
|
||||
.long 0xe1a00006 // mov r0, r6
|
||||
.long 0xe1a02004 // mov r2, r4
|
||||
.long 0xeeb00b4f // vmov.f64 d0, d15
|
||||
.long 0xeeb01b4e // vmov.f64 d1, d14
|
||||
.long 0xeeb02b4d // vmov.f64 d2, d13
|
||||
.long 0xeeb03b4c // vmov.f64 d3, d12
|
||||
.long 0xeeb04b4b // vmov.f64 d4, d11
|
||||
.long 0xf420008f // vld4.32 {d0-d3}, [r0]
|
||||
.long 0xe1a00006 // mov r0, r6
|
||||
.long 0xeeb05b4a // vmov.f64 d5, d10
|
||||
.long 0xeeb06b49 // vmov.f64 d6, d9
|
||||
.long 0xeeb07b48 // vmov.f64 d7, d8
|
||||
.long 0xecbd8b10 // vpop {d8-d15}
|
||||
.long 0xe8bd4070 // pop {r4, r5, r6, lr}
|
||||
.long 0xecbd8b08 // vpop {d8-d11}
|
||||
.long 0xe8bd48f0 // pop {r4, r5, r6, r7, fp, lr}
|
||||
.long 0xe12fff13 // bx r3
|
||||
#elif defined(__x86_64__)
|
||||
|
||||
@ -11932,44 +11930,72 @@ FUNCTION(_sk_callback_hsw)
|
||||
_sk_callback_hsw:
|
||||
.byte 65,87 // push %r15
|
||||
.byte 65,86 // push %r14
|
||||
.byte 65,85 // push %r13
|
||||
.byte 65,84 // push %r12
|
||||
.byte 83 // push %rbx
|
||||
.byte 72,129,236,24,1,0,0 // sub $0x118,%rsp
|
||||
.byte 197,252,17,188,36,224,0,0,0 // vmovups %ymm7,0xe0(%rsp)
|
||||
.byte 197,252,17,180,36,192,0,0,0 // vmovups %ymm6,0xc0(%rsp)
|
||||
.byte 197,252,17,172,36,160,0,0,0 // vmovups %ymm5,0xa0(%rsp)
|
||||
.byte 197,252,17,164,36,128,0,0,0 // vmovups %ymm4,0x80(%rsp)
|
||||
.byte 197,252,17,92,36,96 // vmovups %ymm3,0x60(%rsp)
|
||||
.byte 197,252,17,84,36,64 // vmovups %ymm2,0x40(%rsp)
|
||||
.byte 197,252,17,76,36,32 // vmovups %ymm1,0x20(%rsp)
|
||||
.byte 197,252,17,4,36 // vmovups %ymm0,(%rsp)
|
||||
.byte 72,137,203 // mov %rcx,%rbx
|
||||
.byte 72,129,236,144,0,0,0 // sub $0x90,%rsp
|
||||
.byte 197,252,17,124,36,96 // vmovups %ymm7,0x60(%rsp)
|
||||
.byte 197,252,17,116,36,64 // vmovups %ymm6,0x40(%rsp)
|
||||
.byte 197,252,17,108,36,32 // vmovups %ymm5,0x20(%rsp)
|
||||
.byte 197,252,17,36,36 // vmovups %ymm4,(%rsp)
|
||||
.byte 73,137,205 // mov %rcx,%r13
|
||||
.byte 73,137,214 // mov %rdx,%r14
|
||||
.byte 73,137,255 // mov %rdi,%r15
|
||||
.byte 72,173 // lods %ds:(%rsi),%rax
|
||||
.byte 72,137,195 // mov %rax,%rbx
|
||||
.byte 73,137,244 // mov %rsi,%r12
|
||||
.byte 72,139,120,8 // mov 0x8(%rax),%rdi
|
||||
.byte 72,133,219 // test %rbx,%rbx
|
||||
.byte 197,252,20,225 // vunpcklps %ymm1,%ymm0,%ymm4
|
||||
.byte 197,252,21,193 // vunpckhps %ymm1,%ymm0,%ymm0
|
||||
.byte 197,236,20,203 // vunpcklps %ymm3,%ymm2,%ymm1
|
||||
.byte 197,236,21,211 // vunpckhps %ymm3,%ymm2,%ymm2
|
||||
.byte 197,221,20,217 // vunpcklpd %ymm1,%ymm4,%ymm3
|
||||
.byte 197,221,21,201 // vunpckhpd %ymm1,%ymm4,%ymm1
|
||||
.byte 197,253,20,226 // vunpcklpd %ymm2,%ymm0,%ymm4
|
||||
.byte 197,253,21,194 // vunpckhpd %ymm2,%ymm0,%ymm0
|
||||
.byte 196,227,101,24,209,1 // vinsertf128 $0x1,%xmm1,%ymm3,%ymm2
|
||||
.byte 196,227,93,24,232,1 // vinsertf128 $0x1,%xmm0,%ymm4,%ymm5
|
||||
.byte 196,227,101,6,201,49 // vperm2f128 $0x31,%ymm1,%ymm3,%ymm1
|
||||
.byte 196,227,93,6,192,49 // vperm2f128 $0x31,%ymm0,%ymm4,%ymm0
|
||||
.byte 197,253,17,83,8 // vmovupd %ymm2,0x8(%rbx)
|
||||
.byte 197,253,17,107,40 // vmovupd %ymm5,0x28(%rbx)
|
||||
.byte 197,253,17,75,72 // vmovupd %ymm1,0x48(%rbx)
|
||||
.byte 197,253,17,67,104 // vmovupd %ymm0,0x68(%rbx)
|
||||
.byte 77,133,237 // test %r13,%r13
|
||||
.byte 190,8,0,0,0 // mov $0x8,%esi
|
||||
.byte 15,69,243 // cmovne %ebx,%esi
|
||||
.byte 65,15,69,245 // cmovne %r13d,%esi
|
||||
.byte 72,137,223 // mov %rbx,%rdi
|
||||
.byte 197,248,119 // vzeroupper
|
||||
.byte 255,16 // callq *(%rax)
|
||||
.byte 255,19 // callq *(%rbx)
|
||||
.byte 72,139,131,136,0,0,0 // mov 0x88(%rbx),%rax
|
||||
.byte 197,248,16,0 // vmovups (%rax),%xmm0
|
||||
.byte 197,248,16,72,16 // vmovups 0x10(%rax),%xmm1
|
||||
.byte 197,248,16,80,32 // vmovups 0x20(%rax),%xmm2
|
||||
.byte 197,248,16,88,48 // vmovups 0x30(%rax),%xmm3
|
||||
.byte 196,227,101,24,88,112,1 // vinsertf128 $0x1,0x70(%rax),%ymm3,%ymm3
|
||||
.byte 196,227,109,24,80,96,1 // vinsertf128 $0x1,0x60(%rax),%ymm2,%ymm2
|
||||
.byte 196,227,117,24,72,80,1 // vinsertf128 $0x1,0x50(%rax),%ymm1,%ymm1
|
||||
.byte 196,227,125,24,64,64,1 // vinsertf128 $0x1,0x40(%rax),%ymm0,%ymm0
|
||||
.byte 197,252,20,225 // vunpcklps %ymm1,%ymm0,%ymm4
|
||||
.byte 197,252,21,233 // vunpckhps %ymm1,%ymm0,%ymm5
|
||||
.byte 197,236,20,203 // vunpcklps %ymm3,%ymm2,%ymm1
|
||||
.byte 197,236,21,219 // vunpckhps %ymm3,%ymm2,%ymm3
|
||||
.byte 197,221,20,193 // vunpcklpd %ymm1,%ymm4,%ymm0
|
||||
.byte 197,221,21,201 // vunpckhpd %ymm1,%ymm4,%ymm1
|
||||
.byte 197,213,20,211 // vunpcklpd %ymm3,%ymm5,%ymm2
|
||||
.byte 197,213,21,219 // vunpckhpd %ymm3,%ymm5,%ymm3
|
||||
.byte 76,137,230 // mov %r12,%rsi
|
||||
.byte 72,173 // lods %ds:(%rsi),%rax
|
||||
.byte 76,137,255 // mov %r15,%rdi
|
||||
.byte 76,137,242 // mov %r14,%rdx
|
||||
.byte 72,137,217 // mov %rbx,%rcx
|
||||
.byte 197,252,16,4,36 // vmovups (%rsp),%ymm0
|
||||
.byte 197,252,16,76,36,32 // vmovups 0x20(%rsp),%ymm1
|
||||
.byte 197,252,16,84,36,64 // vmovups 0x40(%rsp),%ymm2
|
||||
.byte 197,252,16,92,36,96 // vmovups 0x60(%rsp),%ymm3
|
||||
.byte 197,252,16,164,36,128,0,0,0 // vmovups 0x80(%rsp),%ymm4
|
||||
.byte 197,252,16,172,36,160,0,0,0 // vmovups 0xa0(%rsp),%ymm5
|
||||
.byte 197,252,16,180,36,192,0,0,0 // vmovups 0xc0(%rsp),%ymm6
|
||||
.byte 197,252,16,188,36,224,0,0,0 // vmovups 0xe0(%rsp),%ymm7
|
||||
.byte 72,129,196,24,1,0,0 // add $0x118,%rsp
|
||||
.byte 76,137,233 // mov %r13,%rcx
|
||||
.byte 197,252,16,36,36 // vmovups (%rsp),%ymm4
|
||||
.byte 197,252,16,108,36,32 // vmovups 0x20(%rsp),%ymm5
|
||||
.byte 197,252,16,116,36,64 // vmovups 0x40(%rsp),%ymm6
|
||||
.byte 197,252,16,124,36,96 // vmovups 0x60(%rsp),%ymm7
|
||||
.byte 72,129,196,144,0,0,0 // add $0x90,%rsp
|
||||
.byte 91 // pop %rbx
|
||||
.byte 65,92 // pop %r12
|
||||
.byte 65,93 // pop %r13
|
||||
.byte 65,94 // pop %r14
|
||||
.byte 65,95 // pop %r15
|
||||
.byte 255,224 // jmpq *%rax
|
||||
@ -17777,44 +17803,72 @@ FUNCTION(_sk_callback_avx)
|
||||
_sk_callback_avx:
|
||||
.byte 65,87 // push %r15
|
||||
.byte 65,86 // push %r14
|
||||
.byte 65,85 // push %r13
|
||||
.byte 65,84 // push %r12
|
||||
.byte 83 // push %rbx
|
||||
.byte 72,129,236,24,1,0,0 // sub $0x118,%rsp
|
||||
.byte 197,252,17,188,36,224,0,0,0 // vmovups %ymm7,0xe0(%rsp)
|
||||
.byte 197,252,17,180,36,192,0,0,0 // vmovups %ymm6,0xc0(%rsp)
|
||||
.byte 197,252,17,172,36,160,0,0,0 // vmovups %ymm5,0xa0(%rsp)
|
||||
.byte 197,252,17,164,36,128,0,0,0 // vmovups %ymm4,0x80(%rsp)
|
||||
.byte 197,252,17,92,36,96 // vmovups %ymm3,0x60(%rsp)
|
||||
.byte 197,252,17,84,36,64 // vmovups %ymm2,0x40(%rsp)
|
||||
.byte 197,252,17,76,36,32 // vmovups %ymm1,0x20(%rsp)
|
||||
.byte 197,252,17,4,36 // vmovups %ymm0,(%rsp)
|
||||
.byte 72,137,203 // mov %rcx,%rbx
|
||||
.byte 72,129,236,144,0,0,0 // sub $0x90,%rsp
|
||||
.byte 197,252,17,124,36,96 // vmovups %ymm7,0x60(%rsp)
|
||||
.byte 197,252,17,116,36,64 // vmovups %ymm6,0x40(%rsp)
|
||||
.byte 197,252,17,108,36,32 // vmovups %ymm5,0x20(%rsp)
|
||||
.byte 197,252,17,36,36 // vmovups %ymm4,(%rsp)
|
||||
.byte 73,137,205 // mov %rcx,%r13
|
||||
.byte 73,137,214 // mov %rdx,%r14
|
||||
.byte 73,137,255 // mov %rdi,%r15
|
||||
.byte 72,173 // lods %ds:(%rsi),%rax
|
||||
.byte 72,137,195 // mov %rax,%rbx
|
||||
.byte 73,137,244 // mov %rsi,%r12
|
||||
.byte 72,139,120,8 // mov 0x8(%rax),%rdi
|
||||
.byte 72,133,219 // test %rbx,%rbx
|
||||
.byte 197,252,20,225 // vunpcklps %ymm1,%ymm0,%ymm4
|
||||
.byte 197,252,21,193 // vunpckhps %ymm1,%ymm0,%ymm0
|
||||
.byte 197,236,20,203 // vunpcklps %ymm3,%ymm2,%ymm1
|
||||
.byte 197,236,21,211 // vunpckhps %ymm3,%ymm2,%ymm2
|
||||
.byte 197,221,20,217 // vunpcklpd %ymm1,%ymm4,%ymm3
|
||||
.byte 197,221,21,201 // vunpckhpd %ymm1,%ymm4,%ymm1
|
||||
.byte 197,253,20,226 // vunpcklpd %ymm2,%ymm0,%ymm4
|
||||
.byte 197,253,21,194 // vunpckhpd %ymm2,%ymm0,%ymm0
|
||||
.byte 196,227,101,24,209,1 // vinsertf128 $0x1,%xmm1,%ymm3,%ymm2
|
||||
.byte 196,227,93,24,232,1 // vinsertf128 $0x1,%xmm0,%ymm4,%ymm5
|
||||
.byte 196,227,101,6,201,49 // vperm2f128 $0x31,%ymm1,%ymm3,%ymm1
|
||||
.byte 196,227,93,6,192,49 // vperm2f128 $0x31,%ymm0,%ymm4,%ymm0
|
||||
.byte 197,253,17,83,8 // vmovupd %ymm2,0x8(%rbx)
|
||||
.byte 197,253,17,107,40 // vmovupd %ymm5,0x28(%rbx)
|
||||
.byte 197,253,17,75,72 // vmovupd %ymm1,0x48(%rbx)
|
||||
.byte 197,253,17,67,104 // vmovupd %ymm0,0x68(%rbx)
|
||||
.byte 77,133,237 // test %r13,%r13
|
||||
.byte 190,8,0,0,0 // mov $0x8,%esi
|
||||
.byte 15,69,243 // cmovne %ebx,%esi
|
||||
.byte 65,15,69,245 // cmovne %r13d,%esi
|
||||
.byte 72,137,223 // mov %rbx,%rdi
|
||||
.byte 197,248,119 // vzeroupper
|
||||
.byte 255,16 // callq *(%rax)
|
||||
.byte 255,19 // callq *(%rbx)
|
||||
.byte 72,139,131,136,0,0,0 // mov 0x88(%rbx),%rax
|
||||
.byte 197,248,16,0 // vmovups (%rax),%xmm0
|
||||
.byte 197,248,16,72,16 // vmovups 0x10(%rax),%xmm1
|
||||
.byte 197,248,16,80,32 // vmovups 0x20(%rax),%xmm2
|
||||
.byte 197,248,16,88,48 // vmovups 0x30(%rax),%xmm3
|
||||
.byte 196,227,101,24,88,112,1 // vinsertf128 $0x1,0x70(%rax),%ymm3,%ymm3
|
||||
.byte 196,227,109,24,80,96,1 // vinsertf128 $0x1,0x60(%rax),%ymm2,%ymm2
|
||||
.byte 196,227,117,24,72,80,1 // vinsertf128 $0x1,0x50(%rax),%ymm1,%ymm1
|
||||
.byte 196,227,125,24,64,64,1 // vinsertf128 $0x1,0x40(%rax),%ymm0,%ymm0
|
||||
.byte 197,252,20,225 // vunpcklps %ymm1,%ymm0,%ymm4
|
||||
.byte 197,252,21,233 // vunpckhps %ymm1,%ymm0,%ymm5
|
||||
.byte 197,236,20,203 // vunpcklps %ymm3,%ymm2,%ymm1
|
||||
.byte 197,236,21,219 // vunpckhps %ymm3,%ymm2,%ymm3
|
||||
.byte 197,221,20,193 // vunpcklpd %ymm1,%ymm4,%ymm0
|
||||
.byte 197,221,21,201 // vunpckhpd %ymm1,%ymm4,%ymm1
|
||||
.byte 197,213,20,211 // vunpcklpd %ymm3,%ymm5,%ymm2
|
||||
.byte 197,213,21,219 // vunpckhpd %ymm3,%ymm5,%ymm3
|
||||
.byte 76,137,230 // mov %r12,%rsi
|
||||
.byte 72,173 // lods %ds:(%rsi),%rax
|
||||
.byte 76,137,255 // mov %r15,%rdi
|
||||
.byte 76,137,242 // mov %r14,%rdx
|
||||
.byte 72,137,217 // mov %rbx,%rcx
|
||||
.byte 197,252,16,4,36 // vmovups (%rsp),%ymm0
|
||||
.byte 197,252,16,76,36,32 // vmovups 0x20(%rsp),%ymm1
|
||||
.byte 197,252,16,84,36,64 // vmovups 0x40(%rsp),%ymm2
|
||||
.byte 197,252,16,92,36,96 // vmovups 0x60(%rsp),%ymm3
|
||||
.byte 197,252,16,164,36,128,0,0,0 // vmovups 0x80(%rsp),%ymm4
|
||||
.byte 197,252,16,172,36,160,0,0,0 // vmovups 0xa0(%rsp),%ymm5
|
||||
.byte 197,252,16,180,36,192,0,0,0 // vmovups 0xc0(%rsp),%ymm6
|
||||
.byte 197,252,16,188,36,224,0,0,0 // vmovups 0xe0(%rsp),%ymm7
|
||||
.byte 72,129,196,24,1,0,0 // add $0x118,%rsp
|
||||
.byte 76,137,233 // mov %r13,%rcx
|
||||
.byte 197,252,16,36,36 // vmovups (%rsp),%ymm4
|
||||
.byte 197,252,16,108,36,32 // vmovups 0x20(%rsp),%ymm5
|
||||
.byte 197,252,16,116,36,64 // vmovups 0x40(%rsp),%ymm6
|
||||
.byte 197,252,16,124,36,96 // vmovups 0x60(%rsp),%ymm7
|
||||
.byte 72,129,196,144,0,0,0 // add $0x90,%rsp
|
||||
.byte 91 // pop %rbx
|
||||
.byte 65,92 // pop %r12
|
||||
.byte 65,93 // pop %r13
|
||||
.byte 65,94 // pop %r14
|
||||
.byte 65,95 // pop %r15
|
||||
.byte 255,224 // jmpq *%rax
|
||||
@ -22267,37 +22321,65 @@ FUNCTION(_sk_callback_sse41)
|
||||
_sk_callback_sse41:
|
||||
.byte 65,87 // push %r15
|
||||
.byte 65,86 // push %r14
|
||||
.byte 65,84 // push %r12
|
||||
.byte 83 // push %rbx
|
||||
.byte 72,129,236,128,0,0,0 // sub $0x80,%rsp
|
||||
.byte 15,41,124,36,112 // movaps %xmm7,0x70(%rsp)
|
||||
.byte 15,41,116,36,96 // movaps %xmm6,0x60(%rsp)
|
||||
.byte 15,41,108,36,80 // movaps %xmm5,0x50(%rsp)
|
||||
.byte 15,41,100,36,64 // movaps %xmm4,0x40(%rsp)
|
||||
.byte 15,41,92,36,48 // movaps %xmm3,0x30(%rsp)
|
||||
.byte 15,41,84,36,32 // movaps %xmm2,0x20(%rsp)
|
||||
.byte 15,41,76,36,16 // movaps %xmm1,0x10(%rsp)
|
||||
.byte 15,41,4,36 // movaps %xmm0,(%rsp)
|
||||
.byte 72,131,236,72 // sub $0x48,%rsp
|
||||
.byte 15,41,124,36,48 // movaps %xmm7,0x30(%rsp)
|
||||
.byte 15,41,116,36,32 // movaps %xmm6,0x20(%rsp)
|
||||
.byte 15,41,108,36,16 // movaps %xmm5,0x10(%rsp)
|
||||
.byte 15,41,36,36 // movaps %xmm4,(%rsp)
|
||||
.byte 73,137,214 // mov %rdx,%r14
|
||||
.byte 73,137,255 // mov %rdi,%r15
|
||||
.byte 72,173 // lods %ds:(%rsi),%rax
|
||||
.byte 72,137,243 // mov %rsi,%rbx
|
||||
.byte 72,139,120,8 // mov 0x8(%rax),%rdi
|
||||
.byte 72,137,195 // mov %rax,%rbx
|
||||
.byte 73,137,244 // mov %rsi,%r12
|
||||
.byte 15,40,224 // movaps %xmm0,%xmm4
|
||||
.byte 15,20,225 // unpcklps %xmm1,%xmm4
|
||||
.byte 15,40,234 // movaps %xmm2,%xmm5
|
||||
.byte 15,20,235 // unpcklps %xmm3,%xmm5
|
||||
.byte 15,21,193 // unpckhps %xmm1,%xmm0
|
||||
.byte 15,21,211 // unpckhps %xmm3,%xmm2
|
||||
.byte 15,40,204 // movaps %xmm4,%xmm1
|
||||
.byte 102,15,20,205 // unpcklpd %xmm5,%xmm1
|
||||
.byte 15,18,236 // movhlps %xmm4,%xmm5
|
||||
.byte 15,40,216 // movaps %xmm0,%xmm3
|
||||
.byte 102,15,20,218 // unpcklpd %xmm2,%xmm3
|
||||
.byte 15,18,208 // movhlps %xmm0,%xmm2
|
||||
.byte 102,15,17,75,8 // movupd %xmm1,0x8(%rbx)
|
||||
.byte 15,17,107,24 // movups %xmm5,0x18(%rbx)
|
||||
.byte 102,15,17,91,40 // movupd %xmm3,0x28(%rbx)
|
||||
.byte 15,17,83,56 // movups %xmm2,0x38(%rbx)
|
||||
.byte 190,4,0,0,0 // mov $0x4,%esi
|
||||
.byte 255,16 // callq *(%rax)
|
||||
.byte 72,137,222 // mov %rbx,%rsi
|
||||
.byte 72,137,223 // mov %rbx,%rdi
|
||||
.byte 255,19 // callq *(%rbx)
|
||||
.byte 72,139,131,136,0,0,0 // mov 0x88(%rbx),%rax
|
||||
.byte 15,16,32 // movups (%rax),%xmm4
|
||||
.byte 15,16,64,16 // movups 0x10(%rax),%xmm0
|
||||
.byte 15,16,88,32 // movups 0x20(%rax),%xmm3
|
||||
.byte 15,16,80,48 // movups 0x30(%rax),%xmm2
|
||||
.byte 15,40,236 // movaps %xmm4,%xmm5
|
||||
.byte 15,20,232 // unpcklps %xmm0,%xmm5
|
||||
.byte 15,40,203 // movaps %xmm3,%xmm1
|
||||
.byte 15,20,202 // unpcklps %xmm2,%xmm1
|
||||
.byte 15,21,224 // unpckhps %xmm0,%xmm4
|
||||
.byte 15,21,218 // unpckhps %xmm2,%xmm3
|
||||
.byte 15,40,197 // movaps %xmm5,%xmm0
|
||||
.byte 102,15,20,193 // unpcklpd %xmm1,%xmm0
|
||||
.byte 15,18,205 // movhlps %xmm5,%xmm1
|
||||
.byte 15,40,212 // movaps %xmm4,%xmm2
|
||||
.byte 102,15,20,211 // unpcklpd %xmm3,%xmm2
|
||||
.byte 15,18,220 // movhlps %xmm4,%xmm3
|
||||
.byte 76,137,230 // mov %r12,%rsi
|
||||
.byte 72,173 // lods %ds:(%rsi),%rax
|
||||
.byte 76,137,255 // mov %r15,%rdi
|
||||
.byte 76,137,242 // mov %r14,%rdx
|
||||
.byte 15,40,4,36 // movaps (%rsp),%xmm0
|
||||
.byte 15,40,76,36,16 // movaps 0x10(%rsp),%xmm1
|
||||
.byte 15,40,84,36,32 // movaps 0x20(%rsp),%xmm2
|
||||
.byte 15,40,92,36,48 // movaps 0x30(%rsp),%xmm3
|
||||
.byte 15,40,100,36,64 // movaps 0x40(%rsp),%xmm4
|
||||
.byte 15,40,108,36,80 // movaps 0x50(%rsp),%xmm5
|
||||
.byte 15,40,116,36,96 // movaps 0x60(%rsp),%xmm6
|
||||
.byte 15,40,124,36,112 // movaps 0x70(%rsp),%xmm7
|
||||
.byte 72,129,196,128,0,0,0 // add $0x80,%rsp
|
||||
.byte 15,40,36,36 // movaps (%rsp),%xmm4
|
||||
.byte 15,40,108,36,16 // movaps 0x10(%rsp),%xmm5
|
||||
.byte 15,40,116,36,32 // movaps 0x20(%rsp),%xmm6
|
||||
.byte 15,40,124,36,48 // movaps 0x30(%rsp),%xmm7
|
||||
.byte 72,131,196,72 // add $0x48,%rsp
|
||||
.byte 91 // pop %rbx
|
||||
.byte 65,92 // pop %r12
|
||||
.byte 65,94 // pop %r14
|
||||
.byte 65,95 // pop %r15
|
||||
.byte 255,224 // jmpq *%rax
|
||||
@ -27052,37 +27134,65 @@ FUNCTION(_sk_callback_sse2)
|
||||
_sk_callback_sse2:
|
||||
.byte 65,87 // push %r15
|
||||
.byte 65,86 // push %r14
|
||||
.byte 65,84 // push %r12
|
||||
.byte 83 // push %rbx
|
||||
.byte 72,129,236,128,0,0,0 // sub $0x80,%rsp
|
||||
.byte 15,41,124,36,112 // movaps %xmm7,0x70(%rsp)
|
||||
.byte 15,41,116,36,96 // movaps %xmm6,0x60(%rsp)
|
||||
.byte 15,41,108,36,80 // movaps %xmm5,0x50(%rsp)
|
||||
.byte 15,41,100,36,64 // movaps %xmm4,0x40(%rsp)
|
||||
.byte 15,41,92,36,48 // movaps %xmm3,0x30(%rsp)
|
||||
.byte 15,41,84,36,32 // movaps %xmm2,0x20(%rsp)
|
||||
.byte 15,41,76,36,16 // movaps %xmm1,0x10(%rsp)
|
||||
.byte 15,41,4,36 // movaps %xmm0,(%rsp)
|
||||
.byte 72,131,236,72 // sub $0x48,%rsp
|
||||
.byte 15,41,124,36,48 // movaps %xmm7,0x30(%rsp)
|
||||
.byte 15,41,116,36,32 // movaps %xmm6,0x20(%rsp)
|
||||
.byte 15,41,108,36,16 // movaps %xmm5,0x10(%rsp)
|
||||
.byte 15,41,36,36 // movaps %xmm4,(%rsp)
|
||||
.byte 73,137,214 // mov %rdx,%r14
|
||||
.byte 73,137,255 // mov %rdi,%r15
|
||||
.byte 72,173 // lods %ds:(%rsi),%rax
|
||||
.byte 72,137,243 // mov %rsi,%rbx
|
||||
.byte 72,139,120,8 // mov 0x8(%rax),%rdi
|
||||
.byte 72,137,195 // mov %rax,%rbx
|
||||
.byte 73,137,244 // mov %rsi,%r12
|
||||
.byte 15,40,224 // movaps %xmm0,%xmm4
|
||||
.byte 15,20,225 // unpcklps %xmm1,%xmm4
|
||||
.byte 15,40,234 // movaps %xmm2,%xmm5
|
||||
.byte 15,20,235 // unpcklps %xmm3,%xmm5
|
||||
.byte 15,21,193 // unpckhps %xmm1,%xmm0
|
||||
.byte 15,21,211 // unpckhps %xmm3,%xmm2
|
||||
.byte 15,40,204 // movaps %xmm4,%xmm1
|
||||
.byte 102,15,20,205 // unpcklpd %xmm5,%xmm1
|
||||
.byte 15,18,236 // movhlps %xmm4,%xmm5
|
||||
.byte 15,40,216 // movaps %xmm0,%xmm3
|
||||
.byte 102,15,20,218 // unpcklpd %xmm2,%xmm3
|
||||
.byte 15,18,208 // movhlps %xmm0,%xmm2
|
||||
.byte 102,15,17,75,8 // movupd %xmm1,0x8(%rbx)
|
||||
.byte 15,17,107,24 // movups %xmm5,0x18(%rbx)
|
||||
.byte 102,15,17,91,40 // movupd %xmm3,0x28(%rbx)
|
||||
.byte 15,17,83,56 // movups %xmm2,0x38(%rbx)
|
||||
.byte 190,4,0,0,0 // mov $0x4,%esi
|
||||
.byte 255,16 // callq *(%rax)
|
||||
.byte 72,137,222 // mov %rbx,%rsi
|
||||
.byte 72,137,223 // mov %rbx,%rdi
|
||||
.byte 255,19 // callq *(%rbx)
|
||||
.byte 72,139,131,136,0,0,0 // mov 0x88(%rbx),%rax
|
||||
.byte 15,16,32 // movups (%rax),%xmm4
|
||||
.byte 15,16,64,16 // movups 0x10(%rax),%xmm0
|
||||
.byte 15,16,88,32 // movups 0x20(%rax),%xmm3
|
||||
.byte 15,16,80,48 // movups 0x30(%rax),%xmm2
|
||||
.byte 15,40,236 // movaps %xmm4,%xmm5
|
||||
.byte 15,20,232 // unpcklps %xmm0,%xmm5
|
||||
.byte 15,40,203 // movaps %xmm3,%xmm1
|
||||
.byte 15,20,202 // unpcklps %xmm2,%xmm1
|
||||
.byte 15,21,224 // unpckhps %xmm0,%xmm4
|
||||
.byte 15,21,218 // unpckhps %xmm2,%xmm3
|
||||
.byte 15,40,197 // movaps %xmm5,%xmm0
|
||||
.byte 102,15,20,193 // unpcklpd %xmm1,%xmm0
|
||||
.byte 15,18,205 // movhlps %xmm5,%xmm1
|
||||
.byte 15,40,212 // movaps %xmm4,%xmm2
|
||||
.byte 102,15,20,211 // unpcklpd %xmm3,%xmm2
|
||||
.byte 15,18,220 // movhlps %xmm4,%xmm3
|
||||
.byte 76,137,230 // mov %r12,%rsi
|
||||
.byte 72,173 // lods %ds:(%rsi),%rax
|
||||
.byte 76,137,255 // mov %r15,%rdi
|
||||
.byte 76,137,242 // mov %r14,%rdx
|
||||
.byte 15,40,4,36 // movaps (%rsp),%xmm0
|
||||
.byte 15,40,76,36,16 // movaps 0x10(%rsp),%xmm1
|
||||
.byte 15,40,84,36,32 // movaps 0x20(%rsp),%xmm2
|
||||
.byte 15,40,92,36,48 // movaps 0x30(%rsp),%xmm3
|
||||
.byte 15,40,100,36,64 // movaps 0x40(%rsp),%xmm4
|
||||
.byte 15,40,108,36,80 // movaps 0x50(%rsp),%xmm5
|
||||
.byte 15,40,116,36,96 // movaps 0x60(%rsp),%xmm6
|
||||
.byte 15,40,124,36,112 // movaps 0x70(%rsp),%xmm7
|
||||
.byte 72,129,196,128,0,0,0 // add $0x80,%rsp
|
||||
.byte 15,40,36,36 // movaps (%rsp),%xmm4
|
||||
.byte 15,40,108,36,16 // movaps 0x10(%rsp),%xmm5
|
||||
.byte 15,40,116,36,32 // movaps 0x20(%rsp),%xmm6
|
||||
.byte 15,40,124,36,48 // movaps 0x30(%rsp),%xmm7
|
||||
.byte 72,131,196,72 // add $0x48,%rsp
|
||||
.byte 91 // pop %rbx
|
||||
.byte 65,92 // pop %r12
|
||||
.byte 65,94 // pop %r14
|
||||
.byte 65,95 // pop %r15
|
||||
.byte 255,224 // jmpq *%rax
|
||||
|
@ -4203,40 +4203,68 @@ _sk_bicubic_p3y_hsw LABEL PROC
|
||||
|
||||
PUBLIC _sk_callback_hsw
|
||||
_sk_callback_hsw LABEL PROC
|
||||
DB 65,87 ; push %r15
|
||||
DB 65,86 ; push %r14
|
||||
DB 83 ; push %rbx
|
||||
DB 72,129,236,40,1,0,0 ; sub $0x128,%rsp
|
||||
DB 197,252,17,188,36,0,1,0,0 ; vmovups %ymm7,0x100(%rsp)
|
||||
DB 197,252,17,180,36,224,0,0,0 ; vmovups %ymm6,0xe0(%rsp)
|
||||
DB 197,252,17,172,36,192,0,0,0 ; vmovups %ymm5,0xc0(%rsp)
|
||||
DB 197,252,17,164,36,160,0,0,0 ; vmovups %ymm4,0xa0(%rsp)
|
||||
DB 197,252,17,156,36,128,0,0,0 ; vmovups %ymm3,0x80(%rsp)
|
||||
DB 197,252,17,84,36,96 ; vmovups %ymm2,0x60(%rsp)
|
||||
DB 197,252,17,76,36,64 ; vmovups %ymm1,0x40(%rsp)
|
||||
DB 197,252,17,68,36,32 ; vmovups %ymm0,0x20(%rsp)
|
||||
DB 72,137,203 ; mov %rcx,%rbx
|
||||
DB 72,129,236,160,0,0,0 ; sub $0xa0,%rsp
|
||||
DB 197,252,17,188,36,128,0,0,0 ; vmovups %ymm7,0x80(%rsp)
|
||||
DB 197,252,17,116,36,96 ; vmovups %ymm6,0x60(%rsp)
|
||||
DB 197,252,17,108,36,64 ; vmovups %ymm5,0x40(%rsp)
|
||||
DB 197,252,17,100,36,32 ; vmovups %ymm4,0x20(%rsp)
|
||||
DB 73,137,207 ; mov %rcx,%r15
|
||||
DB 73,137,214 ; mov %rdx,%r14
|
||||
DB 72,173 ; lods %ds:(%rsi),%rax
|
||||
DB 72,139,72,8 ; mov 0x8(%rax),%rcx
|
||||
DB 72,133,219 ; test %rbx,%rbx
|
||||
DB 72,137,195 ; mov %rax,%rbx
|
||||
DB 197,252,20,225 ; vunpcklps %ymm1,%ymm0,%ymm4
|
||||
DB 197,252,21,193 ; vunpckhps %ymm1,%ymm0,%ymm0
|
||||
DB 197,236,20,203 ; vunpcklps %ymm3,%ymm2,%ymm1
|
||||
DB 197,236,21,211 ; vunpckhps %ymm3,%ymm2,%ymm2
|
||||
DB 197,221,20,217 ; vunpcklpd %ymm1,%ymm4,%ymm3
|
||||
DB 197,221,21,201 ; vunpckhpd %ymm1,%ymm4,%ymm1
|
||||
DB 197,253,20,226 ; vunpcklpd %ymm2,%ymm0,%ymm4
|
||||
DB 197,253,21,194 ; vunpckhpd %ymm2,%ymm0,%ymm0
|
||||
DB 196,227,101,24,209,1 ; vinsertf128 $0x1,%xmm1,%ymm3,%ymm2
|
||||
DB 196,227,93,24,232,1 ; vinsertf128 $0x1,%xmm0,%ymm4,%ymm5
|
||||
DB 196,227,101,6,201,49 ; vperm2f128 $0x31,%ymm1,%ymm3,%ymm1
|
||||
DB 196,227,93,6,192,49 ; vperm2f128 $0x31,%ymm0,%ymm4,%ymm0
|
||||
DB 197,253,17,83,8 ; vmovupd %ymm2,0x8(%rbx)
|
||||
DB 197,253,17,107,40 ; vmovupd %ymm5,0x28(%rbx)
|
||||
DB 197,253,17,75,72 ; vmovupd %ymm1,0x48(%rbx)
|
||||
DB 197,253,17,67,104 ; vmovupd %ymm0,0x68(%rbx)
|
||||
DB 77,133,255 ; test %r15,%r15
|
||||
DB 186,8,0,0,0 ; mov $0x8,%edx
|
||||
DB 15,69,211 ; cmovne %ebx,%edx
|
||||
DB 65,15,69,215 ; cmovne %r15d,%edx
|
||||
DB 72,137,217 ; mov %rbx,%rcx
|
||||
DB 197,248,119 ; vzeroupper
|
||||
DB 255,16 ; callq *(%rax)
|
||||
DB 255,19 ; callq *(%rbx)
|
||||
DB 72,139,131,136,0,0,0 ; mov 0x88(%rbx),%rax
|
||||
DB 197,248,16,0 ; vmovups (%rax),%xmm0
|
||||
DB 197,248,16,72,16 ; vmovups 0x10(%rax),%xmm1
|
||||
DB 197,248,16,80,32 ; vmovups 0x20(%rax),%xmm2
|
||||
DB 197,248,16,88,48 ; vmovups 0x30(%rax),%xmm3
|
||||
DB 196,227,101,24,88,112,1 ; vinsertf128 $0x1,0x70(%rax),%ymm3,%ymm3
|
||||
DB 196,227,109,24,80,96,1 ; vinsertf128 $0x1,0x60(%rax),%ymm2,%ymm2
|
||||
DB 196,227,117,24,72,80,1 ; vinsertf128 $0x1,0x50(%rax),%ymm1,%ymm1
|
||||
DB 196,227,125,24,64,64,1 ; vinsertf128 $0x1,0x40(%rax),%ymm0,%ymm0
|
||||
DB 197,252,20,225 ; vunpcklps %ymm1,%ymm0,%ymm4
|
||||
DB 197,252,21,233 ; vunpckhps %ymm1,%ymm0,%ymm5
|
||||
DB 197,236,20,203 ; vunpcklps %ymm3,%ymm2,%ymm1
|
||||
DB 197,236,21,219 ; vunpckhps %ymm3,%ymm2,%ymm3
|
||||
DB 197,221,20,193 ; vunpcklpd %ymm1,%ymm4,%ymm0
|
||||
DB 197,221,21,201 ; vunpckhpd %ymm1,%ymm4,%ymm1
|
||||
DB 197,213,20,211 ; vunpcklpd %ymm3,%ymm5,%ymm2
|
||||
DB 197,213,21,219 ; vunpckhpd %ymm3,%ymm5,%ymm3
|
||||
DB 72,173 ; lods %ds:(%rsi),%rax
|
||||
DB 76,137,242 ; mov %r14,%rdx
|
||||
DB 72,137,217 ; mov %rbx,%rcx
|
||||
DB 197,252,16,68,36,32 ; vmovups 0x20(%rsp),%ymm0
|
||||
DB 197,252,16,76,36,64 ; vmovups 0x40(%rsp),%ymm1
|
||||
DB 197,252,16,84,36,96 ; vmovups 0x60(%rsp),%ymm2
|
||||
DB 197,252,16,156,36,128,0,0,0 ; vmovups 0x80(%rsp),%ymm3
|
||||
DB 197,252,16,164,36,160,0,0,0 ; vmovups 0xa0(%rsp),%ymm4
|
||||
DB 197,252,16,172,36,192,0,0,0 ; vmovups 0xc0(%rsp),%ymm5
|
||||
DB 197,252,16,180,36,224,0,0,0 ; vmovups 0xe0(%rsp),%ymm6
|
||||
DB 197,252,16,188,36,0,1,0,0 ; vmovups 0x100(%rsp),%ymm7
|
||||
DB 72,129,196,40,1,0,0 ; add $0x128,%rsp
|
||||
DB 76,137,249 ; mov %r15,%rcx
|
||||
DB 197,252,16,100,36,32 ; vmovups 0x20(%rsp),%ymm4
|
||||
DB 197,252,16,108,36,64 ; vmovups 0x40(%rsp),%ymm5
|
||||
DB 197,252,16,116,36,96 ; vmovups 0x60(%rsp),%ymm6
|
||||
DB 197,252,16,188,36,128,0,0,0 ; vmovups 0x80(%rsp),%ymm7
|
||||
DB 72,129,196,160,0,0,0 ; add $0xa0,%rsp
|
||||
DB 91 ; pop %rbx
|
||||
DB 65,94 ; pop %r14
|
||||
DB 65,95 ; pop %r15
|
||||
DB 255,224 ; jmpq *%rax
|
||||
|
||||
PUBLIC _sk_start_pipeline_avx
|
||||
@ -9849,40 +9877,68 @@ _sk_bicubic_p3y_avx LABEL PROC
|
||||
|
||||
PUBLIC _sk_callback_avx
|
||||
_sk_callback_avx LABEL PROC
|
||||
DB 65,87 ; push %r15
|
||||
DB 65,86 ; push %r14
|
||||
DB 83 ; push %rbx
|
||||
DB 72,129,236,40,1,0,0 ; sub $0x128,%rsp
|
||||
DB 197,252,17,188,36,0,1,0,0 ; vmovups %ymm7,0x100(%rsp)
|
||||
DB 197,252,17,180,36,224,0,0,0 ; vmovups %ymm6,0xe0(%rsp)
|
||||
DB 197,252,17,172,36,192,0,0,0 ; vmovups %ymm5,0xc0(%rsp)
|
||||
DB 197,252,17,164,36,160,0,0,0 ; vmovups %ymm4,0xa0(%rsp)
|
||||
DB 197,252,17,156,36,128,0,0,0 ; vmovups %ymm3,0x80(%rsp)
|
||||
DB 197,252,17,84,36,96 ; vmovups %ymm2,0x60(%rsp)
|
||||
DB 197,252,17,76,36,64 ; vmovups %ymm1,0x40(%rsp)
|
||||
DB 197,252,17,68,36,32 ; vmovups %ymm0,0x20(%rsp)
|
||||
DB 72,137,203 ; mov %rcx,%rbx
|
||||
DB 72,129,236,160,0,0,0 ; sub $0xa0,%rsp
|
||||
DB 197,252,17,188,36,128,0,0,0 ; vmovups %ymm7,0x80(%rsp)
|
||||
DB 197,252,17,116,36,96 ; vmovups %ymm6,0x60(%rsp)
|
||||
DB 197,252,17,108,36,64 ; vmovups %ymm5,0x40(%rsp)
|
||||
DB 197,252,17,100,36,32 ; vmovups %ymm4,0x20(%rsp)
|
||||
DB 73,137,207 ; mov %rcx,%r15
|
||||
DB 73,137,214 ; mov %rdx,%r14
|
||||
DB 72,173 ; lods %ds:(%rsi),%rax
|
||||
DB 72,139,72,8 ; mov 0x8(%rax),%rcx
|
||||
DB 72,133,219 ; test %rbx,%rbx
|
||||
DB 72,137,195 ; mov %rax,%rbx
|
||||
DB 197,252,20,225 ; vunpcklps %ymm1,%ymm0,%ymm4
|
||||
DB 197,252,21,193 ; vunpckhps %ymm1,%ymm0,%ymm0
|
||||
DB 197,236,20,203 ; vunpcklps %ymm3,%ymm2,%ymm1
|
||||
DB 197,236,21,211 ; vunpckhps %ymm3,%ymm2,%ymm2
|
||||
DB 197,221,20,217 ; vunpcklpd %ymm1,%ymm4,%ymm3
|
||||
DB 197,221,21,201 ; vunpckhpd %ymm1,%ymm4,%ymm1
|
||||
DB 197,253,20,226 ; vunpcklpd %ymm2,%ymm0,%ymm4
|
||||
DB 197,253,21,194 ; vunpckhpd %ymm2,%ymm0,%ymm0
|
||||
DB 196,227,101,24,209,1 ; vinsertf128 $0x1,%xmm1,%ymm3,%ymm2
|
||||
DB 196,227,93,24,232,1 ; vinsertf128 $0x1,%xmm0,%ymm4,%ymm5
|
||||
DB 196,227,101,6,201,49 ; vperm2f128 $0x31,%ymm1,%ymm3,%ymm1
|
||||
DB 196,227,93,6,192,49 ; vperm2f128 $0x31,%ymm0,%ymm4,%ymm0
|
||||
DB 197,253,17,83,8 ; vmovupd %ymm2,0x8(%rbx)
|
||||
DB 197,253,17,107,40 ; vmovupd %ymm5,0x28(%rbx)
|
||||
DB 197,253,17,75,72 ; vmovupd %ymm1,0x48(%rbx)
|
||||
DB 197,253,17,67,104 ; vmovupd %ymm0,0x68(%rbx)
|
||||
DB 77,133,255 ; test %r15,%r15
|
||||
DB 186,8,0,0,0 ; mov $0x8,%edx
|
||||
DB 15,69,211 ; cmovne %ebx,%edx
|
||||
DB 65,15,69,215 ; cmovne %r15d,%edx
|
||||
DB 72,137,217 ; mov %rbx,%rcx
|
||||
DB 197,248,119 ; vzeroupper
|
||||
DB 255,16 ; callq *(%rax)
|
||||
DB 255,19 ; callq *(%rbx)
|
||||
DB 72,139,131,136,0,0,0 ; mov 0x88(%rbx),%rax
|
||||
DB 197,248,16,0 ; vmovups (%rax),%xmm0
|
||||
DB 197,248,16,72,16 ; vmovups 0x10(%rax),%xmm1
|
||||
DB 197,248,16,80,32 ; vmovups 0x20(%rax),%xmm2
|
||||
DB 197,248,16,88,48 ; vmovups 0x30(%rax),%xmm3
|
||||
DB 196,227,101,24,88,112,1 ; vinsertf128 $0x1,0x70(%rax),%ymm3,%ymm3
|
||||
DB 196,227,109,24,80,96,1 ; vinsertf128 $0x1,0x60(%rax),%ymm2,%ymm2
|
||||
DB 196,227,117,24,72,80,1 ; vinsertf128 $0x1,0x50(%rax),%ymm1,%ymm1
|
||||
DB 196,227,125,24,64,64,1 ; vinsertf128 $0x1,0x40(%rax),%ymm0,%ymm0
|
||||
DB 197,252,20,225 ; vunpcklps %ymm1,%ymm0,%ymm4
|
||||
DB 197,252,21,233 ; vunpckhps %ymm1,%ymm0,%ymm5
|
||||
DB 197,236,20,203 ; vunpcklps %ymm3,%ymm2,%ymm1
|
||||
DB 197,236,21,219 ; vunpckhps %ymm3,%ymm2,%ymm3
|
||||
DB 197,221,20,193 ; vunpcklpd %ymm1,%ymm4,%ymm0
|
||||
DB 197,221,21,201 ; vunpckhpd %ymm1,%ymm4,%ymm1
|
||||
DB 197,213,20,211 ; vunpcklpd %ymm3,%ymm5,%ymm2
|
||||
DB 197,213,21,219 ; vunpckhpd %ymm3,%ymm5,%ymm3
|
||||
DB 72,173 ; lods %ds:(%rsi),%rax
|
||||
DB 76,137,242 ; mov %r14,%rdx
|
||||
DB 72,137,217 ; mov %rbx,%rcx
|
||||
DB 197,252,16,68,36,32 ; vmovups 0x20(%rsp),%ymm0
|
||||
DB 197,252,16,76,36,64 ; vmovups 0x40(%rsp),%ymm1
|
||||
DB 197,252,16,84,36,96 ; vmovups 0x60(%rsp),%ymm2
|
||||
DB 197,252,16,156,36,128,0,0,0 ; vmovups 0x80(%rsp),%ymm3
|
||||
DB 197,252,16,164,36,160,0,0,0 ; vmovups 0xa0(%rsp),%ymm4
|
||||
DB 197,252,16,172,36,192,0,0,0 ; vmovups 0xc0(%rsp),%ymm5
|
||||
DB 197,252,16,180,36,224,0,0,0 ; vmovups 0xe0(%rsp),%ymm6
|
||||
DB 197,252,16,188,36,0,1,0,0 ; vmovups 0x100(%rsp),%ymm7
|
||||
DB 72,129,196,40,1,0,0 ; add $0x128,%rsp
|
||||
DB 76,137,249 ; mov %r15,%rcx
|
||||
DB 197,252,16,100,36,32 ; vmovups 0x20(%rsp),%ymm4
|
||||
DB 197,252,16,108,36,64 ; vmovups 0x40(%rsp),%ymm5
|
||||
DB 197,252,16,116,36,96 ; vmovups 0x60(%rsp),%ymm6
|
||||
DB 197,252,16,188,36,128,0,0,0 ; vmovups 0x80(%rsp),%ymm7
|
||||
DB 72,129,196,160,0,0,0 ; add $0xa0,%rsp
|
||||
DB 91 ; pop %rbx
|
||||
DB 65,94 ; pop %r14
|
||||
DB 65,95 ; pop %r15
|
||||
DB 255,224 ; jmpq *%rax
|
||||
|
||||
PUBLIC _sk_start_pipeline_sse41
|
||||
@ -14144,29 +14200,57 @@ _sk_bicubic_p3y_sse41 LABEL PROC
|
||||
|
||||
PUBLIC _sk_callback_sse41
|
||||
_sk_callback_sse41 LABEL PROC
|
||||
DB 65,86 ; push %r14
|
||||
DB 83 ; push %rbx
|
||||
DB 72,131,236,32 ; sub $0x20,%rsp
|
||||
DB 72,131,236,40 ; sub $0x28,%rsp
|
||||
DB 68,15,40,197 ; movaps %xmm5,%xmm8
|
||||
DB 68,15,40,204 ; movaps %xmm4,%xmm9
|
||||
DB 68,15,40,211 ; movaps %xmm3,%xmm10
|
||||
DB 68,15,40,218 ; movaps %xmm2,%xmm11
|
||||
DB 68,15,40,225 ; movaps %xmm1,%xmm12
|
||||
DB 68,15,40,232 ; movaps %xmm0,%xmm13
|
||||
DB 72,137,211 ; mov %rdx,%rbx
|
||||
DB 73,137,214 ; mov %rdx,%r14
|
||||
DB 72,173 ; lods %ds:(%rsi),%rax
|
||||
DB 72,139,72,8 ; mov 0x8(%rax),%rcx
|
||||
DB 72,137,195 ; mov %rax,%rbx
|
||||
DB 15,40,224 ; movaps %xmm0,%xmm4
|
||||
DB 15,20,225 ; unpcklps %xmm1,%xmm4
|
||||
DB 15,40,234 ; movaps %xmm2,%xmm5
|
||||
DB 15,20,235 ; unpcklps %xmm3,%xmm5
|
||||
DB 15,21,193 ; unpckhps %xmm1,%xmm0
|
||||
DB 15,21,211 ; unpckhps %xmm3,%xmm2
|
||||
DB 15,40,204 ; movaps %xmm4,%xmm1
|
||||
DB 102,15,20,205 ; unpcklpd %xmm5,%xmm1
|
||||
DB 15,18,236 ; movhlps %xmm4,%xmm5
|
||||
DB 15,40,216 ; movaps %xmm0,%xmm3
|
||||
DB 102,15,20,218 ; unpcklpd %xmm2,%xmm3
|
||||
DB 15,18,208 ; movhlps %xmm0,%xmm2
|
||||
DB 102,15,17,75,8 ; movupd %xmm1,0x8(%rbx)
|
||||
DB 15,17,107,24 ; movups %xmm5,0x18(%rbx)
|
||||
DB 102,15,17,91,40 ; movupd %xmm3,0x28(%rbx)
|
||||
DB 15,17,83,56 ; movups %xmm2,0x38(%rbx)
|
||||
DB 186,4,0,0,0 ; mov $0x4,%edx
|
||||
DB 255,16 ; callq *(%rax)
|
||||
DB 72,137,217 ; mov %rbx,%rcx
|
||||
DB 255,19 ; callq *(%rbx)
|
||||
DB 72,139,131,136,0,0,0 ; mov 0x88(%rbx),%rax
|
||||
DB 15,16,32 ; movups (%rax),%xmm4
|
||||
DB 15,16,64,16 ; movups 0x10(%rax),%xmm0
|
||||
DB 15,16,88,32 ; movups 0x20(%rax),%xmm3
|
||||
DB 15,16,80,48 ; movups 0x30(%rax),%xmm2
|
||||
DB 15,40,236 ; movaps %xmm4,%xmm5
|
||||
DB 15,20,232 ; unpcklps %xmm0,%xmm5
|
||||
DB 15,40,203 ; movaps %xmm3,%xmm1
|
||||
DB 15,20,202 ; unpcklps %xmm2,%xmm1
|
||||
DB 15,21,224 ; unpckhps %xmm0,%xmm4
|
||||
DB 15,21,218 ; unpckhps %xmm2,%xmm3
|
||||
DB 15,40,197 ; movaps %xmm5,%xmm0
|
||||
DB 102,15,20,193 ; unpcklpd %xmm1,%xmm0
|
||||
DB 15,18,205 ; movhlps %xmm5,%xmm1
|
||||
DB 15,40,212 ; movaps %xmm4,%xmm2
|
||||
DB 102,15,20,211 ; unpcklpd %xmm3,%xmm2
|
||||
DB 15,18,220 ; movhlps %xmm4,%xmm3
|
||||
DB 72,173 ; lods %ds:(%rsi),%rax
|
||||
DB 72,137,218 ; mov %rbx,%rdx
|
||||
DB 65,15,40,197 ; movaps %xmm13,%xmm0
|
||||
DB 65,15,40,204 ; movaps %xmm12,%xmm1
|
||||
DB 65,15,40,211 ; movaps %xmm11,%xmm2
|
||||
DB 65,15,40,218 ; movaps %xmm10,%xmm3
|
||||
DB 76,137,242 ; mov %r14,%rdx
|
||||
DB 65,15,40,225 ; movaps %xmm9,%xmm4
|
||||
DB 65,15,40,232 ; movaps %xmm8,%xmm5
|
||||
DB 72,131,196,32 ; add $0x20,%rsp
|
||||
DB 72,131,196,40 ; add $0x28,%rsp
|
||||
DB 91 ; pop %rbx
|
||||
DB 65,94 ; pop %r14
|
||||
DB 255,224 ; jmpq *%rax
|
||||
|
||||
PUBLIC _sk_start_pipeline_sse2
|
||||
@ -18726,29 +18810,57 @@ _sk_bicubic_p3y_sse2 LABEL PROC
|
||||
|
||||
PUBLIC _sk_callback_sse2
|
||||
_sk_callback_sse2 LABEL PROC
|
||||
DB 65,86 ; push %r14
|
||||
DB 83 ; push %rbx
|
||||
DB 72,131,236,32 ; sub $0x20,%rsp
|
||||
DB 72,131,236,40 ; sub $0x28,%rsp
|
||||
DB 68,15,40,197 ; movaps %xmm5,%xmm8
|
||||
DB 68,15,40,204 ; movaps %xmm4,%xmm9
|
||||
DB 68,15,40,211 ; movaps %xmm3,%xmm10
|
||||
DB 68,15,40,218 ; movaps %xmm2,%xmm11
|
||||
DB 68,15,40,225 ; movaps %xmm1,%xmm12
|
||||
DB 68,15,40,232 ; movaps %xmm0,%xmm13
|
||||
DB 72,137,211 ; mov %rdx,%rbx
|
||||
DB 73,137,214 ; mov %rdx,%r14
|
||||
DB 72,173 ; lods %ds:(%rsi),%rax
|
||||
DB 72,139,72,8 ; mov 0x8(%rax),%rcx
|
||||
DB 72,137,195 ; mov %rax,%rbx
|
||||
DB 15,40,224 ; movaps %xmm0,%xmm4
|
||||
DB 15,20,225 ; unpcklps %xmm1,%xmm4
|
||||
DB 15,40,234 ; movaps %xmm2,%xmm5
|
||||
DB 15,20,235 ; unpcklps %xmm3,%xmm5
|
||||
DB 15,21,193 ; unpckhps %xmm1,%xmm0
|
||||
DB 15,21,211 ; unpckhps %xmm3,%xmm2
|
||||
DB 15,40,204 ; movaps %xmm4,%xmm1
|
||||
DB 102,15,20,205 ; unpcklpd %xmm5,%xmm1
|
||||
DB 15,18,236 ; movhlps %xmm4,%xmm5
|
||||
DB 15,40,216 ; movaps %xmm0,%xmm3
|
||||
DB 102,15,20,218 ; unpcklpd %xmm2,%xmm3
|
||||
DB 15,18,208 ; movhlps %xmm0,%xmm2
|
||||
DB 102,15,17,75,8 ; movupd %xmm1,0x8(%rbx)
|
||||
DB 15,17,107,24 ; movups %xmm5,0x18(%rbx)
|
||||
DB 102,15,17,91,40 ; movupd %xmm3,0x28(%rbx)
|
||||
DB 15,17,83,56 ; movups %xmm2,0x38(%rbx)
|
||||
DB 186,4,0,0,0 ; mov $0x4,%edx
|
||||
DB 255,16 ; callq *(%rax)
|
||||
DB 72,137,217 ; mov %rbx,%rcx
|
||||
DB 255,19 ; callq *(%rbx)
|
||||
DB 72,139,131,136,0,0,0 ; mov 0x88(%rbx),%rax
|
||||
DB 15,16,32 ; movups (%rax),%xmm4
|
||||
DB 15,16,64,16 ; movups 0x10(%rax),%xmm0
|
||||
DB 15,16,88,32 ; movups 0x20(%rax),%xmm3
|
||||
DB 15,16,80,48 ; movups 0x30(%rax),%xmm2
|
||||
DB 15,40,236 ; movaps %xmm4,%xmm5
|
||||
DB 15,20,232 ; unpcklps %xmm0,%xmm5
|
||||
DB 15,40,203 ; movaps %xmm3,%xmm1
|
||||
DB 15,20,202 ; unpcklps %xmm2,%xmm1
|
||||
DB 15,21,224 ; unpckhps %xmm0,%xmm4
|
||||
DB 15,21,218 ; unpckhps %xmm2,%xmm3
|
||||
DB 15,40,197 ; movaps %xmm5,%xmm0
|
||||
DB 102,15,20,193 ; unpcklpd %xmm1,%xmm0
|
||||
DB 15,18,205 ; movhlps %xmm5,%xmm1
|
||||
DB 15,40,212 ; movaps %xmm4,%xmm2
|
||||
DB 102,15,20,211 ; unpcklpd %xmm3,%xmm2
|
||||
DB 15,18,220 ; movhlps %xmm4,%xmm3
|
||||
DB 72,173 ; lods %ds:(%rsi),%rax
|
||||
DB 72,137,218 ; mov %rbx,%rdx
|
||||
DB 65,15,40,197 ; movaps %xmm13,%xmm0
|
||||
DB 65,15,40,204 ; movaps %xmm12,%xmm1
|
||||
DB 65,15,40,211 ; movaps %xmm11,%xmm2
|
||||
DB 65,15,40,218 ; movaps %xmm10,%xmm3
|
||||
DB 76,137,242 ; mov %r14,%rdx
|
||||
DB 65,15,40,225 ; movaps %xmm9,%xmm4
|
||||
DB 65,15,40,232 ; movaps %xmm8,%xmm5
|
||||
DB 72,131,196,32 ; add $0x20,%rsp
|
||||
DB 72,131,196,40 ; add $0x28,%rsp
|
||||
DB 91 ; pop %rbx
|
||||
DB 65,94 ; pop %r14
|
||||
DB 255,224 ; jmpq *%rax
|
||||
ENDIF
|
||||
END
|
||||
|
@ -1070,6 +1070,8 @@ STAGE(bicubic_p1y) { bicubic_y<+1>(ctx, &g); }
|
||||
STAGE(bicubic_p3y) { bicubic_y<+3>(ctx, &g); }
|
||||
|
||||
STAGE(callback) {
|
||||
auto c = (const SkJumper_CallbackCtx*)ctx;
|
||||
c->fn(c->arg, tail ? tail : kStride);
|
||||
auto c = (SkJumper_CallbackCtx*)ctx;
|
||||
store4(c->rgba,0, r,g,b,a);
|
||||
c->fn(c, tail ? tail : kStride);
|
||||
load4(c->read_from,0, &r,&g,&b,&a);
|
||||
}
|
||||
|
@ -9,7 +9,6 @@
|
||||
#define SkRasterPipeline_opts_DEFINED
|
||||
|
||||
#include "SkColorPriv.h"
|
||||
#include "SkColorLookUpTable.h"
|
||||
#include "SkColorSpaceXform_A2B.h"
|
||||
#include "SkColorSpaceXformPriv.h"
|
||||
#include "SkHalf.h"
|
||||
@ -796,29 +795,6 @@ STAGE_CTX(table_g, const SkTableTransferFn*) { g = table(g, *ctx); }
|
||||
STAGE_CTX(table_b, const SkTableTransferFn*) { b = table(b, *ctx); }
|
||||
STAGE_CTX(table_a, const SkTableTransferFn*) { a = table(a, *ctx); }
|
||||
|
||||
STAGE_CTX(color_lookup_table, const SkColorLookUpTable*) {
|
||||
const SkColorLookUpTable* colorLUT = ctx;
|
||||
SkASSERT(3 == colorLUT->inputChannels() || 4 == colorLUT->inputChannels());
|
||||
SkASSERT(3 == colorLUT->outputChannels());
|
||||
float result[3][N];
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const float in[4] = { r[i], g[i], b[i], a[i] };
|
||||
float out[3];
|
||||
colorLUT->interp(out, in);
|
||||
for (int j = 0; j < colorLUT->outputChannels(); ++j) {
|
||||
result[j][i] = out[j];
|
||||
}
|
||||
}
|
||||
r = SkNf::Load(result[0]);
|
||||
g = SkNf::Load(result[1]);
|
||||
b = SkNf::Load(result[2]);
|
||||
if (4 == colorLUT->inputChannels()) {
|
||||
// we must set the pixel to opaque, as the alpha channel was used
|
||||
// as input before this.
|
||||
a = 1.f;
|
||||
}
|
||||
}
|
||||
|
||||
STAGE(lab_to_xyz) {
|
||||
const auto lab_l = r * 100.0f;
|
||||
const auto lab_a = g * 255.0f - 128.0f;
|
||||
@ -1099,8 +1075,10 @@ STAGE_CTX(shader_adapter, SkShader::Context*) {
|
||||
}
|
||||
|
||||
STAGE_CTX(callback, const void*) {
|
||||
auto c = (const SkJumper_CallbackCtx*)ctx;
|
||||
c->fn(c->arg, tail ? tail : N);
|
||||
auto c = (SkJumper_CallbackCtx*)ctx;
|
||||
SkNf::Store4(c->rgba, r,g,b,a);
|
||||
c->fn(c, tail ? tail : N);
|
||||
SkNf::Load4(c->read_from, &r,&g,&b,&a);
|
||||
}
|
||||
|
||||
SI Fn enum_to_Fn(SkRasterPipeline::StockStage st) {
|
||||
|
Loading…
Reference in New Issue
Block a user