pretty up some SkSplicer code
- build_stages.py uses gobjdump for both architectures, minor formatting changes - let byte arrays be written inline in splice() Change-Id: I84bd47c18e5ae0b34b35f8c2f0a329fb1ea58f60 Reviewed-on: https://skia-review.googlesource.com/6833 Commit-Queue: Mike Klein <mtklein@chromium.org> Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
parent
3444cadedd
commit
fb00390b46
@ -285,248 +285,248 @@ static const unsigned int kSplice_store_f16[] = {
|
|||||||
#else
|
#else
|
||||||
|
|
||||||
static const unsigned char kSplice_clear[] = {
|
static const unsigned char kSplice_clear[] = {
|
||||||
0xc5,0xfc,0x57,0xc0, // vxorps %ymm0, %ymm0, %ymm0
|
0xc5,0xfc,0x57,0xc0, // vxorps %ymm0,%ymm0,%ymm0
|
||||||
0xc5,0xf4,0x57,0xc9, // vxorps %ymm1, %ymm1, %ymm1
|
0xc5,0xf4,0x57,0xc9, // vxorps %ymm1,%ymm1,%ymm1
|
||||||
0xc5,0xec,0x57,0xd2, // vxorps %ymm2, %ymm2, %ymm2
|
0xc5,0xec,0x57,0xd2, // vxorps %ymm2,%ymm2,%ymm2
|
||||||
0xc5,0xe4,0x57,0xdb, // vxorps %ymm3, %ymm3, %ymm3
|
0xc5,0xe4,0x57,0xdb, // vxorps %ymm3,%ymm3,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_plus[] = {
|
static const unsigned char kSplice_plus[] = {
|
||||||
0xc5,0xfc,0x58,0xc4, // vaddps %ymm4, %ymm0, %ymm0
|
0xc5,0xfc,0x58,0xc4, // vaddps %ymm4,%ymm0,%ymm0
|
||||||
0xc5,0xf4,0x58,0xcd, // vaddps %ymm5, %ymm1, %ymm1
|
0xc5,0xf4,0x58,0xcd, // vaddps %ymm5,%ymm1,%ymm1
|
||||||
0xc5,0xec,0x58,0xd6, // vaddps %ymm6, %ymm2, %ymm2
|
0xc5,0xec,0x58,0xd6, // vaddps %ymm6,%ymm2,%ymm2
|
||||||
0xc5,0xe4,0x58,0xdf, // vaddps %ymm7, %ymm3, %ymm3
|
0xc5,0xe4,0x58,0xdf, // vaddps %ymm7,%ymm3,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_srcover[] = {
|
static const unsigned char kSplice_srcover[] = {
|
||||||
0xc4,0x62,0x7d,0x18,0x41,0x04, // vbroadcastss 0x4(%rcx), %ymm8
|
0xc4,0x62,0x7d,0x18,0x41,0x04, // vbroadcastss 0x4(%rcx),%ymm8
|
||||||
0xc5,0x3c,0x5c,0xc3, // vsubps %ymm3, %ymm8, %ymm8
|
0xc5,0x3c,0x5c,0xc3, // vsubps %ymm3,%ymm8,%ymm8
|
||||||
0xc4,0xc2,0x5d,0xb8,0xc0, // vfmadd231ps %ymm8, %ymm4, %ymm0
|
0xc4,0xc2,0x5d,0xb8,0xc0, // vfmadd231ps %ymm8,%ymm4,%ymm0
|
||||||
0xc4,0xc2,0x55,0xb8,0xc8, // vfmadd231ps %ymm8, %ymm5, %ymm1
|
0xc4,0xc2,0x55,0xb8,0xc8, // vfmadd231ps %ymm8,%ymm5,%ymm1
|
||||||
0xc4,0xc2,0x4d,0xb8,0xd0, // vfmadd231ps %ymm8, %ymm6, %ymm2
|
0xc4,0xc2,0x4d,0xb8,0xd0, // vfmadd231ps %ymm8,%ymm6,%ymm2
|
||||||
0xc4,0xc2,0x4d,0xb8,0xd8, // vfmadd231ps %ymm8, %ymm6, %ymm3
|
0xc4,0xc2,0x4d,0xb8,0xd8, // vfmadd231ps %ymm8,%ymm6,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_dstover[] = {
|
static const unsigned char kSplice_dstover[] = {
|
||||||
0xc4,0x62,0x7d,0x18,0x41,0x04, // vbroadcastss 0x4(%rcx), %ymm8
|
0xc4,0x62,0x7d,0x18,0x41,0x04, // vbroadcastss 0x4(%rcx),%ymm8
|
||||||
0xc5,0x3c,0x5c,0xc7, // vsubps %ymm7, %ymm8, %ymm8
|
0xc5,0x3c,0x5c,0xc7, // vsubps %ymm7,%ymm8,%ymm8
|
||||||
0xc4,0xc2,0x7d,0xb8,0xe0, // vfmadd231ps %ymm8, %ymm0, %ymm4
|
0xc4,0xc2,0x7d,0xb8,0xe0, // vfmadd231ps %ymm8,%ymm0,%ymm4
|
||||||
0xc4,0xc2,0x75,0xb8,0xe8, // vfmadd231ps %ymm8, %ymm1, %ymm5
|
0xc4,0xc2,0x75,0xb8,0xe8, // vfmadd231ps %ymm8,%ymm1,%ymm5
|
||||||
0xc4,0xc2,0x6d,0xb8,0xf0, // vfmadd231ps %ymm8, %ymm2, %ymm6
|
0xc4,0xc2,0x6d,0xb8,0xf0, // vfmadd231ps %ymm8,%ymm2,%ymm6
|
||||||
0xc4,0xc2,0x6d,0xb8,0xf8, // vfmadd231ps %ymm8, %ymm2, %ymm7
|
0xc4,0xc2,0x6d,0xb8,0xf8, // vfmadd231ps %ymm8,%ymm2,%ymm7
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_clamp_0[] = {
|
static const unsigned char kSplice_clamp_0[] = {
|
||||||
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8, %ymm8, %ymm8
|
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8,%ymm8,%ymm8
|
||||||
0xc4,0xc1,0x7c,0x5f,0xc0, // vmaxps %ymm8, %ymm0, %ymm0
|
0xc4,0xc1,0x7c,0x5f,0xc0, // vmaxps %ymm8,%ymm0,%ymm0
|
||||||
0xc4,0xc1,0x74,0x5f,0xc8, // vmaxps %ymm8, %ymm1, %ymm1
|
0xc4,0xc1,0x74,0x5f,0xc8, // vmaxps %ymm8,%ymm1,%ymm1
|
||||||
0xc4,0xc1,0x6c,0x5f,0xd0, // vmaxps %ymm8, %ymm2, %ymm2
|
0xc4,0xc1,0x6c,0x5f,0xd0, // vmaxps %ymm8,%ymm2,%ymm2
|
||||||
0xc4,0xc1,0x64,0x5f,0xd8, // vmaxps %ymm8, %ymm3, %ymm3
|
0xc4,0xc1,0x64,0x5f,0xd8, // vmaxps %ymm8,%ymm3,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_clamp_1[] = {
|
static const unsigned char kSplice_clamp_1[] = {
|
||||||
0xc4,0x62,0x7d,0x18,0x41,0x04, // vbroadcastss 0x4(%rcx), %ymm8
|
0xc4,0x62,0x7d,0x18,0x41,0x04, // vbroadcastss 0x4(%rcx),%ymm8
|
||||||
0xc4,0xc1,0x7c,0x5d,0xc0, // vminps %ymm8, %ymm0, %ymm0
|
0xc4,0xc1,0x7c,0x5d,0xc0, // vminps %ymm8,%ymm0,%ymm0
|
||||||
0xc4,0xc1,0x74,0x5d,0xc8, // vminps %ymm8, %ymm1, %ymm1
|
0xc4,0xc1,0x74,0x5d,0xc8, // vminps %ymm8,%ymm1,%ymm1
|
||||||
0xc4,0xc1,0x6c,0x5d,0xd0, // vminps %ymm8, %ymm2, %ymm2
|
0xc4,0xc1,0x6c,0x5d,0xd0, // vminps %ymm8,%ymm2,%ymm2
|
||||||
0xc4,0xc1,0x64,0x5d,0xd8, // vminps %ymm8, %ymm3, %ymm3
|
0xc4,0xc1,0x64,0x5d,0xd8, // vminps %ymm8,%ymm3,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_clamp_a[] = {
|
static const unsigned char kSplice_clamp_a[] = {
|
||||||
0xc4,0x62,0x7d,0x18,0x41,0x04, // vbroadcastss 0x4(%rcx), %ymm8
|
0xc4,0x62,0x7d,0x18,0x41,0x04, // vbroadcastss 0x4(%rcx),%ymm8
|
||||||
0xc4,0xc1,0x64,0x5d,0xd8, // vminps %ymm8, %ymm3, %ymm3
|
0xc4,0xc1,0x64,0x5d,0xd8, // vminps %ymm8,%ymm3,%ymm3
|
||||||
0xc5,0xfc,0x5d,0xc3, // vminps %ymm3, %ymm0, %ymm0
|
0xc5,0xfc,0x5d,0xc3, // vminps %ymm3,%ymm0,%ymm0
|
||||||
0xc5,0xf4,0x5d,0xcb, // vminps %ymm3, %ymm1, %ymm1
|
0xc5,0xf4,0x5d,0xcb, // vminps %ymm3,%ymm1,%ymm1
|
||||||
0xc5,0xec,0x5d,0xd3, // vminps %ymm3, %ymm2, %ymm2
|
0xc5,0xec,0x5d,0xd3, // vminps %ymm3,%ymm2,%ymm2
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_swap[] = {
|
static const unsigned char kSplice_swap[] = {
|
||||||
0xc5,0x7c,0x28,0xc3, // vmovaps %ymm3, %ymm8
|
0xc5,0x7c,0x28,0xc3, // vmovaps %ymm3,%ymm8
|
||||||
0xc5,0x7c,0x28,0xca, // vmovaps %ymm2, %ymm9
|
0xc5,0x7c,0x28,0xca, // vmovaps %ymm2,%ymm9
|
||||||
0xc5,0x7c,0x28,0xd1, // vmovaps %ymm1, %ymm10
|
0xc5,0x7c,0x28,0xd1, // vmovaps %ymm1,%ymm10
|
||||||
0xc5,0x7c,0x28,0xd8, // vmovaps %ymm0, %ymm11
|
0xc5,0x7c,0x28,0xd8, // vmovaps %ymm0,%ymm11
|
||||||
0xc5,0xfc,0x28,0xc4, // vmovaps %ymm4, %ymm0
|
0xc5,0xfc,0x28,0xc4, // vmovaps %ymm4,%ymm0
|
||||||
0xc5,0xfc,0x28,0xcd, // vmovaps %ymm5, %ymm1
|
0xc5,0xfc,0x28,0xcd, // vmovaps %ymm5,%ymm1
|
||||||
0xc5,0xfc,0x28,0xd6, // vmovaps %ymm6, %ymm2
|
0xc5,0xfc,0x28,0xd6, // vmovaps %ymm6,%ymm2
|
||||||
0xc5,0xfc,0x28,0xdf, // vmovaps %ymm7, %ymm3
|
0xc5,0xfc,0x28,0xdf, // vmovaps %ymm7,%ymm3
|
||||||
0xc5,0x7c,0x29,0xdc, // vmovaps %ymm11, %ymm4
|
0xc5,0x7c,0x29,0xdc, // vmovaps %ymm11,%ymm4
|
||||||
0xc5,0x7c,0x29,0xd5, // vmovaps %ymm10, %ymm5
|
0xc5,0x7c,0x29,0xd5, // vmovaps %ymm10,%ymm5
|
||||||
0xc5,0x7c,0x29,0xce, // vmovaps %ymm9, %ymm6
|
0xc5,0x7c,0x29,0xce, // vmovaps %ymm9,%ymm6
|
||||||
0xc5,0x7c,0x29,0xc7, // vmovaps %ymm8, %ymm7
|
0xc5,0x7c,0x29,0xc7, // vmovaps %ymm8,%ymm7
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_move_src_dst[] = {
|
static const unsigned char kSplice_move_src_dst[] = {
|
||||||
0xc5,0xfc,0x28,0xe0, // vmovaps %ymm0, %ymm4
|
0xc5,0xfc,0x28,0xe0, // vmovaps %ymm0,%ymm4
|
||||||
0xc5,0xfc,0x28,0xe9, // vmovaps %ymm1, %ymm5
|
0xc5,0xfc,0x28,0xe9, // vmovaps %ymm1,%ymm5
|
||||||
0xc5,0xfc,0x28,0xf2, // vmovaps %ymm2, %ymm6
|
0xc5,0xfc,0x28,0xf2, // vmovaps %ymm2,%ymm6
|
||||||
0xc5,0xfc,0x28,0xfb, // vmovaps %ymm3, %ymm7
|
0xc5,0xfc,0x28,0xfb, // vmovaps %ymm3,%ymm7
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_move_dst_src[] = {
|
static const unsigned char kSplice_move_dst_src[] = {
|
||||||
0xc5,0xfc,0x28,0xc4, // vmovaps %ymm4, %ymm0
|
0xc5,0xfc,0x28,0xc4, // vmovaps %ymm4,%ymm0
|
||||||
0xc5,0xfc,0x28,0xcd, // vmovaps %ymm5, %ymm1
|
0xc5,0xfc,0x28,0xcd, // vmovaps %ymm5,%ymm1
|
||||||
0xc5,0xfc,0x28,0xd6, // vmovaps %ymm6, %ymm2
|
0xc5,0xfc,0x28,0xd6, // vmovaps %ymm6,%ymm2
|
||||||
0xc5,0xfc,0x28,0xdf, // vmovaps %ymm7, %ymm3
|
0xc5,0xfc,0x28,0xdf, // vmovaps %ymm7,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_premul[] = {
|
static const unsigned char kSplice_premul[] = {
|
||||||
0xc5,0xfc,0x59,0xc3, // vmulps %ymm3, %ymm0, %ymm0
|
0xc5,0xfc,0x59,0xc3, // vmulps %ymm3,%ymm0,%ymm0
|
||||||
0xc5,0xf4,0x59,0xcb, // vmulps %ymm3, %ymm1, %ymm1
|
0xc5,0xf4,0x59,0xcb, // vmulps %ymm3,%ymm1,%ymm1
|
||||||
0xc5,0xec,0x59,0xd3, // vmulps %ymm3, %ymm2, %ymm2
|
0xc5,0xec,0x59,0xd3, // vmulps %ymm3,%ymm2,%ymm2
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_unpremul[] = {
|
static const unsigned char kSplice_unpremul[] = {
|
||||||
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8, %ymm8, %ymm8
|
0xc4,0x41,0x3c,0x57,0xc0, // vxorps %ymm8,%ymm8,%ymm8
|
||||||
0xc4,0x41,0x64,0xc2,0xc8,0x00, // vcmpeqps %ymm8, %ymm3, %ymm9
|
0xc4,0x41,0x64,0xc2,0xc8,0x00, // vcmpeqps %ymm8,%ymm3,%ymm9
|
||||||
0xc4,0x62,0x7d,0x18,0x51,0x04, // vbroadcastss 0x4(%rcx), %ymm10
|
0xc4,0x62,0x7d,0x18,0x51,0x04, // vbroadcastss 0x4(%rcx),%ymm10
|
||||||
0xc5,0x2c,0x5e,0xd3, // vdivps %ymm3, %ymm10, %ymm10
|
0xc5,0x2c,0x5e,0xd3, // vdivps %ymm3,%ymm10,%ymm10
|
||||||
0xc4,0x43,0x2d,0x4a,0xc0,0x90, // vblendvps %ymm9, %ymm8, %ymm10, %ymm8
|
0xc4,0x43,0x2d,0x4a,0xc0,0x90, // vblendvps %ymm9,%ymm8,%ymm10,%ymm8
|
||||||
0xc5,0xbc,0x59,0xc0, // vmulps %ymm0, %ymm8, %ymm0
|
0xc5,0xbc,0x59,0xc0, // vmulps %ymm0,%ymm8,%ymm0
|
||||||
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1, %ymm8, %ymm1
|
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm1
|
||||||
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2, %ymm8, %ymm2
|
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2,%ymm8,%ymm2
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_from_srgb[] = {
|
static const unsigned char kSplice_from_srgb[] = {
|
||||||
0xc4,0x62,0x7d,0x18,0x41,0x1c, // vbroadcastss 0x1c(%rcx), %ymm8
|
0xc4,0x62,0x7d,0x18,0x41,0x1c, // vbroadcastss 0x1c(%rcx),%ymm8
|
||||||
0xc5,0x3c,0x59,0xc8, // vmulps %ymm0, %ymm8, %ymm9
|
0xc5,0x3c,0x59,0xc8, // vmulps %ymm0,%ymm8,%ymm9
|
||||||
0xc5,0x7c,0x59,0xd0, // vmulps %ymm0, %ymm0, %ymm10
|
0xc5,0x7c,0x59,0xd0, // vmulps %ymm0,%ymm0,%ymm10
|
||||||
0xc4,0x62,0x7d,0x18,0x59,0x18, // vbroadcastss 0x18(%rcx), %ymm11
|
0xc4,0x62,0x7d,0x18,0x59,0x18, // vbroadcastss 0x18(%rcx),%ymm11
|
||||||
0xc4,0x62,0x7d,0x18,0x61,0x14, // vbroadcastss 0x14(%rcx), %ymm12
|
0xc4,0x62,0x7d,0x18,0x61,0x14, // vbroadcastss 0x14(%rcx),%ymm12
|
||||||
0xc4,0x41,0x7c,0x28,0xeb, // vmovaps %ymm11, %ymm13
|
0xc4,0x41,0x7c,0x28,0xeb, // vmovaps %ymm11,%ymm13
|
||||||
0xc4,0x42,0x7d,0xa8,0xec, // vfmadd213ps %ymm12, %ymm0, %ymm13
|
0xc4,0x42,0x7d,0xa8,0xec, // vfmadd213ps %ymm12,%ymm0,%ymm13
|
||||||
0xc4,0x62,0x7d,0x18,0x71,0x10, // vbroadcastss 0x10(%rcx), %ymm14
|
0xc4,0x62,0x7d,0x18,0x71,0x10, // vbroadcastss 0x10(%rcx),%ymm14
|
||||||
0xc4,0x42,0x2d,0xa8,0xee, // vfmadd213ps %ymm14, %ymm10, %ymm13
|
0xc4,0x42,0x2d,0xa8,0xee, // vfmadd213ps %ymm14,%ymm10,%ymm13
|
||||||
0xc4,0x62,0x7d,0x18,0x51,0x20, // vbroadcastss 0x20(%rcx), %ymm10
|
0xc4,0x62,0x7d,0x18,0x51,0x20, // vbroadcastss 0x20(%rcx),%ymm10
|
||||||
0xc4,0xc1,0x7c,0xc2,0xc2,0x01, // vcmpltps %ymm10, %ymm0, %ymm0
|
0xc4,0xc1,0x7c,0xc2,0xc2,0x01, // vcmpltps %ymm10,%ymm0,%ymm0
|
||||||
0xc4,0xc3,0x15,0x4a,0xc1,0x00, // vblendvps %ymm0, %ymm9, %ymm13, %ymm0
|
0xc4,0xc3,0x15,0x4a,0xc1,0x00, // vblendvps %ymm0,%ymm9,%ymm13,%ymm0
|
||||||
0xc5,0x3c,0x59,0xc9, // vmulps %ymm1, %ymm8, %ymm9
|
0xc5,0x3c,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm9
|
||||||
0xc5,0x74,0x59,0xe9, // vmulps %ymm1, %ymm1, %ymm13
|
0xc5,0x74,0x59,0xe9, // vmulps %ymm1,%ymm1,%ymm13
|
||||||
0xc4,0x41,0x7c,0x28,0xfb, // vmovaps %ymm11, %ymm15
|
0xc4,0x41,0x7c,0x28,0xfb, // vmovaps %ymm11,%ymm15
|
||||||
0xc4,0x42,0x75,0xa8,0xfc, // vfmadd213ps %ymm12, %ymm1, %ymm15
|
0xc4,0x42,0x75,0xa8,0xfc, // vfmadd213ps %ymm12,%ymm1,%ymm15
|
||||||
0xc4,0x42,0x15,0xa8,0xfe, // vfmadd213ps %ymm14, %ymm13, %ymm15
|
0xc4,0x42,0x15,0xa8,0xfe, // vfmadd213ps %ymm14,%ymm13,%ymm15
|
||||||
0xc4,0xc1,0x74,0xc2,0xca,0x01, // vcmpltps %ymm10, %ymm1, %ymm1
|
0xc4,0xc1,0x74,0xc2,0xca,0x01, // vcmpltps %ymm10,%ymm1,%ymm1
|
||||||
0xc4,0xc3,0x05,0x4a,0xc9,0x10, // vblendvps %ymm1, %ymm9, %ymm15, %ymm1
|
0xc4,0xc3,0x05,0x4a,0xc9,0x10, // vblendvps %ymm1,%ymm9,%ymm15,%ymm1
|
||||||
0xc5,0x3c,0x59,0xc2, // vmulps %ymm2, %ymm8, %ymm8
|
0xc5,0x3c,0x59,0xc2, // vmulps %ymm2,%ymm8,%ymm8
|
||||||
0xc5,0x6c,0x59,0xca, // vmulps %ymm2, %ymm2, %ymm9
|
0xc5,0x6c,0x59,0xca, // vmulps %ymm2,%ymm2,%ymm9
|
||||||
0xc4,0x42,0x6d,0xa8,0xdc, // vfmadd213ps %ymm12, %ymm2, %ymm11
|
0xc4,0x42,0x6d,0xa8,0xdc, // vfmadd213ps %ymm12,%ymm2,%ymm11
|
||||||
0xc4,0x42,0x35,0xa8,0xde, // vfmadd213ps %ymm14, %ymm9, %ymm11
|
0xc4,0x42,0x35,0xa8,0xde, // vfmadd213ps %ymm14,%ymm9,%ymm11
|
||||||
0xc4,0xc1,0x6c,0xc2,0xd2,0x01, // vcmpltps %ymm10, %ymm2, %ymm2
|
0xc4,0xc1,0x6c,0xc2,0xd2,0x01, // vcmpltps %ymm10,%ymm2,%ymm2
|
||||||
0xc4,0xc3,0x25,0x4a,0xd0,0x20, // vblendvps %ymm2, %ymm8, %ymm11, %ymm2
|
0xc4,0xc3,0x25,0x4a,0xd0,0x20, // vblendvps %ymm2,%ymm8,%ymm11,%ymm2
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_to_srgb[] = {
|
static const unsigned char kSplice_to_srgb[] = {
|
||||||
0xc5,0x7c,0x52,0xc0, // vrsqrtps %ymm0, %ymm8
|
0xc5,0x7c,0x52,0xc0, // vrsqrtps %ymm0,%ymm8
|
||||||
0xc4,0x41,0x7c,0x53,0xc8, // vrcpps %ymm8, %ymm9
|
0xc4,0x41,0x7c,0x53,0xc8, // vrcpps %ymm8,%ymm9
|
||||||
0xc4,0x41,0x7c,0x52,0xd0, // vrsqrtps %ymm8, %ymm10
|
0xc4,0x41,0x7c,0x52,0xd0, // vrsqrtps %ymm8,%ymm10
|
||||||
0xc4,0x62,0x7d,0x18,0x41,0x24, // vbroadcastss 0x24(%rcx), %ymm8
|
0xc4,0x62,0x7d,0x18,0x41,0x24, // vbroadcastss 0x24(%rcx),%ymm8
|
||||||
0xc5,0x3c,0x59,0xd8, // vmulps %ymm0, %ymm8, %ymm11
|
0xc5,0x3c,0x59,0xd8, // vmulps %ymm0,%ymm8,%ymm11
|
||||||
0xc4,0x62,0x7d,0x18,0x61,0x04, // vbroadcastss 0x4(%rcx), %ymm12
|
0xc4,0x62,0x7d,0x18,0x61,0x04, // vbroadcastss 0x4(%rcx),%ymm12
|
||||||
0xc4,0x62,0x7d,0x18,0x69,0x28, // vbroadcastss 0x28(%rcx), %ymm13
|
0xc4,0x62,0x7d,0x18,0x69,0x28, // vbroadcastss 0x28(%rcx),%ymm13
|
||||||
0xc4,0x62,0x7d,0x18,0x71,0x2c, // vbroadcastss 0x2c(%rcx), %ymm14
|
0xc4,0x62,0x7d,0x18,0x71,0x2c, // vbroadcastss 0x2c(%rcx),%ymm14
|
||||||
0xc4,0x62,0x7d,0x18,0x79,0x30, // vbroadcastss 0x30(%rcx), %ymm15
|
0xc4,0x62,0x7d,0x18,0x79,0x30, // vbroadcastss 0x30(%rcx),%ymm15
|
||||||
0xc4,0x42,0x0d,0xa8,0xcf, // vfmadd213ps %ymm15, %ymm14, %ymm9
|
0xc4,0x42,0x0d,0xa8,0xcf, // vfmadd213ps %ymm15,%ymm14,%ymm9
|
||||||
0xc4,0x42,0x15,0xb8,0xca, // vfmadd231ps %ymm10, %ymm13, %ymm9
|
0xc4,0x42,0x15,0xb8,0xca, // vfmadd231ps %ymm10,%ymm13,%ymm9
|
||||||
0xc4,0x41,0x1c,0x5d,0xc9, // vminps %ymm9, %ymm12, %ymm9
|
0xc4,0x41,0x1c,0x5d,0xc9, // vminps %ymm9,%ymm12,%ymm9
|
||||||
0xc4,0x62,0x7d,0x18,0x51,0x34, // vbroadcastss 0x34(%rcx), %ymm10
|
0xc4,0x62,0x7d,0x18,0x51,0x34, // vbroadcastss 0x34(%rcx),%ymm10
|
||||||
0xc4,0xc1,0x7c,0xc2,0xc2,0x01, // vcmpltps %ymm10, %ymm0, %ymm0
|
0xc4,0xc1,0x7c,0xc2,0xc2,0x01, // vcmpltps %ymm10,%ymm0,%ymm0
|
||||||
0xc4,0xc3,0x35,0x4a,0xc3,0x00, // vblendvps %ymm0, %ymm11, %ymm9, %ymm0
|
0xc4,0xc3,0x35,0x4a,0xc3,0x00, // vblendvps %ymm0,%ymm11,%ymm9,%ymm0
|
||||||
0xc5,0x7c,0x52,0xc9, // vrsqrtps %ymm1, %ymm9
|
0xc5,0x7c,0x52,0xc9, // vrsqrtps %ymm1,%ymm9
|
||||||
0xc4,0x41,0x7c,0x53,0xd9, // vrcpps %ymm9, %ymm11
|
0xc4,0x41,0x7c,0x53,0xd9, // vrcpps %ymm9,%ymm11
|
||||||
0xc4,0x41,0x7c,0x52,0xc9, // vrsqrtps %ymm9, %ymm9
|
0xc4,0x41,0x7c,0x52,0xc9, // vrsqrtps %ymm9,%ymm9
|
||||||
0xc4,0x42,0x0d,0xa8,0xdf, // vfmadd213ps %ymm15, %ymm14, %ymm11
|
0xc4,0x42,0x0d,0xa8,0xdf, // vfmadd213ps %ymm15,%ymm14,%ymm11
|
||||||
0xc4,0x42,0x15,0xb8,0xd9, // vfmadd231ps %ymm9, %ymm13, %ymm11
|
0xc4,0x42,0x15,0xb8,0xd9, // vfmadd231ps %ymm9,%ymm13,%ymm11
|
||||||
0xc5,0x3c,0x59,0xc9, // vmulps %ymm1, %ymm8, %ymm9
|
0xc5,0x3c,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm9
|
||||||
0xc4,0x41,0x1c,0x5d,0xdb, // vminps %ymm11, %ymm12, %ymm11
|
0xc4,0x41,0x1c,0x5d,0xdb, // vminps %ymm11,%ymm12,%ymm11
|
||||||
0xc4,0xc1,0x74,0xc2,0xca,0x01, // vcmpltps %ymm10, %ymm1, %ymm1
|
0xc4,0xc1,0x74,0xc2,0xca,0x01, // vcmpltps %ymm10,%ymm1,%ymm1
|
||||||
0xc4,0xc3,0x25,0x4a,0xc9,0x10, // vblendvps %ymm1, %ymm9, %ymm11, %ymm1
|
0xc4,0xc3,0x25,0x4a,0xc9,0x10, // vblendvps %ymm1,%ymm9,%ymm11,%ymm1
|
||||||
0xc5,0x7c,0x52,0xca, // vrsqrtps %ymm2, %ymm9
|
0xc5,0x7c,0x52,0xca, // vrsqrtps %ymm2,%ymm9
|
||||||
0xc4,0x41,0x7c,0x53,0xd9, // vrcpps %ymm9, %ymm11
|
0xc4,0x41,0x7c,0x53,0xd9, // vrcpps %ymm9,%ymm11
|
||||||
0xc4,0x42,0x0d,0xa8,0xdf, // vfmadd213ps %ymm15, %ymm14, %ymm11
|
0xc4,0x42,0x0d,0xa8,0xdf, // vfmadd213ps %ymm15,%ymm14,%ymm11
|
||||||
0xc4,0x41,0x7c,0x52,0xc9, // vrsqrtps %ymm9, %ymm9
|
0xc4,0x41,0x7c,0x52,0xc9, // vrsqrtps %ymm9,%ymm9
|
||||||
0xc4,0x42,0x15,0xb8,0xd9, // vfmadd231ps %ymm9, %ymm13, %ymm11
|
0xc4,0x42,0x15,0xb8,0xd9, // vfmadd231ps %ymm9,%ymm13,%ymm11
|
||||||
0xc4,0x41,0x1c,0x5d,0xcb, // vminps %ymm11, %ymm12, %ymm9
|
0xc4,0x41,0x1c,0x5d,0xcb, // vminps %ymm11,%ymm12,%ymm9
|
||||||
0xc5,0x3c,0x59,0xc2, // vmulps %ymm2, %ymm8, %ymm8
|
0xc5,0x3c,0x59,0xc2, // vmulps %ymm2,%ymm8,%ymm8
|
||||||
0xc4,0xc1,0x6c,0xc2,0xd2,0x01, // vcmpltps %ymm10, %ymm2, %ymm2
|
0xc4,0xc1,0x6c,0xc2,0xd2,0x01, // vcmpltps %ymm10,%ymm2,%ymm2
|
||||||
0xc4,0xc3,0x35,0x4a,0xd0,0x20, // vblendvps %ymm2, %ymm8, %ymm9, %ymm2
|
0xc4,0xc3,0x35,0x4a,0xd0,0x20, // vblendvps %ymm2,%ymm8,%ymm9,%ymm2
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_scale_u8[] = {
|
static const unsigned char kSplice_scale_u8[] = {
|
||||||
0x48,0x8b,0x02, // movq (%rdx), %rax
|
0x48,0x8b,0x02, // mov (%rdx),%rax
|
||||||
0xc4,0x62,0x7d,0x31,0x04,0x38, // vpmovzxbd (%rax,%rdi), %ymm8
|
0xc4,0x62,0x7d,0x31,0x04,0x38, // vpmovzxbd (%rax,%rdi,1),%ymm8
|
||||||
0xc4,0x41,0x7c,0x5b,0xc0, // vcvtdq2ps %ymm8, %ymm8
|
0xc4,0x41,0x7c,0x5b,0xc0, // vcvtdq2ps %ymm8,%ymm8
|
||||||
0xc4,0x62,0x7d,0x18,0x49,0x0c, // vbroadcastss 0xc(%rcx), %ymm9
|
0xc4,0x62,0x7d,0x18,0x49,0x0c, // vbroadcastss 0xc(%rcx),%ymm9
|
||||||
0xc4,0x41,0x3c,0x59,0xc1, // vmulps %ymm9, %ymm8, %ymm8
|
0xc4,0x41,0x3c,0x59,0xc1, // vmulps %ymm9,%ymm8,%ymm8
|
||||||
0xc5,0xbc,0x59,0xc0, // vmulps %ymm0, %ymm8, %ymm0
|
0xc5,0xbc,0x59,0xc0, // vmulps %ymm0,%ymm8,%ymm0
|
||||||
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1, %ymm8, %ymm1
|
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm1
|
||||||
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2, %ymm8, %ymm2
|
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2,%ymm8,%ymm2
|
||||||
0xc5,0xbc,0x59,0xdb, // vmulps %ymm3, %ymm8, %ymm3
|
0xc5,0xbc,0x59,0xdb, // vmulps %ymm3,%ymm8,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_load_8888[] = {
|
static const unsigned char kSplice_load_8888[] = {
|
||||||
0x48,0x8b,0x02, // movq (%rdx), %rax
|
0x48,0x8b,0x02, // mov (%rdx),%rax
|
||||||
0xc5,0xfc,0x10,0x1c,0xb8, // vmovups (%rax,%rdi,4), %ymm3
|
0xc5,0xfc,0x10,0x1c,0xb8, // vmovups (%rax,%rdi,4),%ymm3
|
||||||
0xc4,0xe2,0x7d,0x18,0x11, // vbroadcastss (%rcx), %ymm2
|
0xc4,0xe2,0x7d,0x18,0x11, // vbroadcastss (%rcx),%ymm2
|
||||||
0xc5,0xec,0x54,0xc3, // vandps %ymm3, %ymm2, %ymm0
|
0xc5,0xec,0x54,0xc3, // vandps %ymm3,%ymm2,%ymm0
|
||||||
0xc5,0xfc,0x5b,0xc0, // vcvtdq2ps %ymm0, %ymm0
|
0xc5,0xfc,0x5b,0xc0, // vcvtdq2ps %ymm0,%ymm0
|
||||||
0xc4,0x62,0x7d,0x18,0x41,0x0c, // vbroadcastss 0xc(%rcx), %ymm8
|
0xc4,0x62,0x7d,0x18,0x41,0x0c, // vbroadcastss 0xc(%rcx),%ymm8
|
||||||
0xc5,0xbc,0x59,0xc0, // vmulps %ymm0, %ymm8, %ymm0
|
0xc5,0xbc,0x59,0xc0, // vmulps %ymm0,%ymm8,%ymm0
|
||||||
0xc5,0xf5,0x72,0xd3,0x08, // vpsrld $0x8, %ymm3, %ymm1
|
0xc5,0xf5,0x72,0xd3,0x08, // vpsrld $0x8,%ymm3,%ymm1
|
||||||
0xc5,0xec,0x54,0xc9, // vandps %ymm1, %ymm2, %ymm1
|
0xc5,0xec,0x54,0xc9, // vandps %ymm1,%ymm2,%ymm1
|
||||||
0xc5,0xfc,0x5b,0xc9, // vcvtdq2ps %ymm1, %ymm1
|
0xc5,0xfc,0x5b,0xc9, // vcvtdq2ps %ymm1,%ymm1
|
||||||
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1, %ymm8, %ymm1
|
0xc5,0xbc,0x59,0xc9, // vmulps %ymm1,%ymm8,%ymm1
|
||||||
0xc5,0xb5,0x72,0xd3,0x10, // vpsrld $0x10, %ymm3, %ymm9
|
0xc5,0xb5,0x72,0xd3,0x10, // vpsrld $0x10,%ymm3,%ymm9
|
||||||
0xc4,0xc1,0x6c,0x54,0xd1, // vandps %ymm9, %ymm2, %ymm2
|
0xc4,0xc1,0x6c,0x54,0xd1, // vandps %ymm9,%ymm2,%ymm2
|
||||||
0xc5,0xfc,0x5b,0xd2, // vcvtdq2ps %ymm2, %ymm2
|
0xc5,0xfc,0x5b,0xd2, // vcvtdq2ps %ymm2,%ymm2
|
||||||
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2, %ymm8, %ymm2
|
0xc5,0xbc,0x59,0xd2, // vmulps %ymm2,%ymm8,%ymm2
|
||||||
0xc5,0xe5,0x72,0xd3,0x18, // vpsrld $0x18, %ymm3, %ymm3
|
0xc5,0xe5,0x72,0xd3,0x18, // vpsrld $0x18,%ymm3,%ymm3
|
||||||
0xc5,0xfc,0x5b,0xdb, // vcvtdq2ps %ymm3, %ymm3
|
0xc5,0xfc,0x5b,0xdb, // vcvtdq2ps %ymm3,%ymm3
|
||||||
0xc4,0xc1,0x64,0x59,0xd8, // vmulps %ymm8, %ymm3, %ymm3
|
0xc4,0xc1,0x64,0x59,0xd8, // vmulps %ymm8,%ymm3,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_store_8888[] = {
|
static const unsigned char kSplice_store_8888[] = {
|
||||||
0x48,0x8b,0x02, // movq (%rdx), %rax
|
0x48,0x8b,0x02, // mov (%rdx),%rax
|
||||||
0xc4,0x62,0x7d,0x18,0x41,0x08, // vbroadcastss 0x8(%rcx), %ymm8
|
0xc4,0x62,0x7d,0x18,0x41,0x08, // vbroadcastss 0x8(%rcx),%ymm8
|
||||||
0xc5,0x3c,0x59,0xc8, // vmulps %ymm0, %ymm8, %ymm9
|
0xc5,0x3c,0x59,0xc8, // vmulps %ymm0,%ymm8,%ymm9
|
||||||
0xc4,0x41,0x7d,0x5b,0xc9, // vcvtps2dq %ymm9, %ymm9
|
0xc4,0x41,0x7d,0x5b,0xc9, // vcvtps2dq %ymm9,%ymm9
|
||||||
0xc5,0x3c,0x59,0xd1, // vmulps %ymm1, %ymm8, %ymm10
|
0xc5,0x3c,0x59,0xd1, // vmulps %ymm1,%ymm8,%ymm10
|
||||||
0xc4,0x41,0x7d,0x5b,0xd2, // vcvtps2dq %ymm10, %ymm10
|
0xc4,0x41,0x7d,0x5b,0xd2, // vcvtps2dq %ymm10,%ymm10
|
||||||
0xc4,0xc1,0x2d,0x72,0xf2,0x08, // vpslld $0x8, %ymm10, %ymm10
|
0xc4,0xc1,0x2d,0x72,0xf2,0x08, // vpslld $0x8,%ymm10,%ymm10
|
||||||
0xc4,0x41,0x2d,0xeb,0xc9, // vpor %ymm9, %ymm10, %ymm9
|
0xc4,0x41,0x2d,0xeb,0xc9, // vpor %ymm9,%ymm10,%ymm9
|
||||||
0xc5,0x3c,0x59,0xd2, // vmulps %ymm2, %ymm8, %ymm10
|
0xc5,0x3c,0x59,0xd2, // vmulps %ymm2,%ymm8,%ymm10
|
||||||
0xc4,0x41,0x7d,0x5b,0xd2, // vcvtps2dq %ymm10, %ymm10
|
0xc4,0x41,0x7d,0x5b,0xd2, // vcvtps2dq %ymm10,%ymm10
|
||||||
0xc4,0xc1,0x2d,0x72,0xf2,0x10, // vpslld $0x10, %ymm10, %ymm10
|
0xc4,0xc1,0x2d,0x72,0xf2,0x10, // vpslld $0x10,%ymm10,%ymm10
|
||||||
0xc5,0x3c,0x59,0xc3, // vmulps %ymm3, %ymm8, %ymm8
|
0xc5,0x3c,0x59,0xc3, // vmulps %ymm3,%ymm8,%ymm8
|
||||||
0xc4,0x41,0x7d,0x5b,0xc0, // vcvtps2dq %ymm8, %ymm8
|
0xc4,0x41,0x7d,0x5b,0xc0, // vcvtps2dq %ymm8,%ymm8
|
||||||
0xc4,0xc1,0x3d,0x72,0xf0,0x18, // vpslld $0x18, %ymm8, %ymm8
|
0xc4,0xc1,0x3d,0x72,0xf0,0x18, // vpslld $0x18,%ymm8,%ymm8
|
||||||
0xc4,0x41,0x2d,0xeb,0xc0, // vpor %ymm8, %ymm10, %ymm8
|
0xc4,0x41,0x2d,0xeb,0xc0, // vpor %ymm8,%ymm10,%ymm8
|
||||||
0xc4,0x41,0x35,0xeb,0xc0, // vpor %ymm8, %ymm9, %ymm8
|
0xc4,0x41,0x35,0xeb,0xc0, // vpor %ymm8,%ymm9,%ymm8
|
||||||
0xc5,0x7e,0x7f,0x04,0xb8, // vmovdqu %ymm8, (%rax,%rdi,4)
|
0xc5,0x7e,0x7f,0x04,0xb8, // vmovdqu %ymm8,(%rax,%rdi,4)
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_load_f16[] = {
|
static const unsigned char kSplice_load_f16[] = {
|
||||||
0x48,0x8b,0x02, // movq (%rdx), %rax
|
0x48,0x8b,0x02, // mov (%rdx),%rax
|
||||||
0xc5,0xfa,0x6f,0x04,0xf8, // vmovdqu (%rax,%rdi,8), %xmm0
|
0xc5,0xfa,0x6f,0x04,0xf8, // vmovdqu (%rax,%rdi,8),%xmm0
|
||||||
0xc5,0xfa,0x6f,0x4c,0xf8,0x10, // vmovdqu 0x10(%rax,%rdi,8), %xmm1
|
0xc5,0xfa,0x6f,0x4c,0xf8,0x10, // vmovdqu 0x10(%rax,%rdi,8),%xmm1
|
||||||
0xc5,0xfa,0x6f,0x54,0xf8,0x20, // vmovdqu 0x20(%rax,%rdi,8), %xmm2
|
0xc5,0xfa,0x6f,0x54,0xf8,0x20, // vmovdqu 0x20(%rax,%rdi,8),%xmm2
|
||||||
0xc5,0xfa,0x6f,0x5c,0xf8,0x30, // vmovdqu 0x30(%rax,%rdi,8), %xmm3
|
0xc5,0xfa,0x6f,0x5c,0xf8,0x30, // vmovdqu 0x30(%rax,%rdi,8),%xmm3
|
||||||
0xc5,0x79,0x61,0xc1, // vpunpcklwd %xmm1, %xmm0, %xmm8
|
0xc5,0x79,0x61,0xc1, // vpunpcklwd %xmm1,%xmm0,%xmm8
|
||||||
0xc5,0xf9,0x69,0xc1, // vpunpckhwd %xmm1, %xmm0, %xmm0
|
0xc5,0xf9,0x69,0xc1, // vpunpckhwd %xmm1,%xmm0,%xmm0
|
||||||
0xc5,0xe9,0x61,0xcb, // vpunpcklwd %xmm3, %xmm2, %xmm1
|
0xc5,0xe9,0x61,0xcb, // vpunpcklwd %xmm3,%xmm2,%xmm1
|
||||||
0xc5,0xe9,0x69,0xd3, // vpunpckhwd %xmm3, %xmm2, %xmm2
|
0xc5,0xe9,0x69,0xd3, // vpunpckhwd %xmm3,%xmm2,%xmm2
|
||||||
0xc5,0x39,0x61,0xc8, // vpunpcklwd %xmm0, %xmm8, %xmm9
|
0xc5,0x39,0x61,0xc8, // vpunpcklwd %xmm0,%xmm8,%xmm9
|
||||||
0xc5,0x39,0x69,0xc0, // vpunpckhwd %xmm0, %xmm8, %xmm8
|
0xc5,0x39,0x69,0xc0, // vpunpckhwd %xmm0,%xmm8,%xmm8
|
||||||
0xc5,0xf1,0x61,0xda, // vpunpcklwd %xmm2, %xmm1, %xmm3
|
0xc5,0xf1,0x61,0xda, // vpunpcklwd %xmm2,%xmm1,%xmm3
|
||||||
0xc5,0x71,0x69,0xd2, // vpunpckhwd %xmm2, %xmm1, %xmm10
|
0xc5,0x71,0x69,0xd2, // vpunpckhwd %xmm2,%xmm1,%xmm10
|
||||||
0xc5,0xb1,0x6c,0xc3, // vpunpcklqdq %xmm3, %xmm9, %xmm0
|
0xc5,0xb1,0x6c,0xc3, // vpunpcklqdq %xmm3,%xmm9,%xmm0
|
||||||
0xc4,0xe2,0x7d,0x13,0xc0, // vcvtph2ps %xmm0, %ymm0
|
0xc4,0xe2,0x7d,0x13,0xc0, // vcvtph2ps %xmm0,%ymm0
|
||||||
0xc5,0xb1,0x6d,0xcb, // vpunpckhqdq %xmm3, %xmm9, %xmm1
|
0xc5,0xb1,0x6d,0xcb, // vpunpckhqdq %xmm3,%xmm9,%xmm1
|
||||||
0xc4,0xe2,0x7d,0x13,0xc9, // vcvtph2ps %xmm1, %ymm1
|
0xc4,0xe2,0x7d,0x13,0xc9, // vcvtph2ps %xmm1,%ymm1
|
||||||
0xc4,0xc1,0x39,0x6c,0xd2, // vpunpcklqdq %xmm10, %xmm8, %xmm2
|
0xc4,0xc1,0x39,0x6c,0xd2, // vpunpcklqdq %xmm10,%xmm8,%xmm2
|
||||||
0xc4,0xe2,0x7d,0x13,0xd2, // vcvtph2ps %xmm2, %ymm2
|
0xc4,0xe2,0x7d,0x13,0xd2, // vcvtph2ps %xmm2,%ymm2
|
||||||
0xc4,0xc1,0x39,0x6d,0xda, // vpunpckhqdq %xmm10, %xmm8, %xmm3
|
0xc4,0xc1,0x39,0x6d,0xda, // vpunpckhqdq %xmm10,%xmm8,%xmm3
|
||||||
0xc4,0xe2,0x7d,0x13,0xdb, // vcvtph2ps %xmm3, %ymm3
|
0xc4,0xe2,0x7d,0x13,0xdb, // vcvtph2ps %xmm3,%ymm3
|
||||||
};
|
};
|
||||||
static const unsigned char kSplice_store_f16[] = {
|
static const unsigned char kSplice_store_f16[] = {
|
||||||
0x48,0x8b,0x02, // movq (%rdx), %rax
|
0x48,0x8b,0x02, // mov (%rdx),%rax
|
||||||
0xc4,0xc3,0x7d,0x1d,0xc0,0x04, // vcvtps2ph $0x4, %ymm0, %xmm8
|
0xc4,0xc3,0x7d,0x1d,0xc0,0x04, // vcvtps2ph $0x4,%ymm0,%xmm8
|
||||||
0xc4,0xc3,0x7d,0x1d,0xc9,0x04, // vcvtps2ph $0x4, %ymm1, %xmm9
|
0xc4,0xc3,0x7d,0x1d,0xc9,0x04, // vcvtps2ph $0x4,%ymm1,%xmm9
|
||||||
0xc4,0xc3,0x7d,0x1d,0xd2,0x04, // vcvtps2ph $0x4, %ymm2, %xmm10
|
0xc4,0xc3,0x7d,0x1d,0xd2,0x04, // vcvtps2ph $0x4,%ymm2,%xmm10
|
||||||
0xc4,0xc3,0x7d,0x1d,0xdb,0x04, // vcvtps2ph $0x4, %ymm3, %xmm11
|
0xc4,0xc3,0x7d,0x1d,0xdb,0x04, // vcvtps2ph $0x4,%ymm3,%xmm11
|
||||||
0xc4,0x41,0x39,0x61,0xe1, // vpunpcklwd %xmm9, %xmm8, %xmm12
|
0xc4,0x41,0x39,0x61,0xe1, // vpunpcklwd %xmm9,%xmm8,%xmm12
|
||||||
0xc4,0x41,0x39,0x69,0xc1, // vpunpckhwd %xmm9, %xmm8, %xmm8
|
0xc4,0x41,0x39,0x69,0xc1, // vpunpckhwd %xmm9,%xmm8,%xmm8
|
||||||
0xc4,0x41,0x29,0x61,0xcb, // vpunpcklwd %xmm11, %xmm10, %xmm9
|
0xc4,0x41,0x29,0x61,0xcb, // vpunpcklwd %xmm11,%xmm10,%xmm9
|
||||||
0xc4,0x41,0x29,0x69,0xd3, // vpunpckhwd %xmm11, %xmm10, %xmm10
|
0xc4,0x41,0x29,0x69,0xd3, // vpunpckhwd %xmm11,%xmm10,%xmm10
|
||||||
0xc4,0x41,0x19,0x62,0xd9, // vpunpckldq %xmm9, %xmm12, %xmm11
|
0xc4,0x41,0x19,0x62,0xd9, // vpunpckldq %xmm9,%xmm12,%xmm11
|
||||||
0xc5,0x7a,0x7f,0x1c,0xf8, // vmovdqu %xmm11, (%rax,%rdi,8)
|
0xc5,0x7a,0x7f,0x1c,0xf8, // vmovdqu %xmm11,(%rax,%rdi,8)
|
||||||
0xc4,0x41,0x19,0x6a,0xc9, // vpunpckhdq %xmm9, %xmm12, %xmm9
|
0xc4,0x41,0x19,0x6a,0xc9, // vpunpckhdq %xmm9,%xmm12,%xmm9
|
||||||
0xc5,0x7a,0x7f,0x4c,0xf8,0x10, // vmovdqu %xmm9, 0x10(%rax,%rdi,8)
|
0xc5,0x7a,0x7f,0x4c,0xf8,0x10, // vmovdqu %xmm9,0x10(%rax,%rdi,8)
|
||||||
0xc4,0x41,0x39,0x62,0xca, // vpunpckldq %xmm10, %xmm8, %xmm9
|
0xc4,0x41,0x39,0x62,0xca, // vpunpckldq %xmm10,%xmm8,%xmm9
|
||||||
0xc5,0x7a,0x7f,0x4c,0xf8,0x20, // vmovdqu %xmm9, 0x20(%rax,%rdi,8)
|
0xc5,0x7a,0x7f,0x4c,0xf8,0x20, // vmovdqu %xmm9,0x20(%rax,%rdi,8)
|
||||||
0xc4,0x41,0x39,0x6a,0xc2, // vpunpckhdq %xmm10, %xmm8, %xmm8
|
0xc4,0x41,0x39,0x6a,0xc2, // vpunpckhdq %xmm10,%xmm8,%xmm8
|
||||||
0xc5,0x7a,0x7f,0x44,0xf8,0x30, // vmovdqu %xmm8, 0x30(%rax,%rdi,8)
|
0xc5,0x7a,0x7f,0x44,0xf8,0x30, // vmovdqu %xmm8,0x30(%rax,%rdi,8)
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,6 +25,39 @@ subprocess.check_call(['clang++'] + cflags + aarch64 +
|
|||||||
['-c', 'src/splicer/SkSplicer_stages.cpp'] +
|
['-c', 'src/splicer/SkSplicer_stages.cpp'] +
|
||||||
['-o', 'aarch64.o'])
|
['-o', 'aarch64.o'])
|
||||||
|
|
||||||
|
def parse_object_file(dot_o, array_type, done):
    """Print the disassembly of an object file as C arrays of machine code.

    Runs `gobjdump -d` on `dot_o` and writes to stdout:
      * `static const <array_type> kSplice_<name>[] = {` for each symbol
        header (a single leading underscore on the symbol is dropped),
      * one line per instruction: its code as 0x-prefixed, comma-terminated
        values followed by a `//` comment holding the mnemonic and operands,
      * `};` when an instruction's code equals `done`.

    Args:
        dot_o:      path of the object file handed to gobjdump; lines of
                    gobjdump output starting with this name are skipped.
        array_type: C element type of the emitted arrays, e.g.
                    'unsigned int' or 'unsigned char'.
        done:       code column text of the instruction that ends each stage,
                    i.e. the branch/jump to `done` whose target is not yet
                    filled in ('14000000' or 'e9 00 00 00 00' at the callers).

    Raises:
        AssertionError: if an instruction's operands mention `rip`, because
                        ip-relative code cannot be spliced.
    """
    for line in subprocess.check_output(['gobjdump', '-d', dot_o]).split('\n'):
        line = line.strip()
        if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
            continue

        # E.g. 00000000000003a4 <_load_f16>:
        m = re.match('''................ <_?(.*)>:''', line)
        if m:
            # A single parenthesised string prints the same under Python 2 and 3.
            print('static const ' + array_type + ' kSplice_' + m.group(1) + '[] = {')
            continue

        columns = line.split('\t')
        code = columns[1]
        if len(columns) == 4:
            inst = columns[2]
            args = columns[3]
        else:
            # Mnemonic and operands share one column, separated by spaces.
            # partition() also copes with an instruction that has no operands.
            inst, _, args = columns[2].partition(' ')
        code, inst, args = code.strip(), inst.strip(), args.strip()

        # We can't splice code that uses ip-relative addressing.
        # `args` is a single string here, so test it as a whole: looping over
        # it would compare one character at a time and could never match.
        # TODO: detect on aarch64 too
        if 'rip' in args:
            raise AssertionError('ip-relative addressing cannot be spliced: ' +
                                 inst + ' ' + args)

        if code == done:
            print('};')
            continue

        hexed = ''.join('0x' + x + ',' for x in code.split(' '))
        # NOTE(review): the leading indent literal is reproduced as it appears
        # in this view, which may have collapsed runs of spaces - confirm.
        print(' ' + hexed.ljust(44) + '// ' + inst.ljust(14) + args)
|
||||||
|
|
||||||
print '''/*
|
print '''/*
|
||||||
* Copyright 2017 Google Inc.
|
* Copyright 2017 Google Inc.
|
||||||
*
|
*
|
||||||
@ -40,74 +73,8 @@ print '''/*
|
|||||||
|
|
||||||
#if defined(__aarch64__)
|
#if defined(__aarch64__)
|
||||||
'''
|
'''
|
||||||
for line in subprocess.check_output(['gobjdump', '-d',
|
parse_object_file('aarch64.o', 'unsigned int', '14000000')
|
||||||
'aarch64.o']).split('\n'):
|
print '\n#else\n'
|
||||||
line = line.strip()
|
parse_object_file('hsw.o', 'unsigned char', 'e9 00 00 00 00')
|
||||||
if not line or line.startswith('aarch64.o') or line.startswith('Disassembly'):
|
print '\n#endif\n'
|
||||||
continue
|
print '#endif//SkSplicer_generated_DEFINED'
|
||||||
|
|
||||||
m = re.match('''................ <(.*)>:''', line)
|
|
||||||
if m:
|
|
||||||
print 'static const unsigned int kSplice_' + m.group(1) + '[] = {'
|
|
||||||
continue
|
|
||||||
|
|
||||||
_, code, inst, args = line.split('\t')
|
|
||||||
code = code.strip()
|
|
||||||
|
|
||||||
# b done, where done has not yet been filled in by the linker.
|
|
||||||
if code == '14000000':
|
|
||||||
print '};'
|
|
||||||
continue
|
|
||||||
print ' ', '0x'+code+',' + ' // ' + inst + ' ' + args
|
|
||||||
|
|
||||||
print '''
|
|
||||||
#else
|
|
||||||
'''
|
|
||||||
|
|
||||||
# TODO: port this to gobjdump too
|
|
||||||
for line in subprocess.check_output(['otool', '-tvj', 'hsw.o']).split('\n'):
|
|
||||||
line = line.strip()
|
|
||||||
if line == '' or line == 'hsw.o:' or line == '(__TEXT,__text) section':
|
|
||||||
continue
|
|
||||||
|
|
||||||
m = re.match('_(.*):', line)
|
|
||||||
if m:
|
|
||||||
name = m.group(1)
|
|
||||||
print 'static const unsigned char kSplice_' + m.group(1) + '[] = {'
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Skip the leading 16 byte address and a tab,
|
|
||||||
# leaving the code, instruction mnemonic, and its arguments.
|
|
||||||
line = line[17:]
|
|
||||||
columns = line.split('\t')
|
|
||||||
code = columns[0].strip()
|
|
||||||
inst = columns[1]
|
|
||||||
args = columns[2:]
|
|
||||||
|
|
||||||
# We can't splice code that uses rip relative addressing.
|
|
||||||
for arg in args:
|
|
||||||
assert 'rip' not in arg
|
|
||||||
|
|
||||||
# jmp done, the end of each stage (the address of done is not yet filled in)
|
|
||||||
if code == 'e9 00 00 00 00':
|
|
||||||
print '};'
|
|
||||||
continue
|
|
||||||
|
|
||||||
sys.stdout.write(' ')
|
|
||||||
_bytes = code.split(' ')
|
|
||||||
# This is the meat of things: copy the code to a C unsigned char array.
|
|
||||||
for byte in _bytes:
|
|
||||||
sys.stdout.write('0x' + byte + ',')
|
|
||||||
# From here on we're just making the generated file readable and pretty.
|
|
||||||
sys.stdout.write(' ' * (44 - 5*len(_bytes)))
|
|
||||||
sys.stdout.write('// ' + inst)
|
|
||||||
if args:
|
|
||||||
sys.stdout.write(' ' * (13 - len(inst)))
|
|
||||||
sys.stdout.write(' '.join(args))
|
|
||||||
sys.stdout.write('\n')
|
|
||||||
|
|
||||||
print '''
|
|
||||||
#endif
|
|
||||||
'''
|
|
||||||
|
|
||||||
print '''#endif//SkSplicer_generated_DEFINED'''
|
|
||||||
|
Loading…
Reference in New Issue
Block a user