Revert "Generate sse2/sse4.1 splices, use them."
This reverts commit 1fe55dc9fa.
Reason for revert: breaking Win GPU bots?
Original change's description:
> Generate sse2/sse4.1 splices, use them.
>
> While we're at it, tidy up build_stages.py a bit.
> Redirecting stdout seems a lot easier than print >>f all over the place.
>
> TODO: non-VEX-encoded before_loop() and after_loop()
>
> CQ_INCLUDE_TRYBOTS=skia.primary:Test-Win2k8-MSVC-GCE-CPU-AVX2-x86_64-Debug
>
> Change-Id: I3f38e55f081670dd598c6050435466d9f394e5be
> Reviewed-on: https://skia-review.googlesource.com/8230
> Commit-Queue: Mike Klein <mtklein@chromium.org>
> Reviewed-by: Herb Derby <herb@google.com>
>
TBR=mtklein@chromium.org,herb@google.com
NOPRESUBMIT=true
NOTREECHECKS=true
NOTRY=true
CQ_INCLUDE_TRYBOTS=skia.primary:Test-Win2k8-MSVC-GCE-CPU-AVX2-x86_64-Debug
Change-Id: Iba1905c54cb2dc42a880b9e6a8093835ffd530a2
Reviewed-on: https://skia-review.googlesource.com/8347
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
3c727d2386
commit
f8866c5b3a
@ -112,6 +112,12 @@ namespace {
|
||||
splice(buf, jb_near); // jb <next 4 bytes> (b == "before", unsigned less than)
|
||||
splice(buf, loop_start - (int)(buf->bytesWritten() + 4));
|
||||
}
|
||||
static void ret(SkWStream* buf) {
|
||||
static const uint8_t vzeroupper[] = { 0xc5, 0xf8, 0x77 };
|
||||
static const uint8_t ret[] = { 0xc3 };
|
||||
splice(buf, vzeroupper);
|
||||
splice(buf, ret);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
@ -121,16 +127,16 @@ namespace {
|
||||
0x56, // push %rsi
|
||||
0x57, // push %rdi
|
||||
0x48,0x81,0xec,0xa8,0x00,0x00,0x00, // sub $0xa8,%rsp
|
||||
0x44,0x0f,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // movaps %xmm15,0x90(%rsp)
|
||||
0x44,0x0f,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // movaps %xmm14,0x80(%rsp)
|
||||
0x44,0x0f,0x29,0x6c,0x24,0x70, // movaps %xmm13,0x70(%rsp)
|
||||
0x44,0x0f,0x29,0x64,0x24,0x60, // movaps %xmm12,0x60(%rsp)
|
||||
0x44,0x0f,0x29,0x5c,0x24,0x50, // movaps %xmm11,0x50(%rsp)
|
||||
0x44,0x0f,0x29,0x54,0x24,0x40, // movaps %xmm10,0x40(%rsp)
|
||||
0x44,0x0f,0x29,0x4c,0x24,0x30, // movaps %xmm9,0x30(%rsp)
|
||||
0x44,0x0f,0x29,0x44,0x24,0x20, // movaps %xmm8,0x20(%rsp)
|
||||
0x0f,0x29,0x7c,0x24,0x10, // movaps %xmm7,0x10(%rsp)
|
||||
0x0f,0x29,0x34,0x24, // movaps %xmm6,(%rsp)
|
||||
0xc5,0x78,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps %xmm15,0x90(%rsp)
|
||||
0xc5,0x78,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps %xmm14,0x80(%rsp)
|
||||
0xc5,0x78,0x29,0x6c,0x24,0x70, // vmovaps %xmm13,0x70(%rsp)
|
||||
0xc5,0x78,0x29,0x64,0x24,0x60, // vmovaps %xmm12,0x60(%rsp)
|
||||
0xc5,0x78,0x29,0x5c,0x24,0x50, // vmovaps %xmm11,0x50(%rsp)
|
||||
0xc5,0x78,0x29,0x54,0x24,0x40, // vmovaps %xmm10,0x40(%rsp)
|
||||
0xc5,0x78,0x29,0x4c,0x24,0x30, // vmovaps %xmm9,0x30(%rsp)
|
||||
0xc5,0x78,0x29,0x44,0x24,0x20, // vmovaps %xmm8,0x20(%rsp)
|
||||
0xc5,0xf8,0x29,0x7c,0x24,0x10, // vmovaps %xmm7,0x10(%rsp)
|
||||
0xc5,0xf8,0x29,0x34,0x24, // vmovaps %xmm6,(%rsp)
|
||||
0x48,0x89,0xcf, // mov %rcx,%rdi
|
||||
0x48,0x89,0xd6, // mov %rdx,%rsi
|
||||
0x4c,0x89,0xc2, // mov %r8,%rdx
|
||||
@ -140,17 +146,16 @@ namespace {
|
||||
}
|
||||
static void after_loop(SkWStream* buf) {
|
||||
static const uint8_t system_v_to_ms[] = {
|
||||
// TODO: vzeroupper here?
|
||||
0x0f,0x28,0x34,0x24, // movaps (%rsp),%xmm6
|
||||
0x0f,0x28,0x7c,0x24,0x10, // movaps 0x10(%rsp),%xmm7
|
||||
0x44,0x0f,0x28,0x44,0x24,0x20, // movaps 0x20(%rsp),%xmm8
|
||||
0x44,0x0f,0x28,0x4c,0x24,0x30, // movaps 0x30(%rsp),%xmm9
|
||||
0x44,0x0f,0x28,0x54,0x24,0x40, // movaps 0x40(%rsp),%xmm10
|
||||
0x44,0x0f,0x28,0x5c,0x24,0x50, // movaps 0x50(%rsp),%xmm11
|
||||
0x44,0x0f,0x28,0x64,0x24,0x60, // movaps 0x60(%rsp),%xmm12
|
||||
0x44,0x0f,0x28,0x6c,0x24,0x70, // movaps 0x70(%rsp),%xmm13
|
||||
0x44,0x0f,0x28,0xb4,0x24,0x80,0x00,0x00,0x00, // movaps 0x80(%rsp),%xmm14
|
||||
0x44,0x0f,0x28,0xbc,0x24,0x90,0x00,0x00,0x00, // movaps 0x90(%rsp),%xmm15
|
||||
0xc5,0xf8,0x28,0x34,0x24, // vmovaps (%rsp),%xmm6
|
||||
0xc5,0xf8,0x28,0x7c,0x24,0x10, // vmovaps 0x10(%rsp),%xmm7
|
||||
0xc5,0x78,0x28,0x44,0x24,0x20, // vmovaps 0x20(%rsp),%xmm8
|
||||
0xc5,0x78,0x28,0x4c,0x24,0x30, // vmovaps 0x30(%rsp),%xmm9
|
||||
0xc5,0x78,0x28,0x54,0x24,0x40, // vmovaps 0x40(%rsp),%xmm10
|
||||
0xc5,0x78,0x28,0x5c,0x24,0x50, // vmovaps 0x50(%rsp),%xmm11
|
||||
0xc5,0x78,0x28,0x64,0x24,0x60, // vmovaps 0x60(%rsp),%xmm12
|
||||
0xc5,0x78,0x28,0x6c,0x24,0x70, // vmovaps 0x70(%rsp),%xmm13
|
||||
0xc5,0x78,0x28,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps 0x80(%rsp),%xmm14
|
||||
0xc5,0x78,0x28,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps 0x90(%rsp),%xmm15
|
||||
0x48,0x81,0xc4,0xa8,0x00,0x00,0x00, // add $0xa8,%rsp
|
||||
0x5f, // pop %rdi
|
||||
0x5e, // pop %rsi
|
||||
@ -236,46 +241,35 @@ namespace {
|
||||
}
|
||||
#endif
|
||||
|
||||
#define CASE(prefix, st) case SkRasterPipeline::st: splice_until_ret(buf, prefix##_##st); break
|
||||
#define DEFINE_SPLICE(prefix) \
|
||||
static bool prefix##_##splice(SkWStream* buf, SkRasterPipeline::StockStage st) { \
|
||||
switch (st) { \
|
||||
default: return false; \
|
||||
CASE(prefix, clear); \
|
||||
CASE(prefix, plus_); \
|
||||
CASE(prefix, srcover); \
|
||||
CASE(prefix, dstover); \
|
||||
CASE(prefix, clamp_0); \
|
||||
CASE(prefix, clamp_1); \
|
||||
CASE(prefix, clamp_a); \
|
||||
CASE(prefix, swap); \
|
||||
CASE(prefix, move_src_dst); \
|
||||
CASE(prefix, move_dst_src); \
|
||||
CASE(prefix, premul); \
|
||||
CASE(prefix, unpremul); \
|
||||
CASE(prefix, from_srgb); \
|
||||
CASE(prefix, to_srgb); \
|
||||
CASE(prefix, scale_u8); \
|
||||
CASE(prefix, load_tables); \
|
||||
CASE(prefix, load_8888); \
|
||||
CASE(prefix, store_8888); \
|
||||
CASE(prefix, load_f16); \
|
||||
CASE(prefix, store_f16); \
|
||||
CASE(prefix, matrix_3x4); \
|
||||
} \
|
||||
return true; \
|
||||
static bool splice(SkWStream* buf, SkRasterPipeline::StockStage st) {
|
||||
switch (st) {
|
||||
default: return false;
|
||||
#define CASE(st) case SkRasterPipeline::st: splice_until_ret(buf, kSplice_##st); break
|
||||
CASE(clear);
|
||||
CASE(plus_);
|
||||
CASE(srcover);
|
||||
CASE(dstover);
|
||||
CASE(clamp_0);
|
||||
CASE(clamp_1);
|
||||
CASE(clamp_a);
|
||||
CASE(swap);
|
||||
CASE(move_src_dst);
|
||||
CASE(move_dst_src);
|
||||
CASE(premul);
|
||||
CASE(unpremul);
|
||||
CASE(from_srgb);
|
||||
CASE(to_srgb);
|
||||
CASE(scale_u8);
|
||||
CASE(load_tables);
|
||||
CASE(load_8888);
|
||||
CASE(store_8888);
|
||||
CASE(load_f16);
|
||||
CASE(store_f16);
|
||||
CASE(matrix_3x4);
|
||||
#undef CASE
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#if defined(__aarch64__)
|
||||
DEFINE_SPLICE(aarch64)
|
||||
#elif defined(__ARM_NEON__)
|
||||
DEFINE_SPLICE(armv7)
|
||||
#else
|
||||
DEFINE_SPLICE(sse2)
|
||||
DEFINE_SPLICE(sse41)
|
||||
DEFINE_SPLICE(hsw)
|
||||
#endif
|
||||
#undef DEFINE_SPLICE
|
||||
#undef CASE
|
||||
|
||||
struct Spliced {
|
||||
|
||||
@ -288,46 +282,17 @@ namespace {
|
||||
fSpliced = nullptr;
|
||||
// If we return early anywhere in here, !fSpliced means we'll use fBackup instead.
|
||||
|
||||
|
||||
#if defined(__aarch64__)
|
||||
auto splice_stage = [](SkWStream* buf, SkRasterPipeline::StockStage st) {
|
||||
return aarch64_splice(buf, st);
|
||||
};
|
||||
auto inc_x = [](SkWStream* buf) { splice_until_ret(buf, aarch64_inc_x); };
|
||||
#elif defined(__ARM_NEON__)
|
||||
// Late generation ARMv7, e.g. Cortex A15 or Krait.
|
||||
if (!SkCpu::Supports(SkCpu::NEON|SkCpu::NEON_FMA|SkCpu::VFP_FP16)) {
|
||||
return;
|
||||
}
|
||||
auto splice_stage = [](SkWStream* buf, SkRasterPipeline::StockStage st) {
|
||||
return armv7_splice(buf, st);
|
||||
};
|
||||
auto inc_x = [](SkWStream* buf) { splice_until_ret(buf, armv7_inc_x); };
|
||||
#else
|
||||
// To keep things simple, only x86-64 for now.
|
||||
if (sizeof(void*) != 8) {
|
||||
// To keep things simple, only one x86 target supported: Haswell+ x86-64.
|
||||
if (!SkCpu::Supports(SkCpu::HSW) || sizeof(void*) != 8) {
|
||||
return;
|
||||
}
|
||||
bool hsw = true && SkCpu::Supports(SkCpu::HSW),
|
||||
sse41 = true && SkCpu::Supports(SkCpu::SSE41);
|
||||
auto splice_stage = [&](SkWStream* buf, SkRasterPipeline::StockStage st) {
|
||||
if ( hsw) { return hsw_splice(buf, st); }
|
||||
if (sse41) { return sse41_splice(buf, st); }
|
||||
return sse2_splice(buf, st);
|
||||
};
|
||||
auto inc_x = [&](SkWStream* buf) {
|
||||
if ( hsw) { splice_until_ret(buf, hsw_inc_x); return; }
|
||||
if (sse41) { splice_until_ret(buf, sse41_inc_x); return; }
|
||||
splice_until_ret(buf, sse2_inc_x);
|
||||
};
|
||||
auto ret = [&](SkWStream* buf) {
|
||||
static const uint8_t vzeroupper[] = { 0xc5, 0xf8, 0x77 };
|
||||
static const uint8_t ret[] = { 0xc3 };
|
||||
if (hsw) {
|
||||
splice(buf, vzeroupper);
|
||||
}
|
||||
splice(buf, ret);
|
||||
};
|
||||
#endif
|
||||
|
||||
SkDynamicMemoryWStream buf;
|
||||
@ -347,13 +312,13 @@ namespace {
|
||||
}
|
||||
|
||||
// Splice in the code for the Stages, generated offline into SkSplicer_generated.h.
|
||||
if (!splice_stage(&buf, stages[i].stage)) {
|
||||
if (!splice(&buf, stages[i].stage)) {
|
||||
//SkDebugf("SkSplicer can't yet handle stage %d.\n", stages[i].stage);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
inc_x(&buf);
|
||||
splice_until_ret(&buf, kSplice_inc_x);
|
||||
loop(&buf, loop_start); // Loop back to handle more pixels if not done.
|
||||
after_loop(&buf);
|
||||
ret(&buf); // We're done.
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -9,8 +9,6 @@ import re
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
sys.stdout = open('src/splicer/SkSplicer_generated.h', 'w')
|
||||
|
||||
ndk = '/Users/mtklein/brew/opt/android-ndk/'
|
||||
objdump = 'gobjdump'
|
||||
|
||||
@ -53,9 +51,8 @@ subprocess.check_call(['clang++'] + cflags + armv7 +
|
||||
['-c', 'src/splicer/SkSplicer_stages.cpp'] +
|
||||
['-o', 'armv7.o'])
|
||||
|
||||
def parse_object_file(dot_o, array_type, jump, ret, target=None):
|
||||
prefix = dot_o.replace('.o', '_')
|
||||
cmd = [ objdump, '-d', '--insn-width=8', dot_o]
|
||||
def parse_object_file(dst, dot_o, array_type, jump, ret, target=None):
|
||||
cmd = [ objdump, '-d', dot_o]
|
||||
if target:
|
||||
cmd += ['--target', target]
|
||||
for line in subprocess.check_output(cmd).split('\n'):
|
||||
@ -66,7 +63,7 @@ def parse_object_file(dot_o, array_type, jump, ret, target=None):
|
||||
# E.g. 00000000000003a4 <_load_f16>:
|
||||
m = re.match('''[0-9a-f]+ <_?(.*)>:''', line)
|
||||
if m:
|
||||
print 'static const', array_type, prefix + m.group(1) + '[] = {'
|
||||
print >>dst,'static const', array_type, 'kSplice_' + m.group(1) + '[] = {'
|
||||
continue
|
||||
|
||||
columns = line.split('\t')
|
||||
@ -87,15 +84,16 @@ def parse_object_file(dot_o, array_type, jump, ret, target=None):
|
||||
if code == jump:
|
||||
code = ret
|
||||
inst = 'return'
|
||||
args = ''
|
||||
args = '(synthetic)'
|
||||
|
||||
hexed = ''.join('0x'+x+',' for x in code.split(' '))
|
||||
print ' ' + hexed + ' '*(44-len(hexed)) + \
|
||||
'// ' + inst + (' '*(14-len(inst)) + args if args else '')
|
||||
print >>dst,' ' + hexed + ' '*(44-len(hexed)) + \
|
||||
'// ' + inst + ' '*(14-len(inst)) + args
|
||||
if code == ret:
|
||||
print '};'
|
||||
print >>dst,'};'
|
||||
|
||||
print '''/*
|
||||
with open('src/splicer/SkSplicer_generated.h', 'w') as f:
|
||||
print >>f,'''/*
|
||||
* Copyright 2017 Google Inc.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license that can be
|
||||
@ -107,11 +105,14 @@ print '''/*
|
||||
|
||||
// This file is generated semi-automatically with this command:
|
||||
// $ src/splicer/build_stages.py
|
||||
|
||||
#if defined(__aarch64__)
|
||||
'''
|
||||
parse_object_file('aarch64.o', 'unsigned int', '14000000', 'd65f03c0')
|
||||
parse_object_file( 'armv7.o', 'unsigned int', 'eafffffe', 'e12fff1e',
|
||||
parse_object_file(f, 'aarch64.o', 'unsigned int', '14000000', 'd65f03c0')
|
||||
print >>f,'\n#elif defined(__ARM_NEON__)\n'
|
||||
parse_object_file(f, 'armv7.o', 'unsigned int', 'eafffffe', 'e12fff1e',
|
||||
target='elf32-littlearm')
|
||||
parse_object_file( 'sse2.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
|
||||
parse_object_file('sse41.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
|
||||
parse_object_file( 'hsw.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
|
||||
print '#endif//SkSplicer_generated_DEFINED'
|
||||
print >>f,'\n#else\n'
|
||||
parse_object_file(f, 'hsw.o', 'unsigned char', 'e9 00 00 00 00', 'c3')
|
||||
print >>f,'\n#endif\n'
|
||||
print >>f,'#endif//SkSplicer_generated_DEFINED'
|
||||
|
Loading…
Reference in New Issue
Block a user