pretty up some SkSplicer code

- build_stages.py uses gobjdump for both architectures, minor formatting changes
  - let byte arrays be written inline in splice()

Change-Id: I84bd47c18e5ae0b34b35f8c2f0a329fb1ea58f60
Reviewed-on: https://skia-review.googlesource.com/6833
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Mike Klein 2017-01-10 16:11:15 -05:00 committed by Skia Commit-Bot
parent 3444cadedd
commit fb00390b46
2 changed files with 474 additions and 507 deletions

View File

@ -437,8 +437,8 @@ static const unsigned char kSplice_to_srgb[] = {
0xc4,0xc3,0x35,0x4a,0xd0,0x20, // vblendvps %ymm2,%ymm8,%ymm9,%ymm2 0xc4,0xc3,0x35,0x4a,0xd0,0x20, // vblendvps %ymm2,%ymm8,%ymm9,%ymm2
}; };
static const unsigned char kSplice_scale_u8[] = { static const unsigned char kSplice_scale_u8[] = {
0x48,0x8b,0x02, // movq (%rdx), %rax 0x48,0x8b,0x02, // mov (%rdx),%rax
0xc4,0x62,0x7d,0x31,0x04,0x38, // vpmovzxbd (%rax,%rdi), %ymm8 0xc4,0x62,0x7d,0x31,0x04,0x38, // vpmovzxbd (%rax,%rdi,1),%ymm8
0xc4,0x41,0x7c,0x5b,0xc0, // vcvtdq2ps %ymm8,%ymm8 0xc4,0x41,0x7c,0x5b,0xc0, // vcvtdq2ps %ymm8,%ymm8
0xc4,0x62,0x7d,0x18,0x49,0x0c, // vbroadcastss 0xc(%rcx),%ymm9 0xc4,0x62,0x7d,0x18,0x49,0x0c, // vbroadcastss 0xc(%rcx),%ymm9
0xc4,0x41,0x3c,0x59,0xc1, // vmulps %ymm9,%ymm8,%ymm8 0xc4,0x41,0x3c,0x59,0xc1, // vmulps %ymm9,%ymm8,%ymm8
@ -448,7 +448,7 @@ static const unsigned char kSplice_scale_u8[] = {
0xc5,0xbc,0x59,0xdb, // vmulps %ymm3,%ymm8,%ymm3 0xc5,0xbc,0x59,0xdb, // vmulps %ymm3,%ymm8,%ymm3
}; };
static const unsigned char kSplice_load_8888[] = { static const unsigned char kSplice_load_8888[] = {
0x48,0x8b,0x02, // movq (%rdx), %rax 0x48,0x8b,0x02, // mov (%rdx),%rax
0xc5,0xfc,0x10,0x1c,0xb8, // vmovups (%rax,%rdi,4),%ymm3 0xc5,0xfc,0x10,0x1c,0xb8, // vmovups (%rax,%rdi,4),%ymm3
0xc4,0xe2,0x7d,0x18,0x11, // vbroadcastss (%rcx),%ymm2 0xc4,0xe2,0x7d,0x18,0x11, // vbroadcastss (%rcx),%ymm2
0xc5,0xec,0x54,0xc3, // vandps %ymm3,%ymm2,%ymm0 0xc5,0xec,0x54,0xc3, // vandps %ymm3,%ymm2,%ymm0
@ -468,7 +468,7 @@ static const unsigned char kSplice_load_8888[] = {
0xc4,0xc1,0x64,0x59,0xd8, // vmulps %ymm8,%ymm3,%ymm3 0xc4,0xc1,0x64,0x59,0xd8, // vmulps %ymm8,%ymm3,%ymm3
}; };
static const unsigned char kSplice_store_8888[] = { static const unsigned char kSplice_store_8888[] = {
0x48,0x8b,0x02, // movq (%rdx), %rax 0x48,0x8b,0x02, // mov (%rdx),%rax
0xc4,0x62,0x7d,0x18,0x41,0x08, // vbroadcastss 0x8(%rcx),%ymm8 0xc4,0x62,0x7d,0x18,0x41,0x08, // vbroadcastss 0x8(%rcx),%ymm8
0xc5,0x3c,0x59,0xc8, // vmulps %ymm0,%ymm8,%ymm9 0xc5,0x3c,0x59,0xc8, // vmulps %ymm0,%ymm8,%ymm9
0xc4,0x41,0x7d,0x5b,0xc9, // vcvtps2dq %ymm9,%ymm9 0xc4,0x41,0x7d,0x5b,0xc9, // vcvtps2dq %ymm9,%ymm9
@ -487,7 +487,7 @@ static const unsigned char kSplice_store_8888[] = {
0xc5,0x7e,0x7f,0x04,0xb8, // vmovdqu %ymm8,(%rax,%rdi,4) 0xc5,0x7e,0x7f,0x04,0xb8, // vmovdqu %ymm8,(%rax,%rdi,4)
}; };
static const unsigned char kSplice_load_f16[] = { static const unsigned char kSplice_load_f16[] = {
0x48,0x8b,0x02, // movq (%rdx), %rax 0x48,0x8b,0x02, // mov (%rdx),%rax
0xc5,0xfa,0x6f,0x04,0xf8, // vmovdqu (%rax,%rdi,8),%xmm0 0xc5,0xfa,0x6f,0x04,0xf8, // vmovdqu (%rax,%rdi,8),%xmm0
0xc5,0xfa,0x6f,0x4c,0xf8,0x10, // vmovdqu 0x10(%rax,%rdi,8),%xmm1 0xc5,0xfa,0x6f,0x4c,0xf8,0x10, // vmovdqu 0x10(%rax,%rdi,8),%xmm1
0xc5,0xfa,0x6f,0x54,0xf8,0x20, // vmovdqu 0x20(%rax,%rdi,8),%xmm2 0xc5,0xfa,0x6f,0x54,0xf8,0x20, // vmovdqu 0x20(%rax,%rdi,8),%xmm2
@ -510,7 +510,7 @@ static const unsigned char kSplice_load_f16[] = {
0xc4,0xe2,0x7d,0x13,0xdb, // vcvtph2ps %xmm3,%ymm3 0xc4,0xe2,0x7d,0x13,0xdb, // vcvtph2ps %xmm3,%ymm3
}; };
static const unsigned char kSplice_store_f16[] = { static const unsigned char kSplice_store_f16[] = {
0x48,0x8b,0x02, // movq (%rdx), %rax 0x48,0x8b,0x02, // mov (%rdx),%rax
0xc4,0xc3,0x7d,0x1d,0xc0,0x04, // vcvtps2ph $0x4,%ymm0,%xmm8 0xc4,0xc3,0x7d,0x1d,0xc0,0x04, // vcvtps2ph $0x4,%ymm0,%xmm8
0xc4,0xc3,0x7d,0x1d,0xc9,0x04, // vcvtps2ph $0x4,%ymm1,%xmm9 0xc4,0xc3,0x7d,0x1d,0xc9,0x04, // vcvtps2ph $0x4,%ymm1,%xmm9
0xc4,0xc3,0x7d,0x1d,0xd2,0x04, // vcvtps2ph $0x4,%ymm2,%xmm10 0xc4,0xc3,0x7d,0x1d,0xd2,0x04, // vcvtps2ph $0x4,%ymm2,%xmm10

View File

@ -25,6 +25,39 @@ subprocess.check_call(['clang++'] + cflags + aarch64 +
['-c', 'src/splicer/SkSplicer_stages.cpp'] + ['-c', 'src/splicer/SkSplicer_stages.cpp'] +
['-o', 'aarch64.o']) ['-o', 'aarch64.o'])
def parse_object_file(dot_o, array_type, done):
    """Print the disassembly of an object file as C arrays of machine code.

    Runs `gobjdump -d` on `dot_o` and writes one `static const` array per
    symbol to stdout, named `kSplice_<symbol>`, with each instruction's
    encoding followed by a comment holding its mnemonic and operands.

    Args:
        dot_o: Path of the object file to disassemble.  Lines of the
            listing that start with this string are skipped as headers.
        array_type: C element type of the emitted arrays, e.g.
            'unsigned char'.
        done: Text of the code column for the instruction that ends each
            stage.  That instruction is not emitted; it closes the array.

    Raises:
        AssertionError: If an instruction's operands mention 'rip'.
    """
    # universal_newlines makes check_output return text rather than bytes
    # on Python 3, so the string handling below works on either version.
    listing = subprocess.check_output(['gobjdump', '-d', dot_o],
                                      universal_newlines=True)
    for line in listing.split('\n'):
        line = line.strip()
        if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
            continue

        # E.g. 00000000000003a4 <_load_f16>:
        m = re.match('''................ <_?(.*)>:''', line)
        if m:
            print('static const ' + array_type +
                  ' kSplice_' + m.group(1) + '[] = {')
            continue

        columns = line.split('\t')
        code = columns[1]
        if len(columns) == 4:
            inst = columns[2]
            args = columns[3]
        else:
            # Mnemonic and operands share one column here.  partition()
            # also copes with instructions that take no operands at all.
            inst, _, args = columns[2].partition(' ')
        code, inst, args = code.strip(), inst.strip(), args.strip()

        # We can't splice code that uses ip-relative addressing.
        # args is a single string, so test it whole; looping over it would
        # only ever look at one character at a time and never match.
        # TODO: detect on aarch64 too
        if 'rip' in args:
            raise AssertionError(
                'cannot splice ip-relative addressing: ' + line)

        if code == done:
            print('};')
            continue

        hexed = ''.join('0x' + x + ',' for x in code.split(' '))
        # rstrip() only matters for operand-less instructions, where the
        # mnemonic padding would otherwise leave trailing spaces.
        print((' ' + hexed.ljust(44) + '// ' + inst.ljust(14) + args).rstrip())
print '''/* print '''/*
* Copyright 2017 Google Inc. * Copyright 2017 Google Inc.
* *
@ -40,74 +73,8 @@ print '''/*
#if defined(__aarch64__) #if defined(__aarch64__)
''' '''
for line in subprocess.check_output(['gobjdump', '-d', parse_object_file('aarch64.o', 'unsigned int', '14000000')
'aarch64.o']).split('\n'): print '\n#else\n'
line = line.strip() parse_object_file('hsw.o', 'unsigned char', 'e9 00 00 00 00')
if not line or line.startswith('aarch64.o') or line.startswith('Disassembly'): print '\n#endif\n'
continue print '#endif//SkSplicer_generated_DEFINED'
m = re.match('''................ <(.*)>:''', line)
if m:
print 'static const unsigned int kSplice_' + m.group(1) + '[] = {'
continue
_, code, inst, args = line.split('\t')
code = code.strip()
# b done, where done has not yet been filled in by the linker.
if code == '14000000':
print '};'
continue
print ' ', '0x'+code+',' + ' // ' + inst + ' ' + args
print '''
#else
'''
# TODO: port this to gobjdump too
for line in subprocess.check_output(['otool', '-tvj', 'hsw.o']).split('\n'):
line = line.strip()
if line == '' or line == 'hsw.o:' or line == '(__TEXT,__text) section':
continue
m = re.match('_(.*):', line)
if m:
name = m.group(1)
print 'static const unsigned char kSplice_' + m.group(1) + '[] = {'
continue
# Skip the leading 16 byte address and a tab,
# leaving the code, instruction mnemonic, and its arguments.
line = line[17:]
columns = line.split('\t')
code = columns[0].strip()
inst = columns[1]
args = columns[2:]
# We can't splice code that uses rip relative addressing.
for arg in args:
assert 'rip' not in arg
# jmp done, the end of each stage (the address of done is not yet filled in)
if code == 'e9 00 00 00 00':
print '};'
continue
sys.stdout.write(' ')
_bytes = code.split(' ')
# This is the meat of things: copy the code to a C unsigned char array.
for byte in _bytes:
sys.stdout.write('0x' + byte + ',')
# From here on we're just making the generated file readable and pretty.
sys.stdout.write(' ' * (44 - 5*len(_bytes)))
sys.stdout.write('// ' + inst)
if args:
sys.stdout.write(' ' * (13 - len(inst)))
sys.stdout.write(' '.join(args))
sys.stdout.write('\n')
print '''
#endif
'''
print '''#endif//SkSplicer_generated_DEFINED'''