SkJumper: be more precise by rejecting data sections.

This allows %rip-relative addressing as long as it does not point into a data
section.  This lets us use switch tables, avoiding loops and stack use.

On HSW,
  SkRasterPipeline_f16:   90 -> 63
  SkRasterPipeline_srgb: 170 -> 97

Change-Id: I3ca2e4ff819b70beea78be75579f9d80c06979e8
Reviewed-on: https://skia-review.googlesource.com/9146
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
Mike Klein 2017-03-02 11:16:22 -05:00 committed by Skia Commit-Bot
parent b56dedf70b
commit 4e7fc0c5da
4 changed files with 1495 additions and 634 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -240,11 +240,17 @@ static const size_t kStride = sizeof(F) / sizeof(float);
template <typename V, typename T>
static inline V load(const T* src, size_t tail) {
#if defined(JUMPER)
__builtin_assume(tail < kStride);
if (__builtin_expect(tail, 0)) {
V v{}; // Any inactive lanes are zeroed.
#pragma nounroll
for (size_t i = 0; i < tail; i++) {
v[i] = src[i];
switch (tail-1) {
case 6: v[6] = src[6];
case 5: v[5] = src[5];
case 4: v[4] = src[4];
case 3: v[3] = src[3];
case 2: v[2] = src[2];
case 1: v[1] = src[1];
case 0: v[0] = src[0];
}
return v;
}
@ -272,10 +278,16 @@ static inline V load(const T* src, size_t tail) {
template <typename V, typename T>
static inline void store(T* dst, V v, size_t tail) {
#if defined(JUMPER)
__builtin_assume(tail < kStride);
if (__builtin_expect(tail, 0)) {
#pragma nounroll
for (size_t i = 0; i < tail; i++) {
dst[i] = v[i];
switch (tail-1) {
case 6: dst[6] = v[6];
case 5: dst[5] = v[5];
case 4: dst[4] = v[4];
case 3: dst[3] = v[3];
case 2: dst[2] = v[2];
case 1: dst[1] = v[1];
case 0: dst[0] = v[0];
}
return;
}

View File

@ -80,11 +80,20 @@ def parse_object_file(dot_o, directive, target=None):
if directive != '.long':
dehex = lambda h: str(int(h, 16))
cmd = [ objdump, '-d', '--insn-width=9', dot_o]
cmd = [objdump]
if target:
cmd += ['--target', target]
for line in subprocess.check_output(cmd).split('\n'):
# Look for sections we know we can't handle.
section_headers = subprocess.check_output(cmd + ['-h', dot_o])
for section in ['.literal4', '.literal8', '.literal16', '.const']:
if section in section_headers:
print >>sys.stderr, 'Found %s section, which we cannot handle.' % section
assert section not in section_headers
# Ok. Let's disassemble.
disassemble = ['-d', '--insn-width=9', dot_o]
for line in subprocess.check_output(cmd + disassemble).split('\n'):
line = line.strip()
if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
@ -98,12 +107,6 @@ def parse_object_file(dot_o, directive, target=None):
print '_' + m.group(1) + label
continue
# ip-relative addressing usually means we're loading a constant,
# which we don't support.
if '%rip' in line:
print >>sys.stderr, line
assert '%rip' not in line
columns = line.split('\t')
code = columns[1]
if len(columns) >= 4: