SkJumper: be more precise by rejecting data sections.
This allows %rip-relative addressing as long as it does not point into a data section. That lets us use switch tables, avoiding loops and stack.

On HSW:
  SkRasterPipeline_f16:  90 -> 63
  SkRasterPipeline_srgb: 170 -> 97

Change-Id: I3ca2e4ff819b70beea78be75579f9d80c06979e8
Reviewed-on: https://skia-review.googlesource.com/9146
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@chromium.org>
This commit is contained in:
parent
b56dedf70b
commit
4e7fc0c5da
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -240,11 +240,17 @@ static const size_t kStride = sizeof(F) / sizeof(float);
|
||||
template <typename V, typename T>
|
||||
static inline V load(const T* src, size_t tail) {
|
||||
#if defined(JUMPER)
|
||||
__builtin_assume(tail < kStride);
|
||||
if (__builtin_expect(tail, 0)) {
|
||||
V v{}; // Any inactive lanes are zeroed.
|
||||
#pragma nounroll
|
||||
for (size_t i = 0; i < tail; i++) {
|
||||
v[i] = src[i];
|
||||
switch (tail-1) {
|
||||
case 6: v[6] = src[6];
|
||||
case 5: v[5] = src[5];
|
||||
case 4: v[4] = src[4];
|
||||
case 3: v[3] = src[3];
|
||||
case 2: v[2] = src[2];
|
||||
case 1: v[1] = src[1];
|
||||
case 0: v[0] = src[0];
|
||||
}
|
||||
return v;
|
||||
}
|
||||
@ -272,10 +278,16 @@ static inline V load(const T* src, size_t tail) {
|
||||
template <typename V, typename T>
|
||||
static inline void store(T* dst, V v, size_t tail) {
|
||||
#if defined(JUMPER)
|
||||
__builtin_assume(tail < kStride);
|
||||
if (__builtin_expect(tail, 0)) {
|
||||
#pragma nounroll
|
||||
for (size_t i = 0; i < tail; i++) {
|
||||
dst[i] = v[i];
|
||||
switch (tail-1) {
|
||||
case 6: dst[6] = v[6];
|
||||
case 5: dst[5] = v[5];
|
||||
case 4: dst[4] = v[4];
|
||||
case 3: dst[3] = v[3];
|
||||
case 2: dst[2] = v[2];
|
||||
case 1: dst[1] = v[1];
|
||||
case 0: dst[0] = v[0];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@ -80,11 +80,20 @@ def parse_object_file(dot_o, directive, target=None):
|
||||
if directive != '.long':
|
||||
dehex = lambda h: str(int(h, 16))
|
||||
|
||||
cmd = [ objdump, '-d', '--insn-width=9', dot_o]
|
||||
cmd = [objdump]
|
||||
if target:
|
||||
cmd += ['--target', target]
|
||||
|
||||
for line in subprocess.check_output(cmd).split('\n'):
|
||||
# Look for sections we know we can't handle.
|
||||
section_headers = subprocess.check_output(cmd + ['-h', dot_o])
|
||||
for section in ['.literal4', '.literal8', '.literal16', '.const']:
|
||||
if section in section_headers:
|
||||
print >>sys.stderr, 'Found %s section, which we cannot handle.' % section
|
||||
assert section not in section_headers
|
||||
|
||||
# Ok. Let's disassemble.
|
||||
disassemble = ['-d', '--insn-width=9', dot_o]
|
||||
for line in subprocess.check_output(cmd + disassemble).split('\n'):
|
||||
line = line.strip()
|
||||
|
||||
if not line or line.startswith(dot_o) or line.startswith('Disassembly'):
|
||||
@ -98,12 +107,6 @@ def parse_object_file(dot_o, directive, target=None):
|
||||
print '_' + m.group(1) + label
|
||||
continue
|
||||
|
||||
# ip-relative addressing usually means we're loading a constant,
|
||||
# which we don't support.
|
||||
if '%rip' in line:
|
||||
print >>sys.stderr, line
|
||||
assert '%rip' not in line
|
||||
|
||||
columns = line.split('\t')
|
||||
code = columns[1]
|
||||
if len(columns) >= 4:
|
||||
|
Loading…
Reference in New Issue
Block a user