tool support for SkVM JIT on aarch64
- Add quick-building skvmtool
- Remove SkString dependency
- Add aarch64 support for perf dumps
Here's what I see now for the tiny skvmtool program:
x86-64 (Xeon Gold 6154)
│ skvm-jit-1707131987():
24.93 │ 0: vmovups (%rsi),%ymm0
50.13 │ vpmulld %ymm0,%ymm0,%ymm0
24.93 │ vmovups %ymm0,(%rsi)
│ add $0x20,%rsi
│ sub $0x8,%rdi
│ ↑ jne 0
│ vzeroupper
│ ← retq
aarch64 (Cortex A53)
│ skvm-jit-485593645():
11.55 │ 0: ldr q0, [x1]
47.65 │ mul v0.4s, v0.4s, v0.4s
31.77 │ str q0, [x1]
│ add x1, x1, #0x10
8.66 │ subs x0, x0, #0x4
│ ↑ b.ne 0
0.36 │ ← ret
Change-Id: Ia83ebdc6d96c8bd367bce0e8f2792b5e5c79f750
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/225186
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-07-02 19:16:26 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2019 Google LLC
|
|
|
|
*
|
|
|
|
* Use of this source code is governed by a BSD-style license that can be
|
|
|
|
* found in the LICENSE file.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "src/core/SkCpu.h"
|
|
|
|
#include "src/core/SkVM.h"
|
2019-07-23 16:46:09 +00:00
|
|
|
#include "tools/SkVMBuilders.h"
|
|
|
|
#include <chrono>
|
tool support for SkVM JIT on aarch64
- Add quick-building skvmtool
- Remove SkString dependency
- Add aarch64 support for perf dumps
Here's what I see now for the tiny skvmtool program:
x86-64 (Xeon Gold 6154)
│ skvm-jit-1707131987():
24.93 │ 0: vmovups (%rsi),%ymm0
50.13 │ vpmulld %ymm0,%ymm0,%ymm0
24.93 │ vmovups %ymm0,(%rsi)
│ add $0x20,%rsi
│ sub $0x8,%rdi
│ ↑ jne 0
│ vzeroupper
│ ← retq
aarch64 (Cortex A53)
│ skvm-jit-485593645():
11.55 │ 0: ldr q0, [x1]
47.65 │ mul v0.4s, v0.4s, v0.4s
31.77 │ str q0, [x1]
│ add x1, x1, #0x10
8.66 │ subs x0, x0, #0x4
│ ↑ b.ne 0
0.36 │ ← ret
Change-Id: Ia83ebdc6d96c8bd367bce0e8f2792b5e5c79f750
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/225186
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-07-02 19:16:26 +00:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
// Terminates the process immediately via the C library's abort(); prints nothing itself.
// NOTE(review): presumably this satisfies a hook the Skia core sources link against, so this
// tool can build without the rest of Skia -- confirm against the build files.
void sk_abort_no_print() {
    ::abort();
}
|
|
|
|
|
|
|
|
// printf-style debug logging: formats `fmt` with the trailing variadic arguments and writes
// the result to stderr.
void SkDebugf(const char* fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);   // every va_start must be paired with va_end before returning
}
|
|
|
|
|
2019-07-23 16:46:09 +00:00
|
|
|
// Builds the "plus_one" program: it takes one int-typed argument, loads a 32-bit value
// through it, adds a splatted constant 1, and stores the sum back through the same argument.
static skvm::Program plus_one() {
    skvm::Builder builder;

    skvm::Arg  dst    = builder.arg<int>();
    skvm::I32  loaded = builder.load32(dst);
    const auto one    = builder.splat(1);

    builder.store32(dst, builder.add(loaded, one));
    return builder.done("plus_one");
}
|
|
|
|
|
|
|
|
// Builds the "square" program: it takes one int-typed argument, loads a 32-bit value through
// it, multiplies the value by itself, and stores the product back through the same argument.
static skvm::Program square() {
    skvm::Builder builder;

    skvm::Arg  dst     = builder.arg<int>();
    skvm::I32  x       = builder.load32(dst);
    const auto squared = builder.mul(x, x);

    builder.store32(dst, squared);
    return builder.done("square");
}
|
|
|
|
|
|
|
|
// Prints `val` to stdout as a truncated integer (at least four columns wide), followed by a
// space, a scale prefix and `units`.
//
// While the value exceeds 10000 it is divided by 1000 and the prefix advances through
// "", "K", "M", "G", "T"; e.g. 123456 prints as " 123 K<units>".
//
// Scaling stops at "T", the last prefix in the table.  Without that bound, a value above
// 1e16 (or +inf, which never shrinks) would walk the pointer past the end of `scales` and
// hand printf an out-of-bounds pointer.  Values that are still too large for int after the
// final scaling step are outside what this helper can represent.
static void print(double val, const char* units) {
    static const char* const scales[] = { "", "K", "M", "G", "T" };
    const char* const*       scale    = scales;
    const char* const* const last     = scales + sizeof(scales)/sizeof(scales[0]) - 1;

    while (val > 10000.0 && scale != last) {
        val *= 1/1000.0;
        scale++;
    }

    printf("%4d %s%s", static_cast<int>(val), *scale, units);
}
|
|
|
|
|
|
|
|
// Calls fn() over and over until at least 100ms have passed on std::chrono::steady_clock,
// then returns the average number of calls per second (calls made / seconds elapsed).
// fn() always runs at least once, and the clock is sampled after every call.
template <typename Fn>
static double measure(Fn&& fn) {
    using clock = std::chrono::steady_clock;

    const auto budget = std::chrono::milliseconds(100);
    const auto begin  = clock::now();

    int iterations = 0;
    for (;;) {
        fn();
        iterations++;

        const std::chrono::duration<double> elapsed = clock::now() - begin;
        if (!(elapsed < budget)) {
            return iterations / elapsed.count();
        }
    }
}
|
|
|
|
|
|
|
|
template <typename... Args>
|
|
|
|
static void time(const char* name, const skvm::Program& program, Args... args) {
|
|
|
|
printf("%20s", name);
|
|
|
|
|
|
|
|
for (int N : { 15, 255, 4095 }) {
|
|
|
|
double loops_per_sec = measure([&]{
|
|
|
|
program.eval(N, args...);
|
|
|
|
});
|
|
|
|
|
|
|
|
printf("\t");
|
|
|
|
print(N*loops_per_sec, "px/s");
|
|
|
|
}
|
|
|
|
printf("\n");
|
tool support for SkVM JIT on aarch64
- Add quick-building skvmtool
- Remove SkString dependency
- Add aarch64 support for perf dumps
Here's what I see on now for the tiny skvmtool program:
x86-64 (Xeon Gold 6154)
│ skvm-jit-1707131987():
24.93 │ 0: vmovups (%rsi),%ymm0
50.13 │ vpmulld %ymm0,%ymm0,%ymm0
24.93 │ vmovups %ymm0,(%rsi)
│ add $0x20,%rsi
│ sub $0x8,%rdi
│ ↑ jne 0
│ vzeroupper
│ ← retq
aarch64 (Cortex A53)
│ skvm-jit-485593645():
11.55 │ 0: ldr q0, [x1]
47.65 │ mul v0.4s, v0.4s, v0.4s
31.77 │ str q0, [x1]
│ add x1, x1, #0x10
8.66 │ subs x0, x0, #0x4
│ ↑ b.ne 0
0.36 │ ← ret
Change-Id: Ia83ebdc6d96c8bd367bce0e8f2792b5e5c79f750
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/225186
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-07-02 19:16:26 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
#if defined(__x86_64__)
|
|
|
|
SkCpu::CacheRuntimeFeatures();
|
|
|
|
#endif
|
2019-07-23 16:46:09 +00:00
|
|
|
|
|
|
|
int src[4096],
|
|
|
|
dst[4096];
|
|
|
|
time("plus_one", plus_one(), dst);
|
|
|
|
time( "square", square(), dst);
|
|
|
|
|
|
|
|
time("srcover_f32" , SrcoverBuilder_F32 ().done("srcover_f32" ), src, dst);
|
|
|
|
time("srcover_i32" , SrcoverBuilder_I32 ().done("srcover_i32" ), src, dst);
|
|
|
|
time("srcover_i32_naive", SrcoverBuilder_I32_Naive().done("srcover_i32_naive"), src, dst);
|
|
|
|
time("srcover_i32_SWAR" , SrcoverBuilder_I32_SWAR ().done("srcover_i32_SWAR" ), src, dst);
|
|
|
|
|
tool support for SkVM JIT on aarch64
- Add quick-building skvmtool
- Remove SkString dependency
- Add aarch64 support for perf dumps
Here's what I see on now for the tiny skvmtool program:
x86-64 (Xeon Gold 6154)
│ skvm-jit-1707131987():
24.93 │ 0: vmovups (%rsi),%ymm0
50.13 │ vpmulld %ymm0,%ymm0,%ymm0
24.93 │ vmovups %ymm0,(%rsi)
│ add $0x20,%rsi
│ sub $0x8,%rdi
│ ↑ jne 0
│ vzeroupper
│ ← retq
aarch64 (Cortex A53)
│ skvm-jit-485593645():
11.55 │ 0: ldr q0, [x1]
47.65 │ mul v0.4s, v0.4s, v0.4s
31.77 │ str q0, [x1]
│ add x1, x1, #0x10
8.66 │ subs x0, x0, #0x4
│ ↑ b.ne 0
0.36 │ ← ret
Change-Id: Ia83ebdc6d96c8bd367bce0e8f2792b5e5c79f750
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/225186
Reviewed-by: Herb Derby <herb@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
2019-07-02 19:16:26 +00:00
|
|
|
return 0;
|
|
|
|
}
|