SkSplicer: start on Windows support
Should just be missing copy_to_executable_mem() / cleanup_executable_mem().

Change-Id: I47d34d4b57a40c07120cca7dc03f6ae22ecbe910
Reviewed-on: https://skia-review.googlesource.com/6854
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
parent
58d2a72afa
commit
09326e7a92
2
BUILD.gn
2
BUILD.gn
@ -32,7 +32,7 @@ declare_args() {
|
||||
skia_enable_pdf = true
|
||||
skia_enable_splicer =
|
||||
is_skia_standalone && sanitize != "MSAN" &&
|
||||
(is_linux || is_mac || (is_android && target_cpu == "arm64"))
|
||||
(is_linux || is_mac || is_win || (is_android && target_cpu == "arm64"))
|
||||
skia_enable_tools = is_skia_standalone
|
||||
skia_enable_vulkan_debug_layers = is_skia_standalone && is_debug
|
||||
skia_vulkan_sdk = getenv("VULKAN_SDK")
|
||||
|
@ -9,7 +9,11 @@
|
||||
#include "SkOpts.h"
|
||||
#include "SkRasterPipeline.h"
|
||||
#include "SkStream.h"
|
||||
#include <sys/mman.h>
|
||||
#if defined(_MSC_VER)
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
#include "SkSplicer_generated.h"
|
||||
#include "SkSplicer_shared.h"
|
||||
@ -86,44 +90,126 @@ namespace {
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !defined(__aarch64__) && defined(DUMP)
|
||||
#if defined(_MSC_VER)
|
||||
// Adapt from MS ABI to System V ABI used by stages.
|
||||
static void before_loop(SkWStream* buf) {
|
||||
static const uint8_t ms_to_system_v[] = {
|
||||
0x56, // push %rsi
|
||||
0x57, // push %rdi
|
||||
0x48,0x81,0xec,0xa8,0x00,0x00,0x00, // sub $0xa8,%rsp
|
||||
0xc5,0x78,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps %xmm15,0x90(%rsp)
|
||||
0xc5,0x78,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps %xmm14,0x80(%rsp)
|
||||
0xc5,0x78,0x29,0x6c,0x24,0x70, // vmovaps %xmm13,0x70(%rsp)
|
||||
0xc5,0x78,0x29,0x64,0x24,0x60, // vmovaps %xmm12,0x60(%rsp)
|
||||
0xc5,0x78,0x29,0x5c,0x24,0x50, // vmovaps %xmm11,0x50(%rsp)
|
||||
0xc5,0x78,0x29,0x54,0x24,0x40, // vmovaps %xmm10,0x40(%rsp)
|
||||
0xc5,0x78,0x29,0x4c,0x24,0x30, // vmovaps %xmm9,0x30(%rsp)
|
||||
0xc5,0x78,0x29,0x44,0x24,0x20, // vmovaps %xmm8,0x20(%rsp)
|
||||
0xc5,0xf8,0x29,0x7c,0x24,0x10, // vmovaps %xmm7,0x10(%rsp)
|
||||
0xc5,0xf8,0x29,0x34,0x24, // vmovaps %xmm6,(%rsp)
|
||||
0x48,0x89,0xcf, // mov %rcx,%rdi
|
||||
0x48,0x89,0xd6, // mov %rdx,%rsi
|
||||
0x4c,0x89,0xc2, // mov %r8,%rdx
|
||||
0x4c,0x89,0xc9, // mov %r9,%rcx
|
||||
};
|
||||
splice(buf, ms_to_system_v);
|
||||
}
|
||||
// Epilogue spliced in after the loop on MSVC builds.  Undoes before_loop(): reloads
// xmm6-xmm15 from their stack slots, releases the 0xa8-byte frame, and pops rdi then rsi
// (the reverse of the order they were pushed in).
static void after_loop(SkWStream* buf) {
    static const uint8_t kSysVToMsEpilogue[] = {
        0xc5,0xf8,0x28,0x34,0x24,                      // vmovaps (%rsp),%xmm6
        0xc5,0xf8,0x28,0x7c,0x24,0x10,                 // vmovaps 0x10(%rsp),%xmm7
        0xc5,0x78,0x28,0x44,0x24,0x20,                 // vmovaps 0x20(%rsp),%xmm8
        0xc5,0x78,0x28,0x4c,0x24,0x30,                 // vmovaps 0x30(%rsp),%xmm9
        0xc5,0x78,0x28,0x54,0x24,0x40,                 // vmovaps 0x40(%rsp),%xmm10
        0xc5,0x78,0x28,0x5c,0x24,0x50,                 // vmovaps 0x50(%rsp),%xmm11
        0xc5,0x78,0x28,0x64,0x24,0x60,                 // vmovaps 0x60(%rsp),%xmm12
        0xc5,0x78,0x28,0x6c,0x24,0x70,                 // vmovaps 0x70(%rsp),%xmm13
        0xc5,0x78,0x28,0xb4,0x24,0x80,0x00,0x00,0x00,  // vmovaps 0x80(%rsp),%xmm14
        0xc5,0x78,0x28,0xbc,0x24,0x90,0x00,0x00,0x00,  // vmovaps 0x90(%rsp),%xmm15

        0x48,0x81,0xc4,0xa8,0x00,0x00,0x00,            // add     $0xa8,%rsp
        0x5f,                                          // pop     %rdi
        0x5e,                                          // pop     %rsi
    };
    splice(buf, kSysVToMsEpilogue);
}
|
||||
#elif !defined(__aarch64__) && defined(DUMP)
|
||||
// IACA start and end markers.
|
||||
static const uint8_t ud2[] = { 0x0f, 0x0b }; // undefined... crashes when run
|
||||
static const uint8_t nop3[] = { 0x64, 0x67, 0x90 }; // 3 byte no-op
|
||||
static const uint8_t movl_ebx[] = { 0xbb }; // move next 4 bytes into ebx
|
||||
|
||||
// Writes the IACA start marker into buf: ud2, then the movl-into-ebx opcode followed by
// the constant 111, then the 3-byte no-op.  This variant is selected by the
// !defined(__aarch64__) && defined(DUMP) preprocessor branch.
// NOTE(review): the two signature lines below appear to be the old (iaca_start) and new
// (before_loop) names of the same function as shown in a diff view -- confirm which applies.
static void iaca_start(SkWStream* buf) {
|
||||
static void before_loop(SkWStream* buf) {
|
||||
splice(buf, ud2);
|
||||
splice(buf, movl_ebx);
|
||||
// Operand for the movl above; 111 identifies the start marker (the end marker uses 222).
splice(buf, 111);
|
||||
splice(buf, nop3);
|
||||
}
|
||||
// Writes the IACA end marker into buf: the movl-into-ebx opcode followed by the constant
// 222, then the 3-byte no-op, then ud2 -- the start marker's pieces in mirrored order.
// NOTE(review): the two signature lines below appear to be the old (iaca_end) and new
// (after_loop) names of the same function as shown in a diff view -- confirm which applies.
static void iaca_end(SkWStream* buf) {
|
||||
static void after_loop(SkWStream* buf) {
|
||||
splice(buf, movl_ebx);
|
||||
// Operand for the movl above; 222 identifies the end marker (the start marker uses 111).
splice(buf, 222);
|
||||
splice(buf, nop3);
|
||||
splice(buf, ud2);
|
||||
}
|
||||
#else
|
||||
// Fallback (#else) variants: nothing is written to the stream before or after the loop.
// NOTE(review): iaca_start/iaca_end and before_loop/after_loop look like the old and new
// names of the same pair of stubs as shown in a diff view -- confirm which pair applies.
static void iaca_start(SkWStream*) {}
|
||||
static void iaca_end (SkWStream*) {}
|
||||
static void before_loop(SkWStream*) {}
|
||||
static void after_loop (SkWStream*) {}
|
||||
#endif
|
||||
|
||||
// Copy len bytes from src to memory that's executable. cleanup with cleanup_executable_mem().
|
||||
static void* copy_to_executable_mem(const void* src, size_t len) {
|
||||
if (src && len) {
|
||||
// TODO: w^x
|
||||
auto fn = mmap(nullptr, len, PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0);
|
||||
memcpy(fn, src, len);
|
||||
__builtin___clear_cache((char*)fn, (char*)fn + len);
|
||||
return fn;
|
||||
// We can only mprotect / VirtualProtect at page granularity; pages are taken to be 4K here.
//
// Returns the smallest multiple of 4096 that is >= len (so 0 maps to 0).  If that multiple
// cannot be represented in a size_t, returns 0; callers should treat a result smaller than
// len as failure.  Computed in constant time rather than by stepping one page at a time,
// which also avoids spinning forever when len is within a page of the size_t maximum.
static size_t round_up_to_full_pages(size_t len) {
    const size_t page     = 4096;
    const size_t page_low = page - 1;     // mask of the within-page offset bits
    const size_t max_size = ~(size_t)0;

    if (len > max_size - page_low) {
        return 0;                         // rounding up would wrap around
    }
    return (len + page_low) & ~page_low;
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// Copy len bytes from src to memory that's executable. cleanup with cleanup_executable_mem().
|
||||
static void* copy_to_executable_mem(const void* src, size_t* len) {
|
||||
if (!src || !*len) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
size_t alloc = round_up_to_full_pages(*len);
|
||||
|
||||
auto fn = VirtualAlloc(nullptr, alloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
|
||||
memcpy(fn, src, *len);
|
||||
|
||||
DWORD dont_care;
|
||||
VirtualProtect(fn, alloc, PAGE_EXECUTE_READ, &dont_care);
|
||||
|
||||
*len = alloc;
|
||||
return fn;
|
||||
}
|
||||
static void cleanup_executable_mem(void* fn, size_t len) {
|
||||
if (fn) {
|
||||
VirtualFree(fn, 0, MEM_RELEASE);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static void* copy_to_executable_mem(const void* src, size_t* len) {
|
||||
if (!src || !*len) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
size_t alloc = round_up_to_full_pages(*len);
|
||||
|
||||
auto fn = mmap(nullptr, alloc, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
|
||||
memcpy(fn, src, *len);
|
||||
|
||||
mprotect(fn, alloc, PROT_READ|PROT_EXEC);
|
||||
__builtin___clear_cache((char*)fn, (char*)fn + *len); // Essential on ARM; no-op on x86.
|
||||
|
||||
*len = alloc;
|
||||
return fn;
|
||||
}
|
||||
// Unmaps a region returned by copy_to_executable_mem(); a null fn is ignored.
// len is passed straight to munmap as the length of the mapping.
static void cleanup_executable_mem(void* fn, size_t len) {
    if (!fn) {
        return;
    }
    munmap(fn, len);
}
|
||||
#endif
|
||||
|
||||
struct Spliced {
|
||||
|
||||
@ -150,7 +236,7 @@ namespace {
|
||||
// ... run spliced stages...
|
||||
// x += kStride;
|
||||
// } while(x < limit);
|
||||
iaca_start(&buf);
|
||||
before_loop(&buf);
|
||||
auto loop_start = buf.bytesWritten(); // Think of this like a label, loop_start:
|
||||
|
||||
for (int i = 0; i < nstages; i++) {
|
||||
@ -189,12 +275,12 @@ namespace {
|
||||
}
|
||||
|
||||
loop(&buf, loop_start); // Loop back to handle more pixels if not done.
|
||||
iaca_end(&buf);
|
||||
after_loop(&buf);
|
||||
ret(&buf); // We're done.
|
||||
|
||||
auto data = buf.detachAsData();
|
||||
fSplicedLen = data->size();
|
||||
fSpliced = copy_to_executable_mem(data->data(), fSplicedLen);
|
||||
fSpliced = copy_to_executable_mem(data->data(), &fSplicedLen);
|
||||
|
||||
#if defined(DUMP)
|
||||
SkFILEWStream(DUMP).write(data->data(), data->size());
|
||||
@ -204,7 +290,7 @@ namespace {
|
||||
// Spliced is stored in a std::function, so it needs to be copyable.
|
||||
Spliced(const Spliced& o) : fBackup (o.fBackup)
|
||||
, fSplicedLen(o.fSplicedLen)
|
||||
, fSpliced (copy_to_executable_mem(o.fSpliced, fSplicedLen)) {}
|
||||
, fSpliced (copy_to_executable_mem(o.fSpliced, &fSplicedLen)) {}
|
||||
|
||||
~Spliced() {
|
||||
cleanup_executable_mem(fSpliced, fSplicedLen);
|
||||
|
Loading…
Reference in New Issue
Block a user