SkSplicer: start on Windows support

should just be missing copy_to_executable_mem() / cleanup_executable_mem()

Change-Id: I47d34d4b57a40c07120cca7dc03f6ae22ecbe910
Reviewed-on: https://skia-review.googlesource.com/6854
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
This commit is contained in:
Mike Klein 2017-01-11 13:41:30 -05:00 committed by Skia Commit-Bot
parent 58d2a72afa
commit 09326e7a92
2 changed files with 106 additions and 20 deletions

View File

@ -32,7 +32,7 @@ declare_args() {
skia_enable_pdf = true
skia_enable_splicer =
is_skia_standalone && sanitize != "MSAN" &&
(is_linux || is_mac || (is_android && target_cpu == "arm64"))
(is_linux || is_mac || is_win || (is_android && target_cpu == "arm64"))
skia_enable_tools = is_skia_standalone
skia_enable_vulkan_debug_layers = is_skia_standalone && is_debug
skia_vulkan_sdk = getenv("VULKAN_SDK")

View File

@ -9,7 +9,11 @@
#include "SkOpts.h"
#include "SkRasterPipeline.h"
#include "SkStream.h"
#include <sys/mman.h>
#if defined(_MSC_VER)
#include <windows.h>
#else
#include <sys/mman.h>
#endif
#include "SkSplicer_generated.h"
#include "SkSplicer_shared.h"
@ -86,44 +90,126 @@ namespace {
}
#endif
#if !defined(__aarch64__) && defined(DUMP)
#if defined(_MSC_VER)
// Adapt from MS ABI to System V ABI used by stages.
//
// Writes a fixed machine-code prologue into buf ahead of the spliced loop.
// Going by the per-instruction annotations below, the prologue:
//   - pushes rsi and rdi,
//   - reserves 0xa8 bytes of stack and stores xmm6..xmm15 into it, 16 bytes apart,
//   - copies rcx, rdx, r8, r9 into rdi, rsi, rdx, rcx (in that order, so rcx is
//     read before it is overwritten).
// after_loop() emits the mirror-image epilogue; the offsets and the 0xa8 frame
// size in the two byte arrays must be kept in sync.
static void before_loop(SkWStream* buf) {
static const uint8_t ms_to_system_v[] = {
0x56, // push %rsi
0x57, // push %rdi
// 10 xmm registers * 16 bytes occupy 0x00..0x9f of this frame.
// NOTE(review): presumably the extra 8 bytes keep rsp 16-byte aligned for the
// aligned vmovaps stores below — confirm.
0x48,0x81,0xec,0xa8,0x00,0x00,0x00, // sub $0xa8,%rsp
0xc5,0x78,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps %xmm15,0x90(%rsp)
0xc5,0x78,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps %xmm14,0x80(%rsp)
0xc5,0x78,0x29,0x6c,0x24,0x70, // vmovaps %xmm13,0x70(%rsp)
0xc5,0x78,0x29,0x64,0x24,0x60, // vmovaps %xmm12,0x60(%rsp)
0xc5,0x78,0x29,0x5c,0x24,0x50, // vmovaps %xmm11,0x50(%rsp)
0xc5,0x78,0x29,0x54,0x24,0x40, // vmovaps %xmm10,0x40(%rsp)
0xc5,0x78,0x29,0x4c,0x24,0x30, // vmovaps %xmm9,0x30(%rsp)
0xc5,0x78,0x29,0x44,0x24,0x20, // vmovaps %xmm8,0x20(%rsp)
0xc5,0xf8,0x29,0x7c,0x24,0x10, // vmovaps %xmm7,0x10(%rsp)
0xc5,0xf8,0x29,0x34,0x24, // vmovaps %xmm6,(%rsp)
// Argument shuffle: the order matters, rcx must be consumed before the last mov.
0x48,0x89,0xcf, // mov %rcx,%rdi
0x48,0x89,0xd6, // mov %rdx,%rsi
0x4c,0x89,0xc2, // mov %r8,%rdx
0x4c,0x89,0xc9, // mov %r9,%rcx
};
splice(buf, ms_to_system_v);
}
// Writes the fixed machine-code epilogue that undoes before_loop()'s prologue.
// Going by the per-instruction annotations below, it reloads xmm6..xmm15 from
// the same stack offsets the prologue stored them at, releases the 0xa8-byte
// frame, and pops rdi then rsi (the reverse of the prologue's push order).
// It does not emit a return instruction itself.
static void after_loop(SkWStream* buf) {
static const uint8_t system_v_to_ms[] = {
0xc5,0xf8,0x28,0x34,0x24, // vmovaps (%rsp),%xmm6
0xc5,0xf8,0x28,0x7c,0x24,0x10, // vmovaps 0x10(%rsp),%xmm7
0xc5,0x78,0x28,0x44,0x24,0x20, // vmovaps 0x20(%rsp),%xmm8
0xc5,0x78,0x28,0x4c,0x24,0x30, // vmovaps 0x30(%rsp),%xmm9
0xc5,0x78,0x28,0x54,0x24,0x40, // vmovaps 0x40(%rsp),%xmm10
0xc5,0x78,0x28,0x5c,0x24,0x50, // vmovaps 0x50(%rsp),%xmm11
0xc5,0x78,0x28,0x64,0x24,0x60, // vmovaps 0x60(%rsp),%xmm12
0xc5,0x78,0x28,0x6c,0x24,0x70, // vmovaps 0x70(%rsp),%xmm13
0xc5,0x78,0x28,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps 0x80(%rsp),%xmm14
0xc5,0x78,0x28,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps 0x90(%rsp),%xmm15
// Frame size must match the sub in before_loop().
0x48,0x81,0xc4,0xa8,0x00,0x00,0x00, // add $0xa8,%rsp
0x5f, // pop %rdi
0x5e, // pop %rsi
};
splice(buf, system_v_to_ms);
}
#elif !defined(__aarch64__) && defined(DUMP)
// IACA start and end markers.
static const uint8_t ud2[] = { 0x0f, 0x0b }; // undefined... crashes when run
static const uint8_t nop3[] = { 0x64, 0x67, 0x90 }; // 3 byte no-op
static const uint8_t movl_ebx[] = { 0xbb }; // move next 4 bytes into ebx
static void iaca_start(SkWStream* buf) {
// Emits the start marker into buf: ud2, then movl_ebx followed by the value 111,
// then nop3. Because it begins with ud2, code carrying this marker crashes if run.
// NOTE(review): presumably splice(buf, 111) writes the 4-byte immediate that the
// movl_ebx opcode consumes — confirm against splice().
static void before_loop(SkWStream* buf) {
splice(buf, ud2);
splice(buf, movl_ebx);
splice(buf, 111);
splice(buf, nop3);
}
static void iaca_end(SkWStream* buf) {
// Emits the end marker into buf: movl_ebx followed by the value 222, then nop3,
// then ud2 — the start marker's pieces in mirrored order with a different value.
// NOTE(review): presumably splice(buf, 222) writes the 4-byte immediate that the
// movl_ebx opcode consumes — confirm against splice().
static void after_loop(SkWStream* buf) {
splice(buf, movl_ebx);
splice(buf, 222);
splice(buf, nop3);
splice(buf, ud2);
}
#else
static void iaca_start(SkWStream*) {}
static void iaca_end (SkWStream*) {}
// Fallback for builds that take neither preprocessor branch above: nothing is
// emitted before or after the loop, and the stream argument is ignored.
static void before_loop(SkWStream*) {}
static void after_loop (SkWStream*) {}
#endif
// Copy len bytes from src to memory that's executable. cleanup with cleanup_executable_mem().
static void* copy_to_executable_mem(const void* src, size_t len) {
if (src && len) {
// TODO: w^x
auto fn = mmap(nullptr, len, PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0);
memcpy(fn, src, len);
__builtin___clear_cache((char*)fn, (char*)fn + len);
return fn;
// We can only mprotect / VirtualProtect at 4K page granularity.
static size_t round_up_to_full_pages(size_t len) {
size_t size = 0;
while (size < len) {
size += 4096;
}
return nullptr;
return size;
}
#if defined(_MSC_VER)
// Copy len bytes from src to memory that's executable. cleanup with cleanup_executable_mem().
static void* copy_to_executable_mem(const void* src, size_t* len) {
if (!src || !*len) {
return nullptr;
}
size_t alloc = round_up_to_full_pages(*len);
auto fn = VirtualAlloc(nullptr, alloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
memcpy(fn, src, *len);
DWORD dont_care;
VirtualProtect(fn, alloc, PAGE_EXECUTE_READ, &dont_care);
*len = alloc;
return fn;
}
// Release memory obtained from copy_to_executable_mem(). A null fn is ignored.
// len is not needed here: the whole allocation is released with a size of 0.
static void cleanup_executable_mem(void* fn, size_t len) {
    if (!fn) {
        return;
    }
    VirtualFree(fn, 0, MEM_RELEASE);
}
#else
// Copy *len bytes from src into a freshly mapped executable region.
// Clean up with cleanup_executable_mem().
//
// len is in/out: on entry it is the number of bytes to copy; on success it is
// overwritten with the page-rounded size that was actually mapped, which is the
// length cleanup_executable_mem() must later be given.
//
// Returns nullptr, leaving *len untouched, when src is null, *len is zero, the
// mapping fails, or the pages cannot be made executable.
static void* copy_to_executable_mem(const void* src, size_t* len) {
    if (!src || !*len) {
        return nullptr;
    }

    size_t alloc = round_up_to_full_pages(*len);

    // Map read/write first and only flip to executable once the code is in
    // place, so the pages are never writable and executable at once.
    auto fn = mmap(nullptr, alloc, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
    if (fn == MAP_FAILED) {
        return nullptr;  // mmap reports failure as MAP_FAILED, not null.
    }
    memcpy(fn, src, *len);

    if (mprotect(fn, alloc, PROT_READ|PROT_EXEC) != 0) {
        // Never hand back memory that would fault when called; don't leak it either.
        munmap(fn, alloc);
        return nullptr;
    }
    __builtin___clear_cache((char*)fn, (char*)fn + *len);  // Essential on ARM; no-op on x86.

    *len = alloc;
    return fn;
}
// Unmap a region obtained from copy_to_executable_mem(). A null fn is ignored.
// len must be the mapped length that copy_to_executable_mem() wrote back.
static void cleanup_executable_mem(void* fn, size_t len) {
    if (!fn) {
        return;
    }
    munmap(fn, len);
}
#endif
struct Spliced {
@ -150,7 +236,7 @@ namespace {
// ... run spliced stages...
// x += kStride;
// } while(x < limit);
iaca_start(&buf);
before_loop(&buf);
auto loop_start = buf.bytesWritten(); // Think of this like a label, loop_start:
for (int i = 0; i < nstages; i++) {
@ -189,12 +275,12 @@ namespace {
}
loop(&buf, loop_start); // Loop back to handle more pixels if not done.
iaca_end(&buf);
after_loop(&buf);
ret(&buf); // We're done.
auto data = buf.detachAsData();
fSplicedLen = data->size();
fSpliced = copy_to_executable_mem(data->data(), fSplicedLen);
fSpliced = copy_to_executable_mem(data->data(), &fSplicedLen);
#if defined(DUMP)
SkFILEWStream(DUMP).write(data->data(), data->size());
@ -204,7 +290,7 @@ namespace {
// Spliced is stored in a std::function, so it needs to be copyable.
Spliced(const Spliced& o) : fBackup (o.fBackup)
, fSplicedLen(o.fSplicedLen)
, fSpliced (copy_to_executable_mem(o.fSpliced, fSplicedLen)) {}
, fSpliced (copy_to_executable_mem(o.fSpliced, &fSplicedLen)) {}
~Spliced() {
cleanup_executable_mem(fSpliced, fSplicedLen);