diff --git a/src/core/SkOpts.cpp b/src/core/SkOpts.cpp
index 6dd3fcf796..24deb783e3 100644
--- a/src/core/SkOpts.cpp
+++ b/src/core/SkOpts.cpp
@@ -43,7 +43,6 @@
 #include "src/opts/SkRasterPipeline_opts.h"
 #include "src/opts/SkSwizzler_opts.h"
 #include "src/opts/SkUtils_opts.h"
-#include "src/opts/SkVM_opts.h"
 #include "src/opts/SkXfermode_opts.h"
 
 namespace SkOpts {
@@ -81,8 +80,6 @@ namespace SkOpts {
     DEFINE_DEFAULT(hash_fn);
 
     DEFINE_DEFAULT(S32_alpha_D32_filter_DX);
-
-    DEFINE_DEFAULT(eval);
 #undef DEFINE_DEFAULT
 
 #define M(st) (StageFn)SK_OPTS_NS::st,
diff --git a/src/core/SkOpts.h b/src/core/SkOpts.h
index 129541b8bd..3a4d9504d2 100644
--- a/src/core/SkOpts.h
+++ b/src/core/SkOpts.h
@@ -14,8 +14,6 @@
 
 struct SkBitmapProcState;
 
-namespace skvm { struct ProgramInstruction; }
-
 namespace SkOpts {
     // Call to replace pointers to portable functions with pointers to CPU-specific functions.
     // Thread-safe and idempotent.
@@ -76,9 +74,6 @@ namespace SkOpts {
     extern void (*start_pipeline_lowp )(size_t,size_t,size_t,size_t, void**);
 #undef M
 
-    extern void (*eval)(const skvm::ProgramInstruction[], int ninsts, int nregs, int loop,
-                        int n, void* args[], size_t strides[], int nargs);
-
 }
 
 #endif//SkOpts_DEFINED
diff --git a/src/core/SkVM.cpp b/src/core/SkVM.cpp
index 23c8e05813..ee58fa4416 100644
--- a/src/core/SkVM.cpp
+++ b/src/core/SkVM.cpp
@@ -10,7 +10,6 @@
 #include "include/private/SkThreadID.h"
 #include "include/private/SkVx.h"
 #include "src/core/SkCpu.h"
-#include "src/core/SkOpts.h"
 #include "src/core/SkVM.h"
 #include <string.h>
 #if defined(SKVM_JIT)
@@ -1240,7 +1239,16 @@ namespace skvm {
         static SkSpinlock dump_lock;
         SkAutoSpinlock lock(dump_lock);
 
-        uint32_t hash = SkOpts::hash(fJIT.buf, fJIT.size);
+        auto fnv1a = [](const void* vbuf, size_t n) {
+            uint32_t hash = 2166136261;
+            for (auto buf = (const uint8_t*)vbuf; n --> 0; buf++) {
+                hash ^= *buf;
+                hash *= 16777619;
+            }
+            return hash;
+        };
+
+        uint32_t hash = fnv1a(fJIT.buf, fJIT.size);
         SkString name = SkStringPrintf("skvm-jit-%u", hash);
 
         // Create a jit-<pid>.dump file that we can `perf inject -j` into a
@@ -1342,9 +1350,157 @@ namespace skvm {
                 SkASSERT(arg == args + nargs);
             }
         }
+
         if (n) {
-            SkOpts::eval(fInstructions.data(), (int)fInstructions.size(), fRegs, fLoop,
-                         n, args, strides, nargs);
+            // We'll operate in SIMT style, knocking off K-size chunks from n while possible.
+            constexpr int K = 16;
+            using I32 = skvx::Vec<K, int>;
+            using F32 = skvx::Vec<K, float>;
+            using U32 = skvx::Vec<K, uint32_t>;
+            using U8  = skvx::Vec<K, uint8_t>;
+
+            using I16x2 = skvx::Vec<2*K, int16_t>;
+            using U16x2 = skvx::Vec<2*K, uint16_t>;
+
+            union Slot {
+                I32 i32;
+                U32 u32;
+                F32 f32;
+            };
+
+            Slot                     few_regs[16];
+            std::unique_ptr<char[]>  many_regs;
+
+            Slot* regs = few_regs;
+
+            if (fRegs > (int)SK_ARRAY_COUNT(few_regs)) {
+                // Annoyingly we can't trust that malloc() or new will work with Slot because
+                // the skvx::Vec types may have alignment greater than what they provide.
+                // We'll overallocate one extra register so we can align manually.
+                many_regs.reset(new char[ sizeof(Slot) * (fRegs + 1) ]);
+
+                uintptr_t addr = (uintptr_t)many_regs.get();
+                addr += alignof(Slot) -
+                         (addr & (alignof(Slot) - 1));
+                SkASSERT((addr & (alignof(Slot) - 1)) == 0);
+                regs = (Slot*)addr;
+            }
+
+
+            auto r = [&](Reg id) -> Slot& {
+                SkASSERT(0 <= id && id < fRegs);
+                return regs[id];
+            };
+            auto arg = [&](int ix) {
+                SkASSERT(0 <= ix && ix < nargs);
+                return args[ix];
+            };
+
+            // Step each argument pointer ahead by its stride a number of times.
+            auto step_args = [&](int times) {
+                // Looping by marching pointers until *arg == nullptr helps the
+                // compiler to keep this loop scalar.  Otherwise it'd create a
+                // rather large and useless autovectorized version.
+                void**        arg    = args;
+                const size_t* stride = strides;
+                for (; *arg; arg++, stride++) {
+                    *arg = (void*)( (char*)*arg + times * *stride );
+                }
+                SkASSERT(arg == args + nargs);
+            };
+
+            int start = 0,
+                stride;
+            for ( ; n > 0; start = fLoop, n -= stride, step_args(stride)) {
+                stride = n >= K ? K : 1;
+
+                for (int i = start; i < (int)fInstructions.size(); i++) {
+                    Instruction inst = fInstructions[i];
+
+                    // d = op(x,y,z/imm)
+                    Reg   d = inst.d,
+                          x = inst.x,
+                          y = inst.y,
+                          z = inst.z;
+                    int imm = inst.imm;
+
+                    // Ops that interact with memory need to know whether we're stride=1 or K,
+                    // but all non-memory ops can run the same code no matter the stride.
+                    switch (2*(int)inst.op + (stride == K ? 1 : 0)) {
+
+                    #define STRIDE_1(op) case 2*(int)op
+                    #define STRIDE_K(op) case 2*(int)op + 1
+                        STRIDE_1(Op::store8 ): memcpy(arg(imm), &r(x).i32, 1); break;
+                        STRIDE_1(Op::store32): memcpy(arg(imm), &r(x).i32, 4); break;
+
+                        STRIDE_K(Op::store8 ): skvx::cast<uint8_t>(r(x).i32).store(arg(imm)); break;
+                        STRIDE_K(Op::store32):                    (r(x).i32).store(arg(imm)); break;
+
+                        STRIDE_1(Op::load8 ): r(d).i32 = 0; memcpy(&r(d).i32, arg(imm), 1); break;
+                        STRIDE_1(Op::load32): r(d).i32 = 0; memcpy(&r(d).i32, arg(imm), 4); break;
+
+                        STRIDE_K(Op::load8 ): r(d).i32 = skvx::cast<int>(U8 ::Load(arg(imm))); break;
+                        STRIDE_K(Op::load32): r(d).i32 =                 I32::Load(arg(imm)) ; break;
+                    #undef STRIDE_1
+                    #undef STRIDE_K
+
+                    // Ops that don't interact with memory should never care about the stride.
+                    #define CASE(op) case 2*(int)op: /*fallthrough*/ case 2*(int)op+1
+                        CASE(Op::splat): r(d).i32 = imm; break;
+
+                        CASE(Op::add_f32): r(d).f32 = r(x).f32 + r(y).f32; break;
+                        CASE(Op::sub_f32): r(d).f32 = r(x).f32 - r(y).f32; break;
+                        CASE(Op::mul_f32): r(d).f32 = r(x).f32 * r(y).f32; break;
+                        CASE(Op::div_f32): r(d).f32 = r(x).f32 / r(y).f32; break;
+
+                        CASE(Op::mad_f32): r(d).f32 = r(x).f32 * r(y).f32 + r(z).f32; break;
+
+                        CASE(Op::add_i32): r(d).i32 = r(x).i32 + r(y).i32; break;
+                        CASE(Op::sub_i32): r(d).i32 = r(x).i32 - r(y).i32; break;
+                        CASE(Op::mul_i32): r(d).i32 = r(x).i32 * r(y).i32; break;
+
+                        CASE(Op::sub_i16x2):
+                            r(d).i32 = skvx::bit_pun<I32>(skvx::bit_pun<I16x2>(r(x).i32) -
+                                                          skvx::bit_pun<I16x2>(r(y).i32) ); break;
+                        CASE(Op::mul_i16x2):
+                            r(d).i32 = skvx::bit_pun<I32>(skvx::bit_pun<I16x2>(r(x).i32) *
+                                                          skvx::bit_pun<I16x2>(r(y).i32) ); break;
+                        CASE(Op::shr_i16x2):
+                            r(d).i32 = skvx::bit_pun<I32>(skvx::bit_pun<U16x2>(r(x).i32) >> imm);
+                            break;
+
+                        CASE(Op::bit_and):   r(d).i32 = r(x).i32 &  r(y).i32; break;
+                        CASE(Op::bit_or ):   r(d).i32 = r(x).i32 |  r(y).i32; break;
+                        CASE(Op::bit_xor):   r(d).i32 = r(x).i32 ^  r(y).i32; break;
+                        CASE(Op::bit_clear): r(d).i32 = r(x).i32 & ~r(y).i32; break;
+
+                        CASE(Op::shl): r(d).i32 = r(x).i32 << imm; break;
+                        CASE(Op::sra): r(d).i32 = r(x).i32 >> imm; break;
+                        CASE(Op::shr): r(d).u32 = r(x).u32 >> imm; break;
+
+                        CASE(Op::extract): r(d).u32 = (r(x).u32 >> imm) & r(y).u32; break;
+                        CASE(Op::pack):    r(d).u32 = r(x).u32 | (r(y).u32 << imm); break;
+
+                        CASE(Op::bytes): {
+                            const U32 table[] = {
+                                0,
+                                (r(x).u32      ) & 0xff,
+                                (r(x).u32 >>  8) & 0xff,
+                                (r(x).u32 >> 16) & 0xff,
+                                (r(x).u32 >> 24) & 0xff,
+                            };
+                            r(d).u32 = table[(imm >>  0) & 0xf] <<  0
+                                     | table[(imm >>  4) & 0xf] <<  8
+                                     | table[(imm >>  8) & 0xf] << 16
+                                     | table[(imm >> 12) & 0xf] << 24;
+                        } break;
+
+                        CASE(Op::to_f32): r(d).f32 = skvx::cast<float>(r(x).i32); break;
+                        CASE(Op::to_i32): r(d).i32 = skvx::cast<int>  (r(x).f32); break;
+                    #undef CASE
+                    }
+                }
+            }
         }
     }
 }
diff --git a/src/core/SkVM.h b/src/core/SkVM.h
index 203c1dfb58..8e436253e6 100644
--- a/src/core/SkVM.h
+++ b/src/core/SkVM.h
@@ -192,16 +192,13 @@ namespace skvm {
 
     using Reg = int;
 
-    struct ProgramInstruction {   // d = op(x, y, z/imm)
-        Op op;
-        Reg d,x,y;
-        union { Reg z; int imm; };
-    };
-
     class Program {
     public:
-        // Moved outside Program so it can be forward-declared.
-        using Instruction = ProgramInstruction;
+        struct Instruction {   // d = op(x, y, z/imm)
+            Op op;
+            Reg d,x,y;
+            union { Reg z; int imm; };
+        };
 
         Program(std::vector<Instruction>, int regs, int loop);
         Program() : Program({}, 0, 0) {}
diff --git a/src/opts/SkOpts_hsw.cpp b/src/opts/SkOpts_hsw.cpp
index 2a2d814861..fae5c858cd 100644
--- a/src/opts/SkOpts_hsw.cpp
+++ b/src/opts/SkOpts_hsw.cpp
@@ -11,7 +11,6 @@
 #include "src/opts/SkBlitRow_opts.h"
 #include "src/opts/SkRasterPipeline_opts.h"
 #include "src/opts/SkUtils_opts.h"
-#include "src/opts/SkVM_opts.h"
 
 namespace SkOpts {
     void Init_hsw() {
@@ -29,7 +28,5 @@ namespace SkOpts {
         just_return_lowp    = (StageFn)SK_OPTS_NS::lowp::just_return;
         start_pipeline_lowp = SK_OPTS_NS::lowp::start_pipeline;
 #undef M
-
-        eval = hsw::eval;
     }
 }
diff --git a/src/opts/SkVM_opts.h b/src/opts/SkVM_opts.h
deleted file mode 100644
index 0655960a2c..0000000000
--- a/src/opts/SkVM_opts.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright 2019 Google Inc.
- *
- * Use of this source code is governed by a BSD-style license that can be
- * found in the LICENSE file.
- */
-
-#ifndef SkVM_opts_DEFINED
-#define SkVM_opts_DEFINED
-
-#include "src/core/SkVM.h"
-
-#include "include/private/SkVx.h"
-
-namespace SK_OPTS_NS {
-
-    inline void eval(const skvm::Program::Instruction insts[], const int ninsts,
-                     const int nregs, const int loop,
-                     int n, void* args[], size_t strides[], const int nargs) {
-        using namespace skvm;
-
-        // We'll operate in SIMT style, knocking off K-size chunks from n while possible.
-    #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
-        constexpr int K = 32;
-    #else
-        constexpr int K = 16;
-    #endif
-        using I32 = skvx::Vec<K, int>;
-        using F32 = skvx::Vec<K, float>;
-        using U32 = skvx::Vec<K, uint32_t>;
-        using U8  = skvx::Vec<K, uint8_t>;
-
-        using I16x2 = skvx::Vec<2*K, int16_t>;
-        using U16x2 = skvx::Vec<2*K, uint16_t>;
-
-        union Slot {
-            I32 i32;
-            U32 u32;
-            F32 f32;
-        };
-
-        Slot                     few_regs[16];
-        std::unique_ptr<char[]>  many_regs;
-
-        Slot* regs = few_regs;
-
-        if (nregs > (int)SK_ARRAY_COUNT(few_regs)) {
-            // Annoyingly we can't trust that malloc() or new will work with Slot because
-            // the skvx::Vec types may have alignment greater than what they provide.
-            // We'll overallocate one extra register so we can align manually.
-            many_regs.reset(new char[ sizeof(Slot) * (nregs + 1) ]);
-
-            uintptr_t addr = (uintptr_t)many_regs.get();
-            addr += alignof(Slot) -
-                     (addr & (alignof(Slot) - 1));
-            SkASSERT((addr & (alignof(Slot) - 1)) == 0);
-            regs = (Slot*)addr;
-        }
-
-
-        auto r = [&](Reg id) -> Slot& {
-            SkASSERT(0 <= id && id < nregs);
-            return regs[id];
-        };
-        auto arg = [&](int ix) {
-            SkASSERT(0 <= ix && ix < nargs);
-            return args[ix];
-        };
-
-        // Step each argument pointer ahead by its stride a number of times.
-        auto step_args = [&](int times) {
-            // Looping by marching pointers until *arg == nullptr helps the
-            // compiler to keep this loop scalar.  Otherwise it'd create a
-            // rather large and useless autovectorized version.
-            void**        arg    = args;
-            const size_t* stride = strides;
-            for (; *arg; arg++, stride++) {
-                *arg = (void*)( (char*)*arg + times * *stride );
-            }
-            SkASSERT(arg == args + nargs);
-        };
-
-        int start = 0,
-            stride;
-        for ( ; n > 0; start = loop, n -= stride, step_args(stride)) {
-            stride = n >= K ? K : 1;
-
-            for (int i = start; i < ninsts; i++) {
-                skvm::Program::Instruction inst = insts[i];
-
-                // d = op(x,y,z/imm)
-                Reg   d = inst.d,
-                      x = inst.x,
-                      y = inst.y,
-                      z = inst.z;
-                int imm = inst.imm;
-
-                // Ops that interact with memory need to know whether we're stride=1 or stride=K,
-                // but all non-memory ops can run the same code no matter the stride.
-                switch (2*(int)inst.op + (stride == K ? 1 : 0)) {
-
-                #define STRIDE_1(op) case 2*(int)op
-                #define STRIDE_K(op) case 2*(int)op + 1
-                    STRIDE_1(Op::store8 ): memcpy(arg(imm), &r(x).i32, 1); break;
-                    STRIDE_1(Op::store32): memcpy(arg(imm), &r(x).i32, 4); break;
-
-                    STRIDE_K(Op::store8 ): skvx::cast<uint8_t>(r(x).i32).store(arg(imm)); break;
-                    STRIDE_K(Op::store32):                    (r(x).i32).store(arg(imm)); break;
-
-                    STRIDE_1(Op::load8 ): r(d).i32 = 0; memcpy(&r(d).i32, arg(imm), 1); break;
-                    STRIDE_1(Op::load32): r(d).i32 = 0; memcpy(&r(d).i32, arg(imm), 4); break;
-
-                    STRIDE_K(Op::load8 ): r(d).i32 = skvx::cast<int>(U8 ::Load(arg(imm))); break;
-                    STRIDE_K(Op::load32): r(d).i32 =                 I32::Load(arg(imm)) ; break;
-                #undef STRIDE_1
-                #undef STRIDE_K
-
-                // Ops that don't interact with memory should never care about the stride.
-                #define CASE(op) case 2*(int)op: /*fallthrough*/ case 2*(int)op+1
-                    CASE(Op::splat): r(d).i32 = imm; break;
-
-                    CASE(Op::add_f32): r(d).f32 = r(x).f32 + r(y).f32; break;
-                    CASE(Op::sub_f32): r(d).f32 = r(x).f32 - r(y).f32; break;
-                    CASE(Op::mul_f32): r(d).f32 = r(x).f32 * r(y).f32; break;
-                    CASE(Op::div_f32): r(d).f32 = r(x).f32 / r(y).f32; break;
-
-                    CASE(Op::mad_f32): r(d).f32 = r(x).f32 * r(y).f32 + r(z).f32; break;
-
-                    CASE(Op::add_i32): r(d).i32 = r(x).i32 + r(y).i32; break;
-                    CASE(Op::sub_i32): r(d).i32 = r(x).i32 - r(y).i32; break;
-                    CASE(Op::mul_i32): r(d).i32 = r(x).i32 * r(y).i32; break;
-
-                    CASE(Op::sub_i16x2):
-                        r(d).i32 = skvx::bit_pun<I32>(skvx::bit_pun<I16x2>(r(x).i32) -
-                                                      skvx::bit_pun<I16x2>(r(y).i32) ); break;
-                    CASE(Op::mul_i16x2):
-                        r(d).i32 = skvx::bit_pun<I32>(skvx::bit_pun<I16x2>(r(x).i32) *
-                                                      skvx::bit_pun<I16x2>(r(y).i32) ); break;
-                    CASE(Op::shr_i16x2):
-                        r(d).i32 = skvx::bit_pun<I32>(skvx::bit_pun<U16x2>(r(x).i32) >> imm);
-                        break;
-
-                    CASE(Op::bit_and):   r(d).i32 = r(x).i32 &  r(y).i32; break;
-                    CASE(Op::bit_or ):   r(d).i32 = r(x).i32 |  r(y).i32; break;
-                    CASE(Op::bit_xor):   r(d).i32 = r(x).i32 ^  r(y).i32; break;
-                    CASE(Op::bit_clear): r(d).i32 = r(x).i32 & ~r(y).i32; break;
-
-                    CASE(Op::shl): r(d).i32 = r(x).i32 << imm; break;
-                    CASE(Op::sra): r(d).i32 = r(x).i32 >> imm; break;
-                    CASE(Op::shr): r(d).u32 = r(x).u32 >> imm; break;
-
-                    CASE(Op::extract): r(d).u32 = (r(x).u32 >> imm) & r(y).u32; break;
-                    CASE(Op::pack):    r(d).u32 = r(x).u32 | (r(y).u32 << imm); break;
-
-                    CASE(Op::bytes): {
-                        const U32 table[] = {
-                            0,
-                            (r(x).u32      ) & 0xff,
-                            (r(x).u32 >>  8) & 0xff,
-                            (r(x).u32 >> 16) & 0xff,
-                            (r(x).u32 >> 24) & 0xff,
-                        };
-                        r(d).u32 = table[(imm >>  0) & 0xf] <<  0
-                                 | table[(imm >>  4) & 0xf] <<  8
-                                 | table[(imm >>  8) & 0xf] << 16
-                                 | table[(imm >> 12) & 0xf] << 24;
-                    } break;
-
-                    CASE(Op::to_f32): r(d).f32 = skvx::cast<float>(r(x).i32); break;
-                    CASE(Op::to_i32): r(d).i32 = skvx::cast<int>  (r(x).f32); break;
-                #undef CASE
-                }
-            }
-        }
-    }
-
-} // namespace SK_OPTS_NS
-
-#endif//SkVM_opts_DEFINED
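
Two self-contained C++ sketches follow for readers of the patch; identifiers like Op, dispatch, and main here are illustrative stand-ins, not the Skia API.

First, the interpreter keys its switch on 2*(int)inst.op + (stride == K ? 1 : 0), so every opcode owns two adjacent case labels: the even label is the scalar (stride=1) tail, the odd label is the K-wide body, and non-memory ops claim both labels with a single body. A minimal model of that encoding:

    // Toy model of the case-label fusion: even label = scalar tail, odd = K-wide.
    #include <cstdio>

    enum class Op { load, add };   // stand-in for skvm::Op
    constexpr int K = 16;          // vector width, as in the interpreter

    void dispatch(Op op, int stride) {
        switch (2*(int)op + (stride == K ? 1 : 0)) {
        #define STRIDE_1(op) case 2*(int)op
        #define STRIDE_K(op) case 2*(int)op + 1
            STRIDE_1(Op::load): printf("load: scalar, one lane\n");    break;
            STRIDE_K(Op::load): printf("load: vector, %d lanes\n", K); break;
        #undef STRIDE_1
        #undef STRIDE_K

        // Non-memory ops run the same body at either stride.
        #define CASE(op) case 2*(int)op: /*fallthrough*/ case 2*(int)op+1
            CASE(Op::add): printf("add: stride-independent\n"); break;
        #undef CASE
        }
    }

    int main() {
        dispatch(Op::load, K);  // hits STRIDE_K(Op::load)
        dispatch(Op::load, 1);  // hits STRIDE_1(Op::load)
        dispatch(Op::add,  1);  // hits the shared CASE(Op::add)
    }

Second, the fnv1a lambda added to SkVM.cpp uses the standard 32-bit FNV-1a constants, offset basis 2166136261 and prime 16777619, so the jit-<pid>.dump symbol names stay stable without depending on SkOpts::hash. A quick check against the published FNV-1a test vector for "a":

    // Standalone copy of the patch's fnv1a lambda plus a known-answer test.
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    static uint32_t fnv1a(const void* vbuf, size_t n) {
        uint32_t hash = 2166136261;
        for (auto buf = (const uint8_t*)vbuf; n --> 0; buf++) {
            hash ^= *buf;      // xor in one byte...
            hash *= 16777619;  // ...then multiply by the FNV prime.
        }
        return hash;
    }

    int main() {
        printf("%08x\n", (unsigned)fnv1a("a", 1));  // prints e40c292c
    }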