From e5288369c89e85b4530e2c96efee7b70270d3f2f Mon Sep 17 00:00:00 2001 From: Mike Klein Date: Fri, 17 May 2019 11:57:10 -0500 Subject: [PATCH] byte align everything in SkSLInterpreter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's nicer to write code without having to think about alignment, and this appears to be faster too: $ ninja -C out nanobench && out/nanobench --config 8888 -m GM_runtime_cf_interp_1 --loops 0 Before: 24/24 MB 1 18.4ms 18.5ms 18.5ms 18.6ms 0% █▆▅▅▅▁▅▅▅▅ 8888 GM_runtime_cf_interp_1 After: 23/23 MB 1 16.6ms 16.6ms 16.6ms 16.7ms 0% ▁▁▃█▅▂▁▁█▅ 8888 GM_runtime_cf_interp_1 While byte-aligning things I noticed the write16 and write32 calls could do all their bytes at once, in one call to resize() instead of 2-4 calls push_back. Looking at that disassembly, I noticed vector_instruction can be static. Change-Id: I22985b49d6745797da10bbd6b6f2002a7618f2ae Reviewed-on: https://skia-review.googlesource.com/c/skia/+/214338 Reviewed-by: Brian Osman Reviewed-by: Ethan Nicholas Commit-Queue: Mike Klein --- src/sksl/SkSLByteCode.h | 3 -- src/sksl/SkSLByteCodeGenerator.cpp | 44 +++++------------------------- src/sksl/SkSLByteCodeGenerator.h | 2 -- src/sksl/SkSLInterpreter.cpp | 27 ++++++------------ 4 files changed, 15 insertions(+), 61 deletions(-) diff --git a/src/sksl/SkSLByteCode.h b/src/sksl/SkSLByteCode.h index 00b50f8ee3..ca6e77f5c9 100644 --- a/src/sksl/SkSLByteCode.h +++ b/src/sksl/SkSLByteCode.h @@ -22,9 +22,6 @@ struct FunctionDeclaration; #define VECTOR(name) name, name ## 2, name ## 3, name ## 4 enum class ByteCodeInstruction : uint8_t { kInvalid, - kNop1, - kNop2, - kNop3, // B = bool, F = float, I = int, S = signed, U = unsigned VECTOR(kAddF), VECTOR(kAddI), diff --git a/src/sksl/SkSLByteCodeGenerator.cpp b/src/sksl/SkSLByteCodeGenerator.cpp index 5c877a7e9c..91a2d70cd3 100644 --- a/src/sksl/SkSLByteCodeGenerator.cpp +++ b/src/sksl/SkSLByteCodeGenerator.cpp @@ -161,40 +161,27 @@ int ByteCodeGenerator::getLocation(const Variable& var) { } } -void ByteCodeGenerator::align(int divisor, int remainder) { - switch (remainder - (int) fCode->size() % divisor) { - case 0: return; - case 3: this->write(ByteCodeInstruction::kNop3); // fall through - case 2: this->write(ByteCodeInstruction::kNop2); // fall through - case 1: this->write(ByteCodeInstruction::kNop1); - break; - default: SkASSERT(false); - } -} - void ByteCodeGenerator::write8(uint8_t b) { fCode->push_back(b); } void ByteCodeGenerator::write16(uint16_t i) { - SkASSERT(fCode->size() % 2 == 0); - this->write8(i >> 0); - this->write8(i >> 8); + size_t n = fCode->size(); + fCode->resize(n+2); + memcpy(fCode->data() + n, &i, 2); } void ByteCodeGenerator::write32(uint32_t i) { - SkASSERT(fCode->size() % 4 == 0); - this->write8((i >> 0) & 0xFF); - this->write8((i >> 8) & 0xFF); - this->write8((i >> 16) & 0xFF); - this->write8((i >> 24) & 0xFF); + size_t n = fCode->size(); + fCode->resize(n+4); + memcpy(fCode->data() + n, &i, 4); } void ByteCodeGenerator::write(ByteCodeInstruction i) { this->write8((uint8_t) i); } -ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) { +static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) { return ((ByteCodeInstruction) ((int) base + count - 1)); } @@ -323,7 +310,6 @@ void ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b) { } void ByteCodeGenerator::writeBoolLiteral(const BoolLiteral& b) { - this->align(4, 3); this->write(ByteCodeInstruction::kPushImmediate); this->write32(b.fValue ? 1 : 0); } @@ -378,7 +364,6 @@ void ByteCodeGenerator::writeFieldAccess(const FieldAccess& f) { } void ByteCodeGenerator::writeFloatLiteral(const FloatLiteral& f) { - this->align(4, 3); this->write(ByteCodeInstruction::kPushImmediate); this->write32(Interpreter::Value((float) f.fValue).fUnsigned); } @@ -397,7 +382,6 @@ void ByteCodeGenerator::writeIndexExpression(const IndexExpression& i) { } void ByteCodeGenerator::writeIntLiteral(const IntLiteral& i) { - this->align(4, 3); this->write(ByteCodeInstruction::kPushImmediate); this->write32(i.fValue); } @@ -414,7 +398,6 @@ void ByteCodeGenerator::writePrefixExpression(const PrefixExpression& p) { SkASSERT(slot_count(p.fOperand->fType) == 1); std::unique_ptr lvalue = this->getLValue(*p.fOperand); lvalue->load(); - this->align(4, 3); this->write(ByteCodeInstruction::kPushImmediate); this->write32(type_category(p.fType) == TypeCategory::kFloat ? Interpreter::Value(1.0f).fUnsigned : 1); @@ -518,11 +501,9 @@ void ByteCodeGenerator::writeVariableReference(const VariableReference& v) { void ByteCodeGenerator::writeTernaryExpression(const TernaryExpression& t) { this->writeExpression(*t.fTest); - this->align(2, 1); this->write(ByteCodeInstruction::kConditionalBranch); DeferredLocation trueLocation(this); this->writeExpression(*t.fIfFalse); - this->align(2, 1); this->write(ByteCodeInstruction::kBranch); DeferredLocation endLocation(this); trueLocation.set(); @@ -720,13 +701,11 @@ void ByteCodeGenerator::setContinueTargets() { } void ByteCodeGenerator::writeBreakStatement(const BreakStatement& b) { - this->align(2, 1); this->write(ByteCodeInstruction::kBranch); fBreakTargets.top().emplace_back(this); } void ByteCodeGenerator::writeContinueStatement(const ContinueStatement& c) { - this->align(2, 1); this->write(ByteCodeInstruction::kBranch); fContinueTargets.top().emplace_back(this); } @@ -738,7 +717,6 @@ void ByteCodeGenerator::writeDoStatement(const DoStatement& d) { this->writeStatement(*d.fStatement); this->setContinueTargets(); this->writeExpression(*d.fTest); - this->align(2, 1); this->write(ByteCodeInstruction::kConditionalBranch); this->write16(start); this->setBreakTargets(); @@ -754,7 +732,6 @@ void ByteCodeGenerator::writeForStatement(const ForStatement& f) { if (f.fTest) { this->writeExpression(*f.fTest); this->write(ByteCodeInstruction::kNot); - this->align(2, 1); this->write(ByteCodeInstruction::kConditionalBranch); DeferredLocation endLocation(this); this->writeStatement(*f.fStatement); @@ -763,7 +740,6 @@ void ByteCodeGenerator::writeForStatement(const ForStatement& f) { this->writeExpression(*f.fNext); this->write(vector_instruction(ByteCodeInstruction::kPop, slot_count(f.fNext->fType))); } - this->align(2, 1); this->write(ByteCodeInstruction::kBranch); this->write16(start); endLocation.set(); @@ -774,7 +750,6 @@ void ByteCodeGenerator::writeForStatement(const ForStatement& f) { this->writeExpression(*f.fNext); this->write(vector_instruction(ByteCodeInstruction::kPop, slot_count(f.fNext->fType))); } - this->align(2, 1); this->write(ByteCodeInstruction::kBranch); this->write16(start); } @@ -785,11 +760,9 @@ void ByteCodeGenerator::writeIfStatement(const IfStatement& i) { if (i.fIfFalse) { // if (test) { ..ifTrue.. } else { .. ifFalse .. } this->writeExpression(*i.fTest); - this->align(2, 1); this->write(ByteCodeInstruction::kConditionalBranch); DeferredLocation trueLocation(this); this->writeStatement(*i.fIfFalse); - this->align(2, 1); this->write(ByteCodeInstruction::kBranch); DeferredLocation endLocation(this); trueLocation.set(); @@ -799,7 +772,6 @@ void ByteCodeGenerator::writeIfStatement(const IfStatement& i) { // if (test) { ..ifTrue.. } this->writeExpression(*i.fTest); this->write(ByteCodeInstruction::kNot); - this->align(2, 1); this->write(ByteCodeInstruction::kConditionalBranch); DeferredLocation endLocation(this); this->writeStatement(*i.fIfTrue); @@ -839,12 +811,10 @@ void ByteCodeGenerator::writeWhileStatement(const WhileStatement& w) { size_t start = fCode->size(); this->writeExpression(*w.fTest); this->write(ByteCodeInstruction::kNot); - this->align(2, 1); this->write(ByteCodeInstruction::kConditionalBranch); DeferredLocation endLocation(this); this->writeStatement(*w.fStatement); this->setContinueTargets(); - this->align(2, 1); this->write(ByteCodeInstruction::kBranch); this->write16(start); endLocation.set(); diff --git a/src/sksl/SkSLByteCodeGenerator.h b/src/sksl/SkSLByteCodeGenerator.h index 170f476297..240aca1f20 100644 --- a/src/sksl/SkSLByteCodeGenerator.h +++ b/src/sksl/SkSLByteCodeGenerator.h @@ -84,8 +84,6 @@ public: bool generateCode() override; - void align(int divisor, int remainder); - void write8(uint8_t b); void write16(uint16_t b); diff --git a/src/sksl/SkSLInterpreter.cpp b/src/sksl/SkSLInterpreter.cpp index 293a4cd21c..fc79e97045 100644 --- a/src/sksl/SkSLInterpreter.cpp +++ b/src/sksl/SkSLInterpreter.cpp @@ -87,15 +87,15 @@ struct CallbackCtx : public SkRasterPipeline_CallbackCtx { #define READ8() (*(ip++)) -#define READ16() \ - (SkASSERT((intptr_t) ip % 2 == 0), \ - ip += 2, \ - *(uint16_t*) (ip - 2)) +template +static T unaligned_load(const void* ptr) { + T val; + memcpy(&val, ptr, sizeof(val)); + return val; +} -#define READ32() \ - (SkASSERT((intptr_t) ip % 4 == 0), \ - ip += 4, \ - *(uint32_t*) (ip - 4)) +#define READ16() (ip += 2, unaligned_load(ip - 2)) +#define READ32() (ip += 4, unaligned_load(ip - 4)) static String value_string(uint32_t v) { union { uint32_t u; float f; } pun = { v }; @@ -174,9 +174,6 @@ void Interpreter::disassemble(const ByteCodeFunction& f) { VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi") VECTOR_DISASSEMBLE(kNegateF, "negatef") VECTOR_DISASSEMBLE(kNegateS, "negates") - case ByteCodeInstruction::kNop1: printf("nop1"); break; - case ByteCodeInstruction::kNop2: printf("nop2"); break; - case ByteCodeInstruction::kNop3: printf("nop3"); break; VECTOR_DISASSEMBLE(kNot, "not") VECTOR_DISASSEMBLE(kOrB, "orb") VECTOR_DISASSEMBLE(kOrI, "ori") @@ -452,14 +449,6 @@ void Interpreter::run(const ByteCodeFunction& f, Value* stack, Value args[], Val case ByteCodeInstruction::kNegateS: sp[0].fSigned = -sp[0].fSigned; break; - case ByteCodeInstruction::kNop1: - continue; - case ByteCodeInstruction::kNop2: - ++ip; - continue; - case ByteCodeInstruction::kNop3: - ip += 2; - continue; case ByteCodeInstruction::kPop: POP(); break;