byte align everything in SkSLInterpreter

It's nicer to write code without having to think about alignment,
and this appears to be faster too:

    $ ninja -C out nanobench && out/nanobench --config 8888 -m GM_runtime_cf_interp_1 --loops 0
    Before:  24/24  MB	1	18.4ms	18.5ms	18.5ms	18.6ms	0%	█▆▅▅▅▁▅▅▅▅	8888	GM_runtime_cf_interp_1
    After:   23/23  MB	1	16.6ms	16.6ms	16.6ms	16.7ms	0%	▁▁▃█▅▂▁▁█▅	8888	GM_runtime_cf_interp_1

While byte-aligning things I noticed the write16 and write32 calls could
do all their bytes at once, in one call to resize() instead of 2-4 calls
push_back.

Looking at that disassembly, I noticed vector_instruction can be static.

Change-Id: I22985b49d6745797da10bbd6b6f2002a7618f2ae
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/214338
Reviewed-by: Brian Osman <brianosman@google.com>
Reviewed-by: Ethan Nicholas <ethannicholas@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
This commit is contained in:
Mike Klein 2019-05-17 11:57:10 -05:00 committed by Skia Commit-Bot
parent 63af31a0ab
commit e5288369c8
4 changed files with 15 additions and 61 deletions

View File

@ -22,9 +22,6 @@ struct FunctionDeclaration;
#define VECTOR(name) name, name ## 2, name ## 3, name ## 4
enum class ByteCodeInstruction : uint8_t {
kInvalid,
kNop1,
kNop2,
kNop3,
// B = bool, F = float, I = int, S = signed, U = unsigned
VECTOR(kAddF),
VECTOR(kAddI),

View File

@ -161,40 +161,27 @@ int ByteCodeGenerator::getLocation(const Variable& var) {
}
}
void ByteCodeGenerator::align(int divisor, int remainder) {
switch (remainder - (int) fCode->size() % divisor) {
case 0: return;
case 3: this->write(ByteCodeInstruction::kNop3); // fall through
case 2: this->write(ByteCodeInstruction::kNop2); // fall through
case 1: this->write(ByteCodeInstruction::kNop1);
break;
default: SkASSERT(false);
}
}
void ByteCodeGenerator::write8(uint8_t b) {
fCode->push_back(b);
}
void ByteCodeGenerator::write16(uint16_t i) {
SkASSERT(fCode->size() % 2 == 0);
this->write8(i >> 0);
this->write8(i >> 8);
size_t n = fCode->size();
fCode->resize(n+2);
memcpy(fCode->data() + n, &i, 2);
}
void ByteCodeGenerator::write32(uint32_t i) {
SkASSERT(fCode->size() % 4 == 0);
this->write8((i >> 0) & 0xFF);
this->write8((i >> 8) & 0xFF);
this->write8((i >> 16) & 0xFF);
this->write8((i >> 24) & 0xFF);
size_t n = fCode->size();
fCode->resize(n+4);
memcpy(fCode->data() + n, &i, 4);
}
void ByteCodeGenerator::write(ByteCodeInstruction i) {
this->write8((uint8_t) i);
}
ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
return ((ByteCodeInstruction) ((int) base + count - 1));
}
@ -323,7 +310,6 @@ void ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b) {
}
void ByteCodeGenerator::writeBoolLiteral(const BoolLiteral& b) {
this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(b.fValue ? 1 : 0);
}
@ -378,7 +364,6 @@ void ByteCodeGenerator::writeFieldAccess(const FieldAccess& f) {
}
void ByteCodeGenerator::writeFloatLiteral(const FloatLiteral& f) {
this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(Interpreter::Value((float) f.fValue).fUnsigned);
}
@ -397,7 +382,6 @@ void ByteCodeGenerator::writeIndexExpression(const IndexExpression& i) {
}
void ByteCodeGenerator::writeIntLiteral(const IntLiteral& i) {
this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(i.fValue);
}
@ -414,7 +398,6 @@ void ByteCodeGenerator::writePrefixExpression(const PrefixExpression& p) {
SkASSERT(slot_count(p.fOperand->fType) == 1);
std::unique_ptr<LValue> lvalue = this->getLValue(*p.fOperand);
lvalue->load();
this->align(4, 3);
this->write(ByteCodeInstruction::kPushImmediate);
this->write32(type_category(p.fType) == TypeCategory::kFloat
? Interpreter::Value(1.0f).fUnsigned : 1);
@ -518,11 +501,9 @@ void ByteCodeGenerator::writeVariableReference(const VariableReference& v) {
void ByteCodeGenerator::writeTernaryExpression(const TernaryExpression& t) {
this->writeExpression(*t.fTest);
this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation trueLocation(this);
this->writeExpression(*t.fIfFalse);
this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
DeferredLocation endLocation(this);
trueLocation.set();
@ -720,13 +701,11 @@ void ByteCodeGenerator::setContinueTargets() {
}
void ByteCodeGenerator::writeBreakStatement(const BreakStatement& b) {
this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
fBreakTargets.top().emplace_back(this);
}
void ByteCodeGenerator::writeContinueStatement(const ContinueStatement& c) {
this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
fContinueTargets.top().emplace_back(this);
}
@ -738,7 +717,6 @@ void ByteCodeGenerator::writeDoStatement(const DoStatement& d) {
this->writeStatement(*d.fStatement);
this->setContinueTargets();
this->writeExpression(*d.fTest);
this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
this->write16(start);
this->setBreakTargets();
@ -754,7 +732,6 @@ void ByteCodeGenerator::writeForStatement(const ForStatement& f) {
if (f.fTest) {
this->writeExpression(*f.fTest);
this->write(ByteCodeInstruction::kNot);
this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation endLocation(this);
this->writeStatement(*f.fStatement);
@ -763,7 +740,6 @@ void ByteCodeGenerator::writeForStatement(const ForStatement& f) {
this->writeExpression(*f.fNext);
this->write(vector_instruction(ByteCodeInstruction::kPop, slot_count(f.fNext->fType)));
}
this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
endLocation.set();
@ -774,7 +750,6 @@ void ByteCodeGenerator::writeForStatement(const ForStatement& f) {
this->writeExpression(*f.fNext);
this->write(vector_instruction(ByteCodeInstruction::kPop, slot_count(f.fNext->fType)));
}
this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
}
@ -785,11 +760,9 @@ void ByteCodeGenerator::writeIfStatement(const IfStatement& i) {
if (i.fIfFalse) {
// if (test) { ..ifTrue.. } else { .. ifFalse .. }
this->writeExpression(*i.fTest);
this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation trueLocation(this);
this->writeStatement(*i.fIfFalse);
this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
DeferredLocation endLocation(this);
trueLocation.set();
@ -799,7 +772,6 @@ void ByteCodeGenerator::writeIfStatement(const IfStatement& i) {
// if (test) { ..ifTrue.. }
this->writeExpression(*i.fTest);
this->write(ByteCodeInstruction::kNot);
this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation endLocation(this);
this->writeStatement(*i.fIfTrue);
@ -839,12 +811,10 @@ void ByteCodeGenerator::writeWhileStatement(const WhileStatement& w) {
size_t start = fCode->size();
this->writeExpression(*w.fTest);
this->write(ByteCodeInstruction::kNot);
this->align(2, 1);
this->write(ByteCodeInstruction::kConditionalBranch);
DeferredLocation endLocation(this);
this->writeStatement(*w.fStatement);
this->setContinueTargets();
this->align(2, 1);
this->write(ByteCodeInstruction::kBranch);
this->write16(start);
endLocation.set();

View File

@ -84,8 +84,6 @@ public:
bool generateCode() override;
void align(int divisor, int remainder);
void write8(uint8_t b);
void write16(uint16_t b);

View File

@ -87,15 +87,15 @@ struct CallbackCtx : public SkRasterPipeline_CallbackCtx {
#define READ8() (*(ip++))
#define READ16() \
(SkASSERT((intptr_t) ip % 2 == 0), \
ip += 2, \
*(uint16_t*) (ip - 2))
template <typename T>
static T unaligned_load(const void* ptr) {
T val;
memcpy(&val, ptr, sizeof(val));
return val;
}
#define READ32() \
(SkASSERT((intptr_t) ip % 4 == 0), \
ip += 4, \
*(uint32_t*) (ip - 4))
#define READ16() (ip += 2, unaligned_load<uint16_t>(ip - 2))
#define READ32() (ip += 4, unaligned_load<uint32_t>(ip - 4))
static String value_string(uint32_t v) {
union { uint32_t u; float f; } pun = { v };
@ -174,9 +174,6 @@ void Interpreter::disassemble(const ByteCodeFunction& f) {
VECTOR_DISASSEMBLE(kMultiplyI, "multiplyi")
VECTOR_DISASSEMBLE(kNegateF, "negatef")
VECTOR_DISASSEMBLE(kNegateS, "negates")
case ByteCodeInstruction::kNop1: printf("nop1"); break;
case ByteCodeInstruction::kNop2: printf("nop2"); break;
case ByteCodeInstruction::kNop3: printf("nop3"); break;
VECTOR_DISASSEMBLE(kNot, "not")
VECTOR_DISASSEMBLE(kOrB, "orb")
VECTOR_DISASSEMBLE(kOrI, "ori")
@ -452,14 +449,6 @@ void Interpreter::run(const ByteCodeFunction& f, Value* stack, Value args[], Val
case ByteCodeInstruction::kNegateS:
sp[0].fSigned = -sp[0].fSigned;
break;
case ByteCodeInstruction::kNop1:
continue;
case ByteCodeInstruction::kNop2:
++ip;
continue;
case ByteCodeInstruction::kNop3:
ip += 2;
continue;
case ByteCodeInstruction::kPop:
POP();
break;