Revert "Revert "switched SkSL interpreter over to threaded code""

This reverts commit cbdc829ff1.

Bug: skia:
Change-Id: Idb7ad413431e6556bc040369df258655c2349ef2
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/239447
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
This commit is contained in:
Ethan Nicholas 2019-09-05 16:50:52 -04:00 committed by Skia Commit-Bot
parent eedd08576a
commit c70027b151
4 changed files with 1264 additions and 764 deletions

File diff suppressed because it is too large Load Diff

View File

@ -18,11 +18,25 @@ namespace SkSL {
class ExternalValue; class ExternalValue;
struct FunctionDeclaration; struct FunctionDeclaration;
#define VECTOR(name) name, name ## 2, name ## 3, name ## 4 // GCC and Clang support the "labels as values" extension which we need to implement the interpreter
#define VECTOR_MATRIX(name) name, name ## 2, name ## 3, name ## 4, name ## N // using threaded code. Otherwise, we fall back to using a switch statement in a for loop.
#if defined(__GNUC__) || defined(__clang__)
#define SKSLC_THREADED_CODE
using instruction = void*;
#else
using instruction = uint16_t;
#endif
#define VECTOR(name) name ## 4, name ## 3, name ## 2, name
#define VECTOR_MATRIX(name) name ## 4, name ## 3, name ## 2, name, name ## N
enum class ByteCodeInstruction : uint16_t { enum class ByteCodeInstruction : uint16_t {
// B = bool, F = float, I = int, S = signed, U = unsigned // B = bool, F = float, I = int, S = signed, U = unsigned
// All binary VECTOR instructions (kAddF, kSubtractI, kCompareIEQ, etc.) are followed by a byte
// indicating the count, even though it is redundant due to the count appearing in the opcode.
// This is because the original opcodes are lost after we preprocess the bytecode into threaded
// code, and we still need to be able to access the count so as to permit the implementation to
// use opcode fallthrough.
VECTOR_MATRIX(kAddF), VECTOR_MATRIX(kAddF),
VECTOR(kAddI), VECTOR(kAddI),
kAndB, kAndB,
@ -37,11 +51,11 @@ enum class ByteCodeInstruction : uint16_t {
VECTOR(kCompareIEQ), VECTOR(kCompareIEQ),
VECTOR(kCompareINEQ), VECTOR(kCompareINEQ),
VECTOR_MATRIX(kCompareFEQ), VECTOR_MATRIX(kCompareFEQ),
VECTOR_MATRIX(kCompareFNEQ),
VECTOR(kCompareFGT), VECTOR(kCompareFGT),
VECTOR(kCompareFGTEQ), VECTOR(kCompareFGTEQ),
VECTOR(kCompareFLT), VECTOR(kCompareFLT),
VECTOR(kCompareFLTEQ), VECTOR(kCompareFLTEQ),
VECTOR_MATRIX(kCompareFNEQ),
VECTOR(kCompareSGT), VECTOR(kCompareSGT),
VECTOR(kCompareSGTEQ), VECTOR(kCompareSGTEQ),
VECTOR(kCompareSLT), VECTOR(kCompareSLT),
@ -53,14 +67,18 @@ enum class ByteCodeInstruction : uint16_t {
VECTOR(kConvertFtoI), VECTOR(kConvertFtoI),
VECTOR(kConvertStoF), VECTOR(kConvertStoF),
VECTOR(kConvertUtoF), VECTOR(kConvertUtoF),
// Followed by a (redundant) byte indicating the count
VECTOR(kCos), VECTOR(kCos),
VECTOR_MATRIX(kDivideF), VECTOR_MATRIX(kDivideF),
VECTOR(kDivideS), VECTOR(kDivideS),
VECTOR(kDivideU), VECTOR(kDivideU),
// Duplicates the top stack value // Duplicates the top stack value. Followed by a (redundant) byte indicating the count.
VECTOR_MATRIX(kDup), VECTOR_MATRIX(kDup),
kInverse2x2, kInverse3x3, kInverse4x4, kInverse2x2,
// kLoad/kLoadGlobal are followed by a byte indicating the local/global slot to load kInverse3x3,
kInverse4x4,
// kLoad/kLoadGlobal are followed by a byte indicating the count, and a byte indicating the
// local/global slot to load
VECTOR(kLoad), VECTOR(kLoad),
VECTOR(kLoadGlobal), VECTOR(kLoadGlobal),
// As kLoad/kLoadGlobal, then a count byte (1-4), and then one byte per swizzle component (0-3). // As kLoad/kLoadGlobal, then a count byte (1-4), and then one byte per swizzle component (0-3).
@ -99,6 +117,7 @@ enum class ByteCodeInstruction : uint16_t {
// Takes a single value from the top of the stack, and converts to a CxR matrix with that value // Takes a single value from the top of the stack, and converts to a CxR matrix with that value
// replicated along the diagonal (and zero elsewhere), per the GLSL matrix construction rules. // replicated along the diagonal (and zero elsewhere), per the GLSL matrix construction rules.
kScalarToMatrix, kScalarToMatrix,
// Followed by a (redundant) byte indicating the count
VECTOR(kSin), VECTOR(kSin),
VECTOR(kSqrt), VECTOR(kSqrt),
// kStore/kStoreGlobal are followed by a byte indicating the local/global slot to store // kStore/kStoreGlobal are followed by a byte indicating the local/global slot to store
@ -121,6 +140,7 @@ enum class ByteCodeInstruction : uint16_t {
kSwizzle, kSwizzle,
VECTOR_MATRIX(kSubtractF), VECTOR_MATRIX(kSubtractF),
VECTOR(kSubtractI), VECTOR(kSubtractI),
// Followed by a (redundant) byte indicating the count
VECTOR(kTan), VECTOR(kTan),
// Followed by a byte indicating external value to write // Followed by a byte indicating external value to write
VECTOR(kWriteExternal), VECTOR(kWriteExternal),
@ -160,12 +180,18 @@ struct ByteCodeFunction {
int fConditionCount = 0; int fConditionCount = 0;
int fLoopCount = 0; int fLoopCount = 0;
int fReturnCount = 0; int fReturnCount = 0;
bool fPreprocessed = 0;
std::vector<uint8_t> fCode; std::vector<uint8_t> fCode;
/** /**
* Print bytecode disassembly to stdout. * Print bytecode disassembly to stdout.
*/ */
void disassemble() const; void disassemble() const;
/**
* Replace each opcode with the corresponding entry from the labels array.
*/
void preprocess(const void* labels[]);
}; };
struct SK_API ByteCode { struct SK_API ByteCode {

View File

@ -441,6 +441,7 @@ int ByteCodeGenerator::getLocation(const Expression& expr, Variable::Storage* st
this->write(ByteCodeInstruction::kPushImmediate); this->write(ByteCodeInstruction::kPushImmediate);
this->write32(offset); this->write32(offset);
this->write(ByteCodeInstruction::kAddI); this->write(ByteCodeInstruction::kAddI);
this->write8(1);
} }
return -1; return -1;
} else { } else {
@ -475,6 +476,7 @@ int ByteCodeGenerator::getLocation(const Expression& expr, Variable::Storage* st
this->write(ByteCodeInstruction::kPushImmediate); this->write(ByteCodeInstruction::kPushImmediate);
this->write32(stride); this->write32(stride);
this->write(ByteCodeInstruction::kMultiplyI); this->write(ByteCodeInstruction::kMultiplyI);
this->write8(1);
} }
} }
int baseAddr = this->getLocation(*i.fBase, storage); int baseAddr = this->getLocation(*i.fBase, storage);
@ -501,6 +503,7 @@ int ByteCodeGenerator::getLocation(const Expression& expr, Variable::Storage* st
this->write32(offset); this->write32(offset);
} }
this->write(ByteCodeInstruction::kAddI); this->write(ByteCodeInstruction::kAddI);
this->write8(1);
return -1; return -1;
} }
case Expression::kSwizzle_Kind: { case Expression::kSwizzle_Kind: {
@ -513,6 +516,7 @@ int ByteCodeGenerator::getLocation(const Expression& expr, Variable::Storage* st
this->write(ByteCodeInstruction::kPushImmediate); this->write(ByteCodeInstruction::kPushImmediate);
this->write32(offset); this->write32(offset);
this->write(ByteCodeInstruction::kAddI); this->write(ByteCodeInstruction::kAddI);
this->write8(1);
} }
return -1; return -1;
} else { } else {
@ -556,19 +560,22 @@ void ByteCodeGenerator::write(ByteCodeInstruction i, int count) {
case ByteCodeInstruction::kMaskBlend: this->exitCondition(); break; case ByteCodeInstruction::kMaskBlend: this->exitCondition(); break;
default: /* Do nothing */ break; default: /* Do nothing */ break;
} }
this->write16((uint16_t)i); instruction val = (instruction) i;
size_t n = fCode->size();
fCode->resize(n + sizeof(val));
memcpy(fCode->data() + n, &val, sizeof(val));
fStackCount += StackUsage(i, count); fStackCount += StackUsage(i, count);
fMaxStackCount = std::max(fMaxStackCount, fStackCount); fMaxStackCount = std::max(fMaxStackCount, fStackCount);
} }
static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) { static ByteCodeInstruction vector_instruction(ByteCodeInstruction base, int count) {
SkASSERT(count >= 1 && count <= 4); SkASSERT(count >= 1 && count <= 4);
return ((ByteCodeInstruction) ((int) base + count - 1)); return ((ByteCodeInstruction) ((int) base + 1 - count));
} }
void ByteCodeGenerator::writeTypedInstruction(const Type& type, ByteCodeInstruction s, void ByteCodeGenerator::writeTypedInstruction(const Type& type, ByteCodeInstruction s,
ByteCodeInstruction u, ByteCodeInstruction f, ByteCodeInstruction u, ByteCodeInstruction f,
int count) { int count, bool writeCount) {
switch (type_category(type)) { switch (type_category(type)) {
case TypeCategory::kSigned: case TypeCategory::kSigned:
this->write(vector_instruction(s, count)); this->write(vector_instruction(s, count));
@ -578,8 +585,7 @@ void ByteCodeGenerator::writeTypedInstruction(const Type& type, ByteCodeInstruct
break; break;
case TypeCategory::kFloat: { case TypeCategory::kFloat: {
if (count > 4) { if (count > 4) {
this->write((ByteCodeInstruction)((int)f + 4), count); this->write((ByteCodeInstruction)((int)f + 1), count);
this->write8(count);
} else { } else {
this->write(vector_instruction(f, count)); this->write(vector_instruction(f, count));
} }
@ -588,6 +594,9 @@ void ByteCodeGenerator::writeTypedInstruction(const Type& type, ByteCodeInstruct
default: default:
SkASSERT(false); SkASSERT(false);
} }
if (writeCount) {
this->write8(count);
}
} }
bool ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b, bool discard) { bool ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b, bool discard) {
@ -614,6 +623,7 @@ bool ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b, bool di
if (!lVecOrMtx && rVecOrMtx) { if (!lVecOrMtx && rVecOrMtx) {
for (int i = SlotCount(rType); i > 1; --i) { for (int i = SlotCount(rType); i > 1; --i) {
this->write(ByteCodeInstruction::kDup); this->write(ByteCodeInstruction::kDup);
this->write8(1);
} }
} }
} }
@ -621,6 +631,7 @@ bool ByteCodeGenerator::writeBinaryExpression(const BinaryExpression& b, bool di
if (lVecOrMtx && !rVecOrMtx) { if (lVecOrMtx && !rVecOrMtx) {
for (int i = SlotCount(lType); i > 1; --i) { for (int i = SlotCount(lType); i > 1; --i) {
this->write(ByteCodeInstruction::kDup); this->write(ByteCodeInstruction::kDup);
this->write8(1);
} }
} }
// Special case for M*V, V*M, M*M (but not V*V!) // Special case for M*V, V*M, M*M (but not V*V!)
@ -799,6 +810,7 @@ void ByteCodeGenerator::writeConstructor(const Constructor& c) {
SkASSERT(outType.kind() == Type::kVector_Kind); SkASSERT(outType.kind() == Type::kVector_Kind);
for (; inCount != outCount; ++inCount) { for (; inCount != outCount; ++inCount) {
this->write(ByteCodeInstruction::kDup); this->write(ByteCodeInstruction::kDup);
this->write8(1);
} }
} }
} }
@ -822,8 +834,9 @@ void ByteCodeGenerator::writeExternalFunctionCall(const ExternalFunctionCall& f)
} }
void ByteCodeGenerator::writeExternalValue(const ExternalValueReference& e) { void ByteCodeGenerator::writeExternalValue(const ExternalValueReference& e) {
this->write(vector_instruction(ByteCodeInstruction::kReadExternal, int count = SlotCount(e.fValue->type());
SlotCount(e.fValue->type()))); this->write(vector_instruction(ByteCodeInstruction::kReadExternal, count));
this->write8(count);
int index = fOutput->fExternalValues.size(); int index = fOutput->fExternalValues.size();
fOutput->fExternalValues.push_back(e.fValue); fOutput->fExternalValues.push_back(e.fValue);
SkASSERT(index <= 255); SkASSERT(index <= 255);
@ -848,6 +861,7 @@ void ByteCodeGenerator::writeVariableExpression(const Expression& expr) {
this->write(vector_instruction(isGlobal ? ByteCodeInstruction::kLoadGlobal this->write(vector_instruction(isGlobal ? ByteCodeInstruction::kLoadGlobal
: ByteCodeInstruction::kLoad, : ByteCodeInstruction::kLoad,
count)); count));
this->write8(count);
this->write8(location); this->write8(location);
} }
} }
@ -876,9 +890,11 @@ void ByteCodeGenerator::writeIntrinsicCall(const FunctionCall& c) {
case SpecialIntrinsic::kDot: { case SpecialIntrinsic::kDot: {
SkASSERT(c.fArguments.size() == 2); SkASSERT(c.fArguments.size() == 2);
SkASSERT(count == SlotCount(c.fArguments[1]->fType)); SkASSERT(count == SlotCount(c.fArguments[1]->fType));
this->write((ByteCodeInstruction)((int)ByteCodeInstruction::kMultiplyF + count-1)); this->write(vector_instruction(ByteCodeInstruction::kMultiplyF, count));
this->write8(count);
for (int i = count; i > 1; --i) { for (int i = count; i > 1; --i) {
this->write(ByteCodeInstruction::kAddF); this->write(ByteCodeInstruction::kAddF);
this->write8(1);
} }
break; break;
} }
@ -889,11 +905,14 @@ void ByteCodeGenerator::writeIntrinsicCall(const FunctionCall& c) {
switch (found->second.fValue.fInstruction) { switch (found->second.fValue.fInstruction) {
case ByteCodeInstruction::kCos: case ByteCodeInstruction::kCos:
case ByteCodeInstruction::kSin: case ByteCodeInstruction::kSin:
case ByteCodeInstruction::kSqrt:
case ByteCodeInstruction::kTan: case ByteCodeInstruction::kTan:
SkASSERT(c.fArguments.size() > 0); SkASSERT(c.fArguments.size() > 0);
this->write((ByteCodeInstruction) ((int) found->second.fValue.fInstruction + this->write(vector_instruction(found->second.fValue.fInstruction, count));
count - 1)); this->write8(count);
break;
case ByteCodeInstruction::kSqrt:
SkASSERT(c.fArguments.size() > 0);
this->write(vector_instruction(found->second.fValue.fInstruction, count));
break; break;
case ByteCodeInstruction::kInverse2x2: { case ByteCodeInstruction::kInverse2x2: {
SkASSERT(c.fArguments.size() > 0); SkASSERT(c.fArguments.size() > 0);
@ -1039,7 +1058,8 @@ bool ByteCodeGenerator::writePrefixExpression(const PrefixExpression& p, bool di
ByteCodeInstruction::kNegateI, ByteCodeInstruction::kNegateI,
ByteCodeInstruction::kNegateI, ByteCodeInstruction::kNegateI,
ByteCodeInstruction::kNegateF, ByteCodeInstruction::kNegateF,
SlotCount(p.fOperand->fType)); SlotCount(p.fOperand->fType),
false);
break; break;
} }
default: default:
@ -1058,6 +1078,7 @@ bool ByteCodeGenerator::writePostfixExpression(const PostfixExpression& p, bool
// If we're not supposed to discard the result, then make a copy *before* the +/- // If we're not supposed to discard the result, then make a copy *before* the +/-
if (!discard) { if (!discard) {
this->write(ByteCodeInstruction::kDup); this->write(ByteCodeInstruction::kDup);
this->write8(1);
} }
this->write(ByteCodeInstruction::kPushImmediate); this->write(ByteCodeInstruction::kPushImmediate);
this->write32(type_category(p.fType) == TypeCategory::kFloat ? float_to_bits(1.0f) : 1); this->write32(type_category(p.fType) == TypeCategory::kFloat ? float_to_bits(1.0f) : 1);
@ -1202,14 +1223,17 @@ public:
void load() override { void load() override {
fGenerator.write(vector_instruction(ByteCodeInstruction::kReadExternal, fCount)); fGenerator.write(vector_instruction(ByteCodeInstruction::kReadExternal, fCount));
fGenerator.write8(fCount);
fGenerator.write8(fIndex); fGenerator.write8(fIndex);
} }
void store(bool discard) override { void store(bool discard) override {
if (!discard) { if (!discard) {
fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, fCount)); fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, fCount));
fGenerator.write8(fCount);
} }
fGenerator.write(vector_instruction(ByteCodeInstruction::kWriteExternal, fCount)); fGenerator.write(vector_instruction(ByteCodeInstruction::kWriteExternal, fCount));
fGenerator.write8(fCount);
fGenerator.write8(fIndex); fGenerator.write8(fIndex);
} }
@ -1235,6 +1259,7 @@ public:
int count = fSwizzle.fComponents.size(); int count = fSwizzle.fComponents.size();
if (!discard) { if (!discard) {
fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count)); fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count));
fGenerator.write8(count);
} }
Variable::Storage storage = Variable::kLocal_Storage; Variable::Storage storage = Variable::kLocal_Storage;
int location = fGenerator.getLocation(*fSwizzle.fBase, &storage); int location = fGenerator.getLocation(*fSwizzle.fBase, &storage);
@ -1279,6 +1304,7 @@ public:
fGenerator.write8(count); fGenerator.write8(count);
} else { } else {
fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count)); fGenerator.write(vector_instruction(ByteCodeInstruction::kDup, count));
fGenerator.write8(count);
} }
} }
Variable::Storage storage = Variable::kLocal_Storage; Variable::Storage storage = Variable::kLocal_Storage;

View File

@ -94,7 +94,7 @@ public:
* Based on 'type', writes the s (signed), u (unsigned), or f (float) instruction. * Based on 'type', writes the s (signed), u (unsigned), or f (float) instruction.
*/ */
void writeTypedInstruction(const Type& type, ByteCodeInstruction s, ByteCodeInstruction u, void writeTypedInstruction(const Type& type, ByteCodeInstruction s, ByteCodeInstruction u,
ByteCodeInstruction f, int count); ByteCodeInstruction f, int count, bool writeCount = true);
static int SlotCount(const Type& type); static int SlotCount(const Type& type);