mirror of
https://github.com/KhronosGroup/SPIRV-Tools
synced 2024-11-26 13:20:05 +00:00
parent
64f36ea529
commit
9215c1b7df
@ -685,9 +685,9 @@ Optimizer::PassToken CreateSSARewritePass();
|
||||
// any resulting half precision values back to float32 as needed. No variables
|
||||
// are changed. No image operations are changed.
|
||||
//
|
||||
// Best if run late since it will generate better code with unneeded function
|
||||
// scope loads and stores and composite inserts and extracts removed. Also best
|
||||
// if followed by instruction simplification, redundancy elimination and DCE.
|
||||
// Best if run after function scope store/load and composite operation
|
||||
// eliminations are run. Also best if followed by instruction simplification,
|
||||
// redundancy elimination and DCE.
|
||||
Optimizer::PassToken CreateConvertRelaxedToHalfPass();
|
||||
|
||||
// Create relax float ops pass.
|
||||
|
@ -42,7 +42,7 @@ bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) {
|
||||
return Pass::IsFloat(ty_id, width);
|
||||
}
|
||||
|
||||
bool ConvertToHalfPass::IsRelaxed(Instruction* inst) {
|
||||
bool ConvertToHalfPass::IsDecoratedRelaxed(Instruction* inst) {
|
||||
uint32_t r_id = inst->result_id();
|
||||
for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false))
|
||||
if (r_inst->opcode() == SpvOpDecorate &&
|
||||
@ -51,6 +51,12 @@ bool ConvertToHalfPass::IsRelaxed(Instruction* inst) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ConvertToHalfPass::IsRelaxed(uint32_t id) {
|
||||
return relaxed_ids_set_.count(id) > 0;
|
||||
}
|
||||
|
||||
void ConvertToHalfPass::AddRelaxed(uint32_t id) { relaxed_ids_set_.insert(id); }
|
||||
|
||||
analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) {
|
||||
analysis::Float float_ty(width);
|
||||
return context()->get_type_mgr()->GetRegisteredType(&float_ty);
|
||||
@ -87,16 +93,19 @@ uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) {
|
||||
}
|
||||
|
||||
void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width,
|
||||
InstructionBuilder* builder) {
|
||||
Instruction* inst) {
|
||||
Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp);
|
||||
uint32_t ty_id = val_inst->type_id();
|
||||
uint32_t nty_id = EquivFloatTypeId(ty_id, width);
|
||||
if (nty_id == ty_id) return;
|
||||
Instruction* cvt_inst;
|
||||
InstructionBuilder builder(
|
||||
context(), inst,
|
||||
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
|
||||
if (val_inst->opcode() == SpvOpUndef)
|
||||
cvt_inst = builder->AddNullaryOp(nty_id, SpvOpUndef);
|
||||
cvt_inst = builder.AddNullaryOp(nty_id, SpvOpUndef);
|
||||
else
|
||||
cvt_inst = builder->AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
|
||||
cvt_inst = builder.AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
|
||||
*val_idp = cvt_inst->result_id();
|
||||
}
|
||||
|
||||
@ -153,17 +162,15 @@ bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
|
||||
bool modified = false;
|
||||
// Convert all float32 based operands to float16 equivalent and change
|
||||
// instruction type to float16 equivalent.
|
||||
InstructionBuilder builder(
|
||||
context(), inst,
|
||||
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
|
||||
inst->ForEachInId([&builder, &modified, this](uint32_t* idp) {
|
||||
inst->ForEachInId([&inst, &modified, this](uint32_t* idp) {
|
||||
Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
|
||||
if (!IsFloat(op_inst, 32)) return;
|
||||
GenConvert(idp, 16, &builder);
|
||||
GenConvert(idp, 16, inst);
|
||||
modified = true;
|
||||
});
|
||||
if (IsFloat(inst, 32)) {
|
||||
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
|
||||
converted_ids_.insert(inst->result_id());
|
||||
modified = true;
|
||||
}
|
||||
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
|
||||
@ -171,23 +178,10 @@ bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
|
||||
}
|
||||
|
||||
bool ConvertToHalfPass::ProcessPhi(Instruction* inst) {
|
||||
// Skip if not float32
|
||||
if (!IsFloat(inst, 32)) return false;
|
||||
// Skip if no relaxed operands.
|
||||
bool relaxed_found = false;
|
||||
uint32_t ocnt = 0;
|
||||
inst->ForEachInId([&ocnt, &relaxed_found, this](uint32_t* idp) {
|
||||
if (ocnt % 2 == 0) {
|
||||
Instruction* val_inst = get_def_use_mgr()->GetDef(*idp);
|
||||
if (IsRelaxed(val_inst)) relaxed_found = true;
|
||||
}
|
||||
++ocnt;
|
||||
});
|
||||
if (!relaxed_found) return false;
|
||||
// Add float16 converts of any float32 operands and change type
|
||||
// of phi to float16 equivalent. Operand converts need to be added to
|
||||
// preceeding blocks.
|
||||
ocnt = 0;
|
||||
uint32_t ocnt = 0;
|
||||
uint32_t* prev_idp;
|
||||
inst->ForEachInId([&ocnt, &prev_idp, this](uint32_t* idp) {
|
||||
if (ocnt % 2 == 0) {
|
||||
@ -203,65 +197,32 @@ bool ConvertToHalfPass::ProcessPhi(Instruction* inst) {
|
||||
insert_before->opcode() != SpvOpLoopMerge)
|
||||
++insert_before;
|
||||
}
|
||||
InstructionBuilder builder(context(), &*insert_before,
|
||||
IRContext::kAnalysisDefUse |
|
||||
IRContext::kAnalysisInstrToBlockMapping);
|
||||
GenConvert(prev_idp, 16, &builder);
|
||||
GenConvert(prev_idp, 16, &*insert_before);
|
||||
}
|
||||
}
|
||||
++ocnt;
|
||||
});
|
||||
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
|
||||
get_def_use_mgr()->AnalyzeInstUse(inst);
|
||||
converted_ids_.insert(inst->result_id());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ConvertToHalfPass::ProcessExtract(Instruction* inst) {
|
||||
bool modified = false;
|
||||
uint32_t comp_id = inst->GetSingleWordInOperand(0);
|
||||
Instruction* comp_inst = get_def_use_mgr()->GetDef(comp_id);
|
||||
// If extract is relaxed float32 based type and the composite is a relaxed
|
||||
// float32 based type, convert it to float16 equivalent. This is slightly
|
||||
// aggressive and pushes any likely conversion to apply to the whole
|
||||
// composite rather than apply to each extracted component later. This
|
||||
// can be a win if the platform can convert the entire composite in the same
|
||||
// time as one component. It risks converting components that may not be
|
||||
// used, although empirical data on a large set of real-world shaders seems
|
||||
// to suggest this is not common and the composite convert is the best choice.
|
||||
if (IsFloat(inst, 32) && IsRelaxed(inst) && IsFloat(comp_inst, 32) &&
|
||||
IsRelaxed(comp_inst)) {
|
||||
InstructionBuilder builder(
|
||||
context(), inst,
|
||||
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
|
||||
GenConvert(&comp_id, 16, &builder);
|
||||
inst->SetInOperand(0, {comp_id});
|
||||
comp_inst = get_def_use_mgr()->GetDef(comp_id);
|
||||
modified = true;
|
||||
}
|
||||
// If the composite is a float16 based type, make sure the type of the
|
||||
// extract agrees.
|
||||
if (IsFloat(comp_inst, 16) && !IsFloat(inst, 16)) {
|
||||
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
|
||||
modified = true;
|
||||
}
|
||||
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
|
||||
return modified;
|
||||
}
|
||||
|
||||
bool ConvertToHalfPass::ProcessConvert(Instruction* inst) {
|
||||
// If float32 and relaxed, change to float16 convert
|
||||
if (IsFloat(inst, 32) && IsRelaxed(inst)) {
|
||||
if (IsFloat(inst, 32) && IsRelaxed(inst->result_id())) {
|
||||
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
|
||||
get_def_use_mgr()->AnalyzeInstUse(inst);
|
||||
converted_ids_.insert(inst->result_id());
|
||||
}
|
||||
// If operand and result types are the same, replace result with operand
|
||||
// and change convert to copy to keep validator happy; DCE will clean it up
|
||||
// If operand and result types are the same, change FConvert to CopyObject to
|
||||
// keep validator happy; simplification and DCE will clean it up
|
||||
// One way this can happen is if an FConvert generated during this pass
|
||||
// (likely by ProcessPhi) is later encountered here and its operand has been
|
||||
// changed to half.
|
||||
uint32_t val_id = inst->GetSingleWordInOperand(0);
|
||||
Instruction* val_inst = get_def_use_mgr()->GetDef(val_id);
|
||||
if (inst->type_id() == val_inst->type_id()) {
|
||||
context()->ReplaceAllUsesWith(inst->result_id(), val_id);
|
||||
inst->SetOpcode(SpvOpCopyObject);
|
||||
}
|
||||
if (inst->type_id() == val_inst->type_id()) inst->SetOpcode(SpvOpCopyObject);
|
||||
return true; // modified
|
||||
}
|
||||
|
||||
@ -270,12 +231,8 @@ bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) {
|
||||
// If image reference, only need to convert dref args back to float32
|
||||
if (dref_image_ops_.count(inst->opcode()) != 0) {
|
||||
uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx);
|
||||
Instruction* dref_inst = get_def_use_mgr()->GetDef(dref_id);
|
||||
if (IsFloat(dref_inst, 16) && IsRelaxed(dref_inst)) {
|
||||
InstructionBuilder builder(
|
||||
context(), inst,
|
||||
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
|
||||
GenConvert(&dref_id, 32, &builder);
|
||||
if (converted_ids_.count(dref_id) > 0) {
|
||||
GenConvert(&dref_id, 32, inst);
|
||||
inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id});
|
||||
get_def_use_mgr()->AnalyzeInstUse(inst);
|
||||
modified = true;
|
||||
@ -288,32 +245,24 @@ bool ConvertToHalfPass::ProcessDefault(Instruction* inst) {
|
||||
bool modified = false;
|
||||
// If non-relaxed instruction has changed operands, need to convert
|
||||
// them back to float32
|
||||
InstructionBuilder builder(
|
||||
context(), inst,
|
||||
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
|
||||
inst->ForEachInId([&builder, &modified, this](uint32_t* idp) {
|
||||
Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
|
||||
if (!IsFloat(op_inst, 16)) return;
|
||||
if (!IsRelaxed(op_inst)) return;
|
||||
inst->ForEachInId([&inst, &modified, this](uint32_t* idp) {
|
||||
if (converted_ids_.count(*idp) == 0) return;
|
||||
uint32_t old_id = *idp;
|
||||
GenConvert(idp, 32, &builder);
|
||||
GenConvert(idp, 32, inst);
|
||||
if (*idp != old_id) modified = true;
|
||||
});
|
||||
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
|
||||
return modified;
|
||||
}
|
||||
|
||||
bool ConvertToHalfPass::GenHalfCode(Instruction* inst) {
|
||||
bool ConvertToHalfPass::GenHalfInst(Instruction* inst) {
|
||||
bool modified = false;
|
||||
// Remember id for later deletion of RelaxedPrecision decoration
|
||||
bool inst_relaxed = IsRelaxed(inst);
|
||||
if (inst_relaxed) relaxed_ids_.push_back(inst->result_id());
|
||||
bool inst_relaxed = IsRelaxed(inst->result_id());
|
||||
if (IsArithmetic(inst) && inst_relaxed)
|
||||
modified = GenHalfArith(inst);
|
||||
else if (inst->opcode() == SpvOpPhi)
|
||||
else if (inst->opcode() == SpvOpPhi && inst_relaxed)
|
||||
modified = ProcessPhi(inst);
|
||||
else if (inst->opcode() == SpvOpCompositeExtract)
|
||||
modified = ProcessExtract(inst);
|
||||
else if (inst->opcode() == SpvOpFConvert)
|
||||
modified = ProcessConvert(inst);
|
||||
else if (image_ops_.count(inst->opcode()) != 0)
|
||||
@ -323,13 +272,62 @@ bool ConvertToHalfPass::GenHalfCode(Instruction* inst) {
|
||||
return modified;
|
||||
}
|
||||
|
||||
bool ConvertToHalfPass::CloseRelaxInst(Instruction* inst) {
|
||||
if (inst->result_id() == 0) return false;
|
||||
if (IsRelaxed(inst->result_id())) return false;
|
||||
if (!IsFloat(inst, 32)) return false;
|
||||
if (IsDecoratedRelaxed(inst)) {
|
||||
AddRelaxed(inst->result_id());
|
||||
return true;
|
||||
}
|
||||
if (closure_ops_.count(inst->opcode()) == 0) return false;
|
||||
// Can relax if all float operands are relaxed
|
||||
bool relax = true;
|
||||
inst->ForEachInId([&relax, this](uint32_t* idp) {
|
||||
Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
|
||||
if (!IsFloat(op_inst, 32)) return;
|
||||
if (!IsRelaxed(*idp)) relax = false;
|
||||
});
|
||||
if (relax) {
|
||||
AddRelaxed(inst->result_id());
|
||||
return true;
|
||||
}
|
||||
// Can relax if all uses are relaxed
|
||||
relax = true;
|
||||
get_def_use_mgr()->ForEachUser(inst, [&relax, this](Instruction* uinst) {
|
||||
if (uinst->result_id() == 0 || !IsFloat(uinst, 32) ||
|
||||
(!IsDecoratedRelaxed(uinst) && !IsRelaxed(uinst->result_id()))) {
|
||||
relax = false;
|
||||
return;
|
||||
}
|
||||
});
|
||||
if (relax) {
|
||||
AddRelaxed(inst->result_id());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ConvertToHalfPass::ProcessFunction(Function* func) {
|
||||
// Do a closure of Relaxed on composite and phi instructions
|
||||
bool changed = true;
|
||||
while (changed) {
|
||||
changed = false;
|
||||
cfg()->ForEachBlockInReversePostOrder(
|
||||
func->entry().get(), [&changed, this](BasicBlock* bb) {
|
||||
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
|
||||
changed |= CloseRelaxInst(&*ii);
|
||||
});
|
||||
}
|
||||
// Do convert of relaxed instructions to half precision
|
||||
bool modified = false;
|
||||
cfg()->ForEachBlockInReversePostOrder(
|
||||
func->entry().get(), [&modified, this](BasicBlock* bb) {
|
||||
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
|
||||
modified |= GenHalfCode(&*ii);
|
||||
modified |= GenHalfInst(&*ii);
|
||||
});
|
||||
// Replace invalid converts of matrix into equivalent vector extracts,
|
||||
// converts and finally a composite construct
|
||||
cfg()->ForEachBlockInReversePostOrder(
|
||||
func->entry().get(), [&modified, this](BasicBlock* bb) {
|
||||
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
|
||||
@ -346,7 +344,7 @@ Pass::Status ConvertToHalfPass::ProcessImpl() {
|
||||
// If modified, make sure module has Float16 capability
|
||||
if (modified) context()->AddCapability(SpvCapabilityFloat16);
|
||||
// Remove all RelaxedPrecision decorations from instructions and globals
|
||||
for (auto c_id : relaxed_ids_) RemoveRelaxedDecoration(c_id);
|
||||
for (auto c_id : relaxed_ids_set_) RemoveRelaxedDecoration(c_id);
|
||||
for (auto& val : get_module()->types_values()) {
|
||||
uint32_t v_id = val.result_id();
|
||||
if (v_id != 0) RemoveRelaxedDecoration(v_id);
|
||||
@ -366,6 +364,7 @@ void ConvertToHalfPass::Initialize() {
|
||||
SpvOpVectorShuffle,
|
||||
SpvOpCompositeConstruct,
|
||||
SpvOpCompositeInsert,
|
||||
SpvOpCompositeExtract,
|
||||
SpvOpCopyObject,
|
||||
SpvOpTranspose,
|
||||
SpvOpConvertSToF,
|
||||
@ -453,7 +452,19 @@ void ConvertToHalfPass::Initialize() {
|
||||
SpvOpImageSparseSampleProjDrefExplicitLod,
|
||||
SpvOpImageSparseDrefGather,
|
||||
};
|
||||
relaxed_ids_.clear();
|
||||
closure_ops_ = {
|
||||
SpvOpVectorExtractDynamic,
|
||||
SpvOpVectorInsertDynamic,
|
||||
SpvOpVectorShuffle,
|
||||
SpvOpCompositeConstruct,
|
||||
SpvOpCompositeInsert,
|
||||
SpvOpCompositeExtract,
|
||||
SpvOpCopyObject,
|
||||
SpvOpTranspose,
|
||||
SpvOpPhi,
|
||||
};
|
||||
relaxed_ids_set_.clear();
|
||||
converted_ids_.clear();
|
||||
}
|
||||
|
||||
} // namespace opt
|
||||
|
@ -38,7 +38,8 @@ class ConvertToHalfPass : public Pass {
|
||||
const char* name() const override { return "convert-to-half-pass"; }
|
||||
|
||||
private:
|
||||
// Return true if |inst| is an arithmetic op that can be of type float16
|
||||
// Return true if |inst| is an arithmetic, composite or phi op that can be
|
||||
// of type float16
|
||||
bool IsArithmetic(Instruction* inst);
|
||||
|
||||
// Return true if |inst| returns scalar, vector or matrix type with base
|
||||
@ -46,7 +47,13 @@ class ConvertToHalfPass : public Pass {
|
||||
bool IsFloat(Instruction* inst, uint32_t width);
|
||||
|
||||
// Return true if |inst| is decorated with RelaxedPrecision
|
||||
bool IsRelaxed(Instruction* inst);
|
||||
bool IsDecoratedRelaxed(Instruction* inst);
|
||||
|
||||
// Return true if |id| has been added to the relaxed id set
|
||||
bool IsRelaxed(uint32_t id);
|
||||
|
||||
// Add |id| to the relaxed id set
|
||||
void AddRelaxed(uint32_t id);
|
||||
|
||||
// Return type id for float with |width|
|
||||
analysis::Type* FloatScalarType(uint32_t width);
|
||||
@ -64,19 +71,23 @@ class ConvertToHalfPass : public Pass {
|
||||
|
||||
// Append instructions to builder to convert value |*val_idp| to type
|
||||
// |ty_id| but with |width|. Set |*val_idp| to the new id.
|
||||
void GenConvert(uint32_t* val_idp, uint32_t width,
|
||||
InstructionBuilder* builder);
|
||||
void GenConvert(uint32_t* val_idp, uint32_t width, Instruction* inst);
|
||||
|
||||
// Remove RelaxedPrecision decoration of |id|.
|
||||
void RemoveRelaxedDecoration(uint32_t id);
|
||||
|
||||
// Add |inst| to relaxed instruction set if warranted. Specifically, if
|
||||
// it is float32 and either decorated relaxed or a composite or phi
|
||||
// instruction where all operands are relaxed or all uses are relaxed.
|
||||
bool CloseRelaxInst(Instruction* inst);
|
||||
|
||||
// If |inst| is an arithmetic, phi, extract or convert instruction of float32
|
||||
// base type and decorated with RelaxedPrecision, change it to the equivalent
|
||||
// float16 based type instruction. Specifically, insert instructions to
|
||||
// convert all operands to float16 (if needed) and change its type to the
|
||||
// equivalent float16 type. Otherwise, insert instructions to convert its
|
||||
// operands back to their original types, if needed.
|
||||
bool GenHalfCode(Instruction* inst);
|
||||
bool GenHalfInst(Instruction* inst);
|
||||
|
||||
// Gen code for relaxed arithmetic |inst|
|
||||
bool GenHalfArith(Instruction* inst);
|
||||
@ -84,9 +95,6 @@ class ConvertToHalfPass : public Pass {
|
||||
// Gen code for relaxed phi |inst|
|
||||
bool ProcessPhi(Instruction* inst);
|
||||
|
||||
// Gen code for relaxed extract |inst|
|
||||
bool ProcessExtract(Instruction* inst);
|
||||
|
||||
// Gen code for relaxed convert |inst|
|
||||
bool ProcessConvert(Instruction* inst);
|
||||
|
||||
@ -98,11 +106,11 @@ class ConvertToHalfPass : public Pass {
|
||||
|
||||
// If |inst| is an FConvert of a matrix type, decompose it to a series
|
||||
// of vector extracts, converts and inserts into an Undef. These are
|
||||
// generated by GenHalfCode because they are easier to manipulate, but are
|
||||
// generated by GenHalfInst because they are easier to manipulate, but are
|
||||
// invalid so we need to clean them up.
|
||||
bool MatConvertCleanup(Instruction* inst);
|
||||
|
||||
// Call GenHalfCode on every instruction in |func|.
|
||||
// Call GenHalfInst on every instruction in |func|.
|
||||
// If code is generated for an instruction, replace the instruction
|
||||
// with the new instructions that are generated.
|
||||
bool ProcessFunction(Function* func);
|
||||
@ -124,8 +132,14 @@ class ConvertToHalfPass : public Pass {
|
||||
// Set of dref sample operations
|
||||
std::unordered_set<uint32_t> dref_image_ops_;
|
||||
|
||||
// Set of dref sample operations
|
||||
std::unordered_set<uint32_t> closure_ops_;
|
||||
|
||||
// Set of ids of all relaxed instructions
|
||||
std::unordered_set<uint32_t> relaxed_ids_set_;
|
||||
|
||||
// Ids of all converted instructions
|
||||
std::vector<uint32_t> relaxed_ids_;
|
||||
std::unordered_set<uint32_t> converted_ids_;
|
||||
};
|
||||
|
||||
} // namespace opt
|
||||
|
@ -997,7 +997,7 @@ OpFunctionEnd
|
||||
%99 = OpFConvert %v4half %15
|
||||
OpBranch %65
|
||||
%65 = OpLabel
|
||||
%96 = OpPhi %v4half %99 %5 %76 %71
|
||||
%96 = OpPhi %v4half %99 %5 %100 %71
|
||||
%95 = OpPhi %int %int_0 %5 %78 %71
|
||||
%70 = OpSLessThan %bool %95 %int_10
|
||||
OpLoopMerge %66 %71 None
|
||||
@ -1222,6 +1222,115 @@ OpFunctionEnd
|
||||
defs_after + func_after, true, true);
|
||||
}
|
||||
|
||||
TEST_F(ConvertToHalfTest, ConvertToHalfWithClosure) {
|
||||
// Include as many contiguous composite instructions as possible into
|
||||
// half-precision computations
|
||||
//
|
||||
// Compiled with glslang -V -Os
|
||||
//
|
||||
// clang-format off
|
||||
//
|
||||
// #version 410 core
|
||||
//
|
||||
// precision mediump float;
|
||||
//
|
||||
// layout(location = 1) in vec3 foo;
|
||||
// layout(location = 2) in mat2 bar;
|
||||
// layout(location = 1) out vec3 res;
|
||||
//
|
||||
// vec3 func(vec3 tap, mat2 M) {
|
||||
// return vec3(M * tap.xy, 1.0);
|
||||
// }
|
||||
//
|
||||
// void main() {
|
||||
// res = func(foo, bar);
|
||||
// }
|
||||
//
|
||||
// clang-format on
|
||||
|
||||
const std::string defs =
|
||||
R"(OpCapability Shader
|
||||
; CHECK: OpCapability Float16
|
||||
%1 = OpExtInstImport "GLSL.std.450"
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint Fragment %main "main" %res %foo %bar
|
||||
OpExecutionMode %main OriginUpperLeft
|
||||
OpSource GLSL 410
|
||||
OpName %main "main"
|
||||
OpName %res "res"
|
||||
OpName %foo "foo"
|
||||
OpName %bar "bar"
|
||||
OpDecorate %res RelaxedPrecision
|
||||
; CHECK-NOT: OpDecorate %res RelaxedPrecision
|
||||
OpDecorate %res Location 1
|
||||
OpDecorate %foo RelaxedPrecision
|
||||
; CHECK-NOT: OpDecorate %foo RelaxedPrecision
|
||||
OpDecorate %foo Location 1
|
||||
OpDecorate %bar RelaxedPrecision
|
||||
; CHECK-NOT: OpDecorate %bar RelaxedPrecision
|
||||
OpDecorate %bar Location 2
|
||||
OpDecorate %34 RelaxedPrecision
|
||||
OpDecorate %36 RelaxedPrecision
|
||||
OpDecorate %41 RelaxedPrecision
|
||||
OpDecorate %42 RelaxedPrecision
|
||||
; CHECK-NOT: OpDecorate %34 RelaxedPrecision
|
||||
; CHECK-NOT: OpDecorate %36 RelaxedPrecision
|
||||
; CHECK-NOT: OpDecorate %41 RelaxedPrecision
|
||||
; CHECK-NOT: OpDecorate %42 RelaxedPrecision
|
||||
%void = OpTypeVoid
|
||||
%3 = OpTypeFunction %void
|
||||
%float = OpTypeFloat 32
|
||||
%v3float = OpTypeVector %float 3
|
||||
%v2float = OpTypeVector %float 2
|
||||
%mat2v2float = OpTypeMatrix %v2float 2
|
||||
%float_1 = OpConstant %float 1
|
||||
%_ptr_Output_v3float = OpTypePointer Output %v3float
|
||||
%res = OpVariable %_ptr_Output_v3float Output
|
||||
%_ptr_Input_v3float = OpTypePointer Input %v3float
|
||||
%foo = OpVariable %_ptr_Input_v3float Input
|
||||
%_ptr_Input_mat2v2float = OpTypePointer Input %mat2v2float
|
||||
%bar = OpVariable %_ptr_Input_mat2v2float Input
|
||||
)";
|
||||
|
||||
const std::string func =
|
||||
R"(%main = OpFunction %void None %3
|
||||
%5 = OpLabel
|
||||
%34 = OpLoad %v3float %foo
|
||||
%36 = OpLoad %mat2v2float %bar
|
||||
; CHECK: %48 = OpFConvert %v3half %34
|
||||
; CHECK: %49 = OpFConvert %v3half %34
|
||||
%41 = OpVectorShuffle %v2float %34 %34 0 1
|
||||
; CHECK-NOT: %41 = OpVectorShuffle %v2float %34 %34 0 1
|
||||
; CHECK: %41 = OpVectorShuffle %v2half %48 %49 0 1
|
||||
%42 = OpMatrixTimesVector %v2float %36 %41
|
||||
; CHECK-NOT: %42 = OpMatrixTimesVector %v2float %36 %41
|
||||
; CHECK: %55 = OpCompositeExtract %v2float %36 0
|
||||
; CHECK: %56 = OpFConvert %v2half %55
|
||||
; CHECK: %57 = OpCompositeExtract %v2float %36 1
|
||||
; CHECK: %58 = OpFConvert %v2half %57
|
||||
; CHECK: %59 = OpCompositeConstruct %mat2v2half %56 %58
|
||||
; CHECK: %52 = OpCopyObject %mat2v2float %36
|
||||
; CHECK: %42 = OpMatrixTimesVector %v2half %59 %41
|
||||
%43 = OpCompositeExtract %float %42 0
|
||||
%44 = OpCompositeExtract %float %42 1
|
||||
; CHECK-NOT: %43 = OpCompositeExtract %float %42 0
|
||||
; CHECK-NOT: %44 = OpCompositeExtract %float %42 1
|
||||
; CHECK: %43 = OpCompositeExtract %half %42 0
|
||||
; CHECK: %44 = OpCompositeExtract %half %42 1
|
||||
%45 = OpCompositeConstruct %v3float %43 %44 %float_1
|
||||
; CHECK-NOT: %45 = OpCompositeConstruct %v3float %43 %44 %float_1
|
||||
; CHECK: %53 = OpFConvert %float %43
|
||||
; CHECK: %54 = OpFConvert %float %44
|
||||
; CHECK: %45 = OpCompositeConstruct %v3float %53 %54 %float_1
|
||||
OpStore %res %45
|
||||
OpReturn
|
||||
OpFunctionEnd
|
||||
)";
|
||||
|
||||
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
|
||||
SinglePassRunAndMatch<ConvertToHalfPass>(defs + func, true);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace opt
|
||||
} // namespace spvtools
|
||||
|
Loading…
Reference in New Issue
Block a user