Fix convert-relax-to-half invalid code (#3099) (#3106)

This commit is contained in:
greg-lunarg 2019-12-20 19:08:12 -07:00 committed by Steven Perron
parent 64f36ea529
commit 9215c1b7df
4 changed files with 238 additions and 104 deletions

View File

@ -685,9 +685,9 @@ Optimizer::PassToken CreateSSARewritePass();
// any resulting half precision values back to float32 as needed. No variables
// are changed. No image operations are changed.
//
// Best if run late since it will generate better code with unneeded function
// scope loads and stores and composite inserts and extracts removed. Also best
// if followed by instruction simplification, redundancy elimination and DCE.
// Best if run after function scope store/load and composite operation
// eliminations are run. Also best if followed by instruction simplification,
// redundancy elimination and DCE.
Optimizer::PassToken CreateConvertRelaxedToHalfPass();
// Create relax float ops pass.

View File

@ -42,7 +42,7 @@ bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) {
return Pass::IsFloat(ty_id, width);
}
bool ConvertToHalfPass::IsRelaxed(Instruction* inst) {
bool ConvertToHalfPass::IsDecoratedRelaxed(Instruction* inst) {
uint32_t r_id = inst->result_id();
for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false))
if (r_inst->opcode() == SpvOpDecorate &&
@ -51,6 +51,12 @@ bool ConvertToHalfPass::IsRelaxed(Instruction* inst) {
return false;
}
bool ConvertToHalfPass::IsRelaxed(uint32_t id) {
return relaxed_ids_set_.count(id) > 0;
}
void ConvertToHalfPass::AddRelaxed(uint32_t id) { relaxed_ids_set_.insert(id); }
analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) {
analysis::Float float_ty(width);
return context()->get_type_mgr()->GetRegisteredType(&float_ty);
@ -87,16 +93,19 @@ uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) {
}
void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width,
InstructionBuilder* builder) {
Instruction* inst) {
Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp);
uint32_t ty_id = val_inst->type_id();
uint32_t nty_id = EquivFloatTypeId(ty_id, width);
if (nty_id == ty_id) return;
Instruction* cvt_inst;
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
if (val_inst->opcode() == SpvOpUndef)
cvt_inst = builder->AddNullaryOp(nty_id, SpvOpUndef);
cvt_inst = builder.AddNullaryOp(nty_id, SpvOpUndef);
else
cvt_inst = builder->AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
cvt_inst = builder.AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
*val_idp = cvt_inst->result_id();
}
@ -153,17 +162,15 @@ bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
bool modified = false;
// Convert all float32 based operands to float16 equivalent and change
// instruction type to float16 equivalent.
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
inst->ForEachInId([&builder, &modified, this](uint32_t* idp) {
inst->ForEachInId([&inst, &modified, this](uint32_t* idp) {
Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
if (!IsFloat(op_inst, 32)) return;
GenConvert(idp, 16, &builder);
GenConvert(idp, 16, inst);
modified = true;
});
if (IsFloat(inst, 32)) {
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
converted_ids_.insert(inst->result_id());
modified = true;
}
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
@ -171,23 +178,10 @@ bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
}
bool ConvertToHalfPass::ProcessPhi(Instruction* inst) {
// Skip if not float32
if (!IsFloat(inst, 32)) return false;
// Skip if no relaxed operands.
bool relaxed_found = false;
uint32_t ocnt = 0;
inst->ForEachInId([&ocnt, &relaxed_found, this](uint32_t* idp) {
if (ocnt % 2 == 0) {
Instruction* val_inst = get_def_use_mgr()->GetDef(*idp);
if (IsRelaxed(val_inst)) relaxed_found = true;
}
++ocnt;
});
if (!relaxed_found) return false;
// Add float16 converts of any float32 operands and change type
// of phi to float16 equivalent. Operand converts need to be added to
// preceeding blocks.
ocnt = 0;
uint32_t ocnt = 0;
uint32_t* prev_idp;
inst->ForEachInId([&ocnt, &prev_idp, this](uint32_t* idp) {
if (ocnt % 2 == 0) {
@ -203,65 +197,32 @@ bool ConvertToHalfPass::ProcessPhi(Instruction* inst) {
insert_before->opcode() != SpvOpLoopMerge)
++insert_before;
}
InstructionBuilder builder(context(), &*insert_before,
IRContext::kAnalysisDefUse |
IRContext::kAnalysisInstrToBlockMapping);
GenConvert(prev_idp, 16, &builder);
GenConvert(prev_idp, 16, &*insert_before);
}
}
++ocnt;
});
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
get_def_use_mgr()->AnalyzeInstUse(inst);
converted_ids_.insert(inst->result_id());
return true;
}
bool ConvertToHalfPass::ProcessExtract(Instruction* inst) {
bool modified = false;
uint32_t comp_id = inst->GetSingleWordInOperand(0);
Instruction* comp_inst = get_def_use_mgr()->GetDef(comp_id);
// If extract is relaxed float32 based type and the composite is a relaxed
// float32 based type, convert it to float16 equivalent. This is slightly
// aggressive and pushes any likely conversion to apply to the whole
// composite rather than apply to each extracted component later. This
// can be a win if the platform can convert the entire composite in the same
// time as one component. It risks converting components that may not be
// used, although empirical data on a large set of real-world shaders seems
// to suggest this is not common and the composite convert is the best choice.
if (IsFloat(inst, 32) && IsRelaxed(inst) && IsFloat(comp_inst, 32) &&
IsRelaxed(comp_inst)) {
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
GenConvert(&comp_id, 16, &builder);
inst->SetInOperand(0, {comp_id});
comp_inst = get_def_use_mgr()->GetDef(comp_id);
modified = true;
}
// If the composite is a float16 based type, make sure the type of the
// extract agrees.
if (IsFloat(comp_inst, 16) && !IsFloat(inst, 16)) {
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
modified = true;
}
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
return modified;
}
bool ConvertToHalfPass::ProcessConvert(Instruction* inst) {
// If float32 and relaxed, change to float16 convert
if (IsFloat(inst, 32) && IsRelaxed(inst)) {
if (IsFloat(inst, 32) && IsRelaxed(inst->result_id())) {
inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
get_def_use_mgr()->AnalyzeInstUse(inst);
converted_ids_.insert(inst->result_id());
}
// If operand and result types are the same, replace result with operand
// and change convert to copy to keep validator happy; DCE will clean it up
// If operand and result types are the same, change FConvert to CopyObject to
// keep validator happy; simplification and DCE will clean it up
// One way this can happen is if an FConvert generated during this pass
// (likely by ProcessPhi) is later encountered here and its operand has been
// changed to half.
uint32_t val_id = inst->GetSingleWordInOperand(0);
Instruction* val_inst = get_def_use_mgr()->GetDef(val_id);
if (inst->type_id() == val_inst->type_id()) {
context()->ReplaceAllUsesWith(inst->result_id(), val_id);
inst->SetOpcode(SpvOpCopyObject);
}
if (inst->type_id() == val_inst->type_id()) inst->SetOpcode(SpvOpCopyObject);
return true; // modified
}
@ -270,12 +231,8 @@ bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) {
// If image reference, only need to convert dref args back to float32
if (dref_image_ops_.count(inst->opcode()) != 0) {
uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx);
Instruction* dref_inst = get_def_use_mgr()->GetDef(dref_id);
if (IsFloat(dref_inst, 16) && IsRelaxed(dref_inst)) {
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
GenConvert(&dref_id, 32, &builder);
if (converted_ids_.count(dref_id) > 0) {
GenConvert(&dref_id, 32, inst);
inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id});
get_def_use_mgr()->AnalyzeInstUse(inst);
modified = true;
@ -288,32 +245,24 @@ bool ConvertToHalfPass::ProcessDefault(Instruction* inst) {
bool modified = false;
// If non-relaxed instruction has changed operands, need to convert
// them back to float32
InstructionBuilder builder(
context(), inst,
IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
inst->ForEachInId([&builder, &modified, this](uint32_t* idp) {
Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
if (!IsFloat(op_inst, 16)) return;
if (!IsRelaxed(op_inst)) return;
inst->ForEachInId([&inst, &modified, this](uint32_t* idp) {
if (converted_ids_.count(*idp) == 0) return;
uint32_t old_id = *idp;
GenConvert(idp, 32, &builder);
GenConvert(idp, 32, inst);
if (*idp != old_id) modified = true;
});
if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
return modified;
}
bool ConvertToHalfPass::GenHalfCode(Instruction* inst) {
bool ConvertToHalfPass::GenHalfInst(Instruction* inst) {
bool modified = false;
// Remember id for later deletion of RelaxedPrecision decoration
bool inst_relaxed = IsRelaxed(inst);
if (inst_relaxed) relaxed_ids_.push_back(inst->result_id());
bool inst_relaxed = IsRelaxed(inst->result_id());
if (IsArithmetic(inst) && inst_relaxed)
modified = GenHalfArith(inst);
else if (inst->opcode() == SpvOpPhi)
else if (inst->opcode() == SpvOpPhi && inst_relaxed)
modified = ProcessPhi(inst);
else if (inst->opcode() == SpvOpCompositeExtract)
modified = ProcessExtract(inst);
else if (inst->opcode() == SpvOpFConvert)
modified = ProcessConvert(inst);
else if (image_ops_.count(inst->opcode()) != 0)
@ -323,13 +272,62 @@ bool ConvertToHalfPass::GenHalfCode(Instruction* inst) {
return modified;
}
bool ConvertToHalfPass::CloseRelaxInst(Instruction* inst) {
if (inst->result_id() == 0) return false;
if (IsRelaxed(inst->result_id())) return false;
if (!IsFloat(inst, 32)) return false;
if (IsDecoratedRelaxed(inst)) {
AddRelaxed(inst->result_id());
return true;
}
if (closure_ops_.count(inst->opcode()) == 0) return false;
// Can relax if all float operands are relaxed
bool relax = true;
inst->ForEachInId([&relax, this](uint32_t* idp) {
Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
if (!IsFloat(op_inst, 32)) return;
if (!IsRelaxed(*idp)) relax = false;
});
if (relax) {
AddRelaxed(inst->result_id());
return true;
}
// Can relax if all uses are relaxed
relax = true;
get_def_use_mgr()->ForEachUser(inst, [&relax, this](Instruction* uinst) {
if (uinst->result_id() == 0 || !IsFloat(uinst, 32) ||
(!IsDecoratedRelaxed(uinst) && !IsRelaxed(uinst->result_id()))) {
relax = false;
return;
}
});
if (relax) {
AddRelaxed(inst->result_id());
return true;
}
return false;
}
bool ConvertToHalfPass::ProcessFunction(Function* func) {
// Do a closure of Relaxed on composite and phi instructions
bool changed = true;
while (changed) {
changed = false;
cfg()->ForEachBlockInReversePostOrder(
func->entry().get(), [&changed, this](BasicBlock* bb) {
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
changed |= CloseRelaxInst(&*ii);
});
}
// Do convert of relaxed instructions to half precision
bool modified = false;
cfg()->ForEachBlockInReversePostOrder(
func->entry().get(), [&modified, this](BasicBlock* bb) {
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
modified |= GenHalfCode(&*ii);
modified |= GenHalfInst(&*ii);
});
// Replace invalid converts of matrix into equivalent vector extracts,
// converts and finally a composite construct
cfg()->ForEachBlockInReversePostOrder(
func->entry().get(), [&modified, this](BasicBlock* bb) {
for (auto ii = bb->begin(); ii != bb->end(); ++ii)
@ -346,7 +344,7 @@ Pass::Status ConvertToHalfPass::ProcessImpl() {
// If modified, make sure module has Float16 capability
if (modified) context()->AddCapability(SpvCapabilityFloat16);
// Remove all RelaxedPrecision decorations from instructions and globals
for (auto c_id : relaxed_ids_) RemoveRelaxedDecoration(c_id);
for (auto c_id : relaxed_ids_set_) RemoveRelaxedDecoration(c_id);
for (auto& val : get_module()->types_values()) {
uint32_t v_id = val.result_id();
if (v_id != 0) RemoveRelaxedDecoration(v_id);
@ -366,6 +364,7 @@ void ConvertToHalfPass::Initialize() {
SpvOpVectorShuffle,
SpvOpCompositeConstruct,
SpvOpCompositeInsert,
SpvOpCompositeExtract,
SpvOpCopyObject,
SpvOpTranspose,
SpvOpConvertSToF,
@ -453,7 +452,19 @@ void ConvertToHalfPass::Initialize() {
SpvOpImageSparseSampleProjDrefExplicitLod,
SpvOpImageSparseDrefGather,
};
relaxed_ids_.clear();
closure_ops_ = {
SpvOpVectorExtractDynamic,
SpvOpVectorInsertDynamic,
SpvOpVectorShuffle,
SpvOpCompositeConstruct,
SpvOpCompositeInsert,
SpvOpCompositeExtract,
SpvOpCopyObject,
SpvOpTranspose,
SpvOpPhi,
};
relaxed_ids_set_.clear();
converted_ids_.clear();
}
} // namespace opt

View File

@ -38,7 +38,8 @@ class ConvertToHalfPass : public Pass {
const char* name() const override { return "convert-to-half-pass"; }
private:
// Return true if |inst| is an arithmetic op that can be of type float16
// Return true if |inst| is an arithmetic, composite or phi op that can be
// of type float16
bool IsArithmetic(Instruction* inst);
// Return true if |inst| returns scalar, vector or matrix type with base
@ -46,7 +47,13 @@ class ConvertToHalfPass : public Pass {
bool IsFloat(Instruction* inst, uint32_t width);
// Return true if |inst| is decorated with RelaxedPrecision
bool IsRelaxed(Instruction* inst);
bool IsDecoratedRelaxed(Instruction* inst);
// Return true if |id| has been added to the relaxed id set
bool IsRelaxed(uint32_t id);
// Add |id| to the relaxed id set
void AddRelaxed(uint32_t id);
// Return type id for float with |width|
analysis::Type* FloatScalarType(uint32_t width);
@ -64,19 +71,23 @@ class ConvertToHalfPass : public Pass {
// Append instructions to builder to convert value |*val_idp| to type
// |ty_id| but with |width|. Set |*val_idp| to the new id.
void GenConvert(uint32_t* val_idp, uint32_t width,
InstructionBuilder* builder);
void GenConvert(uint32_t* val_idp, uint32_t width, Instruction* inst);
// Remove RelaxedPrecision decoration of |id|.
void RemoveRelaxedDecoration(uint32_t id);
// Add |inst| to relaxed instruction set if warranted. Specifically, if
// it is float32 and either decorated relaxed or a composite or phi
// instruction where all operands are relaxed or all uses are relaxed.
bool CloseRelaxInst(Instruction* inst);
// If |inst| is an arithmetic, phi, extract or convert instruction of float32
// base type and decorated with RelaxedPrecision, change it to the equivalent
// float16 based type instruction. Specifically, insert instructions to
// convert all operands to float16 (if needed) and change its type to the
// equivalent float16 type. Otherwise, insert instructions to convert its
// operands back to their original types, if needed.
bool GenHalfCode(Instruction* inst);
bool GenHalfInst(Instruction* inst);
// Gen code for relaxed arithmetic |inst|
bool GenHalfArith(Instruction* inst);
@ -84,9 +95,6 @@ class ConvertToHalfPass : public Pass {
// Gen code for relaxed phi |inst|
bool ProcessPhi(Instruction* inst);
// Gen code for relaxed extract |inst|
bool ProcessExtract(Instruction* inst);
// Gen code for relaxed convert |inst|
bool ProcessConvert(Instruction* inst);
@ -98,11 +106,11 @@ class ConvertToHalfPass : public Pass {
// If |inst| is an FConvert of a matrix type, decompose it to a series
// of vector extracts, converts and inserts into an Undef. These are
// generated by GenHalfCode because they are easier to manipulate, but are
// generated by GenHalfInst because they are easier to manipulate, but are
// invalid so we need to clean them up.
bool MatConvertCleanup(Instruction* inst);
// Call GenHalfCode on every instruction in |func|.
// Call GenHalfInst on every instruction in |func|.
// If code is generated for an instruction, replace the instruction
// with the new instructions that are generated.
bool ProcessFunction(Function* func);
@ -124,8 +132,14 @@ class ConvertToHalfPass : public Pass {
// Set of dref sample operations
std::unordered_set<uint32_t> dref_image_ops_;
// Set of dref sample operations
std::unordered_set<uint32_t> closure_ops_;
// Set of ids of all relaxed instructions
std::unordered_set<uint32_t> relaxed_ids_set_;
// Ids of all converted instructions
std::vector<uint32_t> relaxed_ids_;
std::unordered_set<uint32_t> converted_ids_;
};
} // namespace opt

View File

@ -997,7 +997,7 @@ OpFunctionEnd
%99 = OpFConvert %v4half %15
OpBranch %65
%65 = OpLabel
%96 = OpPhi %v4half %99 %5 %76 %71
%96 = OpPhi %v4half %99 %5 %100 %71
%95 = OpPhi %int %int_0 %5 %78 %71
%70 = OpSLessThan %bool %95 %int_10
OpLoopMerge %66 %71 None
@ -1222,6 +1222,115 @@ OpFunctionEnd
defs_after + func_after, true, true);
}
TEST_F(ConvertToHalfTest, ConvertToHalfWithClosure) {
// Include as many contiguous composite instructions as possible into
// half-precision computations
//
// Compiled with glslang -V -Os
//
// clang-format off
//
// #version 410 core
//
// precision mediump float;
//
// layout(location = 1) in vec3 foo;
// layout(location = 2) in mat2 bar;
// layout(location = 1) out vec3 res;
//
// vec3 func(vec3 tap, mat2 M) {
// return vec3(M * tap.xy, 1.0);
// }
//
// void main() {
// res = func(foo, bar);
// }
//
// clang-format on
const std::string defs =
R"(OpCapability Shader
; CHECK: OpCapability Float16
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %main "main" %res %foo %bar
OpExecutionMode %main OriginUpperLeft
OpSource GLSL 410
OpName %main "main"
OpName %res "res"
OpName %foo "foo"
OpName %bar "bar"
OpDecorate %res RelaxedPrecision
; CHECK-NOT: OpDecorate %res RelaxedPrecision
OpDecorate %res Location 1
OpDecorate %foo RelaxedPrecision
; CHECK-NOT: OpDecorate %foo RelaxedPrecision
OpDecorate %foo Location 1
OpDecorate %bar RelaxedPrecision
; CHECK-NOT: OpDecorate %bar RelaxedPrecision
OpDecorate %bar Location 2
OpDecorate %34 RelaxedPrecision
OpDecorate %36 RelaxedPrecision
OpDecorate %41 RelaxedPrecision
OpDecorate %42 RelaxedPrecision
; CHECK-NOT: OpDecorate %34 RelaxedPrecision
; CHECK-NOT: OpDecorate %36 RelaxedPrecision
; CHECK-NOT: OpDecorate %41 RelaxedPrecision
; CHECK-NOT: OpDecorate %42 RelaxedPrecision
%void = OpTypeVoid
%3 = OpTypeFunction %void
%float = OpTypeFloat 32
%v3float = OpTypeVector %float 3
%v2float = OpTypeVector %float 2
%mat2v2float = OpTypeMatrix %v2float 2
%float_1 = OpConstant %float 1
%_ptr_Output_v3float = OpTypePointer Output %v3float
%res = OpVariable %_ptr_Output_v3float Output
%_ptr_Input_v3float = OpTypePointer Input %v3float
%foo = OpVariable %_ptr_Input_v3float Input
%_ptr_Input_mat2v2float = OpTypePointer Input %mat2v2float
%bar = OpVariable %_ptr_Input_mat2v2float Input
)";
const std::string func =
R"(%main = OpFunction %void None %3
%5 = OpLabel
%34 = OpLoad %v3float %foo
%36 = OpLoad %mat2v2float %bar
; CHECK: %48 = OpFConvert %v3half %34
; CHECK: %49 = OpFConvert %v3half %34
%41 = OpVectorShuffle %v2float %34 %34 0 1
; CHECK-NOT: %41 = OpVectorShuffle %v2float %34 %34 0 1
; CHECK: %41 = OpVectorShuffle %v2half %48 %49 0 1
%42 = OpMatrixTimesVector %v2float %36 %41
; CHECK-NOT: %42 = OpMatrixTimesVector %v2float %36 %41
; CHECK: %55 = OpCompositeExtract %v2float %36 0
; CHECK: %56 = OpFConvert %v2half %55
; CHECK: %57 = OpCompositeExtract %v2float %36 1
; CHECK: %58 = OpFConvert %v2half %57
; CHECK: %59 = OpCompositeConstruct %mat2v2half %56 %58
; CHECK: %52 = OpCopyObject %mat2v2float %36
; CHECK: %42 = OpMatrixTimesVector %v2half %59 %41
%43 = OpCompositeExtract %float %42 0
%44 = OpCompositeExtract %float %42 1
; CHECK-NOT: %43 = OpCompositeExtract %float %42 0
; CHECK-NOT: %44 = OpCompositeExtract %float %42 1
; CHECK: %43 = OpCompositeExtract %half %42 0
; CHECK: %44 = OpCompositeExtract %half %42 1
%45 = OpCompositeConstruct %v3float %43 %44 %float_1
; CHECK-NOT: %45 = OpCompositeConstruct %v3float %43 %44 %float_1
; CHECK: %53 = OpFConvert %float %43
; CHECK: %54 = OpFConvert %float %44
; CHECK: %45 = OpCompositeConstruct %v3float %53 %54 %float_1
OpStore %res %45
OpReturn
OpFunctionEnd
)";
SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
SinglePassRunAndMatch<ConvertToHalfPass>(defs + func, true);
}
} // namespace
} // namespace opt
} // namespace spvtools