Fix convert-relax-to-half invalid code (#3099) (#3106)

2024-11-26 13:20:05 +00:00 · 2019-12-20 19:08:12 -07:00 · 2019-12-20 19:08:12 -07:00 · 9215c1b7df
commit 9215c1b7df
parent 64f36ea529
4 changed files with 238 additions and 104 deletions
--- a/include/spirv-tools/optimizer.hpp
+++ b/include/spirv-tools/optimizer.hpp
@ -685,9 +685,9 @@ Optimizer::PassToken CreateSSARewritePass();
 // any resulting half precision values back to float32 as needed. No variables
 // are changed. No image operations are changed.
 //
-// Best if run late since it will generate better code with unneeded function
-// scope loads and stores and composite inserts and extracts removed. Also best
-// if followed by instruction simplification, redundancy elimination and DCE.
+// Best if run after function scope store/load and composite operation
+// eliminations are run. Also best if followed by instruction simplification,
+// redundancy elimination and DCE.
 Optimizer::PassToken CreateConvertRelaxedToHalfPass();

 // Create relax float ops pass.
--- a/source/opt/convert_to_half_pass.cpp
+++ b/source/opt/convert_to_half_pass.cpp
@ -42,7 +42,7 @@ bool ConvertToHalfPass::IsFloat(Instruction* inst, uint32_t width) {
  return Pass::IsFloat(ty_id, width);
 }

-bool ConvertToHalfPass::IsRelaxed(Instruction* inst) {
+bool ConvertToHalfPass::IsDecoratedRelaxed(Instruction* inst) {
  uint32_t r_id = inst->result_id();
  for (auto r_inst : get_decoration_mgr()->GetDecorationsFor(r_id, false))
    if (r_inst->opcode() == SpvOpDecorate &&
@ -51,6 +51,12 @@ bool ConvertToHalfPass::IsRelaxed(Instruction* inst) {
  return false;
 }

+bool ConvertToHalfPass::IsRelaxed(uint32_t id) {
+  return relaxed_ids_set_.count(id) > 0;
+}
+
+void ConvertToHalfPass::AddRelaxed(uint32_t id) { relaxed_ids_set_.insert(id); }
+
 analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) {
  analysis::Float float_ty(width);
  return context()->get_type_mgr()->GetRegisteredType(&float_ty);
@ -87,16 +93,19 @@ uint32_t ConvertToHalfPass::EquivFloatTypeId(uint32_t ty_id, uint32_t width) {
 }

 void ConvertToHalfPass::GenConvert(uint32_t* val_idp, uint32_t width,
-                                   InstructionBuilder* builder) {
+                                   Instruction* inst) {
  Instruction* val_inst = get_def_use_mgr()->GetDef(*val_idp);
  uint32_t ty_id = val_inst->type_id();
  uint32_t nty_id = EquivFloatTypeId(ty_id, width);
  if (nty_id == ty_id) return;
  Instruction* cvt_inst;
+  InstructionBuilder builder(
+      context(), inst,
+      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
  if (val_inst->opcode() == SpvOpUndef)
-    cvt_inst = builder->AddNullaryOp(nty_id, SpvOpUndef);
+    cvt_inst = builder.AddNullaryOp(nty_id, SpvOpUndef);
  else
-    cvt_inst = builder->AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
+    cvt_inst = builder.AddUnaryOp(nty_id, SpvOpFConvert, *val_idp);
  *val_idp = cvt_inst->result_id();
 }

@ -153,17 +162,15 @@ bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
  bool modified = false;
  // Convert all float32 based operands to float16 equivalent and change
  // instruction type to float16 equivalent.
-  InstructionBuilder builder(
-      context(), inst,
-      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
-  inst->ForEachInId([&builder, &modified, this](uint32_t* idp) {
+  inst->ForEachInId([&inst, &modified, this](uint32_t* idp) {
    Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
    if (!IsFloat(op_inst, 32)) return;
-    GenConvert(idp, 16, &builder);
+    GenConvert(idp, 16, inst);
    modified = true;
  });
  if (IsFloat(inst, 32)) {
    inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
+    converted_ids_.insert(inst->result_id());
    modified = true;
  }
  if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
@ -171,23 +178,10 @@ bool ConvertToHalfPass::GenHalfArith(Instruction* inst) {
 }

 bool ConvertToHalfPass::ProcessPhi(Instruction* inst) {
-  // Skip if not float32
-  if (!IsFloat(inst, 32)) return false;
-  // Skip if no relaxed operands.
-  bool relaxed_found = false;
-  uint32_t ocnt = 0;
-  inst->ForEachInId([&ocnt, &relaxed_found, this](uint32_t* idp) {
-    if (ocnt % 2 == 0) {
-      Instruction* val_inst = get_def_use_mgr()->GetDef(*idp);
-      if (IsRelaxed(val_inst)) relaxed_found = true;
-    }
-    ++ocnt;
-  });
-  if (!relaxed_found) return false;
  // Add float16 converts of any float32 operands and change type
  // of phi to float16 equivalent. Operand converts need to be added to
  // preceeding blocks.
-  ocnt = 0;
+  uint32_t ocnt = 0;
  uint32_t* prev_idp;
  inst->ForEachInId([&ocnt, &prev_idp, this](uint32_t* idp) {
    if (ocnt % 2 == 0) {
@ -203,65 +197,32 @@ bool ConvertToHalfPass::ProcessPhi(Instruction* inst) {
              insert_before->opcode() != SpvOpLoopMerge)
            ++insert_before;
        }
-        InstructionBuilder builder(context(), &*insert_before,
-                                   IRContext::kAnalysisDefUse |
-                                       IRContext::kAnalysisInstrToBlockMapping);
-        GenConvert(prev_idp, 16, &builder);
+        GenConvert(prev_idp, 16, &*insert_before);
      }
    }
    ++ocnt;
  });
  inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
  get_def_use_mgr()->AnalyzeInstUse(inst);
+  converted_ids_.insert(inst->result_id());
  return true;
 }

-bool ConvertToHalfPass::ProcessExtract(Instruction* inst) {
-  bool modified = false;
-  uint32_t comp_id = inst->GetSingleWordInOperand(0);
-  Instruction* comp_inst = get_def_use_mgr()->GetDef(comp_id);
-  // If extract is relaxed float32 based type and the composite is a relaxed
-  // float32 based type, convert it to float16 equivalent. This is slightly
-  // aggressive and pushes any likely conversion to apply to the whole
-  // composite rather than apply to each extracted component later. This
-  // can be a win if the platform can convert the entire composite in the same
-  // time as one component. It risks converting components that may not be
-  // used, although empirical data on a large set of real-world shaders seems
-  // to suggest this is not common and the composite convert is the best choice.
-  if (IsFloat(inst, 32) && IsRelaxed(inst) && IsFloat(comp_inst, 32) &&
-      IsRelaxed(comp_inst)) {
-    InstructionBuilder builder(
-        context(), inst,
-        IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
-    GenConvert(&comp_id, 16, &builder);
-    inst->SetInOperand(0, {comp_id});
-    comp_inst = get_def_use_mgr()->GetDef(comp_id);
-    modified = true;
-  }
-  // If the composite is a float16 based type, make sure the type of the
-  // extract agrees.
-  if (IsFloat(comp_inst, 16) && !IsFloat(inst, 16)) {
-    inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
-    modified = true;
-  }
-  if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
-  return modified;
-}
-
 bool ConvertToHalfPass::ProcessConvert(Instruction* inst) {
  // If float32 and relaxed, change to float16 convert
-  if (IsFloat(inst, 32) && IsRelaxed(inst)) {
+  if (IsFloat(inst, 32) && IsRelaxed(inst->result_id())) {
    inst->SetResultType(EquivFloatTypeId(inst->type_id(), 16));
    get_def_use_mgr()->AnalyzeInstUse(inst);
+    converted_ids_.insert(inst->result_id());
  }
-  // If operand and result types are the same, replace result with operand
-  // and change convert to copy to keep validator happy; DCE will clean it up
+  // If operand and result types are the same, change FConvert to CopyObject to
+  // keep validator happy; simplification and DCE will clean it up
+  // One way this can happen is if an FConvert generated during this pass
+  // (likely by ProcessPhi) is later encountered here and its operand has been
+  // changed to half.
  uint32_t val_id = inst->GetSingleWordInOperand(0);
  Instruction* val_inst = get_def_use_mgr()->GetDef(val_id);
-  if (inst->type_id() == val_inst->type_id()) {
-    context()->ReplaceAllUsesWith(inst->result_id(), val_id);
-    inst->SetOpcode(SpvOpCopyObject);
-  }
+  if (inst->type_id() == val_inst->type_id()) inst->SetOpcode(SpvOpCopyObject);
  return true;  // modified
 }

@ -270,12 +231,8 @@ bool ConvertToHalfPass::ProcessImageRef(Instruction* inst) {
  // If image reference, only need to convert dref args back to float32
  if (dref_image_ops_.count(inst->opcode()) != 0) {
    uint32_t dref_id = inst->GetSingleWordInOperand(kImageSampleDrefIdInIdx);
-    Instruction* dref_inst = get_def_use_mgr()->GetDef(dref_id);
-    if (IsFloat(dref_inst, 16) && IsRelaxed(dref_inst)) {
-      InstructionBuilder builder(
-          context(), inst,
-          IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
-      GenConvert(&dref_id, 32, &builder);
+    if (converted_ids_.count(dref_id) > 0) {
+      GenConvert(&dref_id, 32, inst);
      inst->SetInOperand(kImageSampleDrefIdInIdx, {dref_id});
      get_def_use_mgr()->AnalyzeInstUse(inst);
      modified = true;
@ -288,32 +245,24 @@ bool ConvertToHalfPass::ProcessDefault(Instruction* inst) {
  bool modified = false;
  // If non-relaxed instruction has changed operands, need to convert
  // them back to float32
-  InstructionBuilder builder(
-      context(), inst,
-      IRContext::kAnalysisDefUse | IRContext::kAnalysisInstrToBlockMapping);
-  inst->ForEachInId([&builder, &modified, this](uint32_t* idp) {
-    Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
-    if (!IsFloat(op_inst, 16)) return;
-    if (!IsRelaxed(op_inst)) return;
+  inst->ForEachInId([&inst, &modified, this](uint32_t* idp) {
+    if (converted_ids_.count(*idp) == 0) return;
    uint32_t old_id = *idp;
-    GenConvert(idp, 32, &builder);
+    GenConvert(idp, 32, inst);
    if (*idp != old_id) modified = true;
  });
  if (modified) get_def_use_mgr()->AnalyzeInstUse(inst);
  return modified;
 }

-bool ConvertToHalfPass::GenHalfCode(Instruction* inst) {
+bool ConvertToHalfPass::GenHalfInst(Instruction* inst) {
  bool modified = false;
  // Remember id for later deletion of RelaxedPrecision decoration
-  bool inst_relaxed = IsRelaxed(inst);
-  if (inst_relaxed) relaxed_ids_.push_back(inst->result_id());
+  bool inst_relaxed = IsRelaxed(inst->result_id());
  if (IsArithmetic(inst) && inst_relaxed)
    modified = GenHalfArith(inst);
-  else if (inst->opcode() == SpvOpPhi)
+  else if (inst->opcode() == SpvOpPhi && inst_relaxed)
    modified = ProcessPhi(inst);
-  else if (inst->opcode() == SpvOpCompositeExtract)
-    modified = ProcessExtract(inst);
  else if (inst->opcode() == SpvOpFConvert)
    modified = ProcessConvert(inst);
  else if (image_ops_.count(inst->opcode()) != 0)
@ -323,13 +272,62 @@ bool ConvertToHalfPass::GenHalfCode(Instruction* inst) {
  return modified;
 }

+bool ConvertToHalfPass::CloseRelaxInst(Instruction* inst) {
+  if (inst->result_id() == 0) return false;
+  if (IsRelaxed(inst->result_id())) return false;
+  if (!IsFloat(inst, 32)) return false;
+  if (IsDecoratedRelaxed(inst)) {
+    AddRelaxed(inst->result_id());
+    return true;
+  }
+  if (closure_ops_.count(inst->opcode()) == 0) return false;
+  // Can relax if all float operands are relaxed
+  bool relax = true;
+  inst->ForEachInId([&relax, this](uint32_t* idp) {
+    Instruction* op_inst = get_def_use_mgr()->GetDef(*idp);
+    if (!IsFloat(op_inst, 32)) return;
+    if (!IsRelaxed(*idp)) relax = false;
+  });
+  if (relax) {
+    AddRelaxed(inst->result_id());
+    return true;
+  }
+  // Can relax if all uses are relaxed
+  relax = true;
+  get_def_use_mgr()->ForEachUser(inst, [&relax, this](Instruction* uinst) {
+    if (uinst->result_id() == 0 || !IsFloat(uinst, 32) ||
+        (!IsDecoratedRelaxed(uinst) && !IsRelaxed(uinst->result_id()))) {
+      relax = false;
+      return;
+    }
+  });
+  if (relax) {
+    AddRelaxed(inst->result_id());
+    return true;
+  }
+  return false;
+}
+
 bool ConvertToHalfPass::ProcessFunction(Function* func) {
+  // Do a closure of Relaxed on composite and phi instructions
+  bool changed = true;
+  while (changed) {
+    changed = false;
+    cfg()->ForEachBlockInReversePostOrder(
+        func->entry().get(), [&changed, this](BasicBlock* bb) {
+          for (auto ii = bb->begin(); ii != bb->end(); ++ii)
+            changed |= CloseRelaxInst(&*ii);
+        });
+  }
+  // Do convert of relaxed instructions to half precision
  bool modified = false;
  cfg()->ForEachBlockInReversePostOrder(
      func->entry().get(), [&modified, this](BasicBlock* bb) {
        for (auto ii = bb->begin(); ii != bb->end(); ++ii)
-          modified |= GenHalfCode(&*ii);
+          modified |= GenHalfInst(&*ii);
      });
+  // Replace invalid converts of matrix into equivalent vector extracts,
+  // converts and finally a composite construct
  cfg()->ForEachBlockInReversePostOrder(
      func->entry().get(), [&modified, this](BasicBlock* bb) {
        for (auto ii = bb->begin(); ii != bb->end(); ++ii)
@ -346,7 +344,7 @@ Pass::Status ConvertToHalfPass::ProcessImpl() {
  // If modified, make sure module has Float16 capability
  if (modified) context()->AddCapability(SpvCapabilityFloat16);
  // Remove all RelaxedPrecision decorations from instructions and globals
-  for (auto c_id : relaxed_ids_) RemoveRelaxedDecoration(c_id);
+  for (auto c_id : relaxed_ids_set_) RemoveRelaxedDecoration(c_id);
  for (auto& val : get_module()->types_values()) {
    uint32_t v_id = val.result_id();
    if (v_id != 0) RemoveRelaxedDecoration(v_id);
@ -366,6 +364,7 @@ void ConvertToHalfPass::Initialize() {
      SpvOpVectorShuffle,
      SpvOpCompositeConstruct,
      SpvOpCompositeInsert,
+      SpvOpCompositeExtract,
      SpvOpCopyObject,
      SpvOpTranspose,
      SpvOpConvertSToF,
@ -453,7 +452,19 @@ void ConvertToHalfPass::Initialize() {
      SpvOpImageSparseSampleProjDrefExplicitLod,
      SpvOpImageSparseDrefGather,
  };
-  relaxed_ids_.clear();
+  closure_ops_ = {
+      SpvOpVectorExtractDynamic,
+      SpvOpVectorInsertDynamic,
+      SpvOpVectorShuffle,
+      SpvOpCompositeConstruct,
+      SpvOpCompositeInsert,
+      SpvOpCompositeExtract,
+      SpvOpCopyObject,
+      SpvOpTranspose,
+      SpvOpPhi,
+  };
+  relaxed_ids_set_.clear();
+  converted_ids_.clear();
 }

 }  // namespace opt
--- a/source/opt/convert_to_half_pass.h
+++ b/source/opt/convert_to_half_pass.h
@ -38,7 +38,8 @@ class ConvertToHalfPass : public Pass {
  const char* name() const override { return "convert-to-half-pass"; }

 private:
-  // Return true if |inst| is an arithmetic op that can be of type float16
+  // Return true if |inst| is an arithmetic, composite or phi op that can be
+  // of type float16
  bool IsArithmetic(Instruction* inst);

  // Return true if |inst| returns scalar, vector or matrix type with base
@ -46,7 +47,13 @@ class ConvertToHalfPass : public Pass {
  bool IsFloat(Instruction* inst, uint32_t width);

  // Return true if |inst| is decorated with RelaxedPrecision
-  bool IsRelaxed(Instruction* inst);
+  bool IsDecoratedRelaxed(Instruction* inst);
+
+  // Return true if |id| has been added to the relaxed id set
+  bool IsRelaxed(uint32_t id);
+
+  // Add |id| to the relaxed id set
+  void AddRelaxed(uint32_t id);

  // Return type id for float with |width|
  analysis::Type* FloatScalarType(uint32_t width);
@ -64,19 +71,23 @@ class ConvertToHalfPass : public Pass {

  // Append instructions to builder to convert value |*val_idp| to type
  // |ty_id| but with |width|. Set |*val_idp| to the new id.
-  void GenConvert(uint32_t* val_idp, uint32_t width,
-                  InstructionBuilder* builder);
+  void GenConvert(uint32_t* val_idp, uint32_t width, Instruction* inst);

  // Remove RelaxedPrecision decoration of |id|.
  void RemoveRelaxedDecoration(uint32_t id);

+  // Add |inst| to relaxed instruction set if warranted. Specifically, if
+  // it is float32 and either decorated relaxed or a composite or phi
+  // instruction where all operands are relaxed or all uses are relaxed.
+  bool CloseRelaxInst(Instruction* inst);
+
  // If |inst| is an arithmetic, phi, extract or convert instruction of float32
  // base type and decorated with RelaxedPrecision, change it to the equivalent
  // float16 based type instruction. Specifically, insert instructions to
  // convert all operands to float16 (if needed) and change its type to the
  // equivalent float16 type. Otherwise, insert instructions to convert its
  // operands back to their original types, if needed.
-  bool GenHalfCode(Instruction* inst);
+  bool GenHalfInst(Instruction* inst);

  // Gen code for relaxed arithmetic |inst|
  bool GenHalfArith(Instruction* inst);
@ -84,9 +95,6 @@ class ConvertToHalfPass : public Pass {
  // Gen code for relaxed phi |inst|
  bool ProcessPhi(Instruction* inst);

-  // Gen code for relaxed extract |inst|
-  bool ProcessExtract(Instruction* inst);
-
  // Gen code for relaxed convert |inst|
  bool ProcessConvert(Instruction* inst);

@ -98,11 +106,11 @@ class ConvertToHalfPass : public Pass {

  // If |inst| is an FConvert of a matrix type, decompose it to a series
  // of vector extracts, converts and inserts into an Undef. These are
-  // generated by GenHalfCode because they are easier to manipulate, but are
+  // generated by GenHalfInst because they are easier to manipulate, but are
  // invalid so we need to clean them up.
  bool MatConvertCleanup(Instruction* inst);

-  // Call GenHalfCode on every instruction in |func|.
+  // Call GenHalfInst on every instruction in |func|.
  // If code is generated for an instruction, replace the instruction
  // with the new instructions that are generated.
  bool ProcessFunction(Function* func);
@ -124,8 +132,14 @@ class ConvertToHalfPass : public Pass {
  // Set of dref sample operations
  std::unordered_set<uint32_t> dref_image_ops_;

+  // Set of dref sample operations
+  std::unordered_set<uint32_t> closure_ops_;
+
+  // Set of ids of all relaxed instructions
+  std::unordered_set<uint32_t> relaxed_ids_set_;
+
  // Ids of all converted instructions
-  std::vector<uint32_t> relaxed_ids_;
+  std::unordered_set<uint32_t> converted_ids_;
 };

 }  // namespace opt
--- a/test/opt/convert_relaxed_to_half_test.cpp
+++ b/test/opt/convert_relaxed_to_half_test.cpp
@ -997,7 +997,7 @@ OpFunctionEnd
 %99 = OpFConvert %v4half %15
 OpBranch %65
 %65 = OpLabel
-%96 = OpPhi %v4half %99 %5 %76 %71
+%96 = OpPhi %v4half %99 %5 %100 %71
 %95 = OpPhi %int %int_0 %5 %78 %71
 %70 = OpSLessThan %bool %95 %int_10
 OpLoopMerge %66 %71 None
@ -1222,6 +1222,115 @@ OpFunctionEnd
                                           defs_after + func_after, true, true);
 }

+TEST_F(ConvertToHalfTest, ConvertToHalfWithClosure) {
+  // Include as many contiguous composite instructions as possible into
+  // half-precision computations
+  //
+  // Compiled with glslang -V -Os
+  //
+  // clang-format off
+  //
+  // #version 410 core
+  //
+  //   precision mediump float;
+  //
+  // layout(location = 1) in vec3 foo;
+  // layout(location = 2) in mat2 bar;
+  // layout(location = 1) out vec3 res;
+  //
+  // vec3 func(vec3 tap, mat2 M) {
+  //   return vec3(M * tap.xy, 1.0);
+  // }
+  //
+  // void main() {
+  //   res = func(foo, bar);
+  // }
+  //
+  // clang-format on
+
+  const std::string defs =
+      R"(OpCapability Shader
+; CHECK: OpCapability Float16
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %res %foo %bar
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 410
+OpName %main "main"
+OpName %res "res"
+OpName %foo "foo"
+OpName %bar "bar"
+OpDecorate %res RelaxedPrecision
+; CHECK-NOT: OpDecorate %res RelaxedPrecision
+OpDecorate %res Location 1
+OpDecorate %foo RelaxedPrecision
+; CHECK-NOT: OpDecorate %foo RelaxedPrecision
+OpDecorate %foo Location 1
+OpDecorate %bar RelaxedPrecision
+; CHECK-NOT: OpDecorate %bar RelaxedPrecision
+OpDecorate %bar Location 2
+OpDecorate %34 RelaxedPrecision
+OpDecorate %36 RelaxedPrecision
+OpDecorate %41 RelaxedPrecision
+OpDecorate %42 RelaxedPrecision
+; CHECK-NOT: OpDecorate %34 RelaxedPrecision
+; CHECK-NOT: OpDecorate %36 RelaxedPrecision
+; CHECK-NOT: OpDecorate %41 RelaxedPrecision
+; CHECK-NOT: OpDecorate %42 RelaxedPrecision
+%void = OpTypeVoid
+%3 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v3float = OpTypeVector %float 3
+%v2float = OpTypeVector %float 2
+%mat2v2float = OpTypeMatrix %v2float 2
+%float_1 = OpConstant %float 1
+%_ptr_Output_v3float = OpTypePointer Output %v3float
+%res = OpVariable %_ptr_Output_v3float Output
+%_ptr_Input_v3float = OpTypePointer Input %v3float
+%foo = OpVariable %_ptr_Input_v3float Input
+%_ptr_Input_mat2v2float = OpTypePointer Input %mat2v2float
+%bar = OpVariable %_ptr_Input_mat2v2float Input
+)";
+
+  const std::string func =
+      R"(%main = OpFunction %void None %3
+%5 = OpLabel
+%34 = OpLoad %v3float %foo
+%36 = OpLoad %mat2v2float %bar
+; CHECK: %48 = OpFConvert %v3half %34
+; CHECK: %49 = OpFConvert %v3half %34
+%41 = OpVectorShuffle %v2float %34 %34 0 1
+; CHECK-NOT: %41 = OpVectorShuffle %v2float %34 %34 0 1
+; CHECK: %41 = OpVectorShuffle %v2half %48 %49 0 1
+%42 = OpMatrixTimesVector %v2float %36 %41
+; CHECK-NOT: %42 = OpMatrixTimesVector %v2float %36 %41
+; CHECK: %55 = OpCompositeExtract %v2float %36 0
+; CHECK: %56 = OpFConvert %v2half %55
+; CHECK: %57 = OpCompositeExtract %v2float %36 1
+; CHECK: %58 = OpFConvert %v2half %57
+; CHECK: %59 = OpCompositeConstruct %mat2v2half %56 %58
+; CHECK: %52 = OpCopyObject %mat2v2float %36
+; CHECK: %42 = OpMatrixTimesVector %v2half %59 %41
+%43 = OpCompositeExtract %float %42 0
+%44 = OpCompositeExtract %float %42 1
+; CHECK-NOT: %43 = OpCompositeExtract %float %42 0
+; CHECK-NOT: %44 = OpCompositeExtract %float %42 1
+; CHECK: %43 = OpCompositeExtract %half %42 0
+; CHECK: %44 = OpCompositeExtract %half %42 1
+%45 = OpCompositeConstruct %v3float %43 %44 %float_1
+; CHECK-NOT: %45 = OpCompositeConstruct %v3float %43 %44 %float_1
+; CHECK: %53 = OpFConvert %float %43
+; CHECK: %54 = OpFConvert %float %44
+; CHECK: %45 = OpCompositeConstruct %v3float %53 %54 %float_1
+OpStore %res %45
+OpReturn
+OpFunctionEnd
+)";
+
+  SetAssembleOptions(SPV_TEXT_TO_BINARY_OPTION_PRESERVE_NUMERIC_IDS);
+  SinglePassRunAndMatch<ConvertToHalfPass>(defs + func, true);
+}
+
 }  // namespace
 }  // namespace opt
 }  // namespace spvtools