Vector DCE (#1512)

Introduce a pass that does a DCE type analysis for vector elements instead of the whole vector as a single element. It will then rewrite instructions that are not used with something else. For example, an instruction whose value are not used, even though it is referenced, is replaced with an OpUndef.
2024-10-18 11:10:05 +00:00 · 2018-04-23 11:13:07 -04:00 · 2018-04-23 11:13:07 -04:00 · 2c0ce87210
commit 2c0ce87210
parent 7a59283587
17 changed files with 1447 additions and 4 deletions
--- a/Android.mk
+++ b/Android.mk
@ -25,6 +25,7 @@ SPVTOOLS_SRC_FILES := \
 		source/text.cpp \
 		source/text_handler.cpp \
 		source/util/bit_stream.cpp \
+		source/util/bit_vector.cpp \
 		source/util/parse_number.cpp \
 		source/util/string_utils.cpp \
 		source/util/timer.cpp \
@ -135,6 +136,7 @@ SPVTOOLS_OPT_SRC_FILES := \
 		source/opt/types.cpp \
 		source/opt/unify_const_pass.cpp \
 		source/opt/value_number_table.cpp \
+		source/opt/vector_dce.cpp \
 		source/opt/workaround1209.cpp

 # Locations of grammar files.
--- a/include/spirv-tools/optimizer.hpp
+++ b/include/spirv-tools/optimizer.hpp
@ -560,6 +560,13 @@ Optimizer::PassToken CreateSSARewritePass();
 // This pass looks to copy propagate memory references for arrays.  It looks
 // for specific code patterns to recognize array copies.
 Optimizer::PassToken CreateCopyPropagateArraysPass();
+
+// Create a vector dce pass.
+// This pass looks for components of vectors that are unused, and removes them
+// from the vector.  Note this would still leave around lots of dead code that
+// a pass of ADCE will be able to remove.
+Optimizer::PassToken CreateVectorDCEPass();
+
 }  // namespace spvtools

 #endif  // SPIRV_TOOLS_OPTIMIZER_HPP_
--- a/source/opcode.cpp
+++ b/source/opcode.cpp
@ -496,3 +496,90 @@ bool spvOpcodeIsNonUniformGroupOperation(SpvOp opcode) {
      return false;
  }
 }
+
+bool spvOpcodeIsScalarizable(SpvOp opcode) {
+  switch (opcode) {
+    case SpvOpPhi:
+    case SpvOpCopyObject:
+    case SpvOpConvertFToU:
+    case SpvOpConvertFToS:
+    case SpvOpConvertSToF:
+    case SpvOpConvertUToF:
+    case SpvOpUConvert:
+    case SpvOpSConvert:
+    case SpvOpFConvert:
+    case SpvOpQuantizeToF16:
+    case SpvOpVectorInsertDynamic:
+    case SpvOpSNegate:
+    case SpvOpFNegate:
+    case SpvOpIAdd:
+    case SpvOpFAdd:
+    case SpvOpISub:
+    case SpvOpFSub:
+    case SpvOpIMul:
+    case SpvOpFMul:
+    case SpvOpUDiv:
+    case SpvOpSDiv:
+    case SpvOpFDiv:
+    case SpvOpUMod:
+    case SpvOpSRem:
+    case SpvOpSMod:
+    case SpvOpFRem:
+    case SpvOpFMod:
+    case SpvOpVectorTimesScalar:
+    case SpvOpIAddCarry:
+    case SpvOpISubBorrow:
+    case SpvOpUMulExtended:
+    case SpvOpSMulExtended:
+    case SpvOpShiftRightLogical:
+    case SpvOpShiftRightArithmetic:
+    case SpvOpShiftLeftLogical:
+    case SpvOpBitwiseOr:
+    case SpvOpBitwiseAnd:
+    case SpvOpNot:
+    case SpvOpBitFieldInsert:
+    case SpvOpBitFieldSExtract:
+    case SpvOpBitFieldUExtract:
+    case SpvOpBitReverse:
+    case SpvOpBitCount:
+    case SpvOpIsNan:
+    case SpvOpIsInf:
+    case SpvOpIsFinite:
+    case SpvOpIsNormal:
+    case SpvOpSignBitSet:
+    case SpvOpLessOrGreater:
+    case SpvOpOrdered:
+    case SpvOpUnordered:
+    case SpvOpLogicalEqual:
+    case SpvOpLogicalNotEqual:
+    case SpvOpLogicalOr:
+    case SpvOpLogicalAnd:
+    case SpvOpLogicalNot:
+    case SpvOpSelect:
+    case SpvOpIEqual:
+    case SpvOpINotEqual:
+    case SpvOpUGreaterThan:
+    case SpvOpSGreaterThan:
+    case SpvOpUGreaterThanEqual:
+    case SpvOpSGreaterThanEqual:
+    case SpvOpULessThan:
+    case SpvOpSLessThan:
+    case SpvOpULessThanEqual:
+    case SpvOpSLessThanEqual:
+    case SpvOpFOrdEqual:
+    case SpvOpFUnordEqual:
+    case SpvOpFOrdNotEqual:
+    case SpvOpFUnordNotEqual:
+    case SpvOpFOrdLessThan:
+    case SpvOpFUnordLessThan:
+    case SpvOpFOrdGreaterThan:
+    case SpvOpFUnordGreaterThan:
+    case SpvOpFOrdLessThanEqual:
+    case SpvOpFUnordLessThanEqual:
+    case SpvOpFOrdGreaterThanEqual:
+    case SpvOpFUnordGreaterThanEqual:
+      return true;
+    default:
+      return false;
+  }
+}
--- a/source/opcode.h
+++ b/source/opcode.h
@ -121,4 +121,8 @@ bool spvOpcodeIsBaseOpaqueType(SpvOp opcode);

 // Returns true if the given opcode is a non-uniform group operation.
 bool spvOpcodeIsNonUniformGroupOperation(SpvOp opcode);
+
+// Returns true if the result of an instruction with this opcode are computed
+// per component.
+bool spvOpcodeIsScalarizable(SpvOp opcode);
 #endif  // LIBSPIRV_OPCODE_H_
--- a/source/opt/CMakeLists.txt
+++ b/source/opt/CMakeLists.txt
@ -93,6 +93,7 @@ add_library(SPIRV-Tools-opt
  types.h
  unify_const_pass.h
  value_number_table.h
+  vector_dce.h
  workaround1209.h

  aggressive_dead_code_elim_pass.cpp
@ -171,6 +172,7 @@ add_library(SPIRV-Tools-opt
  types.cpp
  unify_const_pass.cpp
  value_number_table.cpp
+  vector_dce.cpp
  workaround1209.cpp
 )

--- a/source/opt/optimizer.cpp
+++ b/source/opt/optimizer.cpp
@ -126,7 +126,7 @@ Optimizer& Optimizer::RegisterLegalizationPasses() {
          // https://github.com/Microsoft/DirectXShaderCompiler/pull/930
          // Get rid of unused code that contain traces of illegal code
          // or unused references to unbound external objects
-          .RegisterPass(CreateDeadInsertElimPass())
+          .RegisterPass(CreateVectorDCEPass())
          .RegisterPass(CreateAggressiveDCEPass());
 }

@ -149,7 +149,7 @@ Optimizer& Optimizer::RegisterPerformancePasses() {
      .RegisterPass(CreateAggressiveDCEPass())
      .RegisterPass(CreateRedundancyEliminationPass())
      .RegisterPass(CreateInsertExtractElimPass())
-      .RegisterPass(CreateDeadInsertElimPass())
+      .RegisterPass(CreateVectorDCEPass())
      .RegisterPass(CreateDeadBranchElimPass())
      .RegisterPass(CreateSimplificationPass())
      .RegisterPass(CreateIfConversionPass())
@ -450,4 +450,8 @@ Optimizer::PassToken CreateCopyPropagateArraysPass() {
      MakeUnique<opt::CopyPropagateArrays>());
 }

+Optimizer::PassToken CreateVectorDCEPass() {
+  return MakeUnique<Optimizer::PassToken::Impl>(MakeUnique<opt::VectorDCE>());
+}
+
 }  // namespace spvtools
--- a/source/opt/passes.h
+++ b/source/opt/passes.h
@ -58,5 +58,6 @@
 #include "strip_debug_info_pass.h"
 #include "strip_reflect_info_pass.h"
 #include "unify_const_pass.h"
+#include "vector_dce.h"
 #include "workaround1209.h"
 #endif  // LIBSPIRV_OPT_PASSES_H_
--- a/source/opt/vector_dce.cpp
+++ b/source/opt/vector_dce.cpp
@ -0,0 +1,275 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "vector_dce.h"
+
+namespace {
+const uint32_t kExtractCompositeIdInIdx = 0;
+const uint32_t kInsertCompositeIdInIdx = 1;
+}  // namespace
+
+namespace spvtools {
+namespace opt {
+
+Pass::Status VectorDCE::Process(ir::IRContext* ctx) {
+  InitializeProcessing(ctx);
+
+  bool modified = false;
+  for (ir::Function& function : *get_module()) {
+    modified |= VectorDCEFunction(&function);
+  }
+  return (modified ? Status::SuccessWithChange : Status::SuccessWithoutChange);
+}
+
+bool VectorDCE::VectorDCEFunction(ir::Function* function) {
+  LiveComponentMap live_components;
+  FindLiveComponents(function, &live_components);
+  return RewriteInstructions(function, live_components);
+}
+
+void VectorDCE::FindLiveComponents(ir::Function* function,
+                                   LiveComponentMap* live_components) {
+  std::vector<WorkListItem> work_list;
+
+  // Prime the work list.  We will assume that any instruction that does
+  // not result in a vector is live.
+  //
+  // TODO: If we want to remove all of the dead code with the current
+  // implementation, we will have to iterate VectorDCE and ADCE.  That can be
+  // avoided if we treat scalar results as if they are vectors with 1 element.
+  //
+  // Extending to structures and matrices is not as straight forward because of
+  // the nesting.  We cannot simply us a bit vector to keep track of which
+  // components are live because of arbitrary nesting of structs.
+  function->ForEachInst(
+      [&work_list, this, live_components](ir::Instruction* current_inst) {
+        bool is_vector = HasVectorResult(current_inst);
+        if (!is_vector) {
+          switch (current_inst->opcode()) {
+            case SpvOpCompositeExtract:
+              MarkExtractUseAsLive(current_inst, live_components, &work_list);
+              break;
+
+            default:
+              MarkUsesAsLive(current_inst, all_components_live_,
+                             live_components, &work_list);
+              break;
+          }
+        }
+      });
+
+  // Process the work list propagating liveness.
+  for (uint32_t i = 0; i < work_list.size(); i++) {
+    WorkListItem current_item = work_list[i];
+    ir::Instruction* current_inst = current_item.instruction;
+
+    switch (current_inst->opcode()) {
+      case SpvOpCompositeInsert:
+        MarkInsertUsesAsLive(current_item, live_components, &work_list);
+        break;
+      case SpvOpVectorShuffle:
+        MarkVectorShuffleUsesAsLive(current_item, live_components, &work_list);
+        break;
+      case SpvOpCompositeConstruct:
+        // TODO: If we have a vector parameter, then we can determine which
+        // of its components are live.
+        MarkUsesAsLive(current_inst, all_components_live_, live_components,
+                       &work_list);
+        break;
+      default:
+        if (spvOpcodeIsScalarizable(current_inst->opcode())) {
+          MarkUsesAsLive(current_inst, current_item.components, live_components,
+                         &work_list);
+        } else {
+          MarkUsesAsLive(current_inst, all_components_live_, live_components,
+                         &work_list);
+        }
+        break;
+    }
+  }
+}
+
+void VectorDCE::MarkExtractUseAsLive(const ir::Instruction* current_inst,
+                                     LiveComponentMap* live_components,
+                                     std::vector<WorkListItem>* work_list) {
+  analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
+  uint32_t operand_id =
+      current_inst->GetSingleWordInOperand(kExtractCompositeIdInIdx);
+  ir::Instruction* operand_inst = def_use_mgr->GetDef(operand_id);
+
+  if (HasVectorResult(operand_inst)) {
+    WorkListItem new_item;
+    new_item.instruction = operand_inst;
+    new_item.components.Set(current_inst->GetSingleWordInOperand(1));
+    AddItemToWorkListIfNeeded(new_item, live_components, work_list);
+  }
+}
+
+void VectorDCE::MarkInsertUsesAsLive(
+    const VectorDCE::WorkListItem& current_item,
+    LiveComponentMap* live_components,
+    std::vector<VectorDCE::WorkListItem>* work_list) {
+  analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
+
+  uint32_t operand_id =
+      current_item.instruction->GetSingleWordInOperand(kInsertCompositeIdInIdx);
+  ir::Instruction* operand_inst = def_use_mgr->GetDef(operand_id);
+
+  WorkListItem new_item;
+  new_item.instruction = operand_inst;
+  new_item.components = current_item.components;
+  new_item.components.Clear(
+      current_item.instruction->GetSingleWordInOperand(2));
+
+  AddItemToWorkListIfNeeded(new_item, live_components, work_list);
+}
+
+void VectorDCE::MarkVectorShuffleUsesAsLive(
+    const WorkListItem& current_item,
+    VectorDCE::LiveComponentMap* live_components,
+    std::vector<WorkListItem>* work_list) {
+  analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
+
+  WorkListItem first_operand;
+  first_operand.instruction =
+      def_use_mgr->GetDef(current_item.instruction->GetSingleWordInOperand(0));
+  WorkListItem second_operand;
+  second_operand.instruction =
+      def_use_mgr->GetDef(current_item.instruction->GetSingleWordInOperand(1));
+
+  analysis::TypeManager* type_mgr = context()->get_type_mgr();
+  analysis::Vector* first_type =
+      type_mgr->GetType(first_operand.instruction->type_id())->AsVector();
+  uint32_t size_of_first_operand = first_type->element_count();
+
+  for (uint32_t in_op = 2; in_op < current_item.instruction->NumInOperands();
+       ++in_op) {
+    uint32_t index = current_item.instruction->GetSingleWordInOperand(in_op);
+    if (current_item.components.Get(in_op - 2)) {
+      if (index < size_of_first_operand) {
+        first_operand.components.Set(index);
+      } else {
+        second_operand.components.Set(index - size_of_first_operand);
+      }
+    }
+  }
+
+  AddItemToWorkListIfNeeded(first_operand, live_components, work_list);
+  AddItemToWorkListIfNeeded(second_operand, live_components, work_list);
+}
+
+void VectorDCE::MarkUsesAsLive(
+    ir::Instruction* current_inst, const utils::BitVector& live_elements,
+    LiveComponentMap* live_components,
+    std::vector<VectorDCE::WorkListItem>* work_list) {
+  analysis::DefUseManager* def_use_mgr = context()->get_def_use_mgr();
+
+  current_inst->ForEachInId([&work_list, &live_elements, this, live_components,
+                             def_use_mgr](uint32_t* operand_id) {
+    ir::Instruction* operand_inst = def_use_mgr->GetDef(*operand_id);
+
+    if (HasVectorResult(operand_inst)) {
+      WorkListItem new_item;
+      new_item.instruction = operand_inst;
+      new_item.components = live_elements;
+      AddItemToWorkListIfNeeded(new_item, live_components, work_list);
+    }
+  });
+}
+
+bool VectorDCE::HasVectorResult(const ir::Instruction* inst) const {
+  analysis::TypeManager* type_mgr = context()->get_type_mgr();
+  if (inst->type_id() == 0) {
+    return false;
+  }
+
+  const analysis::Type* current_type = type_mgr->GetType(inst->type_id());
+  bool is_vector = current_type->AsVector() != nullptr;
+  return is_vector;
+}
+
+bool VectorDCE::RewriteInstructions(
+    ir::Function* function,
+    const VectorDCE::LiveComponentMap& live_components) {
+  bool modified = false;
+  function->ForEachInst(
+      [&modified, this, live_components](ir::Instruction* current_inst) {
+        if (!context()->IsCombinatorInstruction(current_inst)) {
+          return;
+        }
+
+        auto live_component = live_components.find(current_inst->result_id());
+        if (live_component == live_components.end()) {
+          // If this instruction is not in live_components then it does not
+          // produce a vector, or it is never referenced and ADCE will remove
+          // it.  No point in trying to differentiate.
+          return;
+        }
+
+        // If no element in the current instruction is used replace it with an
+        // OpUndef.
+        if (live_component->second.Empty()) {
+          modified = true;
+          uint32_t undef_id = this->Type2Undef(current_inst->type_id());
+          context()->KillNamesAndDecorates(current_inst);
+          context()->ReplaceAllUsesWith(current_inst->result_id(), undef_id);
+          context()->KillInst(current_inst);
+          return;
+        }
+
+        switch (current_inst->opcode()) {
+          case SpvOpCompositeInsert: {
+            // If the value being inserted is not live, then we can skip the
+            // insert.
+            uint32_t insert_index = current_inst->GetSingleWordInOperand(2);
+            if (!live_component->second.Get(insert_index)) {
+              modified = true;
+              context()->KillNamesAndDecorates(current_inst->result_id());
+              uint32_t composite_id =
+                  current_inst->GetSingleWordInOperand(kInsertCompositeIdInIdx);
+              context()->ReplaceAllUsesWith(current_inst->result_id(),
+                                            composite_id);
+            }
+          } break;
+          case SpvOpCompositeConstruct:
+            // TODO: The members that are not live can be replaced by an undef
+            // or constant. This will remove uses of those values, and possibly
+            // create opportunities for ADCE.
+            break;
+          default:
+            // Do nothing.
+            break;
+        }
+      });
+  return modified;
+}
+
+void VectorDCE::AddItemToWorkListIfNeeded(
+    WorkListItem work_item, VectorDCE::LiveComponentMap* live_components,
+    std::vector<WorkListItem>* work_list) {
+  ir::Instruction* current_inst = work_item.instruction;
+  auto it = live_components->find(current_inst->result_id());
+  if (it == live_components->end()) {
+    live_components->emplace(
+        std::make_pair(current_inst->result_id(), work_item.components));
+    work_list->emplace_back(work_item);
+  } else {
+    if (it->second.Or(work_item.components)) {
+      work_list->emplace_back(work_item);
+    }
+  }
+}
+
+}  // namespace opt
+}  // namespace spvtools
--- a/source/opt/vector_dce.h
+++ b/source/opt/vector_dce.h
@ -0,0 +1,123 @@
+// Copyright (c) 2018 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef LIBSPIRV_OPT_VECTOR_DCE_H_
+#define LIBSPIRV_OPT_VECTOR_DCE_H_
+
+#include <util/bit_vector.h>
+#include "mem_pass.h"
+
+namespace spvtools {
+namespace opt {
+
+class VectorDCE : public MemPass {
+ private:
+  using LiveComponentMap = std::unordered_map<uint32_t, utils::BitVector>;
+
+  // According to the SPEC the maximum size for a vector is 16.  See the data
+  // rules in the universal validation rules (section 2.16.1).
+  enum { kMaxVectorSize = 16 };
+
+  struct WorkListItem {
+    WorkListItem() : instruction(nullptr), components(kMaxVectorSize) {}
+
+    ir::Instruction* instruction;
+    utils::BitVector components;
+  };
+
+ public:
+  const char* name() const override { return "vector-dce"; }
+  Status Process(ir::IRContext*) override;
+
+  VectorDCE() : all_components_live_(kMaxVectorSize) {
+    for (uint32_t i = 0; i < kMaxVectorSize; i++) {
+      all_components_live_.Set(i);
+    }
+  }
+
+  ir::IRContext::Analysis GetPreservedAnalyses() override {
+    return ir::IRContext::kAnalysisDefUse | ir::IRContext::kAnalysisCFG |
+           ir::IRContext::kAnalysisInstrToBlockMapping |
+           ir::IRContext::kAnalysisLoopAnalysis |
+           ir::IRContext::kAnalysisDecorations |
+           ir::IRContext::kAnalysisDominatorAnalysis |
+           ir::IRContext::kAnalysisNameMap;
+  }
+
+ private:
+  // Runs the vector dce pass on |function|.  Returns true if |function| was
+  // modified.
+  bool VectorDCEFunction(ir::Function* function);
+
+  // Identifies the live components of the vectors that are results of
+  // instructions in |function|.  The results are stored in |live_components|.
+  void FindLiveComponents(ir::Function* function,
+                          LiveComponentMap* live_components);
+
+  // Rewrites instructions in |function| that are dead or partially dead.  If an
+  // instruction does not have an entry in |live_components|, then it is not
+  // changed.  Returns true if |function| was modified.
+  bool RewriteInstructions(ir::Function* function,
+                           const LiveComponentMap& live_components);
+
+  // Returns true if the result of |inst| is a vector.
+  bool HasVectorResult(const ir::Instruction* inst) const;
+
+  // Adds |work_item| to |work_list| if it is not already live according to
+  // |live_components|.  |live_components| is updated to indicate that
+  // |work_item| is now live.
+  void AddItemToWorkListIfNeeded(WorkListItem work_item,
+                                 LiveComponentMap* live_components,
+                                 std::vector<WorkListItem>* work_list);
+
+  // Marks the components |live_elements| of the uses in |current_inst| as live
+  // according to |live_components|. If they were not live before, then they are
+  // added to |work_list|.
+  void MarkUsesAsLive(ir::Instruction* current_inst,
+                      const utils::BitVector& live_elements,
+                      LiveComponentMap* live_components,
+                      std::vector<WorkListItem>* work_list);
+
+  // Marks the uses in the OpVectorShuffle instruction in |current_item| as live
+  // based on the live components in |current_item|. If anything becomes live
+  // they are added to |work_list| and |live_components| is updated
+  // accordingly.
+  void MarkVectorShuffleUsesAsLive(const WorkListItem& current_item,
+                                   VectorDCE::LiveComponentMap* live_components,
+                                   std::vector<WorkListItem>* work_list);
+
+  // Marks the uses in the OpCompositeInsert instruction in |current_item| as
+  // live based on the live components in |current_item|. If anything becomes
+  // live they are added to |work_list| and |live_components| is updated
+  // accordingly.
+  void MarkInsertUsesAsLive(const WorkListItem& current_item,
+                            LiveComponentMap* live_components,
+                            std::vector<WorkListItem>* work_list);
+
+  // Marks the uses in the OpCompositeExtract instruction |current_inst| as
+  // live. If anything becomes live they are added to |work_list| and
+  // |live_components| is updated accordingly.
+  void MarkExtractUseAsLive(const ir::Instruction* current_inst,
+                            LiveComponentMap* live_components,
+                            std::vector<WorkListItem>* work_list);
+
+  // A BitVector that can always be used to say that all components of a vector
+  // are live.
+  utils::BitVector all_components_live_;
+};
+
+}  // namespace opt
+}  // namespace spvtools
+
+#endif  // LIBSPIRV_OPT_VECTOR_DCE_H_
--- a/source/util/bit_vector.cpp
+++ b/source/util/bit_vector.cpp
@ -14,6 +14,7 @@

 #include "bit_vector.h"

+#include <cassert>
 #include <iostream>

 namespace spvtools {
@ -36,5 +37,46 @@ void BitVector::ReportDensity(std::ostream& out) {
      << ", bytes per element="
      << (double)(bits_.size() * sizeof(BitContainer)) / (double)(count);
 }
+
+bool BitVector::Or(const BitVector& other) {
+  auto this_it = this->bits_.begin();
+  auto other_it = other.bits_.begin();
+  bool modified = false;
+
+  while (this_it != this->bits_.end() && other_it != other.bits_.end()) {
+    auto temp = *this_it | *other_it;
+    if (temp != *this_it) {
+      modified = true;
+      *this_it = temp;
+    }
+    ++this_it;
+    ++other_it;
+  }
+
+  if (other_it != other.bits_.end()) {
+    modified = true;
+    this->bits_.insert(this->bits_.end(), other_it, other.bits_.end());
+  }
+
+  return modified;
+}
+
+std::ostream& operator<<(std::ostream& out, const BitVector& bv) {
+  out << "{";
+  for (uint32_t i = 0; i < bv.bits_.size(); ++i) {
+    BitVector::BitContainer b = bv.bits_[i];
+    uint32_t j = 0;
+    while (b != 0) {
+      if (b & 1) {
+        out << ' ' << i * BitVector::kBitContainerSize + j;
+      }
+      ++j;
+      b = b >> 1;
+    }
+  }
+  out << "}";
+  return out;
+}
+
 }  // namespace utils
 }  // namespace spvtools
--- a/source/util/bit_vector.h
+++ b/source/util/bit_vector.h
@ -33,7 +33,8 @@ class BitVector {

 public:
  // Creates a bit vector contianing 0s.
-  BitVector() : bits_(kInitialNumBits / kBitContainerSize, 0) {}
+  BitVector(uint32_t reserved_size = kInitialNumBits)
+      : bits_((reserved_size - 1) / kBitContainerSize + 1, 0) {}

  // Sets the |i|th bit to 1.  Returns the |i|th bit before it was set.
  bool Set(uint32_t i) {
@ -88,10 +89,26 @@ class BitVector {
            (static_cast<BitContainer>(1) << bit_in_element)) != 0;
  }

+  // Returns true if every bit is 0.
+  bool Empty() const {
+    for (BitContainer b : bits_) {
+      if (b != 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
  // Print a report on the densicy of the bit vector, number of 1 bits, number
  // of bytes, and average bytes for 1 bit, to |out|.
  void ReportDensity(std::ostream& out);

+  friend std::ostream& operator<<(std::ostream&, const BitVector&);
+
+  // Performs a bitwise-or operation on |this| and |that|, storing the result in
+  // |this|.  Return true if |this| changed.
+  bool Or(const BitVector& that);
+
 private:
  std::vector<BitContainer> bits_;
 };
--- a/source/util/ilist_node.h
+++ b/source/util/ilist_node.h
@ -67,7 +67,7 @@ class IntrusiveNodeBase {
  // from that list.
  //
  // It is assumed that the given node is of type NodeType.  It is an error if
-  // |pos| is not already in a list.
+  // |pos| is not already in a list, or if |pos| is equal to |this|.
  inline void InsertAfter(NodeType* pos);

  // Removes the given node from the list.  It is assumed that the node is
@ -185,6 +185,8 @@ template <class NodeType>
 inline void IntrusiveNodeBase<NodeType>::InsertAfter(NodeType* pos) {
  assert(!this->is_sentinel_ && "Sentinel nodes cannot be moved around.");
  assert(pos->IsInAList() && "Pos should already be in a list.");
+  assert(this != pos && "Can't insert a node after itself.");
+
  if (this->IsInAList()) {
    this->RemoveFromList();
  }
--- a/test/opt/CMakeLists.txt
+++ b/test/opt/CMakeLists.txt
@ -317,3 +317,8 @@ add_spvtools_unittest(TARGET scalar_analysis
  LIBS SPIRV-Tools-opt
 )

+add_spvtools_unittest(TARGET vector_dce
+  SRCS vector_dce_test.cpp pass_utils.cpp
+  LIBS SPIRV-Tools-opt
+)
+
--- a/test/opt/vector_dce_test.cpp
+++ b/test/opt/vector_dce_test.cpp
@ -0,0 +1,816 @@
+// Copyright (c) 2018 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "pass_fixture.h"
+#include "pass_utils.h"
+
+namespace {
+
+using namespace spvtools;
+
+using VectorDCETest = PassTest<::testing::Test>;
+
+TEST_F(VectorDCETest, InsertAfterInsertElim) {
+  // With two insertions to the same offset, the first is dead.
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) in float In0;
+  // layout (location=1) in float In1;
+  // layout (location=2) in vec2 In2;
+  // layout (location=0) out vec4 OutColor;
+  //
+  // void main()
+  // {
+  //     vec2 v = In2;
+  //     v.x = In0 + In1; // dead
+  //     v.x = 0.0;
+  //     OutColor = v.xyxy;
+  // }
+
+  const std::string before_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In2 %In0 %In1 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In2 "In2"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %OutColor "OutColor"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpMemberName %_Globals_ 1 "g_n"
+OpName %_ ""
+OpDecorate %In2 Location 2
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %OutColor Location 0
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpMemberDecorate %_Globals_ 1 Offset 4
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+%void = OpTypeVoid
+%11 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+%In2 = OpVariable %_ptr_Input_v2float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In0 = OpVariable %_ptr_Input_float Input
+%In1 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_ptr_Function_float = OpTypePointer Function %float
+%float_0 = OpConstant %float 0
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%int = OpTypeInt 32 1
+%_Globals_ = OpTypeStruct %uint %int
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+)";
+
+  const std::string after_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In2 %In0 %In1 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In2 "In2"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %OutColor "OutColor"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpMemberName %_Globals_ 1 "g_n"
+OpName %_ ""
+OpDecorate %In2 Location 2
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %OutColor Location 0
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpMemberDecorate %_Globals_ 1 Offset 4
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%_ptr_Input_v2float = OpTypePointer Input %v2float
+%In2 = OpVariable %_ptr_Input_v2float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In0 = OpVariable %_ptr_Input_float Input
+%In1 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_ptr_Function_float = OpTypePointer Function %float
+%float_0 = OpConstant %float 0
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%int = OpTypeInt 32 1
+%_Globals_ = OpTypeStruct %uint %int
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %11
+%25 = OpLabel
+%26 = OpLoad %v2float %In2
+%27 = OpLoad %float %In0
+%28 = OpLoad %float %In1
+%29 = OpFAdd %float %27 %28
+%35 = OpCompositeInsert %v2float %29 %26 0
+%37 = OpCompositeInsert %v2float %float_0 %35 0
+%33 = OpVectorShuffle %v4float %37 %37 0 1 0 1
+OpStore %OutColor %33
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string after =
+      R"(%main = OpFunction %void None %10
+%23 = OpLabel
+%24 = OpLoad %v2float %In2
+%25 = OpLoad %float %In0
+%26 = OpLoad %float %In1
+%27 = OpFAdd %float %25 %26
+%28 = OpCompositeInsert %v2float %27 %24 0
+%29 = OpCompositeInsert %v2float %float_0 %24 0
+%30 = OpVectorShuffle %v4float %29 %29 0 1 0 1
+OpStore %OutColor %30
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::VectorDCE>(before_predefs + before,
+                                        after_predefs + after, true, true);
+}
+
+TEST_F(VectorDCETest, DeadInsertInChainWithPhi) {
+  // Dead insert eliminated with phi in insertion chain.
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) in vec4 In0;
+  // layout (location=1) in float In1;
+  // layout (location=2) in float In2;
+  // layout (location=0) out vec4 OutColor;
+  //
+  // layout(std140, binding = 0 ) uniform _Globals_
+  // {
+  //     bool g_b;
+  // };
+  //
+  // void main()
+  // {
+  //     vec4 v = In0;
+  //     v.z = In1 + In2;
+  //     if (g_b) v.w = 1.0;
+  //     OutColor = vec4(v.x,v.y,0.0,v.w);
+  // }
+
+  const std::string before_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In0 %In1 %In2 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %In2 "In2"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpName %_ ""
+OpName %OutColor "OutColor"
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %In2 Location 2
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+OpDecorate %OutColor Location 0
+%void = OpTypeVoid
+%11 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In1 = OpVariable %_ptr_Input_float Input
+%In2 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_ptr_Function_float = OpTypePointer Function %float
+%_Globals_ = OpTypeStruct %uint
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%bool = OpTypeBool
+%uint_0 = OpConstant %uint 0
+%float_1 = OpConstant %float 1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+)";
+
+  const std::string after_predefs =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In0 %In1 %In2 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %In2 "In2"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_b"
+OpName %_ ""
+OpName %OutColor "OutColor"
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %In2 Location 2
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+OpDecorate %OutColor Location 0
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Function_v4float = OpTypePointer Function %v4float
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In1 = OpVariable %_ptr_Input_float Input
+%In2 = OpVariable %_ptr_Input_float Input
+%uint = OpTypeInt 32 0
+%_ptr_Function_float = OpTypePointer Function %float
+%_Globals_ = OpTypeStruct %uint
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%_ptr_Uniform_uint = OpTypePointer Uniform %uint
+%bool = OpTypeBool
+%uint_0 = OpConstant %uint 0
+%float_1 = OpConstant %float 1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%float_0 = OpConstant %float 0
+)";
+
+  const std::string before =
+      R"(%main = OpFunction %void None %11
+%31 = OpLabel
+%32 = OpLoad %v4float %In0
+%33 = OpLoad %float %In1
+%34 = OpLoad %float %In2
+%35 = OpFAdd %float %33 %34
+%51 = OpCompositeInsert %v4float %35 %32 2
+%37 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
+%38 = OpLoad %uint %37
+%39 = OpINotEqual %bool %38 %uint_0
+OpSelectionMerge %40 None
+OpBranchConditional %39 %41 %40
+%41 = OpLabel
+%53 = OpCompositeInsert %v4float %float_1 %51 3
+OpBranch %40
+%40 = OpLabel
+%60 = OpPhi %v4float %51 %31 %53 %41
+%55 = OpCompositeExtract %float %60 0
+%57 = OpCompositeExtract %float %60 1
+%59 = OpCompositeExtract %float %60 3
+%49 = OpCompositeConstruct %v4float %55 %57 %float_0 %59
+OpStore %OutColor %49
+OpReturn
+OpFunctionEnd
+)";
+
+  const std::string after =
+      R"(%main = OpFunction %void None %10
+%27 = OpLabel
+%28 = OpLoad %v4float %In0
+%29 = OpLoad %float %In1
+%30 = OpLoad %float %In2
+%31 = OpFAdd %float %29 %30
+%32 = OpCompositeInsert %v4float %31 %28 2
+%33 = OpAccessChain %_ptr_Uniform_uint %_ %int_0
+%34 = OpLoad %uint %33
+%35 = OpINotEqual %bool %34 %uint_0
+OpSelectionMerge %36 None
+OpBranchConditional %35 %37 %36
+%37 = OpLabel
+%38 = OpCompositeInsert %v4float %float_1 %28 3
+OpBranch %36
+%36 = OpLabel
+%39 = OpPhi %v4float %28 %27 %38 %37
+%40 = OpCompositeExtract %float %39 0
+%41 = OpCompositeExtract %float %39 1
+%42 = OpCompositeExtract %float %39 3
+%43 = OpCompositeConstruct %v4float %40 %41 %float_0 %42
+OpStore %OutColor %43
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::VectorDCE>(before_predefs + before,
+                                        after_predefs + after, true, true);
+}
+
+TEST_F(VectorDCETest, DeadInsertInCycleToDo) {
+  // Dead insert in chain with cycle. Demonstrates analysis can handle
+  // cycles in chains going through scalars intermediate values.
+  //
+  // TODO: Improve algorithm to remove dead insert into v.y.  Need to treat
+  //       scalars at if they are vectors with a single element.
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) in vec4 In0;
+  // layout (location=1) in float In1;
+  // layout (location=2) in float In2;
+  // layout (location=0) out vec4 OutColor;
+  //
+  // layout(std140, binding = 0 ) uniform _Globals_
+  // {
+  //     int g_n  ;
+  // };
+  //
+  // void main()
+  // {
+  //     vec2 v = vec2(0.0, 1.0);
+  //     for (int i = 0; i < g_n; i++) {
+  //       v.x = v.x + 1;
+  //       v.y = v.y * 0.9; // dead
+  //     }
+  //     OutColor = vec4(v.x);
+  // }
+
+  const std::string assembly =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %OutColor %In0 %In1 %In2
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpName %main "main"
+OpName %_Globals_ "_Globals_"
+OpMemberName %_Globals_ 0 "g_n"
+OpName %_ ""
+OpName %OutColor "OutColor"
+OpName %In0 "In0"
+OpName %In1 "In1"
+OpName %In2 "In2"
+OpMemberDecorate %_Globals_ 0 Offset 0
+OpDecorate %_Globals_ Block
+OpDecorate %_ DescriptorSet 0
+OpDecorate %_ Binding 0
+OpDecorate %OutColor Location 0
+OpDecorate %In0 Location 0
+OpDecorate %In1 Location 1
+OpDecorate %In2 Location 2
+%void = OpTypeVoid
+%10 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v2float = OpTypeVector %float 2
+%_ptr_Function_v2float = OpTypePointer Function %v2float
+%float_0 = OpConstant %float 0
+%float_1 = OpConstant %float 1
+%16 = OpConstantComposite %v2float %float_0 %float_1
+%int = OpTypeInt 32 1
+%_ptr_Function_int = OpTypePointer Function %int
+%int_0 = OpConstant %int 0
+%_Globals_ = OpTypeStruct %int
+%_ptr_Uniform__Globals_ = OpTypePointer Uniform %_Globals_
+%_ = OpVariable %_ptr_Uniform__Globals_ Uniform
+%_ptr_Uniform_int = OpTypePointer Uniform %int
+%bool = OpTypeBool
+%float_0_75 = OpConstant %float 0.75
+%int_1 = OpConstant %int 1
+%v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%_ptr_Input_float = OpTypePointer Input %float
+%In1 = OpVariable %_ptr_Input_float Input
+%In2 = OpVariable %_ptr_Input_float Input
+%main = OpFunction %void None %10
+%29 = OpLabel
+OpBranch %30
+%30 = OpLabel
+%31 = OpPhi %v2float %16 %29 %32 %33
+%34 = OpPhi %int %int_0 %29 %35 %33
+OpLoopMerge %36 %33 None
+OpBranch %37
+%37 = OpLabel
+%38 = OpAccessChain %_ptr_Uniform_int %_ %int_0
+%39 = OpLoad %int %38
+%40 = OpSLessThan %bool %34 %39
+OpBranchConditional %40 %41 %36
+%41 = OpLabel
+%42 = OpCompositeExtract %float %31 0
+%43 = OpFAdd %float %42 %float_1
+%44 = OpCompositeInsert %v2float %43 %31 0
+%45 = OpCompositeExtract %float %44 1
+%46 = OpFMul %float %45 %float_0_75
+%32 = OpCompositeInsert %v2float %46 %44 1
+OpBranch %33
+%33 = OpLabel
+%35 = OpIAdd %int %34 %int_1
+OpBranch %30
+%36 = OpLabel
+%47 = OpCompositeExtract %float %31 0
+%48 = OpCompositeConstruct %v4float %47 %47 %47 %47
+OpStore %OutColor %48
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::VectorDCE>(assembly, assembly, true, true);
+}
+
+TEST_F(VectorDCETest, DeadLoadFeedingCompositeConstruct) {
+  // Detach the loads feeding the CompositeConstruct for the unused elements.
+  // TODO: Implement the rewrite for CompositeConstruct.
+
+  const std::string assembly =
+      R"(OpCapability Shader
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %main "main" %In0 %OutColor
+OpExecutionMode %main OriginUpperLeft
+OpSource GLSL 450
+OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+OpSourceExtension "GL_GOOGLE_include_directive"
+OpName %main "main"
+OpName %In0 "In0"
+OpName %OutColor "OutColor"
+OpDecorate %In0 Location 0
+OpDecorate %OutColor Location 0
+%void = OpTypeVoid
+%6 = OpTypeFunction %void
+%float = OpTypeFloat 32
+%v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+%In0 = OpVariable %_ptr_Input_v4float Input
+%uint = OpTypeInt 32 0
+%uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+%uint_1 = OpConstant %uint 1
+%uint_2 = OpConstant %uint 2
+%v3float = OpTypeVector %float 3
+%int = OpTypeInt 32 1
+%int_0 = OpConstant %int 0
+%int_20 = OpConstant %int 20
+%bool = OpTypeBool
+%float_1 = OpConstant %float 1
+%int_1 = OpConstant %int 1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+%OutColor = OpVariable %_ptr_Output_v4float Output
+%23 = OpUndef %v3float
+%main = OpFunction %void None %6
+%24 = OpLabel
+%25 = OpAccessChain %_ptr_Input_float %In0 %uint_0
+%26 = OpLoad %float %25
+%27 = OpAccessChain %_ptr_Input_float %In0 %uint_1
+%28 = OpLoad %float %27
+%29 = OpAccessChain %_ptr_Input_float %In0 %uint_2
+%30 = OpLoad %float %29
+%31 = OpCompositeConstruct %v3float %30 %28 %26
+OpBranch %32
+%32 = OpLabel
+%33 = OpPhi %v3float %31 %24 %34 %35
+%36 = OpPhi %int %int_0 %24 %37 %35
+OpLoopMerge %38 %35 None
+OpBranch %39
+%39 = OpLabel
+%40 = OpSLessThan %bool %36 %int_20
+OpBranchConditional %40 %41 %38
+%41 = OpLabel
+%42 = OpCompositeExtract %float %33 0
+%43 = OpFAdd %float %42 %float_1
+%34 = OpCompositeInsert %v3float %43 %33 0
+OpBranch %35
+%35 = OpLabel
+%37 = OpIAdd %int %36 %int_1
+OpBranch %32
+%38 = OpLabel
+%44 = OpCompositeExtract %float %33 0
+%45 = OpCompositeConstruct %v4float %44 %44 %44 %44
+OpStore %OutColor %45
+OpReturn
+OpFunctionEnd
+)";
+
+  SinglePassRunAndCheck<opt::VectorDCE>(assembly, assembly, true, true);
+}
+
+#ifdef SPIRV_EFFCEE
+TEST_F(VectorDCETest, DeadLoadFeedingVectorShuffle) {
+  // Detach the loads feeding the CompositeConstruct for the unused elements.
+  // TODO: Implement the rewrite for CompositeConstruct.
+
+  const std::string assembly =
+      R"(
+; MemPass Type2Undef does not reuse and already existing undef.
+; CHECK: {{%\w+}} = OpUndef %v3float
+; CHECK: [[undef:%\w+]] = OpUndef %v3float
+; CHECK: OpFunction
+; CHECK: OpVectorShuffle %v3float {{%\w+}} [[undef]] 0 4 5
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %In0 %OutColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpName %main "main"
+               OpName %In0 "In0"
+               OpName %OutColor "OutColor"
+               OpDecorate %In0 Location 0
+               OpDecorate %OutColor Location 0
+       %void = OpTypeVoid
+          %6 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v4float = OpTypeVector %float 4
+%_ptr_Input_v4float = OpTypePointer Input %v4float
+        %In0 = OpVariable %_ptr_Input_v4float Input
+       %uint = OpTypeInt 32 0
+     %uint_0 = OpConstant %uint 0
+%_ptr_Input_float = OpTypePointer Input %float
+     %uint_1 = OpConstant %uint 1
+     %uint_2 = OpConstant %uint 2
+    %v3float = OpTypeVector %float 3
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+     %int_20 = OpConstant %int 20
+       %bool = OpTypeBool
+    %float_1 = OpConstant %float 1
+    %vec_const = OpConstantComposite %v3float %float_1 %float_1 %float_1
+      %int_1 = OpConstant %int 1
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+   %OutColor = OpVariable %_ptr_Output_v4float Output
+         %23 = OpUndef %v3float
+       %main = OpFunction %void None %6
+         %24 = OpLabel
+         %25 = OpAccessChain %_ptr_Input_float %In0 %uint_0
+         %26 = OpLoad %float %25
+         %27 = OpAccessChain %_ptr_Input_float %In0 %uint_1
+         %28 = OpLoad %float %27
+         %29 = OpAccessChain %_ptr_Input_float %In0 %uint_2
+         %30 = OpLoad %float %29
+         %31 = OpCompositeConstruct %v3float %30 %28 %26
+         %sh = OpVectorShuffle %v3float %vec_const %31 0 4 5
+               OpBranch %32
+         %32 = OpLabel
+         %33 = OpPhi %v3float %sh %24 %34 %35
+         %36 = OpPhi %int %int_0 %24 %37 %35
+               OpLoopMerge %38 %35 None
+               OpBranch %39
+         %39 = OpLabel
+         %40 = OpSLessThan %bool %36 %int_20
+               OpBranchConditional %40 %41 %38
+         %41 = OpLabel
+         %42 = OpCompositeExtract %float %33 0
+         %43 = OpFAdd %float %42 %float_1
+         %34 = OpCompositeInsert %v3float %43 %33 0
+               OpBranch %35
+         %35 = OpLabel
+         %37 = OpIAdd %int %36 %int_1
+               OpBranch %32
+         %38 = OpLabel
+         %44 = OpCompositeExtract %float %33 0
+         %45 = OpCompositeConstruct %v4float %44 %44 %44 %44
+               OpStore %OutColor %45
+               OpReturn
+               OpFunctionEnd
+)";
+
+  SinglePassRunAndMatch<opt::VectorDCE>(assembly, true);
+}
+
+TEST_F(VectorDCETest, DeadInstThroughShuffle) {
+  // Dead insert in chain with cycle. Demonstrates analysis can handle
+  // cycles in chains.
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) out vec4 OutColor;
+  //
+  // void main()
+  // {
+  //     vec2 v;
+  //     v.x = 0.0;
+  //     v.y = 0.1; // dead
+  //     for (int i = 0; i < 20; i++) {
+  //       v.x = v.x + 1;
+  //       v = v * 0.9;
+  //     }
+  //     OutColor = vec4(v.x);
+  // }
+
+  const std::string assembly =
+      R"(
+; CHECK: OpFunction
+; CHECK-NOT: OpCompositeInsert %v2float {{%\w+}} 1
+; CHECK: OpFunctionEnd
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %OutColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpName %main "main"
+               OpName %OutColor "OutColor"
+               OpDecorate %OutColor Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %float_0 = OpConstant %float 0
+%float_0_100000001 = OpConstant %float 0.100000001
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+     %int_20 = OpConstant %int 20
+       %bool = OpTypeBool
+    %float_1 = OpConstant %float 1
+%float_0_899999976 = OpConstant %float 0.899999976
+      %int_1 = OpConstant %int 1
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+   %OutColor = OpVariable %_ptr_Output_v4float Output
+         %58 = OpUndef %v2float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %49 = OpCompositeInsert %v2float %float_0 %58 0
+         %51 = OpCompositeInsert %v2float %float_0_100000001 %49 1
+               OpBranch %22
+         %22 = OpLabel
+         %60 = OpPhi %v2float %51 %5 %38 %25
+         %59 = OpPhi %int %int_0 %5 %41 %25
+               OpLoopMerge %24 %25 None
+               OpBranch %26
+         %26 = OpLabel
+         %30 = OpSLessThan %bool %59 %int_20
+               OpBranchConditional %30 %23 %24
+         %23 = OpLabel
+         %53 = OpCompositeExtract %float %60 0
+         %34 = OpFAdd %float %53 %float_1
+         %55 = OpCompositeInsert %v2float %34 %60 0
+         %38 = OpVectorTimesScalar %v2float %55 %float_0_899999976
+               OpBranch %25
+         %25 = OpLabel
+         %41 = OpIAdd %int %59 %int_1
+               OpBranch %22
+         %24 = OpLabel
+         %57 = OpCompositeExtract %float %60 0
+         %47 = OpCompositeConstruct %v4float %57 %57 %57 %57
+               OpStore %OutColor %47
+               OpReturn
+               OpFunctionEnd
+)";
+
+  SinglePassRunAndMatch<opt::VectorDCE>(assembly, true);
+}
+
+TEST_F(VectorDCETest, DeadInsertThroughOtherInst) {
+  // Dead insert in chain with cycle. Demonstrates analysis can handle
+  // cycles in chains.
+  //
+  // Note: The SPIR-V assembly has had store/load elimination
+  // performed to allow the inserts and extracts to directly
+  // reference each other.
+  //
+  // #version 450
+  //
+  // layout (location=0) out vec4 OutColor;
+  //
+  // void main()
+  // {
+  //     vec2 v;
+  //     v.x = 0.0;
+  //     v.y = 0.1; // dead
+  //     for (int i = 0; i < 20; i++) {
+  //       v.x = v.x + 1;
+  //       v = v * 0.9;
+  //     }
+  //     OutColor = vec4(v.x);
+  // }
+
+  const std::string assembly =
+      R"(
+; CHECK: OpFunction
+; CHECK-NOT: OpCompositeInsert %v2float {{%\w+}} 1
+; CHECK: OpFunctionEnd
+               OpCapability Shader
+          %1 = OpExtInstImport "GLSL.std.450"
+               OpMemoryModel Logical GLSL450
+               OpEntryPoint Fragment %main "main" %OutColor
+               OpExecutionMode %main OriginUpperLeft
+               OpSource GLSL 450
+               OpSourceExtension "GL_GOOGLE_cpp_style_line_directive"
+               OpSourceExtension "GL_GOOGLE_include_directive"
+               OpName %main "main"
+               OpName %OutColor "OutColor"
+               OpDecorate %OutColor Location 0
+       %void = OpTypeVoid
+          %3 = OpTypeFunction %void
+      %float = OpTypeFloat 32
+    %v2float = OpTypeVector %float 2
+    %float_0 = OpConstant %float 0
+%float_0_100000001 = OpConstant %float 0.100000001
+        %int = OpTypeInt 32 1
+      %int_0 = OpConstant %int 0
+     %int_20 = OpConstant %int 20
+       %bool = OpTypeBool
+    %float_1 = OpConstant %float 1
+%float_0_899999976 = OpConstant %float 0.899999976
+      %int_1 = OpConstant %int 1
+    %v4float = OpTypeVector %float 4
+%_ptr_Output_v4float = OpTypePointer Output %v4float
+   %OutColor = OpVariable %_ptr_Output_v4float Output
+         %58 = OpUndef %v2float
+       %main = OpFunction %void None %3
+          %5 = OpLabel
+         %49 = OpCompositeInsert %v2float %float_0 %58 0
+         %51 = OpCompositeInsert %v2float %float_0_100000001 %49 1
+               OpBranch %22
+         %22 = OpLabel
+         %60 = OpPhi %v2float %51 %5 %38 %25
+         %59 = OpPhi %int %int_0 %5 %41 %25
+               OpLoopMerge %24 %25 None
+               OpBranch %26
+         %26 = OpLabel
+         %30 = OpSLessThan %bool %59 %int_20
+               OpBranchConditional %30 %23 %24
+         %23 = OpLabel
+         %53 = OpCompositeExtract %float %60 0
+         %34 = OpFAdd %float %53 %float_1
+         %55 = OpCompositeInsert %v2float %34 %60 0
+         %38 = OpVectorTimesScalar %v2float %55 %float_0_899999976
+               OpBranch %25
+         %25 = OpLabel
+         %41 = OpIAdd %int %59 %int_1
+               OpBranch %22
+         %24 = OpLabel
+         %57 = OpCompositeExtract %float %60 0
+         %47 = OpCompositeConstruct %v4float %57 %57 %57 %57
+               OpStore %OutColor %47
+               OpReturn
+               OpFunctionEnd
+)";
+
+  SinglePassRunAndMatch<opt::VectorDCE>(assembly, true);
+}
+#endif
+
+}  // anonymous namespace
--- a/test/util/CMakeLists.txt
+++ b/test/util/CMakeLists.txt
@ -18,4 +18,5 @@ add_spvtools_unittest(TARGET util_intrusive_list

 add_spvtools_unittest(TARGET bit_vector
  SRCS bit_vector_test.cpp
+  LIBS SPIRV-Tools-opt
 )
--- a/test/util/bit_vector_test.cpp
+++ b/test/util/bit_vector_test.cpp
@ -109,4 +109,53 @@ TEST(BitVectorTest, ClearReturnValue) {
  }
 }

+TEST(BitVectorTest, SimpleOrTest) {
+  BitVector bvec1;
+  bvec1.Set(3);
+  bvec1.Set(4);
+
+  BitVector bvec2;
+  bvec2.Set(2);
+  bvec2.Set(4);
+
+  // Check that |bvec1| changed when doing the |Or| operation.
+  EXPECT_TRUE(bvec1.Or(bvec2));
+
+  // Check that the values are all correct.
+  EXPECT_FALSE(bvec1.Get(0));
+  EXPECT_FALSE(bvec1.Get(1));
+  EXPECT_TRUE(bvec1.Get(2));
+  EXPECT_TRUE(bvec1.Get(3));
+  EXPECT_TRUE(bvec1.Get(4));
+}
+
+TEST(BitVectorTest, ResizingOrTest) {
+  BitVector bvec1;
+  bvec1.Set(3);
+  bvec1.Set(4);
+
+  BitVector bvec2;
+  bvec2.Set(10000);
+
+  // Similar to above except with a large value to test resizing.
+  EXPECT_TRUE(bvec1.Or(bvec2));
+  EXPECT_FALSE(bvec1.Get(0));
+  EXPECT_FALSE(bvec1.Get(1));
+  EXPECT_FALSE(bvec1.Get(2));
+  EXPECT_TRUE(bvec1.Get(3));
+  EXPECT_TRUE(bvec1.Get(10000));
+}
+
+TEST(BitVectorTest, SubsetOrTest) {
+  BitVector bvec1;
+  bvec1.Set(3);
+  bvec1.Set(4);
+
+  BitVector bvec2;
+  bvec2.Set(3);
+
+  // |Or| returns false if |bvec1| does not change.
+  EXPECT_FALSE(bvec1.Or(bvec2));
+}
+
 }  // namespace
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@ -310,6 +310,10 @@ Options (in lexicographical order):
               prints CPU/WALL/USR/SYS time (and RSS if possible), but note that
               USR/SYS time are returned by getrusage() and can have a small
               error.
+  --vector-dce
+               This pass looks for components of vectors that are unused, and
+               removes them from the vector.  Note this would still leave around
+               lots of dead code that a pass of ADCE will be able to remove.
  --workaround-1209
               Rewrites instructions for which there are known driver bugs to
               avoid triggering those bugs.
@ -555,6 +559,8 @@ OptStatus ParseFlags(int argc, const char** argv, Optimizer* optimizer,
        optimizer->RegisterPass(CreateCopyPropagateArraysPass());
      } else if (0 == strcmp(cur_arg, "--loop-unroll")) {
        optimizer->RegisterPass(CreateLoopUnrollPass(true));
+      } else if (0 == strcmp(cur_arg, "--vector-dce")) {
+        optimizer->RegisterPass(CreateVectorDCEPass());
      } else if (0 == strcmp(cur_arg, "--loop-unroll-partial")) {
        OptStatus status =
            ParseLoopUnrollPartialArg(argc, argv, ++argi, optimizer);