diff --git a/source/opt/convert_to_half_pass.cpp b/source/opt/convert_to_half_pass.cpp index 2c4a631e1..cb0065d2d 100644 --- a/source/opt/convert_to_half_pass.cpp +++ b/source/opt/convert_to_half_pass.cpp @@ -63,6 +63,10 @@ bool ConvertToHalfPass::IsRelaxed(uint32_t id) { void ConvertToHalfPass::AddRelaxed(uint32_t id) { relaxed_ids_set_.insert(id); } +bool ConvertToHalfPass::CanRelaxOpOperands(Instruction* inst) { + return image_ops_.count(inst->opcode()) == 0; +} + analysis::Type* ConvertToHalfPass::FloatScalarType(uint32_t width) { analysis::Float float_ty(width); return context()->get_type_mgr()->GetRegisteredType(&float_ty); @@ -313,7 +317,8 @@ bool ConvertToHalfPass::CloseRelaxInst(Instruction* inst) { relax = true; get_def_use_mgr()->ForEachUser(inst, [&relax, this](Instruction* uinst) { if (uinst->result_id() == 0 || !IsFloat(uinst, 32) || - (!IsDecoratedRelaxed(uinst) && !IsRelaxed(uinst->result_id()))) { + (!IsDecoratedRelaxed(uinst) && !IsRelaxed(uinst->result_id())) || + !CanRelaxOpOperands(uinst)) { relax = false; return; } diff --git a/source/opt/convert_to_half_pass.h b/source/opt/convert_to_half_pass.h index 24a478ffc..8e10c4fb9 100644 --- a/source/opt/convert_to_half_pass.h +++ b/source/opt/convert_to_half_pass.h @@ -56,6 +56,9 @@ class ConvertToHalfPass : public Pass { // Add |id| to the relaxed id set void AddRelaxed(uint32_t id); + // Return true if the instruction's operands can be relaxed + bool CanRelaxOpOperands(Instruction* inst); + // Return type id for float with |width| analysis::Type* FloatScalarType(uint32_t width); @@ -133,13 +136,13 @@ class ConvertToHalfPass : public Pass { // Set of 450 extension operations to be processed std::unordered_set target_ops_450_; - // Set of sample operations + // Set of all sample operations, including dref and non-dref operations std::unordered_set image_ops_; - // Set of dref sample operations + // Set of only dref sample operations std::unordered_set dref_image_ops_; - // Set of dref sample 
operations + // Set of operations that can be marked as relaxed std::unordered_set closure_ops_; // Set of ids of all relaxed instructions diff --git a/test/opt/convert_relaxed_to_half_test.cpp b/test/opt/convert_relaxed_to_half_test.cpp index 27330e109..62b9ae453 100644 --- a/test/opt/convert_relaxed_to_half_test.cpp +++ b/test/opt/convert_relaxed_to_half_test.cpp @@ -1613,6 +1613,106 @@ OpFunctionEnd SinglePassRunAndCheck(test, test, true); } +TEST_F(ConvertToHalfTest, PreserveImageOperandPrecision) { + // Ensure that a non-relaxed texture coordinate does not get relaxed nor + // converted to half precision if the image instruction is marked relaxed. + + // Also ensure that a relaxed local variable does get converted to half + // precision before being passed to an image operator. + + // #version 310 es + // + // precision mediump float; + // + // layout(location = 10) in highp vec4 vertex_uv01; + // layout(binding = 0, set = 3) uniform sampler2D materialParams_baseColorMap; + // + // layout(location = 0) out vec4 fragColor; + // + // void main() { + // vec4 uv = vec4(2.0); + // fragColor = texture(materialParams_baseColorMap, uv.xy); + // fragColor = texture(materialParams_baseColorMap, vertex_uv01.xy); + // } + const std::string test = R"( + OpCapability Shader + OpCapability Float16 + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %4 "main" %13 %25 + OpExecutionMode %4 OriginUpperLeft + OpSource ESSL 310 + OpDecorate %9 RelaxedPrecision +;CHECK: OpDecorate [[uv:%\w+]] RelaxedPrecision + OpDecorate %13 Location 0 + OpDecorate %17 DescriptorSet 3 + OpDecorate %17 Binding 0 + OpDecorate %18 RelaxedPrecision + OpDecorate %23 RelaxedPrecision + OpDecorate %25 Location 10 + %2 = OpTypeVoid + %3 = OpTypeFunction %2 + %6 = OpTypeFloat 32 +;CHECK: [[float32_t:%\w+]] = OpTypeFloat 32 + %7 = OpTypeVector %6 4 +;CHECK: [[vec4_t:%\w+]] = OpTypeVector [[float32_t]] 4 + %8 = OpTypePointer Function %7 + %10 = OpConstant %6 2 + %11 
= OpConstantComposite %7 %10 %10 %10 %10 + %12 = OpTypePointer Output %7 +;CHECK: [[output_ptr_t:%\w+]] = OpTypePointer Output [[vec4_t]] + %13 = OpVariable %12 Output +;CHECK: [[output:%\w+]] = OpVariable [[output_ptr_t]] Output + %14 = OpTypeImage %6 2D 0 0 0 1 Unknown + %15 = OpTypeSampledImage %14 + %16 = OpTypePointer UniformConstant %15 + %17 = OpVariable %16 UniformConstant + %19 = OpTypeVector %6 2 +;CHECK: [[vec2_t:%\w+]] = OpTypeVector [[float32_t]] 2 + %24 = OpTypePointer Input %7 +;CHECK: [[input_ptr_t:%\w+]] = OpTypePointer Input [[vec4_t]] + %25 = OpVariable %24 Input + %29 = OpTypeFloat 16 +;CHECK: [[float16_t:%\w+]] = OpTypeFloat 16 + %30 = OpTypeVector %29 4 + %33 = OpTypeVector %29 2 +;CHECK: [[vec2_16b_t:%\w+]] = OpTypeVector [[float16_t]] 2 + %4 = OpFunction %2 None %3 + %5 = OpLabel + +; The only Function storage variable is marked as relaxed + %9 = OpVariable %8 Function +;CHECK: [[uv]] = OpVariable {{%\w+}} Function + OpStore %9 %11 + %18 = OpLoad %15 %17 + %20 = OpLoad %7 %9 + %31 = OpFConvert %30 %20 + %32 = OpFConvert %30 %20 + +; The first sample op should get a 16b coordinate + %21 = OpVectorShuffle %33 %31 %32 0 1 +;CHECK: [[uv_16b:%\w+]] = OpVectorShuffle [[vec2_16b_t]] + %22 = OpImageSampleImplicitLod %7 %18 %21 +;CHECK: OpImageSampleImplicitLod [[vec4_t]] {{%\w+}} [[uv_16b]] + + OpStore %13 %22 + %23 = OpLoad %15 %17 + %26 = OpLoad %7 %25 + +; The second sample op should get a 32b coordinate + %27 = OpVectorShuffle %19 %26 %26 0 1 +;CHECK: [[uv_32b:%\w+]] = OpVectorShuffle [[vec2_t]] + %28 = OpImageSampleImplicitLod %7 %23 %27 +;CHECK: OpImageSampleImplicitLod [[vec4_t]] {{%\w+}} [[uv_32b]] + + OpStore %13 %28 + OpReturn + OpFunctionEnd + )"; + + SinglePassRunAndMatch(test, true); +} + } // namespace } // namespace opt } // namespace spvtools