Merge pull request #2093 from rdb/emulate-unsupported-hyperbolic-funcs

GLSL/HLSL: Add emulated fallbacks for hyperbolic math functions
2023-01-27 12:15:28 +01:00 · 2023-01-27 12:15:28 +01:00 · 86a985f28c
commit 86a985f28c
parent cd612e7dde 53974b4fae
9 changed files with 384 additions and 7 deletions
--- a/reference/opt/shaders-hlsl/frag/hyperbolic.frag
+++ b/reference/opt/shaders-hlsl/frag/hyperbolic.frag
@ -0,0 +1,65 @@
+static float4 result;
+static float scalar;
+static float3 _vector;
+
+struct SPIRV_Cross_Input
+{
+    float scalar : TEXCOORD0;
+    float3 _vector : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 result : SV_Target0;
+};
+
+void frag_main()
+{
+    result = 1.0f.xxxx;
+    result.w *= sinh(scalar);
+    result.w *= cosh(scalar);
+    result.w *= tanh(scalar);
+    result.w *= log(scalar + sqrt(scalar * scalar + 1.0f));
+    result.w *= log(scalar + sqrt(scalar * scalar - 1.0f));
+    result.w *= (log((1.0f + scalar) / (1.0f - scalar)) * 0.5f);
+    float4 _58 = result;
+    float3 _60 = _58.xyz * sinh(_vector);
+    result.x = _60.x;
+    result.y = _60.y;
+    result.z = _60.z;
+    float4 _72 = result;
+    float3 _74 = _72.xyz * cosh(_vector);
+    result.x = _74.x;
+    result.y = _74.y;
+    result.z = _74.z;
+    float4 _83 = result;
+    float3 _85 = _83.xyz * tanh(_vector);
+    result.x = _85.x;
+    result.y = _85.y;
+    result.z = _85.z;
+    float4 _94 = result;
+    float3 _96 = _94.xyz * log(_vector + sqrt(_vector * _vector + 1.0f));
+    result.x = _96.x;
+    result.y = _96.y;
+    result.z = _96.z;
+    float4 _105 = result;
+    float3 _107 = _105.xyz * log(_vector + sqrt(_vector * _vector - 1.0f));
+    result.x = _107.x;
+    result.y = _107.y;
+    result.z = _107.z;
+    float4 _116 = result;
+    float3 _118 = _116.xyz * (log((1.0f + _vector) / (1.0f - _vector)) * 0.5f);
+    result.x = _118.x;
+    result.y = _118.y;
+    result.z = _118.z;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    scalar = stage_input.scalar;
+    _vector = stage_input._vector;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.result = result;
+    return stage_output;
+}
--- a/reference/opt/shaders/legacy/fragment/hyperbolic.legacy.frag
+++ b/reference/opt/shaders/legacy/fragment/hyperbolic.legacy.frag
@ -0,0 +1,52 @@
+#version 100
+precision mediump float;
+precision highp int;
+
+varying highp float scalar;
+varying highp vec3 vector;
+
+void main()
+{
+    gl_FragData[0] = vec4(1.0);
+    gl_FragData[0].w *= ((exp(scalar) - exp(-scalar)) * 0.5);
+    gl_FragData[0].w *= ((exp(scalar) + exp(-scalar)) * 0.5);
+    highp float _125 = exp(scalar);
+    highp float _126 = exp(-scalar);
+    gl_FragData[0].w *= ((_125 - _126) / (_125 + _126));
+    gl_FragData[0].w *= log(scalar + sqrt(scalar * scalar + 1.0));
+    gl_FragData[0].w *= log(scalar + sqrt(scalar * scalar - 1.0));
+    gl_FragData[0].w *= (log((1.0 + scalar) / (1.0 - scalar)) * 0.5);
+    highp vec4 _58 = gl_FragData[0];
+    highp vec3 _60 = _58.xyz * ((exp(vector) - exp(-vector)) * 0.5);
+    gl_FragData[0].x = _60.x;
+    gl_FragData[0].y = _60.y;
+    gl_FragData[0].z = _60.z;
+    highp vec4 _72 = gl_FragData[0];
+    highp vec3 _74 = _72.xyz * ((exp(vector) + exp(-vector)) * 0.5);
+    gl_FragData[0].x = _74.x;
+    gl_FragData[0].y = _74.y;
+    gl_FragData[0].z = _74.z;
+    highp vec3 _127 = exp(vector);
+    highp vec3 _128 = exp(-vector);
+    highp vec4 _83 = gl_FragData[0];
+    highp vec3 _85 = _83.xyz * ((_127 - _128) / (_127 + _128));
+    gl_FragData[0].x = _85.x;
+    gl_FragData[0].y = _85.y;
+    gl_FragData[0].z = _85.z;
+    highp vec4 _94 = gl_FragData[0];
+    highp vec3 _96 = _94.xyz * log(vector + sqrt(vector * vector + 1.0));
+    gl_FragData[0].x = _96.x;
+    gl_FragData[0].y = _96.y;
+    gl_FragData[0].z = _96.z;
+    highp vec4 _105 = gl_FragData[0];
+    highp vec3 _107 = _105.xyz * log(vector + sqrt(vector * vector - 1.0));
+    gl_FragData[0].x = _107.x;
+    gl_FragData[0].y = _107.y;
+    gl_FragData[0].z = _107.z;
+    highp vec4 _116 = gl_FragData[0];
+    highp vec3 _118 = _116.xyz * (log((1.0 + vector) / (1.0 - vector)) * 0.5);
+    gl_FragData[0].x = _118.x;
+    gl_FragData[0].y = _118.y;
+    gl_FragData[0].z = _118.z;
+}
+
--- a/reference/shaders-hlsl/frag/hyperbolic.frag
+++ b/reference/shaders-hlsl/frag/hyperbolic.frag
@ -0,0 +1,65 @@
+static float4 result;
+static float scalar;
+static float3 _vector;
+
+struct SPIRV_Cross_Input
+{
+    float scalar : TEXCOORD0;
+    float3 _vector : TEXCOORD1;
+};
+
+struct SPIRV_Cross_Output
+{
+    float4 result : SV_Target0;
+};
+
+void frag_main()
+{
+    result = 1.0f.xxxx;
+    result.w *= sinh(scalar);
+    result.w *= cosh(scalar);
+    result.w *= tanh(scalar);
+    result.w *= log(scalar + sqrt(scalar * scalar + 1.0f));
+    result.w *= log(scalar + sqrt(scalar * scalar - 1.0f));
+    result.w *= (log((1.0f + scalar) / (1.0f - scalar)) * 0.5f);
+    float4 _58 = result;
+    float3 _60 = _58.xyz * sinh(_vector);
+    result.x = _60.x;
+    result.y = _60.y;
+    result.z = _60.z;
+    float4 _72 = result;
+    float3 _74 = _72.xyz * cosh(_vector);
+    result.x = _74.x;
+    result.y = _74.y;
+    result.z = _74.z;
+    float4 _83 = result;
+    float3 _85 = _83.xyz * tanh(_vector);
+    result.x = _85.x;
+    result.y = _85.y;
+    result.z = _85.z;
+    float4 _94 = result;
+    float3 _96 = _94.xyz * log(_vector + sqrt(_vector * _vector + 1.0f));
+    result.x = _96.x;
+    result.y = _96.y;
+    result.z = _96.z;
+    float4 _105 = result;
+    float3 _107 = _105.xyz * log(_vector + sqrt(_vector * _vector - 1.0f));
+    result.x = _107.x;
+    result.y = _107.y;
+    result.z = _107.z;
+    float4 _116 = result;
+    float3 _118 = _116.xyz * (log((1.0f + _vector) / (1.0f - _vector)) * 0.5f);
+    result.x = _118.x;
+    result.y = _118.y;
+    result.z = _118.z;
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    scalar = stage_input.scalar;
+    _vector = stage_input._vector;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.result = result;
+    return stage_output;
+}
--- a/reference/shaders/legacy/fragment/hyperbolic.legacy.frag
+++ b/reference/shaders/legacy/fragment/hyperbolic.legacy.frag
@ -0,0 +1,52 @@
+#version 100
+precision mediump float;
+precision highp int;
+
+varying highp float scalar;
+varying highp vec3 vector;
+
+void main()
+{
+    gl_FragData[0] = vec4(1.0);
+    gl_FragData[0].w *= ((exp(scalar) - exp(-scalar)) * 0.5);
+    gl_FragData[0].w *= ((exp(scalar) + exp(-scalar)) * 0.5);
+    highp float _125 = exp(scalar);
+    highp float _126 = exp(-scalar);
+    gl_FragData[0].w *= ((_125 - _126) / (_125 + _126));
+    gl_FragData[0].w *= log(scalar + sqrt(scalar * scalar + 1.0));
+    gl_FragData[0].w *= log(scalar + sqrt(scalar * scalar - 1.0));
+    gl_FragData[0].w *= (log((1.0 + scalar) / (1.0 - scalar)) * 0.5);
+    highp vec4 _58 = gl_FragData[0];
+    highp vec3 _60 = _58.xyz * ((exp(vector) - exp(-vector)) * 0.5);
+    gl_FragData[0].x = _60.x;
+    gl_FragData[0].y = _60.y;
+    gl_FragData[0].z = _60.z;
+    highp vec4 _72 = gl_FragData[0];
+    highp vec3 _74 = _72.xyz * ((exp(vector) + exp(-vector)) * 0.5);
+    gl_FragData[0].x = _74.x;
+    gl_FragData[0].y = _74.y;
+    gl_FragData[0].z = _74.z;
+    highp vec3 _127 = exp(vector);
+    highp vec3 _128 = exp(-vector);
+    highp vec4 _83 = gl_FragData[0];
+    highp vec3 _85 = _83.xyz * ((_127 - _128) / (_127 + _128));
+    gl_FragData[0].x = _85.x;
+    gl_FragData[0].y = _85.y;
+    gl_FragData[0].z = _85.z;
+    highp vec4 _94 = gl_FragData[0];
+    highp vec3 _96 = _94.xyz * log(vector + sqrt(vector * vector + 1.0));
+    gl_FragData[0].x = _96.x;
+    gl_FragData[0].y = _96.y;
+    gl_FragData[0].z = _96.z;
+    highp vec4 _105 = gl_FragData[0];
+    highp vec3 _107 = _105.xyz * log(vector + sqrt(vector * vector - 1.0));
+    gl_FragData[0].x = _107.x;
+    gl_FragData[0].y = _107.y;
+    gl_FragData[0].z = _107.z;
+    highp vec4 _116 = gl_FragData[0];
+    highp vec3 _118 = _116.xyz * (log((1.0 + vector) / (1.0 - vector)) * 0.5);
+    gl_FragData[0].x = _118.x;
+    gl_FragData[0].y = _118.y;
+    gl_FragData[0].z = _118.z;
+}
+
--- a/shaders-hlsl/frag/hyperbolic.frag
+++ b/shaders-hlsl/frag/hyperbolic.frag
@ -0,0 +1,26 @@
+#version 450
+
+layout(location=0) in float scalar;
+layout(location=1) in vec3 vector;
+
+layout(location=0) out vec4 result;
+
+void main() { // Calls each of the six hyperbolic built-ins once on `scalar` and once on `vector`.
+	result = vec4(1.0); // Multiplicative identity; every call below is folded into result with *=.
+
+	result.w *= sinh(scalar); // Scalar forms accumulate into the w component.
+	result.w *= cosh(scalar);
+	result.w *= tanh(scalar);
+
+	result.w *= asinh(scalar); // Inverse hyperbolics, scalar form.
+	result.w *= acosh(scalar);
+	result.w *= atanh(scalar);
+
+	result.xyz *= sinh(vector); // vec3 forms accumulate into the xyz components.
+	result.xyz *= cosh(vector);
+	result.xyz *= tanh(vector);
+
+	result.xyz *= asinh(vector); // Inverse hyperbolics, vec3 form.
+	result.xyz *= acosh(vector);
+	result.xyz *= atanh(vector);
+}
--- a/shaders/legacy/fragment/hyperbolic.legacy.frag
+++ b/shaders/legacy/fragment/hyperbolic.legacy.frag
@ -0,0 +1,26 @@
+#version 450
+
+layout(location=0) in float scalar;
+layout(location=1) in vec3 vector;
+
+layout(location=0) out vec4 result;
+
+void main() { // Calls each of the six hyperbolic built-ins once on `scalar` and once on `vector`.
+	result = vec4(1.0); // Multiplicative identity; every call below is folded into result with *=.
+
+	result.w *= sinh(scalar); // Scalar forms accumulate into the w component.
+	result.w *= cosh(scalar);
+	result.w *= tanh(scalar);
+
+	result.w *= asinh(scalar); // Inverse hyperbolics, scalar form.
+	result.w *= acosh(scalar);
+	result.w *= atanh(scalar);
+
+	result.xyz *= sinh(vector); // vec3 forms accumulate into the xyz components.
+	result.xyz *= cosh(vector);
+	result.xyz *= tanh(vector);
+
+	result.xyz *= asinh(vector); // Inverse hyperbolics, vec3 form.
+	result.xyz *= acosh(vector);
+	result.xyz *= atanh(vector);
+}
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -8021,22 +8021,77 @@ void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		emit_unary_func_op(result_type, id, args[0], "atan");
 		break;
 	case GLSLstd450Sinh:
-		emit_unary_func_op(result_type, id, args[0], "sinh");
+		if (!is_legacy())
+			emit_unary_func_op(result_type, id, args[0], "sinh");
+		else
+		{
+			bool forward = should_forward(args[0]);
+			auto expr = join("(exp(", to_expression(args[0]), ") - exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
+			emit_op(result_type, id, expr, forward);
+			inherit_expression_dependencies(id, args[0]);
+		}
 		break;
 	case GLSLstd450Cosh:
-		emit_unary_func_op(result_type, id, args[0], "cosh");
+		if (!is_legacy())
+			emit_unary_func_op(result_type, id, args[0], "cosh");
+		else
+		{
+			bool forward = should_forward(args[0]);
+			auto expr = join("(exp(", to_expression(args[0]), ") + exp(-", to_enclosed_expression(args[0]), ")) * 0.5");
+			emit_op(result_type, id, expr, forward);
+			inherit_expression_dependencies(id, args[0]);
+		}
 		break;
 	case GLSLstd450Tanh:
-		emit_unary_func_op(result_type, id, args[0], "tanh");
+		if (!is_legacy())
+			emit_unary_func_op(result_type, id, args[0], "tanh");
+		else
+		{
+			// Create temporaries to store the result of exp(arg) and exp(-arg).
+			uint32_t &ids = extra_sub_expressions[id];
+			if (!ids)
+			{
+				ids = ir.increase_bound_by(2);
+
+				// Inherit precision qualifier (legacy has no NoContraction).
+				if (has_decoration(id, DecorationRelaxedPrecision))
+				{
+					set_decoration(ids, DecorationRelaxedPrecision);
+					set_decoration(ids + 1, DecorationRelaxedPrecision);
+				}
+			}
+			uint32_t epos_id = ids;
+			uint32_t eneg_id = ids + 1;
+
+			emit_op(result_type, epos_id, join("exp(", to_expression(args[0]), ")"), false);
+			emit_op(result_type, eneg_id, join("exp(-", to_enclosed_expression(args[0]), ")"), false);
+			inherit_expression_dependencies(epos_id, args[0]);
+			inherit_expression_dependencies(eneg_id, args[0]);
+
+			auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), ") / "
+			                 "(", to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")");
+			emit_op(result_type, id, expr, true);
+			inherit_expression_dependencies(id, epos_id);
+			inherit_expression_dependencies(id, eneg_id);
+		}
 		break;
 	case GLSLstd450Asinh:
-		emit_unary_func_op(result_type, id, args[0], "asinh");
+		if (!is_legacy())
+			emit_unary_func_op(result_type, id, args[0], "asinh");
+		else
+			emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Asinh);
 		break;
 	case GLSLstd450Acosh:
-		emit_unary_func_op(result_type, id, args[0], "acosh");
+		if (!is_legacy())
+			emit_unary_func_op(result_type, id, args[0], "acosh");
+		else
+			emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Acosh);
 		break;
 	case GLSLstd450Atanh:
-		emit_unary_func_op(result_type, id, args[0], "atanh");
+		if (!is_legacy())
+			emit_unary_func_op(result_type, id, args[0], "atanh");
+		else
+			emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Atanh);
 		break;
 	case GLSLstd450Atan2:
 		emit_binary_func_op(result_type, id, args[0], args[1], "atan");
@ -8285,6 +8340,39 @@ void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t o
 	emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id);
 }

+// Emits a log/sqrt based replacement for an inverse hyperbolic function:
+//   asinh(x) -> log(x + sqrt(x * x + 1))
+//   acosh(x) -> log(x + sqrt(x * x - 1))
+//   atanh(x) -> log((1 + x) / (1 - x)) * 0.5
+// result_type and id identify the result being produced, op0 is the operand id and
+// op selects which of the three functions to emulate. Any other op is rejected
+// through SPIRV_CROSS_THROW.
+void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op)
+{
+	// Literals only get an "f" suffix on backends that ask for one.
+	const char *one = backend.float_literal_suffix ? "1.0f" : "1.0";
+	std::string expr;
+	bool forward = should_forward(op0);
+
+	// The cases only build the expression text. The result is emitted exactly once,
+	// after the switch, so that every op takes the same emit path.
+	// NOTE(review): op0 is stringified once per textual occurrence rather than cached;
+	// to_enclosed_expression may do per-use bookkeeping - confirm before hoisting it.
+	switch (op)
+	{
+	case GLSLstd450Asinh:
+		expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
+		            to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " + ", one, "))");
+		break;
+
+	case GLSLstd450Acosh:
+		expr = join("log(", to_enclosed_expression(op0), " + sqrt(",
+		            to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " - ", one, "))");
+		break;
+
+	case GLSLstd450Atanh:
+		expr = join("log((", one, " + ", to_enclosed_expression(op0), ") / "
+		            "(", one, " - ", to_enclosed_expression(op0), ")) * 0.5",
+		            backend.float_literal_suffix ? "f" : "");
+		break;
+
+	default:
+		SPIRV_CROSS_THROW("Invalid op.");
+	}
+
+	emit_op(result_type, id, expr, forward);
+	inherit_expression_dependencies(id, op0);
+}
+
 void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args,
                                                 uint32_t)
 {
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -663,6 +663,7 @@ protected:
 	bool should_suppress_usage_tracking(uint32_t id) const;
 	void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
 	void emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op);
+	void emit_emulated_ahyper_op(uint32_t result_type, uint32_t result_id, uint32_t op0, GLSLstd450 op);
 	bool to_trivial_mix_op(const SPIRType &type, std::string &op, uint32_t left, uint32_t right, uint32_t lerp);
 	void emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, uint32_t op2,
 	                             uint32_t op3, const char *op);
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@ -4131,7 +4131,9 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 	case GLSLstd450Acosh:
 	case GLSLstd450Asinh:
 	case GLSLstd450Atanh:
-		SPIRV_CROSS_THROW("Inverse hyperbolics are not supported on HLSL.");
+		// These are not supported in HLSL, always emulate them.
+		emit_emulated_ahyper_op(result_type, id, args[0], op);
+		break;

 	case GLSLstd450FMix:
 	case GLSLstd450IMix: