Confine -mf16c to a single source file

The -mf16c flag lets the compiler emit F16C instructions anywhere in a translation unit, so we can't use it for any code that may run outside the __builtin_cpu_supports() check. Meson doesn't allow per-file cflags, so we have to split this code off into its own static library.
2024-12-24 12:41:16 +00:00 · 2021-05-05 18:58:23 -04:00 · 2021-05-05 18:58:23 -04:00 · 930ff499ee
commit 930ff499ee
parent 399fb76652
5 changed files with 82 additions and 36 deletions
--- a/gsk/meson.build
+++ b/gsk/meson.build
@ -88,6 +88,10 @@ gsk_private_sources = files([
  'ngl/fp16.c',
 ])

+gsk_f16c_sources = files([  # built separately as the 'gsk_f16c' static library
+  'ngl/fp16i.c',
+])
+
 gsk_public_headers = files([
  'gskcairorenderer.h',
  'gskenums.h',
@ -209,6 +213,17 @@ gsk_deps = [
  libgdk_dep,
 ]

+libgsk_f16c = static_library('gsk_f16c',  # separate library so f16c_cflags apply to these sources only
+  sources: gsk_f16c_sources,
+  dependencies: gsk_deps,
+  include_directories: [ confinc, ],
+  c_args: [
+    '-DGTK_COMPILATION',
+    '-DG_LOG_DOMAIN="Gsk"',
+    '-DG_LOG_STRUCTURED=1',
+  ] + common_cflags + f16c_cflags,  # f16c_cflags is added here, not via common_cflags
+)
+
 libgsk = static_library('gsk',
  sources: [
    gsk_public_sources,
@ -223,7 +238,7 @@ libgsk = static_library('gsk',
    '-DG_LOG_DOMAIN="Gsk"',
    '-DG_LOG_STRUCTURED=1',
  ] + common_cflags,
-  link_with: libgdk,
+  link_with: [ libgdk, libgsk_f16c]
 )

 # We don't have link_with: to internal static libs here on purpose, just
--- a/gsk/ngl/fp16.c
+++ b/gsk/ngl/fp16.c
@ -18,14 +18,10 @@
 * SPDX-License-Identifier: LGPL-2.1-or-later
 */

-#include <config.h>
+#include "config.h"

 #include "fp16private.h"

-#ifdef HAVE_F16C
-#include <immintrin.h>
-#endif
-
 static inline guint
 as_uint (const float x)
 {
@ -80,33 +76,6 @@ half_to_float4_c (const guint16 h[4],

 #ifdef HAVE_F16C

-#if defined(_MSC_VER) && !defined(__clang__)
-#define CAST_M128I_P(a) (__m128i const *) a
-#else
-#define CAST_M128I_P(a) (__m128i_u const *) a
-#endif
-
-static void
-float_to_half4_f16c (const float f[4],
-                     guint16     h[4])
-{
-  __m128 s = _mm_loadu_ps (f);
-  __m128i i = _mm_cvtps_ph (s, 0);
-  _mm_storel_epi64 ((__m128i*)h, i);
-}
-
-static void
-half_to_float4_f16c (const guint16 h[4],
-                     float         f[4])
-{
-  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
-  __m128 s = _mm_cvtph_ps (i);
-
-  _mm_store_ps (f, s);
-}
-
-#undef CAST_M128I_P
-
 #if defined(_MSC_VER) && !defined(__clang__)
 /* based on info from https://walbourn.github.io/directxmath-f16c-and-fma/ */
 static gboolean
@ -154,6 +123,7 @@ half_to_float4 (const guint16 h[4], float f[4])
 }

 #else
+
 void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
 void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));

@ -176,9 +146,10 @@ resolve_half_to_float4 (void)
  else
    return half_to_float4_c;
 }
+
 #endif

-#else
+#else /* ! HAVE_F16C */

 #if defined(__APPLE__) || (defined(_MSC_VER) && !defined(__clang__))
 // turns out aliases don't work on Darwin nor Visual Studio
@ -204,4 +175,4 @@ void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half

 #endif

-#endif  /* GTK_HAS_F16C */
+#endif  /* HAVE_F16C */
--- a/gsk/ngl/fp16i.c
+++ b/gsk/ngl/fp16i.c
@ -0,0 +1,53 @@
+/* fp16i.c
+ *
+ * Copyright 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include "config.h"
+
+#include "fp16private.h"
+
+#ifdef HAVE_F16C
+#include <immintrin.h>
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CAST_M128I_P(a) (__m128i const *) a
+#else
+#define CAST_M128I_P(a) (__m128i_u const *) a
+#endif
+
+/* Convert four single-precision floats to four half-precision values
+ * using the F16C conversion intrinsic.
+ *
+ * f: the four input floats; read with an unaligned load.
+ * h: receives the four 16-bit results (the low 64 bits of the vector).
+ */
+void
+float_to_half4_f16c (const float f[4],
+                     guint16     h[4])
+{
+  /* The immediate 0 is the rounding-control argument of _mm_cvtps_ph. */
+  __m128i halfs = _mm_cvtps_ph (_mm_loadu_ps (f), 0);
+
+  /* Only the low 64 bits carry the four converted values. */
+  _mm_storel_epi64 ((__m128i *) h, halfs);
+}
+
+/* Convert four half-precision values to four single-precision floats
+ * using the F16C conversion intrinsic.
+ *
+ * h: the four 16-bit inputs; loaded as the low 64 bits of a vector.
+ * f: receives the four converted floats; no alignment is required.
+ */
+void
+half_to_float4_f16c (const guint16 h[4],
+                     float         f[4])
+{
+  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
+  __m128 s = _mm_cvtph_ps (i);
+
+  /* Use the unaligned store: f is a plain float[4] with no 16-byte
+   * alignment guarantee, which _mm_store_ps would require. This also
+   * matches the unaligned load used by float_to_half4_f16c(). */
+  _mm_storeu_ps (f, s);
+}
+
+#endif  /* HAVE_F16C */
--- a/gsk/ngl/fp16private.h
+++ b/gsk/ngl/fp16private.h
@ -35,6 +35,12 @@ void float_to_half4 (const float f[4],
 void half_to_float4 (const guint16 h[4],
                     float         f[4]);

+void float_to_half4_f16c (const float f[4],
+                          guint16     h[4]);  /* F16C variant: 4 floats -> 4 halfs; defined in fp16i.c under HAVE_F16C */
+
+void half_to_float4_f16c (const guint16 h[4],
+                          float         f[4]);  /* F16C variant: 4 halfs -> 4 floats; defined in fp16i.c under HAVE_F16C */
+
 G_END_DECLS

 #endif
--- a/meson.build
+++ b/meson.build
@ -728,7 +728,8 @@ int main () {
  if cc.compiles(f16c_prog, args: test_f16c_cflags, name: 'F16C intrinsics')
    cdata.set('HAVE_F16C', 1)
    f16c_cflags = test_f16c_cflags
-    common_cflags += test_f16c_cflags
+  else
+    f16c_cflags = []
  endif
 endif