Confine -mf16c to a single source file

We can't use this flag for any code that may get run outside the __builtin_cpu_supports() check, and meson doesn't allow per-file cflags. So we have to split this code off into its own static library.
2025-01-12 05:20:17 +00:00 · 2021-05-05 18:58:23 -04:00 · 2021-05-05 18:58:23 -04:00 · 930ff499ee
commit 930ff499ee
parent 399fb76652
5 changed files with 82 additions and 36 deletions
--- a/gsk/meson.build
+++ b/gsk/meson.build
@ -88,6 +88,10 @@ gsk_private_sources = files([
  'ngl/fp16.c',
 ])
 # Sources built with the F16C compiler flags. They are listed separately from
 # the other gsk sources so they can go into their own static library.
 gsk_f16c_sources = files([
  'ngl/fp16i.c',
 ])
 gsk_public_headers = files([
  'gskcairorenderer.h',
  'gskenums.h',
@ -209,6 +213,17 @@ gsk_deps = [
  libgdk_dep,
 ]
 # Static library containing only the F16C code. It is the only target that
 # adds f16c_cflags: code built with those flags must not run outside the
 # runtime CPU-support check, and meson has no per-file cflags, so the code
 # is split off into a library of its own.
 libgsk_f16c = static_library('gsk_f16c',
  sources: gsk_f16c_sources,
  dependencies: gsk_deps,
  include_directories: [ confinc, ],
  c_args: [
    '-DGTK_COMPILATION',
    '-DG_LOG_DOMAIN="Gsk"',
    '-DG_LOG_STRUCTURED=1',
  ] + common_cflags + f16c_cflags,
 )
 libgsk = static_library('gsk',
  sources: [
    gsk_public_sources,
@ -223,7 +238,7 @@ libgsk = static_library('gsk',
    '-DG_LOG_DOMAIN="Gsk"',
    '-DG_LOG_STRUCTURED=1',
  ] + common_cflags,
-  link_with: libgdk,
+  link_with: [ libgdk, libgsk_f16c]
 )
 # We don't have link_with: to internal static libs here on purpose, just
--- a/gsk/ngl/fp16.c
+++ b/gsk/ngl/fp16.c
@ -18,14 +18,10 @@
 * SPDX-License-Identifier: LGPL-2.1-or-later
 */
-#include <config.h>
+#include "config.h"
 #include "fp16private.h"
 #ifdef HAVE_F16C
 #include <immintrin.h>
 #endif
 static inline guint
 as_uint (const float x)
 {
@ -80,33 +76,6 @@ half_to_float4_c (const guint16 h[4],
 #ifdef HAVE_F16C
 #if defined(_MSC_VER) && !defined(__clang__)
 #define CAST_M128I_P(a) (__m128i const *) a
 #else
 #define CAST_M128I_P(a) (__m128i_u const *) a
 #endif
 /* Converts the four floats in f to half-precision values using the F16C
  * intrinsics and writes the four 16-bit results to h.
  */
 static void
 float_to_half4_f16c (const float f[4],
                     guint16     h[4])
 {
  /* Unaligned load: f is not required to be 16-byte aligned. */
  __m128 s = _mm_loadu_ps (f);
  /* The immediate 0 is the rounding-control argument of the conversion. */
  __m128i i = _mm_cvtps_ph (s, 0);
  /* Only the low 64 bits (four packed halves) are written out. */
  _mm_storel_epi64 ((__m128i*)h, i);
 }
 /* Converts the four half-precision values in h to floats using the F16C
  * intrinsics and writes the results to f.
  */
 static void
 half_to_float4_f16c (const guint16 h[4],
                     float         f[4])
 {
  /* Loads the low 64 bits, i.e. the four packed 16-bit inputs. */
  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
  __m128 s = _mm_cvtph_ps (i);
  /* NOTE(review): _mm_store_ps is the aligned store, while the sibling
   * float_to_half4_f16c uses the unaligned _mm_loadu_ps - confirm that
   * callers always pass a 16-byte-aligned f.
   */
  _mm_store_ps (f, s);
 }
 #undef CAST_M128I_P
 #if defined(_MSC_VER) && !defined(__clang__)
 /* based on info from https://walbourn.github.io/directxmath-f16c-and-fma/ */
 static gboolean
@ -154,6 +123,7 @@ half_to_float4 (const guint16 h[4], float f[4])
 }
 #else
 void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
 void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
@ -176,9 +146,10 @@ resolve_half_to_float4 (void)
  else
    return half_to_float4_c;
 }
 #endif
-#else
+#else /* ! HAVE_F16C */
 #if defined(__APPLE__) || (defined(_MSC_VER) && !defined(__clang__))
 // turns out aliases work on neither Darwin nor Visual Studio
@ -204,4 +175,4 @@ void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half
 #endif
-#endif  /* GTK_HAS_F16C */
+#endif  /* HAVE_F16C */
--- a/gsk/ngl/fp16i.c
+++ b/gsk/ngl/fp16i.c
@ -0,0 +1,53 @@
 /* fp16i.c
 *
 * Copyright 2021 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later
 */
 #include "config.h"
 #include "fp16private.h"
 #ifdef HAVE_F16C
 #include <immintrin.h>
 #if defined(_MSC_VER) && !defined(__clang__)
 #define CAST_M128I_P(a) (__m128i const *) a
 #else
 #define CAST_M128I_P(a) (__m128i_u const *) a
 #endif
 /* float_to_half4_f16c:
  * @f: four single-precision input values
  * @h: receives the four converted 16-bit half-precision values
  *
  * F16C-accelerated float -> half conversion. This file is compiled with
  * the F16C compiler flags, so the function must only be reached after the
  * runtime CPU-support check has succeeded.
  */
 void
 float_to_half4_f16c (const float f[4],
                     guint16     h[4])
 {
  /* f carries no alignment guarantee, hence the unaligned load. */
  const __m128 singles = _mm_loadu_ps (f);
  const __m128i halves = _mm_cvtps_ph (singles, _MM_FROUND_TO_NEAREST_INT);

  /* The four packed halves occupy the low 64 bits of the result. */
  _mm_storel_epi64 ((__m128i *) h, halves);
 }
 /* half_to_float4_f16c:
  * @h: four 16-bit half-precision input values
  * @f: receives the four converted single-precision values
  *
  * F16C-accelerated half -> float conversion. This file is compiled with
  * the F16C compiler flags, so the function must only be reached after the
  * runtime CPU-support check has succeeded.
  */
 void
 half_to_float4_f16c (const guint16 h[4],
                     float         f[4])
 {
  /* Load just the low 64 bits, i.e. the four packed 16-bit inputs. */
  __m128i packed = _mm_loadl_epi64 (CAST_M128I_P (h));
  __m128 widened = _mm_cvtph_ps (packed);

  /* Use the unaligned store: a float[4] is only guaranteed float alignment,
   * and the aligned _mm_store_ps faults on a destination that is not
   * 16-byte aligned. This mirrors the unaligned load in float_to_half4_f16c.
   */
  _mm_storeu_ps (f, widened);
 }
 #endif  /* HAVE_F16C */
--- a/gsk/ngl/fp16private.h
+++ b/gsk/ngl/fp16private.h
@ -35,6 +35,12 @@ void float_to_half4 (const float f[4],
 void half_to_float4 (const guint16 h[4],
                     float         f[4]);
 void float_to_half4_f16c (const float f[4],
                          guint16     h[4]);
 void half_to_float4_f16c (const guint16 h[4],
                          float         f[4]);
 G_END_DECLS
 #endif
--- a/meson.build
+++ b/meson.build
@ -728,7 +728,8 @@ int main () {
  if cc.compiles(f16c_prog, args: test_f16c_cflags, name: 'F16C intrinsics')
    cdata.set('HAVE_F16C', 1)
    f16c_cflags = test_f16c_cflags
-    common_cflags += test_f16c_cflags
+  else
    f16c_cflags = []
  endif
 endif