From 930ff499ee25d8606c6ca0b00084dea3ac0d1fd9 Mon Sep 17 00:00:00 2001
From: Matthias Clasen
Date: Wed, 5 May 2021 18:58:23 -0400
Subject: [PATCH] Confine -mf16c to a single source file

We can't use this flag for any code that may get
run outside the __builtin_cpu_supports() check,
and meson doesn't allow per-file cflags. So we
have to split this code off into its own static
library.

Review note: half_to_float4_f16c() stores its result with
_mm_storeu_ps() instead of _mm_store_ps(). The float f[4]
parameter carries no 16-byte alignment guarantee, and the
aligned store may fault on an unaligned address. This matches
the unaligned load already used by float_to_half4_f16c().
---
 gsk/meson.build       | 17 +++++++++++++-
 gsk/ngl/fp16.c        | 39 ++++---------------------------
 gsk/ngl/fp16i.c       | 53 +++++++++++++++++++++++++++++++++++++++++++
 gsk/ngl/fp16private.h |  6 +++++
 meson.build           |  3 ++-
 5 files changed, 82 insertions(+), 36 deletions(-)
 create mode 100644 gsk/ngl/fp16i.c

diff --git a/gsk/meson.build b/gsk/meson.build
index f351941c22..7b82108286 100644
--- a/gsk/meson.build
+++ b/gsk/meson.build
@@ -88,6 +88,10 @@ gsk_private_sources = files([
   'ngl/fp16.c',
 ])
 
+gsk_f16c_sources = files([
+  'ngl/fp16i.c',
+])
+
 gsk_public_headers = files([
   'gskcairorenderer.h',
   'gskenums.h',
@@ -209,6 +213,17 @@ gsk_deps = [
   libgdk_dep,
 ]
 
+libgsk_f16c = static_library('gsk_f16c',
+  sources: gsk_f16c_sources,
+  dependencies: gsk_deps,
+  include_directories: [ confinc, ],
+  c_args: [
+    '-DGTK_COMPILATION',
+    '-DG_LOG_DOMAIN="Gsk"',
+    '-DG_LOG_STRUCTURED=1',
+  ] + common_cflags + f16c_cflags,
+)
+
 libgsk = static_library('gsk',
   sources: [
     gsk_public_sources,
@@ -223,7 +238,7 @@ libgsk = static_library('gsk',
     '-DG_LOG_DOMAIN="Gsk"',
     '-DG_LOG_STRUCTURED=1',
   ] + common_cflags,
-  link_with: libgdk,
+  link_with: [ libgdk, libgsk_f16c]
 )
 
 # We don't have link_with: to internal static libs here on purpose, just
diff --git a/gsk/ngl/fp16.c b/gsk/ngl/fp16.c
index 1e11faafd8..100d13e997 100644
--- a/gsk/ngl/fp16.c
+++ b/gsk/ngl/fp16.c
@@ -18,14 +18,10 @@
  * SPDX-License-Identifier: LGPL-2.1-or-later
  */
 
-#include <config.h>
+#include "config.h"
 
 #include "fp16private.h"
 
-#ifdef HAVE_F16C
-#include <immintrin.h>
-#endif
-
 static inline guint
 as_uint (const float x)
 {
@@ -80,33 +76,6 @@ half_to_float4_c (const guint16 h[4],
 
 #ifdef HAVE_F16C
 
-#if defined(_MSC_VER) && !defined(__clang__)
-#define CAST_M128I_P(a) (__m128i const *) a
-#else
-#define CAST_M128I_P(a) (__m128i_u const *) a
-#endif
-
-static void
-float_to_half4_f16c (const float f[4],
-                     guint16     h[4])
-{
-  __m128 s = _mm_loadu_ps (f);
-  __m128i i = _mm_cvtps_ph (s, 0);
-  _mm_storel_epi64 ((__m128i*)h, i);
-}
-
-static void
-half_to_float4_f16c (const guint16 h[4],
-                     float         f[4])
-{
-  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
-  __m128 s = _mm_cvtph_ps (i);
-
-  _mm_store_ps (f, s);
-}
-
-#undef CAST_M128I_P
-
 #if defined(_MSC_VER) && !defined(__clang__)
 /* based on info from https://walbourn.github.io/directxmath-f16c-and-fma/ */
 static gboolean
@@ -154,6 +123,7 @@ half_to_float4 (const guint16 h[4], float f[4])
 }
 
 #else
+
 void float_to_half4 (const float f[4], guint16 h[4]) __attribute__((ifunc ("resolve_float_to_half4")));
 void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((ifunc ("resolve_half_to_float4")));
 
@@ -176,9 +146,10 @@ resolve_half_to_float4 (void)
   else
     return half_to_float4_c;
 }
+
 #endif
 
-#else
+#else /* ! HAVE_F16C */
 
 #if defined(__APPLE__) || (defined(_MSC_VER) && !defined(__clang__))
 // turns out aliases don't work on Darwin nor Visual Studio
@@ -204,4 +175,4 @@ void half_to_float4 (const guint16 h[4], float f[4]) __attribute__((alias ("half
 
 #endif
 
-#endif /* GTK_HAS_F16C */
+#endif /* HAVE_F16C */
diff --git a/gsk/ngl/fp16i.c b/gsk/ngl/fp16i.c
new file mode 100644
index 0000000000..74c5827ff8
--- /dev/null
+++ b/gsk/ngl/fp16i.c
@@ -0,0 +1,53 @@
+/* fp16i.c
+ *
+ * Copyright 2021 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: LGPL-2.1-or-later
+ */
+
+#include "config.h"
+
+#include "fp16private.h"
+
+#ifdef HAVE_F16C
+#include <immintrin.h>
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#define CAST_M128I_P(a) (__m128i const *) a
+#else
+#define CAST_M128I_P(a) (__m128i_u const *) a
+#endif
+
+void
+float_to_half4_f16c (const float f[4],
+                     guint16     h[4])
+{
+  __m128 s = _mm_loadu_ps (f);
+  __m128i i = _mm_cvtps_ph (s, 0);
+  _mm_storel_epi64 ((__m128i*)h, i);
+}
+
+void
+half_to_float4_f16c (const guint16 h[4],
+                     float         f[4])
+{
+  __m128i i = _mm_loadl_epi64 (CAST_M128I_P (h));
+  __m128 s = _mm_cvtph_ps (i);
+  /* f[] has no alignment guarantee, so use the unaligned store. */
+  _mm_storeu_ps (f, s);
+}
+
+#endif /* HAVE_F16C */
diff --git a/gsk/ngl/fp16private.h b/gsk/ngl/fp16private.h
index d76f18a04f..a2c53d6c2d 100644
--- a/gsk/ngl/fp16private.h
+++ b/gsk/ngl/fp16private.h
@@ -35,6 +35,12 @@ void float_to_half4 (const float f[4],
 void half_to_float4 (const guint16 h[4],
                      float         f[4]);
 
+void float_to_half4_f16c (const float f[4],
+                          guint16     h[4]);
+
+void half_to_float4_f16c (const guint16 h[4],
+                          float         f[4]);
+
 G_END_DECLS
 
 #endif
diff --git a/meson.build b/meson.build
index d8660cbe85..9ae976d40a 100644
--- a/meson.build
+++ b/meson.build
@@ -728,7 +728,8 @@ int main () {
   if cc.compiles(f16c_prog, args: test_f16c_cflags, name: 'F16C intrinsics')
     cdata.set('HAVE_F16C', 1)
     f16c_cflags = test_f16c_cflags
-    common_cflags += test_f16c_cflags
+  else
+    f16c_cflags = []
   endif
 endif
 