mirror of
https://gitlab.gnome.org/GNOME/gtk.git
synced 2025-01-04 09:40:19 +00:00
Merge branch 'lrn/aligned-alloc' into 'master'
Aligned allocators for GTK4 Closes #856 See merge request GNOME/gtk!187
This commit is contained in:
commit
bc13a58f3c
@ -305,3 +305,13 @@
|
||||
#mesondefine HAVE_PANGOFT
|
||||
|
||||
#mesondefine ISO_CODES_PREFIX
|
||||
|
||||
#mesondefine MALLOC_IS_ALIGNED16
|
||||
|
||||
#mesondefine HAVE_POSIX_MEMALIGN
|
||||
|
||||
#mesondefine HAVE_MEMALIGN
|
||||
|
||||
#mesondefine HAVE_ALIGNED_ALLOC
|
||||
|
||||
#mesondefine HAVE__ALIGNED_MALLOC
|
||||
|
130
gsk/gskalloc.c
Normal file
130
gsk/gskalloc.c
Normal file
@ -0,0 +1,130 @@
|
||||
#include "config.h"
|
||||
|
||||
#if defined(HAVE_POSIX_MEMALIGN) && !defined(_XOPEN_SOURCE)
|
||||
# define _XOPEN_SOURCE 600
|
||||
#endif
|
||||
|
||||
#include "gskallocprivate.h"
|
||||
|
||||
#if defined(HAVE_MEMALIGN) || defined(HAVE__ALIGNED_MALLOC)
|
||||
/* Required for _aligned_malloc() and _aligned_free() on Windows */
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef HAVE__ALIGNED_MALLOC
|
||||
/* _aligned_malloc() takes parameters of aligned_alloc() in reverse order */
|
||||
# define aligned_alloc(alignment, size) _aligned_malloc (size, alignment)
|
||||
|
||||
/* _aligned_malloc()'ed memory must be freed by _aligned_free() on Windows */
|
||||
# define aligned_free(x) _aligned_free (x)
|
||||
#else
|
||||
# define aligned_free(x) free (x)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* gsk_aligned_alloc:
|
||||
* @size: the size of the memory to allocate
|
||||
* @number: the multiples of @size to allocate
|
||||
* @alignment: the alignment to be enforced, as a power of 2
|
||||
*
|
||||
* Allocates @number times @size memory, with the given @alignment.
|
||||
*
|
||||
* If the total requested memory overflows %G_MAXSIZE, this function
|
||||
* will abort.
|
||||
*
|
||||
* If allocation fails, this function will abort, in line with
|
||||
* the behaviour of GLib.
|
||||
*
|
||||
* Returns: (transfer full): the allocated memory
|
||||
*/
|
||||
gpointer
|
||||
gsk_aligned_alloc (gsize size,
|
||||
gsize number,
|
||||
gsize alignment)
|
||||
{
|
||||
gpointer res = NULL;
|
||||
gsize real_size;
|
||||
|
||||
if (G_UNLIKELY (number > G_MAXSIZE / size))
|
||||
{
|
||||
#ifndef G_DISABLE_ASSERT
|
||||
g_error ("%s: overflow in the allocation of (%"G_GSIZE_FORMAT" x %"G_GSIZE_FORMAT") bytes",
|
||||
G_STRLOC, size, number);
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
real_size = size * number;
|
||||
|
||||
#ifndef G_DISABLE_ASSERT
|
||||
errno = 0;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_POSIX_MEMALIGN) && !defined (MALLOC_IS_ALIGNED16)
|
||||
errno = posix_memalign (&res, alignment, real_size);
|
||||
#elif (defined(HAVE_ALIGNED_ALLOC) || defined(HAVE__ALIGNED_MALLOC)) && !defined (MALLOC_IS_ALIGNED16)
|
||||
/* real_size must be a multiple of alignment */
|
||||
if (real_size % alignment != 0)
|
||||
{
|
||||
gsize offset = real_size % alignment;
|
||||
real_size += (alignment - offset);
|
||||
}
|
||||
|
||||
res = aligned_alloc (alignment, real_size);
|
||||
#elif defined(HAVE_MEMALIGN) && !defined (MALLOC_IS_ALIGNED16)
|
||||
res = memalign (alignment, real_size);
|
||||
#else
|
||||
res = malloc (real_size);
|
||||
#endif
|
||||
|
||||
#ifndef G_DISABLE_ASSERT
|
||||
if (errno != 0 || res == NULL)
|
||||
{
|
||||
g_error ("%s: error in the allocation of (%"G_GSIZE_FORMAT" x %"G_GSIZE_FORMAT") bytes: %s",
|
||||
G_STRLOC, size, number, strerror (errno));
|
||||
}
|
||||
#endif
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* gsk_aligned_alloc0:
|
||||
* @size: the size of the memory to allocate
|
||||
* @number: the multiples of @size to allocate
|
||||
* @alignment: the alignment to be enforced, as a power of 2
|
||||
*
|
||||
* Allocates @number times @size memory, with the given @alignment,
|
||||
* like gsk_aligned_alloc(), but it also clears the memory.
|
||||
*
|
||||
* Returns: (transfer full): the allocated, cleared memory
|
||||
*/
|
||||
gpointer
|
||||
gsk_aligned_alloc0 (gsize size,
|
||||
gsize number,
|
||||
gsize alignment)
|
||||
{
|
||||
gpointer mem;
|
||||
|
||||
mem = gsk_aligned_alloc (size, number, alignment);
|
||||
|
||||
if (mem)
|
||||
memset (mem, 0, size * number);
|
||||
|
||||
return mem;
|
||||
}
|
||||
|
||||
/*
|
||||
* gsk_aligned_free:
|
||||
* @mem: the memory to deallocate
|
||||
*
|
||||
* Frees the memory allocated by gsk_aligned_alloc().
|
||||
*/
|
||||
void
|
||||
gsk_aligned_free (gpointer mem)
|
||||
{
|
||||
aligned_free (mem);
|
||||
}
|
18
gsk/gskallocprivate.h
Normal file
18
gsk/gskallocprivate.h
Normal file
@ -0,0 +1,18 @@
|
||||
#ifndef __GSK_ALLOC_PRIVATE_H__
|
||||
#define __GSK_ALLOC_PRIVATE_H__
|
||||
|
||||
#include <glib.h>
|
||||
|
||||
G_BEGIN_DECLS
|
||||
|
||||
/* Allocates @number * @size bytes with the requested @alignment. */
gpointer gsk_aligned_alloc  (gsize    size,
                             gsize    number,
                             gsize    alignment);

/* Same as gsk_aligned_alloc(), but the returned memory is cleared. */
gpointer gsk_aligned_alloc0 (gsize    size,
                             gsize    number,
                             gsize    alignment);

/* Releases memory returned by the two allocators above. */
void     gsk_aligned_free   (gpointer mem);
|
||||
|
||||
G_END_DECLS
|
||||
|
||||
#endif /* __GSK_ALLOC_PRIVATE_H__ */
|
@ -42,6 +42,7 @@
|
||||
|
||||
#include "gskdebugprivate.h"
|
||||
#include "gskrendererprivate.h"
|
||||
#include "gskallocprivate.h"
|
||||
|
||||
#include <graphene-gobject.h>
|
||||
|
||||
@ -66,7 +67,7 @@ gsk_render_node_finalize (GskRenderNode *self)
|
||||
{
|
||||
self->node_class->finalize (self);
|
||||
|
||||
g_free (self);
|
||||
gsk_aligned_free (self);
|
||||
}
|
||||
|
||||
/*< private >
|
||||
@ -83,7 +84,7 @@ gsk_render_node_new (const GskRenderNodeClass *node_class, gsize extra_size)
|
||||
g_return_val_if_fail (node_class != NULL, NULL);
|
||||
g_return_val_if_fail (node_class->node_type != GSK_NOT_A_RENDER_NODE, NULL);
|
||||
|
||||
self = g_malloc0 (node_class->struct_size + extra_size);
|
||||
self = gsk_aligned_alloc0 (node_class->struct_size + extra_size, 1, 16);
|
||||
|
||||
self->node_class = node_class;
|
||||
|
||||
|
@ -30,6 +30,7 @@ gsk_public_sources = files([
|
||||
])
|
||||
|
||||
gsk_private_sources = files([
|
||||
'gskalloc.c',
|
||||
'gskcairoblur.c',
|
||||
'gskcairorenderer.c',
|
||||
'gskdebug.c',
|
||||
|
@ -31,9 +31,138 @@
|
||||
|
||||
#include "gsk/gskrendernodeprivate.h"
|
||||
|
||||
#include "gsk/gskallocprivate.h"
|
||||
|
||||
#include "gtk/gskpango.h"
|
||||
|
||||
|
||||
static void gtk_snapshot_state_clear (GtkSnapshotState *state);
|
||||
|
||||
/* Returns the smallest power of 2 greater than n, or n if
|
||||
* such power does not fit in a guint
|
||||
*/
|
||||
static guint
|
||||
g_nearest_pow (gint num)
|
||||
{
|
||||
guint n = 1;
|
||||
|
||||
while (n < num && n > 0)
|
||||
n <<= 1;
|
||||
|
||||
return n ? n : num;
|
||||
}
|
||||
|
||||
typedef struct _GtkRealSnapshotStateArray GtkRealSnapshotStateArray;
|
||||
|
||||
struct _GtkRealSnapshotStateArray
|
||||
{
|
||||
GtkSnapshotState *data;
|
||||
guint len;
|
||||
guint alloc;
|
||||
gint ref_count;
|
||||
};
|
||||
|
||||
static GtkSnapshotStateArray*
|
||||
gtk_snapshot_state_array_new (void)
|
||||
{
|
||||
GtkRealSnapshotStateArray *array;
|
||||
|
||||
g_return_val_if_fail (sizeof (GtkSnapshotState) % 16 == 0, NULL);
|
||||
|
||||
array = g_slice_new (GtkRealSnapshotStateArray);
|
||||
|
||||
array->data = NULL;
|
||||
array->len = 0;
|
||||
array->alloc = 0;
|
||||
array->ref_count = 1;
|
||||
|
||||
return (GtkSnapshotStateArray *) array;
|
||||
}
|
||||
|
||||
static GtkSnapshotState *
|
||||
gtk_snapshot_state_array_free (GtkSnapshotStateArray *farray)
|
||||
{
|
||||
GtkRealSnapshotStateArray *array = (GtkRealSnapshotStateArray*) farray;
|
||||
guint i;
|
||||
|
||||
g_return_val_if_fail (array, NULL);
|
||||
|
||||
for (i = 0; i < array->len; i++)
|
||||
gtk_snapshot_state_clear (&array->data[i]);
|
||||
|
||||
gsk_aligned_free (array->data);
|
||||
|
||||
g_slice_free1 (sizeof (GtkRealSnapshotStateArray), array);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#define MIN_ARRAY_SIZE 16
|
||||
|
||||
static void
|
||||
gtk_snapshot_state_array_maybe_expand (GtkRealSnapshotStateArray *array,
|
||||
gint len)
|
||||
{
|
||||
guint want_alloc = sizeof (GtkSnapshotState) * (array->len + len);
|
||||
GtkSnapshotState *new_data;
|
||||
|
||||
if (want_alloc <= array->alloc)
|
||||
return;
|
||||
|
||||
want_alloc = g_nearest_pow (want_alloc);
|
||||
want_alloc = MAX (want_alloc, MIN_ARRAY_SIZE);
|
||||
new_data = gsk_aligned_alloc0 (want_alloc, 1, 16);
|
||||
memcpy (new_data, array->data, sizeof (GtkSnapshotState) * array->len);
|
||||
gsk_aligned_free (array->data);
|
||||
array->data = new_data;
|
||||
array->alloc = want_alloc;
|
||||
}
|
||||
|
||||
static GtkSnapshotStateArray*
|
||||
gtk_snapshot_state_array_remove_index (GtkSnapshotStateArray *farray,
|
||||
guint index_)
|
||||
{
|
||||
GtkRealSnapshotStateArray *array = (GtkRealSnapshotStateArray*) farray;
|
||||
|
||||
g_return_val_if_fail (array, NULL);
|
||||
g_return_val_if_fail (index_ < array->len, NULL);
|
||||
|
||||
gtk_snapshot_state_clear (&array->data[index_]);
|
||||
|
||||
memmove (&array->data[index_],
|
||||
&array->data[index_ + 1],
|
||||
(array->len - index_ - 1) * sizeof (GtkSnapshotState));
|
||||
|
||||
array->len -= 1;
|
||||
|
||||
return farray;
|
||||
}
|
||||
|
||||
#define gtk_snapshot_state_array_append_val(a,v) gtk_snapshot_state_array_append_vals (a, &(v), 1)
|
||||
|
||||
static GtkSnapshotStateArray*
|
||||
gtk_snapshot_state_array_append_vals (GtkSnapshotStateArray *farray,
|
||||
gconstpointer data,
|
||||
guint len)
|
||||
{
|
||||
GtkRealSnapshotStateArray *array = (GtkRealSnapshotStateArray*) farray;
|
||||
|
||||
g_return_val_if_fail (array, NULL);
|
||||
|
||||
if (len == 0)
|
||||
return farray;
|
||||
|
||||
gtk_snapshot_state_array_maybe_expand (array, len);
|
||||
|
||||
memcpy (&array->data[array->len], data,
|
||||
sizeof (GtkSnapshotState) * len);
|
||||
|
||||
array->len += len;
|
||||
|
||||
return farray;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* SECTION:gtksnapshot
|
||||
* @Short_description: Auxiliary object for snapshots
|
||||
@ -120,9 +249,9 @@ gtk_snapshot_push_state (GtkSnapshot *snapshot,
|
||||
state.start_node_index = snapshot->nodes->len;
|
||||
state.n_nodes = 0;
|
||||
|
||||
g_array_append_val (snapshot->state_stack, state);
|
||||
gtk_snapshot_state_array_append_val (snapshot->state_stack, state);
|
||||
|
||||
return &g_array_index (snapshot->state_stack, GtkSnapshotState, snapshot->state_stack->len - 1);
|
||||
return &snapshot->state_stack->data[snapshot->state_stack->len - 1];
|
||||
}
|
||||
|
||||
static GtkSnapshotState *
|
||||
@ -130,7 +259,7 @@ gtk_snapshot_get_current_state (const GtkSnapshot *snapshot)
|
||||
{
|
||||
g_assert (snapshot->state_stack->len > 0);
|
||||
|
||||
return &g_array_index (snapshot->state_stack, GtkSnapshotState, snapshot->state_stack->len - 1);
|
||||
return &snapshot->state_stack->data[snapshot->state_stack->len - 1];
|
||||
}
|
||||
|
||||
static GtkSnapshotState *
|
||||
@ -138,7 +267,7 @@ gtk_snapshot_get_previous_state (const GtkSnapshot *snapshot)
|
||||
{
|
||||
g_assert (snapshot->state_stack->len > 1);
|
||||
|
||||
return &g_array_index (snapshot->state_stack, GtkSnapshotState, snapshot->state_stack->len - 2);
|
||||
return &snapshot->state_stack->data[snapshot->state_stack->len - 2];
|
||||
}
|
||||
|
||||
static void
|
||||
@ -160,8 +289,7 @@ gtk_snapshot_new (void)
|
||||
|
||||
snapshot = g_object_new (GTK_TYPE_SNAPSHOT, NULL);
|
||||
|
||||
snapshot->state_stack = g_array_new (FALSE, TRUE, sizeof (GtkSnapshotState));
|
||||
g_array_set_clear_func (snapshot->state_stack, (GDestroyNotify)gtk_snapshot_state_clear);
|
||||
snapshot->state_stack = gtk_snapshot_state_array_new ();
|
||||
snapshot->nodes = g_ptr_array_new_with_free_func ((GDestroyNotify)gsk_render_node_unref);
|
||||
|
||||
gtk_snapshot_push_state (snapshot,
|
||||
@ -925,7 +1053,7 @@ gtk_snapshot_pop_internal (GtkSnapshot *snapshot)
|
||||
snapshot->nodes->len - state->n_nodes,
|
||||
state->n_nodes);
|
||||
|
||||
g_array_remove_index (snapshot->state_stack, state_index);
|
||||
gtk_snapshot_state_array_remove_index (snapshot->state_stack, state_index);
|
||||
|
||||
return node;
|
||||
}
|
||||
@ -955,7 +1083,7 @@ gtk_snapshot_to_node (GtkSnapshot *snapshot)
|
||||
|
||||
result = gtk_snapshot_pop_internal (snapshot);
|
||||
|
||||
g_array_free (snapshot->state_stack, TRUE);
|
||||
gtk_snapshot_state_array_free (snapshot->state_stack);
|
||||
snapshot->state_stack = NULL;
|
||||
|
||||
g_ptr_array_free (snapshot->nodes, TRUE);
|
||||
|
@ -23,6 +23,17 @@
|
||||
G_BEGIN_DECLS
|
||||
|
||||
typedef struct _GtkSnapshotState GtkSnapshotState;
|
||||
typedef struct _GtkSnapshotStateArray GtkSnapshotStateArray;
|
||||
|
||||
/* This is a stripped-down copy of GArray tailored specifically
|
||||
* for GtkSnapshotState and guaranteed to be aligned to 16 byte
|
||||
* bounaries.
|
||||
*/
|
||||
struct _GtkSnapshotStateArray
|
||||
{
|
||||
GtkSnapshotState *data;
|
||||
guint len;
|
||||
};
|
||||
|
||||
typedef GskRenderNode * (* GtkSnapshotCollectFunc) (GtkSnapshot *snapshot,
|
||||
GtkSnapshotState *state,
|
||||
@ -88,7 +99,7 @@ struct _GtkSnapshotState {
|
||||
struct _GdkSnapshot {
|
||||
GObject parent_instance; /* it's really GdkSnapshot, but don't tell anyone! */
|
||||
|
||||
GArray *state_stack;
|
||||
GtkSnapshotStateArray *state_stack;
|
||||
GPtrArray *nodes;
|
||||
};
|
||||
|
||||
|
126
meson.build
126
meson.build
@ -640,6 +640,132 @@ if cloudproviders_enabled
|
||||
endif
|
||||
endif
|
||||
|
||||
graphene_has_sse2 = graphene_dep.get_pkgconfig_variable('graphene_has_sse2') == '1'
|
||||
graphene_has_gcc = graphene_dep.get_pkgconfig_variable('graphene_has_gcc') == '1'
|
||||
|
||||
malloc_is_aligned = false
|
||||
|
||||
# Probe the alignment malloc() hands out: allocate a batch of small
# blocks and report, on stderr, the smallest power-of-2 alignment
# (capped at 128) observed among them.
if not meson.is_cross_build() or meson.has_exe_wrapper()
  malloc_test = cc.run ('''
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>

  #define COUNT 100
  #define is_aligned(POINTER, BYTE_COUNT) (((uintptr_t)(const void *)(POINTER)) % (BYTE_COUNT) == 0)

  int
  main (int argc, char *argv[])
  {
    /* The blocks are deliberately kept alive so that every
     * allocation yields a distinct address. */
    void *pointers[COUNT];
    int i, a, min_a;
    int wrote;

    for (i = 0, min_a = 128; i < COUNT; i++)
      {
        pointers[i] = malloc (sizeof (void *));
        if (pointers[i] == NULL)
          return 1;
        for (a = 1; a <= 128; a = a * 2)
          {
            if (!is_aligned (pointers[i], a))
              {
                a = a / 2;
                break;
              }
          }
        if (a > 128)
          a = 128;
        if (a < min_a)
          min_a = a;
      }
    wrote = fprintf (stderr, "%d", min_a);
    if (wrote <= 0)
      return 1;
    return 0;
  }
  ''')

  if not malloc_test.compiled() or malloc_test.returncode() != 0
    message ('malloc() alignment test failed, assuming unaligned malloc()')
  elif malloc_test.stderr().to_int() >= 16
    malloc_is_aligned = true
    cdata.set('MALLOC_IS_ALIGNED16', 1)
  endif
else
  # TODO: more reasonable cross-compiling defaults?
  message ('cross-compiling, assuming unaligned malloc()')
endif
|
||||
|
||||
if cc.get_id() == 'gcc' or cc.get_id() == 'clang'
|
||||
no_builtin_args = cc.get_supported_arguments(['-fno-builtin'])
|
||||
else
|
||||
no_builtin_args = []
|
||||
endif
|
||||
|
||||
# Check that posix_memalign() is not a builtin
|
||||
have_posix_memalign = cc.links('''#include <stdlib.h>
|
||||
int main (int argc, char ** argv) {
|
||||
void *p;
|
||||
return posix_memalign (&p, 16, argc);
|
||||
}
|
||||
''',
|
||||
args : no_builtin_args,
|
||||
name : 'posix_memalign() is not a builtin')
|
||||
if have_posix_memalign
|
||||
cdata.set('HAVE_POSIX_MEMALIGN', 1)
|
||||
endif
|
||||
|
||||
# Check that memalign() is not a builtin
# (the result is compared against 0 so that main() returns an int
# rather than an implicitly converted pointer)
have_memalign = cc.links('''#include <malloc.h>
                            int main (int argc, char ** argv) {
                              return memalign (16, argc) == 0;
                            }
                         ''',
                         args : no_builtin_args,
                         name : 'memalign() is not a builtin')
if have_memalign
  cdata.set('HAVE_MEMALIGN', 1)
endif
|
||||
|
||||
# Check that aligned_alloc() is not a builtin
# (the result is compared against 0 so that main() returns an int
# rather than an implicitly converted pointer)
have_aligned_alloc = cc.links('''#include <stdlib.h>
                                 int main (int argc, char ** argv) {
                                   return aligned_alloc (16, argc) == 0;
                                 }
                              ''',
                              args : no_builtin_args,
                              name : 'aligned_alloc() is not a builtin')
if have_aligned_alloc
  cdata.set('HAVE_ALIGNED_ALLOC', 1)
endif
|
||||
|
||||
# Check that _aligned_malloc() is not a builtin
# (the result is compared against 0 so that main() returns an int
# rather than an implicitly converted pointer)
have__aligned_malloc = cc.links('''#include <malloc.h>
                                   int main (int argc, char ** argv) {
                                     return _aligned_malloc (argc, 16) == 0;
                                   }
                                ''',
                                args : no_builtin_args,
                                name : '_aligned_malloc() is not a builtin')
if have__aligned_malloc
  cdata.set('HAVE__ALIGNED_MALLOC', 1)
endif
|
||||
|
||||
if graphene_has_sse2 or graphene_has_gcc
|
||||
message('Need aligned memory due to the use of SSE2 or GCC vector instructions')
|
||||
|
||||
if os_win32 and cc.get_id() == 'gcc'
|
||||
add_global_arguments(['-mstackrealign'], language: 'c')
|
||||
endif
|
||||
|
||||
if (not malloc_is_aligned and
|
||||
not have_posix_memalign and
|
||||
not have_memalign and
|
||||
not have_aligned_alloc and
|
||||
not have__aligned_malloc)
|
||||
error('Failed to find any means of allocating aligned memory')
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
subdir('gdk')
|
||||
subdir('gsk')
|
||||
|
Loading…
Reference in New Issue
Block a user