Merge branch 'lrn/aligned-alloc' into 'master'

Aligned allocators for GTK4

Closes #856

See merge request GNOME/gtk!187
This commit is contained in:
Matthias Clasen 2018-06-10 22:05:12 +00:00
commit bc13a58f3c
8 changed files with 436 additions and 11 deletions

View File

@ -305,3 +305,13 @@
#mesondefine HAVE_PANGOFT
#mesondefine ISO_CODES_PREFIX
#mesondefine MALLOC_IS_ALIGNED16
#mesondefine HAVE_POSIX_MEMALIGN
#mesondefine HAVE_MEMALIGN
#mesondefine HAVE_ALIGNED_ALLOC
#mesondefine HAVE__ALIGNED_MALLOC

130
gsk/gskalloc.c Normal file
View File

@ -0,0 +1,130 @@
#include "config.h"
#if defined(HAVE_POSIX_MEMALIGN) && !defined(_XOPEN_SOURCE)
# define _XOPEN_SOURCE 600
#endif
#include "gskallocprivate.h"
#if defined(HAVE_MEMALIGN) || defined(HAVE__ALIGNED_MALLOC)
/* Required for _aligned_malloc() and _aligned_free() on Windows */
#include <malloc.h>
#endif
#include <errno.h>
#include <string.h>
/*
 * Pick the deallocator that matches the allocator gsk_aligned_alloc()
 * really uses. _aligned_malloc() is only reached when malloc() is not
 * already 16-byte aligned (MALLOC_IS_ALIGNED16) and posix_memalign() is
 * unavailable; in every other configuration the memory comes from
 * malloc()/posix_memalign()/aligned_alloc()/memalign() and must be
 * released with plain free(). Passing such memory to _aligned_free()
 * corrupts the heap.
 */
#if defined(HAVE__ALIGNED_MALLOC) && !defined(HAVE_POSIX_MEMALIGN) && !defined(MALLOC_IS_ALIGNED16)
/* _aligned_malloc() takes parameters of aligned_alloc() in reverse order */
# define aligned_alloc(alignment, size) _aligned_malloc (size, alignment)
/* _aligned_malloc()'ed memory must be freed by _aligned_free() on Windows */
# define aligned_free(x) _aligned_free (x)
#else
# define aligned_free(x) free (x)
#endif
/*
 * gsk_aligned_alloc:
 * @size: the size of the memory to allocate
 * @number: the multiples of @size to allocate
 * @alignment: the alignment to be enforced, as a non-zero power of 2
 *
 * Allocates @number times @size memory, with the given @alignment.
 *
 * If @size or @number is 0 nothing is allocated and %NULL is returned,
 * mirroring g_malloc(); gsk_aligned_free() accepts that %NULL.
 *
 * If the total requested memory overflows %G_MAXSIZE, this function
 * will abort. When GLib assertions are compiled out (G_DISABLE_ASSERT)
 * it returns %NULL instead.
 *
 * If allocation fails, this function will abort, in line with
 * the behaviour of GLib. With G_DISABLE_ASSERT the failure is reported
 * by returning %NULL.
 *
 * Note: when the build detected that malloc() is already 16-byte
 * aligned (MALLOC_IS_ALIGNED16) plain malloc() is used, so alignments
 * larger than 16 are not enforced in that configuration.
 *
 * Returns: (transfer full): the allocated memory
 */
gpointer
gsk_aligned_alloc (gsize size,
                   gsize number,
                   gsize alignment)
{
  gpointer res = NULL;
  gsize real_size;

  /* Zero-sized request: nothing to allocate. This also keeps the
   * overflow check below from dividing by zero.
   */
  if (size == 0 || number == 0)
    return NULL;

  if (G_UNLIKELY (number > G_MAXSIZE / size))
    {
#ifndef G_DISABLE_ASSERT
      g_error ("%s: overflow in the allocation of (%"G_GSIZE_FORMAT" x %"G_GSIZE_FORMAT") bytes",
               G_STRLOC, size, number);
#endif
      return NULL;
    }

  real_size = size * number;

#ifndef G_DISABLE_ASSERT
  errno = 0;
#endif

#if defined(HAVE_POSIX_MEMALIGN) && !defined (MALLOC_IS_ALIGNED16)
  /* posix_memalign() returns the error code instead of setting errno */
  errno = posix_memalign (&res, alignment, real_size);
#elif (defined(HAVE_ALIGNED_ALLOC) || defined(HAVE__ALIGNED_MALLOC)) && !defined (MALLOC_IS_ALIGNED16)
  /* real_size must be a multiple of alignment */
  if (real_size % alignment != 0)
    {
      gsize padding = alignment - (real_size % alignment);

      /* Rounding up must not wrap around; treat it as an allocation
       * failure (real_size is known to be non-zero up to here, so 0
       * can serve as the "do not allocate" marker).
       */
      if (G_UNLIKELY (padding > G_MAXSIZE - real_size))
        {
          errno = ENOMEM;
          real_size = 0;
        }
      else
        real_size += padding;
    }

  if (real_size != 0)
    res = aligned_alloc (alignment, real_size);
#elif defined(HAVE_MEMALIGN) && !defined (MALLOC_IS_ALIGNED16)
  res = memalign (alignment, real_size);
#else
  res = malloc (real_size);
#endif

#ifndef G_DISABLE_ASSERT
  if (errno != 0 || res == NULL)
    {
      g_error ("%s: error in the allocation of (%"G_GSIZE_FORMAT" x %"G_GSIZE_FORMAT") bytes: %s",
               G_STRLOC, size, number, strerror (errno));
    }
#endif

  return res;
}
/*
 * gsk_aligned_alloc0:
 * @size: the size of the memory to allocate
 * @number: the multiples of @size to allocate
 * @alignment: the alignment to be enforced, as a power of 2
 *
 * Zero-filling variant of gsk_aligned_alloc(): obtains @number times
 * @size bytes with the requested @alignment and clears them before
 * handing the block back. A %NULL result from gsk_aligned_alloc() is
 * passed through untouched.
 *
 * Returns: (transfer full): the allocated, cleared memory
 */
gpointer
gsk_aligned_alloc0 (gsize size,
                    gsize number,
                    gsize alignment)
{
  gpointer block = gsk_aligned_alloc (size, number, alignment);

  if (block == NULL)
    return NULL;

  /* gsk_aligned_alloc() has already validated size * number */
  memset (block, 0, size * number);

  return block;
}
/*
 * gsk_aligned_free:
 * @mem: the memory to deallocate
 *
 * Frees the memory allocated by gsk_aligned_alloc() or
 * gsk_aligned_alloc0().
 *
 * This forwards to the aligned_free() macro defined at the top of this
 * file, which expands to either _aligned_free() or free() depending on
 * the build configuration. Because of that, memory obtained from the
 * gsk_aligned_*() allocators should be released through this function
 * rather than by calling free() directly.
 */
void
gsk_aligned_free (gpointer mem)
{
aligned_free (mem);
}

18
gsk/gskallocprivate.h Normal file
View File

@ -0,0 +1,18 @@
/* Private aligned-allocation helpers for GSK (implemented in gskalloc.c). */
#ifndef __GSK_ALLOC_PRIVATE_H__
#define __GSK_ALLOC_PRIVATE_H__
#include <glib.h>
G_BEGIN_DECLS
/* Allocates @number times @size bytes with the given @alignment
 * (documented as a power of 2 in gskalloc.c).
 */
gpointer gsk_aligned_alloc (gsize size,
gsize number,
gsize alignment);
/* Same as gsk_aligned_alloc(), but the returned memory is cleared. */
gpointer gsk_aligned_alloc0 (gsize size,
gsize number,
gsize alignment);
/* Releases memory obtained from gsk_aligned_alloc() / gsk_aligned_alloc0(). */
void gsk_aligned_free (gpointer mem);
G_END_DECLS
#endif /* __GSK_ALLOC_PRIVATE_H__ */

View File

@ -42,6 +42,7 @@
#include "gskdebugprivate.h"
#include "gskrendererprivate.h"
#include "gskallocprivate.h"
#include <graphene-gobject.h>
@ -66,7 +67,7 @@ gsk_render_node_finalize (GskRenderNode *self)
{
self->node_class->finalize (self);
g_free (self);
gsk_aligned_free (self);
}
/*< private >
@ -83,7 +84,7 @@ gsk_render_node_new (const GskRenderNodeClass *node_class, gsize extra_size)
g_return_val_if_fail (node_class != NULL, NULL);
g_return_val_if_fail (node_class->node_type != GSK_NOT_A_RENDER_NODE, NULL);
self = g_malloc0 (node_class->struct_size + extra_size);
self = gsk_aligned_alloc0 (node_class->struct_size + extra_size, 1, 16);
self->node_class = node_class;

View File

@ -30,6 +30,7 @@ gsk_public_sources = files([
])
gsk_private_sources = files([
'gskalloc.c',
'gskcairoblur.c',
'gskcairorenderer.c',
'gskdebug.c',

View File

@ -31,9 +31,138 @@
#include "gsk/gskrendernodeprivate.h"
#include "gsk/gskallocprivate.h"
#include "gtk/gskpango.h"
static void gtk_snapshot_state_clear (GtkSnapshotState *state);
/* Rounds @num up to a power of 2.
 *
 * @num is compared as an unsigned value. The result is the smallest
 * power of 2 that is greater than or equal to it (1 for 0 and 1).
 * If no such power fits in a guint, @num itself is returned, converted
 * to guint.
 */
static guint
g_nearest_pow (gint num)
{
  const guint target = (guint) num;
  guint power;

  /* power wraps to 0 once the next doubling no longer fits in a guint */
  for (power = 1; power != 0 && power < target; power <<= 1)
    ;

  if (power == 0)
    return target;

  return power;
}
/* Private representation behind GtkSnapshotStateArray pointers: the
 * functions below cast between the two types, so the leading members
 * (data, len) are the part callers access through the public struct.
 */
typedef struct _GtkRealSnapshotStateArray GtkRealSnapshotStateArray;
struct _GtkRealSnapshotStateArray
{
/* Element storage; NULL until the first expansion, allocated with
 * gsk_aligned_alloc0() using a 16 byte alignment.
 */
GtkSnapshotState *data;
/* Number of elements currently stored in data */
guint len;
/* Size of the data buffer in bytes (not in elements), as set by
 * gtk_snapshot_state_array_maybe_expand()
 */
guint alloc;
/* Set to 1 on creation; not otherwise used by the functions visible here */
gint ref_count;
};
/* Creates an empty state array.
 *
 * No element storage is allocated yet; that happens on the first
 * append. Returns NULL (after a GLib critical) if GtkSnapshotState is
 * not a multiple of 16 bytes in size.
 */
static GtkSnapshotStateArray*
gtk_snapshot_state_array_new (void)
{
  GtkRealSnapshotStateArray *array;

  g_return_val_if_fail (sizeof (GtkSnapshotState) % 16 == 0, NULL);

  array = g_slice_new (GtkRealSnapshotStateArray);

  array->ref_count = 1;
  array->alloc = 0;
  array->len = 0;
  array->data = NULL;

  return (GtkSnapshotStateArray *) array;
}
/* Destroys a state array.
 *
 * Every stored element is cleared with gtk_snapshot_state_clear(), in
 * index order, then the element buffer and the array header are
 * released. Always returns NULL.
 */
static GtkSnapshotState *
gtk_snapshot_state_array_free (GtkSnapshotStateArray *farray)
{
  GtkRealSnapshotStateArray *array = (GtkRealSnapshotStateArray*) farray;
  guint idx = 0;

  g_return_val_if_fail (array, NULL);

  while (idx < array->len)
    {
      gtk_snapshot_state_clear (&array->data[idx]);
      idx++;
    }

  gsk_aligned_free (array->data);
  g_slice_free (GtkRealSnapshotStateArray, array);

  return NULL;
}
/* Smallest buffer the array ever allocates, in bytes (it is compared
 * against a byte count below, not against an element count).
 */
#define MIN_ARRAY_SIZE 16

/* Makes sure @array has room for @len more elements.
 *
 * When the current buffer is too small, a new zero-filled, 16-byte
 * aligned buffer is allocated (its byte size rounded up to a power of
 * 2), the existing elements are copied over and the old buffer is
 * released.
 *
 * NOTE(review): the required size is computed in a guint, so an
 * extremely large element count would be truncated - confirm callers
 * never get close to that.
 */
static void
gtk_snapshot_state_array_maybe_expand (GtkRealSnapshotStateArray *array,
                                       gint                       len)
{
  guint want_alloc = sizeof (GtkSnapshotState) * (array->len + len);
  GtkSnapshotState *new_data;

  if (want_alloc <= array->alloc)
    return;

  want_alloc = g_nearest_pow (want_alloc);
  want_alloc = MAX (want_alloc, MIN_ARRAY_SIZE);

  new_data = gsk_aligned_alloc0 (want_alloc, 1, 16);

  /* On the first expansion array->data is still NULL; passing a NULL
   * source to memcpy() is undefined behaviour even for a length of 0,
   * so only copy when there is something to copy.
   */
  if (array->len > 0)
    memcpy (new_data, array->data, sizeof (GtkSnapshotState) * array->len);

  gsk_aligned_free (array->data);

  array->data = new_data;
  array->alloc = want_alloc;
}
/* Removes the element at @index_, keeping the order of the others.
 *
 * The element is cleared with gtk_snapshot_state_clear() and all
 * following elements are shifted down by one slot. Returns @farray,
 * or NULL (after a GLib critical) for a NULL array or an out-of-range
 * index.
 */
static GtkSnapshotStateArray*
gtk_snapshot_state_array_remove_index (GtkSnapshotStateArray *farray,
                                       guint                  index_)
{
  GtkRealSnapshotStateArray *array = (GtkRealSnapshotStateArray*) farray;
  GtkSnapshotState *slot;
  guint n_after;

  g_return_val_if_fail (array, NULL);
  g_return_val_if_fail (index_ < array->len, NULL);

  slot = &array->data[index_];
  n_after = array->len - index_ - 1;

  gtk_snapshot_state_clear (slot);

  /* Close the gap; the regions overlap, hence memmove() */
  memmove (slot, slot + 1, n_after * sizeof (GtkSnapshotState));

  array->len--;

  return farray;
}
/* Appends the single value @v (an lvalue, its address is taken) to @a */
#define gtk_snapshot_state_array_append_val(a,v) gtk_snapshot_state_array_append_vals (a, &(v), 1)

/* Appends @len elements read from @data to the end of the array.
 *
 * The buffer is grown first if needed, then the elements are copied
 * bytewise. Returns @farray, or NULL (after a GLib critical) when
 * @farray is NULL. Appending zero elements is a no-op.
 */
static GtkSnapshotStateArray*
gtk_snapshot_state_array_append_vals (GtkSnapshotStateArray *farray,
                                      gconstpointer          data,
                                      guint                  len)
{
  GtkRealSnapshotStateArray *array = (GtkRealSnapshotStateArray*) farray;

  g_return_val_if_fail (array, NULL);

  if (len != 0)
    {
      GtkSnapshotState *tail;

      gtk_snapshot_state_array_maybe_expand (array, len);

      /* Look up the tail only now: expanding may have replaced data */
      tail = &array->data[array->len];
      memcpy (tail, data, sizeof (GtkSnapshotState) * len);

      array->len += len;
    }

  return farray;
}
/**
* SECTION:gtksnapshot
* @Short_description: Auxiliary object for snapshots
@ -120,9 +249,9 @@ gtk_snapshot_push_state (GtkSnapshot *snapshot,
state.start_node_index = snapshot->nodes->len;
state.n_nodes = 0;
g_array_append_val (snapshot->state_stack, state);
gtk_snapshot_state_array_append_val (snapshot->state_stack, state);
return &g_array_index (snapshot->state_stack, GtkSnapshotState, snapshot->state_stack->len - 1);
return &snapshot->state_stack->data[snapshot->state_stack->len - 1];
}
static GtkSnapshotState *
@ -130,7 +259,7 @@ gtk_snapshot_get_current_state (const GtkSnapshot *snapshot)
{
g_assert (snapshot->state_stack->len > 0);
return &g_array_index (snapshot->state_stack, GtkSnapshotState, snapshot->state_stack->len - 1);
return &snapshot->state_stack->data[snapshot->state_stack->len - 1];
}
static GtkSnapshotState *
@ -138,7 +267,7 @@ gtk_snapshot_get_previous_state (const GtkSnapshot *snapshot)
{
g_assert (snapshot->state_stack->len > 1);
return &g_array_index (snapshot->state_stack, GtkSnapshotState, snapshot->state_stack->len - 2);
return &snapshot->state_stack->data[snapshot->state_stack->len - 2];
}
static void
@ -160,8 +289,7 @@ gtk_snapshot_new (void)
snapshot = g_object_new (GTK_TYPE_SNAPSHOT, NULL);
snapshot->state_stack = g_array_new (FALSE, TRUE, sizeof (GtkSnapshotState));
g_array_set_clear_func (snapshot->state_stack, (GDestroyNotify)gtk_snapshot_state_clear);
snapshot->state_stack = gtk_snapshot_state_array_new ();
snapshot->nodes = g_ptr_array_new_with_free_func ((GDestroyNotify)gsk_render_node_unref);
gtk_snapshot_push_state (snapshot,
@ -925,7 +1053,7 @@ gtk_snapshot_pop_internal (GtkSnapshot *snapshot)
snapshot->nodes->len - state->n_nodes,
state->n_nodes);
g_array_remove_index (snapshot->state_stack, state_index);
gtk_snapshot_state_array_remove_index (snapshot->state_stack, state_index);
return node;
}
@ -955,7 +1083,7 @@ gtk_snapshot_to_node (GtkSnapshot *snapshot)
result = gtk_snapshot_pop_internal (snapshot);
g_array_free (snapshot->state_stack, TRUE);
gtk_snapshot_state_array_free (snapshot->state_stack);
snapshot->state_stack = NULL;
g_ptr_array_free (snapshot->nodes, TRUE);

View File

@ -23,6 +23,17 @@
G_BEGIN_DECLS
typedef struct _GtkSnapshotState GtkSnapshotState;
typedef struct _GtkSnapshotStateArray GtkSnapshotStateArray;
/* This is a stripped-down copy of GArray tailored specifically
 * for GtkSnapshotState and guaranteed to be aligned to 16 byte
 * boundaries.
 *
 * Only the members that users of the array read directly are exposed
 * here; gtksnapshot.c casts pointers of this type to a larger private
 * struct that starts with the same two members.
 */
struct _GtkSnapshotStateArray
{
/* The stored states */
GtkSnapshotState *data;
/* Number of states in data */
guint len;
};
typedef GskRenderNode * (* GtkSnapshotCollectFunc) (GtkSnapshot *snapshot,
GtkSnapshotState *state,
@ -88,7 +99,7 @@ struct _GtkSnapshotState {
struct _GdkSnapshot {
GObject parent_instance; /* it's really GdkSnapshot, but don't tell anyone! */
GArray *state_stack;
GtkSnapshotStateArray *state_stack;
GPtrArray *nodes;
};

View File

@ -640,6 +640,132 @@ if cloudproviders_enabled
endif
endif
graphene_has_sse2 = graphene_dep.get_pkgconfig_variable('graphene_has_sse2') == '1'
graphene_has_gcc = graphene_dep.get_pkgconfig_variable('graphene_has_gcc') == '1'

# Probe at configure time which alignment malloc() hands out. The test
# program allocates a number of small blocks, determines the largest
# power-of-2 alignment (capped at 128) that all of them share and prints
# it on stderr. It can only be executed for native builds or when an exe
# wrapper is available.
malloc_is_aligned = false
if not meson.is_cross_build() or meson.has_exe_wrapper()
  malloc_test = cc.run ('''
  /* <stdint.h> is required for uintptr_t, <stdlib.h> is the portable
   * home of malloc() (<malloc.h> does not exist everywhere).
   */
  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>

  #define COUNT 100
  #define is_aligned(POINTER, BYTE_COUNT) (((uintptr_t)(const void *)(POINTER)) % (BYTE_COUNT) == 0)

  int
  main (int argc, char *argv[])
  {
    void **pointers;
    int i, a, min_a;
    int wrote;

    pointers = malloc (sizeof (void *) * COUNT);
    if (pointers == NULL)
      return 1;

    /* The blocks are intentionally kept alive so that every malloc()
     * call has to return a distinct chunk.
     */
    for (i = 0, min_a = 128; i < COUNT; i++)
      {
        pointers[i] = malloc (sizeof (void *));
        if (pointers[i] == NULL)
          return 1;

        for (a = 1; a <= 128; a = a * 2)
          {
            if (!is_aligned (pointers[i], a))
              {
                a = a / 2;
                break;
              }
          }
        if (a > 128)
          a = 128;
        if (a < min_a)
          min_a = a;
      }

    wrote = fprintf (stderr, "%d", min_a);
    if (wrote <= 0)
      return 1;

    return 0;
  }
  ''')

  if not malloc_test.compiled() or malloc_test.returncode() != 0
    message ('malloc() alignment test failed, assuming unaligned malloc()')
  elif malloc_test.stderr().to_int() >= 16
    malloc_is_aligned = true
    cdata.set('MALLOC_IS_ALIGNED16', 1)
  endif
else
  # TODO: more reasonable cross-compiling defaults?
  message ('cross-compiling, assuming unaligned malloc()')
endif
# The checks below must find a real library symbol, not a compiler
# builtin, so builtins are disabled where the compiler supports it.
if cc.get_id() == 'gcc' or cc.get_id() == 'clang'
  no_builtin_args = cc.get_supported_arguments(['-fno-builtin'])
else
  no_builtin_args = []
endif

# Note for all test programs: main() returns int, so the allocator's
# pointer result is stored in a void * and compared against NULL rather
# than returned directly. Returning the pointer is an int-conversion
# error with recent compilers and would make the check fail for the
# wrong reason.

# Check that posix_memalign() is not a builtin
have_posix_memalign = cc.links('''#include <stdlib.h>
                                  int main (int argc, char ** argv) {
                                    void *p;
                                    return posix_memalign (&p, 16, argc);
                                  }
                               ''',
                               args : no_builtin_args,
                               name : 'posix_memalign() is not a builtin')
if have_posix_memalign
  cdata.set('HAVE_POSIX_MEMALIGN', 1)
endif

# Check that memalign() is not a builtin
have_memalign = cc.links('''#include <malloc.h>
                            int main (int argc, char ** argv) {
                              void *p = memalign (16, argc);
                              return p == NULL;
                            }
                         ''',
                         args : no_builtin_args,
                         name : 'memalign() is not a builtin')
if have_memalign
  cdata.set('HAVE_MEMALIGN', 1)
endif

# Check that aligned_alloc() is not a builtin
have_aligned_alloc = cc.links('''#include <stdlib.h>
                                 int main (int argc, char ** argv) {
                                   void *p = aligned_alloc (16, argc);
                                   return p == NULL;
                                 }
                              ''',
                              args : no_builtin_args,
                              name : 'aligned_alloc() is not a builtin')
if have_aligned_alloc
  cdata.set('HAVE_ALIGNED_ALLOC', 1)
endif

# Check that _aligned_malloc() is not a builtin
have__aligned_malloc = cc.links('''#include <malloc.h>
                                   int main (int argc, char ** argv) {
                                     void *p = _aligned_malloc (argc, 16);
                                     return p == NULL;
                                   }
                                ''',
                                args : no_builtin_args,
                                name : '_aligned_malloc() is not a builtin')
if have__aligned_malloc
  cdata.set('HAVE__ALIGNED_MALLOC', 1)
endif
# When graphene was built with SSE2 or GCC vector support, aligned memory
# is required, so at least one of the allocation strategies detected
# above must be usable.
if graphene_has_sse2 or graphene_has_gcc
message('Need aligned memory due to the use of SSE2 or GCC vector instructions')
# NOTE(review): presumably needed so that the stack is suitably aligned
# for vector instructions with GCC on Windows - confirm.
if os_win32 and cc.get_id() == 'gcc'
add_global_arguments(['-mstackrealign'], language: 'c')
endif
# Abort configuration if malloc() is not 16-byte aligned and none of
# posix_memalign(), memalign(), aligned_alloc() or _aligned_malloc()
# was found.
if (not malloc_is_aligned and
not have_posix_memalign and
not have_memalign and
not have_aligned_alloc and
not have__aligned_malloc)
error('Failed to find any means of allocating aligned memory')
endif
endif
subdir('gdk')
subdir('gsk')