diff --git a/demos/gtk-demo/listview_colors.c b/demos/gtk-demo/listview_colors.c index b63e14498e..5e59c6d8a0 100644 --- a/demos/gtk-demo/listview_colors.c +++ b/demos/gtk-demo/listview_colors.c @@ -30,7 +30,6 @@ struct _GtkColor char *name; GdkRGBA color; int h, s, v; - gboolean selected; }; enum { @@ -43,7 +42,6 @@ enum { PROP_HUE, PROP_SATURATION, PROP_VALUE, - PROP_SELECTED, N_COLOR_PROPS }; @@ -206,10 +204,6 @@ gtk_color_get_property (GObject *object, g_value_set_int (value, self->v); break; - case PROP_SELECTED: - g_value_set_boolean (value, self->selected); - break; - default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); break; @@ -239,10 +233,6 @@ gtk_color_set_property (GObject *object, self->v = round (100 * v); break; - case PROP_SELECTED: - self->selected = g_value_get_boolean (value); - break; - default: G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec); break; @@ -284,8 +274,6 @@ gtk_color_class_init (GtkColorClass *klass) g_param_spec_int ("saturation", NULL, NULL, 0, 100, 0, G_PARAM_READABLE); color_properties[PROP_VALUE] = g_param_spec_int ("value", NULL, NULL, 0, 100, 0, G_PARAM_READABLE); - color_properties[PROP_SELECTED] = - g_param_spec_boolean ("selected", NULL, NULL, FALSE, G_PARAM_READWRITE); g_object_class_install_properties (gobject_class, N_COLOR_PROPS, color_properties); } @@ -673,7 +661,7 @@ create_color_grid (void) model = G_LIST_MODEL (gtk_sort_list_model_new (gtk_color_list_new (0), NULL)); - selection = G_LIST_MODEL (gtk_property_selection_new (model, "selected")); + selection = G_LIST_MODEL (gtk_multi_selection_new (model)); gtk_grid_view_set_model (GTK_GRID_VIEW (gridview), selection); g_object_unref (selection); g_object_unref (model); diff --git a/docs/reference/gtk/gtk4-docs.xml b/docs/reference/gtk/gtk4-docs.xml index 4c57d0c60a..f6a46015cc 100644 --- a/docs/reference/gtk/gtk4-docs.xml +++ b/docs/reference/gtk/gtk4-docs.xml @@ -48,6 +48,7 @@ GListModel support +
@@ -72,7 +73,6 @@ -
diff --git a/docs/reference/gtk/gtk4-sections.txt b/docs/reference/gtk/gtk4-sections.txt index 39e04bfb81..ec497c66ea 100644 --- a/docs/reference/gtk/gtk4-sections.txt +++ b/docs/reference/gtk/gtk4-sections.txt @@ -339,21 +339,65 @@ gtk_list_box_get_type gtk_list_box_row_get_type +
+gtkbitset +GtkBitset +GtkBitset +gtk_bitset_ref +gtk_bitset_unref +gtk_bitset_new_empty +gtk_bitset_copy + +gtk_bitset_contains +gtk_bitset_is_empty +gtk_bitset_equals +gtk_bitset_get_minimum +gtk_bitset_get_maximum + +gtk_bitset_remove_all +gtk_bitset_add +gtk_bitset_remove +gtk_bitset_add_range +gtk_bitset_remove_range +gtk_bitset_add_range_closed +gtk_bitset_remove_range_closed +gtk_bitset_add_rectangle +gtk_bitset_remove_rectangle +gtk_bitset_union +gtk_bitset_intersect +gtk_bitset_subtract +gtk_bitset_difference +gtk_bitset_shift_left +gtk_bitset_shift_right +gtk_bitset_slice + +GtkBitsetIter +gtk_bitset_iter_init_first +gtk_bitset_iter_init_last +gtk_bitset_iter_init_at +gtk_bitset_iter_next +gtk_bitset_iter_previous +gtk_bitset_iter_get_value +gtk_bitset_iter_is_valid + +GTK_TYPE_BITSET +gtk_bitset_get_type +
+
gtkselectionmodel GtkSelectionModel GtkSelectionModel gtk_selection_model_is_selected +gtk_selection_model_get_selection +gtk_selection_model_get_selection_in_range gtk_selection_model_select_item gtk_selection_model_unselect_item gtk_selection_model_select_range gtk_selection_model_unselect_range gtk_selection_model_select_all gtk_selection_model_unselect_all -GtkSelectionCallback -gtk_selection_model_select_callback -gtk_selection_model_unselect_callback -gtk_selection_model_query_range +gtk_selection_model_set_selection gtk_selection_model_selection_changed @@ -404,17 +448,6 @@ gtk_multi_selection_new gtk_multi_selection_get_type
-
-gtkpropertyselection -GtkPropertySelection -GtkPropertySelection -gtk_property_selection_new -gtk_property_selection_get_model -gtk_property_selection_get_property - -gtk_property_selection_get_type -
-
gtklistitem GtkListItem diff --git a/docs/reference/gtk/gtk4.types.in b/docs/reference/gtk/gtk4.types.in index ac13151dca..d61305ae2e 100644 --- a/docs/reference/gtk/gtk4.types.in +++ b/docs/reference/gtk/gtk4.types.in @@ -18,6 +18,7 @@ gtk_aspect_frame_get_type gtk_assistant_get_type gtk_assistant_page_get_type gtk_bin_layout_get_type +gtk_bitset_get_type gtk_bookmark_list_get_type gtk_box_get_type gtk_box_layout_get_type @@ -171,7 +172,6 @@ gtk_print_operation_preview_get_type gtk_print_settings_get_type @DISABLE_ON_W32@gtk_print_unix_dialog_get_type gtk_progress_bar_get_type -gtk_property_selection_get_type gtk_radio_button_get_type gtk_range_get_type gtk_recent_manager_get_type diff --git a/docs/reference/gtk/meson.build b/docs/reference/gtk/meson.build index e205261b08..af14959544 100644 --- a/docs/reference/gtk/meson.build +++ b/docs/reference/gtk/meson.build @@ -176,7 +176,6 @@ private_headers = [ 'gtkroundedboxprivate.h', 'gtkscalerprivate.h', 'gtksearchentryprivate.h', - 'gtkset.h', 'gtksettingsprivate.h', 'gtkshortcutcontrollerprivate.h', 'gtkshortcutsshortcutprivate.h', @@ -216,6 +215,7 @@ private_headers = [ 'gtkwin32themeprivate.h', 'gtkwindowprivate.h', 'gtk-text-input-client-protocol.h', + 'roaring.h', ] images = [ diff --git a/gtk/gtk.h b/gtk/gtk.h index 0ab53bfd33..f4c105e6e5 100644 --- a/gtk/gtk.h +++ b/gtk/gtk.h @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -200,7 +201,6 @@ #include #include #include -#include #include #include #include diff --git a/gtk/gtkbitset.c b/gtk/gtkbitset.c new file mode 100644 index 0000000000..b9bde5b90b --- /dev/null +++ b/gtk/gtkbitset.c @@ -0,0 +1,865 @@ +/* + * Copyright © 2020 Benjamin Otte + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Authors: Benjamin Otte + */ + +#include "config.h" + +#include "gtkbitset.h" + +#include "roaring.c" + +/** + * SECTION:gtkbitset + * @Short_description: Sets of integers + * @Title: GtkBitset + * @See_also: GtkSelectionModel + * + * #GtkBitset is a data structure for representing a set of unsigned integers. + * Another name for this data structure is "bitmap". + * + * This version is based on [roaring bitmaps](https://roaringbitmap.org/). + * + * A bitset allows adding a set of integers and provides support for set operations + * like unions, intersections and checks for equality or if a value is contained + * in the set. #GtkBitset also contains various functions to query metadata about + * the bitset, such as the minimum or maximum values or its size. + * + * The fastest way to iterate values in a bitset is #GtkBitsetIter which allows + * quick iteration of all the values in a bitset. + * + * The main use case #GtkBitset is implementing complex selections for #GtkSelectionModel. + */ + +/** + * GtkBitset: (ref-func gtk_bitset_ref) (unref-func gtk_bitset_unref) + * + * The `GtkBitset` structure contains only private data. + */ +struct _GtkBitset +{ + int ref_count; + roaring_bitmap_t roaring; +}; + + +G_DEFINE_BOXED_TYPE (GtkBitset, gtk_bitset, + gtk_bitset_ref, + gtk_bitset_unref) + +/** + * gtk_bitset_ref: + * @self: (allow-none): a #GtkBitset + * + * Acquires a reference on the given #GtkBitset. 
+ * + * Returns: (transfer none): the #GtkBitset with an additional reference + */ +GtkBitset * +gtk_bitset_ref (GtkBitset *self) +{ + g_return_val_if_fail (self != NULL, NULL); + + self->ref_count += 1; + + return self; +} + +/** + * gtk_bitset_unref: + * @self: (allow-none): a #GtkBitset + * + * Releases a reference on the given #GtkBitset. + * + * If the reference was the last, the resources associated to the @self are + * freed. + */ +void +gtk_bitset_unref (GtkBitset *self) +{ + g_return_if_fail (self != NULL); + g_return_if_fail (self->ref_count > 0); + + self->ref_count -= 1; + if (self->ref_count > 0) + return; + + ra_clear (&self->roaring.high_low_container); + g_slice_free (GtkBitset, self); +} + +/** + * gtk_bitset_contains: + * @self: a #GtkBitset + * @value: the value to check + * + * Checks if the given @value has been added to @bitset + * + * Returns: %TRUE if @self contains @value + **/ +gboolean +gtk_bitset_contains (const GtkBitset *self, + guint value) +{ + g_return_val_if_fail (self != NULL, FALSE); + + return roaring_bitmap_contains (&self->roaring, value); +} + +/** + * gtk_bitset_is_empty: + * @self: a #GtkBitset + * + * Check if no value is contained in bitset. + * + * Returns: %TRUE if @self is empty + **/ +gboolean +gtk_bitset_is_empty (const GtkBitset *self) +{ + g_return_val_if_fail (self != NULL, TRUE); + + return roaring_bitmap_is_empty (&self->roaring); +} + +/** + * gtk_bitset_equals: + * @self: a #GtkBitset + * @other: another #GtkBitset + * + * Returns %TRUE if @self and @other contain the same values. 
+ * + * Returns: %TRUE if @self and @other contain the same values + **/ +gboolean +gtk_bitset_equals (const GtkBitset *self, + const GtkBitset *other) +{ + g_return_val_if_fail (self != NULL, other == NULL); + g_return_val_if_fail (other != NULL, FALSE); + + if (self == other) + return TRUE; + + return roaring_bitmap_equals (&self->roaring, &other->roaring); +} + +/** + * gtk_bitset_get_minimum: + * @self: a #GtkBitset + * + * Returns the smallest value in @self. If @self is empty, + * G_MAXUINT is returned. + * + * Returns: The smallest value in @self + **/ +guint +gtk_bitset_get_minimum (const GtkBitset *self) +{ + g_return_val_if_fail (self != NULL, G_MAXUINT); + + return roaring_bitmap_minimum (&self->roaring); +} + +/** + * gtk_bitset_get_maximum: + * @self: a #GtkBitset + * + * Returns the largest value in @self. If @self is empty, + * 0 is returned. + * + * Returns: The largest value in @self + **/ +guint +gtk_bitset_get_maximum (const GtkBitset *self) +{ + g_return_val_if_fail (self != NULL, 0); + + return roaring_bitmap_maximum (&self->roaring); +} + +/** + * gtk_bitset_new_empty: + * + * Creates a new empty bitset. + * + * Returns: A new empty bitset. + **/ +GtkBitset * +gtk_bitset_new_empty (void) +{ + GtkBitset *self; + + self = g_slice_new0 (GtkBitset); + + self->ref_count = 1; + + ra_init (&self->roaring.high_low_container); + + return self; +} + +/** + * gtk_bitset_copy: + * @self: a #GtkBitset + * + * Creates a copy of @self. + * + * Returns: (transfer full) A new bitset that contains the same + * values as @self + **/ +GtkBitset * +gtk_bitset_copy (const GtkBitset *self) +{ + GtkBitset *copy; + + g_return_val_if_fail (self != NULL, NULL); + + copy = gtk_bitset_new_empty (); + roaring_bitmap_overwrite (©->roaring, &self->roaring); + + return copy; +} + +/** + * gtk_bitset_remove_all: + * @self: a #GtkBitset + * + * Removes all values from the bitset so that it is empty again. 
+ **/
+void
+gtk_bitset_remove_all (GtkBitset *self)
+{
+  g_return_if_fail (self != NULL);
+
+  roaring_bitmap_clear (&self->roaring);
+}
+
+/**
+ * gtk_bitset_add:
+ * @self: a #GtkBitset
+ * @value: value to add
+ *
+ * Adds @value to @self if it wasn't part of it before.
+ *
+ * Returns: %TRUE if @value was not part of @self and @self
+ * was changed.
+ **/
+gboolean
+gtk_bitset_add (GtkBitset *self,
+                guint      value)
+{
+  g_return_val_if_fail (self != NULL, FALSE);
+
+  return roaring_bitmap_add_checked (&self->roaring, value);
+}
+
+/**
+ * gtk_bitset_remove:
+ * @self: a #GtkBitset
+ * @value: value to remove
+ *
+ * Removes @value from @self if it was part of it before.
+ *
+ * Returns: %TRUE if @value was part of @self and @self
+ * was changed.
+ **/
+gboolean
+gtk_bitset_remove (GtkBitset *self,
+                   guint      value)
+{
+  g_return_val_if_fail (self != NULL, FALSE);
+
+  return roaring_bitmap_remove_checked (&self->roaring, value);
+}
+
+/**
+ * gtk_bitset_add_range:
+ * @self: a #GtkBitset
+ * @start: first value to add
+ * @n_items: number of consecutive values to add
+ *
+ * Adds all values from @start (inclusive) to @start + @n_items (exclusive)
+ * in @self.
+ **/
+void
+gtk_bitset_add_range (GtkBitset *self,
+                      guint      start,
+                      guint      n_items)
+{
+  g_return_if_fail (self != NULL);
+
+  if (n_items == 0)
+    return;
+
+  /* overflow check, the == 0 is to allow add_range(G_MAXUINT, 1); */
+  g_return_if_fail (start + n_items == 0 || start + n_items > start);
+
+  roaring_bitmap_add_range_closed (&self->roaring, start, start + n_items - 1);
+}
+
+/**
+ * gtk_bitset_remove_range:
+ * @self: a #GtkBitset
+ * @start: first value to remove
+ * @n_items: number of consecutive values to remove
+ *
+ * Removes all values from @start (inclusive) to @start + @n_items (exclusive)
+ * in @self.
+ **/ +void +gtk_bitset_remove_range (GtkBitset *self, + guint start, + guint n_items) +{ + g_return_if_fail (self != NULL); + + if (n_items == 0) + return; + + /* overflow check, the == 0 is to allow add_range(G_MAXUINT, 1); */ + g_return_if_fail (start + n_items == 0 || start + n_items > start); + + roaring_bitmap_remove_range_closed (&self->roaring, start, start + n_items - 1); +} + +/** + * gtk_bitset_add_range_closed: + * @self: a #GtkBitset + * @first: first value to add + * @last: last value to add + * + * Adds the closed range [@first, @last], so @first, @last and all values inbetween. + * @first must be smaller than @last. + **/ +void +gtk_bitset_add_range_closed (GtkBitset *self, + guint first, + guint last) +{ + g_return_if_fail (self != NULL); + g_return_if_fail (first <= last); + + roaring_bitmap_add_range_closed (&self->roaring, first, last); +} + +/** + * gtk_bitset_remove_range_closed: + * @self: a #GtkBitset + * @first: first value to remove + * @last: last value to remove + * + * Removes the closed range [@first, @last], so @first, @last and all values inbetween. + * @first must be smaller than @last. + **/ +void +gtk_bitset_remove_range_closed (GtkBitset *self, + guint first, + guint last) +{ + g_return_if_fail (self != NULL); + g_return_if_fail (first <= last); + + roaring_bitmap_remove_range_closed (&self->roaring, first, last); +} + +/** + * gtk_bitset_add_rectangle: + * @self: a #GtkBitset + * @start: first value to add + * @width: width of the rectangle + * @height: height of the rectangle + * @stride: rowstride of the rectangle + * + * Interprets the values as a 2-dimensional boolean grid with the given @stride + * and inside that grid, adds a rectangle with the given @width and @height. 
+ **/ +void +gtk_bitset_add_rectangle (GtkBitset *self, + guint start, + guint width, + guint height, + guint stride) +{ + guint i; + + g_return_if_fail (self != NULL); + g_return_if_fail (width <= stride); + g_return_if_fail (G_MAXUINT - start >= height * stride); + + if (width == 0 || height == 0) + return; + + for (i = 0; i < height; i++) + gtk_bitset_add_range (self, i * stride + start, width); +} + +/** + * gtk_bitset_remove_rectangle: + * @self: a #GtkBitset + * @start: first value to remove + * @width: width of the rectangle + * @height: height of the rectangle + * @stride: rowstride of the rectangle + * + * Interprets the values as a 2-dimensional boolean grid with the given @stride + * and inside that grid, removes a rectangle with the given @width and @height. + **/ +void +gtk_bitset_remove_rectangle (GtkBitset *self, + guint start, + guint width, + guint height, + guint stride) +{ + guint i; + + g_return_if_fail (self != NULL); + g_return_if_fail (width <= stride); + g_return_if_fail (G_MAXUINT - start >= height * stride); + + if (width == 0 || height == 0) + return; + + for (i = 0; i < height; i++) + gtk_bitset_remove_range (self, i * stride + start, width); +} + +/** + * gtk_bitset_union: + * @self: a #GtkBitset + * @other: the #GtkBitset to union with + * + * Sets @self to be the union of @self and @other, that is add all values + * from @other into @self that weren't part of it. + * + * It is allowed for @self and @other to be the same bitset. Nothing will + * happen in that case. 
+ **/ +void +gtk_bitset_union (GtkBitset *self, + const GtkBitset *other) +{ + g_return_if_fail (self != NULL); + g_return_if_fail (other != NULL); + + if (self == other) + return; + + roaring_bitmap_or_inplace (&self->roaring, &other->roaring); +} + +/** + * gtk_bitset_intersect: + * @self: a #GtkBitset + * @other: the #GtkBitset to intersect with + * + * Sets @self to be the intersection of @self and @other, that is remove all values + * from @self that are not part of @other. + * + * It is allowed for @self and @other to be the same bitset. Nothing will + * happen in that case. + **/ +void +gtk_bitset_intersect (GtkBitset *self, + const GtkBitset *other) +{ + g_return_if_fail (self != NULL); + g_return_if_fail (other != NULL); + + if (self == other) + return; + + roaring_bitmap_and_inplace (&self->roaring, &other->roaring); +} + +/** + * gtk_bitset_subtract: + * @self: a #GtkBitset + * @other: the #GtkBitset to subtract + * + * Sets @self to be the subtraction of @other from @self, that is remove + * all values from @self that are part of @other. + * + * It is allowed for @self and @other to be the same bitset. The bitset + * will be emptied in that case. + **/ +void +gtk_bitset_subtract (GtkBitset *self, + const GtkBitset *other) +{ + g_return_if_fail (self != NULL); + g_return_if_fail (other != NULL); + + if (self == other) + { + roaring_bitmap_clear (&self->roaring); + return; + } + + roaring_bitmap_andnot_inplace (&self->roaring, &other->roaring); +} + +/** + * gtk_bitset_difference: + * @self: a #GtkBitset + * @other: the #GtkBitset to compute the difference from + * + * Sets @self to be the symmetric difference of @self and @other, that + * is set @self to contain all values that were either contained in @self + * or in @other, but not in both. + * This operation is also called an XOR. + * + * It is allowed for @self and @other to be the same bitset. The bitset + * will be emptied in that case. 
+ **/
+void
+gtk_bitset_difference (GtkBitset       *self,
+                       const GtkBitset *other)
+{
+  g_return_if_fail (self != NULL);
+  g_return_if_fail (other != NULL);
+
+  if (self == other)
+    {
+      roaring_bitmap_clear (&self->roaring);
+      return;
+    }
+
+  roaring_bitmap_xor_inplace (&self->roaring, &other->roaring);
+}
+
+/**
+ * gtk_bitset_shift_left:
+ * @self: a #GtkBitset
+ * @amount: amount to shift all values to the left
+ *
+ * Shifts all values in @self to the left by @amount. Values
+ * smaller than @amount are discarded.
+ **/
+void
+gtk_bitset_shift_left (GtkBitset *self,
+                       guint      amount)
+{
+  GtkBitset *original;
+  GtkBitsetIter iter;
+  guint value;
+  gboolean loop;
+
+  g_return_if_fail (self != NULL);
+
+  if (amount == 0)
+    return;
+
+  original = gtk_bitset_copy (self);
+  gtk_bitset_remove_all (self);
+
+  /* values below @amount would shift past 0, so iteration starts at @amount */
+  for (loop = gtk_bitset_iter_init_at (&iter, original, amount, &value);
+       loop;
+       loop = gtk_bitset_iter_next (&iter, &value))
+    {
+      gtk_bitset_add (self, value - amount);
+    }
+
+  gtk_bitset_unref (original);
+}
+
+/**
+ * gtk_bitset_shift_right:
+ * @self: a #GtkBitset
+ * @amount: amount to shift all values to the right
+ *
+ * Shifts all values in @self to the right by @amount. Values
+ * that end up too large to be held in a #guint are discarded.
+ **/
+void
+gtk_bitset_shift_right (GtkBitset *self,
+                        guint      amount)
+{
+  GtkBitset *original;
+  GtkBitsetIter iter;
+  guint value;
+  gboolean loop;
+
+  g_return_if_fail (self != NULL);
+
+  if (amount == 0)
+    return;
+
+  original = gtk_bitset_copy (self);
+  gtk_bitset_remove_all (self);
+
+  /* iterate from the first value; stop once value + amount would overflow
+   * guint — those values are discarded, as documented */
+  for (loop = gtk_bitset_iter_init_first (&iter, original, &value);
+       loop && value <= G_MAXUINT - amount;
+       loop = gtk_bitset_iter_next (&iter, &value))
+    {
+      gtk_bitset_add (self, value + amount);
+    }
+
+  gtk_bitset_unref (original);
+}
+
+/**
+ * gtk_bitset_slice:
+ * @self: a #GtkBitset
+ * @position: position at which to slice
+ * @removed: number of values to remove
+ * @added: number of values to add
+ *
+ * This is a support function for #GListModel handling, by mirroring
+ * the #GListModel::items-changed signal.
+ *
+ * First, it "cuts" the values from @position to @removed from
+ * the bitset. That is, it removes all those values and shifts
+ * all larger values to the left by @removed places.
+ *
+ * Then, it "pastes" new room into the bitset by shifting all values
+ * larger than @position by @added spaces to the right.
+ * This frees up space that can then be filled with new values.
+ **/
+void
+gtk_bitset_slice (GtkBitset *self,
+                  guint      position,
+                  guint      removed,
+                  guint      added)
+{
+  g_return_if_fail (self != NULL);
+  /* overflow */
+  g_return_if_fail (position + removed >= position);
+  g_return_if_fail (position + added >= position);
+
+  gtk_bitset_remove_range (self, position, removed);
+
+  if (removed != added)
+    {
+      GtkBitset *shift = gtk_bitset_copy (self);
+
+      gtk_bitset_remove_range (shift, 0, position);
+      gtk_bitset_remove_range (self, position, G_MAXUINT - position + 1);
+      if (added > removed)
+        gtk_bitset_shift_right (shift, added - removed);
+      else
+        gtk_bitset_shift_left (shift, removed - added);
+      gtk_bitset_union (self, shift);
+      gtk_bitset_unref (shift);
+    }
+}
+
+G_STATIC_ASSERT (sizeof (GtkBitsetIter) >= sizeof (roaring_uint32_iterator_t));
+
+/**
+ * gtk_bitset_iter_init_first:
+ * @iter: (out): a pointer to a preallocated #GtkBitsetIter
+ * @set: a #GtkBitset
+ * @value: (out) (optional): Set to the first value in @set
+ *
+ * Initializes an iterator for @set and points it to the first
+ * value in @set. If @set is empty, %FALSE is returned and @value is set to
+ * 0.
+ *
+ * Returns: %TRUE if a @set isn't empty.
+ **/
+gboolean
+gtk_bitset_iter_init_first (GtkBitsetIter   *iter,
+                            const GtkBitset *set,
+                            guint           *value)
+{
+  roaring_uint32_iterator_t *riter = (roaring_uint32_iterator_t *) iter;
+
+  g_return_val_if_fail (iter != NULL, FALSE);
+  g_return_val_if_fail (set != NULL, FALSE);
+
+  roaring_init_iterator (&set->roaring, riter);
+
+  if (value)
+    *value = riter->has_value ? riter->current_value : 0;
+
+  return riter->has_value;
+}
+
+/**
+ * gtk_bitset_iter_init_last:
+ * @iter: (out): a pointer to a preallocated #GtkBitsetIter
+ * @set: a #GtkBitset
+ * @value: (out) (optional): Set to the last value in @set
+ *
+ * Initializes an iterator for @set and points it to the last
+ * value in @set. If @set is empty, %FALSE is returned.
+ * + * Returns: %TRUE if a @set isn't empty. + **/ +gboolean +gtk_bitset_iter_init_last (GtkBitsetIter *iter, + const GtkBitset *set, + guint *value) +{ + roaring_uint32_iterator_t *riter = (roaring_uint32_iterator_t *) iter; + + g_return_val_if_fail (iter != NULL, FALSE); + g_return_val_if_fail (set != NULL, FALSE); + + roaring_init_iterator_last (&set->roaring, riter); + + if (value) + *value = riter->has_value ? riter->current_value : 0; + + return riter->has_value; +} + +/** + * gtk_bitset_iter_init_at: + * @iter: a #GtkBitsetIter + * @target: target value to start iterating at + * @value: (out) (optional): Set to the found value in @set + * + * Initializes @iter to point to @target. If @target is not found, finds + * the next value after it. If no value >= @target exists in @set, this + * function returns %FALSE. + * + * Returns: %TRUE if a value was found. + **/ +gboolean +gtk_bitset_iter_init_at (GtkBitsetIter *iter, + const GtkBitset *set, + guint target, + guint *value) +{ + roaring_uint32_iterator_t *riter = (roaring_uint32_iterator_t *) iter; + + g_return_val_if_fail (iter != NULL, FALSE); + g_return_val_if_fail (set != NULL, FALSE); + + roaring_init_iterator (&set->roaring, riter); + if (!roaring_move_uint32_iterator_equalorlarger (riter, target)) + { + if (value) + *value = 0; + return FALSE; + } + + if (value) + *value = riter->current_value; + + return TRUE; +} + +/** + * gtk_bitset_iter_next: + * @iter: (out): a pointer to a valid #GtkBitsetIter + * @value: (out) (optional): Set to the next value + * + * Moves @iter to the next value in the set. If it was already + * pointing to the last value in the set, %FALSE is returned and + * @iter is invalidated. 
+ * + * Returns: %TRUE if a next value existed + **/ +gboolean +gtk_bitset_iter_next (GtkBitsetIter *iter, + guint *value) +{ + roaring_uint32_iterator_t *riter = (roaring_uint32_iterator_t *) iter; + + g_return_val_if_fail (iter != NULL, FALSE); + + if (!roaring_advance_uint32_iterator (riter)) + { + if (value) + *value = 0; + return FALSE; + } + + if (value) + *value = riter->current_value; + + return TRUE; +} + +/** + * gtk_bitset_iter_previous: + * @iter: (out): a pointer to a valid #GtkBitsetIter + * @value: (out) (optional): Set to the previous value + * + * Moves @iter to the previous value in the set. If it was already + * pointing to the first value in the set, %FALSE is returned and + * @iter is invalidated. + * + * Returns: %TRUE if a previous value existed + **/ +gboolean +gtk_bitset_iter_previous (GtkBitsetIter *iter, + guint *value) +{ + roaring_uint32_iterator_t *riter = (roaring_uint32_iterator_t *) iter; + + g_return_val_if_fail (iter != NULL, FALSE); + + if (!roaring_previous_uint32_iterator (riter)) + { + if (value) + *value = 0; + return FALSE; + } + + if (value) + *value = riter->current_value; + + return TRUE; +} + +/** + * gtk_bitset_iter_get_value: + * @iter: a #GtkBitsetIter + * + * Gets the current value that @iter points to. + * + * If @iter is not valid and gtk_bitset_iter_is_valid() returns + * %FALSE, this function returns 0. 
+ * + * Returns: The current value pointer to by @iter + **/ +guint +gtk_bitset_iter_get_value (const GtkBitsetIter *iter) +{ + roaring_uint32_iterator_t *riter = (roaring_uint32_iterator_t *) iter; + + g_return_val_if_fail (iter != NULL, 0); + + if (!riter->has_value) + return 0; + + return riter->current_value; +} + +/** + * gtk_bitset_iter_is_valid: + * @iter: a #GtkBitsetIter + * + * Checks if @iter points to a valid value + * + * Returns: %TRUE if @iter points to a valid value + **/ +gboolean +gtk_bitset_iter_is_valid (const GtkBitsetIter *iter) +{ + roaring_uint32_iterator_t *riter = (roaring_uint32_iterator_t *) iter; + + g_return_val_if_fail (iter != NULL, FALSE); + + return riter->has_value; +} + diff --git a/gtk/gtkbitset.h b/gtk/gtkbitset.h new file mode 100644 index 0000000000..fc26e0d7de --- /dev/null +++ b/gtk/gtkbitset.h @@ -0,0 +1,151 @@ +/* + * Copyright © 2020 Benjamin Otte + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + * + * Authors: Benjamin Otte + */ + + +#ifndef __GTK_BITSET_H__ +#define __GTK_BITSET_H__ + +#if !defined (__GTK_H_INSIDE__) && !defined (GTK_COMPILATION) +#error "Only can be included directly." 
+#endif + +#include + +G_BEGIN_DECLS + +#define GTK_TYPE_BITSET (gtk_bitset_get_type ()) + +GDK_AVAILABLE_IN_ALL +GType gtk_bitset_get_type (void) G_GNUC_CONST; + +GDK_AVAILABLE_IN_ALL +GtkBitset * gtk_bitset_ref (GtkBitset *self); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_unref (GtkBitset *self); + +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_contains (const GtkBitset *self, + guint value); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_is_empty (const GtkBitset *self); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_equals (const GtkBitset *self, + const GtkBitset *other); +GDK_AVAILABLE_IN_ALL +guint gtk_bitset_get_minimum (const GtkBitset *self); +GDK_AVAILABLE_IN_ALL +guint gtk_bitset_get_maximum (const GtkBitset *self); + +GDK_AVAILABLE_IN_ALL +GtkBitset * gtk_bitset_new_empty (void); +GDK_AVAILABLE_IN_ALL +GtkBitset * gtk_bitset_copy (const GtkBitset *self); + +GDK_AVAILABLE_IN_ALL +void gtk_bitset_remove_all (GtkBitset *self); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_add (GtkBitset *self, + guint value); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_remove (GtkBitset *self, + guint value); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_add_range (GtkBitset *self, + guint start, + guint n_items); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_remove_range (GtkBitset *self, + guint start, + guint n_items); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_add_range_closed (GtkBitset *self, + guint first, + guint last); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_remove_range_closed (GtkBitset *self, + guint first, + guint last); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_add_rectangle (GtkBitset *self, + guint start, + guint width, + guint height, + guint stride); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_remove_rectangle (GtkBitset *self, + guint start, + guint width, + guint height, + guint stride); + +GDK_AVAILABLE_IN_ALL +void gtk_bitset_union (GtkBitset *self, + const GtkBitset *other); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_intersect (GtkBitset *self, + const GtkBitset *other); +GDK_AVAILABLE_IN_ALL +void 
gtk_bitset_subtract (GtkBitset *self, + const GtkBitset *other); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_difference (GtkBitset *self, + const GtkBitset *other); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_shift_left (GtkBitset *self, + guint amount); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_shift_right (GtkBitset *self, + guint amount); +GDK_AVAILABLE_IN_ALL +void gtk_bitset_slice (GtkBitset *self, + guint position, + guint removed, + guint added); + +typedef struct {gpointer private_data[10]; } GtkBitsetIter; + +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_iter_init_first (GtkBitsetIter *iter, + const GtkBitset *set, + guint *value); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_iter_init_last (GtkBitsetIter *iter, + const GtkBitset *set, + guint *value); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_iter_init_at (GtkBitsetIter *iter, + const GtkBitset *set, + guint target, + guint *value); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_iter_next (GtkBitsetIter *iter, + guint *value); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_iter_previous (GtkBitsetIter *iter, + guint *value); +GDK_AVAILABLE_IN_ALL +guint gtk_bitset_iter_get_value (const GtkBitsetIter *iter); +GDK_AVAILABLE_IN_ALL +gboolean gtk_bitset_iter_is_valid (const GtkBitsetIter *iter); + + + +G_END_DECLS + +#endif /* __GTK_BITSET_H__ */ diff --git a/gtk/gtkgridview.c b/gtk/gtkgridview.c index 5b2a381c69..3a1b96098e 100644 --- a/gtk/gtkgridview.c +++ b/gtk/gtkgridview.c @@ -21,6 +21,7 @@ #include "gtkgridview.h" +#include "gtkbitset.h" #include "gtkintl.h" #include "gtklistbaseprivate.h" #include "gtklistitemfactory.h" @@ -452,6 +453,36 @@ gtk_grid_view_get_position_from_allocation (GtkListBase *base, return TRUE; } +static GtkBitset * +gtk_grid_view_get_items_in_rect (GtkListBase *base, + const GdkRectangle *rect) +{ + GtkGridView *self = GTK_GRID_VIEW (base); + guint first_row, last_row, first_column, last_column, n_items; + GtkBitset *result; + + result = gtk_bitset_new_empty (); + + n_items = gtk_list_base_get_n_items 
(base); + if (n_items == 0) + return result; + + first_column = floor (rect->x / self->column_width); + last_column = floor ((rect->x + rect->width) / self->column_width); + if (!gtk_grid_view_get_cell_at_y (self, rect->y, &first_row, NULL, NULL)) + first_row = rect->y < 0 ? 0 : n_items - 1; + if (!gtk_grid_view_get_cell_at_y (self, rect->y + rect->height, &last_row, NULL, NULL)) + last_row = rect->y < 0 ? 0 : n_items - 1; + + gtk_bitset_add_rectangle (result, + first_row + first_column, + last_column - first_column + 1, + (last_row - first_row) / self->n_columns + 1, + self->n_columns); + + return result; +} + static guint gtk_grid_view_move_focus_along (GtkListBase *base, guint pos, @@ -687,43 +718,6 @@ cell_set_size (Cell *cell, gtk_rb_tree_node_mark_dirty (cell); } -static void -gtk_grid_view_size_allocate_child (GtkGridView *self, - GtkWidget *child, - int x, - int y, - int width, - int height) -{ - GtkAllocation child_allocation; - - if (gtk_list_base_get_orientation (GTK_LIST_BASE (self)) == GTK_ORIENTATION_VERTICAL) - { - child_allocation.x = x; - child_allocation.y = y; - child_allocation.width = width; - child_allocation.height = height; - } - else if (_gtk_widget_get_direction (GTK_WIDGET (self)) == GTK_TEXT_DIR_LTR) - { - child_allocation.x = y; - child_allocation.y = x; - child_allocation.width = height; - child_allocation.height = width; - } - else - { - int mirror_point = gtk_widget_get_width (GTK_WIDGET (self)); - - child_allocation.x = mirror_point - y - height; - child_allocation.y = x; - child_allocation.width = height; - child_allocation.height = width; - } - - gtk_widget_size_allocate (child, &child_allocation, -1); -} - static int gtk_grid_view_compute_total_height (GtkGridView *self) { @@ -753,8 +747,6 @@ gtk_grid_view_size_allocate (GtkWidget *widget, int x, y; guint i; - gtk_list_base_allocate_rubberband (GTK_LIST_BASE (widget)); - orientation = gtk_list_base_get_orientation (GTK_LIST_BASE (self)); scroll_policy = 
gtk_list_base_get_scroll_policy (GTK_LIST_BASE (self), orientation); opposite_orientation = OPPOSITE_ORIENTATION (orientation); @@ -873,7 +865,7 @@ gtk_grid_view_size_allocate (GtkWidget *widget, { row_height += cell->size; - gtk_grid_view_size_allocate_child (self, + gtk_list_base_size_allocate_child (GTK_LIST_BASE (self), cell->parent.widget, x + ceil (self->column_width * i), y, @@ -914,6 +906,8 @@ gtk_grid_view_size_allocate (GtkWidget *widget, } } } + + gtk_list_base_allocate_rubberband (GTK_LIST_BASE (widget)); } static void @@ -1037,6 +1031,7 @@ gtk_grid_view_class_init (GtkGridViewClass *klass) list_base_class->list_item_augment_func = cell_augment; list_base_class->get_allocation_along = gtk_grid_view_get_allocation_along; list_base_class->get_allocation_across = gtk_grid_view_get_allocation_across; + list_base_class->get_items_in_rect = gtk_grid_view_get_items_in_rect; list_base_class->get_position_from_allocation = gtk_grid_view_get_position_from_allocation; list_base_class->move_focus_along = gtk_grid_view_move_focus_along; list_base_class->move_focus_across = gtk_grid_view_move_focus_across; diff --git a/gtk/gtklistbase.c b/gtk/gtklistbase.c index 9b33c56a53..5a1a38e40a 100644 --- a/gtk/gtklistbase.c +++ b/gtk/gtklistbase.c @@ -22,6 +22,8 @@ #include "gtklistbaseprivate.h" #include "gtkadjustment.h" +#include "gtkbitset.h" +#include "gtkdragsource.h" #include "gtkdropcontrollermotion.h" #include "gtkgesturedrag.h" #include "gtkgizmoprivate.h" @@ -30,7 +32,6 @@ #include "gtkmultiselection.h" #include "gtkorientable.h" #include "gtkscrollable.h" -#include "gtkset.h" #include "gtksingleselection.h" #include "gtksnapshot.h" #include "gtkstylecontextprivate.h" @@ -41,24 +42,15 @@ typedef struct _RubberbandData RubberbandData; struct _RubberbandData { - GtkWidget *widget; - GtkSet *active; - double x1, y1; - double x2, y2; - gboolean modify; - gboolean extend; + GtkWidget *widget; /* The rubberband widget */ + + GtkListItemTracker *start_tracker; /* The item 
we started dragging on */ + double start_align_across; /* alignment in horizontal direction */ + double start_align_along; /* alignment in vertical direction */ + + double pointer_x, pointer_y; /* mouse coordinates in widget space */ }; -static void -rubberband_data_free (gpointer data) -{ - RubberbandData *rdata = data; - - g_clear_pointer (&rdata->widget, gtk_widget_unparent); - g_clear_pointer (&rdata->active, gtk_set_free); - g_free (rdata); -} - typedef struct _GtkListBasePrivate GtkListBasePrivate; struct _GtkListBasePrivate @@ -1241,8 +1233,6 @@ gtk_list_base_class_init (GtkListBaseClass *klass) gtk_widget_class_add_binding_action (widget_class, GDK_KEY_backslash, GDK_CONTROL_MASK, "list.unselect-all", NULL); } -static void gtk_list_base_update_rubberband_selection (GtkListBase *self); - static gboolean autoscroll_cb (GtkWidget *widget, GdkFrameClock *frame_clock, @@ -1263,15 +1253,6 @@ autoscroll_cb (GtkWidget *widget, delta_y = gtk_adjustment_get_value (priv->adjustment[GTK_ORIENTATION_VERTICAL]) - value; - if (priv->rubberband) - { - priv->rubberband->x2 += delta_x; - priv->rubberband->y2 += delta_y; - gtk_list_base_update_rubberband_selection (self); - } - - gtk_widget_queue_draw (GTK_WIDGET (self)); - if (delta_x != 0 || delta_y != 0) { return G_SOURCE_CONTINUE; @@ -1290,8 +1271,14 @@ add_autoscroll (GtkListBase *self, { GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); - priv->autoscroll_delta_x = delta_x; - priv->autoscroll_delta_y = delta_y; + if (gtk_list_base_adjustment_is_flipped (self, GTK_ORIENTATION_HORIZONTAL)) + priv->autoscroll_delta_x = -delta_x; + else + priv->autoscroll_delta_x = delta_x; + if (gtk_list_base_adjustment_is_flipped (self, GTK_ORIENTATION_VERTICAL)) + priv->autoscroll_delta_y = -delta_y; + else + priv->autoscroll_delta_y = delta_y; if (priv->autoscroll_id == 0) priv->autoscroll_id = gtk_widget_add_tick_callback (GTK_WIDGET (self), autoscroll_cb, self, NULL); @@ -1346,76 +1333,214 @@ update_autoscroll 
(GtkListBase *self, remove_autoscroll (self); } +/** + * gtk_list_base_size_allocate_child: + * @self: The listbase + * @child: The child + * @x: top left coordinate in the across direction + * @y: top right coordinate in the along direction + * @width: size in the across direction + * @height: size in the along direction + * + * Allocates a child widget in the list coordinate system, + * but with the coordinates already offset by the scroll + * offset. + **/ +void +gtk_list_base_size_allocate_child (GtkListBase *self, + GtkWidget *child, + int x, + int y, + int width, + int height) +{ + GtkAllocation child_allocation; + + if (gtk_list_base_get_orientation (GTK_LIST_BASE (self)) == GTK_ORIENTATION_VERTICAL) + { + if (_gtk_widget_get_direction (GTK_WIDGET (self)) == GTK_TEXT_DIR_LTR) + { + child_allocation.x = x; + child_allocation.y = y; + child_allocation.width = width; + child_allocation.height = height; + } + else + { + int mirror_point = gtk_widget_get_width (GTK_WIDGET (self)); + + child_allocation.x = mirror_point - x - width; + child_allocation.y = y; + child_allocation.width = width; + child_allocation.height = height; + } + } + else + { + if (_gtk_widget_get_direction (GTK_WIDGET (self)) == GTK_TEXT_DIR_LTR) + { + child_allocation.x = y; + child_allocation.y = x; + child_allocation.width = height; + child_allocation.height = width; + } + else + { + int mirror_point = gtk_widget_get_width (GTK_WIDGET (self)); + + child_allocation.x = mirror_point - y - height; + child_allocation.y = x; + child_allocation.width = height; + child_allocation.height = width; + } + } + + gtk_widget_size_allocate (child, &child_allocation, -1); +} + +static void +gtk_list_base_widget_to_list (GtkListBase *self, + double x_widget, + double y_widget, + int *across_out, + int *along_out) +{ + GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); + GtkWidget *widget = GTK_WIDGET (self); + + if (gtk_widget_get_direction (widget) == GTK_TEXT_DIR_RTL) + x_widget = 
gtk_widget_get_width (widget) - x_widget; + + gtk_list_base_get_adjustment_values (self, OPPOSITE_ORIENTATION (priv->orientation), across_out, NULL, NULL); + gtk_list_base_get_adjustment_values (self, priv->orientation, along_out, NULL, NULL); + + if (priv->orientation == GTK_ORIENTATION_VERTICAL) + { + *across_out += x_widget; + *along_out += y_widget; + } + else + { + *across_out += y_widget; + *along_out += x_widget; + } +} + +static GtkBitset * +gtk_list_base_get_items_in_rect (GtkListBase *self, + const GdkRectangle *rect) +{ + return GTK_LIST_BASE_GET_CLASS (self)->get_items_in_rect (self, rect); +} + +static gboolean +gtk_list_base_get_rubberband_coords (GtkListBase *self, + GdkRectangle *rect) +{ + GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); + int x1, x2, y1, y2; + + if (!priv->rubberband) + return FALSE; + + if (priv->rubberband->start_tracker == NULL) + { + x1 = 0; + y1 = 0; + } + else + { + guint pos = gtk_list_item_tracker_get_position (priv->item_manager, priv->rubberband->start_tracker); + + if (gtk_list_base_get_allocation_along (self, pos, &y1, &y2) && + gtk_list_base_get_allocation_across (self, pos, &x1, &x2)) + { + x1 += x2 * priv->rubberband->start_align_across; + y1 += y2 * priv->rubberband->start_align_along; + } + else + { + x1 = 0; + y1 = 0; + } + } + + gtk_list_base_widget_to_list (self, + priv->rubberband->pointer_x, priv->rubberband->pointer_y, + &x2, &y2); + + rect->x = MIN (x1, x2); + rect->y = MIN (y1, y2); + rect->width = ABS (x1 - x2) + 1; + rect->height = ABS (y1 - y2) + 1; + + return TRUE; +} + void gtk_list_base_allocate_rubberband (GtkListBase *self) { GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); + GtkRequisition min_size; GdkRectangle rect; - double x, y; - int min, nat; + int offset_x, offset_y; - if (!priv->rubberband) + if (!gtk_list_base_get_rubberband_coords (self, &rect)) return; - gtk_widget_measure (priv->rubberband->widget, - GTK_ORIENTATION_HORIZONTAL, -1, - &min, 
&nat, NULL, NULL); + gtk_widget_get_preferred_size (priv->rubberband->widget, &min_size, NULL); + rect.width = MAX (min_size.width, rect.width); + rect.height = MAX (min_size.height, rect.height); - x = gtk_adjustment_get_value (priv->adjustment[GTK_ORIENTATION_HORIZONTAL]); - y = gtk_adjustment_get_value (priv->adjustment[GTK_ORIENTATION_VERTICAL]); + gtk_list_base_get_adjustment_values (self, OPPOSITE_ORIENTATION (priv->orientation), &offset_x, NULL, NULL); + gtk_list_base_get_adjustment_values (self, priv->orientation, &offset_y, NULL, NULL); + rect.x -= offset_x; + rect.y -= offset_y; - rect.x = MIN (priv->rubberband->x1, priv->rubberband->x2) - x; - rect.y = MIN (priv->rubberband->y1, priv->rubberband->y2) - y; - rect.width = ABS (priv->rubberband->x1 - priv->rubberband->x2) + 1; - rect.height = ABS (priv->rubberband->y1 - priv->rubberband->y2) + 1; - - gtk_widget_size_allocate (priv->rubberband->widget, &rect, -1); + gtk_list_base_size_allocate_child (self, + priv->rubberband->widget, + rect.x, rect.y, rect.width, rect.height); } static void gtk_list_base_start_rubberband (GtkListBase *self, double x, - double y, - gboolean modify, - gboolean extend) + double y) { GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); - double value_x, value_y; + cairo_rectangle_int_t item_area; + int list_x, list_y; + guint pos; if (priv->rubberband) return; - value_x = gtk_adjustment_get_value (priv->adjustment[GTK_ORIENTATION_HORIZONTAL]); - value_y = gtk_adjustment_get_value (priv->adjustment[GTK_ORIENTATION_VERTICAL]); + gtk_list_base_widget_to_list (self, x, y, &list_x, &list_y); + if (!gtk_list_base_get_position_from_allocation (self, list_x, list_y, &pos, &item_area)) + { + g_warning ("Could not start rubberbanding: No item\n"); + return; + } priv->rubberband = g_new0 (RubberbandData, 1); - priv->rubberband->x1 = priv->rubberband->x2 = x + value_x; - priv->rubberband->y1 = priv->rubberband->y2 = y + value_y; + priv->rubberband->start_tracker = 
gtk_list_item_tracker_new (priv->item_manager); + gtk_list_item_tracker_set_position (priv->item_manager, priv->rubberband->start_tracker, pos, 0, 0); + priv->rubberband->start_align_across = (double) (list_x - item_area.x) / item_area.width; + priv->rubberband->start_align_along = (double) (list_y - item_area.y) / item_area.height; - priv->rubberband->modify = modify; - priv->rubberband->extend = extend; + priv->rubberband->pointer_x = x; + priv->rubberband->pointer_y = y; priv->rubberband->widget = gtk_gizmo_new ("rubberband", NULL, NULL, NULL, NULL, NULL, NULL); gtk_widget_set_parent (priv->rubberband->widget, GTK_WIDGET (self)); - priv->rubberband->active = gtk_set_new (); } static void -range_cb (guint position, - guint *start, - guint *n_items, - gboolean *selected, - gpointer data) -{ - GtkSet *set = data; - - gtk_set_find_range (set, position, gtk_set_get_max (set) + 1, start, n_items, selected); -} - -static void -gtk_list_base_stop_rubberband (GtkListBase *self) +gtk_list_base_stop_rubberband (GtkListBase *self, + gboolean modify, + gboolean extend) { GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); GtkListItemManagerItem *item; @@ -1433,22 +1558,102 @@ gtk_list_base_stop_rubberband (GtkListBase *self) } model = gtk_list_item_manager_get_model (priv->item_manager); + if (model != NULL) + { + GtkBitset *selected, *mask; + GdkRectangle rect; + GtkBitset *rubberband_selection; - if (priv->rubberband->modify) - { - gtk_selection_model_unselect_callback (model, range_cb, priv->rubberband->active); - } - else - { - gtk_selection_model_select_callback (model, - !priv->rubberband->extend, - range_cb, priv->rubberband->active); + if (!gtk_list_base_get_rubberband_coords (self, &rect)) + return; + + rubberband_selection = gtk_list_base_get_items_in_rect (self, &rect); + if (gtk_bitset_is_empty (rubberband_selection)) + { + gtk_bitset_unref (rubberband_selection); + return; + } + + if (modify && extend) /* Ctrl + Shift */ + { + GtkBitset 
*current; + guint min = gtk_bitset_get_minimum (rubberband_selection); + guint max = gtk_bitset_get_maximum (rubberband_selection); + /* toggle the rubberband, keep the rest */ + current = gtk_selection_model_get_selection_in_range (model, min, max - min + 1); + selected = gtk_bitset_copy (current); + gtk_bitset_unref (current); + gtk_bitset_intersect (selected, rubberband_selection); + gtk_bitset_difference (selected, rubberband_selection); + + mask = gtk_bitset_ref (rubberband_selection); + } + else if (modify) /* Ctrl */ + { + /* select the rubberband, keep the rest */ + selected = gtk_bitset_ref (rubberband_selection); + mask = gtk_bitset_ref (rubberband_selection); + } + else if (extend) /* Shift */ + { + /* unselect the rubberband, keep the rest */ + selected = gtk_bitset_new_empty (); + mask = gtk_bitset_ref (rubberband_selection); + } + else /* no modifer */ + { + /* select the rubberband, clear the rest */ + selected = gtk_bitset_ref (rubberband_selection); + mask = gtk_bitset_new_empty (); + gtk_bitset_add_range (mask, 0, g_list_model_get_n_items (G_LIST_MODEL (model))); + } + + gtk_selection_model_set_selection (model, selected, mask); + + gtk_bitset_unref (selected); + gtk_bitset_unref (mask); + gtk_bitset_unref (rubberband_selection); } - g_clear_pointer (&priv->rubberband, rubberband_data_free); + gtk_list_item_tracker_free (priv->item_manager, priv->rubberband->start_tracker); + g_clear_pointer (&priv->rubberband->widget, gtk_widget_unparent); + g_free (priv->rubberband); + priv->rubberband = NULL; + remove_autoscroll (self); +} - gtk_widget_queue_draw (GTK_WIDGET (self)); +static void +gtk_list_base_update_rubberband_selection (GtkListBase *self) +{ + GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); + GtkListItemManagerItem *item; + GdkRectangle rect; + guint pos; + GtkBitset *rubberband_selection; + + if (!gtk_list_base_get_rubberband_coords (self, &rect)) + return; + + rubberband_selection = gtk_list_base_get_items_in_rect 
(self, &rect); + + pos = 0; + for (item = gtk_list_item_manager_get_first (priv->item_manager); + item != NULL; + item = gtk_rb_tree_node_get_next (item)) + { + if (item->widget) + { + if (gtk_bitset_contains (rubberband_selection, pos)) + gtk_widget_set_state_flags (item->widget, GTK_STATE_FLAG_ACTIVE, FALSE); + else + gtk_widget_unset_state_flags (item->widget, GTK_STATE_FLAG_ACTIVE); + } + + pos += item->n_items; + } + + gtk_bitset_unref (rubberband_selection); } static void @@ -1461,51 +1666,14 @@ gtk_list_base_update_rubberband (GtkListBase *self, if (!priv->rubberband) return; - priv->rubberband->x2 = x + gtk_adjustment_get_value (priv->adjustment[GTK_ORIENTATION_HORIZONTAL]); - priv->rubberband->y2 = y + gtk_adjustment_get_value (priv->adjustment[GTK_ORIENTATION_VERTICAL]); + priv->rubberband->pointer_x = x; + priv->rubberband->pointer_y = y; gtk_list_base_update_rubberband_selection (self); update_autoscroll (self, x, y); - gtk_widget_queue_draw (GTK_WIDGET (self)); -} - -static void -gtk_list_base_update_rubberband_selection (GtkListBase *self) -{ - GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); - GdkRectangle rect; - GdkRectangle alloc; - GtkListItemManagerItem *item; - - gtk_list_base_allocate_rubberband (self); - gtk_widget_get_allocation (priv->rubberband->widget, &rect); - - for (item = gtk_list_item_manager_get_first (priv->item_manager); - item != NULL; - item = gtk_rb_tree_node_get_next (item)) - { - guint pos; - - if (!item->widget) - continue; - - pos = gtk_list_item_manager_get_item_position (priv->item_manager, item); - - gtk_widget_get_allocation (item->widget, &alloc); - - if (gdk_rectangle_intersect (&rect, &alloc, &alloc)) - { - gtk_set_add_item (priv->rubberband->active, pos); - gtk_widget_set_state_flags (item->widget, GTK_STATE_FLAG_ACTIVE, FALSE); - } - else - { - gtk_set_remove_item (priv->rubberband->active, pos); - gtk_widget_unset_state_flags (item->widget, GTK_STATE_FLAG_ACTIVE); - } - } + 
gtk_widget_queue_allocate (GTK_WIDGET (self)); } static void @@ -1529,27 +1697,25 @@ get_selection_modifiers (GtkGesture *gesture, *extend = TRUE; } -static void -gtk_list_base_drag_begin (GtkGestureDrag *gesture, - double start_x, - double start_y, - GtkListBase *self) -{ - gboolean modify; - gboolean extend; - - get_selection_modifiers (GTK_GESTURE (gesture), &modify, &extend); - gtk_list_base_start_rubberband (self, start_x, start_y, modify, extend); -} - static void gtk_list_base_drag_update (GtkGestureDrag *gesture, double offset_x, double offset_y, GtkListBase *self) { + GtkListBasePrivate *priv = gtk_list_base_get_instance_private (self); double start_x, start_y; + gtk_gesture_drag_get_start_point (gesture, &start_x, &start_y); + + if (!priv->rubberband) + { + if (!gtk_drag_check_threshold (GTK_WIDGET (self), 0, 0, offset_x, offset_y)) + return; + + gtk_gesture_set_state (GTK_GESTURE (gesture), GTK_EVENT_SEQUENCE_CLAIMED); + gtk_list_base_start_rubberband (self, start_x, start_y); + } gtk_list_base_update_rubberband (self, start_x + offset_x, start_y + offset_y); } @@ -1559,8 +1725,11 @@ gtk_list_base_drag_end (GtkGestureDrag *gesture, double offset_y, GtkListBase *self) { + gboolean modify, extend; + gtk_list_base_drag_update (gesture, offset_x, offset_y, self); - gtk_list_base_stop_rubberband (self); + get_selection_modifiers (GTK_GESTURE (gesture), &modify, &extend); + gtk_list_base_stop_rubberband (self, modify, extend); } void @@ -1577,7 +1746,6 @@ gtk_list_base_set_enable_rubberband (GtkListBase *self, if (enable) { priv->drag_gesture = gtk_gesture_drag_new (); - g_signal_connect (priv->drag_gesture, "drag-begin", G_CALLBACK (gtk_list_base_drag_begin), self); g_signal_connect (priv->drag_gesture, "drag-update", G_CALLBACK (gtk_list_base_drag_update), self); g_signal_connect (priv->drag_gesture, "drag-end", G_CALLBACK (gtk_list_base_drag_end), self); gtk_widget_add_controller (GTK_WIDGET (self), GTK_EVENT_CONTROLLER (priv->drag_gesture)); diff --git 
a/gtk/gtklistbaseprivate.h b/gtk/gtklistbaseprivate.h index 7cf0504a43..c714c63c0c 100644 --- a/gtk/gtklistbaseprivate.h +++ b/gtk/gtklistbaseprivate.h @@ -54,6 +54,8 @@ struct _GtkListBaseClass int along, guint *pos, cairo_rectangle_int_t *area); + GtkBitset * (* get_items_in_rect) (GtkListBase *self, + const cairo_rectangle_int_t *rect); guint (* move_focus_along) (GtkListBase *self, guint pos, int steps); @@ -99,10 +101,17 @@ gboolean gtk_list_base_grab_focus_on_item (GtkListBase gboolean select, gboolean modify, gboolean extend); + void gtk_list_base_set_enable_rubberband (GtkListBase *self, gboolean enable); gboolean gtk_list_base_get_enable_rubberband (GtkListBase *self); - void gtk_list_base_allocate_rubberband (GtkListBase *self); +void gtk_list_base_size_allocate_child (GtkListBase *self, + GtkWidget *child, + int x, + int y, + int width, + int height); + #endif /* __GTK_LIST_BASE_PRIVATE_H__ */ diff --git a/gtk/gtklistitemmanagerprivate.h b/gtk/gtklistitemmanagerprivate.h index ccb0570c20..baf4c51442 100644 --- a/gtk/gtklistitemmanagerprivate.h +++ b/gtk/gtklistitemmanagerprivate.h @@ -88,10 +88,10 @@ GtkSelectionModel * gtk_list_item_manager_get_model (GtkListItemMana guint gtk_list_item_manager_get_size (GtkListItemManager *self); void gtk_list_item_manager_set_single_click_activate - (GtkListItemManager *self, - gboolean single_click_activate); + (GtkListItemManager *self, + gboolean single_click_activate); gboolean gtk_list_item_manager_get_single_click_activate - (GtkListItemManager *self); + (GtkListItemManager *self); GtkListItemTracker * gtk_list_item_tracker_new (GtkListItemManager *self); void gtk_list_item_tracker_free (GtkListItemManager *self, diff --git a/gtk/gtklistview.c b/gtk/gtklistview.c index 484f5b172c..4139e071c3 100644 --- a/gtk/gtklistview.c +++ b/gtk/gtklistview.c @@ -21,6 +21,7 @@ #include "gtklistview.h" +#include "gtkbitset.h" #include "gtkintl.h" #include "gtklistbaseprivate.h" #include "gtklistitemmanagerprivate.h" @@ -374,6 
+375,36 @@ gtk_list_view_get_allocation_across (GtkListBase *base, return TRUE; } +static GtkBitset * +gtk_list_view_get_items_in_rect (GtkListBase *base, + const cairo_rectangle_int_t *rect) +{ + GtkListView *self = GTK_LIST_VIEW (base); + guint first, last, n_items; + GtkBitset *result; + ListRow *row; + + result = gtk_bitset_new_empty (); + + n_items = gtk_list_base_get_n_items (base); + if (n_items == 0) + return result; + + row = gtk_list_view_get_row_at_y (self, rect->y, NULL); + if (row) + first = gtk_list_item_manager_get_item_position (self->item_manager, row); + else + first = rect->y < 0 ? 0 : n_items - 1; + row = gtk_list_view_get_row_at_y (self, rect->y + rect->height, NULL); + if (row) + last = gtk_list_item_manager_get_item_position (self->item_manager, row); + else + last = rect->y < 0 ? 0 : n_items - 1; + + gtk_bitset_add_range_closed (result, first, last); + return result; +} + static guint gtk_list_view_move_focus_along (GtkListBase *base, guint pos, @@ -553,43 +584,6 @@ gtk_list_view_measure (GtkWidget *widget, gtk_list_view_measure_across (widget, orientation, for_size, minimum, natural); } -static void -gtk_list_view_size_allocate_child (GtkListView *self, - GtkWidget *child, - int x, - int y, - int width, - int height) -{ - GtkAllocation child_allocation; - - if (gtk_list_base_get_orientation (GTK_LIST_BASE (self)) == GTK_ORIENTATION_VERTICAL) - { - child_allocation.x = x; - child_allocation.y = y; - child_allocation.width = width; - child_allocation.height = height; - } - else if (_gtk_widget_get_direction (GTK_WIDGET (self)) == GTK_TEXT_DIR_LTR) - { - child_allocation.x = y; - child_allocation.y = x; - child_allocation.width = height; - child_allocation.height = width; - } - else - { - int mirror_point = gtk_widget_get_width (GTK_WIDGET (self)); - - child_allocation.x = mirror_point - y - height; - child_allocation.y = x; - child_allocation.width = height; - child_allocation.height = width; - } - - gtk_widget_size_allocate (child, 
&child_allocation, -1); -} - static void gtk_list_view_size_allocate (GtkWidget *widget, int width, @@ -604,8 +598,6 @@ gtk_list_view_size_allocate (GtkWidget *widget, GtkOrientation orientation, opposite_orientation; GtkScrollablePolicy scroll_policy; - gtk_list_base_allocate_rubberband (GTK_LIST_BASE (self)); - orientation = gtk_list_base_get_orientation (GTK_LIST_BASE (self)); opposite_orientation = OPPOSITE_ORIENTATION (orientation); scroll_policy = gtk_list_base_get_scroll_policy (GTK_LIST_BASE (self), orientation); @@ -685,7 +677,7 @@ gtk_list_view_size_allocate (GtkWidget *widget, { if (row->parent.widget) { - gtk_list_view_size_allocate_child (self, + gtk_list_base_size_allocate_child (GTK_LIST_BASE (self), row->parent.widget, x, y, @@ -695,6 +687,8 @@ gtk_list_view_size_allocate (GtkWidget *widget, y += row->height * row->parent.n_items; } + + gtk_list_base_allocate_rubberband (GTK_LIST_BASE (self)); } static void @@ -810,6 +804,7 @@ gtk_list_view_class_init (GtkListViewClass *klass) list_base_class->list_item_augment_func = list_row_augment; list_base_class->get_allocation_along = gtk_list_view_get_allocation_along; list_base_class->get_allocation_across = gtk_list_view_get_allocation_across; + list_base_class->get_items_in_rect = gtk_list_view_get_items_in_rect; list_base_class->get_position_from_allocation = gtk_list_view_get_position_from_allocation; list_base_class->move_focus_along = gtk_list_view_move_focus_along; list_base_class->move_focus_across = gtk_list_view_move_focus_across; diff --git a/gtk/gtkmultiselection.c b/gtk/gtkmultiselection.c index 70045b525c..34e218d7a5 100644 --- a/gtk/gtkmultiselection.c +++ b/gtk/gtkmultiselection.c @@ -21,9 +21,9 @@ #include "gtkmultiselection.h" +#include "gtkbitset.h" #include "gtkintl.h" #include "gtkselectionmodel.h" -#include "gtkset.h" /** * SECTION:gtkmultiselection @@ -47,7 +47,8 @@ struct _GtkMultiSelection GListModel *model; - GtkSet *selected; + GtkBitset *selected; + GHashTable *items; /* item => 
position */ }; struct _GtkMultiSelectionClass @@ -103,175 +104,106 @@ gtk_multi_selection_is_selected (GtkSelectionModel *model, { GtkMultiSelection *self = GTK_MULTI_SELECTION (model); - return gtk_set_contains (self->selected, position); + return gtk_bitset_contains (self->selected, position); } -static gboolean -gtk_multi_selection_select_range (GtkSelectionModel *model, - guint position, - guint n_items, - gboolean exclusive) -{ - GtkMultiSelection *self = GTK_MULTI_SELECTION (model); - guint min = G_MAXUINT; - guint max = 0; - - if (exclusive) - { - min = gtk_set_get_min (self->selected); - max = gtk_set_get_max (self->selected); - gtk_set_remove_all (self->selected); - } - - gtk_set_add_range (self->selected, position, n_items); - - min = MIN (position, min); - max = MAX (max, position + n_items - 1); - - gtk_selection_model_selection_changed (model, min, max - min + 1); - - return TRUE; -} - -static gboolean -gtk_multi_selection_unselect_range (GtkSelectionModel *model, - guint position, - guint n_items) +static GtkBitset * +gtk_multi_selection_get_selection_in_range (GtkSelectionModel *model, + guint pos, + guint n_items) { GtkMultiSelection *self = GTK_MULTI_SELECTION (model); - gtk_set_remove_range (self->selected, position, n_items); - gtk_selection_model_selection_changed (model, position, n_items); - - return TRUE; + return gtk_bitset_ref (self->selected); } -static gboolean -gtk_multi_selection_select_item (GtkSelectionModel *model, - guint position, - gboolean exclusive) +static void +gtk_multi_selection_toggle_selection (GtkMultiSelection *self, + GtkBitset *changes) { - return gtk_multi_selection_select_range (model, position, 1, exclusive); -} + GListModel *model = G_LIST_MODEL (self); + GtkBitsetIter change_iter, selected_iter; + GtkBitset *selected; + guint change_pos, selected_pos; + gboolean more; -static gboolean -gtk_multi_selection_unselect_item (GtkSelectionModel *model, - guint position) -{ - return gtk_multi_selection_unselect_range 
(model, position, 1); -} + gtk_bitset_difference (self->selected, changes); -static gboolean -gtk_multi_selection_select_all (GtkSelectionModel *model) -{ - return gtk_multi_selection_select_range (model, 0, g_list_model_get_n_items (G_LIST_MODEL (model)), FALSE); -} + selected = gtk_bitset_copy (changes); + gtk_bitset_intersect (selected, self->selected); -static gboolean -gtk_multi_selection_unselect_all (GtkSelectionModel *model) -{ - return gtk_multi_selection_unselect_range (model, 0, g_list_model_get_n_items (G_LIST_MODEL (model))); -} + if (!gtk_bitset_iter_init_first (&selected_iter, selected, &selected_pos)) + selected_pos = G_MAXUINT; -static gboolean -gtk_multi_selection_add_or_remove (GtkSelectionModel *model, - gboolean unselect_rest, - gboolean add, - GtkSelectionCallback callback, - gpointer data) -{ - GtkMultiSelection *self = GTK_MULTI_SELECTION (model); - guint pos, start, n_items; - gboolean in; - guint min, max; - guint n; - - n = g_list_model_get_n_items (G_LIST_MODEL (self)); - - min = G_MAXUINT; - max = 0; - - if (unselect_rest) + for (more = gtk_bitset_iter_init_first (&change_iter, changes, &change_pos); + more; + more = gtk_bitset_iter_next (&change_iter, &change_pos)) { - min = gtk_set_get_min (self->selected); - max = gtk_set_get_max (self->selected); - gtk_set_remove_all (self->selected); - } + gpointer item = g_list_model_get_item (model, change_pos); - for (pos = 0; pos < n; pos = start + n_items) - { - callback (pos, &start, &n_items, &in, data); - - if (n_items == 0) - break; - - g_assert (start <= pos && pos < start + n_items); - - if (in) + if (change_pos < selected_pos) { - if (start < min) - min = start; - if (start + n_items - 1 > max) - max = start + n_items - 1; - - if (add) - gtk_set_add_range (self->selected, start, n_items); - else - gtk_set_remove_range (self->selected, start, n_items); + g_hash_table_remove (self->items, item); + g_object_unref (item); } + else + { + g_assert (change_pos == selected_pos); - pos = start + 
n_items; + g_hash_table_insert (self->items, item, GUINT_TO_POINTER (change_pos)); + + if (!gtk_bitset_iter_next (&selected_iter, &selected_pos)) + selected_pos = G_MAXUINT; + } } + gtk_bitset_unref (selected); +} + +static gboolean +gtk_multi_selection_set_selection (GtkSelectionModel *model, + GtkBitset *selected, + GtkBitset *mask) +{ + GtkMultiSelection *self = GTK_MULTI_SELECTION (model); + GtkBitset *changes; + guint min, max, n_items; + + /* changes = (self->selected XOR selected) AND mask + * But doing it this way avoids looking at all values outside the mask + */ + changes = gtk_bitset_copy (selected); + gtk_bitset_difference (changes, self->selected); + gtk_bitset_intersect (changes, mask); + + min = gtk_bitset_get_minimum (changes); + max = gtk_bitset_get_maximum (changes); + + /* sanity check */ + n_items = g_list_model_get_n_items (self->model); + if (max >= n_items) + { + gtk_bitset_remove_range_closed (changes, n_items, max); + max = gtk_bitset_get_maximum (changes); + } + + /* actually do the change */ + gtk_multi_selection_toggle_selection (self, changes); + + gtk_bitset_unref (changes); + if (min <= max) gtk_selection_model_selection_changed (model, min, max - min + 1); return TRUE; } -static gboolean -gtk_multi_selection_select_callback (GtkSelectionModel *model, - gboolean unselect_rest, - GtkSelectionCallback callback, - gpointer data) -{ - return gtk_multi_selection_add_or_remove (model, unselect_rest, TRUE, callback, data); -} - -static gboolean -gtk_multi_selection_unselect_callback (GtkSelectionModel *model, - GtkSelectionCallback callback, - gpointer data) -{ - return gtk_multi_selection_add_or_remove (model, FALSE, FALSE, callback, data); -} - -static void -gtk_multi_selection_query_range (GtkSelectionModel *model, - guint position, - guint *start_range, - guint *n_items, - gboolean *selected) -{ - GtkMultiSelection *self = GTK_MULTI_SELECTION (model); - guint upper_bound = g_list_model_get_n_items (self->model); - - gtk_set_find_range 
(self->selected, position, upper_bound, start_range, n_items, selected); -} - static void gtk_multi_selection_selection_model_init (GtkSelectionModelInterface *iface) { iface->is_selected = gtk_multi_selection_is_selected; - iface->select_item = gtk_multi_selection_select_item; - iface->unselect_item = gtk_multi_selection_unselect_item; - iface->select_range = gtk_multi_selection_select_range; - iface->unselect_range = gtk_multi_selection_unselect_range; - iface->select_all = gtk_multi_selection_select_all; - iface->unselect_all = gtk_multi_selection_unselect_all; - iface->select_callback = gtk_multi_selection_select_callback; - iface->unselect_callback = gtk_multi_selection_unselect_callback; - iface->query_range = gtk_multi_selection_query_range; + iface->get_selection_in_range = gtk_multi_selection_get_selection_in_range; + iface->set_selection = gtk_multi_selection_set_selection; } G_DEFINE_TYPE_EXTENDED (GtkMultiSelection, gtk_multi_selection, G_TYPE_OBJECT, 0, @@ -287,8 +219,57 @@ gtk_multi_selection_items_changed_cb (GListModel *model, guint added, GtkMultiSelection *self) { - gtk_set_remove_range (self->selected, position, removed); - gtk_set_shift (self->selected, position, (int)added - (int)removed); + GHashTableIter iter; + gpointer item, pos_pointer; + GHashTable *pending = NULL; + guint i; + + gtk_bitset_slice (self->selected, position, removed, added); + + g_hash_table_iter_init (&iter, self->items); + while (g_hash_table_iter_next (&iter, &item, &pos_pointer)) + { + guint pos = GPOINTER_TO_UINT (pos_pointer); + + if (pos < position) + continue; + else if (pos >= position + removed) + g_hash_table_iter_replace (&iter, GUINT_TO_POINTER (pos - removed + added)); + else /* if pos is in the removed range */ + { + if (added == 0) + { + g_hash_table_iter_remove (&iter); + } + else + { + g_hash_table_iter_steal (&iter); + if (pending == NULL) + pending = g_hash_table_new_full (NULL, NULL, g_object_unref, NULL); + g_hash_table_add (pending, item); + } + } + } 
+ + for (i = position; pending != NULL && i < position + added; i++) + { + item = g_list_model_get_item (model, i); + if (g_hash_table_contains (pending, item)) + { + gtk_bitset_add (self->selected, i); + g_hash_table_insert (self->items, item, GUINT_TO_POINTER (i)); + g_hash_table_remove (pending, item); + if (g_hash_table_size (pending) == 0) + g_clear_pointer (&pending, g_hash_table_unref); + } + else + { + g_object_unref (item); + } + } + + g_clear_pointer (&pending, g_hash_table_unref); + g_list_model_items_changed (G_LIST_MODEL (self), position, removed, added); } @@ -306,9 +287,9 @@ gtk_multi_selection_clear_model (GtkMultiSelection *self) static void gtk_multi_selection_set_property (GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) + guint prop_id, + const GValue *value, + GParamSpec *pspec) { GtkMultiSelection *self = GTK_MULTI_SELECTION (object); @@ -332,9 +313,9 @@ gtk_multi_selection_set_property (GObject *object, static void gtk_multi_selection_get_property (GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) + guint prop_id, + GValue *value, + GParamSpec *pspec) { GtkMultiSelection *self = GTK_MULTI_SELECTION (object); @@ -357,7 +338,8 @@ gtk_multi_selection_dispose (GObject *object) gtk_multi_selection_clear_model (self); - g_clear_pointer (&self->selected, gtk_set_free); + g_clear_pointer (&self->selected, gtk_bitset_unref); + g_clear_pointer (&self->items, g_hash_table_unref); G_OBJECT_CLASS (gtk_multi_selection_parent_class)->dispose (object); } @@ -389,7 +371,8 @@ gtk_multi_selection_class_init (GtkMultiSelectionClass *klass) static void gtk_multi_selection_init (GtkMultiSelection *self) { - self->selected = gtk_set_new (); + self->selected = gtk_bitset_new_empty (); + self->items = g_hash_table_new_full (NULL, NULL, g_object_unref, NULL); } /** diff --git a/gtk/gtknoselection.c b/gtk/gtknoselection.c index c3706f2e1c..72ace9a5a5 100644 --- a/gtk/gtknoselection.c +++ b/gtk/gtknoselection.c @@ -21,6 
+21,7 @@ #include "gtknoselection.h" +#include "gtkbitset.h" #include "gtkintl.h" #include "gtkselectionmodel.h" @@ -96,25 +97,19 @@ gtk_no_selection_is_selected (GtkSelectionModel *model, return FALSE; } -static void -gtk_no_selection_query_range (GtkSelectionModel *model, - guint position, - guint *start_range, - guint *n_range, - gboolean *selected) +static GtkBitset * +gtk_no_selection_get_selection_in_range (GtkSelectionModel *model, + guint pos, + guint n_items) { - GtkNoSelection *self = GTK_NO_SELECTION (model); - - *start_range = 0; - *n_range = g_list_model_get_n_items (self->model); - *selected = FALSE; + return gtk_bitset_new_empty (); } static void gtk_no_selection_selection_model_init (GtkSelectionModelInterface *iface) { - iface->is_selected = gtk_no_selection_is_selected; - iface->query_range = gtk_no_selection_query_range; + iface->is_selected = gtk_no_selection_is_selected; + iface->get_selection_in_range = gtk_no_selection_get_selection_in_range; } G_DEFINE_TYPE_EXTENDED (GtkNoSelection, gtk_no_selection, G_TYPE_OBJECT, 0, diff --git a/gtk/gtkpropertyselection.c b/gtk/gtkpropertyselection.c deleted file mode 100644 index 25fb94846d..0000000000 --- a/gtk/gtkpropertyselection.c +++ /dev/null @@ -1,532 +0,0 @@ -/* - * Copyright © 2020 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . 
- * - * Authors: Matthias Clasen - */ - -#include "config.h" - -#include "gtkpropertyselection.h" - -#include "gtkintl.h" -#include "gtkselectionmodel.h" - -/** - * SECTION:gtkpropertyselection - * @Short_description: A selection model that uses an item property - * @Title: GtkPropertySelection - * @see_also: #GtkSelectionModel - * - * GtkPropertySelection is an implementation of the #GtkSelectionModel - * interface that stores the selected state for each item in a property - * of the item. - * - * The property named by #GtkPropertySelection:property must be writable - * boolean property of the item type. GtkPropertySelection preserves the - * selected state of items when they are added to the model, but it does - * not listen to changes of the property while the item is a part of the - * model. It assumes that it has *exclusive* access to the property. - * - * The advantage of storing the selected state in item properties is that - * the state is *persistent* -- when an item is removed and re-added to - * the model, it will still have the same selection state. In particular, - * this makes the selection persist across changes of the sort order if - * the underlying model is a #GtkSortListModel. 
- */ - -struct _GtkPropertySelection -{ - GObject parent_instance; - - GListModel *model; - char *property; -}; - -struct _GtkPropertySelectionClass -{ - GObjectClass parent_class; -}; - -enum { - PROP_0, - PROP_MODEL, - PROP_PROPERTY, - - N_PROPS, -}; - -static GParamSpec *properties[N_PROPS] = { NULL, }; - -static GType -gtk_property_selection_get_item_type (GListModel *list) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (list); - - return g_list_model_get_item_type (self->model); -} - -static guint -gtk_property_selection_get_n_items (GListModel *list) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (list); - - return g_list_model_get_n_items (self->model); -} - -static gpointer -gtk_property_selection_get_item (GListModel *list, - guint position) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (list); - - return g_list_model_get_item (self->model, position); -} - -static void -gtk_property_selection_list_model_init (GListModelInterface *iface) -{ - iface->get_item_type = gtk_property_selection_get_item_type; - iface->get_n_items = gtk_property_selection_get_n_items; - iface->get_item = gtk_property_selection_get_item; -} - -static gboolean -is_selected (GtkPropertySelection *self, - guint position) -{ - gpointer item; - gboolean ret; - - item = g_list_model_get_item (self->model, position); - g_object_get (item, self->property, &ret, NULL); - g_object_unref (item); - - return ret; -} - -static void -set_selected (GtkPropertySelection *self, - guint position, - gboolean selected) -{ - gpointer item; - - item = g_list_model_get_item (self->model, position); - g_object_set (item, self->property, selected, NULL); - g_object_unref (item); -} - -static gboolean -gtk_property_selection_is_selected (GtkSelectionModel *model, - guint position) -{ - return is_selected (GTK_PROPERTY_SELECTION (model), position); -} - -static gboolean -gtk_property_selection_select_range (GtkSelectionModel *model, - guint position, - guint n_items, - gboolean 
exclusive) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (model); - guint i; - guint n; - - n = g_list_model_get_n_items (G_LIST_MODEL (self)); - if (exclusive) - { - for (i = 0; i < n; i++) - set_selected (self, i, FALSE); - } - - for (i = position; i < position + n_items; i++) - set_selected (self, i, TRUE); - - /* FIXME: do better here */ - if (exclusive) - gtk_selection_model_selection_changed (model, 0, n); - else - gtk_selection_model_selection_changed (model, position, n_items); - - return TRUE; -} - -static gboolean -gtk_property_selection_unselect_range (GtkSelectionModel *model, - guint position, - guint n_items) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (model); - guint i; - - for (i = position; i < position + n_items; i++) - set_selected (self, i, FALSE); - - gtk_selection_model_selection_changed (model, position, n_items); - - return TRUE; -} - -static gboolean -gtk_property_selection_select_item (GtkSelectionModel *model, - guint position, - gboolean exclusive) -{ - return gtk_property_selection_select_range (model, position, 1, exclusive); -} - -static gboolean -gtk_property_selection_unselect_item (GtkSelectionModel *model, - guint position) -{ - return gtk_property_selection_unselect_range (model, position, 1); -} - -static gboolean -gtk_property_selection_select_all (GtkSelectionModel *model) -{ - return gtk_property_selection_select_range (model, 0, g_list_model_get_n_items (G_LIST_MODEL (model)), FALSE); -} - -static gboolean -gtk_property_selection_unselect_all (GtkSelectionModel *model) -{ - return gtk_property_selection_unselect_range (model, 0, g_list_model_get_n_items (G_LIST_MODEL (model))); -} - -static gboolean -gtk_property_selection_add_or_remove (GtkSelectionModel *model, - gboolean unselect_rest, - gboolean add, - GtkSelectionCallback callback, - gpointer data) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (model); - guint pos, start, n, n_items; - gboolean in; - guint min, max; - guint i; - - 
n_items = g_list_model_get_n_items (G_LIST_MODEL (self)); - if (unselect_rest) - { - for (i = 0; i < n_items; i++) - set_selected (self, i, FALSE); - } - - min = G_MAXUINT; - max = 0; - - pos = 0; - do - { - callback (pos, &start, &n, &in, data); - if (in) - { - if (start < min) - min = start; - if (start + n - 1 > max) - max = start + n - 1; - - for (i = start; i < start + n; i++) - set_selected (self, i, add); - } - pos = start + n; - } - while (n > 0); - - /* FIXME: do better here */ - if (unselect_rest) - gtk_selection_model_selection_changed (model, 0, n_items); - else if (min <= max) - gtk_selection_model_selection_changed (model, min, max - min + 1); - - return TRUE; -} - -static gboolean -gtk_property_selection_select_callback (GtkSelectionModel *model, - gboolean unselect_rest, - GtkSelectionCallback callback, - gpointer data) -{ - return gtk_property_selection_add_or_remove (model, unselect_rest, TRUE, callback, data); -} - -static gboolean -gtk_property_selection_unselect_callback (GtkSelectionModel *model, - GtkSelectionCallback callback, - gpointer data) -{ - return gtk_property_selection_add_or_remove (model, FALSE, FALSE, callback, data); -} - -static void -gtk_property_selection_query_range (GtkSelectionModel *model, - guint position, - guint *start_range, - guint *n_items, - gboolean *selected) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (model); - guint n; - gboolean sel; - guint start, end; - - n = g_list_model_get_n_items (G_LIST_MODEL (self)); - sel = is_selected (self, position); - - start = position; - while (start > 0) - { - if (is_selected (self, start - 1) != sel) - break; - start--; - } - - end = position; - while (end + 1 < n) - { - if (is_selected (self, end + 1) != sel) - break; - end++; - } - - *start_range = start; - *n_items = end - start + 1; - *selected = sel; -} - -static void -gtk_property_selection_selection_model_init (GtkSelectionModelInterface *iface) -{ - iface->is_selected = gtk_property_selection_is_selected; 
- iface->select_item = gtk_property_selection_select_item; - iface->unselect_item = gtk_property_selection_unselect_item; - iface->select_range = gtk_property_selection_select_range; - iface->unselect_range = gtk_property_selection_unselect_range; - iface->select_all = gtk_property_selection_select_all; - iface->unselect_all = gtk_property_selection_unselect_all; - iface->select_callback = gtk_property_selection_select_callback; - iface->unselect_callback = gtk_property_selection_unselect_callback; - iface->query_range = gtk_property_selection_query_range; -} - -G_DEFINE_TYPE_EXTENDED (GtkPropertySelection, gtk_property_selection, G_TYPE_OBJECT, 0, - G_IMPLEMENT_INTERFACE (G_TYPE_LIST_MODEL, - gtk_property_selection_list_model_init) - G_IMPLEMENT_INTERFACE (GTK_TYPE_SELECTION_MODEL, - gtk_property_selection_selection_model_init)) - -static void -gtk_property_selection_items_changed_cb (GListModel *model, - guint position, - guint removed, - guint added, - GtkPropertySelection *self) -{ - g_list_model_items_changed (G_LIST_MODEL (self), position, removed, added); -} - -static void -gtk_property_selection_clear_model (GtkPropertySelection *self) -{ - if (self->model == NULL) - return; - - g_signal_handlers_disconnect_by_func (self->model, - gtk_property_selection_items_changed_cb, - self); - g_clear_object (&self->model); -} - -static void -gtk_property_selection_real_set_property (GObject *object, - guint prop_id, - const GValue *value, - GParamSpec *pspec) - -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (object); - - switch (prop_id) - { - case PROP_MODEL: - self->model = g_value_dup_object (value); - g_warn_if_fail (self->model != NULL); - g_signal_connect (self->model, - "items-changed", - G_CALLBACK (gtk_property_selection_items_changed_cb), - self); - break; - - case PROP_PROPERTY: - { - GObjectClass *class; - GParamSpec *prop; - - self->property = g_value_dup_string (value); - g_warn_if_fail (self->property != NULL); - - class = g_type_class_ref 
(g_list_model_get_item_type (self->model)); - prop = g_object_class_find_property (class, self->property); - g_warn_if_fail (prop != NULL && - prop->value_type == G_TYPE_BOOLEAN && - ((prop->flags & (G_PARAM_READABLE|G_PARAM_WRITABLE|G_PARAM_CONSTRUCT_ONLY)) == - (G_PARAM_READABLE|G_PARAM_WRITABLE))); - g_type_class_unref (class); - } - break; - - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); - break; - } -} - -static void -gtk_property_selection_real_get_property (GObject *object, - guint prop_id, - GValue *value, - GParamSpec *pspec) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (object); - - switch (prop_id) - { - case PROP_MODEL: - g_value_set_object (value, self->model); - break; - - case PROP_PROPERTY: - g_value_set_string (value, self->property); - break; - - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); - break; - } -} - -static void -gtk_property_selection_dispose (GObject *object) -{ - GtkPropertySelection *self = GTK_PROPERTY_SELECTION (object); - - gtk_property_selection_clear_model (self); - - g_free (self->property); - - G_OBJECT_CLASS (gtk_property_selection_parent_class)->dispose (object); -} - -static void -gtk_property_selection_class_init (GtkPropertySelectionClass *klass) -{ - GObjectClass *gobject_class = G_OBJECT_CLASS (klass); - - gobject_class->get_property = gtk_property_selection_real_get_property; - gobject_class->set_property = gtk_property_selection_real_set_property; - gobject_class->dispose = gtk_property_selection_dispose; - - /** - * GtkPropertySelection:model - * - * The list managed by this selection - */ - properties[PROP_MODEL] = - g_param_spec_object ("model", - P_("Model"), - P_("List managed by this selection"), - G_TYPE_LIST_MODEL, - G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_EXPLICIT_NOTIFY | G_PARAM_STATIC_STRINGS); - - properties[PROP_PROPERTY] = - g_param_spec_string ("property", - P_("Property"), - P_("Item property to store selection state in"), - 
NULL, - G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_EXPLICIT_NOTIFY | G_PARAM_STATIC_STRINGS); - - g_object_class_install_properties (gobject_class, N_PROPS, properties); -} - -static void -gtk_property_selection_init (GtkPropertySelection *self) -{ -} - -/** - * gtk_property_selection_new: - * @model: (transfer none): the #GListModel to manage - * @property: the item property to use - * - * Creates a new property selection to handle @model. - * - * @property must be the name of a writable boolean property - * of the item type of @model. - * - * Note that GtkPropertySelection does not monitor the property - * for changes while the item is part of the model, but it does - * inherit the initial value when an item is added to the model. - * - * Returns: (transfer full) (type GtkPropertySelection): a new #GtkPropertySelection - **/ -GListModel * -gtk_property_selection_new (GListModel *model, - const char *property) -{ - g_return_val_if_fail (G_IS_LIST_MODEL (model), NULL); - - return g_object_new (GTK_TYPE_PROPERTY_SELECTION, - "model", model, - "property", property, - NULL); -} - -/** - * gtk_property_selection_get_model: - * @self: a #GtkPropertySelection - * - * Gets the underlying model. - * - * Returns: (transfer none): the underlying model - */ -GListModel * -gtk_property_selection_get_model (GtkPropertySelection *self) -{ - g_return_val_if_fail (GTK_IS_PROPERTY_SELECTION (self), NULL); - - return self->model; -} - -/** - * gtk_property_selection_get_property: - * @self: a #GtkPropertySelection - * - * Gets the name of the item property that @self stores - * the selection in. 
- * - * Returns: the name of the property - */ -const char * -gtk_property_selection_get_property (GtkPropertySelection *self) -{ - g_return_val_if_fail (GTK_IS_PROPERTY_SELECTION (self), NULL); - - return self->property; -} diff --git a/gtk/gtkpropertyselection.h b/gtk/gtkpropertyselection.h deleted file mode 100644 index 8a2440449f..0000000000 --- a/gtk/gtkpropertyselection.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright © 2019 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . 
- * - * Authors: Matthias Clasen - */ - -#ifndef __GTK_PROPERTY_SELECTION_H__ -#define __GTK_PROPERTY_SELECTION_H__ - -#include -#include - -G_BEGIN_DECLS - -#define GTK_TYPE_PROPERTY_SELECTION (gtk_property_selection_get_type ()) - -GDK_AVAILABLE_IN_ALL -G_DECLARE_FINAL_TYPE (GtkPropertySelection, gtk_property_selection, GTK, PROPERTY_SELECTION, GObject) - -GDK_AVAILABLE_IN_ALL -GListModel * gtk_property_selection_new (GListModel *model, - const char *property); - -GDK_AVAILABLE_IN_ALL -GListModel * gtk_property_selection_get_model (GtkPropertySelection *self); - -GDK_AVAILABLE_IN_ALL -const char * gtk_property_selection_get_property (GtkPropertySelection *self); - -G_END_DECLS - -#endif /* __GTK_PROPERTY_SELECTION_H__ */ diff --git a/gtk/gtkselectionmodel.c b/gtk/gtkselectionmodel.c index 1bd62e810d..0a60d7310d 100644 --- a/gtk/gtkselectionmodel.c +++ b/gtk/gtkselectionmodel.c @@ -21,6 +21,7 @@ #include "gtkselectionmodel.h" +#include "gtkbitset.h" #include "gtkintl.h" #include "gtkmarshalers.h" @@ -79,7 +80,33 @@ static gboolean gtk_selection_model_default_is_selected (GtkSelectionModel *model, guint position) { - return FALSE; + GtkBitset *bitset; + gboolean selected; + + bitset = gtk_selection_model_get_selection_in_range (model, position, 1); + selected = gtk_bitset_contains (bitset, position); + gtk_bitset_unref (bitset); + + return selected; +} + +static GtkBitset * +gtk_selection_model_default_get_selection_in_range (GtkSelectionModel *model, + guint position, + guint n_items) +{ + GtkBitset *bitset; + guint i; + + bitset = gtk_bitset_new_empty (); + + for (i = position; i < position + n_items; i++) + { + if (gtk_selection_model_is_selected (model, i)) + gtk_bitset_add (bitset, i); + } + + return bitset; } static gboolean @@ -87,13 +114,48 @@ gtk_selection_model_default_select_item (GtkSelectionModel *model, guint position, gboolean unselect_rest) { - return FALSE; + GtkBitset *selected; + GtkBitset *mask; + gboolean result; + + selected = 
gtk_bitset_new_empty (); + gtk_bitset_add (selected, position); + if (unselect_rest) + { + mask = gtk_bitset_new_empty (); + gtk_bitset_add_range (mask, 0, g_list_model_get_n_items (G_LIST_MODEL (model))); + } + else + { + mask = gtk_bitset_ref (selected); + } + + result = gtk_selection_model_set_selection (model, selected, mask); + + gtk_bitset_unref (selected); + gtk_bitset_unref (mask); + + return result; } + static gboolean gtk_selection_model_default_unselect_item (GtkSelectionModel *model, guint position) { - return FALSE; + GtkBitset *selected; + GtkBitset *mask; + gboolean result; + + selected = gtk_bitset_new_empty (); + mask = gtk_bitset_new_empty (); + gtk_bitset_add (mask, position); + + result = gtk_selection_model_set_selection (model, selected, mask); + + gtk_bitset_unref (selected); + gtk_bitset_unref (mask); + + return result; } static gboolean @@ -102,7 +164,28 @@ gtk_selection_model_default_select_range (GtkSelectionModel *model, guint n_items, gboolean unselect_rest) { - return FALSE; + GtkBitset *selected; + GtkBitset *mask; + gboolean result; + + selected = gtk_bitset_new_empty (); + gtk_bitset_add_range (selected, position, n_items); + if (unselect_rest) + { + mask = gtk_bitset_new_empty (); + gtk_bitset_add_range (mask, 0, g_list_model_get_n_items (G_LIST_MODEL (model))); + } + else + { + mask = gtk_bitset_ref (selected); + } + + result = gtk_selection_model_set_selection (model, selected, mask); + + gtk_bitset_unref (selected); + gtk_bitset_unref (mask); + + return result; } static gboolean @@ -110,24 +193,20 @@ gtk_selection_model_default_unselect_range (GtkSelectionModel *model, guint position, guint n_items) { - return FALSE; -} + GtkBitset *selected; + GtkBitset *mask; + gboolean result; -static gboolean -gtk_selection_model_default_select_callback (GtkSelectionModel *model, - gboolean unselect_rest, - GtkSelectionCallback callback, - gpointer data) -{ - return FALSE; -} + selected = gtk_bitset_new_empty (); + mask = 
gtk_bitset_new_empty (); + gtk_bitset_add_range (mask, position, n_items); -static gboolean -gtk_selection_model_default_unselect_callback (GtkSelectionModel *model, - GtkSelectionCallback callback, - gpointer data) -{ - return FALSE; + result = gtk_selection_model_set_selection (model, selected, mask); + + gtk_bitset_unref (selected); + gtk_bitset_unref (mask); + + return result; } static gboolean @@ -142,40 +221,26 @@ gtk_selection_model_default_unselect_all (GtkSelectionModel *model) return gtk_selection_model_unselect_range (model, 0, g_list_model_get_n_items (G_LIST_MODEL (model))); } -static void -gtk_selection_model_default_query_range (GtkSelectionModel *model, - guint position, - guint *start_range, - guint *n_items, - gboolean *selected) +static gboolean +gtk_selection_model_default_set_selection (GtkSelectionModel *model, + GtkBitset *selected, + GtkBitset *mask) { - *start_range = position; - - if (position >= g_list_model_get_n_items (G_LIST_MODEL (model))) - { - *n_items = 0; - *selected = FALSE; - } - else - { - *n_items = 1; - *selected = gtk_selection_model_is_selected (model, position); - } + return FALSE; } static void gtk_selection_model_default_init (GtkSelectionModelInterface *iface) { iface->is_selected = gtk_selection_model_default_is_selected; + iface->get_selection_in_range = gtk_selection_model_default_get_selection_in_range; iface->select_item = gtk_selection_model_default_select_item; iface->unselect_item = gtk_selection_model_default_unselect_item; iface->select_range = gtk_selection_model_default_select_range; iface->unselect_range = gtk_selection_model_default_unselect_range; iface->select_all = gtk_selection_model_default_select_all; iface->unselect_all = gtk_selection_model_default_unselect_all; - iface->select_callback = gtk_selection_model_default_select_callback; - iface->unselect_callback = gtk_selection_model_default_unselect_callback; - iface->query_range = gtk_selection_model_default_query_range; + iface->set_selection = 
gtk_selection_model_default_set_selection; /** * GtkSelectionModel::selection-changed @@ -225,6 +290,62 @@ gtk_selection_model_is_selected (GtkSelectionModel *model, return iface->is_selected (model, position); } +/** + * gtk_selection_model_get_selection: + * @model: a #GtkSelectionModel + * + * Gets the set containing all currently selected items in the model. + * + * This function may be slow, so if you are only interested in a single item, + * consider using gtk_selection_model_is_selected() or if you are only + * interested in a few consider gtk_selection_model_get_selection_in_range(). + * + * Returns: (transfer full): a #GtkBitset containing all the values currently + * selected in @model. If no items are selected, the bitset is empty. + * The bitset must not be modified. + **/ +GtkBitset * +gtk_selection_model_get_selection (GtkSelectionModel *model) +{ + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), gtk_bitset_new_empty ()); + + return gtk_selection_model_get_selection_in_range (model, 0, g_list_model_get_n_items (G_LIST_MODEL (model))); +} + +/** + * gtk_selection_model_get_selection_in_range: + * @model: a #GtkSelectionModel + * @position: start of the queried range + * @n_items: number of items in the queried range + * + * Gets a bitset where the values in the range [position, + * position + n_items) match the selected state of the items in that range. + * All values outside that range are undefined. + * + * This function is an optimization for gtk_selection_model_get_selection() when + * you are only interested in part of the model's selected state. A common use + * case is in response to the :selection-changed signal. + * + * Returns: A #GtkBitset that matches the selection state for the given range + * with all other values being undefined. + * The bitset must not be modified. 
+ **/ +GtkBitset * +gtk_selection_model_get_selection_in_range (GtkSelectionModel *model, + guint position, + guint n_items) +{ + GtkSelectionModelInterface *iface; + + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), gtk_bitset_new_empty ()); + + if (n_items == 0) + return gtk_bitset_new_empty (); + + iface = GTK_SELECTION_MODEL_GET_IFACE (model); + return iface->get_selection_in_range (model, position, n_items); +} + /** * gtk_selection_model_select_item: * @model: a #GtkSelectionModel @@ -232,6 +353,9 @@ gtk_selection_model_is_selected (GtkSelectionModel *model, * @unselect_rest: whether previously selected items should be unselected * * Requests to select an item in the model. + * + * Returns: %TRUE if this action was supported and no fallback should be + * tried. This does not mean the item was selected. */ gboolean gtk_selection_model_select_item (GtkSelectionModel *model, @@ -240,7 +364,7 @@ gtk_selection_model_select_item (GtkSelectionModel *model, { GtkSelectionModelInterface *iface; - g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), 0); + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); iface = GTK_SELECTION_MODEL_GET_IFACE (model); return iface->select_item (model, position, unselect_rest); @@ -252,6 +376,9 @@ gtk_selection_model_select_item (GtkSelectionModel *model, * @position: the position of the item to unselect * * Requests to unselect an item in the model. + * + * Returns: %TRUE if this action was supported and no fallback should be + * tried. This does not mean the item was unselected. 
*/ gboolean gtk_selection_model_unselect_item (GtkSelectionModel *model, @@ -259,7 +386,7 @@ gtk_selection_model_unselect_item (GtkSelectionModel *model, { GtkSelectionModelInterface *iface; - g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), 0); + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); iface = GTK_SELECTION_MODEL_GET_IFACE (model); return iface->unselect_item (model, position); @@ -273,6 +400,9 @@ gtk_selection_model_unselect_item (GtkSelectionModel *model, * @unselect_rest: whether previously selected items should be unselected * * Requests to select a range of items in the model. + * + * Returns: %TRUE if this action was supported and no fallback should be + * tried. This does not mean the range was selected. */ gboolean gtk_selection_model_select_range (GtkSelectionModel *model, @@ -282,7 +412,7 @@ gtk_selection_model_select_range (GtkSelectionModel *model, { GtkSelectionModelInterface *iface; - g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), 0); + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); iface = GTK_SELECTION_MODEL_GET_IFACE (model); return iface->select_range (model, position, n_items, unselect_rest); @@ -295,6 +425,9 @@ gtk_selection_model_select_range (GtkSelectionModel *model, * @n_items: the number of items to unselect * * Requests to unselect a range of items in the model. + * + * Returns: %TRUE if this action was supported and no fallback should be + * tried. This does not mean the range was unselected. 
*/ gboolean gtk_selection_model_unselect_range (GtkSelectionModel *model, @@ -303,7 +436,7 @@ gtk_selection_model_unselect_range (GtkSelectionModel *model, { GtkSelectionModelInterface *iface; - g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), 0); + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); iface = GTK_SELECTION_MODEL_GET_IFACE (model); return iface->unselect_range (model, position, n_items); @@ -314,13 +447,16 @@ gtk_selection_model_unselect_range (GtkSelectionModel *model, * @model: a #GtkSelectionModel * * Requests to select all items in the model. + * + * Returns: %TRUE if this action was supported and no fallback should be + * tried. This does not mean that all items are now selected. */ gboolean gtk_selection_model_select_all (GtkSelectionModel *model) { GtkSelectionModelInterface *iface; - g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), 0); + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); iface = GTK_SELECTION_MODEL_GET_IFACE (model); return iface->select_all (model); @@ -331,96 +467,74 @@ gtk_selection_model_select_all (GtkSelectionModel *model) * @model: a #GtkSelectionModel * * Requests to unselect all items in the model. + * + * Returns: %TRUE if this action was supported and no fallback should be + * tried. This does not mean that all items are now unselected. 
*/ gboolean gtk_selection_model_unselect_all (GtkSelectionModel *model) { GtkSelectionModelInterface *iface; - g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), 0); + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); iface = GTK_SELECTION_MODEL_GET_IFACE (model); return iface->unselect_all (model); } /** - * gtk_selection_model_select_callback: + * gtk_selection_model_set_selection: * @model: a #GtkSelectionModel - * @unselect_rest: whether previously selected items should be unselected - * @callback: (scope call): a #GtkSelectionCallback to determine items to select - * @data: data to pass to @callback + * @selected: bitmask specifying if items should be selected or + * unselected + * @mask: bitmask specifying which items should be updated * - * Requests to select all items for which @callback returns - * @selected as TRUE. - */ + * This is the most advanced selection updating method that allows + * the most fine-grained control over selection changes. + * If you can, you should try the simpler versions, as implementations + * are more likely to implement support for those. + * + * Requests that the selection state of all positions set in @mask be + * updated to the respective value in the @selected bitmask. + * + * In pseudocode, it would look something like this: + * + * |[ + * for (i = 0; i < n_items; i++) + * { + * // don't change values not in the mask + * if (!gtk_bitset_contains (mask, i)) + * continue; + * + * if (gtk_bitset_contains (selected, i)) + * select_item (i); + * else + * unselect_item (i); + * } + * + * gtk_selection_model_selection_changed (model, first_changed_item, n_changed_items); + * ]| + * + * @mask and @selected must not be modified. They may refer to the same bitset, + * which would mean that every item in the set should be selected. + * + * Returns: %TRUE if this action was supported and no fallback should be + * tried. This does not mean that all items were updated according + * to the inputs. 
+ **/ gboolean -gtk_selection_model_select_callback (GtkSelectionModel *model, - gboolean unselect_rest, - GtkSelectionCallback callback, - gpointer data) -{ - g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); - - return GTK_SELECTION_MODEL_GET_IFACE (model)->select_callback (model, unselect_rest, callback, data); -} - -/** - * gtk_selection_model_unselect_callback: - * @model: a #GtkSelectionModel - * @callback: (scope call): a #GtkSelectionCallback to determine items to select - * @data: data to pass to @callback - * - * Requests to unselect all items for which @callback returns - * @selected as TRUE. - */ -gboolean -gtk_selection_model_unselect_callback (GtkSelectionModel *model, - GtkSelectionCallback callback, - gpointer data) -{ - g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); - - return GTK_SELECTION_MODEL_GET_IFACE (model)->unselect_callback (model, callback, data); -} - -/** - * gtk_selection_model_query_range: - * @model: a #GtkSelectionModel - * @position: the position inside the range - * @start_range: (out): returns the position of the first element of the range - * @n_items: (out): returns the size of the range - * @selected: (out): returns whether items in @range are selected - * - * This function allows to query the selection status of multiple elements at once. - * It is passed a position and returns a range of elements of uniform selection status. - * - * If @position is greater than the number of items in @model, @n_items is set to 0. - * Otherwise the returned range is guaranteed to include the passed-in position, so - * @n_items will be >= 1. - * - * Positions directly adjacent to the returned range may have the same selection - * status as the returned range. - * - * This is an optimization function to make iterating over a model faster when few - * items are selected. However, it is valid behavior for implementations to use a - * naive implementation that only ever returns a single element. 
- */ -void -gtk_selection_model_query_range (GtkSelectionModel *model, - guint position, - guint *start_range, - guint *n_items, - gboolean *selected) +gtk_selection_model_set_selection (GtkSelectionModel *model, + GtkBitset *selected, + GtkBitset *mask) { GtkSelectionModelInterface *iface; - g_return_if_fail (GTK_IS_SELECTION_MODEL (model)); - g_return_if_fail (start_range != NULL); - g_return_if_fail (n_items != NULL); - g_return_if_fail (selected != NULL); + g_return_val_if_fail (GTK_IS_SELECTION_MODEL (model), FALSE); + g_return_val_if_fail (selected != NULL, FALSE); + g_return_val_if_fail (mask != NULL, FALSE); iface = GTK_SELECTION_MODEL_GET_IFACE (model); - return iface->query_range (model, position, start_range, n_items, selected); + return iface->set_selection (model, selected, mask); } /** diff --git a/gtk/gtkselectionmodel.h b/gtk/gtkselectionmodel.h index 55a012e3d6..4e5722c7e6 100644 --- a/gtk/gtkselectionmodel.h +++ b/gtk/gtkselectionmodel.h @@ -24,7 +24,7 @@ #error "Only can be included directly." #endif -#include +#include G_BEGIN_DECLS @@ -33,39 +33,12 @@ G_BEGIN_DECLS GDK_AVAILABLE_IN_ALL G_DECLARE_INTERFACE (GtkSelectionModel, gtk_selection_model, GTK, SELECTION_MODEL, GListModel) -/** - * GtkSelectionCallback: - * @position: the position to query - * @start_range: (out): returns the position of the first element of the range - * @n_items: (out): returns the size of the range - * @selected: (out): returns whether items in @range are selected - * @data: callback data - * - * Callback type for determining items to operate on with - * gtk_selection_model_select_callback() or - * gtk_selection_model_unselect_callback(). - * - * The callback determines a range of consecutive items around - * @position which should either all - * be changed, in which case @selected is set to %TRUE, or all not - * be changed, in which case @selected is set to %FALSE. - * - * @start_range and @n_items are set to return the range. 
- * - * The callback will be called repeatedly to find all ranges - * to operate on until it has exhausted the items of the model, - * or until it returns an empty range (ie @n_items == 0). - */ -typedef void (* GtkSelectionCallback) (guint position, - guint *start_range, - guint *n_items, - gboolean *selected, - gpointer data); - - /** * GtkSelectionModelInterface: * @is_selected: Return if the item at the given position is selected. + * @get_selection_in_range: Return a bitset with all currently selected + * items in the given range. By default, this function will call + * #GtkSelectionModel::is_selected() on all items in the given range. * @select_item: Select the item in the given position. If the operation * is known to fail, return %FALSE. * @unselect_item: Unselect the item in the given position. If the @@ -79,12 +52,22 @@ typedef void (* GtkSelectionCallback) (guint position, * unsupported or known to fail for all items, return %FALSE. * @unselect_all: Unselect all items in the model. If the operation is * unsupported or known to fail for all items, return %FALSE. + * @set_selection: Set selection state of all items in mask to selected. + * See gtk_selection_model_set_selection() for a detailed explanation + * of this function. * * The list of virtual functions for the #GtkSelectionModel interface. - * All getter functions are mandatory to implement, but the model does - * not need to implement any functions to support selecting or unselecting - * items. Of course, if the model does not do that, it means that users - * cannot select or unselect items in a list widgets using the model. + * No function must be implemented, but unless #GtkSelectionModel::is_selected() + * is implemented, it will not be possible to select items in the set. + * + * The model does not need to implement any functions to support either + * selecting or unselecting items. 
Of course, if the model does not do that, + * it means that users cannot select or unselect items in a list widget + * using the model. + * + * All selection functions fall back to #GtkSelectionModel::set_selection() + * so it is sufficient to implement just that function for full selection + * support. */ struct _GtkSelectionModelInterface { @@ -94,6 +77,9 @@ struct _GtkSelectionModelInterface /*< public >*/ gboolean (* is_selected) (GtkSelectionModel *model, guint position); + GtkBitset * (* get_selection_in_range) (GtkSelectionModel *model, + guint position, + guint n_items); gboolean (* select_item) (GtkSelectionModel *model, guint position, @@ -109,23 +95,21 @@ struct _GtkSelectionModelInterface guint n_items); gboolean (* select_all) (GtkSelectionModel *model); gboolean (* unselect_all) (GtkSelectionModel *model); - gboolean (* select_callback) (GtkSelectionModel *model, - gboolean unselect_rest, - GtkSelectionCallback callback, - gpointer data); - gboolean (* unselect_callback) (GtkSelectionModel *model, - GtkSelectionCallback callback, - gpointer data); - void (* query_range) (GtkSelectionModel *model, - guint position, - guint *start_range, - guint *n_items, - gboolean *selected); + gboolean (* set_selection) (GtkSelectionModel *model, + GtkBitset *selected, + GtkBitset *mask); }; GDK_AVAILABLE_IN_ALL gboolean gtk_selection_model_is_selected (GtkSelectionModel *model, guint position); +GDK_AVAILABLE_IN_ALL +GtkBitset * gtk_selection_model_get_selection (GtkSelectionModel *model); +GDK_AVAILABLE_IN_ALL +GtkBitset * gtk_selection_model_get_selection_in_range + (GtkSelectionModel *model, + guint position, + guint n_items); GDK_AVAILABLE_IN_ALL gboolean gtk_selection_model_select_item (GtkSelectionModel *model, @@ -147,23 +131,10 @@ GDK_AVAILABLE_IN_ALL gboolean gtk_selection_model_select_all (GtkSelectionModel *model); GDK_AVAILABLE_IN_ALL gboolean gtk_selection_model_unselect_all (GtkSelectionModel *model); - GDK_AVAILABLE_IN_ALL -gboolean 
gtk_selection_model_select_callback (GtkSelectionModel *model, - gboolean unselect_rest, - GtkSelectionCallback callback, - gpointer data); -GDK_AVAILABLE_IN_ALL -gboolean gtk_selection_model_unselect_callback (GtkSelectionModel *model, - GtkSelectionCallback callback, - gpointer data); - -GDK_AVAILABLE_IN_ALL -void gtk_selection_model_query_range (GtkSelectionModel *model, - guint position, - guint *start_range, - guint *n_items, - gboolean *selected); +gboolean gtk_selection_model_set_selection (GtkSelectionModel *model, + GtkBitset *selected, + GtkBitset *mask); /* for implementations only */ GDK_AVAILABLE_IN_ALL diff --git a/gtk/gtkset.c b/gtk/gtkset.c deleted file mode 100644 index 18fab23c56..0000000000 --- a/gtk/gtkset.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright © 2019 Red Hat, Inc. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Authors: Matthias Clasen - */ - -#include "gtkset.h" - -/* Store a set of unsigned integers as a sorted array of ranges. 
- */ - -typedef struct -{ - guint first; - guint n_items; -} Range; - -struct _GtkSet -{ - GArray *ranges; -}; - -typedef struct -{ - GtkSet *set; - Range *current; - int idx; - guint pos; -} GtkRealSetIter; - -GtkSet * -gtk_set_new (void) -{ - GtkSet *set; - - set = g_new (GtkSet, 1); - set->ranges = g_array_new (FALSE, FALSE, sizeof (Range)); - - return set; -} - -GtkSet * -gtk_set_copy (GtkSet *set) -{ - GtkSet *copy; - - copy = g_new (GtkSet, 1); - copy->ranges = g_array_copy (set->ranges); - - return copy; -} - -void -gtk_set_free (GtkSet *set) -{ - g_array_free (set->ranges, TRUE); - g_free (set); -} - -gboolean -gtk_set_contains (GtkSet *set, - guint item) -{ - int i; - - for (i = 0; i < set->ranges->len; i++) - { - Range *r = &g_array_index (set->ranges, Range, i); - - if (item < r->first) - return FALSE; - - if (item < r->first + r->n_items) - return TRUE; - } - - return FALSE; -} - -void -gtk_set_remove_all (GtkSet *set) -{ - g_array_set_size (set->ranges, 0); -} - -static int -range_compare (Range *r, Range *s) -{ - int ret = 0; - - if (r->first + r->n_items < s->first) - ret = -1; - else if (s->first + s->n_items < r->first) - ret = 1; - - return ret; -} - -void -gtk_set_add_range (GtkSet *set, - guint first_item, - guint n_items) -{ - int i; - Range s; - int first = -1; - int last = -1; - - s.first = first_item; - s.n_items = n_items; - - for (i = 0; i < set->ranges->len; i++) - { - Range *r = &g_array_index (set->ranges, Range, i); - int cmp = range_compare (&s, r); - - if (cmp < 0) - break; - - if (cmp == 0) - { - if (first < 0) - first = i; - last = i; - } - } - - if (first > -1) - { - Range *r; - guint start; - guint end; - - r = &g_array_index (set->ranges, Range, first); - start = MIN (s.first, r->first); - - r = &g_array_index (set->ranges, Range, last); - end = MAX (s.first + s.n_items - 1, r->first + r->n_items - 1); - - s.first = start; - s.n_items = end - start + 1; - - g_array_remove_range (set->ranges, first, last - first + 1); - 
g_array_insert_val (set->ranges, first, s); - } - else - g_array_insert_val (set->ranges, i, s); -} - -void -gtk_set_remove_range (GtkSet *set, - guint first_item, - guint n_items) -{ - Range s; - int i; - int first = -1; - int last = -1; - - s.first = first_item; - s.n_items = n_items; - - for (i = 0; i < set->ranges->len; i++) - { - Range *r = &g_array_index (set->ranges, Range, i); - int cmp = range_compare (&s, r); - - if (cmp < 0) - break; - - if (cmp == 0) - { - if (first < 0) - first = i; - last = i; - } - } - - if (first > -1) - { - Range *r; - Range a[2]; - int k = 0; - - r = &g_array_index (set->ranges, Range, first); - if (r->first < s.first) - { - a[k].first = r->first; - a[k].n_items = s.first - r->first; - k++; - } - r = &g_array_index (set->ranges, Range, last); - if (r->first + r->n_items > s.first + s.n_items) - { - a[k].first = s.first + s.n_items; - a[k].n_items = r->first + r->n_items - a[k].first; - k++; - } - g_array_remove_range (set->ranges, first, last - first + 1); - if (k > 0) - g_array_insert_vals (set->ranges, first, a, k); - } -} - -void -gtk_set_find_range (GtkSet *set, - guint position, - guint upper_bound, - guint *start, - guint *n_items, - gboolean *contained) -{ - int i; - int last = 0; - - if (position >= upper_bound) - { - *start = 0; - *n_items = 0; - *contained = FALSE; - return; - } - - for (i = 0; i < set->ranges->len; i++) - { - Range *r = &g_array_index (set->ranges, Range, i); - - if (position < r->first) - { - *start = last; - *n_items = r->first - last; - *contained = FALSE; - - return; - } - else if (r->first <= position && position < r->first + r->n_items) - { - *start = r->first; - *n_items = r->n_items; - *contained = TRUE; - - return; - } - else - last = r->first + r->n_items; - } - - *start = last; - *n_items = upper_bound - last; - *contained = FALSE; -} - -void -gtk_set_add_item (GtkSet *set, - guint item) -{ - gtk_set_add_range (set, item, 1); -} - -void -gtk_set_remove_item (GtkSet *set, - guint item) -{ - 
gtk_set_remove_range (set, item, 1); -} - -/* This is peculiar operation: Replace every number n >= first by n + shift - * This is only supported for negative shifts if the shifting does not cause - * any ranges to overlap. - */ -void -gtk_set_shift (GtkSet *set, - guint first, - int shift) -{ - int i; - - for (i = 0; i < set->ranges->len; i++) - { - Range *r = &g_array_index (set->ranges, Range, i); - if (r->first >= first) - r->first += shift; - } -} - -void -gtk_set_iter_init (GtkSetIter *iter, - GtkSet *set) -{ - GtkRealSetIter *ri = (GtkRealSetIter *)iter; - - ri->set = set; - ri->idx = -1; - ri->current = 0; -} - -gboolean -gtk_set_iter_next (GtkSetIter *iter, - guint *item) -{ - GtkRealSetIter *ri = (GtkRealSetIter *)iter; - - if (ri->idx == -1) - { -next_range: - ri->idx++; - - if (ri->idx == ri->set->ranges->len) - return FALSE; - - ri->current = &g_array_index (ri->set->ranges, Range, ri->idx); - ri->pos = ri->current->first; - } - else - { - ri->pos++; - if (ri->pos == ri->current->first + ri->current->n_items) - goto next_range; - } - - *item = ri->pos; - return TRUE; -} - -gboolean -gtk_set_is_empty (GtkSet *set) -{ - return set->ranges->len == 0; -} - -guint -gtk_set_get_min (GtkSet *set) -{ - Range *r; - - if (gtk_set_is_empty (set)) - return 0; - - r = &g_array_index (set->ranges, Range, 0); - - return r->first; -} - -guint -gtk_set_get_max (GtkSet *set) -{ - Range *r; - - if (gtk_set_is_empty (set)) - return 0; - - r = &g_array_index (set->ranges, Range, set->ranges->len - 1); - - return r->first + r->n_items - 1; -} - -#if 0 -void -gtk_set_dump (GtkSet *set) -{ - int i; - - for (i = 0; i < set->ranges->len; i++) - { - Range *r = &g_array_index (set->ranges, Range, i); - g_print (" %u:%u", r->first, r->n_items); - } - g_print ("\n"); -} -#endif diff --git a/gtk/gtkset.h b/gtk/gtkset.h deleted file mode 100644 index 1ab6c6d1a2..0000000000 --- a/gtk/gtkset.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright © 2019 Red Hat, Inc. 
- * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library. If not, see . - * - * Authors: Matthias Clasen - */ - -#ifndef __GTK_SET_H__ -#define __GTK_SET_H__ - -#include - -typedef struct _GtkSet GtkSet; -typedef struct _GtkSetIter GtkSetIter; - -struct _GtkSetIter -{ - gpointer dummy1; - gpointer dummy2; - int dummy3; - int dummy4; -}; - -GtkSet *gtk_set_new (void); -void gtk_set_free (GtkSet *set); -GtkSet *gtk_set_copy (GtkSet *set); - -gboolean gtk_set_contains (GtkSet *set, - guint item); - -void gtk_set_remove_all (GtkSet *set); -void gtk_set_add_item (GtkSet *set, - guint item); -void gtk_set_remove_item (GtkSet *set, - guint item); -void gtk_set_add_range (GtkSet *set, - guint first, - guint n); -void gtk_set_remove_range (GtkSet *set, - guint first, - guint n); -void gtk_set_find_range (GtkSet *set, - guint position, - guint upper_bound, - guint *start, - guint *n_items, - gboolean *contained); - -void gtk_set_shift (GtkSet *set, - guint first, - int shift); - -void gtk_set_iter_init (GtkSetIter *iter, - GtkSet *set); -gboolean gtk_set_iter_next (GtkSetIter *iter, - guint *item); - -gboolean gtk_set_is_empty (GtkSet *set); -guint gtk_set_get_min (GtkSet *set); -guint gtk_set_get_max (GtkSet *set); - -#endif /* __GTK_SET_H__ */ diff --git a/gtk/gtksingleselection.c b/gtk/gtksingleselection.c index 5d0172c7b1..bd35aed782 100644 --- a/gtk/gtksingleselection.c +++ b/gtk/gtksingleselection.c 
@@ -21,6 +21,7 @@ #include "gtksingleselection.h" +#include "gtkbitset.h" #include "gtkintl.h" #include "gtkselectionmodel.h" @@ -110,6 +111,21 @@ gtk_single_selection_is_selected (GtkSelectionModel *model, return self->selected == position; } +static GtkBitset * +gtk_single_selection_get_selection_in_range (GtkSelectionModel *model, + guint position, + guint n_items) +{ + GtkSingleSelection *self = GTK_SINGLE_SELECTION (model); + GtkBitset *result; + + result = gtk_bitset_new_empty (); + if (self->selected != GTK_INVALID_LIST_POSITION) + gtk_bitset_add (result, self->selected); + + return result; +} + static gboolean gtk_single_selection_select_item (GtkSelectionModel *model, guint position, @@ -138,57 +154,13 @@ gtk_single_selection_unselect_item (GtkSelectionModel *model, return TRUE; } -static void -gtk_single_selection_query_range (GtkSelectionModel *model, - guint position, - guint *start_range, - guint *n_range, - gboolean *selected) -{ - GtkSingleSelection *self = GTK_SINGLE_SELECTION (model); - guint n_items; - - n_items = g_list_model_get_n_items (self->model); - - if (position >= n_items) - { - *start_range = position; - *n_range = 0; - *selected = FALSE; - } - else if (self->selected == GTK_INVALID_LIST_POSITION) - { - *start_range = 0; - *n_range = n_items; - *selected = FALSE; - } - else if (position < self->selected) - { - *start_range = 0; - *n_range = self->selected; - *selected = FALSE; - } - else if (position > self->selected) - { - *start_range = self->selected + 1; - *n_range = n_items - *start_range; - *selected = FALSE; - } - else - { - *start_range = self->selected; - *n_range = 1; - *selected = TRUE; - } -} - static void gtk_single_selection_selection_model_init (GtkSelectionModelInterface *iface) { iface->is_selected = gtk_single_selection_is_selected; + iface->get_selection_in_range = gtk_single_selection_get_selection_in_range; iface->select_item = gtk_single_selection_select_item; iface->unselect_item = 
gtk_single_selection_unselect_item; - iface->query_range = gtk_single_selection_query_range; } G_DEFINE_TYPE_EXTENDED (GtkSingleSelection, gtk_single_selection, G_TYPE_OBJECT, 0, diff --git a/gtk/gtktypes.h b/gtk/gtktypes.h index f8e68a0e07..9c3f4fe2ed 100644 --- a/gtk/gtktypes.h +++ b/gtk/gtktypes.h @@ -34,6 +34,7 @@ G_BEGIN_DECLS typedef struct _GtkAdjustment GtkAdjustment; +typedef struct _GtkBitset GtkBitset; typedef struct _GtkBuilder GtkBuilder; typedef struct _GtkBuilderScope GtkBuilderScope; typedef struct _GtkClipboard GtkClipboard; diff --git a/gtk/meson.build b/gtk/meson.build index 248e605a50..5a7ca5715f 100644 --- a/gtk/meson.build +++ b/gtk/meson.build @@ -134,7 +134,6 @@ gtk_private_sources = files([ 'gtkscaler.c', 'gtksearchengine.c', 'gtksearchenginemodel.c', - 'gtkset.c', 'gtksizerequestcache.c', 'gtkstyleanimation.c', 'gtkstylecascade.c', @@ -164,6 +163,7 @@ gtk_public_sources = files([ 'gtkaspectframe.c', 'gtkassistant.c', 'gtkbinlayout.c', + 'gtkbitset.c', 'gtkbookmarklist.c', 'gtkborder.c', 'gtkboxlayout.c', @@ -329,7 +329,6 @@ gtk_public_sources = files([ 'gtkprintsettings.c', 'gtkprogressbar.c', 'gtkpropertylookuplistmodel.c', - 'gtkpropertyselection.c', 'gtkradiobutton.c', 'gtkrange.c', 'gtktreerbtree.c', @@ -451,6 +450,7 @@ gtk_public_headers = files([ 'gtkaspectframe.h', 'gtkassistant.h', 'gtkbinlayout.h', + 'gtkbitset.h', 'gtkbookmarklist.h', 'gtkborder.h', 'gtkbox.h', @@ -606,7 +606,6 @@ gtk_public_headers = files([ 'gtkprintoperationpreview.h', 'gtkprintsettings.h', 'gtkprogressbar.h', - 'gtkpropertyselection.h', 'gtkradiobutton.h', 'gtkrange.h', 'gtkrecentmanager.h', diff --git a/gtk/roaring.c b/gtk/roaring.c new file mode 100644 index 0000000000..779a47c1d6 --- /dev/null +++ b/gtk/roaring.c @@ -0,0 +1,11453 @@ +/* auto-generated on Fri 26 Jun 2020 06:06:51 AM CEST. Do not edit! 
*/ +#include "roaring.h" + +/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ +#ifdef DMALLOC +#include "dmalloc.h" +#endif + +/* begin file src/array_util.c */ +#include +#include +#include +#include +#include +#include + + +#ifdef USESSE4 +// used by intersect_vector16 +ALIGNED(0x1000) +static const uint8_t shuffle_mask16[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 
0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, + 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, + 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, + 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, + 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, + 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, + 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, + 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, + 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, + 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, + 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 
3, 4, 5, 6, 7, 8, 9, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, + 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, + 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, + 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 6, 
7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, + 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF, + 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, + 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 
0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, + 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, + 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, + 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, + 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, + 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, + 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, + 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, + 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, + 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, + 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13, + 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, + 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, + 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, + 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, + 4, 5, 
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15}; + +/** + * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions + * Optimized by D. Lemire on May 3rd 2013 + */ +int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, + const uint16_t *__restrict__ B, size_t s_b, + uint16_t *C) { + size_t count = 0; + size_t i_a = 0, i_b = 0; + const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); + const size_t st_a = (s_a / vectorlength) * vectorlength; + const size_t st_b = (s_b / vectorlength) * vectorlength; + __m128i v_a, v_b; + if ((i_a < st_a) && (i_b < st_b)) { + v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); + v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); + while ((A[i_a] == 0) || (B[i_b] == 0)) { + const __m128i res_v = _mm_cmpestrm( + v_b, vectorlength, v_a, vectorlength, + _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); + const int r = _mm_extract_epi32(res_v, 0); + __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r); + __m128i p = _mm_shuffle_epi8(v_a, sm16); + _mm_storeu_si128((__m128i *)&C[count], p); // can overflow + count += _mm_popcnt_u32(r); + const uint16_t a_max = A[i_a + vectorlength - 1]; + const uint16_t b_max = B[i_b + vectorlength - 1]; + if (a_max <= b_max) { + i_a += vectorlength; + if (i_a == st_a) break; + v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); + } + if (b_max <= a_max) { + i_b += vectorlength; + if (i_b == st_b) break; + v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); + } + } + if ((i_a < st_a) && (i_b < st_b)) + while (true) { + const __m128i res_v = _mm_cmpistrm( + v_b, v_a, + _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); + const int r = _mm_extract_epi32(res_v, 0); + __m128i sm16 = + _mm_load_si128((const __m128i *)shuffle_mask16 + r); + __m128i p = _mm_shuffle_epi8(v_a, sm16); + 
_mm_storeu_si128((__m128i *)&C[count], p); // can overflow + count += _mm_popcnt_u32(r); + const uint16_t a_max = A[i_a + vectorlength - 1]; + const uint16_t b_max = B[i_b + vectorlength - 1]; + if (a_max <= b_max) { + i_a += vectorlength; + if (i_a == st_a) break; + v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); + } + if (b_max <= a_max) { + i_b += vectorlength; + if (i_b == st_b) break; + v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); + } + } + } + // intersect the tail using scalar intersection + while (i_a < s_a && i_b < s_b) { + uint16_t a = A[i_a]; + uint16_t b = B[i_b]; + if (a < b) { + i_a++; + } else if (b < a) { + i_b++; + } else { + C[count] = a; //==b; + count++; + i_a++; + i_b++; + } + } + return (int32_t)count; +} + +int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, + size_t s_a, + const uint16_t *__restrict__ B, + size_t s_b) { + size_t count = 0; + size_t i_a = 0, i_b = 0; + const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); + const size_t st_a = (s_a / vectorlength) * vectorlength; + const size_t st_b = (s_b / vectorlength) * vectorlength; + __m128i v_a, v_b; + if ((i_a < st_a) && (i_b < st_b)) { + v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); + v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); + while ((A[i_a] == 0) || (B[i_b] == 0)) { + const __m128i res_v = _mm_cmpestrm( + v_b, vectorlength, v_a, vectorlength, + _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); + const int r = _mm_extract_epi32(res_v, 0); + count += _mm_popcnt_u32(r); + const uint16_t a_max = A[i_a + vectorlength - 1]; + const uint16_t b_max = B[i_b + vectorlength - 1]; + if (a_max <= b_max) { + i_a += vectorlength; + if (i_a == st_a) break; + v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); + } + if (b_max <= a_max) { + i_b += vectorlength; + if (i_b == st_b) break; + v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); + } + } + if ((i_a < st_a) && (i_b < st_b)) + while (true) { + const __m128i res_v = _mm_cmpistrm( + v_b, v_a, + _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | 
_SIDD_BIT_MASK); + const int r = _mm_extract_epi32(res_v, 0); + count += _mm_popcnt_u32(r); + const uint16_t a_max = A[i_a + vectorlength - 1]; + const uint16_t b_max = B[i_b + vectorlength - 1]; + if (a_max <= b_max) { + i_a += vectorlength; + if (i_a == st_a) break; + v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); + } + if (b_max <= a_max) { + i_b += vectorlength; + if (i_b == st_b) break; + v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); + } + } + } + // intersect the tail using scalar intersection + while (i_a < s_a && i_b < s_b) { + uint16_t a = A[i_a]; + uint16_t b = B[i_b]; + if (a < b) { + i_a++; + } else if (b < a) { + i_b++; + } else { + count++; + i_a++; + i_b++; + } + } + return (int32_t)count; +} + +///////// +// Warning: +// This function may not be safe if A == C or B == C. +///////// +int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, + const uint16_t *__restrict__ B, size_t s_b, + uint16_t *C) { + // we handle the degenerate case + if (s_a == 0) return 0; + if (s_b == 0) { + if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a); + return (int32_t)s_a; + } + // handle the leading zeroes, it is messy but it allows us to use the fast + // _mm_cmpistrm instrinsic safely + int32_t count = 0; + if ((A[0] == 0) || (B[0] == 0)) { + if ((A[0] == 0) && (B[0] == 0)) { + A++; + s_a--; + B++; + s_b--; + } else if (A[0] == 0) { + C[count++] = 0; + A++; + s_a--; + } else { + B++; + s_b--; + } + } + // at this point, we have two non-empty arrays, made of non-zero + // increasing values. 
+ size_t i_a = 0, i_b = 0; + const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t); + const size_t st_a = (s_a / vectorlength) * vectorlength; + const size_t st_b = (s_b / vectorlength) * vectorlength; + if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path + __m128i v_a, v_b; //, v_bmax; + // we load a vector from A and a vector from B + v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); + v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); + // we have a runningmask which indicates which values from A have been + // spotted in B, these don't get written out. + __m128i runningmask_a_found_in_b = _mm_setzero_si128(); + /**** + * start of the main vectorized loop + *****/ + while (true) { + // afoundinb will contain a mask indicate for each entry in A + // whether it is seen + // in B + const __m128i a_found_in_b = + _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | + _SIDD_BIT_MASK); + runningmask_a_found_in_b = + _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); + // we always compare the last values of A and B + const uint16_t a_max = A[i_a + vectorlength - 1]; + const uint16_t b_max = B[i_b + vectorlength - 1]; + if (a_max <= b_max) { + // Ok. In this code path, we are ready to write our v_a + // because there is no need to read more from B, they will + // all be large values. 
+ const int bitmask_belongs_to_difference = + _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; + /*** next few lines are probably expensive *****/ + __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + + bitmask_belongs_to_difference); + __m128i p = _mm_shuffle_epi8(v_a, sm16); + _mm_storeu_si128((__m128i *)&C[count], p); // can overflow + count += _mm_popcnt_u32(bitmask_belongs_to_difference); + // we advance a + i_a += vectorlength; + if (i_a == st_a) // no more + break; + runningmask_a_found_in_b = _mm_setzero_si128(); + v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); + } + if (b_max <= a_max) { + // in this code path, the current v_b has become useless + i_b += vectorlength; + if (i_b == st_b) break; + v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); + } + } + // at this point, either we have i_a == st_a, which is the end of the + // vectorized processing, + // or we have i_b == st_b, and we are not done processing the vector... + // so we need to finish it off. + if (i_a < st_a) { // we have unfinished business... 
+ uint16_t buffer[8]; // buffer to do a masked load + memset(buffer, 0, 8 * sizeof(uint16_t)); + memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t)); + v_b = _mm_lddqu_si128((__m128i *)buffer); + const __m128i a_found_in_b = + _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | + _SIDD_BIT_MASK); + runningmask_a_found_in_b = + _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); + const int bitmask_belongs_to_difference = + _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; + __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + + bitmask_belongs_to_difference); + __m128i p = _mm_shuffle_epi8(v_a, sm16); + _mm_storeu_si128((__m128i *)&C[count], p); // can overflow + count += _mm_popcnt_u32(bitmask_belongs_to_difference); + i_a += vectorlength; + } + // at this point we should have i_a == st_a and i_b == st_b + } + // do the tail using scalar code + while (i_a < s_a && i_b < s_b) { + uint16_t a = A[i_a]; + uint16_t b = B[i_b]; + if (b < a) { + i_b++; + } else if (a < b) { + C[count] = a; + count++; + i_a++; + } else { //== + i_a++; + i_b++; + } + } + if (i_a < s_a) { + if(C == A) { + assert((size_t)count <= i_a); + if((size_t)count < i_a) { + memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a)); + } + } else { + for(size_t i = 0; i < (s_a - i_a); i++) { + C[count + i] = A[i + i_a]; + } + } + count += (int32_t)(s_a - i_a); + } + return count; +} + +#endif // USESSE4 + + + +#ifdef USE_OLD_SKEW_INTERSECT +// TODO: given enough experience with the new skew intersect, drop the old one from the code base. + + +/* Computes the intersection between one small and one large set of uint16_t. + * Stores the result into buffer and return the number of elements. 
*/ +int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, + const uint16_t *large, size_t size_l, + uint16_t *buffer) { + size_t pos = 0, idx_l = 0, idx_s = 0; + + if (0 == size_s) { + return 0; + } + + uint16_t val_l = large[idx_l], val_s = small[idx_s]; + + while (true) { + if (val_l < val_s) { + idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); + if (idx_l == size_l) break; + val_l = large[idx_l]; + } else if (val_s < val_l) { + idx_s++; + if (idx_s == size_s) break; + val_s = small[idx_s]; + } else { + buffer[pos++] = val_s; + idx_s++; + if (idx_s == size_s) break; + val_s = small[idx_s]; + idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); + if (idx_l == size_l) break; + val_l = large[idx_l]; + } + } + + return (int32_t)pos; +} +#else // USE_OLD_SKEW_INTERSECT + + +/** +* Branchless binary search going after 4 values at once. +* Assumes that array is sorted. +* You have that array[*index1] >= target1, array[*index12] >= target2, ... +* except when *index1 = n, in which case you know that all values in array are +* smaller than target1, and so forth. +* It has logarithmic complexity. +*/ +static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1, + uint16_t target2, uint16_t target3, uint16_t target4, + int32_t *index1, int32_t *index2, int32_t *index3, + int32_t *index4) { + const uint16_t *base1 = array; + const uint16_t *base2 = array; + const uint16_t *base3 = array; + const uint16_t *base4 = array; + if (n == 0) + return; + while (n > 1) { + int32_t half = n >> 1; + base1 = (base1[half] < target1) ? &base1[half] : base1; + base2 = (base2[half] < target2) ? &base2[half] : base2; + base3 = (base3[half] < target3) ? &base3[half] : base3; + base4 = (base4[half] < target4) ? 
&base4[half] : base4; + n -= half; + } + *index1 = (int32_t)((*base1 < target1) + base1 - array); + *index2 = (int32_t)((*base2 < target2) + base2 - array); + *index3 = (int32_t)((*base3 < target3) + base3 - array); + *index4 = (int32_t)((*base4 < target4) + base4 - array); +} + +/** +* Branchless binary search going after 2 values at once. +* Assumes that array is sorted. +* You have that array[*index1] >= target1, array[*index12] >= target2. +* except when *index1 = n, in which case you know that all values in array are +* smaller than target1, and so forth. +* It has logarithmic complexity. +*/ +static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1, + uint16_t target2, int32_t *index1, int32_t *index2) { + const uint16_t *base1 = array; + const uint16_t *base2 = array; + if (n == 0) + return; + while (n > 1) { + int32_t half = n >> 1; + base1 = (base1[half] < target1) ? &base1[half] : base1; + base2 = (base2[half] < target2) ? &base2[half] : base2; + n -= half; + } + *index1 = (int32_t)((*base1 < target1) + base1 - array); + *index2 = (int32_t)((*base2 < target2) + base2 - array); +} + +/* Computes the intersection between one small and one large set of uint16_t. + * Stores the result into buffer and return the number of elements. + * Processes the small set in blocks of 4 values calling binarySearch4 + * and binarySearch2. This approach can be slightly superior to a conventional + * galloping search in some instances. 
+ */ +int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s, + const uint16_t *large, size_t size_l, + uint16_t *buffer) { + size_t pos = 0, idx_l = 0, idx_s = 0; + + if (0 == size_s) { + return 0; + } + int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0; + while ((idx_s + 4 <= size_s) && (idx_l < size_l)) { + uint16_t target1 = small[idx_s]; + uint16_t target2 = small[idx_s + 1]; + uint16_t target3 = small[idx_s + 2]; + uint16_t target4 = small[idx_s + 3]; + binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3, + target4, &index1, &index2, &index3, &index4); + if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { + buffer[pos++] = target1; + } + if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { + buffer[pos++] = target2; + } + if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) { + buffer[pos++] = target3; + } + if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) { + buffer[pos++] = target4; + } + idx_s += 4; + idx_l += index4; + } + if ((idx_s + 2 <= size_s) && (idx_l < size_l)) { + uint16_t target1 = small[idx_s]; + uint16_t target2 = small[idx_s + 1]; + binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1, + &index2); + if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) { + buffer[pos++] = target1; + } + if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) { + buffer[pos++] = target2; + } + idx_s += 2; + idx_l += index2; + } + if ((idx_s < size_s) && (idx_l < size_l)) { + uint16_t val_s = small[idx_s]; + int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s); + if (index >= 0) + buffer[pos++] = val_s; + } + return (int32_t)pos; +} + + +#endif //USE_OLD_SKEW_INTERSECT + + +// TODO: this could be accelerated, possibly, by using binarySearch4 as above. 
+int32_t intersect_skewed_uint16_cardinality(const uint16_t *small, + size_t size_s, + const uint16_t *large, + size_t size_l) { + size_t pos = 0, idx_l = 0, idx_s = 0; + + if (0 == size_s) { + return 0; + } + + uint16_t val_l = large[idx_l], val_s = small[idx_s]; + + while (true) { + if (val_l < val_s) { + idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); + if (idx_l == size_l) break; + val_l = large[idx_l]; + } else if (val_s < val_l) { + idx_s++; + if (idx_s == size_s) break; + val_s = small[idx_s]; + } else { + pos++; + idx_s++; + if (idx_s == size_s) break; + val_s = small[idx_s]; + idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); + if (idx_l == size_l) break; + val_l = large[idx_l]; + } + } + + return (int32_t)pos; +} + +bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s, + const uint16_t *large, size_t size_l) { + size_t idx_l = 0, idx_s = 0; + + if (0 == size_s) { + return false; + } + + uint16_t val_l = large[idx_l], val_s = small[idx_s]; + + while (true) { + if (val_l < val_s) { + idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s); + if (idx_l == size_l) break; + val_l = large[idx_l]; + } else if (val_s < val_l) { + idx_s++; + if (idx_s == size_s) break; + val_s = small[idx_s]; + } else { + return true; + } + } + + return false; +} + +/** + * Generic intersection function. 
+ */ +int32_t intersect_uint16(const uint16_t *A, const size_t lenA, + const uint16_t *B, const size_t lenB, uint16_t *out) { + const uint16_t *initout = out; + if (lenA == 0 || lenB == 0) return 0; + const uint16_t *endA = A + lenA; + const uint16_t *endB = B + lenB; + + while (1) { + while (*A < *B) { + SKIP_FIRST_COMPARE: + if (++A == endA) return (int32_t)(out - initout); + } + while (*A > *B) { + if (++B == endB) return (int32_t)(out - initout); + } + if (*A == *B) { + *out++ = *A; + if (++A == endA || ++B == endB) return (int32_t)(out - initout); + } else { + goto SKIP_FIRST_COMPARE; + } + } + return (int32_t)(out - initout); // NOTREACHED +} + +int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, + const uint16_t *B, const size_t lenB) { + int32_t answer = 0; + if (lenA == 0 || lenB == 0) return 0; + const uint16_t *endA = A + lenA; + const uint16_t *endB = B + lenB; + + while (1) { + while (*A < *B) { + SKIP_FIRST_COMPARE: + if (++A == endA) return answer; + } + while (*A > *B) { + if (++B == endB) return answer; + } + if (*A == *B) { + ++answer; + if (++A == endA || ++B == endB) return answer; + } else { + goto SKIP_FIRST_COMPARE; + } + } + return answer; // NOTREACHED +} + + +bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, + const uint16_t *B, const size_t lenB) { + if (lenA == 0 || lenB == 0) return 0; + const uint16_t *endA = A + lenA; + const uint16_t *endB = B + lenB; + + while (1) { + while (*A < *B) { + SKIP_FIRST_COMPARE: + if (++A == endA) return false; + } + while (*A > *B) { + if (++B == endB) return false; + } + if (*A == *B) { + return true; + } else { + goto SKIP_FIRST_COMPARE; + } + } + return false; // NOTREACHED +} + + + +/** + * Generic intersection function. 
+ */ +size_t intersection_uint32(const uint32_t *A, const size_t lenA, + const uint32_t *B, const size_t lenB, + uint32_t *out) { + const uint32_t *initout = out; + if (lenA == 0 || lenB == 0) return 0; + const uint32_t *endA = A + lenA; + const uint32_t *endB = B + lenB; + + while (1) { + while (*A < *B) { + SKIP_FIRST_COMPARE: + if (++A == endA) return (out - initout); + } + while (*A > *B) { + if (++B == endB) return (out - initout); + } + if (*A == *B) { + *out++ = *A; + if (++A == endA || ++B == endB) return (out - initout); + } else { + goto SKIP_FIRST_COMPARE; + } + } + return (out - initout); // NOTREACHED +} + +size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, + const uint32_t *B, const size_t lenB) { + if (lenA == 0 || lenB == 0) return 0; + size_t card = 0; + const uint32_t *endA = A + lenA; + const uint32_t *endB = B + lenB; + + while (1) { + while (*A < *B) { + SKIP_FIRST_COMPARE: + if (++A == endA) return card; + } + while (*A > *B) { + if (++B == endB) return card; + } + if (*A == *B) { + card++; + if (++A == endA || ++B == endB) return card; + } else { + goto SKIP_FIRST_COMPARE; + } + } + return card; // NOTREACHED +} + +// can one vectorize the computation of the union? (Update: Yes! See +// union_vector16). 
+ +size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, + size_t size_2, uint16_t *buffer) { + size_t pos = 0, idx_1 = 0, idx_2 = 0; + + if (0 == size_2) { + memmove(buffer, set_1, size_1 * sizeof(uint16_t)); + return size_1; + } + if (0 == size_1) { + memmove(buffer, set_2, size_2 * sizeof(uint16_t)); + return size_2; + } + + uint16_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; + + while (true) { + if (val_1 < val_2) { + buffer[pos++] = val_1; + ++idx_1; + if (idx_1 >= size_1) break; + val_1 = set_1[idx_1]; + } else if (val_2 < val_1) { + buffer[pos++] = val_2; + ++idx_2; + if (idx_2 >= size_2) break; + val_2 = set_2[idx_2]; + } else { + buffer[pos++] = val_1; + ++idx_1; + ++idx_2; + if (idx_1 >= size_1 || idx_2 >= size_2) break; + val_1 = set_1[idx_1]; + val_2 = set_2[idx_2]; + } + } + + if (idx_1 < size_1) { + const size_t n_elems = size_1 - idx_1; + memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint16_t)); + pos += n_elems; + } else if (idx_2 < size_2) { + const size_t n_elems = size_2 - idx_2; + memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint16_t)); + pos += n_elems; + } + + return pos; +} + +int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, + int length2, uint16_t *a_out) { + int out_card = 0; + int k1 = 0, k2 = 0; + if (length1 == 0) return 0; + if (length2 == 0) { + if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1); + return length1; + } + uint16_t s1 = a1[k1]; + uint16_t s2 = a2[k2]; + while (true) { + if (s1 < s2) { + a_out[out_card++] = s1; + ++k1; + if (k1 >= length1) { + break; + } + s1 = a1[k1]; + } else if (s1 == s2) { + ++k1; + ++k2; + if (k1 >= length1) { + break; + } + if (k2 >= length2) { + memmove(a_out + out_card, a1 + k1, + sizeof(uint16_t) * (length1 - k1)); + return out_card + length1 - k1; + } + s1 = a1[k1]; + s2 = a2[k2]; + } else { // if (val1>val2) + ++k2; + if (k2 >= length2) { + memmove(a_out + out_card, a1 + k1, + sizeof(uint16_t) * (length1 - k1)); + 
return out_card + length1 - k1; + } + s2 = a2[k2]; + } + } + return out_card; +} + +int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, + const uint16_t *array_2, int32_t card_2, uint16_t *out) { + int32_t pos1 = 0, pos2 = 0, pos_out = 0; + while (pos1 < card_1 && pos2 < card_2) { + const uint16_t v1 = array_1[pos1]; + const uint16_t v2 = array_2[pos2]; + if (v1 == v2) { + ++pos1; + ++pos2; + continue; + } + if (v1 < v2) { + out[pos_out++] = v1; + ++pos1; + } else { + out[pos_out++] = v2; + ++pos2; + } + } + if (pos1 < card_1) { + const size_t n_elems = card_1 - pos1; + memcpy(out + pos_out, array_1 + pos1, n_elems * sizeof(uint16_t)); + pos_out += (int32_t)n_elems; + } else if (pos2 < card_2) { + const size_t n_elems = card_2 - pos2; + memcpy(out + pos_out, array_2 + pos2, n_elems * sizeof(uint16_t)); + pos_out += (int32_t)n_elems; + } + return pos_out; +} + +#ifdef USESSE4 + +/*** + * start of the SIMD 16-bit union code + * + */ + +// Assuming that vInput1 and vInput2 are sorted, produces a sorted output going +// from vecMin all the way to vecMax +// developed originally for merge sort using SIMD instructions. +// Standard merge. 
See, e.g., Inoue and Taura, SIMD- and Cache-Friendly +// Algorithm for Sorting an Array of Structures +static inline void sse_merge(const __m128i *vInput1, + const __m128i *vInput2, // input 1 & 2 + __m128i *vecMin, __m128i *vecMax) { // output + __m128i vecTmp; + vecTmp = _mm_min_epu16(*vInput1, *vInput2); + *vecMax = _mm_max_epu16(*vInput1, *vInput2); + vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2); + *vecMin = _mm_min_epu16(vecTmp, *vecMax); + *vecMax = _mm_max_epu16(vecTmp, *vecMax); + vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); + *vecMin = _mm_min_epu16(vecTmp, *vecMax); + *vecMax = _mm_max_epu16(vecTmp, *vecMax); + vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); + *vecMin = _mm_min_epu16(vecTmp, *vecMax); + *vecMax = _mm_max_epu16(vecTmp, *vecMax); + vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); + *vecMin = _mm_min_epu16(vecTmp, *vecMax); + *vecMax = _mm_max_epu16(vecTmp, *vecMax); + vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); + *vecMin = _mm_min_epu16(vecTmp, *vecMax); + *vecMax = _mm_max_epu16(vecTmp, *vecMax); + vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); + *vecMin = _mm_min_epu16(vecTmp, *vecMax); + *vecMax = _mm_max_epu16(vecTmp, *vecMax); + vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2); + *vecMin = _mm_min_epu16(vecTmp, *vecMax); + *vecMax = _mm_max_epu16(vecTmp, *vecMax); + *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2); +} + +// used by store_unique, generated by simdunion.py +static uint8_t uniqshuf[] = { + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, + 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, + 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, + 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 
0xb, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, + 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, + 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, + 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 
0xb, + 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, + 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, + 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, + 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 
0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, + 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, + 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, + 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, + 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, + 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, + 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 
0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, + 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, + 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0x0, 0x1, 0x2, 0x3, + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, + 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 
0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, + 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, + 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, + 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, + 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, + 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, + 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, + 0x8, 0x9, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, + 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, + 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, + 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, + 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, + 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF}; + +// write vector new, while omitting repeated values assuming that previously +// written vector was "old" +static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) { + __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2); + // lots of high latency instructions follow (optimize?) + int M = _mm_movemask_epi8( + _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128())); + int numberofnewvalues = 8 - _mm_popcnt_u32(M); + __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); + __m128i val = _mm_shuffle_epi8(newval, key); + _mm_storeu_si128((__m128i *)output, val); + return numberofnewvalues; +} + +// working in-place, this function overwrites the repeated values +// could be avoided? +static inline uint32_t unique(uint16_t *out, uint32_t len) { + uint32_t pos = 1; + for (uint32_t i = 1; i < len; ++i) { + if (out[i] != out[i - 1]) { + out[pos++] = out[i]; + } + } + return pos; +} + +// use with qsort, could be avoided +static int uint16_compare(const void *a, const void *b) { + return (*(uint16_t *)a - *(uint16_t *)b); +} + +// a one-pass SSE union algorithm +// This function may not be safe if array1 == output or array2 == output. 
// Vectorized union of two sorted uint16_t arrays.
// Merges array1 (length1 elements, sorted) and array2 (length2 elements,
// sorted) into output, dropping duplicates, and returns the number of
// elements written.  Falls back to the scalar union_uint16 when either
// input is shorter than one SSE register (8 x uint16_t).
// NOTE(review): per the comment preceding this function, it may not be safe
// when output aliases array1 or array2.
uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
                        const uint16_t *__restrict__ array2, uint32_t length2,
                        uint16_t *__restrict__ output) {
    if ((length1 < 8) || (length2 < 8)) {
        // inputs too small for a full vector: use the scalar merge
        return (uint32_t)union_uint16(array1, length1, array2, length2, output);
    }
    __m128i vA, vB, V, vecMin, vecMax;
    __m128i laststore;
    uint16_t *initoutput = output;  // remember start so we can compute length
    uint32_t len1 = length1 / 8;    // number of whole 8-element vectors in array1
    uint32_t len2 = length2 / 8;    // number of whole 8-element vectors in array2
    uint32_t pos1 = 0;
    uint32_t pos2 = 0;
    // we start the machine: prime the merge network with one vector from each side
    vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
    pos1++;
    vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
    pos2++;
    sse_merge(&vA, &vB, &vecMin, &vecMax);
    // seed "previously stored" with all-0xFFFF so the first store_unique call
    // has a well-defined predecessor to compare against
    laststore = _mm_set1_epi16(-1);
    output += store_unique(laststore, vecMin, output);
    laststore = vecMin;
    if ((pos1 < len1) && (pos2 < len2)) {
        uint16_t curA, curB;
        // curA/curB are the first elements of the next unread vector on each
        // side; the smaller side feeds the merge network next
        curA = array1[8 * pos1];
        curB = array2[8 * pos2];
        while (true) {
            if (curA <= curB) {
                V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
                pos1++;
                if (pos1 < len1) {
                    curA = array1[8 * pos1];
                } else {
                    break;  // array1's vectorizable part is exhausted
                }
            } else {
                V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
                pos2++;
                if (pos2 < len2) {
                    curB = array2[8 * pos2];
                } else {
                    break;  // array2's vectorizable part is exhausted
                }
            }
            // merge new vector with the running maxima; emit the minima,
            // deduplicated against what was last written
            sse_merge(&V, &vecMax, &vecMin, &vecMax);
            output += store_unique(laststore, vecMin, output);
            laststore = vecMin;
        }
        // flush the vector V loaded on the iteration that broke out of the loop
        sse_merge(&V, &vecMax, &vecMin, &vecMax);
        output += store_unique(laststore, vecMin, output);
        laststore = vecMin;
    }
    // we finish the rest off using a scalar algorithm
    // could be improved?
    //
    // copy the small end on a tmp buffer
    uint32_t len = (uint32_t)(output - initoutput);
    // buffer holds at most 8 leftover vector lanes plus up to 7 tail elements
    // of the exhausted input (length % 8 < 8), so 16 slots suffice
    uint16_t buffer[16];
    uint32_t leftoversize = store_unique(laststore, vecMax, buffer);
    if (pos1 == len1) {
        // array1's vector part ran out first: append its scalar tail,
        // re-sort/dedup the small buffer, then scalar-union with the rest
        // of array2
        memcpy(buffer + leftoversize, array1 + 8 * pos1,
               (length1 - 8 * len1) * sizeof(uint16_t));
        leftoversize += length1 - 8 * len1;
        qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);

        leftoversize = unique(buffer, leftoversize);
        len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2,
                                      length2 - 8 * pos2, output);
    } else {
        // symmetric case: array2's vector part ran out first
        memcpy(buffer + leftoversize, array2 + 8 * pos2,
               (length2 - 8 * len2) * sizeof(uint16_t));
        leftoversize += length2 - 8 * len2;
        qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
        leftoversize = unique(buffer, leftoversize);
        len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1,
                                      length1 - 8 * pos1, output);
    }
    return len;
}

/**
 * End of the SIMD 16-bit union code
 *
 */

/**
 * Start of SIMD 16-bit XOR code
 */

// write vector new, while omitting repeated values assuming that previously
// written vector was "old"
// XOR semantics: an element is kept only if it appears an odd number of
// times, so unlike store_unique this drops BOTH copies of a duplicated
// value.  It emits the lagged window vecTmp2 (last lane of old + first 7
// lanes of newval), masking out lanes equal to either neighbor.
static inline int store_unique_xor(__m128i old, __m128i newval,
                                   uint16_t *output) {
    // lagged views: vecTmp1 lags by 2 lanes, vecTmp2 by 1 lane
    __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4);
    __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2);
    // a lane of vecTmp2 is a duplicate if it equals its left or right neighbor
    __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1);
    __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval);
    __m128i equalleftoright = _mm_or_si128(equalleft, equalright);
    // compress the 8 lane masks into an 8-bit index M
    int M = _mm_movemask_epi8(
        _mm_packs_epi16(equalleftoright, _mm_setzero_si128()));
    int numberofnewvalues = 8 - _mm_popcnt_u32(M);
    // look up the shuffle pattern that compacts the surviving lanes
    __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
    __m128i val = _mm_shuffle_epi8(vecTmp2, key);
    _mm_storeu_si128((__m128i *)output, val);
    return numberofnewvalues;
}

// working in-place, this function overwrites the repeated values
// could be avoided?
Warning: assumes len > 0 +static inline uint32_t unique_xor(uint16_t *out, uint32_t len) { + uint32_t pos = 1; + for (uint32_t i = 1; i < len; ++i) { + if (out[i] != out[i - 1]) { + out[pos++] = out[i]; + } else + pos--; // if it is identical to previous, delete it + } + return pos; +} + +// a one-pass SSE xor algorithm +uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, + const uint16_t *__restrict__ array2, uint32_t length2, + uint16_t *__restrict__ output) { + if ((length1 < 8) || (length2 < 8)) { + return xor_uint16(array1, length1, array2, length2, output); + } + __m128i vA, vB, V, vecMin, vecMax; + __m128i laststore; + uint16_t *initoutput = output; + uint32_t len1 = length1 / 8; + uint32_t len2 = length2 / 8; + uint32_t pos1 = 0; + uint32_t pos2 = 0; + // we start the machine + vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); + pos1++; + vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); + pos2++; + sse_merge(&vA, &vB, &vecMin, &vecMax); + laststore = _mm_set1_epi16(-1); + uint16_t buffer[17]; + output += store_unique_xor(laststore, vecMin, output); + + laststore = vecMin; + if ((pos1 < len1) && (pos2 < len2)) { + uint16_t curA, curB; + curA = array1[8 * pos1]; + curB = array2[8 * pos2]; + while (true) { + if (curA <= curB) { + V = _mm_lddqu_si128((const __m128i *)array1 + pos1); + pos1++; + if (pos1 < len1) { + curA = array1[8 * pos1]; + } else { + break; + } + } else { + V = _mm_lddqu_si128((const __m128i *)array2 + pos2); + pos2++; + if (pos2 < len2) { + curB = array2[8 * pos2]; + } else { + break; + } + } + sse_merge(&V, &vecMax, &vecMin, &vecMax); + // conditionally stores the last value of laststore as well as all + // but the + // last value of vecMin + output += store_unique_xor(laststore, vecMin, output); + laststore = vecMin; + } + sse_merge(&V, &vecMax, &vecMin, &vecMax); + // conditionally stores the last value of laststore as well as all but + // the + // last value of vecMin + output += 
store_unique_xor(laststore, vecMin, output); + laststore = vecMin; + } + uint32_t len = (uint32_t)(output - initoutput); + + // we finish the rest off using a scalar algorithm + // could be improved? + // conditionally stores the last value of laststore as well as all but the + // last value of vecMax, + // we store to "buffer" + int leftoversize = store_unique_xor(laststore, vecMax, buffer); + uint16_t vec7 = _mm_extract_epi16(vecMax, 7); + uint16_t vec6 = _mm_extract_epi16(vecMax, 6); + if (vec7 != vec6) buffer[leftoversize++] = vec7; + if (pos1 == len1) { + memcpy(buffer + leftoversize, array1 + 8 * pos1, + (length1 - 8 * len1) * sizeof(uint16_t)); + leftoversize += length1 - 8 * len1; + if (leftoversize == 0) { // trivial case + memcpy(output, array2 + 8 * pos2, + (length2 - 8 * pos2) * sizeof(uint16_t)); + len += (length2 - 8 * pos2); + } else { + qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); + leftoversize = unique_xor(buffer, leftoversize); + len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2, + length2 - 8 * pos2, output); + } + } else { + memcpy(buffer + leftoversize, array2 + 8 * pos2, + (length2 - 8 * len2) * sizeof(uint16_t)); + leftoversize += length2 - 8 * len2; + if (leftoversize == 0) { // trivial case + memcpy(output, array1 + 8 * pos1, + (length1 - 8 * pos1) * sizeof(uint16_t)); + len += (length1 - 8 * pos1); + } else { + qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); + leftoversize = unique_xor(buffer, leftoversize); + len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1, + length1 - 8 * pos1, output); + } + } + return len; +} + +/** + * End of SIMD 16-bit XOR code + */ + +#endif // USESSE4 + +size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, + size_t size_2, uint32_t *buffer) { + size_t pos = 0, idx_1 = 0, idx_2 = 0; + + if (0 == size_2) { + memmove(buffer, set_1, size_1 * sizeof(uint32_t)); + return size_1; + } + if (0 == size_1) { + memmove(buffer, set_2, size_2 * 
sizeof(uint32_t)); + return size_2; + } + + uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; + + while (true) { + if (val_1 < val_2) { + buffer[pos++] = val_1; + ++idx_1; + if (idx_1 >= size_1) break; + val_1 = set_1[idx_1]; + } else if (val_2 < val_1) { + buffer[pos++] = val_2; + ++idx_2; + if (idx_2 >= size_2) break; + val_2 = set_2[idx_2]; + } else { + buffer[pos++] = val_1; + ++idx_1; + ++idx_2; + if (idx_1 >= size_1 || idx_2 >= size_2) break; + val_1 = set_1[idx_1]; + val_2 = set_2[idx_2]; + } + } + + if (idx_1 < size_1) { + const size_t n_elems = size_1 - idx_1; + memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint32_t)); + pos += n_elems; + } else if (idx_2 < size_2) { + const size_t n_elems = size_2 - idx_2; + memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint32_t)); + pos += n_elems; + } + + return pos; +} + +size_t union_uint32_card(const uint32_t *set_1, size_t size_1, + const uint32_t *set_2, size_t size_2) { + size_t pos = 0, idx_1 = 0, idx_2 = 0; + + if (0 == size_2) { + return size_1; + } + if (0 == size_1) { + return size_2; + } + + uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2]; + + while (true) { + if (val_1 < val_2) { + ++idx_1; + ++pos; + if (idx_1 >= size_1) break; + val_1 = set_1[idx_1]; + } else if (val_2 < val_1) { + ++idx_2; + ++pos; + if (idx_2 >= size_2) break; + val_2 = set_2[idx_2]; + } else { + ++idx_1; + ++idx_2; + ++pos; + if (idx_1 >= size_1 || idx_2 >= size_2) break; + val_1 = set_1[idx_1]; + val_2 = set_2[idx_2]; + } + } + + if (idx_1 < size_1) { + const size_t n_elems = size_1 - idx_1; + pos += n_elems; + } else if (idx_2 < size_2) { + const size_t n_elems = size_2 - idx_2; + pos += n_elems; + } + return pos; +} + + + +size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, + size_t size_2, uint16_t *buffer) { +#ifdef ROARING_VECTOR_OPERATIONS_ENABLED + // compute union with smallest array first + if (size_1 < size_2) { + return union_vector16(set_1, (uint32_t)size_1, + set_2, 
(uint32_t)size_2, buffer); + } else { + return union_vector16(set_2, (uint32_t)size_2, + set_1, (uint32_t)size_1, buffer); + } +#else + // compute union with smallest array first + if (size_1 < size_2) { + return union_uint16( + set_1, size_1, set_2, size_2, buffer); + } else { + return union_uint16( + set_2, size_2, set_1, size_1, buffer); + } +#endif +} + +bool memequals(const void *s1, const void *s2, size_t n) { + if (n == 0) { + return true; + } +#ifdef USEAVX + const uint8_t *ptr1 = (const uint8_t *)s1; + const uint8_t *ptr2 = (const uint8_t *)s2; + const uint8_t *end1 = ptr1 + n; + const uint8_t *end8 = ptr1 + n/8*8; + const uint8_t *end32 = ptr1 + n/32*32; + + while (ptr1 < end32) { + __m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1); + __m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2); + int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); + if ((uint32_t)mask != UINT32_MAX) { + return false; + } + ptr1 += 32; + ptr2 += 32; + } + + while (ptr1 < end8) { + uint64_t v1 = *((const uint64_t*)ptr1); + uint64_t v2 = *((const uint64_t*)ptr2); + if (v1 != v2) { + return false; + } + ptr1 += 8; + ptr2 += 8; + } + + while (ptr1 < end1) { + if (*ptr1 != *ptr2) { + return false; + } + ptr1++; + ptr2++; + } + + return true; +#else + return memcmp(s1, s2, n) == 0; +#endif +} +/* end file src/array_util.c */ +/* begin file src/bitset_util.c */ +#include +#include +#include +#include +#include + + +#ifdef IS_X64 +static uint8_t lengthTable[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, + 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 
+ 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; +#endif + +#ifdef USEAVX +ALIGNED(32) +static uint32_t vecDecodeTable[256][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ + {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ + {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ + {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ + {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ + {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ + {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ + {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ + {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ + {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ + {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ + {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ + {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ + {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ + {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ + {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ + {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ + {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ + {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ + {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ + {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ + {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ + {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ + {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ + {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ + {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ + {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ + {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ + {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ + {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ + {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ + {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ + {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 
(00100000) */ + {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ + {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ + {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ + {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ + {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ + {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ + {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ + {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ + {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ + {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ + {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ + {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ + {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ + {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ + {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ + {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ + {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ + {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ + {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ + {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ + {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ + {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ + {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ + {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ + {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ + {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ + {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ + {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ + {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ + {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ + {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ + {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ + {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ + {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ + {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ + {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ + {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ + {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ + {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ + {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 
(01001000) */ + {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ + {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ + {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ + {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ + {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ + {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ + {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ + {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ + {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ + {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ + {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ + {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ + {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ + {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ + {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ + {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ + {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ + {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ + {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ + {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ + {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ + {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ + {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ + {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ + {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ + {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ + {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ + {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ + {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ + {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ + {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ + {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ + {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ + {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ + {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ + {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ + {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ + {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ + {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ + {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 
(01110000) */ + {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ + {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ + {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ + {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ + {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ + {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ + {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ + {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ + {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ + {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ + {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ + {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ + {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ + {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ + {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ + {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ + {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ + {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ + {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ + {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ + {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ + {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ + {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ + {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ + {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ + {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ + {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ + {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ + {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ + {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ + {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ + {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ + {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ + {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ + {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ + {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ + {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ + {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ + {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ + {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 
(10011000) */ + {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ + {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ + {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ + {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ + {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ + {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ + {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ + {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ + {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ + {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ + {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ + {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ + {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ + {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ + {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ + {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ + {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ + {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ + {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ + {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ + {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ + {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ + {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ + {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ + {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ + {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ + {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ + {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ + {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ + {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ + {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ + {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ + {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ + {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ + {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ + {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ + {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ + {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ + {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ + {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 
(11000000) */ + {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ + {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ + {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ + {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ + {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ + {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ + {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ + {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ + {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ + {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ + {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ + {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ + {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ + {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ + {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ + {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ + {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ + {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ + {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ + {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ + {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ + {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ + {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ + {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ + {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ + {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ + {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ + {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ + {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ + {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ + {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ + {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ + {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ + {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ + {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ + {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ + {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ + {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ + {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ + {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 
(11101000) */ + {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ + {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ + {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ + {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ + {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ + {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ + {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ + {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ + {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ + {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ + {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ + {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ + {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ + {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ + {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ + {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ + {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ + {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ + {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ + {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ + {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ + {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ + {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ +}; + +#endif // #ifdef USEAVX + +#ifdef IS_X64 +// same as vecDecodeTable but in 16 bits +ALIGNED(32) +static uint16_t vecDecodeTable_uint16[256][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ + {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ + {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ + {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ + {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ + {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ + {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ + {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ + {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ + {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ + {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ + {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ + {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ + {1, 3, 4, 0, 0, 0, 0, 0}, /* 
0x0D (00001101) */ + {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ + {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ + {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ + {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ + {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ + {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ + {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ + {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ + {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ + {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ + {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ + {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ + {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ + {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ + {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ + {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ + {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ + {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ + {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ + {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ + {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ + {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ + {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ + {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ + {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ + {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ + {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ + {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ + {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ + {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ + {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ + {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ + {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ + {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ + {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ + {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ + {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ + {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ + {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ + {1, 3, 5, 6, 0, 0, 0, 0}, /* 
0x35 (00110101) */ + {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ + {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ + {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ + {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ + {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ + {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ + {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ + {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ + {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ + {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ + {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ + {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ + {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ + {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ + {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ + {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ + {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ + {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ + {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ + {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ + {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ + {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ + {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ + {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ + {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ + {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ + {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ + {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ + {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ + {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ + {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ + {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ + {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ + {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ + {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ + {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ + {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ + {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ + {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ + {1, 3, 4, 5, 7, 0, 0, 0}, /* 
0x5D (01011101) */ + {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ + {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ + {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ + {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ + {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ + {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ + {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ + {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ + {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ + {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ + {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ + {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ + {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ + {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ + {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ + {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ + {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ + {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ + {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ + {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ + {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ + {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ + {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ + {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ + {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ + {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ + {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ + {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ + {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ + {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ + {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ + {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ + {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ + {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ + {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ + {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ + {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ + {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ + {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ + {1, 3, 8, 0, 0, 0, 0, 0}, /* 
0x85 (10000101) */ + {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ + {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ + {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ + {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ + {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ + {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ + {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ + {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ + {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ + {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ + {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ + {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ + {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ + {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ + {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ + {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ + {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ + {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ + {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ + {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ + {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ + {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ + {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ + {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ + {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ + {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ + {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ + {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ + {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ + {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ + {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ + {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ + {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ + {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ + {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ + {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ + {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ + {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ + {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ + {1, 3, 4, 6, 8, 0, 0, 0}, /* 
0xAD (10101101) */ + {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ + {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ + {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ + {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ + {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ + {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ + {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ + {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ + {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ + {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ + {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ + {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ + {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ + {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ + {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ + {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ + {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ + {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ + {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ + {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ + {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ + {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ + {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ + {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ + {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ + {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ + {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ + {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ + {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ + {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ + {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ + {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ + {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ + {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ + {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ + {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ + {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ + {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ + {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ + {1, 3, 5, 7, 8, 0, 0, 0}, /* 
0xD5 (11010101) */ + {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ + {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ + {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ + {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ + {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ + {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ + {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ + {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ + {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ + {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ + {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ + {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ + {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ + {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ + {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ + {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ + {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ + {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ + {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ + {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ + {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ + {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ + {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ + {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ + {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ + {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ + {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ + {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ + {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ + {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ + {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ + {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ + {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ + {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ + {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ + {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ + {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ + {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ + {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ + {1, 3, 4, 5, 6, 7, 8, 0}, /* 
0xFD (11111101) */ + {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ + {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ +}; + +#endif + +#ifdef USEAVX + +size_t bitset_extract_setbits_avx2(uint64_t *array, size_t length, void *vout, + size_t outcapacity, uint32_t base) { + uint32_t *out = (uint32_t *)vout; + uint32_t *initout = out; + __m256i baseVec = _mm256_set1_epi32(base - 1); + __m256i incVec = _mm256_set1_epi32(64); + __m256i add8 = _mm256_set1_epi32(8); + uint32_t *safeout = out + outcapacity; + size_t i = 0; + for (; (i < length) && (out + 64 <= safeout); ++i) { + uint64_t w = array[i]; + if (w == 0) { + baseVec = _mm256_add_epi32(baseVec, incVec); + } else { + for (int k = 0; k < 4; ++k) { + uint8_t byteA = (uint8_t)w; + uint8_t byteB = (uint8_t)(w >> 8); + w >>= 16; + __m256i vecA = + _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]); + __m256i vecB = + _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]); + uint8_t advanceA = lengthTable[byteA]; + uint8_t advanceB = lengthTable[byteB]; + vecA = _mm256_add_epi32(baseVec, vecA); + baseVec = _mm256_add_epi32(baseVec, add8); + vecB = _mm256_add_epi32(baseVec, vecB); + baseVec = _mm256_add_epi32(baseVec, add8); + _mm256_storeu_si256((__m256i *)out, vecA); + out += advanceA; + _mm256_storeu_si256((__m256i *)out, vecB); + out += advanceB; + } + } + } + base += i * 64; + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = array[i]; + while ((w != 0) && (out < safeout)) { + uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) + int r = __builtin_ctzll(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + out++; + w ^= t; + } + base += 64; + } + return out - initout; +} +#endif // USEAVX + +size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout, + uint32_t base) { + int outpos = 0; + uint32_t *out = (uint32_t *)vout; + for (size_t i 
= 0; i < length; ++i) { + uint64_t w = bitset[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) + int r = __builtin_ctzll(w); // on x64, should compile to TZCNT + uint32_t val = r + base; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + w ^= t; + } + base += 64; + } + return outpos; +} + +size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1, + const uint64_t * __restrict__ bitset2, + size_t length, uint16_t *out, + uint16_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = bitset1[i] & bitset2[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + out[outpos++] = r + base; + w ^= t; + } + base += 64; + } + return outpos; +} + +#ifdef IS_X64 +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out" as 16-bit integers, values start at "base" (can + *be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + * + * This function uses SSE decoding. 
+ */ +size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length, + uint16_t *out, size_t outcapacity, + uint16_t base) { + uint16_t *initout = out; + __m128i baseVec = _mm_set1_epi16(base - 1); + __m128i incVec = _mm_set1_epi16(64); + __m128i add8 = _mm_set1_epi16(8); + uint16_t *safeout = out + outcapacity; + const int numberofbytes = 2; // process two bytes at a time + size_t i = 0; + for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { + uint64_t w = bitset[i]; + if (w == 0) { + baseVec = _mm_add_epi16(baseVec, incVec); + } else { + for (int k = 0; k < 4; ++k) { + uint8_t byteA = (uint8_t)w; + uint8_t byteB = (uint8_t)(w >> 8); + w >>= 16; + __m128i vecA = _mm_load_si128( + (const __m128i *)vecDecodeTable_uint16[byteA]); + __m128i vecB = _mm_load_si128( + (const __m128i *)vecDecodeTable_uint16[byteB]); + uint8_t advanceA = lengthTable[byteA]; + uint8_t advanceB = lengthTable[byteB]; + vecA = _mm_add_epi16(baseVec, vecA); + baseVec = _mm_add_epi16(baseVec, add8); + vecB = _mm_add_epi16(baseVec, vecB); + baseVec = _mm_add_epi16(baseVec, add8); + _mm_storeu_si128((__m128i *)out, vecA); + out += advanceA; + _mm_storeu_si128((__m128i *)out, vecB); + out += advanceB; + } + } + } + base += (uint16_t)(i * 64); + for (; (i < length) && (out < safeout); ++i) { + uint64_t w = bitset[i]; + while ((w != 0) && (out < safeout)) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + *out = r + base; + out++; + w ^= t; + } + base += 64; + } + return out - initout; +} +#endif + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base" (can be set to zero). + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. 
+ */ +size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length, + uint16_t *out, uint16_t base) { + int outpos = 0; + for (size_t i = 0; i < length; ++i) { + uint64_t w = bitset[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + out[outpos++] = r + base; + w ^= t; + } + base += 64; + } + return outpos; +} + +#if defined(ASMBITMANIPOPTIMIZATION) + +uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, pos; + uint64_t shift = 6; + const uint16_t *end = list + length; + if (!length) return card; + // TODO: could unroll for performance, see bitset_set_list + // bts is not available as an intrinsic in GCC + __asm volatile( + "1:\n" + "movzwq (%[list]), %[pos]\n" + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)\n" + "sbb $-1, %[card]\n" + "add $2, %[list]\n" + "cmp %[list], %[end]\n" + "jnz 1b" + : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), + [pos] "=&r"(pos), [offset] "=&r"(offset) + : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift)); + return card; +} + +void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) { + uint64_t pos; + const uint16_t *end = list + length; + + uint64_t shift = 6; + uint64_t offset; + uint64_t load; + for (; list + 3 < end; list += 4) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[1]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [bitset] "r"(bitset), 
[shift] "r"(shift), [pos] "r"(pos)); + pos = list[2]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); + pos = list[3]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); + } + + while (list != end) { + pos = list[0]; + __asm volatile( + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "bts %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)" + : [load] "=&r"(load), [offset] "=&r"(offset) + : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)); + list++; + } +} + +uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, + uint64_t length) { + uint64_t offset, load, pos; + uint64_t shift = 6; + const uint16_t *end = list + length; + if (!length) return card; + // btr is not available as an intrinsic in GCC + __asm volatile( + "1:\n" + "movzwq (%[list]), %[pos]\n" + "shrx %[shift], %[pos], %[offset]\n" + "mov (%[bitset],%[offset],8), %[load]\n" + "btr %[pos], %[load]\n" + "mov %[load], (%[bitset],%[offset],8)\n" + "sbb $0, %[card]\n" + "add $2, %[list]\n" + "cmp %[list], %[end]\n" + "jnz 1b" + : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load), + [pos] "=&r"(pos), [offset] "=&r"(offset) + : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift) + : + /* clobbers */ "memory"); + return card; +} + +#else +uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, + uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + 
offset = pos >> 6; + index = pos % 64; + load = ((uint64_t *)bitset)[offset]; + newload = load & ~(UINT64_C(1) << index); + card -= (load ^ newload) >> index; + ((uint64_t *)bitset)[offset] = newload; + list++; + } + return card; +} + +uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = ((uint64_t *)bitset)[offset]; + newload = load | (UINT64_C(1) << index); + card += (load ^ newload) >> index; + ((uint64_t *)bitset)[offset] = newload; + list++; + } + return card; +} + +void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = ((uint64_t *)bitset)[offset]; + newload = load | (UINT64_C(1) << index); + ((uint64_t *)bitset)[offset] = newload; + list++; + } +} + +#endif + +/* flip specified bits */ +/* TODO: consider whether worthwhile to make an asm version */ + +uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card, + const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = ((uint64_t *)bitset)[offset]; + newload = load ^ (UINT64_C(1) << index); + // todo: is a branch here all that bad? 
+ card += + (1 - 2 * (((UINT64_C(1) << index) & load) >> index)); // +1 or -1 + ((uint64_t *)bitset)[offset] = newload; + list++; + } + return card; +} + +void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length) { + uint64_t offset, load, newload, pos, index; + const uint16_t *end = list + length; + while (list != end) { + pos = *(const uint16_t *)list; + offset = pos >> 6; + index = pos % 64; + load = ((uint64_t *)bitset)[offset]; + newload = load ^ (UINT64_C(1) << index); + ((uint64_t *)bitset)[offset] = newload; + list++; + } +} +/* end file src/bitset_util.c */ +/* begin file src/containers/array.c */ +/* + * array.c + * + */ + +#include +#include +#include + +/* Create a new array with capacity size. Return NULL in case of failure. */ +array_container_t *array_container_create_given_capacity(int32_t size) { + array_container_t *container; + + container = (array_container_t *)malloc(sizeof(array_container_t)); + assert (container); + + if( size <= 0 ) { // we don't want to rely on malloc(0) + container->array = NULL; + } else { + container->array = (uint16_t *)malloc(sizeof(uint16_t) * size); + assert (container->array); + } + + container->capacity = size; + container->cardinality = 0; + + return container; +} + +/* Create a new array. Return NULL in case of failure. */ +array_container_t *array_container_create(void) { + return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE); +} + +/* Create a new array containing all values in [min,max). 
*/ +array_container_t * array_container_create_range(uint32_t min, uint32_t max) { + array_container_t * answer = array_container_create_given_capacity(max - min + 1); + if(answer == NULL) return answer; + answer->cardinality = 0; + for(uint32_t k = min; k < max; k++) { + answer->array[answer->cardinality++] = k; + } + return answer; +} + +/* Duplicate container */ +array_container_t *array_container_clone(const array_container_t *src) { + array_container_t *newcontainer = + array_container_create_given_capacity(src->capacity); + if (newcontainer == NULL) return NULL; + + newcontainer->cardinality = src->cardinality; + + memcpy(newcontainer->array, src->array, + src->cardinality * sizeof(uint16_t)); + + return newcontainer; +} + +int array_container_shrink_to_fit(array_container_t *src) { + if (src->cardinality == src->capacity) return 0; // nothing to do + int savings = src->capacity - src->cardinality; + src->capacity = src->cardinality; + if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs + free(src->array); + src->array = NULL; + } else { + uint16_t *oldarray = src->array; + src->array = + (uint16_t *)realloc(oldarray, src->capacity * sizeof(uint16_t)); + if (src->array == NULL) free(oldarray); // should never happen? + } + return savings; +} + +/* Free memory. */ +void array_container_free(array_container_t *arr) { + if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise + free(arr->array); + arr->array = NULL; // pedantic + } + free(arr); +} + +static inline int32_t grow_capacity(int32_t capacity) { + return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE + : capacity < 64 ? capacity * 2 + : capacity < 1024 ? capacity * 3 / 2 + : capacity * 5 / 4; +} + +static inline int32_t clamp(int32_t val, int32_t min, int32_t max) { + return ((val < min) ? min : (val > max) ? 
max : val); +} + +void array_container_grow(array_container_t *container, int32_t min, + bool preserve) { + + int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536); + int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max); + + container->capacity = new_capacity; + uint16_t *array = container->array; + + if (preserve) { + container->array = + (uint16_t *)realloc(array, new_capacity * sizeof(uint16_t)); + if (container->array == NULL) free(array); + } else { + // Jon Strabala reports that some tools complain otherwise + if (array != NULL) { + free(array); + } + container->array = (uint16_t *)malloc(new_capacity * sizeof(uint16_t)); + } + + // handle the case where realloc fails + if (container->array == NULL) { + fprintf(stderr, "could not allocate memory\n"); + } + assert(container->array != NULL); +} + +/* Copy one container into another. We assume that they are distinct. */ +void array_container_copy(const array_container_t *src, + array_container_t *dst) { + const int32_t cardinality = src->cardinality; + if (cardinality > dst->capacity) { + array_container_grow(dst, cardinality, false); + } + + dst->cardinality = cardinality; + memcpy(dst->array, src->array, cardinality * sizeof(uint16_t)); +} + +void array_container_add_from_range(array_container_t *arr, uint32_t min, + uint32_t max, uint16_t step) { + for (uint32_t value = min; value < max; value += step) { + array_container_append(arr, value); + } +} + +/* Computes the union of array1 and array2 and write the result to arrayout. + * It is assumed that arrayout is distinct from both array1 and array2. 
+ */ +void array_container_union(const array_container_t *array_1, + const array_container_t *array_2, + array_container_t *out) { + const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; + const int32_t max_cardinality = card_1 + card_2; + + if (out->capacity < max_cardinality) { + array_container_grow(out, max_cardinality, false); + } + out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1, + array_2->array, card_2, out->array); + +} + +/* Computes the difference of array1 and array2 and write the result + * to array out. + * Array out does not need to be distinct from array_1 + */ +void array_container_andnot(const array_container_t *array_1, + const array_container_t *array_2, + array_container_t *out) { + if (out->capacity < array_1->cardinality) + array_container_grow(out, array_1->cardinality, false); +#ifdef ROARING_VECTOR_OPERATIONS_ENABLED + if((out != array_1) && (out != array_2)) { + out->cardinality = + difference_vector16(array_1->array, array_1->cardinality, + array_2->array, array_2->cardinality, out->array); + } else { + out->cardinality = + difference_uint16(array_1->array, array_1->cardinality, array_2->array, + array_2->cardinality, out->array); + } +#else + out->cardinality = + difference_uint16(array_1->array, array_1->cardinality, array_2->array, + array_2->cardinality, out->array); +#endif +} + +/* Computes the symmetric difference of array1 and array2 and write the + * result + * to arrayout. + * It is assumed that arrayout is distinct from both array1 and array2. 
+ */ +void array_container_xor(const array_container_t *array_1, + const array_container_t *array_2, + array_container_t *out) { + const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; + const int32_t max_cardinality = card_1 + card_2; + if (out->capacity < max_cardinality) { + array_container_grow(out, max_cardinality, false); + } + +#ifdef ROARING_VECTOR_OPERATIONS_ENABLED + out->cardinality = + xor_vector16(array_1->array, array_1->cardinality, array_2->array, + array_2->cardinality, out->array); +#else + out->cardinality = + xor_uint16(array_1->array, array_1->cardinality, array_2->array, + array_2->cardinality, out->array); +#endif +} + +static inline int32_t minimum_int32(int32_t a, int32_t b) { + return (a < b) ? a : b; +} + +/* computes the intersection of array1 and array2 and write the result to + * arrayout. + * It is assumed that arrayout is distinct from both array1 and array2. + * */ +void array_container_intersection(const array_container_t *array1, + const array_container_t *array2, + array_container_t *out) { + int32_t card_1 = array1->cardinality, card_2 = array2->cardinality, + min_card = minimum_int32(card_1, card_2); + const int threshold = 64; // subject to tuning +#ifdef USEAVX + if (out->capacity < min_card) { + array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t), + false); + } +#else + if (out->capacity < min_card) { + array_container_grow(out, min_card, false); + } +#endif + + if (card_1 * threshold < card_2) { + out->cardinality = intersect_skewed_uint16( + array1->array, card_1, array2->array, card_2, out->array); + } else if (card_2 * threshold < card_1) { + out->cardinality = intersect_skewed_uint16( + array2->array, card_2, array1->array, card_1, out->array); + } else { +#ifdef USEAVX + out->cardinality = intersect_vector16( + array1->array, card_1, array2->array, card_2, out->array); +#else + out->cardinality = intersect_uint16(array1->array, card_1, + array2->array, card_2, out->array); 
+#endif + } +} + +/* computes the size of the intersection of array1 and array2 + * */ +int array_container_intersection_cardinality(const array_container_t *array1, + const array_container_t *array2) { + int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; + const int threshold = 64; // subject to tuning + if (card_1 * threshold < card_2) { + return intersect_skewed_uint16_cardinality(array1->array, card_1, + array2->array, card_2); + } else if (card_2 * threshold < card_1) { + return intersect_skewed_uint16_cardinality(array2->array, card_2, + array1->array, card_1); + } else { +#ifdef USEAVX + return intersect_vector16_cardinality(array1->array, card_1, + array2->array, card_2); +#else + return intersect_uint16_cardinality(array1->array, card_1, + array2->array, card_2); +#endif + } +} + +bool array_container_intersect(const array_container_t *array1, + const array_container_t *array2) { + int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; + const int threshold = 64; // subject to tuning + if (card_1 * threshold < card_2) { + return intersect_skewed_uint16_nonempty( + array1->array, card_1, array2->array, card_2); + } else if (card_2 * threshold < card_1) { + return intersect_skewed_uint16_nonempty( + array2->array, card_2, array1->array, card_1); + } else { + // we do not bother vectorizing + return intersect_uint16_nonempty(array1->array, card_1, + array2->array, card_2); + } +} + +/* computes the intersection of array1 and array2 and write the result to + * array1. + * */ +void array_container_intersection_inplace(array_container_t *src_1, + const array_container_t *src_2) { + // todo: can any of this be vectorized? 
+ int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality; + const int threshold = 64; // subject to tuning + if (card_1 * threshold < card_2) { + src_1->cardinality = intersect_skewed_uint16( + src_1->array, card_1, src_2->array, card_2, src_1->array); + } else if (card_2 * threshold < card_1) { + src_1->cardinality = intersect_skewed_uint16( + src_2->array, card_2, src_1->array, card_1, src_1->array); + } else { + src_1->cardinality = intersect_uint16( + src_1->array, card_1, src_2->array, card_2, src_1->array); + } +} + +int array_container_to_uint32_array(void *vout, const array_container_t *cont, + uint32_t base) { + int outpos = 0; + uint32_t *out = (uint32_t *)vout; + for (int i = 0; i < cont->cardinality; ++i) { + const uint32_t val = base + cont->array[i]; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + } + return outpos; +} + +void array_container_printf(const array_container_t *v) { + if (v->cardinality == 0) { + printf("{}"); + return; + } + printf("{"); + printf("%d", v->array[0]); + for (int i = 1; i < v->cardinality; ++i) { + printf(",%d", v->array[i]); + } + printf("}"); +} + +void array_container_printf_as_uint32_array(const array_container_t *v, + uint32_t base) { + if (v->cardinality == 0) { + return; + } + printf("%u", v->array[0] + base); + for (int i = 1; i < v->cardinality; ++i) { + printf(",%u", v->array[i] + base); + } +} + +/* Compute the number of runs */ +int32_t array_container_number_of_runs(const array_container_t *a) { + // Can SIMD work here? 
+ int32_t nr_runs = 0; + int32_t prev = -2; + for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) { + if (*p != prev + 1) nr_runs++; + prev = *p; + } + return nr_runs; +} + +int32_t array_container_serialize(const array_container_t *container, char *buf) { + int32_t l, off; + uint16_t cardinality = (uint16_t)container->cardinality; + + memcpy(buf, &cardinality, off = sizeof(cardinality)); + l = sizeof(uint16_t) * container->cardinality; + if (l) memcpy(&buf[off], container->array, l); + + return (off + l); +} + +/** + * Writes the underlying array to buf, outputs how many bytes were written. + * The number of bytes written should be + * array_container_size_in_bytes(container). + * + */ +int32_t array_container_write(const array_container_t *container, char *buf) { + memcpy(buf, container->array, container->cardinality * sizeof(uint16_t)); + return array_container_size_in_bytes(container); +} + +bool array_container_is_subset(const array_container_t *container1, + const array_container_t *container2) { + if (container1->cardinality > container2->cardinality) { + return false; + } + int i1 = 0, i2 = 0; + while (i1 < container1->cardinality && i2 < container2->cardinality) { + if (container1->array[i1] == container2->array[i2]) { + i1++; + i2++; + } else if (container1->array[i1] > container2->array[i2]) { + i2++; + } else { // container1->array[i1] < container2->array[i2] + return false; + } + } + if (i1 == container1->cardinality) { + return true; + } else { + return false; + } +} + +int32_t array_container_read(int32_t cardinality, array_container_t *container, + const char *buf) { + if (container->capacity < cardinality) { + array_container_grow(container, cardinality, false); + } + container->cardinality = cardinality; + memcpy(container->array, buf, container->cardinality * sizeof(uint16_t)); + + return array_container_size_in_bytes(container); +} + +uint32_t array_container_serialization_len(const array_container_t *container) { + return 
(sizeof(uint16_t) /* container->cardinality converted to 16 bit */ + + (sizeof(uint16_t) * container->cardinality)); +} + +void *array_container_deserialize(const char *buf, size_t buf_len) { + array_container_t *ptr; + + if (buf_len < 2) /* capacity converted to 16 bit */ + return (NULL); + else + buf_len -= 2; + + if ((ptr = (array_container_t *)malloc(sizeof(array_container_t))) != + NULL) { + size_t len; + int32_t off; + uint16_t cardinality; + + memcpy(&cardinality, buf, off = sizeof(cardinality)); + + ptr->capacity = ptr->cardinality = (uint32_t)cardinality; + len = sizeof(uint16_t) * ptr->cardinality; + + if (len != buf_len) { + free(ptr); + return (NULL); + } + + if ((ptr->array = (uint16_t *)malloc(sizeof(uint16_t) * + ptr->capacity)) == NULL) { + free(ptr); + return (NULL); + } + + if (len) memcpy(ptr->array, &buf[off], len); + + /* Check if returned values are monotonically increasing */ + for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) { + if (ptr->array[i] < j) { + free(ptr->array); + free(ptr); + return (NULL); + } else + j = ptr->array[i]; + } + } + + return (ptr); +} + +bool array_container_iterate(const array_container_t *cont, uint32_t base, + roaring_iterator iterator, void *ptr) { + for (int i = 0; i < cont->cardinality; i++) + if (!iterator(cont->array[i] + base, ptr)) return false; + return true; +} + +bool array_container_iterate64(const array_container_t *cont, uint32_t base, + roaring_iterator64 iterator, uint64_t high_bits, + void *ptr) { + for (int i = 0; i < cont->cardinality; i++) + if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr)) + return false; + return true; +} +/* end file src/containers/array.c */ +/* begin file src/containers/bitset.c */ +/* + * bitset.c + * + */ +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif +#include +#include +#include +#include + + +void bitset_container_clear(bitset_container_t *bitset) { + memset(bitset->array, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); 
+ bitset->cardinality = 0; +} + +void bitset_container_set_all(bitset_container_t *bitset) { + memset(bitset->array, INT64_C(-1), + sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + bitset->cardinality = (1 << 16); +} + + + +/* Create a new bitset. Return NULL in case of failure. */ +bitset_container_t *bitset_container_create(void) { + bitset_container_t *bitset = + (bitset_container_t *)malloc(sizeof(bitset_container_t)); + + if (!bitset) { + return NULL; + } + // sizeof(__m256i) == 32 + bitset->array = (uint64_t *)roaring_bitmap_aligned_malloc( + 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + if (!bitset->array) { + free(bitset); + return NULL; + } + bitset_container_clear(bitset); + return bitset; +} + +/* Copy one container into another. We assume that they are distinct. */ +void bitset_container_copy(const bitset_container_t *source, + bitset_container_t *dest) { + dest->cardinality = source->cardinality; + memcpy(dest->array, source->array, + sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); +} + +void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, + uint32_t max, uint16_t step) { + if (step == 0) return; // refuse to crash + if ((64 % step) == 0) { // step divides 64 + uint64_t mask = 0; // construct the repeated mask + for (uint32_t value = (min % step); value < 64; value += step) { + mask |= ((uint64_t)1 << value); + } + uint32_t firstword = min / 64; + uint32_t endword = (max - 1) / 64; + bitset->cardinality = (max - min + step - 1) / step; + if (firstword == endword) { + bitset->array[firstword] |= + mask & (((~UINT64_C(0)) << (min % 64)) & + ((~UINT64_C(0)) >> ((~max + 1) % 64))); + return; + } + bitset->array[firstword] = mask & ((~UINT64_C(0)) << (min % 64)); + for (uint32_t i = firstword + 1; i < endword; i++) + bitset->array[i] = mask; + bitset->array[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64)); + } else { + for (uint32_t value = min; value < max; value += step) { + 
bitset_container_add(bitset, value); + } + } +} + +/* Free memory. */ +void bitset_container_free(bitset_container_t *bitset) { + if(bitset->array != NULL) {// Jon Strabala reports that some tools complain otherwise + roaring_bitmap_aligned_free(bitset->array); + bitset->array = NULL; // pedantic + } + free(bitset); +} + +/* duplicate container. */ +bitset_container_t *bitset_container_clone(const bitset_container_t *src) { + bitset_container_t *bitset = + (bitset_container_t *)malloc(sizeof(bitset_container_t)); + assert(bitset); + + // sizeof(__m256i) == 32 + bitset->array = (uint64_t *)roaring_bitmap_aligned_malloc( + 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + assert(bitset->array); + bitset->cardinality = src->cardinality; + memcpy(bitset->array, src->array, + sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); + return bitset; +} + +void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, + uint32_t end) { + bitset_set_range(bitset->array, begin, end); + bitset->cardinality = + bitset_container_compute_cardinality(bitset); // could be smarter +} + + +bool bitset_container_intersect(const bitset_container_t *src_1, + const bitset_container_t *src_2) { + // could vectorize, but this is probably already quite fast in practice + const uint64_t * __restrict__ array_1 = src_1->array; + const uint64_t * __restrict__ array_2 = src_2->array; + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { + if((array_1[i] & array_2[i]) != 0) return true; + } + return false; +} + + +#ifdef USEAVX +#ifndef WORDS_IN_AVX2_REG +#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) +#endif +/* Get the number of bits set (force computation) */ +int bitset_container_compute_cardinality(const bitset_container_t *bitset) { + return (int) avx2_harley_seal_popcount256( + (const __m256i *)bitset->array, + BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); +} + +#elif defined(USENEON) +int bitset_container_compute_cardinality(const 
bitset_container_t *bitset) { + uint16x8_t n0 = vdupq_n_u16(0); + uint16x8_t n1 = vdupq_n_u16(0); + uint16x8_t n2 = vdupq_n_u16(0); + uint16x8_t n3 = vdupq_n_u16(0); + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { + uint64x2_t c0 = vld1q_u64(&bitset->array[i + 0]); + n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); + uint64x2_t c1 = vld1q_u64(&bitset->array[i + 2]); + n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); + uint64x2_t c2 = vld1q_u64(&bitset->array[i + 4]); + n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); + uint64x2_t c3 = vld1q_u64(&bitset->array[i + 6]); + n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); + } + uint64x2_t n = vdupq_n_u64(0); + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); + return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); +} + +#else + +/* Get the number of bits set (force computation) */ +int bitset_container_compute_cardinality(const bitset_container_t *bitset) { + const uint64_t *array = bitset->array; + int32_t sum = 0; + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { + sum += hamming(array[i]); + sum += hamming(array[i + 1]); + sum += hamming(array[i + 2]); + sum += hamming(array[i + 3]); + } + return sum; +} + +#endif + +#ifdef USEAVX + +#define BITSET_CONTAINER_FN_REPEAT 8 +#ifndef WORDS_IN_AVX2_REG +#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) +#endif +#define LOOP_SIZE \ + BITSET_CONTAINER_SIZE_IN_WORDS / \ + ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT) + +/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the + result to bitsetout */ +// clang-format off +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ +int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const 
bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint8_t * __restrict__ array_1 = (const uint8_t *)src_1->array; \ + const uint8_t * __restrict__ array_2 = (const uint8_t *)src_2->array; \ + /* not using the blocking optimization for some reason*/ \ + uint8_t *out = (uint8_t*)dst->array; \ + const int innerloop = 8; \ + for (size_t i = 0; \ + i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \ + i+=innerloop) {\ + __m256i A1, A2, AO; \ + A1 = _mm256_lddqu_si256((const __m256i *)(array_1)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(array_2)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)out, AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 32)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 32)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out+32), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 64)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 64)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out+64), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 96)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 96)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out+96), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 128)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 128)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out+128), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 160)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 160)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out+160), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 192)); \ + A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 192)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out+192), AO); \ + A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 224)); \ + A2 = _mm256_lddqu_si256((const 
__m256i *)(array_2 + 224)); \ + AO = avx_intrinsic(A2, A1); \ + _mm256_storeu_si256((__m256i *)(out+224), AO); \ + out+=256; \ + array_1 += 256; \ + array_2 += 256; \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ +} \ +/* next, a version that updates cardinality*/ \ +int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const __m256i * __restrict__ array_1 = (const __m256i *) src_1->array; \ + const __m256i * __restrict__ array_2 = (const __m256i *) src_2->array; \ + __m256i *out = (__m256i *) dst->array; \ + dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname(array_2,\ + array_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ + return dst->cardinality; \ +} \ +/* next, a version that just computes the cardinality*/ \ +int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + const __m256i * __restrict__ data1 = (const __m256i *) src_1->array; \ + const __m256i * __restrict__ data2 = (const __m256i *) src_2->array; \ + return (int)avx2_harley_seal_popcount256_##opname(data2, \ + data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ +} + +#elif defined(USENEON) + +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ +int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ array_1 = src_1->array; \ + const uint64_t * __restrict__ array_2 = src_2->array; \ + uint64_t *out = dst->array; \ + uint16x8_t n0 = vdupq_n_u16(0); \ + uint16x8_t n1 = vdupq_n_u16(0); \ + uint16x8_t n2 = vdupq_n_u16(0); \ + uint16x8_t n3 = vdupq_n_u16(0); \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + uint64x2_t c0 = neon_intrinsic(vld1q_u64(&array_1[i + 0]), \ + vld1q_u64(&array_2[i + 0])); \ + n0 = vaddq_u16(n0, 
vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ + vst1q_u64(&out[i + 0], c0); \ + uint64x2_t c1 = neon_intrinsic(vld1q_u64(&array_1[i + 2]), \ + vld1q_u64(&array_2[i + 2])); \ + n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ + vst1q_u64(&out[i + 2], c1); \ + uint64x2_t c2 = neon_intrinsic(vld1q_u64(&array_1[i + 4]), \ + vld1q_u64(&array_2[i + 4])); \ + n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ + vst1q_u64(&out[i + 4], c2); \ + uint64x2_t c3 = neon_intrinsic(vld1q_u64(&array_1[i + 6]), \ + vld1q_u64(&array_2[i + 6])); \ + n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ + vst1q_u64(&out[i + 6], c3); \ + } \ + uint64x2_t n = vdupq_n_u64(0); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ + dst->cardinality = vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ array_1 = src_1->array; \ + const uint64_t * __restrict__ array_2 = src_2->array; \ + uint64_t *out = dst->array; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + vst1q_u64(&out[i + 0], neon_intrinsic(vld1q_u64(&array_1[i + 0]), \ + vld1q_u64(&array_2[i + 0]))); \ + vst1q_u64(&out[i + 2], neon_intrinsic(vld1q_u64(&array_1[i + 2]), \ + vld1q_u64(&array_2[i + 2]))); \ + vst1q_u64(&out[i + 4], neon_intrinsic(vld1q_u64(&array_1[i + 4]), \ + vld1q_u64(&array_2[i + 4]))); \ + vst1q_u64(&out[i + 6], neon_intrinsic(vld1q_u64(&array_1[i + 6]), \ + vld1q_u64(&array_2[i + 6]))); \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const 
bitset_container_t *src_2) { \ + const uint64_t * __restrict__ array_1 = src_1->array; \ + const uint64_t * __restrict__ array_2 = src_2->array; \ + uint16x8_t n0 = vdupq_n_u16(0); \ + uint16x8_t n1 = vdupq_n_u16(0); \ + uint16x8_t n2 = vdupq_n_u16(0); \ + uint16x8_t n3 = vdupq_n_u16(0); \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 8) { \ + uint64x2_t c0 = neon_intrinsic(vld1q_u64(&array_1[i + 0]), \ + vld1q_u64(&array_2[i + 0])); \ + n0 = vaddq_u16(n0, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c0)))); \ + uint64x2_t c1 = neon_intrinsic(vld1q_u64(&array_1[i + 2]), \ + vld1q_u64(&array_2[i + 2])); \ + n1 = vaddq_u16(n1, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c1)))); \ + uint64x2_t c2 = neon_intrinsic(vld1q_u64(&array_1[i + 4]), \ + vld1q_u64(&array_2[i + 4])); \ + n2 = vaddq_u16(n2, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c2)))); \ + uint64x2_t c3 = neon_intrinsic(vld1q_u64(&array_1[i + 6]), \ + vld1q_u64(&array_2[i + 6])); \ + n3 = vaddq_u16(n3, vpaddlq_u8(vcntq_u8(vreinterpretq_u8_u64(c3)))); \ + } \ + uint64x2_t n = vdupq_n_u64(0); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n0))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n1))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n2))); \ + n = vaddq_u64(n, vpaddlq_u32(vpaddlq_u16(n3))); \ + return vgetq_lane_u64(n, 0) + vgetq_lane_u64(n, 1); \ +} + +#else /* not USEAVX */ + +#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic, neon_intrinsic) \ +int bitset_container_##opname(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ array_1 = src_1->array; \ + const uint64_t * __restrict__ array_2 = src_2->array; \ + uint64_t *out = dst->array; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \ + word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \ + out[i] = word_1; \ + out[i + 1] = word_2; \ + sum += 
hamming(word_1); \ + sum += hamming(word_2); \ + } \ + dst->cardinality = sum; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2, \ + bitset_container_t *dst) { \ + const uint64_t * __restrict__ array_1 = src_1->array; \ + const uint64_t * __restrict__ array_2 = src_2->array; \ + uint64_t *out = dst->array; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \ + out[i] = (array_1[i])opsymbol(array_2[i]); \ + } \ + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ + return dst->cardinality; \ +} \ +int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ + const bitset_container_t *src_2) { \ + const uint64_t * __restrict__ array_1 = src_1->array; \ + const uint64_t * __restrict__ array_2 = src_2->array; \ + int32_t sum = 0; \ + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \ + const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \ + word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \ + sum += hamming(word_1); \ + sum += hamming(word_2); \ + } \ + return sum; \ +} + +#endif + +// we duplicate the function because other containers use the "or" term, makes API more consistent +BITSET_CONTAINER_FN(or, |, _mm256_or_si256, vorrq_u64) +BITSET_CONTAINER_FN(union, |, _mm256_or_si256, vorrq_u64) + +// we duplicate the function because other containers use the "intersection" term, makes API more consistent +BITSET_CONTAINER_FN(and, &, _mm256_and_si256, vandq_u64) +BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256, vandq_u64) + +BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256, veorq_u64) +BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256, vbicq_u64) +// clang-format On + + + +int bitset_container_to_uint32_array( void *vout, const bitset_container_t *cont, uint32_t base) { +#ifdef USEAVX2FORDECODING + if(cont->cardinality >= 8192)// heuristic + return (int) bitset_extract_setbits_avx2(cont->array, 
BITSET_CONTAINER_SIZE_IN_WORDS, vout,cont->cardinality,base); + else + return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); +#else + return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); +#endif +} + +/* + * Print this container using printf (useful for debugging). + */ +void bitset_container_printf(const bitset_container_t * v) { + printf("{"); + uint32_t base = 0; + bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { + uint64_t w = v->array[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(iamfirst) {// predicted to be false + printf("%u",base + r); + iamfirst = false; + } else { + printf(",%u",base + r); + } + w ^= t; + } + base += 64; + } + printf("}"); +} + + +/* + * Print this container using printf as a comma-separated list of 32-bit integers starting at base. + */ +void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { + bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { + uint64_t w = v->array[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(iamfirst) {// predicted to be false + printf("%u", r + base); + iamfirst = false; + } else { + printf(",%u",r + base); + } + w ^= t; + } + base += 64; + } +} + + +// TODO: use the fast lower bound, also +int bitset_container_number_of_runs(bitset_container_t *b) { + int num_runs = 0; + uint64_t next_word = b->array[0]; + + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { + uint64_t word = next_word; + next_word = b->array[i+1]; + num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word); + } + + uint64_t word = next_word; + num_runs += hamming((~word) & (word << 1)); + if((word & 0x8000000000000000ULL) != 0) + 
num_runs++; + return num_runs; +} + +int32_t bitset_container_serialize(const bitset_container_t *container, char *buf) { + int32_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; + memcpy(buf, container->array, l); + return(l); +} + + + +int32_t bitset_container_write(const bitset_container_t *container, + char *buf) { + memcpy(buf, container->array, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); + return bitset_container_size_in_bytes(container); +} + + +int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, + const char *buf) { + container->cardinality = cardinality; + memcpy(container->array, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); + return bitset_container_size_in_bytes(container); +} + +uint32_t bitset_container_serialization_len(void) { + return(sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); +} + +void* bitset_container_deserialize(const char *buf, size_t buf_len) { + bitset_container_t *ptr; + size_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; + + if(l != buf_len) + return(NULL); + + if((ptr = (bitset_container_t *)malloc(sizeof(bitset_container_t))) != NULL) { + memcpy(ptr, buf, sizeof(bitset_container_t)); + // sizeof(__m256i) == 32 + ptr->array = (uint64_t *) roaring_bitmap_aligned_malloc(32, l); + if (! 
ptr->array) { + free(ptr); + return NULL; + } + memcpy(ptr->array, buf, l); + ptr->cardinality = bitset_container_compute_cardinality(ptr); + } + + return((void*)ptr); +} + +bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { + for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { + uint64_t w = cont->array[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(!iterator(r + base, ptr)) return false; + w ^= t; + } + base += 64; + } + return true; +} + +bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { + for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { + uint64_t w = cont->array[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; + w ^= t; + } + base += 64; + } + return true; +} + + +bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { + if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { + if(container1->cardinality != container2->cardinality) { + return false; + } + if (container1->cardinality == INT32_C(0x10000)) { + return true; + } + } +#ifdef USEAVX + const __m256i *ptr1 = (const __m256i*)container1->array; + const __m256i *ptr2 = (const __m256i*)container2->array; + for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)/32; i++) { + __m256i r1 = _mm256_load_si256(ptr1+i); + __m256i r2 = _mm256_load_si256(ptr2+i); + int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2)); + if ((uint32_t)mask != UINT32_MAX) { + return false; + } + } +#else + return memcmp(container1->array, + container2->array, + BITSET_CONTAINER_SIZE_IN_WORDS*sizeof(uint64_t)) == 0; +#endif + return true; +} + +bool 
bitset_container_is_subset(const bitset_container_t *container1, + const bitset_container_t *container2) { + if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { + if(container1->cardinality > container2->cardinality) { + return false; + } + } + for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { + if((container1->array[i] & container2->array[i]) != container1->array[i]) { + return false; + } + } + return true; +} + +bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { + int card = bitset_container_cardinality(container); + if(rank >= *start_rank + card) { + *start_rank += card; + return false; + } + const uint64_t *array = container->array; + int32_t size; + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { + size = hamming(array[i]); + if(rank <= *start_rank + size) { + uint64_t w = container->array[i]; + uint16_t base = i*64; + while (w != 0) { + uint64_t t = w & (~w + 1); + int r = __builtin_ctzll(w); + if(*start_rank == rank) { + *element = r+base; + return true; + } + w ^= t; + *start_rank += 1; + } + } + else + *start_rank += size; + } + assert(false); + __builtin_unreachable(); +} + + +/* Returns the smallest value (assumes not empty) */ +uint16_t bitset_container_minimum(const bitset_container_t *container) { + for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { + uint64_t w = container->array[i]; + if (w != 0) { + int r = __builtin_ctzll(w); + return r + i * 64; + } + } + return UINT16_MAX; +} + +/* Returns the largest value (assumes not empty) */ +uint16_t bitset_container_maximum(const bitset_container_t *container) { + for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) { + uint64_t w = container->array[i]; + if (w != 0) { + int r = __builtin_clzll(w); + return i * 64 + 63 - r; + } + } + return 0; +} + +/* Returns the number of values equal or smaller than x */ +int 
bitset_container_rank(const bitset_container_t *container, uint16_t x) { + // credit: aqrit + int sum = 0; + int i = 0; + for (int end = x / 64; i < end; i++){ + sum += hamming(container->array[i]); + } + uint64_t lastword = container->array[i]; + uint64_t lastpos = UINT64_C(1) << (x % 64); + uint64_t mask = lastpos + lastpos - 1; // smear right + sum += hamming(lastword & mask); + return sum; +} + +/* Returns the index of the first value equal or larger than x, or -1 */ +int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) { + uint32_t x32 = x; + uint32_t k = x32 / 64; + uint64_t word = container->array[k]; + const int diff = x32 - k * 64; // in [0,64) + word = (word >> diff) << diff; // a mask is faster, but we don't care + while(word == 0) { + k++; + if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1; + word = container->array[k]; + } + return k * 64 + __builtin_ctzll(word); +} +/* end file src/containers/bitset.c */ +/* begin file src/containers/containers.c */ + + +void container_free(void *container, uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + bitset_container_free((bitset_container_t *)container); + break; + case ARRAY_CONTAINER_TYPE_CODE: + array_container_free((array_container_t *)container); + break; + case RUN_CONTAINER_TYPE_CODE: + run_container_free((run_container_t *)container); + break; + case SHARED_CONTAINER_TYPE_CODE: + shared_container_free((shared_container_t *)container); + break; + default: + assert(false); + __builtin_unreachable(); + } +} + +void container_printf(const void *container, uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + bitset_container_printf((const bitset_container_t *)container); + return; + case ARRAY_CONTAINER_TYPE_CODE: + array_container_printf((const array_container_t *)container); + return; + case RUN_CONTAINER_TYPE_CODE: + run_container_printf((const 
run_container_t *)container); + return; + default: + __builtin_unreachable(); + } +} + +void container_printf_as_uint32_array(const void *container, uint8_t typecode, + uint32_t base) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + bitset_container_printf_as_uint32_array( + (const bitset_container_t *)container, base); + return; + case ARRAY_CONTAINER_TYPE_CODE: + array_container_printf_as_uint32_array( + (const array_container_t *)container, base); + return; + case RUN_CONTAINER_TYPE_CODE: + run_container_printf_as_uint32_array( + (const run_container_t *)container, base); + return; + return; + default: + __builtin_unreachable(); + } +} + +int32_t container_serialize(const void *container, uint8_t typecode, + char *buf) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return (bitset_container_serialize((const bitset_container_t *)container, + buf)); + case ARRAY_CONTAINER_TYPE_CODE: + return ( + array_container_serialize((const array_container_t *)container, buf)); + case RUN_CONTAINER_TYPE_CODE: + return (run_container_serialize((const run_container_t *)container, buf)); + default: + assert(0); + __builtin_unreachable(); + return (-1); + } +} + +uint32_t container_serialization_len(const void *container, uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_serialization_len(); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_serialization_len( + (const array_container_t *)container); + case RUN_CONTAINER_TYPE_CODE: + return run_container_serialization_len( + (const run_container_t *)container); + default: + assert(0); + __builtin_unreachable(); + return (0); + } +} + +void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) { + switch (typecode) { + case 
BITSET_CONTAINER_TYPE_CODE: + return (bitset_container_deserialize(buf, buf_len)); + case ARRAY_CONTAINER_TYPE_CODE: + return (array_container_deserialize(buf, buf_len)); + case RUN_CONTAINER_TYPE_CODE: + return (run_container_deserialize(buf, buf_len)); + case SHARED_CONTAINER_TYPE_CODE: + printf("this should never happen.\n"); + assert(0); + __builtin_unreachable(); + return (NULL); + default: + assert(0); + __builtin_unreachable(); + return (NULL); + } +} + +void *get_copy_of_container(void *container, uint8_t *typecode, + bool copy_on_write) { + if (copy_on_write) { + shared_container_t *shared_container; + if (*typecode == SHARED_CONTAINER_TYPE_CODE) { + shared_container = (shared_container_t *)container; + shared_container->counter += 1; + return shared_container; + } + assert(*typecode != SHARED_CONTAINER_TYPE_CODE); + + if ((shared_container = (shared_container_t *)malloc( + sizeof(shared_container_t))) == NULL) { + return NULL; + } + + shared_container->container = container; + shared_container->typecode = *typecode; + + shared_container->counter = 2; + *typecode = SHARED_CONTAINER_TYPE_CODE; + + return shared_container; + } // copy_on_write + // otherwise, no copy on write... + const void *actualcontainer = + container_unwrap_shared((const void *)container, typecode); + assert(*typecode != SHARED_CONTAINER_TYPE_CODE); + return container_clone(actualcontainer, *typecode); +} +/** + * Copies a container, requires a typecode. This allocates new memory, caller + * is responsible for deallocation. 
+ */ +void *container_clone(const void *container, uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_clone((const bitset_container_t *)container); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_clone((const array_container_t *)container); + case RUN_CONTAINER_TYPE_CODE: + return run_container_clone((const run_container_t *)container); + case SHARED_CONTAINER_TYPE_CODE: + printf("shared containers are not cloneable\n"); + assert(false); + return NULL; + default: + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +void *shared_container_extract_copy(shared_container_t *container, + uint8_t *typecode) { + assert(container->counter > 0); + assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); + container->counter--; + *typecode = container->typecode; + void *answer; + if (container->counter == 0) { + answer = container->container; + container->container = NULL; // paranoid + free(container); + } else { + answer = container_clone(container->container, *typecode); + } + assert(*typecode != SHARED_CONTAINER_TYPE_CODE); + return answer; +} + +void shared_container_free(shared_container_t *container) { + assert(container->counter > 0); + container->counter--; + if (container->counter == 0) { + assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); + container_free(container->container, container->typecode); + container->container = NULL; // paranoid + free(container); + } +} + +/* end file src/containers/containers.c */ +/* begin file src/containers/convert.c */ +#include + + +// file contains grubby stuff that must know impl. details of all container +// types. 
+bitset_container_t *bitset_container_from_array(const array_container_t *a) { + bitset_container_t *ans = bitset_container_create(); + int limit = array_container_cardinality(a); + for (int i = 0; i < limit; ++i) bitset_container_set(ans, a->array[i]); + return ans; +} + +bitset_container_t *bitset_container_from_run(const run_container_t *arr) { + int card = run_container_cardinality(arr); + bitset_container_t *answer = bitset_container_create(); + for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { + rle16_t vl = arr->runs[rlepos]; + bitset_set_lenrange(answer->array, vl.value, vl.length); + } + answer->cardinality = card; + return answer; +} + +array_container_t *array_container_from_run(const run_container_t *arr) { + array_container_t *answer = + array_container_create_given_capacity(run_container_cardinality(arr)); + answer->cardinality = 0; + for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { + int run_start = arr->runs[rlepos].value; + int run_end = run_start + arr->runs[rlepos].length; + + for (int run_value = run_start; run_value <= run_end; ++run_value) { + answer->array[answer->cardinality++] = (uint16_t)run_value; + } + } + return answer; +} + +array_container_t *array_container_from_bitset(const bitset_container_t *bits) { + array_container_t *result = + array_container_create_given_capacity(bits->cardinality); + result->cardinality = bits->cardinality; + // sse version ends up being slower here + // (bitset_extract_setbits_sse_uint16) + // because of the sparsity of the data + bitset_extract_setbits_uint16(bits->array, BITSET_CONTAINER_SIZE_IN_WORDS, + result->array, 0); + return result; +} + +/* assumes that container has adequate space. 
Run from [s,e] (inclusive) */ +static void add_run(run_container_t *r, int s, int e) { + r->runs[r->n_runs].value = s; + r->runs[r->n_runs].length = e - s; + r->n_runs++; +} + +run_container_t *run_container_from_array(const array_container_t *c) { + int32_t n_runs = array_container_number_of_runs(c); + run_container_t *answer = run_container_create_given_capacity(n_runs); + int prev = -2; + int run_start = -1; + int32_t card = c->cardinality; + if (card == 0) return answer; + for (int i = 0; i < card; ++i) { + const uint16_t cur_val = c->array[i]; + if (cur_val != prev + 1) { + // new run starts; flush old one, if any + if (run_start != -1) add_run(answer, run_start, prev); + run_start = cur_val; + } + prev = c->array[i]; + } + // now prev is the last seen value + add_run(answer, run_start, prev); + // assert(run_container_cardinality(answer) == c->cardinality); + return answer; +} + +/** + * Convert the runcontainer to either a Bitmap or an Array Container, depending + * on the cardinality. Frees the container. + * Allocates and returns new container, which caller is responsible for freeing. + * It does not free the run container. 
+ */ + +void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, + uint8_t *resulttype) { + if (card <= DEFAULT_MAX_SIZE) { + array_container_t *answer = array_container_create_given_capacity(card); + answer->cardinality = 0; + for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { + uint16_t run_start = r->runs[rlepos].value; + uint16_t run_end = run_start + r->runs[rlepos].length; + for (uint16_t run_value = run_start; run_value <= run_end; + ++run_value) { + answer->array[answer->cardinality++] = run_value; + } + } + assert(card == answer->cardinality); + *resulttype = ARRAY_CONTAINER_TYPE_CODE; + //run_container_free(r); + return answer; + } + bitset_container_t *answer = bitset_container_create(); + for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { + uint16_t run_start = r->runs[rlepos].value; + bitset_set_lenrange(answer->array, run_start, r->runs[rlepos].length); + } + answer->cardinality = card; + *resulttype = BITSET_CONTAINER_TYPE_CODE; + //run_container_free(r); + return answer; +} + +/* Converts a run container to either an array or a bitset, IF it saves space. + */ +/* If a conversion occurs, the caller is responsible to free the original + * container and + * he becomes responsible to free the new one. */ +void *convert_run_to_efficient_container(run_container_t *c, + uint8_t *typecode_after) { + int32_t size_as_run_container = + run_container_serialized_size_in_bytes(c->n_runs); + + int32_t size_as_bitset_container = + bitset_container_serialized_size_in_bytes(); + int32_t card = run_container_cardinality(c); + int32_t size_as_array_container = + array_container_serialized_size_in_bytes(card); + + int32_t min_size_non_run = + size_as_bitset_container < size_as_array_container + ? 
size_as_bitset_container + : size_as_array_container; + if (size_as_run_container <= min_size_non_run) { // no conversion + *typecode_after = RUN_CONTAINER_TYPE_CODE; + return c; + } + if (card <= DEFAULT_MAX_SIZE) { + // to array + array_container_t *answer = array_container_create_given_capacity(card); + answer->cardinality = 0; + for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { + int run_start = c->runs[rlepos].value; + int run_end = run_start + c->runs[rlepos].length; + + for (int run_value = run_start; run_value <= run_end; ++run_value) { + answer->array[answer->cardinality++] = (uint16_t)run_value; + } + } + *typecode_after = ARRAY_CONTAINER_TYPE_CODE; + return answer; + } + + // else to bitset + bitset_container_t *answer = bitset_container_create(); + + for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { + int start = c->runs[rlepos].value; + int end = start + c->runs[rlepos].length; + bitset_set_range(answer->array, start, end + 1); + } + answer->cardinality = card; + *typecode_after = BITSET_CONTAINER_TYPE_CODE; + return answer; +} + +// like convert_run_to_efficient_container but frees the old result if needed +void *convert_run_to_efficient_container_and_free(run_container_t *c, + uint8_t *typecode_after) { + void *answer = convert_run_to_efficient_container(c, typecode_after); + if (answer != c) run_container_free(c); + return answer; +} + +/* once converted, the original container is disposed here, rather than + in roaring_array +*/ + +// TODO: split into run- array- and bitset- subfunctions for sanity; +// a few function calls won't really matter. 
+ +void *convert_run_optimize(void *c, uint8_t typecode_original, + uint8_t *typecode_after) { + if (typecode_original == RUN_CONTAINER_TYPE_CODE) { + void *newc = convert_run_to_efficient_container((run_container_t *)c, + typecode_after); + if (newc != c) { + container_free(c, typecode_original); + } + return newc; + } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) { + // it might need to be converted to a run container. + array_container_t *c_qua_array = (array_container_t *)c; + int32_t n_runs = array_container_number_of_runs(c_qua_array); + int32_t size_as_run_container = + run_container_serialized_size_in_bytes(n_runs); + int32_t card = array_container_cardinality(c_qua_array); + int32_t size_as_array_container = + array_container_serialized_size_in_bytes(card); + + if (size_as_run_container >= size_as_array_container) { + *typecode_after = ARRAY_CONTAINER_TYPE_CODE; + return c; + } + // else convert array to run container + run_container_t *answer = run_container_create_given_capacity(n_runs); + int prev = -2; + int run_start = -1; + + assert(card > 0); + for (int i = 0; i < card; ++i) { + uint16_t cur_val = c_qua_array->array[i]; + if (cur_val != prev + 1) { + // new run starts; flush old one, if any + if (run_start != -1) add_run(answer, run_start, prev); + run_start = cur_val; + } + prev = c_qua_array->array[i]; + } + assert(run_start >= 0); + // now prev is the last seen value + add_run(answer, run_start, prev); + *typecode_after = RUN_CONTAINER_TYPE_CODE; + array_container_free(c_qua_array); + return answer; + } else if (typecode_original == + BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset + // does bitset need conversion to run? 
+ bitset_container_t *c_qua_bitset = (bitset_container_t *)c; + int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset); + int32_t size_as_run_container = + run_container_serialized_size_in_bytes(n_runs); + int32_t size_as_bitset_container = + bitset_container_serialized_size_in_bytes(); + + if (size_as_bitset_container <= size_as_run_container) { + // no conversion needed. + *typecode_after = BITSET_CONTAINER_TYPE_CODE; + return c; + } + // bitset to runcontainer (ported from Java RunContainer( + // BitmapContainer bc, int nbrRuns)) + assert(n_runs > 0); // no empty bitmaps + run_container_t *answer = run_container_create_given_capacity(n_runs); + + int long_ctr = 0; + uint64_t cur_word = c_qua_bitset->array[0]; + int run_count = 0; + while (true) { + while (cur_word == UINT64_C(0) && + long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) + cur_word = c_qua_bitset->array[++long_ctr]; + + if (cur_word == UINT64_C(0)) { + bitset_container_free(c_qua_bitset); + *typecode_after = RUN_CONTAINER_TYPE_CODE; + return answer; + } + + int local_run_start = __builtin_ctzll(cur_word); + int run_start = local_run_start + 64 * long_ctr; + uint64_t cur_word_with_1s = cur_word | (cur_word - 1); + + int run_end = 0; + while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) && + long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) + cur_word_with_1s = c_qua_bitset->array[++long_ctr]; + + if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { + run_end = 64 + long_ctr * 64; // exclusive, I guess + add_run(answer, run_start, run_end - 1); + bitset_container_free(c_qua_bitset); + *typecode_after = RUN_CONTAINER_TYPE_CODE; + return answer; + } + int local_run_end = __builtin_ctzll(~cur_word_with_1s); + run_end = local_run_end + long_ctr * 64; + add_run(answer, run_start, run_end - 1); + run_count++; + cur_word = cur_word_with_1s & (cur_word_with_1s + 1); + } + return answer; + } else { + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +bitset_container_t 
*bitset_container_from_run_range(const run_container_t *run, + uint32_t min, uint32_t max) { + bitset_container_t *bitset = bitset_container_create(); + int32_t union_cardinality = 0; + for (int32_t i = 0; i < run->n_runs; ++i) { + uint32_t rle_min = run->runs[i].value; + uint32_t rle_max = rle_min + run->runs[i].length; + bitset_set_lenrange(bitset->array, rle_min, rle_max - rle_min); + union_cardinality += run->runs[i].length + 1; + } + union_cardinality += max - min + 1; + union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); + bitset_set_lenrange(bitset->array, min, max - min); + bitset->cardinality = union_cardinality; + return bitset; +} +/* end file src/containers/convert.c */ +/* begin file src/containers/mixed_andnot.c */ +/* + * mixed_andnot.c. More methods since operation is not symmetric, + * except no "wide" andnot , so no lazy options motivated. + */ + +#include +#include + + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst, a valid array container that could be the same as dst.*/ +void array_bitset_container_andnot(const array_container_t *src_1, + const bitset_container_t *src_2, + array_container_t *dst) { + // follows Java implementation as of June 2016 + if (dst->capacity < src_1->cardinality) { + array_container_grow(dst, src_1->cardinality, false); + } + int32_t newcard = 0; + const int32_t origcard = src_1->cardinality; + for (int i = 0; i < origcard; ++i) { + uint16_t key = src_1->array[i]; + dst->array[newcard] = key; + newcard += 1 - bitset_container_contains(src_2, key); + } + dst->cardinality = newcard; +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * src_1 */ + +void array_bitset_container_iandnot(array_container_t *src_1, + const bitset_container_t *src_2) { + array_bitset_container_andnot(src_1, src_2, src_1); +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst, which does not initially have a valid container. 
+ * Return true for a bitset result; false for array + */ + +bool bitset_array_container_andnot(const bitset_container_t *src_1, + const array_container_t *src_2, void **dst) { + // Java did this directly, but we have option of asm or avx + bitset_container_t *result = bitset_container_create(); + bitset_container_copy(src_1, result); + result->cardinality = + (int32_t)bitset_clear_list(result->array, (uint64_t)result->cardinality, + src_2->array, (uint64_t)src_2->cardinality); + + // do required type conversions. + if (result->cardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(result); + bitset_container_free(result); + return false; + } + *dst = result; + return true; +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_array_container_iandnot(bitset_container_t *src_1, + const array_container_t *src_2, + void **dst) { + *dst = src_1; + src_1->cardinality = + (int32_t)bitset_clear_list(src_1->array, (uint64_t)src_1->cardinality, + src_2->array, (uint64_t)src_2->cardinality); + + if (src_1->cardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(src_1); + bitset_container_free(src_1); + return false; // not bitset + } else + return true; +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. 
+ */ + +bool run_bitset_container_andnot(const run_container_t *src_1, + const bitset_container_t *src_2, void **dst) { + // follows the Java implementation as of June 2016 + int card = run_container_cardinality(src_1); + if (card <= DEFAULT_MAX_SIZE) { + // must be an array + array_container_t *answer = array_container_create_given_capacity(card); + answer->cardinality = 0; + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + for (int run_value = rle.value; run_value <= rle.value + rle.length; + ++run_value) { + if (!bitset_container_get(src_2, (uint16_t)run_value)) { + answer->array[answer->cardinality++] = (uint16_t)run_value; + } + } + } + *dst = answer; + return false; + } else { // we guess it will be a bitset, though have to check guess when + // done + bitset_container_t *answer = bitset_container_clone(src_2); + + uint32_t last_pos = 0; + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + + uint32_t start = rle.value; + uint32_t end = start + rle.length + 1; + bitset_reset_range(answer->array, last_pos, start); + bitset_flip_range(answer->array, start, end); + last_pos = end; + } + bitset_reset_range(answer->array, last_pos, (uint32_t)(1 << 16)); + + answer->cardinality = bitset_container_compute_cardinality(answer); + + if (answer->cardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(answer); + bitset_container_free(answer); + return false; // not bitset + } + *dst = answer; + return true; // bitset + } +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. 
+ */ + +bool run_bitset_container_iandnot(run_container_t *src_1, + const bitset_container_t *src_2, void **dst) { + // dummy implementation + bool ans = run_bitset_container_andnot(src_1, src_2, dst); + run_container_free(src_1); + return ans; +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool bitset_run_container_andnot(const bitset_container_t *src_1, + const run_container_t *src_2, void **dst) { + // follows Java implementation + bitset_container_t *result = bitset_container_create(); + + bitset_container_copy(src_1, result); + for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { + rle16_t rle = src_2->runs[rlepos]; + bitset_reset_range(result->array, rle.value, + rle.value + rle.length + UINT32_C(1)); + } + result->cardinality = bitset_container_compute_cardinality(result); + + if (result->cardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(result); + bitset_container_free(result); + return false; // not bitset + } + *dst = result; + return true; // bitset +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. 
+ * Returns true iff dst is a bitset */ + +bool bitset_run_container_iandnot(bitset_container_t *src_1, + const run_container_t *src_2, void **dst) { + *dst = src_1; + + for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { + rle16_t rle = src_2->runs[rlepos]; + bitset_reset_range(src_1->array, rle.value, + rle.value + rle.length + UINT32_C(1)); + } + src_1->cardinality = bitset_container_compute_cardinality(src_1); + + if (src_1->cardinality <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(src_1); + bitset_container_free(src_1); + return false; // not bitset + } else + return true; +} + +/* helper. a_out must be a valid array container with adequate capacity. + * Returns the cardinality of the output container. Partly Based on Java + * implementation Util.unsignedDifference. + * + * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper + * to avoid advanceUntil? + */ + +static int run_array_array_subtract(const run_container_t *r, + const array_container_t *a_in, + array_container_t *a_out) { + int out_card = 0; + int32_t in_array_pos = + -1; // since advanceUntil always assumes we start the search AFTER this + + for (int rlepos = 0; rlepos < r->n_runs; rlepos++) { + int32_t start = r->runs[rlepos].value; + int32_t end = start + r->runs[rlepos].length + 1; + + in_array_pos = advanceUntil(a_in->array, in_array_pos, + a_in->cardinality, (uint16_t)start); + + if (in_array_pos >= a_in->cardinality) { // run has no items subtracted + for (int32_t i = start; i < end; ++i) + a_out->array[out_card++] = (uint16_t)i; + } else { + uint16_t next_nonincluded = a_in->array[in_array_pos]; + if (next_nonincluded >= end) { + // another case when run goes unaltered + for (int32_t i = start; i < end; ++i) + a_out->array[out_card++] = (uint16_t)i; + in_array_pos--; // ensure we see this item again if necessary + } else { + for (int32_t i = start; i < end; ++i) + if (i != next_nonincluded) + a_out->array[out_card++] = (uint16_t)i; + else // 0 should 
ensure we don't match + next_nonincluded = + (in_array_pos + 1 >= a_in->cardinality) + ? 0 + : a_in->array[++in_array_pos]; + in_array_pos--; // see again + } + } + } + return out_card; +} + +/* dst does not indicate a valid container initially. Eventually it + * can become any type of container. + */ + +int run_array_container_andnot(const run_container_t *src_1, + const array_container_t *src_2, void **dst) { + // follows the Java impl as of June 2016 + + int card = run_container_cardinality(src_1); + const int arbitrary_threshold = 32; + + if (card <= arbitrary_threshold) { + if (src_2->cardinality == 0) { + *dst = run_container_clone(src_1); + return RUN_CONTAINER_TYPE_CODE; + } + // Java's "lazyandNot.toEfficientContainer" thing + run_container_t *answer = run_container_create_given_capacity( + card + array_container_cardinality(src_2)); + + int rlepos = 0; + int xrlepos = 0; // "x" is src_2 + rle16_t rle = src_1->runs[rlepos]; + int32_t start = rle.value; + int32_t end = start + rle.length + 1; + int32_t xstart = src_2->array[xrlepos]; + + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) { + if (end <= xstart) { + // output the first run + answer->runs[answer->n_runs++] = + (rle16_t){.value = (uint16_t)start, + .length = (uint16_t)(end - start - 1)}; + rlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xstart + 1 <= start) { + // exit the second run + xrlepos++; + if (xrlepos < src_2->cardinality) { + xstart = src_2->array[xrlepos]; + } + } else { + if (start < xstart) { + answer->runs[answer->n_runs++] = + (rle16_t){.value = (uint16_t)start, + .length = (uint16_t)(xstart - start - 1)}; + } + if (xstart + 1 < end) { + start = xstart + 1; + } else { + rlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } + } + } + if (rlepos < src_1->n_runs) { + 
answer->runs[answer->n_runs++] = + (rle16_t){.value = (uint16_t)start, + .length = (uint16_t)(end - start - 1)}; + rlepos++; + if (rlepos < src_1->n_runs) { + memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos, + (src_1->n_runs - rlepos) * sizeof(rle16_t)); + answer->n_runs += (src_1->n_runs - rlepos); + } + } + uint8_t return_type; + *dst = convert_run_to_efficient_container(answer, &return_type); + if (answer != *dst) run_container_free(answer); + return return_type; + } + // else it's a bitmap or array + + if (card <= DEFAULT_MAX_SIZE) { + array_container_t *ac = array_container_create_given_capacity(card); + // nb Java code used a generic iterator-based merge to compute + // difference + ac->cardinality = run_array_array_subtract(src_1, src_2, ac); + *dst = ac; + return ARRAY_CONTAINER_TYPE_CODE; + } + bitset_container_t *ans = bitset_container_from_run(src_1); + bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst); + return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE); +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. 
+ * Returns true iff dst is a bitset */ + +int run_array_container_iandnot(run_container_t *src_1, + const array_container_t *src_2, void **dst) { + // dummy implementation same as June 2016 Java + int ans = run_array_container_andnot(src_1, src_2, dst); + run_container_free(src_1); + return ans; +} + +/* dst must be a valid array container, allowed to be src_1 */ + +void array_run_container_andnot(const array_container_t *src_1, + const run_container_t *src_2, + array_container_t *dst) { + // basically following Java impl as of June 2016 + if (src_1->cardinality > dst->capacity) { + array_container_grow(dst, src_1->cardinality, false); + } + + if (src_2->n_runs == 0) { + memmove(dst->array, src_1->array, + sizeof(uint16_t) * src_1->cardinality); + dst->cardinality = src_1->cardinality; + return; + } + int32_t run_start = src_2->runs[0].value; + int32_t run_end = run_start + src_2->runs[0].length; + int which_run = 0; + + uint16_t val = 0; + int dest_card = 0; + for (int i = 0; i < src_1->cardinality; ++i) { + val = src_1->array[i]; + if (val < run_start) + dst->array[dest_card++] = val; + else if (val <= run_end) { + ; // omitted item + } else { + do { + if (which_run + 1 < src_2->n_runs) { + ++which_run; + run_start = src_2->runs[which_run].value; + run_end = run_start + src_2->runs[which_run].length; + + } else + run_start = run_end = (1 << 16) + 1; + } while (val > run_end); + --i; + } + } + dst->cardinality = dest_card; +} + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +void array_run_container_iandnot(array_container_t *src_1, + const run_container_t *src_2) { + array_run_container_andnot(src_1, src_2, src_1); +} + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. 
+ */ + +int run_run_container_andnot(const run_container_t *src_1, + const run_container_t *src_2, void **dst) { + run_container_t *ans = run_container_create(); + run_container_andnot(src_1, src_2, ans); + uint8_t typecode_after; + *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); + return typecode_after; +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +int run_run_container_iandnot(run_container_t *src_1, + const run_container_t *src_2, void **dst) { + // following Java impl as of June 2016 (dummy) + int ans = run_run_container_andnot(src_1, src_2, dst); + run_container_free(src_1); + return ans; +} + +/* + * dst is a valid array container and may be the same as src_1 + */ + +void array_array_container_andnot(const array_container_t *src_1, + const array_container_t *src_2, + array_container_t *dst) { + array_container_andnot(src_1, src_2, dst); +} + +/* inplace array-array andnot will always be able to reuse the space of + * src_1 */ +void array_array_container_iandnot(array_container_t *src_1, + const array_container_t *src_2) { + array_container_andnot(src_1, src_2, src_1); +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). 
Return value is + * "dst is a bitset" + */ + +bool bitset_bitset_container_andnot(const bitset_container_t *src_1, + const bitset_container_t *src_2, + void **dst) { + bitset_container_t *ans = bitset_container_create(); + int card = bitset_container_andnot(src_1, src_2, ans); + if (card <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(ans); + bitset_container_free(ans); + return false; // not bitset + } else { + *dst = ans; + return true; + } +} + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_bitset_container_iandnot(bitset_container_t *src_1, + const bitset_container_t *src_2, + void **dst) { + int card = bitset_container_andnot(src_1, src_2, src_1); + if (card <= DEFAULT_MAX_SIZE) { + *dst = array_container_from_bitset(src_1); + bitset_container_free(src_1); + return false; // not bitset + } else { + *dst = src_1; + return true; + } +} +/* end file src/containers/mixed_andnot.c */ +/* begin file src/containers/mixed_equal.c */ + +bool array_container_equal_bitset(const array_container_t* container1, + const bitset_container_t* container2) { + if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { + if (container2->cardinality != container1->cardinality) { + return false; + } + } + int32_t pos = 0; + for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { + uint64_t w = container2->array[i]; + while (w != 0) { + uint64_t t = w & (~w + 1); + uint16_t r = i * 64 + __builtin_ctzll(w); + if (pos >= container1->cardinality) { + return false; + } + if (container1->array[pos] != r) { + return false; + } + ++pos; + w ^= t; + } + } + return (pos == container1->cardinality); +} + +bool run_container_equals_array(const run_container_t* 
container1, + const array_container_t* container2) { + if (run_container_cardinality(container1) != container2->cardinality) + return false; + int32_t pos = 0; + for (int i = 0; i < container1->n_runs; ++i) { + const uint32_t run_start = container1->runs[i].value; + const uint32_t le = container1->runs[i].length; + + if (container2->array[pos] != run_start) { + return false; + } + + if (container2->array[pos + le] != run_start + le) { + return false; + } + + pos += le + 1; + } + return true; +} + +bool run_container_equals_bitset(const run_container_t* container1, + const bitset_container_t* container2) { + + int run_card = run_container_cardinality(container1); + int bitset_card = (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) ? + container2->cardinality : + bitset_container_compute_cardinality(container2); + if (bitset_card != run_card) { + return false; + } + + for (int32_t i = 0; i < container1->n_runs; i++) { + uint32_t begin = container1->runs[i].value; + if (container1->runs[i].length) { + uint32_t end = begin + container1->runs[i].length + 1; + if (!bitset_container_contains_range(container2, begin, end)) { + return false; + } + } else { + if (!bitset_container_contains(container2, begin)) { + return false; + } + } + } + + return true; +} +/* end file src/containers/mixed_equal.c */ +/* begin file src/containers/mixed_intersection.c */ +/* + * mixed_intersection.c + * + */ + + +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. */ +void array_bitset_container_intersection(const array_container_t *src_1, + const bitset_container_t *src_2, + array_container_t *dst) { + if (dst->capacity < src_1->cardinality) { + array_container_grow(dst, src_1->cardinality, false); + } + int32_t newcard = 0; // dst could be src_1 + const int32_t origcard = src_1->cardinality; + for (int i = 0; i < origcard; ++i) { + uint16_t key = src_1->array[i]; + // this branchless approach is much faster... 
+ dst->array[newcard] = key; + newcard += bitset_container_contains(src_2, key); + /** + * we could do it this way instead... + * if (bitset_container_contains(src_2, key)) { + * dst->array[newcard++] = key; + * } + * but if the result is unpredictible, the processor generates + * many mispredicted branches. + * Difference can be huge (from 3 cycles when predictible all the way + * to 16 cycles when unpredictible. + * See + * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c + */ + } + dst->cardinality = newcard; +} + +/* Compute the size of the intersection of src_1 and src_2. */ +int array_bitset_container_intersection_cardinality( + const array_container_t *src_1, const bitset_container_t *src_2) { + int32_t newcard = 0; + const int32_t origcard = src_1->cardinality; + for (int i = 0; i < origcard; ++i) { + uint16_t key = src_1->array[i]; + newcard += bitset_container_contains(src_2, key); + } + return newcard; +} + + +bool array_bitset_container_intersect(const array_container_t *src_1, + const bitset_container_t *src_2) { + const int32_t origcard = src_1->cardinality; + for (int i = 0; i < origcard; ++i) { + uint16_t key = src_1->array[i]; + if(bitset_container_contains(src_2, key)) return true; + } + return false; +} + +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is allowed for dst to be equal to src_1. We assume that dst is a + * valid container. 
*/ +void array_run_container_intersection(const array_container_t *src_1, + const run_container_t *src_2, + array_container_t *dst) { + if (run_container_is_full(src_2)) { + if (dst != src_1) array_container_copy(src_1, dst); + return; + } + if (dst->capacity < src_1->cardinality) { + array_container_grow(dst, src_1->cardinality, false); + } + if (src_2->n_runs == 0) { + return; + } + int32_t rlepos = 0; + int32_t arraypos = 0; + rle16_t rle = src_2->runs[rlepos]; + int32_t newcard = 0; + while (arraypos < src_1->cardinality) { + const uint16_t arrayval = src_1->array[arraypos]; + while (rle.value + rle.length < + arrayval) { // this will frequently be false + ++rlepos; + if (rlepos == src_2->n_runs) { + dst->cardinality = newcard; + return; // we are done + } + rle = src_2->runs[rlepos]; + } + if (rle.value > arrayval) { + arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, + rle.value); + } else { + dst->array[newcard] = arrayval; + newcard++; + arraypos++; + } + } + dst->cardinality = newcard; +} + +/* Compute the intersection of src_1 and src_2 and write the result to + * *dst. If the result is true then the result is a bitset_container_t + * otherwise is a array_container_t. 
If *dst == src_2, an in-place processing + * is attempted.*/ +bool run_bitset_container_intersection(const run_container_t *src_1, + const bitset_container_t *src_2, + void **dst) { + if (run_container_is_full(src_1)) { + if (*dst != src_2) *dst = bitset_container_clone(src_2); + return true; + } + int32_t card = run_container_cardinality(src_1); + if (card <= DEFAULT_MAX_SIZE) { + // result can only be an array (assuming that we never make a + // RunContainer) + if (card > src_2->cardinality) { + card = src_2->cardinality; + } + array_container_t *answer = array_container_create_given_capacity(card); + *dst = answer; + if (*dst == NULL) { + return false; + } + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + uint32_t endofrun = (uint32_t)rle.value + rle.length; + for (uint32_t runValue = rle.value; runValue <= endofrun; + ++runValue) { + answer->array[answer->cardinality] = (uint16_t)runValue; + answer->cardinality += + bitset_container_contains(src_2, runValue); + } + } + return false; + } + if (*dst == src_2) { // we attempt in-place + bitset_container_t *answer = (bitset_container_t *)*dst; + uint32_t start = 0; + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + const rle16_t rle = src_1->runs[rlepos]; + uint32_t end = rle.value; + bitset_reset_range(src_2->array, start, end); + + start = end + rle.length + 1; + } + bitset_reset_range(src_2->array, start, UINT32_C(1) << 16); + answer->cardinality = bitset_container_compute_cardinality(answer); + if (src_2->cardinality > DEFAULT_MAX_SIZE) { + return true; + } else { + array_container_t *newanswer = array_container_from_bitset(src_2); + if (newanswer == NULL) { + *dst = NULL; + return false; + } + *dst = newanswer; + return false; + } + } else { // no inplace + // we expect the answer to be a bitmap (if we are lucky) + bitset_container_t *answer = bitset_container_clone(src_2); + + *dst = answer; + if (answer == NULL) { + return true; + } + uint32_t 
start = 0; + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + const rle16_t rle = src_1->runs[rlepos]; + uint32_t end = rle.value; + bitset_reset_range(answer->array, start, end); + start = end + rle.length + 1; + } + bitset_reset_range(answer->array, start, UINT32_C(1) << 16); + answer->cardinality = bitset_container_compute_cardinality(answer); + + if (answer->cardinality > DEFAULT_MAX_SIZE) { + return true; + } else { + array_container_t *newanswer = array_container_from_bitset(answer); + bitset_container_free((bitset_container_t *)*dst); + if (newanswer == NULL) { + *dst = NULL; + return false; + } + *dst = newanswer; + return false; + } + } +} + +/* Compute the size of the intersection between src_1 and src_2 . */ +int array_run_container_intersection_cardinality(const array_container_t *src_1, + const run_container_t *src_2) { + if (run_container_is_full(src_2)) { + return src_1->cardinality; + } + if (src_2->n_runs == 0) { + return 0; + } + int32_t rlepos = 0; + int32_t arraypos = 0; + rle16_t rle = src_2->runs[rlepos]; + int32_t newcard = 0; + while (arraypos < src_1->cardinality) { + const uint16_t arrayval = src_1->array[arraypos]; + while (rle.value + rle.length < + arrayval) { // this will frequently be false + ++rlepos; + if (rlepos == src_2->n_runs) { + return newcard; // we are done + } + rle = src_2->runs[rlepos]; + } + if (rle.value > arrayval) { + arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, + rle.value); + } else { + newcard++; + arraypos++; + } + } + return newcard; +} + +/* Compute the intersection between src_1 and src_2 + **/ +int run_bitset_container_intersection_cardinality( + const run_container_t *src_1, const bitset_container_t *src_2) { + if (run_container_is_full(src_1)) { + return bitset_container_cardinality(src_2); + } + int answer = 0; + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + answer += + bitset_lenrange_cardinality(src_2->array, 
rle.value, rle.length); + } + return answer; +} + + +bool array_run_container_intersect(const array_container_t *src_1, + const run_container_t *src_2) { + if( run_container_is_full(src_2) ) { + return !array_container_empty(src_1); + } + if (src_2->n_runs == 0) { + return false; + } + int32_t rlepos = 0; + int32_t arraypos = 0; + rle16_t rle = src_2->runs[rlepos]; + while (arraypos < src_1->cardinality) { + const uint16_t arrayval = src_1->array[arraypos]; + while (rle.value + rle.length < + arrayval) { // this will frequently be false + ++rlepos; + if (rlepos == src_2->n_runs) { + return false; // we are done + } + rle = src_2->runs[rlepos]; + } + if (rle.value > arrayval) { + arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, + rle.value); + } else { + return true; + } + } + return false; +} + +/* Compute the intersection between src_1 and src_2 + **/ +bool run_bitset_container_intersect(const run_container_t *src_1, + const bitset_container_t *src_2) { + if( run_container_is_full(src_1) ) { + return !bitset_container_empty(src_2); + } + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + if(!bitset_lenrange_empty(src_2->array, rle.value,rle.length)) return true; + } + return false; +} + +/* + * Compute the intersection between src_1 and src_2 and write the result + * to *dst. If the return function is true, the result is a bitset_container_t + * otherwise is a array_container_t. 
+ */ +bool bitset_bitset_container_intersection(const bitset_container_t *src_1, + const bitset_container_t *src_2, + void **dst) { + const int newCardinality = bitset_container_and_justcard(src_1, src_2); + if (newCardinality > DEFAULT_MAX_SIZE) { + *dst = bitset_container_create(); + if (*dst != NULL) { + bitset_container_and_nocard(src_1, src_2, + (bitset_container_t *)*dst); + ((bitset_container_t *)*dst)->cardinality = newCardinality; + } + return true; // it is a bitset + } + *dst = array_container_create_given_capacity(newCardinality); + if (*dst != NULL) { + ((array_container_t *)*dst)->cardinality = newCardinality; + bitset_extract_intersection_setbits_uint16( + ((const bitset_container_t *)src_1)->array, + ((const bitset_container_t *)src_2)->array, + BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, + 0); + } + return false; // not a bitset +} + +bool bitset_bitset_container_intersection_inplace( + bitset_container_t *src_1, const bitset_container_t *src_2, void **dst) { + const int newCardinality = bitset_container_and_justcard(src_1, src_2); + if (newCardinality > DEFAULT_MAX_SIZE) { + *dst = src_1; + bitset_container_and_nocard(src_1, src_2, src_1); + ((bitset_container_t *)*dst)->cardinality = newCardinality; + return true; // it is a bitset + } + *dst = array_container_create_given_capacity(newCardinality); + if (*dst != NULL) { + ((array_container_t *)*dst)->cardinality = newCardinality; + bitset_extract_intersection_setbits_uint16( + ((const bitset_container_t *)src_1)->array, + ((const bitset_container_t *)src_2)->array, + BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, + 0); + } + return false; // not a bitset +} +/* end file src/containers/mixed_intersection.c */ +/* begin file src/containers/mixed_negation.c */ +/* + * mixed_negation.c + * + */ + +#include +#include + + +// TODO: make simplified and optimized negation code across +// the full range. + +/* Negation across the entire range of the container. 
+ * Compute the negation of src and write the result
+ * to *dst. The complement of a
+ * sufficiently sparse set will always be dense and hence a bitmap
+ * We assume that dst is pre-allocated and a valid bitset container
+ * There can be no in-place version.
+ */
+void array_container_negation(const array_container_t *src,
+                              bitset_container_t *dst) {
+    uint64_t card = UINT64_C(1 << 16);
+    bitset_container_set_all(dst);
+
+    dst->cardinality = (int32_t)bitset_clear_list(dst->array, card, src->array,
+                                                  (uint64_t)src->cardinality);
+}
+
+/* Negation across the entire range of the container
+ * Compute the negation of src and write the result
+ * to *dst. A true return value indicates a bitset result,
+ * otherwise the result is an array container.
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL.
+ */
+bool bitset_container_negation(const bitset_container_t *src, void **dst) {
+    return bitset_container_negation_range(src, 0, (1 << 16), dst);
+}
+
+/* inplace version */
+/*
+ * Same as bitset_container_negation except that if the output is to
+ * be a
+ * bitset_container_t, then src is modified and no allocation is made.
+ * If the output is to be an array_container_t, then caller is responsible
+ * to free the container.
+ * In all cases, the result is in *dst.
+ */
+bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) {
+    return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
+}
+
+/* Negation across the entire range of the container
+ * Compute the negation of src and write the result
+ * to *dst. Return values are the *_TYPECODES as defined in containers.h
+ * We assume that dst is not pre-allocated. In
+ * case of failure, *dst will be NULL. 
+ */ +int run_container_negation(const run_container_t *src, void **dst) { + return run_container_negation_range(src, 0, (1 << 16), dst); +} + +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. + */ +int run_container_negation_inplace(run_container_t *src, void **dst) { + return run_container_negation_range_inplace(src, 0, (1 << 16), dst); +} + +/* Negation across a range of the container. + * Compute the negation of src and write the result + * to *dst. Returns true if the result is a bitset container + * and false for an array container. *dst is not preallocated. + */ +bool array_container_negation_range(const array_container_t *src, + const int range_start, const int range_end, + void **dst) { + /* close port of the Java implementation */ + if (range_start >= range_end) { + *dst = array_container_clone(src); + return false; + } + + int32_t start_index = + binarySearch(src->array, src->cardinality, (uint16_t)range_start); + if (start_index < 0) start_index = -start_index - 1; + + int32_t last_index = + binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); + if (last_index < 0) last_index = -last_index - 2; + + const int32_t current_values_in_range = last_index - start_index + 1; + const int32_t span_to_be_flipped = range_end - range_start; + const int32_t new_values_in_range = + span_to_be_flipped - current_values_in_range; + const int32_t cardinality_change = + new_values_in_range - current_values_in_range; + const int32_t new_cardinality = src->cardinality + cardinality_change; + + if (new_cardinality > DEFAULT_MAX_SIZE) { + bitset_container_t *temp = bitset_container_from_array(src); + bitset_flip_range(temp->array, (uint32_t)range_start, + (uint32_t)range_end); + temp->cardinality = new_cardinality; + *dst = temp; + return true; + } + + array_container_t *arr = 
+ array_container_create_given_capacity(new_cardinality); + *dst = (void *)arr; + if(new_cardinality == 0) { + arr->cardinality = new_cardinality; + return false; // we are done. + } + // copy stuff before the active area + memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); + + // work on the range + int32_t out_pos = start_index, in_pos = start_index; + int32_t val_in_range = range_start; + for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { + if ((uint16_t)val_in_range != src->array[in_pos]) { + arr->array[out_pos++] = (uint16_t)val_in_range; + } else { + ++in_pos; + } + } + for (; val_in_range < range_end; ++val_in_range) + arr->array[out_pos++] = (uint16_t)val_in_range; + + // content after the active range + memcpy(arr->array + out_pos, src->array + (last_index + 1), + (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); + arr->cardinality = new_cardinality; + return false; +} + +/* Even when the result would fit, it is unclear how to make an + * inplace version without inefficient copying. + */ + +bool array_container_negation_range_inplace(array_container_t *src, + const int range_start, + const int range_end, void **dst) { + bool ans = array_container_negation_range(src, range_start, range_end, dst); + // TODO : try a real inplace version + array_container_free(src); + return ans; +} + +/* Negation across a range of the container + * Compute the negation of src and write the result + * to *dst. A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation_range(const bitset_container_t *src, + const int range_start, const int range_end, + void **dst) { + // TODO maybe consider density-based estimate + // and sometimes build result directly as array, with + // conversion back to bitset if wrong. 
Or determine + // actual result cardinality, then go directly for the known final cont. + + // keep computation using bitsets as long as possible. + bitset_container_t *t = bitset_container_clone(src); + bitset_flip_range(t->array, (uint32_t)range_start, (uint32_t)range_end); + t->cardinality = bitset_container_compute_cardinality(t); + + if (t->cardinality > DEFAULT_MAX_SIZE) { + *dst = t; + return true; + } else { + *dst = array_container_from_bitset(t); + bitset_container_free(t); + return false; + } +} + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_range_inplace(bitset_container_t *src, + const int range_start, + const int range_end, void **dst) { + bitset_flip_range(src->array, (uint32_t)range_start, (uint32_t)range_end); + src->cardinality = bitset_container_compute_cardinality(src); + if (src->cardinality > DEFAULT_MAX_SIZE) { + *dst = src; + return true; + } + *dst = array_container_from_bitset(src); + bitset_container_free(src); + return false; +} + +/* Negation across a range of container + * Compute the negation of src and write the result + * to *dst. Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. 
 */
int run_container_negation_range(const run_container_t *src,
                                 const int range_start, const int range_end,
                                 void **dst) {
    uint8_t return_typecode;

    // follows the Java implementation
    if (range_end <= range_start) {
        // empty range: the negation is just a copy of src
        *dst = run_container_clone(src);
        return RUN_CONTAINER_TYPE_CODE;
    }

    // flipping one range can add at most one run
    run_container_t *ans = run_container_create_given_capacity(
        src->n_runs + 1);  // src->n_runs + 1);
    int k = 0;
    // runs that start before the range are copied verbatim
    for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
        ans->runs[k] = src->runs[k];
        ans->n_runs++;
    }

    // append the range [range_start, range_end) exclusively: overlapping
    // parts cancel, which is exactly the negation of the range
    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    for (; k < src->n_runs; ++k) {
        run_container_smart_append_exclusive(ans, src->runs[k].value,
                                             src->runs[k].length);
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);

    return return_typecode;
}

/*
 * Same as run_container_negation except that if the output is to
 * be a
 * run_container_t, and has the capacity to hold the result,
 * then src is modified and no allocation is made.
 * In all cases, the result is in *dst.
 */
int run_container_negation_range_inplace(run_container_t *src,
                                         const int range_start,
                                         const int range_end, void **dst) {
    uint8_t return_typecode;

    if (range_end <= range_start) {
        // empty range: the answer is src itself, unchanged
        *dst = src;
        return RUN_CONTAINER_TYPE_CODE;
    }

    // TODO: efficient special case when range is 0 to 65535 inclusive

    if (src->capacity == src->n_runs) {
        // no excess room. More checking to see if result can fit
        bool last_val_before_range = false;
        bool first_val_in_range = false;
        bool last_val_in_range = false;
        bool first_val_past_range = false;

        // membership at the range boundaries decides whether flipping
        // grows the number of runs (and thus needs extra capacity)
        if (range_start > 0)
            last_val_before_range =
                run_container_contains(src, (uint16_t)(range_start - 1));
        first_val_in_range = run_container_contains(src, (uint16_t)range_start);

        if (last_val_before_range == first_val_in_range) {
            last_val_in_range =
                run_container_contains(src, (uint16_t)(range_end - 1));
            if (range_end != 0x10000)
                first_val_past_range =
                    run_container_contains(src, (uint16_t)range_end);

            if (last_val_in_range ==
                first_val_past_range) {  // no space for inplace
                // fall back to the allocating version; src is consumed
                int ans = run_container_negation_range(src, range_start,
                                                       range_end, dst);
                run_container_free(src);
                return ans;
            }
        }
    }
    // all other cases: result will fit

    run_container_t *ans = src;
    int my_nbr_runs = src->n_runs;

    ans->n_runs = 0;
    int k = 0;
    // runs that start before the range stay where they are
    for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
        // ans->runs[k] = src->runs[k]; (would be self-copy)
        ans->n_runs++;
    }

    // as with Java implementation, use locals to give self a buffer of depth 1
    rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0};
    rle16_t next = buffered;
    if (k < my_nbr_runs) buffered = src->runs[k];

    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    for (; k < my_nbr_runs; ++k) {
        if (k + 1 < my_nbr_runs) next = src->runs[k + 1];

        run_container_smart_append_exclusive(ans, buffered.value,
                                             buffered.length);
        buffered = next;
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);

    return return_typecode;
}
/* end file src/containers/mixed_negation.c */
/* begin file src/containers/mixed_subset.c */

/* Return true iff every value of container1 (array) is present in
 * container2 (bitset). */
bool array_container_is_subset_bitset(const array_container_t* container1,
                                      const bitset_container_t* container2) {
    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
        // quick reject: a smaller set cannot contain a larger one
        if (container2->cardinality < container1->cardinality) {
            return false;
        }
    }
    for (int i = 0; i < container1->cardinality; ++i) {
        if (!bitset_container_contains(container2, container1->array[i])) {
            return false;
        }
    }
    return true;
}

/* Return true iff every value of container1 (run) is present in
 * container2 (array). */
bool run_container_is_subset_array(const run_container_t* container1,
                                   const array_container_t* container2) {
    if (run_container_cardinality(container1) > container2->cardinality)
        return false;
    int32_t start_pos = -1, stop_pos = -1;
    for (int i = 0; i < container1->n_runs; ++i) {
        int32_t start = container1->runs[i].value;
        int32_t stop = start + container1->runs[i].length;
        // each run [start, stop] must map onto a dense span of the array
        start_pos = advanceUntil(container2->array, stop_pos,
                                 container2->cardinality, start);
        stop_pos = advanceUntil(container2->array, stop_pos,
                                container2->cardinality, stop);
        if (start_pos == container2->cardinality) {
            return false;
        } else if (stop_pos - start_pos != stop - start ||
                   container2->array[start_pos] != start ||
                   container2->array[stop_pos] != stop) {
            return false;
        }
    }
    return true;
}

/* Return true iff every value of container1 (array) is present in
 * container2 (run). */
bool array_container_is_subset_run(const array_container_t* container1,
                                   const run_container_t* container2) {
    if (container1->cardinality > run_container_cardinality(container2))
        return false;
    int i_array = 0, i_run = 0;
    while (i_array < container1->cardinality && i_run < container2->n_runs) {
        uint32_t start = container2->runs[i_run].value;
        uint32_t stop = start + container2->runs[i_run].length;
        if (container1->array[i_array] < start) {
            // the value falls in a gap before the current run
            return false;
        } else if (container1->array[i_array] > stop) {
            i_run++;
        } else {  // the value of the array is in the run
            i_array++;
        }
    }
    if (i_array == container1->cardinality) {
        return true;
    } else {
        return false;
    }
}

/* Return true iff every value of container1 (run) is present in
 * container2 (bitset). */
bool run_container_is_subset_bitset(const run_container_t* container1,
                                    const bitset_container_t* container2) {
    // todo: this code could be much faster
    if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
        if (container2->cardinality < run_container_cardinality(container1)) {
            return false;
        }
    } else {
        int32_t card = bitset_container_compute_cardinality(
            container2);  // modify container2?
        if (card < run_container_cardinality(container1)) {
            return false;
        }
    }
    for (int i = 0; i < container1->n_runs; ++i) {
        uint32_t run_start = container1->runs[i].value;
        uint32_t le = container1->runs[i].length;
        // test every value of the run individually (hence the todo above)
        for (uint32_t j = run_start; j <= run_start + le; ++j) {
            if (!bitset_container_contains(container2, j)) {
                return false;
            }
        }
    }
    return true;
}

/* Return true iff every bit set in container1 (bitset) lies inside one of
 * the runs of container2 (run). */
bool bitset_container_is_subset_run(const bitset_container_t* container1,
                                    const run_container_t* container2) {
    // todo: this code could be much faster
    if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) {
        if (container1->cardinality > run_container_cardinality(container2)) {
            return false;
        }
    }
    int32_t i_bitset = 0, i_run = 0;
    while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS &&
           i_run < container2->n_runs) {
        uint64_t w = container1->array[i_bitset];
        while (w != 0 && i_run < container2->n_runs) {
            uint32_t start = container2->runs[i_run].value;
            uint32_t stop = start + container2->runs[i_run].length;
            uint64_t t = w & (~w + 1);  // lowest set bit of w
            uint16_t r = i_bitset * 64 + __builtin_ctzll(w);
            if (r < start) {
                return false;
            } else if (r > stop) {
                i_run++;
                continue;
            } else {
                w ^= t;  // clear the bit: it is covered by the current run
            }
        }
        if (w == 0) {
            i_bitset++;
        } else {
            return false;
        }
    }
    if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) {
        // terminated iterating on the run containers, check that rest of bitset
        // is empty
        for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) {
            if (container1->array[i_bitset] != 0) {
                return false;
            }
        }
    }
    return true;
}
/* end file src/containers/mixed_subset.c */
/* begin file src/containers/mixed_union.c */
/*
 * mixed_union.c
 *
 */

/* NOTE(review): the targets of the two #include directives below were lost
 * in extraction; restore them from the amalgamation before compiling. */
#include 
#include 

+/* Compute the union of src_1 and src_2 and write the result to + * dst. */ +void array_bitset_container_union(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + if (src_2 != dst) bitset_container_copy(src_2, dst); + dst->cardinality = (int32_t)bitset_set_list_withcard( + dst->array, dst->cardinality, src_1->array, src_1->cardinality); +} + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ +void array_bitset_container_lazy_union(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + if (src_2 != dst) bitset_container_copy(src_2, dst); + bitset_set_list(dst->array, src_1->array, src_1->cardinality); + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +} + +void run_bitset_container_union(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + assert(!run_container_is_full(src_1)); // catch this case upstream + if (src_2 != dst) bitset_container_copy(src_2, dst); + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + bitset_set_lenrange(dst->array, rle.value, rle.length); + } + dst->cardinality = bitset_container_compute_cardinality(dst); +} + +void run_bitset_container_lazy_union(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst) { + assert(!run_container_is_full(src_1)); // catch this case upstream + if (src_2 != dst) bitset_container_copy(src_2, dst); + for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { + rle16_t rle = src_1->runs[rlepos]; + bitset_set_lenrange(dst->array, rle.value, rle.length); + } + dst->cardinality = BITSET_UNKNOWN_CARDINALITY; +} + +// why do we leave the result as a run container?? 
/* Union of an array container and a run container, written into `dst`
 * (which is kept as a run container). */
void array_run_container_union(const array_container_t *src_1,
                               const run_container_t *src_2,
                               run_container_t *dst) {
    if (run_container_is_full(src_2)) {
        // a full container absorbs the union
        run_container_copy(src_2, dst);
        return;
    }
    // TODO: see whether the "2*" is spurious
    run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false);
    int32_t rlepos = 0;
    int32_t arraypos = 0;
    rle16_t previousrle;
    // seed the output with whichever input starts first
    if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
        previousrle = run_container_append_first(dst, src_2->runs[rlepos]);
        rlepos++;
    } else {
        previousrle =
            run_container_append_value_first(dst, src_1->array[arraypos]);
        arraypos++;
    }
    // two-way merge on start values; append fuses adjacent/overlapping runs
    while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
        if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
            run_container_append(dst, src_2->runs[rlepos], &previousrle);
            rlepos++;
        } else {
            run_container_append_value(dst, src_1->array[arraypos],
                                       &previousrle);
            arraypos++;
        }
    }
    // flush whichever input has values left
    if (arraypos < src_1->cardinality) {
        while (arraypos < src_1->cardinality) {
            run_container_append_value(dst, src_1->array[arraypos],
                                       &previousrle);
            arraypos++;
        }
    } else {
        while (rlepos < src_2->n_runs) {
            run_container_append(dst, src_2->runs[rlepos], &previousrle);
            rlepos++;
        }
    }
}

/* In-place union: merge the values of `src_1` (array) into `src_2` (run). */
void array_run_container_inplace_union(const array_container_t *src_1,
                                       run_container_t *src_2) {
    if (run_container_is_full(src_2)) {
        return;
    }
    const int32_t maxoutput = src_1->cardinality + src_2->n_runs;
    const int32_t neededcapacity = maxoutput + src_2->n_runs;
    if (src_2->capacity < neededcapacity)
        run_container_grow(src_2, neededcapacity, true);
    // move the existing runs to the end of the buffer so the merged result
    // can be written from the front without clobbering its own input
    memmove(src_2->runs + maxoutput, src_2->runs,
            src_2->n_runs * sizeof(rle16_t));
    rle16_t *inputsrc2 = src_2->runs + maxoutput;
    int32_t rlepos = 0;
    int32_t arraypos = 0;
    int src2nruns = src_2->n_runs;
    src_2->n_runs = 0;

    rle16_t previousrle;

    if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
        previousrle = run_container_append_first(src_2, inputsrc2[rlepos]);
        rlepos++;
    } else {
        previousrle =
            run_container_append_value_first(src_2, src_1->array[arraypos]);
        arraypos++;
    }

    while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) {
        if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
            run_container_append(src_2, inputsrc2[rlepos], &previousrle);
            rlepos++;
        } else {
            run_container_append_value(src_2, src_1->array[arraypos],
                                       &previousrle);
            arraypos++;
        }
    }
    if (arraypos < src_1->cardinality) {
        while (arraypos < src_1->cardinality) {
            run_container_append_value(src_2, src_1->array[arraypos],
                                       &previousrle);
            arraypos++;
        }
    } else {
        while (rlepos < src2nruns) {
            run_container_append(src_2, inputsrc2[rlepos], &previousrle);
            rlepos++;
        }
    }
}

/* Union of two array containers; `*dst` receives a new container.
 * Returns true iff `*dst` ends up being a bitset container. */
bool array_array_container_union(const array_container_t *src_1,
                                 const array_container_t *src_2, void **dst) {
    int totalCardinality = src_1->cardinality + src_2->cardinality;
    if (totalCardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_create_given_capacity(totalCardinality);
        if (*dst != NULL) {
            array_container_union(src_1, src_2, (array_container_t *)*dst);
        } else {
            return true;  // otherwise failure won't be caught
        }
        return false;  // not a bitset
    }
    *dst = bitset_container_create();
    bool returnval = true;  // expect a bitset
    if (*dst != NULL) {
        bitset_container_t *ourbitset = (bitset_container_t *)*dst;
        bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality);
        ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
            ourbitset->array, src_1->cardinality, src_2->array,
            src_2->cardinality);
        if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
            // need to convert!
            *dst = array_container_from_bitset(ourbitset);
            bitset_container_free(ourbitset);
            returnval = false;  // not going to be a bitset
        }
    }
    return returnval;
}

/* In-place union of two array containers.  On return, `*dst` is NULL when
 * the result was written into `src_1` in place; otherwise it points to a
 * freshly allocated container.  Returns true iff the result is a bitset. */
bool array_array_container_inplace_union(array_container_t *src_1,
                                         const array_container_t *src_2,
                                         void **dst) {
    int totalCardinality = src_1->cardinality + src_2->cardinality;
    *dst = NULL;
    if (totalCardinality <= DEFAULT_MAX_SIZE) {
        if (src_1->capacity < totalCardinality) {
            *dst = array_container_create_given_capacity(
                2 * totalCardinality);  // be purposefully generous
            if (*dst != NULL) {
                array_container_union(src_1, src_2, (array_container_t *)*dst);
            } else {
                return true;  // otherwise failure won't be caught
            }
            return false;  // not a bitset
        } else {
            // shift src_1's values up by src_2->cardinality so union_uint16
            // can write the merged result to the front of the same buffer
            memmove(src_1->array + src_2->cardinality, src_1->array,
                    src_1->cardinality * sizeof(uint16_t));
            src_1->cardinality = (int32_t)union_uint16(
                src_1->array + src_2->cardinality, src_1->cardinality,
                src_2->array, src_2->cardinality, src_1->array);
            return false;  // not a bitset
        }
    }
    *dst = bitset_container_create();
    bool returnval = true;  // expect a bitset
    if (*dst != NULL) {
        bitset_container_t *ourbitset = (bitset_container_t *)*dst;
        bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality);
        ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
            ourbitset->array, src_1->cardinality, src_2->array,
            src_2->cardinality);
        if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
            // need to convert!
            if (src_1->capacity < ourbitset->cardinality) {
                array_container_grow(src_1, ourbitset->cardinality, false);
            }

            bitset_extract_setbits_uint16(ourbitset->array,
                                          BITSET_CONTAINER_SIZE_IN_WORDS,
                                          src_1->array, 0);
            src_1->cardinality = ourbitset->cardinality;
            *dst = src_1;
            bitset_container_free(ourbitset);
            returnval = false;  // not going to be a bitset
        }
    }
    return returnval;
}


/* Lazy union of two array containers; like array_array_container_union but
 * a bitset result keeps BITSET_UNKNOWN_CARDINALITY. */
bool array_array_container_lazy_union(const array_container_t *src_1,
                                      const array_container_t *src_2,
                                      void **dst) {
    int totalCardinality = src_1->cardinality + src_2->cardinality;
    if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
        *dst = array_container_create_given_capacity(totalCardinality);
        if (*dst != NULL) {
            array_container_union(src_1, src_2, (array_container_t *)*dst);
        } else {
            return true;  // otherwise failure won't be caught
        }
        return false;  // not a bitset
    }
    *dst = bitset_container_create();
    bool returnval = true;  // expect a bitset
    if (*dst != NULL) {
        bitset_container_t *ourbitset = (bitset_container_t *)*dst;
        bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality);
        bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality);
        ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
    }
    return returnval;
}


/* Lazy in-place union of two array containers; see
 * array_array_container_inplace_union for the `*dst` protocol. */
bool array_array_container_lazy_inplace_union(array_container_t *src_1,
                                              const array_container_t *src_2,
                                              void **dst) {
    int totalCardinality = src_1->cardinality + src_2->cardinality;
    *dst = NULL;
    if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
        if (src_1->capacity < totalCardinality) {
            *dst = array_container_create_given_capacity(
                2 * totalCardinality);  // be purposefully generous
            if (*dst != NULL) {
                array_container_union(src_1, src_2, (array_container_t *)*dst);
            } else {
                return true;  // otherwise failure won't be caught
            }
            return false;  // not a bitset
        } else {
            // shift src_1's values up by src_2->cardinality so union_uint16
            // can write the merged result to the front of the same buffer
            memmove(src_1->array + src_2->cardinality, src_1->array,
                    src_1->cardinality * sizeof(uint16_t));
            src_1->cardinality = (int32_t)union_uint16(
                src_1->array + src_2->cardinality, src_1->cardinality,
                src_2->array, src_2->cardinality, src_1->array);
            return false;  // not a bitset
        }
    }
    *dst = bitset_container_create();
    bool returnval = true;  // expect a bitset
    if (*dst != NULL) {
        bitset_container_t *ourbitset = (bitset_container_t *)*dst;
        bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality);
        bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality);
        ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
    }
    return returnval;
}
/* end file src/containers/mixed_union.c */
/* begin file src/containers/mixed_xor.c */
/*
 * mixed_xor.c
 */

/* NOTE(review): the targets of the two #include directives below were lost
 * in extraction; restore them from the amalgamation before compiling. */
#include 
#include 


/* Compute the xor of src_1 and src_2 and write the result to
 * dst (which has no container initially).
 * Result is true iff dst is a bitset */
bool array_bitset_container_xor(const array_container_t *src_1,
                                const bitset_container_t *src_2, void **dst) {
    bitset_container_t *result = bitset_container_create();
    bitset_container_copy(src_2, result);
    result->cardinality = (int32_t)bitset_flip_list_withcard(
        result->array, result->cardinality, src_1->array, src_1->cardinality);

    // do required type conversions.
    if (result->cardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(result);
        bitset_container_free(result);
        return false;  // not bitset
    }
    *dst = result;
    return true;  // bitset
}

/* Compute the xor of src_1 and src_2 and write the result to
 * dst. It is allowed for src_2 to be dst. This version does not
 * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
 */

void array_bitset_container_lazy_xor(const array_container_t *src_1,
                                     const bitset_container_t *src_2,
                                     bitset_container_t *dst) {
    if (src_2 != dst) bitset_container_copy(src_2, dst);
    bitset_flip_list(dst->array, src_1->array, src_1->cardinality);
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;  // recomputed on demand
}

/* Compute the xor of src_1 and src_2 and write the result to
 * dst. Result may be either a bitset or an array container
 * (returns "result is bitset"). dst does not initially have
 * any container, but becomes either a bitset container (return
 * result true) or an array container.
 */

bool run_bitset_container_xor(const run_container_t *src_1,
                              const bitset_container_t *src_2, void **dst) {
    bitset_container_t *result = bitset_container_create();

    bitset_container_copy(src_2, result);
    // flip every run of src_1 inside the copied bitset
    for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
        rle16_t rle = src_1->runs[rlepos];
        bitset_flip_range(result->array, rle.value,
                          rle.value + rle.length + UINT32_C(1));
    }
    result->cardinality = bitset_container_compute_cardinality(result);

    if (result->cardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(result);
        bitset_container_free(result);
        return false;  // not bitset
    }
    *dst = result;
    return true;  // bitset
}

/* lazy xor. Dst is initialized and may be equal to src_2.
 * Result is left as a bitset container, even if actual
 * cardinality would dictate an array container.
 */

void run_bitset_container_lazy_xor(const run_container_t *src_1,
                                   const bitset_container_t *src_2,
                                   bitset_container_t *dst) {
    if (src_2 != dst) bitset_container_copy(src_2, dst);
    for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
        rle16_t rle = src_1->runs[rlepos];
        bitset_flip_range(dst->array, rle.value,
                          rle.value + rle.length + UINT32_C(1));
    }
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
}

/* dst does not indicate a valid container initially. Eventually it
 * can become any kind of container.
 */

int array_run_container_xor(const array_container_t *src_1,
                            const run_container_t *src_2, void **dst) {
    // semi following Java XOR implementation as of May 2016
    // the C OR implementation works quite differently and can return a run
    // container
    // TODO could optimize for full run containers.

    // use of lazy following Java impl.
    const int arbitrary_threshold = 32;
    if (src_1->cardinality < arbitrary_threshold) {
        run_container_t *ans = run_container_create();
        array_run_container_lazy_xor(src_1, src_2, ans);  // keeps runs.
        uint8_t typecode_after;
        *dst =
            convert_run_to_efficient_container_and_free(ans, &typecode_after);
        return typecode_after;
    }

    int card = run_container_cardinality(src_2);
    if (card <= DEFAULT_MAX_SIZE) {
        // Java implementation works with the array, xoring the run elements via
        // iterator
        array_container_t *temp = array_container_from_run(src_2);
        bool ret_is_bitset = array_array_container_xor(temp, src_1, dst);
        array_container_free(temp);
        return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE
                             : ARRAY_CONTAINER_TYPE_CODE;

    } else {  // guess that it will end up as a bitset
        bitset_container_t *result = bitset_container_from_run(src_2);
        bool is_bitset = bitset_array_container_ixor(result, src_1, dst);
        // any necessary type conversion has been done by the ixor
        int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE
                                : ARRAY_CONTAINER_TYPE_CODE);
        return retval;
    }
}

/* Dst is a valid run container. (Can it be src_2? Let's say not.)
 * Leaves result as run container, even if other options are
 * smaller.
 */

void array_run_container_lazy_xor(const array_container_t *src_1,
                                  const run_container_t *src_2,
                                  run_container_t *dst) {
    run_container_grow(dst, src_1->cardinality + src_2->n_runs, false);
    int32_t rlepos = 0;
    int32_t arraypos = 0;
    dst->n_runs = 0;

    // merge runs and singleton values in increasing order; the exclusive
    // append implements the xor semantics
    while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
        if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
            run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value,
                                                 src_2->runs[rlepos].length);
            rlepos++;
        } else {
            run_container_smart_append_exclusive(dst, src_1->array[arraypos],
                                                 0);
            arraypos++;
        }
    }
    while (arraypos < src_1->cardinality) {
        run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0);
        arraypos++;
    }
    while (rlepos < src_2->n_runs) {
        run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value,
                                             src_2->runs[rlepos].length);
        rlepos++;
    }
}

/* dst does not indicate a valid container initially. Eventually it
 * can become any kind of container.
 */

int run_run_container_xor(const run_container_t *src_1,
                          const run_container_t *src_2, void **dst) {
    run_container_t *ans = run_container_create();
    run_container_xor(src_1, src_2, ans);
    uint8_t typecode_after;
    *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
    return typecode_after;
}

/*
 * Java implementation (as of May 2016) for array_run, run_run
 * and bitset_run don't do anything different for inplace.
 * Could adopt the mixed_union.c approach instead (ie, using
 * smart_append_exclusive)
 *
 */

bool array_array_container_xor(const array_container_t *src_1,
                               const array_container_t *src_2, void **dst) {
    int totalCardinality =
        src_1->cardinality + src_2->cardinality;  // upper bound
    if (totalCardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_create_given_capacity(totalCardinality);
        array_container_xor(src_1, src_2, (array_container_t *)*dst);
        return false;  // not a bitset
    }
    *dst = bitset_container_from_array(src_1);
    bool returnval = true;  // expect a bitset
    bitset_container_t *ourbitset = (bitset_container_t *)*dst;
    ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard(
        ourbitset->array, src_1->cardinality, src_2->array, src_2->cardinality);
    if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
        // need to convert!
        *dst = array_container_from_bitset(ourbitset);
        bitset_container_free(ourbitset);
        returnval = false;  // not going to be a bitset
    }

    return returnval;
}

/* Lazy xor of two array containers: a bitset result keeps
 * BITSET_UNKNOWN_CARDINALITY.  Returns true iff `*dst` is a bitset. */
bool array_array_container_lazy_xor(const array_container_t *src_1,
                                    const array_container_t *src_2,
                                    void **dst) {
    int totalCardinality = src_1->cardinality + src_2->cardinality;
    // upper bound, but probably poor estimate for xor
    if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
        *dst = array_container_create_given_capacity(totalCardinality);
        if (*dst != NULL)
            array_container_xor(src_1, src_2, (array_container_t *)*dst);
        return false;  // not a bitset
    }
    *dst = bitset_container_from_array(src_1);
    bool returnval = true;  // expect a bitset (maybe, for XOR??)
    if (*dst != NULL) {
        bitset_container_t *ourbitset = (bitset_container_t *)*dst;
        bitset_flip_list(ourbitset->array, src_2->array, src_2->cardinality);
        ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
    }
    return returnval;
}

/* Compute the xor of src_1 and src_2 and write the result to
 * dst (which has no container initially). Return value is
 * "dst is a bitset"
 */

bool bitset_bitset_container_xor(const bitset_container_t *src_1,
                                 const bitset_container_t *src_2, void **dst) {
    bitset_container_t *ans = bitset_container_create();
    int card = bitset_container_xor(src_1, src_2, ans);
    if (card <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(ans);
        bitset_container_free(ans);
        return false;  // not bitset
    } else {
        *dst = ans;
        return true;
    }
}

/* Compute the xor of src_1 and src_2 and write the result to
 * dst (which has no container initially). It will modify src_1
 * to be dst if the result is a bitset. Otherwise, it will
 * free src_1 and dst will be a new array container. In both
 * cases, the caller is responsible for deallocating dst.
 * Returns true iff dst is a bitset */

bool bitset_array_container_ixor(bitset_container_t *src_1,
                                 const array_container_t *src_2, void **dst) {
    *dst = src_1;
    src_1->cardinality = (uint32_t)bitset_flip_list_withcard(
        src_1->array, src_1->cardinality, src_2->array, src_2->cardinality);

    if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
        *dst = array_container_from_bitset(src_1);
        bitset_container_free(src_1);
        return false;  // not bitset
    } else
        return true;
}

/* a bunch of in-place, some of which may not *really* be inplace.
 * TODO: write actual inplace routine if efficiency warrants it
 * Anything inplace with a bitset is a good candidate
 */

bool bitset_bitset_container_ixor(bitset_container_t *src_1,
                                  const bitset_container_t *src_2, void **dst) {
    bool ans = bitset_bitset_container_xor(src_1, src_2, dst);
    bitset_container_free(src_1);  // src_1 is consumed
    return ans;
}

bool array_bitset_container_ixor(array_container_t *src_1,
                                 const bitset_container_t *src_2, void **dst) {
    bool ans = array_bitset_container_xor(src_1, src_2, dst);
    array_container_free(src_1);  // src_1 is consumed
    return ans;
}

/* Compute the xor of src_1 and src_2 and write the result to
 * dst.
Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool run_bitset_container_ixor(run_container_t *src_1, + const bitset_container_t *src_2, void **dst) { + bool ans = run_bitset_container_xor(src_1, src_2, dst); + run_container_free(src_1); + return ans; +} + +bool bitset_run_container_ixor(bitset_container_t *src_1, + const run_container_t *src_2, void **dst) { + bool ans = run_bitset_container_xor(src_2, src_1, dst); + bitset_container_free(src_1); + return ans; +} + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int array_run_container_ixor(array_container_t *src_1, + const run_container_t *src_2, void **dst) { + int ans = array_run_container_xor(src_1, src_2, dst); + array_container_free(src_1); + return ans; +} + +int run_array_container_ixor(run_container_t *src_1, + const array_container_t *src_2, void **dst) { + int ans = array_run_container_xor(src_2, src_1, dst); + run_container_free(src_1); + return ans; +} + +bool array_array_container_ixor(array_container_t *src_1, + const array_container_t *src_2, void **dst) { + bool ans = array_array_container_xor(src_1, src_2, dst); + array_container_free(src_1); + return ans; +} + +int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, + void **dst) { + int ans = run_run_container_xor(src_1, src_2, dst); + run_container_free(src_1); + return ans; +} +/* end file src/containers/mixed_xor.c */ +/* begin file src/containers/run.c */ +#include +#include + + +bool run_container_add(run_container_t *run, uint16_t pos) { + int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); + if (index >= 0) return false; // already there + index = -index - 2; // points to preceding value, possibly -1 + if (index >= 0) { // possible match + int32_t 
offset = pos - run->runs[index].value; + int32_t le = run->runs[index].length; + if (offset <= le) return false; // already there + if (offset == le + 1) { + // we may need to fuse + if (index + 1 < run->n_runs) { + if (run->runs[index + 1].value == pos + 1) { + // indeed fusion is needed + run->runs[index].length = run->runs[index + 1].value + + run->runs[index + 1].length - + run->runs[index].value; + recoverRoomAtIndex(run, (uint16_t)(index + 1)); + return true; + } + } + run->runs[index].length++; + return true; + } + if (index + 1 < run->n_runs) { + // we may need to fuse + if (run->runs[index + 1].value == pos + 1) { + // indeed fusion is needed + run->runs[index + 1].value = pos; + run->runs[index + 1].length = run->runs[index + 1].length + 1; + return true; + } + } + } + if (index == -1) { + // we may need to extend the first run + if (0 < run->n_runs) { + if (run->runs[0].value == pos + 1) { + run->runs[0].length++; + run->runs[0].value--; + return true; + } + } + } + makeRoomAtIndex(run, (uint16_t)(index + 1)); + run->runs[index + 1].value = pos; + run->runs[index + 1].length = 0; + return true; +} + +/* Create a new run container. Return NULL in case of failure. */ +run_container_t *run_container_create_given_capacity(int32_t size) { + run_container_t *run; + /* Allocate the run container itself. */ + run = (run_container_t *)malloc(sizeof(run_container_t)); + assert (run); + if (size <= 0) // we don't want to rely on malloc(0) + run->runs = NULL; + run->runs = (rle16_t *)malloc(sizeof(rle16_t) * size); + assert (run->runs); + run->capacity = size; + run->n_runs = 0; + return run; +} + +int run_container_shrink_to_fit(run_container_t *src) { + if (src->n_runs == src->capacity) return 0; // nothing to do + int savings = src->capacity - src->n_runs; + src->capacity = src->n_runs; + rle16_t *oldruns = src->runs; + src->runs = (rle16_t *)realloc(oldruns, src->capacity * sizeof(rle16_t)); + if (src->runs == NULL) free(oldruns); // should never happen? 
+ return savings; +} +/* Create a new run container. Return NULL in case of failure. */ +run_container_t *run_container_create(void) { + return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); +} + +run_container_t *run_container_clone(const run_container_t *src) { + run_container_t *run = run_container_create_given_capacity(src->capacity); + if (run == NULL) return NULL; + run->capacity = src->capacity; + run->n_runs = src->n_runs; + memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); + return run; +} + +/* Free memory. */ +void run_container_free(run_container_t *run) { + if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise + free(run->runs); + run->runs = NULL; // pedantic + } + free(run); +} + +void run_container_grow(run_container_t *run, int32_t min, bool copy) { + int32_t newCapacity = + (run->capacity == 0) + ? RUN_DEFAULT_INIT_SIZE + : run->capacity < 64 ? run->capacity * 2 + : run->capacity < 1024 ? run->capacity * 3 / 2 + : run->capacity * 5 / 4; + if (newCapacity < min) newCapacity = min; + run->capacity = newCapacity; + assert(run->capacity >= min); + if (copy) { + rle16_t *oldruns = run->runs; + run->runs = + (rle16_t *)realloc(oldruns, run->capacity * sizeof(rle16_t)); + if (run->runs == NULL) free(oldruns); + } else { + // Jon Strabala reports that some tools complain otherwise + if (run->runs != NULL) { + free(run->runs); + } + run->runs = (rle16_t *)malloc(run->capacity * sizeof(rle16_t)); + } + // handle the case where realloc fails + if (run->runs == NULL) { + fprintf(stderr, "could not allocate memory\n"); + } + assert(run->runs != NULL); +} + +/* copy one container into another */ +void run_container_copy(const run_container_t *src, run_container_t *dst) { + const int32_t n_runs = src->n_runs; + if (src->n_runs > dst->capacity) { + run_container_grow(dst, n_runs, false); + } + dst->n_runs = n_runs; + memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); +} + +/* Compute the union of `src_1' 
 and `src_2' and write the result to `dst'
 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
void run_container_union(const run_container_t *src_1,
                         const run_container_t *src_2, run_container_t *dst) {
    // TODO: this could be a lot more efficient

    // we start out with inexpensive checks
    const bool if1 = run_container_is_full(src_1);
    const bool if2 = run_container_is_full(src_2);
    if (if1 || if2) {
        // a full container absorbs the union
        if (if1) {
            run_container_copy(src_1, dst);
            return;
        }
        if (if2) {
            run_container_copy(src_2, dst);
            return;
        }
    }
    const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
    if (dst->capacity < neededcapacity)
        run_container_grow(dst, neededcapacity, false);
    dst->n_runs = 0;
    int32_t rlepos = 0;
    int32_t xrlepos = 0;

    rle16_t previousrle;
    // seed the output with whichever input run starts first
    if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
        previousrle = run_container_append_first(dst, src_1->runs[rlepos]);
        rlepos++;
    } else {
        previousrle = run_container_append_first(dst, src_2->runs[xrlepos]);
        xrlepos++;
    }

    // two-way merge on run start values; append fuses overlapping runs
    while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
        rle16_t newrl;
        if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
            newrl = src_1->runs[rlepos];
            rlepos++;
        } else {
            newrl = src_2->runs[xrlepos];
            xrlepos++;
        }
        run_container_append(dst, newrl, &previousrle);
    }
    while (xrlepos < src_2->n_runs) {
        run_container_append(dst, src_2->runs[xrlepos], &previousrle);
        xrlepos++;
    }
    while (rlepos < src_1->n_runs) {
        run_container_append(dst, src_1->runs[rlepos], &previousrle);
        rlepos++;
    }
}

/* Compute the union of `src_1' and `src_2' and write the result to `src_1'
 */
void run_container_union_inplace(run_container_t *src_1,
                                 const run_container_t *src_2) {
    // TODO: this could be a lot more efficient

    // we start out with inexpensive checks
    const bool if1 = run_container_is_full(src_1);
    const bool if2 = run_container_is_full(src_2);
    if (if1 || if2) {
        if (if1) {
            // src_1 already covers everything
            return;
        }
        if (if2) {
            run_container_copy(src_2, src_1);
            return;
        }
    }
    // we move the data to the end of the current array
    const int32_t maxoutput = src_1->n_runs + src_2->n_runs;
    const int32_t neededcapacity = maxoutput + src_1->n_runs;
    if (src_1->capacity < neededcapacity)
        run_container_grow(src_1, neededcapacity, true);
    // shifting lets the merged output be written from the front without
    // overwriting its own input
    memmove(src_1->runs + maxoutput, src_1->runs,
            src_1->n_runs * sizeof(rle16_t));
    rle16_t *inputsrc1 = src_1->runs + maxoutput;
    const int32_t input1nruns = src_1->n_runs;
    src_1->n_runs = 0;
    int32_t rlepos = 0;
    int32_t xrlepos = 0;

    rle16_t previousrle;
    if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
        previousrle = run_container_append_first(src_1, inputsrc1[rlepos]);
        rlepos++;
    } else {
        previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]);
        xrlepos++;
    }
    while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
        rle16_t newrl;
        if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
            newrl = inputsrc1[rlepos];
            rlepos++;
        } else {
            newrl = src_2->runs[xrlepos];
            xrlepos++;
        }
        run_container_append(src_1, newrl, &previousrle);
    }
    while (xrlepos < src_2->n_runs) {
        run_container_append(src_1, src_2->runs[xrlepos], &previousrle);
        xrlepos++;
    }
    while (rlepos < input1nruns) {
        run_container_append(src_1, inputsrc1[rlepos], &previousrle);
        rlepos++;
    }
}

/* Compute the symmetric difference of `src_1' and `src_2' and write the result
 * to `dst'
 * It is assumed that `dst' is distinct from both `src_1' and `src_2'.
*/ +void run_container_xor(const run_container_t *src_1, + const run_container_t *src_2, run_container_t *dst) { + // don't bother to convert xor with full range into negation + // since negation is implemented similarly + + const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; + if (dst->capacity < neededcapacity) + run_container_grow(dst, neededcapacity, false); + + int32_t pos1 = 0; + int32_t pos2 = 0; + dst->n_runs = 0; + + while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { + if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { + run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, + src_1->runs[pos1].length); + pos1++; + } else { + run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, + src_2->runs[pos2].length); + pos2++; + } + } + while (pos1 < src_1->n_runs) { + run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, + src_1->runs[pos1].length); + pos1++; + } + + while (pos2 < src_2->n_runs) { + run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, + src_2->runs[pos2].length); + pos2++; + } +} + +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. 
*/ +void run_container_intersection(const run_container_t *src_1, + const run_container_t *src_2, + run_container_t *dst) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + run_container_copy(src_2, dst); + return; + } + if (if2) { + run_container_copy(src_1, dst); + return; + } + } + // TODO: this could be a lot more efficient, could use SIMD optimizations + const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; + if (dst->capacity < neededcapacity) + run_container_grow(dst, neededcapacity, false); + dst->n_runs = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // they overlap + const int32_t lateststart = start > xstart ? 
start : xstart; + int32_t earliestend; + if (end == xend) { // improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + + } else { // end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } + dst->runs[dst->n_runs].value = (uint16_t)lateststart; + dst->runs[dst->n_runs].length = + (uint16_t)(earliestend - lateststart - 1); + dst->n_runs++; + } + } +} + +/* Compute the size of the intersection of src_1 and src_2 . */ +int run_container_intersection_cardinality(const run_container_t *src_1, + const run_container_t *src_2) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + return run_container_cardinality(src_2); + } + if (if2) { + return run_container_cardinality(src_1); + } + } + int answer = 0; + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // 
they overlap + const int32_t lateststart = start > xstart ? start : xstart; + int32_t earliestend; + if (end == xend) { // improbable + earliestend = end; + rlepos++; + xrlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else if (end < xend) { + earliestend = end; + rlepos++; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + + } else { // end > xend + earliestend = xend; + xrlepos++; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } + answer += earliestend - lateststart; + } + } + return answer; +} + +bool run_container_intersect(const run_container_t *src_1, + const run_container_t *src_2) { + const bool if1 = run_container_is_full(src_1); + const bool if2 = run_container_is_full(src_2); + if (if1 || if2) { + if (if1) { + return !run_container_empty(src_2); + } + if (if2) { + return !run_container_empty(src_1); + } + } + int32_t rlepos = 0; + int32_t xrlepos = 0; + int32_t start = src_1->runs[rlepos].value; + int32_t end = start + src_1->runs[rlepos].length + 1; + int32_t xstart = src_2->runs[xrlepos].value; + int32_t xend = xstart + src_2->runs[xrlepos].length + 1; + while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { + if (end <= xstart) { + ++rlepos; + if (rlepos < src_1->n_runs) { + start = src_1->runs[rlepos].value; + end = start + src_1->runs[rlepos].length + 1; + } + } else if (xend <= start) { + ++xrlepos; + if (xrlepos < src_2->n_runs) { + xstart = src_2->runs[xrlepos].value; + xend = xstart + src_2->runs[xrlepos].length + 1; + } + } else { // they overlap + return true; + } + } + return false; +} + + +/* Compute the difference of src_1 and src_2 and write the result to + * dst. 
It is assumed that dst is distinct from both src_1 and src_2. */ +void run_container_andnot(const run_container_t *src_1, + const run_container_t *src_2, run_container_t *dst) { + // following Java implementation as of June 2016 + + if (dst->capacity < src_1->n_runs + src_2->n_runs) + run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); + + dst->n_runs = 0; + + int rlepos1 = 0; + int rlepos2 = 0; + int32_t start = src_1->runs[rlepos1].value; + int32_t end = start + src_1->runs[rlepos1].length + 1; + int32_t start2 = src_2->runs[rlepos2].value; + int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; + + while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { + if (end <= start2) { + // output the first run + dst->runs[dst->n_runs++] = + (rle16_t){.value = (uint16_t)start, + .length = (uint16_t)(end - start - 1)}; + rlepos1++; + if (rlepos1 < src_1->n_runs) { + start = src_1->runs[rlepos1].value; + end = start + src_1->runs[rlepos1].length + 1; + } + } else if (end2 <= start) { + // exit the second run + rlepos2++; + if (rlepos2 < src_2->n_runs) { + start2 = src_2->runs[rlepos2].value; + end2 = start2 + src_2->runs[rlepos2].length + 1; + } + } else { + if (start < start2) { + dst->runs[dst->n_runs++] = + (rle16_t){.value = (uint16_t)start, + .length = (uint16_t)(start2 - start - 1)}; + } + if (end2 < end) { + start = end2; + } else { + rlepos1++; + if (rlepos1 < src_1->n_runs) { + start = src_1->runs[rlepos1].value; + end = start + src_1->runs[rlepos1].length + 1; + } + } + } + } + if (rlepos1 < src_1->n_runs) { + dst->runs[dst->n_runs++] = (rle16_t){ + .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)}; + rlepos1++; + if (rlepos1 < src_1->n_runs) { + memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, + sizeof(rle16_t) * (src_1->n_runs - rlepos1)); + dst->n_runs += src_1->n_runs - rlepos1; + } + } +} + +int run_container_to_uint32_array(void *vout, const run_container_t *cont, + uint32_t base) { + int outpos = 0; + uint32_t 
*out = (uint32_t *)vout; + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + for (int j = 0; j <= le; ++j) { + uint32_t val = run_start + j; + memcpy(out + outpos, &val, + sizeof(uint32_t)); // should be compiled as a MOV on x64 + outpos++; + } + } + return outpos; +} + +/* + * Print this container using printf (useful for debugging). + */ +void run_container_printf(const run_container_t *cont) { + for (int i = 0; i < cont->n_runs; ++i) { + uint16_t run_start = cont->runs[i].value; + uint16_t le = cont->runs[i].length; + printf("[%d,%d]", run_start, run_start + le); + } +} + +/* + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. + */ +void run_container_printf_as_uint32_array(const run_container_t *cont, + uint32_t base) { + if (cont->n_runs == 0) return; + { + uint32_t run_start = base + cont->runs[0].value; + uint16_t le = cont->runs[0].length; + printf("%u", run_start); + for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j); + } + for (int32_t i = 1; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j); + } +} + +int32_t run_container_serialize(const run_container_t *container, char *buf) { + int32_t l, off; + + memcpy(buf, &container->n_runs, off = sizeof(container->n_runs)); + memcpy(&buf[off], &container->capacity, sizeof(container->capacity)); + off += sizeof(container->capacity); + + l = sizeof(rle16_t) * container->n_runs; + memcpy(&buf[off], container->runs, l); + return (off + l); +} + +int32_t run_container_write(const run_container_t *container, char *buf) { + memcpy(buf, &container->n_runs, sizeof(uint16_t)); + memcpy(buf + sizeof(uint16_t), container->runs, + container->n_runs * sizeof(rle16_t)); + return run_container_size_in_bytes(container); +} + +int32_t 
run_container_read(int32_t cardinality, run_container_t *container, + const char *buf) { + (void)cardinality; + memcpy(&container->n_runs, buf, sizeof(uint16_t)); + if (container->n_runs > container->capacity) + run_container_grow(container, container->n_runs, false); + if(container->n_runs > 0) { + memcpy(container->runs, buf + sizeof(uint16_t), + container->n_runs * sizeof(rle16_t)); + } + return run_container_size_in_bytes(container); +} + +uint32_t run_container_serialization_len(const run_container_t *container) { + return (sizeof(container->n_runs) + sizeof(container->capacity) + + sizeof(rle16_t) * container->n_runs); +} + +void *run_container_deserialize(const char *buf, size_t buf_len) { + run_container_t *ptr; + + if (buf_len < 8 /* n_runs + capacity */) + return (NULL); + else + buf_len -= 8; + + if ((ptr = (run_container_t *)malloc(sizeof(run_container_t))) != NULL) { + size_t len; + int32_t off; + + memcpy(&ptr->n_runs, buf, off = 4); + memcpy(&ptr->capacity, &buf[off], 4); + off += 4; + + len = sizeof(rle16_t) * ptr->n_runs; + + if (len != buf_len) { + free(ptr); + return (NULL); + } + + if ((ptr->runs = (rle16_t *)malloc(len)) == NULL) { + free(ptr); + return (NULL); + } + + memcpy(ptr->runs, &buf[off], len); + + /* Check if returned values are monotonically increasing */ + for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) { + if (ptr->runs[i].value < j) { + free(ptr->runs); + free(ptr); + return (NULL); + } else + j = ptr->runs[i].value; + } + } + + return (ptr); +} + +bool run_container_iterate(const run_container_t *cont, uint32_t base, + roaring_iterator iterator, void *ptr) { + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + + for (int j = 0; j <= le; ++j) + if (!iterator(run_start + j, ptr)) return false; + } + return true; +} + +bool run_container_iterate64(const run_container_t *cont, uint32_t base, + roaring_iterator64 iterator, uint64_t high_bits, + void 
*ptr) { + for (int i = 0; i < cont->n_runs; ++i) { + uint32_t run_start = base + cont->runs[i].value; + uint16_t le = cont->runs[i].length; + + for (int j = 0; j <= le; ++j) + if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) + return false; + } + return true; +} + +bool run_container_is_subset(const run_container_t *container1, + const run_container_t *container2) { + int i1 = 0, i2 = 0; + while (i1 < container1->n_runs && i2 < container2->n_runs) { + int start1 = container1->runs[i1].value; + int stop1 = start1 + container1->runs[i1].length; + int start2 = container2->runs[i2].value; + int stop2 = start2 + container2->runs[i2].length; + if (start1 < start2) { + return false; + } else { // start1 >= start2 + if (stop1 < stop2) { + i1++; + } else if (stop1 == stop2) { + i1++; + i2++; + } else { // stop1 > stop2 + i2++; + } + } + } + if (i1 == container1->n_runs) { + return true; + } else { + return false; + } +} + +// TODO: write smart_append_exclusive version to match the overloaded 1 param +// Java version (or is it even used?) + +// follows the Java implementation closely +// length is the rle-value. Ie, run [10,12) uses a length value 1. +void run_container_smart_append_exclusive(run_container_t *src, + const uint16_t start, + const uint16_t length) { + int old_end; + rle16_t *last_run = src->n_runs ? 
src->runs + (src->n_runs - 1) : NULL; + rle16_t *appended_last_run = src->runs + src->n_runs; + + if (!src->n_runs || + (start > (old_end = last_run->value + last_run->length + 1))) { + *appended_last_run = (rle16_t){.value = start, .length = length}; + src->n_runs++; + return; + } + if (old_end == start) { + // we merge + last_run->length += (length + 1); + return; + } + int new_end = start + length + 1; + + if (start == last_run->value) { + // wipe out previous + if (new_end < old_end) { + *last_run = (rle16_t){.value = (uint16_t)new_end, + .length = (uint16_t)(old_end - new_end - 1)}; + return; + } else if (new_end > old_end) { + *last_run = (rle16_t){.value = (uint16_t)old_end, + .length = (uint16_t)(new_end - old_end - 1)}; + return; + } else { + src->n_runs--; + return; + } + } + last_run->length = start - last_run->value - 1; + if (new_end < old_end) { + *appended_last_run = + (rle16_t){.value = (uint16_t)new_end, + .length = (uint16_t)(old_end - new_end - 1)}; + src->n_runs++; + } else if (new_end > old_end) { + *appended_last_run = + (rle16_t){.value = (uint16_t)old_end, + .length = (uint16_t)(new_end - old_end - 1)}; + src->n_runs++; + } +} + +bool run_container_select(const run_container_t *container, + uint32_t *start_rank, uint32_t rank, + uint32_t *element) { + for (int i = 0; i < container->n_runs; i++) { + uint16_t length = container->runs[i].length; + if (rank <= *start_rank + length) { + uint16_t value = container->runs[i].value; + *element = value + rank - (*start_rank); + return true; + } else + *start_rank += length + 1; + } + return false; +} + +int run_container_rank(const run_container_t *container, uint16_t x) { + int sum = 0; + uint32_t x32 = x; + for (int i = 0; i < container->n_runs; i++) { + uint32_t startpoint = container->runs[i].value; + uint32_t length = container->runs[i].length; + uint32_t endpoint = length + startpoint; + if (x <= endpoint) { + if (x < startpoint) break; + return sum + (x32 - startpoint) + 1; + } else { + sum += 
length + 1; + } + } + return sum; +} +/* end file src/containers/run.c */ +/* begin file src/roaring.c */ +#include +#include +#include +#include +#include +#include + +static inline bool is_cow(const roaring_bitmap_t *r) { + return r->high_low_container.flags & ROARING_FLAG_COW; +} +static inline bool is_frozen(const roaring_bitmap_t *r) { + return r->high_low_container.flags & ROARING_FLAG_FROZEN; +} + +// this is like roaring_bitmap_add, but it populates pointer arguments in such a +// way +// that we can recover the container touched, which, in turn can be used to +// accelerate some functions (when you repeatedly need to add to the same +// container) +static inline void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, + uint32_t val, + uint8_t *typecode, + int *index) { + uint16_t hb = val >> 16; + const int i = ra_get_index(&r->high_low_container, hb); + if (i >= 0) { + ra_unshare_container_at_index(&r->high_low_container, i); + void *container = + ra_get_container_at_index(&r->high_low_container, i, typecode); + uint8_t newtypecode = *typecode; + void *container2 = + container_add(container, val & 0xFFFF, *typecode, &newtypecode); + *index = i; + if (container2 != container) { + container_free(container, *typecode); + ra_set_container_at_index(&r->high_low_container, i, container2, + newtypecode); + *typecode = newtypecode; + return container2; + } else { + return container; + } + } else { + array_container_t *newac = array_container_create(); + void *container = container_add(newac, val & 0xFFFF, + ARRAY_CONTAINER_TYPE_CODE, typecode); + // we could just assume that it stays an array container + ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, + container, *typecode); + *index = -i - 1; + return container; + } +} + +roaring_bitmap_t *roaring_bitmap_create(void) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); + if (!ans) { + return NULL; + } + ra_init(&ans->high_low_container); + return ans; +} + 
+roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { + roaring_bitmap_t *ans = + (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); + if (!ans) { + return NULL; + } + bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); + if (!is_ok) { + free(ans); + return NULL; + } + return ans; +} + +void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals) { + void *container = NULL; // hold value of last container touched + uint8_t typecode = 0; // typecode of last container touched + uint32_t prev = 0; // previous valued inserted + size_t i = 0; // index of value + int containerindex = 0; + if (n_args == 0) return; + uint32_t val; + memcpy(&val, vals + i, sizeof(val)); + container = + containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); + prev = val; + i++; + for (; i < n_args; i++) { + memcpy(&val, vals + i, sizeof(val)); + if (((prev ^ val) >> 16) == + 0) { // no need to seek the container, it is at hand + // because we already have the container at hand, we can do the + // insertion + // automatically, bypassing the roaring_bitmap_add call + uint8_t newtypecode = typecode; + void *container2 = + container_add(container, val & 0xFFFF, typecode, &newtypecode); + if (container2 != container) { // rare instance when we need to + // change the container type + container_free(container, typecode); + ra_set_container_at_index(&r->high_low_container, + containerindex, container2, + newtypecode); + typecode = newtypecode; + container = container2; + } + } else { + container = containerptr_roaring_bitmap_add(r, val, &typecode, + &containerindex); + } + prev = val; + } +} + +roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { + roaring_bitmap_t *answer = roaring_bitmap_create(); + roaring_bitmap_add_many(answer, n_args, vals); + return answer; +} + +roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) 
{ + // todo: could be greatly optimized but we do not expect this call to ever + // include long lists + roaring_bitmap_t *answer = roaring_bitmap_create(); + va_list ap; + va_start(ap, n_args); + for (size_t i = 1; i <= n_args; i++) { + uint32_t val = va_arg(ap, uint32_t); + roaring_bitmap_add(answer, val); + } + va_end(ap); + return answer; +} + +static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) { + return (a < b) ? a : b; +} + +static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) { + return (a < b) ? a : b; +} + +roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, + uint32_t step) { + if(max >= UINT64_C(0x100000000)) { + max = UINT64_C(0x100000000); + } + if (step == 0) return NULL; + if (max <= min) return NULL; + roaring_bitmap_t *answer = roaring_bitmap_create(); + if (step >= (1 << 16)) { + for (uint32_t value = (uint32_t)min; value < max; value += step) { + roaring_bitmap_add(answer, value); + } + return answer; + } + uint64_t min_tmp = min; + do { + uint32_t key = (uint32_t)min_tmp >> 16; + uint32_t container_min = min_tmp & 0xFFFF; + uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); + uint8_t type; + void *container = container_from_range(&type, container_min, + container_max, (uint16_t)step); + ra_append(&answer->high_low_container, key, container, type); + uint32_t gap = container_max - container_min + step - 1; + min_tmp += gap - (gap % step); + } while (min_tmp < max); + // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step + return answer; +} + +void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { + if (min > max) { + return; + } + + uint32_t min_key = min >> 16; + uint32_t max_key = max >> 16; + + int32_t num_required_containers = max_key - min_key + 1; + int32_t suffix_length = count_greater(ra->high_low_container.keys, + ra->high_low_container.size, + max_key); + int32_t prefix_length = 
count_less(ra->high_low_container.keys, + ra->high_low_container.size - suffix_length, + min_key); + int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length; + + if (num_required_containers > common_length) { + ra_shift_tail(&ra->high_low_container, suffix_length, + num_required_containers - common_length); + } + + int32_t src = prefix_length + common_length - 1; + int32_t dst = ra->high_low_container.size - suffix_length - 1; + for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0 + uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0; + uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff; + void* new_container; + uint8_t new_type; + + if (src >= 0 && ra->high_low_container.keys[src] == key) { + ra_unshare_container_at_index(&ra->high_low_container, src); + new_container = container_add_range(ra->high_low_container.containers[src], + ra->high_low_container.typecodes[src], + container_min, container_max, &new_type); + if (new_container != ra->high_low_container.containers[src]) { + container_free(ra->high_low_container.containers[src], + ra->high_low_container.typecodes[src]); + } + src--; + } else { + new_container = container_from_range(&new_type, container_min, + container_max+1, 1); + } + ra_replace_key_and_container_at_index(&ra->high_low_container, dst, + key, new_container, new_type); + dst--; + } +} + +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) { + if (min > max) { + return; + } + + uint32_t min_key = min >> 16; + uint32_t max_key = max >> 16; + + int32_t src = count_less(ra->high_low_container.keys, ra->high_low_container.size, min_key); + int32_t dst = src; + while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) { + uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0; + uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? 
(max & 0xffff) : 0xffff; + ra_unshare_container_at_index(&ra->high_low_container, src); + void *new_container; + uint8_t new_type; + new_container = container_remove_range(ra->high_low_container.containers[src], + ra->high_low_container.typecodes[src], + container_min, container_max, + &new_type); + if (new_container != ra->high_low_container.containers[src]) { + container_free(ra->high_low_container.containers[src], + ra->high_low_container.typecodes[src]); + } + if (new_container) { + ra_replace_key_and_container_at_index(&ra->high_low_container, dst, + ra->high_low_container.keys[src], + new_container, new_type); + dst++; + } + src++; + } + if (src > dst) { + ra_shift_tail(&ra->high_low_container, ra->high_low_container.size - src, dst - src); + } +} + +void roaring_bitmap_printf(const roaring_bitmap_t *ra) { + printf("{"); + for (int i = 0; i < ra->high_low_container.size; ++i) { + container_printf_as_uint32_array( + ra->high_low_container.containers[i], + ra->high_low_container.typecodes[i], + ((uint32_t)ra->high_low_container.keys[i]) << 16); + if (i + 1 < ra->high_low_container.size) printf(","); + } + printf("}"); +} + +void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) { + printf("{"); + for (int i = 0; i < ra->high_low_container.size; ++i) { + printf("%d: %s (%d)", ra->high_low_container.keys[i], + get_full_container_name(ra->high_low_container.containers[i], + ra->high_low_container.typecodes[i]), + container_get_cardinality(ra->high_low_container.containers[i], + ra->high_low_container.typecodes[i])); + if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) { + printf( + "(shared count = %" PRIu32 " )", + ((shared_container_t *)(ra->high_low_container.containers[i])) + ->counter); + } + + if (i + 1 < ra->high_low_container.size) printf(", "); + } + printf("}"); +} + +typedef struct min_max_sum_s { + uint32_t min; + uint32_t max; + uint64_t sum; +} min_max_sum_t; + +static bool min_max_sum_fnc(uint32_t value, void *param) { + 
    /* NOTE(review): this continues a roaring_iterator callback whose signature
     * begins before this chunk; it folds each visited value into a running
     * min/max/sum accumulator (used by roaring_bitmap_statistics below). */
    min_max_sum_t *mms = (min_max_sum_t *)param;
    if (value > mms->max) mms->max = value;
    if (value < mms->min) mms->min = value;
    mms->sum += value;
    return true;  // we always process all data points
}

/**
* (For advanced users.)
* Collect statistics about the bitmap
*/
void roaring_bitmap_statistics(const roaring_bitmap_t *ra,
                               roaring_statistics_t *stat) {
    memset(stat, 0, sizeof(*stat));
    stat->n_containers = ra->high_low_container.size;
    stat->cardinality = roaring_bitmap_get_cardinality(ra);
    // one full pass over all values to compute min/max/sum
    min_max_sum_t mms;
    mms.min = UINT32_C(0xFFFFFFFF);
    mms.max = UINT32_C(0);
    mms.sum = 0;
    roaring_iterate(ra, &min_max_sum_fnc, &mms);
    stat->min_value = mms.min;
    stat->max_value = mms.max;
    stat->sum_value = mms.sum;

    // per-container-type counts, value counts and byte sizes
    for (int i = 0; i < ra->high_low_container.size; ++i) {
        uint8_t truetype =
            get_container_type(ra->high_low_container.containers[i],
                               ra->high_low_container.typecodes[i]);
        uint32_t card =
            container_get_cardinality(ra->high_low_container.containers[i],
                                      ra->high_low_container.typecodes[i]);
        uint32_t sbytes =
            container_size_in_bytes(ra->high_low_container.containers[i],
                                    ra->high_low_container.typecodes[i]);
        switch (truetype) {
            case BITSET_CONTAINER_TYPE_CODE:
                stat->n_bitset_containers++;
                stat->n_values_bitset_containers += card;
                stat->n_bytes_bitset_containers += sbytes;
                break;
            case ARRAY_CONTAINER_TYPE_CODE:
                stat->n_array_containers++;
                stat->n_values_array_containers += card;
                stat->n_bytes_array_containers += sbytes;
                break;
            case RUN_CONTAINER_TYPE_CODE:
                stat->n_run_containers++;
                stat->n_values_run_containers += card;
                stat->n_bytes_run_containers += sbytes;
                break;
            default:
                // unknown true container type: a bug somewhere upstream
                assert(false);
                __builtin_unreachable();
        }
    }
}

/* Copies `r` (honoring its copy-on-write setting); returns NULL on
 * allocation failure. The caller owns the result and must free it. */
roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
    roaring_bitmap_t *ans =
        (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
    if (!ans) {
        return NULL;
    }
    bool is_ok = ra_copy(&r->high_low_container, &ans->high_low_container,
                         is_cow(r));
    if (!is_ok) {
        // the container-array copy failed; release the shell we allocated
        free(ans);
        return NULL;
    }
    roaring_bitmap_set_copy_on_write(ans, is_cow(r));
    return ans;
}

/* Replaces the contents of `dest` with a copy of `src`'s containers;
 * returns false if the underlying copy fails. */
bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
                              const roaring_bitmap_t *src) {
    return ra_overwrite(&src->high_low_container, &dest->high_low_container,
                        is_cow(src));
}

/* Frees a bitmap. Frozen bitmaps skip ra_clear, so only the top-level
 * struct is released for them (their container storage is not cleared
 * here). */
void roaring_bitmap_free(const roaring_bitmap_t *r) {
    if (!is_frozen(r)) {
        ra_clear((roaring_array_t*)&r->high_low_container);
    }
    free((roaring_bitmap_t*)r);
}

/* Empties the bitmap without freeing the top-level structure. */
void roaring_bitmap_clear(roaring_bitmap_t *r) {
    ra_reset(&r->high_low_container);
}

/* Adds `val` to the bitmap (in place). */
void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
    const uint16_t hb = val >> 16;
    const int i = ra_get_index(&r->high_low_container, hb);
    uint8_t typecode;
    if (i >= 0) {
        // a container for the high 16 bits already exists
        ra_unshare_container_at_index(&r->high_low_container, i);
        void *container =
            ra_get_container_at_index(&r->high_low_container, i, &typecode);
        uint8_t newtypecode = typecode;
        void *container2 =
            container_add(container, val & 0xFFFF, typecode, &newtypecode);
        if (container2 != container) {
            // the add changed the container's representation; swap it in
            container_free(container, typecode);
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        }
    } else {
        // no container yet for this key: start with an array container
        array_container_t *newac = array_container_create();
        void *container = container_add(newac, val & 0xFFFF,
                                        ARRAY_CONTAINER_TYPE_CODE, &typecode);
        // we could just assume that it stays an array container
        ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
                                   container, typecode);
    }
}

/* Adds `val`; returns true iff the value was not already present. */
bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
    const uint16_t hb = val >> 16;
    const int i = ra_get_index(&r->high_low_container, hb);
    uint8_t typecode;
    bool result = false;
    if (i >= 0) {
        ra_unshare_container_at_index(&r->high_low_container, i);
        void *container =
            ra_get_container_at_index(&r->high_low_container, i, &typecode);

        const int oldCardinality =
            container_get_cardinality(container, typecode);

        uint8_t newtypecode = typecode;
        void *container2 =
            container_add(container, val & 0xFFFF, typecode, &newtypecode);
        if (container2 != container) {
            // representation changed; treated as an insertion
            // NOTE(review): assumes container_add only changes the
            // representation when the value was actually added — confirm
            container_free(container, typecode);
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
            result = true;
        } else {
            // same container: detect insertion via the cardinality delta
            const int newCardinality =
                container_get_cardinality(container, newtypecode);

            result = oldCardinality != newCardinality;
        }
    } else {
        array_container_t *newac = array_container_create();
        void *container = container_add(newac, val & 0xFFFF,
                                        ARRAY_CONTAINER_TYPE_CODE, &typecode);
        // we could just assume that it stays an array container
        ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
                                   container, typecode);
        result = true;
    }

    return result;
}

/* Removes `val` from the bitmap (in place); no-op if absent. */
void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
    const uint16_t hb = val >> 16;
    const int i = ra_get_index(&r->high_low_container, hb);
    uint8_t typecode;
    if (i >= 0) {
        ra_unshare_container_at_index(&r->high_low_container, i);
        void *container =
            ra_get_container_at_index(&r->high_low_container, i, &typecode);
        uint8_t newtypecode = typecode;
        void *container2 =
            container_remove(container, val & 0xFFFF, typecode, &newtypecode);
        if (container2 != container) {
            container_free(container, typecode);
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        }
        // drop the container entirely if it became empty
        // (the set below is redundant when it was already stored above,
        // but harmless)
        if (container_get_cardinality(container2, newtypecode) != 0) {
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        } else {
            ra_remove_at_index_and_free(&r->high_low_container, i);
        }
    }
}

/* Removes `val`; returns true iff the value was present. */
bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
    const uint16_t hb = val >> 16;
    const int i = ra_get_index(&r->high_low_container, hb);
    uint8_t typecode;
    bool result = false;
    if (i >= 0) {
        ra_unshare_container_at_index(&r->high_low_container, i);
        void *container =
            ra_get_container_at_index(&r->high_low_container, i, &typecode);

        const int oldCardinality =
            container_get_cardinality(container, typecode);

        uint8_t newtypecode = typecode;
        void *container2 =
            container_remove(container, val & 0xFFFF, typecode, &newtypecode);
        if (container2 != container) {
            container_free(container, typecode);
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        }

        const int newCardinality =
            container_get_cardinality(container2, newtypecode);

        if (newCardinality != 0) {
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        } else {
            // container became empty: remove and free it
            ra_remove_at_index_and_free(&r->high_low_container, i);
        }

        result = oldCardinality != newCardinality;
    }
    return result;
}

/* Removes `n_args` values; caches the last container index so runs of
 * values sharing the same high 16 bits avoid repeated binary searches. */
void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
                                const uint32_t *vals) {
    if (n_args == 0 || r->high_low_container.size == 0) {
        return;
    }
    int32_t pos = -1; // position of the container used in the previous iteration
    for (size_t i = 0; i < n_args; i++) {
        uint16_t key = (uint16_t)(vals[i] >> 16);
        if (pos < 0 || key != r->high_low_container.keys[pos]) {
            pos = ra_get_index(&r->high_low_container, key);
        }
        if (pos >= 0) {
            uint8_t new_typecode;
            void *new_container;
            new_container = container_remove(r->high_low_container.containers[pos],
                                             vals[i] & 0xffff,
                                             r->high_low_container.typecodes[pos],
                                             &new_typecode);
            if (new_container != r->high_low_container.containers[pos]) {
                container_free(r->high_low_container.containers[pos],
                               r->high_low_container.typecodes[pos]);
                ra_replace_key_and_container_at_index(&r->high_low_container,
                                                      pos, key, new_container,
                                                      new_typecode);
            }
            if (!container_nonzero_cardinality(new_container, new_typecode)) {
                container_free(new_container, new_typecode);
                ra_remove_at_index(&r->high_low_container, pos);
                pos = -1;  // cached index is now stale
            }
        }
    }
}

/* Intersection of two bitmaps: merge-walk the sorted key arrays, keeping
 * only keys present in both, and only non-empty result containers. */
// there should be some SIMD optimizations possible here
roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
                                     const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    // result can have at most min(length1, length2) containers
    uint32_t neededcap = length1 > length2 ? length2 : length1;
    roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));

    int pos1 = 0, pos2 = 0;

    while (pos1 < length1 && pos2 < length2) {
        const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
        const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        if (s1 == s2) {
            uint8_t container_type_1, container_type_2;
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c = container_and(c1, container_type_1, c2, container_type_2,
                                    &container_result_type);
            if (container_nonzero_cardinality(c, container_result_type)) {
                ra_append(&answer->high_low_container, s1, c,
                          container_result_type);
            } else {
                container_free(
                    c, container_result_type);  // otherwise:memory leak!
            }
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {  // s1 < s2
            pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
        }
    }
    return answer;
}

/**
 * Compute the union of 'number' bitmaps.
 * Uses lazy unions plus one final repair pass for efficiency.
 */
roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
                                         const roaring_bitmap_t **x) {
    if (number == 0) {
        return roaring_bitmap_create();
    }
    if (number == 1) {
        return roaring_bitmap_copy(x[0]);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
    for (size_t i = 2; i < number; i++) {
        roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION);
    }
    // fix up cardinalities/representations deferred by the lazy ops
    roaring_bitmap_repair_after_lazy(answer);
    return answer;
}

/**
 * Compute the xor of 'number' bitmaps.
 * Same lazy-then-repair strategy as roaring_bitmap_or_many.
 */
roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
                                          const roaring_bitmap_t **x) {
    if (number == 0) {
        return roaring_bitmap_create();
    }
    if (number == 1) {
        return roaring_bitmap_copy(x[0]);
    }
    roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]);
    for (size_t i = 2; i < number; i++) {
        roaring_bitmap_lazy_xor_inplace(answer, x[i]);
    }
    roaring_bitmap_repair_after_lazy(answer);
    return answer;
}

// inplace and (modifies its first argument).
void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
                                const roaring_bitmap_t *x2) {
    if (x1 == x2) return;  // x1 AND x1 is x1: nothing to do
    int pos1 = 0, pos2 = 0, intersection_size = 0;
    const int length1 = ra_get_size(&x1->high_low_container);
    const int length2 = ra_get_size(&x2->high_low_container);

    // any skipped-over or newly emptied containers in x1
    // have to be freed.
    while (pos1 < length1 && pos2 < length2) {
        const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
        const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        if (s1 == s2) {
            uint8_t typecode1, typecode2, typecode_result;
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &typecode1);
            c1 = get_writable_copy_if_shared(c1, &typecode1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &typecode2);
            void *c =
                container_iand(c1, typecode1, c2, typecode2, &typecode_result);
            if (c != c1) {  // in this instance a new container was created, and
                            // we need to free the old one
                container_free(c1, typecode1);
            }
            if (container_nonzero_cardinality(c, typecode_result)) {
                // compact survivors to the front of x1's array
                ra_replace_key_and_container_at_index(&x1->high_low_container,
                                                      intersection_size, s1, c,
                                                      typecode_result);
                intersection_size++;
            } else {
                container_free(c, typecode_result);
            }
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {
            // keys only in x1 drop out of the intersection; free as we skip
            pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
        }
    }

    // if we ended early because x2 ran out, then all remaining in x1 should be
    // freed
    while (pos1 < length1) {
        container_free(x1->high_low_container.containers[pos1],
                       x1->high_low_container.typecodes[pos1]);
        ++pos1;
    }

    // all containers after this have either been copied or freed
    ra_downsize(&x1->high_low_container, intersection_size);
}

/* Union of two bitmaps: merge-walk; equal keys are OR-ed, unmatched
 * containers are copied (or shared under copy-on-write). */
roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
                                    const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c = container_or(c1, container_type_1, c2, container_type_2,
                                   &container_result_type);
            // since we assume that the initial containers are non-empty, the
            // result here
            // can only be non-empty
            ra_append(&answer->high_low_container, s1, c,
                      container_result_type);
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            // c1 = container_clone(c1, container_type_1);
            c1 = get_copy_of_container(c1, &container_type_1, is_cow(x1));
            if (is_cow(x1)) {
                // under COW the source also points at the shared container
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          container_type_1);
            }
            ra_append(&answer->high_low_container, s1, c1, container_type_1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            // c2 = container_clone(c2, container_type_2);
            c2 = get_copy_of_container(c2, &container_type_2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }
            ra_append(&answer->high_low_container, s2, c2, container_type_2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    // copy whatever is left of the side that did not run out
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             is_cow(x2));
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

// inplace or (modifies its first argument).
/* In-place union: x1 |= x2. Equal keys are OR-ed in place (skipped when x1's
 * container is already full, since a full container cannot grow); keys only
 * in x2 are inserted into x1. */
void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
                               const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            if (!container_is_full(c1, container_type_1)) {
                c1 = get_writable_copy_if_shared(c1, &container_type_1);

                void *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                     pos2, &container_type_2);
                void *c =
                    container_ior(c1, container_type_1, c2, container_type_2,
                                  &container_result_type);
                if (c !=
                    c1) {  // in this instance a new container was created, and
                           // we need to free the old one
                    container_free(c1, container_type_1);
                }

                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          container_result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            // key only in x1: already part of the union, just advance
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 = get_copy_of_container(c2, &container_type_2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }

            // void *c2_clone = container_clone(c2, container_type_2);
            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       container_type_2);
            pos1++;
            length1++;  // x1 grew by one container
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    if (pos1 == length1) {
        // x1 ran out first: append copies of x2's remaining containers
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, is_cow(x2));
    }
}

/* Symmetric difference of two bitmaps: merge-walk; equal keys are XOR-ed
 * (possibly producing an empty container, which is dropped), unmatched
 * containers are copied (or shared under copy-on-write). */
roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
                                     const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c = container_xor(c1, container_type_1, c2, container_type_2,
                                    &container_result_type);

            if (container_nonzero_cardinality(c, container_result_type)) {
                ra_append(&answer->high_low_container, s1, c,
                          container_result_type);
            } else {
                // identical containers XOR to empty: don't keep it
                container_free(c, container_result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            c1 = get_copy_of_container(c1, &container_type_1, is_cow(x1));
            if (is_cow(x1)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          container_type_1);
            }
            ra_append(&answer->high_low_container, s1, c1, container_type_1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 = get_copy_of_container(c2, &container_type_2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }
            ra_append(&answer->high_low_container, s2, c2, container_type_2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             is_cow(x2));
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

// inplace xor (modifies its first argument).

void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
                                const roaring_bitmap_t *x2) {
    // x1 ^= x1 would have to empty x1; this routine does not support aliasing
    assert(x1 != x2);
    uint8_t container_result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }

    // XOR can have new containers inserted from x2, but can also
    // lose containers when x1 and x2 are nonempty and identical.

    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            c1 = get_writable_copy_if_shared(c1, &container_type_1);

            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c = container_ixor(c1, container_type_1, c2, container_type_2,
                                     &container_result_type);

            if (container_nonzero_cardinality(c, container_result_type)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          container_result_type);
                ++pos1;
            } else {
                // empty result: remove; note pos1 is NOT advanced because
                // removal shifts the following containers left
                container_free(c, container_result_type);
                ra_remove_at_index(&x1->high_low_container, pos1);
                --length1;
            }

            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 = get_copy_of_container(c2, &container_type_2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }

            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       container_type_2);
            pos1++;
            length1++;
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, is_cow(x2));
    }
}

/* Difference x1 \ x2. Keys only in x1 are copied through; equal keys are
 * ANDNOT-ed (empty results dropped); keys only in x2 are skipped. */
roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
                                        const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
        roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) && is_cow(x2));
        return empty_bitmap;
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));

    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = 0;
    uint16_t s2 = 0;
    while (true) {
        // unlike the other merges, keys are re-read at the top of each pass
        s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
        s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c =
                container_andnot(c1, container_type_1, c2, container_type_2,
                                 &container_result_type);

            if (container_nonzero_cardinality(c, container_result_type)) {
                ra_append(&answer->high_low_container, s1, c,
                          container_result_type);
            } else {
                container_free(c, container_result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
        } else if (s1 < s2) {  // s1 < s2
            // a run of keys present only in x1 survives unchanged
            const int next_pos1 =
                ra_advance_until(&x1->high_low_container, s2, pos1);
            ra_append_copy_range(&answer->high_low_container,
                                 &x1->high_low_container, pos1, next_pos1,
                                 is_cow(x1));
            // TODO : perhaps some of the copy_on_write should be based on
            // answer rather than x1 (more stringent?). Many similar cases
            pos1 = next_pos1;
            if (pos1 == length1) break;
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
            if (pos2 == length2) break;
        }
    }
    if (pos2 == length2) {
        // x2 exhausted: everything left in x1 survives
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

// inplace andnot (modifies its first argument).

void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
                                   const roaring_bitmap_t *x2) {
    // x1 \ x1 would empty x1; aliasing is not supported here
    assert(x1 != x2);

    uint8_t container_result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;
    int intersection_size = 0;  // write cursor for surviving containers

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_clear(x1);
        return;
    }

    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            c1 = get_writable_copy_if_shared(c1, &container_type_1);

            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c =
                container_iandnot(c1, container_type_1, c2, container_type_2,
                                  &container_result_type);

            if (container_nonzero_cardinality(c, container_result_type)) {
                ra_replace_key_and_container_at_index(&x1->high_low_container,
                                                      intersection_size++, s1,
                                                      c, container_result_type);
            } else {
                container_free(c, container_result_type);
            }

            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            if (pos1 != intersection_size) {
                // slide the surviving container left into its final slot
                void *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                     pos1, &container_type_1);

                ra_replace_key_and_container_at_index(&x1->high_low_container,
                                                      intersection_size, s1, c1,
                                                      container_type_1);
            }
            intersection_size++;
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }

    if (pos1 < length1) {
        // all containers between intersection_size and
        // pos1 are junk. However, they have either been moved
        // (thus still referenced) or involved in an iandnot
        // that will clean up all containers that could not be reused.
        // Thus we should not free the junk containers between
        // intersection_size and pos1.
        if (pos1 > intersection_size) {
            // left slide of remaining items
            ra_copy_range(&x1->high_low_container, pos1, length1,
                          intersection_size);
        }
        // else current placement is fine
        intersection_size += (length1 - pos1);
    }
    ra_downsize(&x1->high_low_container, intersection_size);
}

/* Total number of values stored: sum of per-container cardinalities. */
uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) {
    uint64_t card = 0;
    for (int i = 0; i < ra->high_low_container.size; ++i)
        card += container_get_cardinality(ra->high_low_container.containers[i],
                                          ra->high_low_container.typecodes[i]);
    return card;
}

/* Number of values in [range_start, range_end). Uses container_rank at the
 * boundary containers and full cardinalities for the interior ones. */
uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra,
                                          uint64_t range_start,
                                          uint64_t range_end) {
    if (range_end > UINT32_MAX) {
        range_end = UINT32_MAX + UINT64_C(1);  // clamp to one past the max value
    }
    if (range_start >= range_end) {
        return 0;
    }
    range_end--; // make range_end inclusive
    // now we have: 0 <= range_start <= range_end <= UINT32_MAX

    uint16_t minhb = range_start >> 16;
    uint16_t maxhb = range_end >> 16;

    uint64_t card = 0;

    int i = ra_get_index(&ra->high_low_container, minhb);
    if (i >= 0) {
        if (minhb == maxhb) {
            // whole range falls into one container: rank at the upper bound
            card += container_rank(ra->high_low_container.containers[i],
                                   ra->high_low_container.typecodes[i],
                                   range_end & 0xffff);
        } else {
            card += container_get_cardinality(ra->high_low_container.containers[i],
                                              ra->high_low_container.typecodes[i]);
        }
        if ((range_start & 0xffff) != 0) {
            // subtract the values below range_start within the first container
            card -= container_rank(ra->high_low_container.containers[i],
                                   ra->high_low_container.typecodes[i],
                                   (range_start & 0xffff) - 1);
        }
        i++;
    } else {
        i = -i - 1;  // first container with key > minhb
    }

    for (; i < ra->high_low_container.size; i++) {
        uint16_t key = ra->high_low_container.keys[i];
        if (key < maxhb) {
            card += container_get_cardinality(ra->high_low_container.containers[i],
                                              ra->high_low_container.typecodes[i]);
        } else if (key == maxhb) {
            card += container_rank(ra->high_low_container.containers[i],
                                   ra->high_low_container.typecodes[i],
                                   range_end & 0xffff);
            break;
        } else {
            break;
        }
    }

    return card;
}


/* True iff the bitmap holds no values. */
bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) {
    return ra->high_low_container.size == 0;
}

/* Writes all values, in increasing order, into `ans` (caller-sized). */
void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) {
    ra_to_uint32_array(&ra->high_low_container, ans);
}

/* Writes up to `limit` values starting at rank `offset` into `ans`. */
bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) {
    return ra_range_uint32_array(&ra->high_low_container, offset, limit, ans);
}

/** convert array and bitmap containers to run containers when it is more
 * efficient;
 * also convert from run containers when more space efficient. Returns
 * true if the result has at least one run container.
*/
bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
    bool answer = false;
    for (int i = 0; i < r->high_low_container.size; i++) {
        uint8_t typecode_original, typecode_after;
        ra_unshare_container_at_index(
            &r->high_low_container, i); // TODO: this introduces extra cloning!
        void *c = ra_get_container_at_index(&r->high_low_container, i,
                                            &typecode_original);
        void *c1 = convert_run_optimize(c, typecode_original, &typecode_after);
        if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true;
        ra_set_container_at_index(&r->high_low_container, i, c1,
                                  typecode_after);
    }
    return answer;
}

/* Shrinks internal buffers to fit; returns the number of bytes saved. */
size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
    size_t answer = 0;
    for (int i = 0; i < r->high_low_container.size; i++) {
        uint8_t typecode_original;
        void *c = ra_get_container_at_index(&r->high_low_container, i,
                                            &typecode_original);
        answer += container_shrink_to_fit(c, typecode_original);
    }
    answer += ra_shrink_to_fit(&r->high_low_container);
    return answer;
}

/**
 * Remove run-length encoding even when it is more space efficient
 * return whether a change was applied
 */
bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
    bool answer = false;
    for (int i = 0; i < r->high_low_container.size; i++) {
        uint8_t typecode_original, typecode_after;
        void *c = ra_get_container_at_index(&r->high_low_container, i,
                                            &typecode_original);
        if (get_container_type(c, typecode_original) ==
            RUN_CONTAINER_TYPE_CODE) {
            answer = true;
            if (typecode_original == SHARED_CONTAINER_TYPE_CODE) {
                // unwrap the shared wrapper to reach the actual run container
                run_container_t *truec =
                    (run_container_t *)((shared_container_t *)c)->container;
                int32_t card = run_container_cardinality(truec);
                void *c1 = convert_to_bitset_or_array_container(
                    truec, card, &typecode_after);
                shared_container_free((shared_container_t *)c);// will free the run container as needed
                ra_set_container_at_index(&r->high_low_container, i, c1,
                                          typecode_after);

            } else {
                int32_t card = run_container_cardinality((run_container_t *)c);
                void *c1 = convert_to_bitset_or_array_container(
                    (run_container_t *)c, card, &typecode_after);
                run_container_free((run_container_t *)c);
                ra_set_container_at_index(&r->high_low_container, i, c1,
                                          typecode_after);
            }
        }
    }
    return answer;
}
/* Non-portable serialization: writes a 1-byte tag followed by whichever of
 * the portable format or a raw uint32 array is smaller. Returns the number
 * of bytes written. Caller must size `buf` via roaring_bitmap_size_in_bytes. */
size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) {
    size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra);
    uint64_t cardinality = roaring_bitmap_get_cardinality(ra);
    uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
    if (portablesize < sizeasarray) {
        buf[0] = SERIALIZATION_CONTAINER;
        return roaring_bitmap_portable_serialize(ra, buf + 1) + 1;
    } else {
        buf[0] = SERIALIZATION_ARRAY_UINT32;
        // copies the low 4 bytes of the 64-bit cardinality
        // NOTE(review): byte-order- and truncation-sensitive — assumes
        // little-endian and cardinality < 2^32; confirm against the format
        memcpy(buf + 1, &cardinality, sizeof(uint32_t));
        roaring_bitmap_to_uint32_array(
            ra, (uint32_t *)(buf + 1 + sizeof(uint32_t)));
        return 1 + (size_t)sizeasarray;
    }
}

/* Number of bytes roaring_bitmap_serialize would write (min of the two
 * encodings, plus the 1-byte tag). */
size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) {
    size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra);
    uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) +
                           sizeof(uint32_t);
    return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1;
}

/* Size of the portable (spec-compliant) serialized form. */
size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) {
    return ra_portable_size_in_bytes(&ra->high_low_container);
}


/* Deserializes the portable format, reading at most `maxbytes` bytes.
 * Returns NULL on allocation failure or malformed input. */
roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
    roaring_bitmap_t *ans =
        (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
    if (ans == NULL) {
        return NULL;
    }
    size_t bytesread;
    bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
    // sanity check: a successful parse never reads past the given bound
    if(is_ok) assert(bytesread <= maxbytes);
    roaring_bitmap_set_copy_on_write(ans, false);
    if (!is_ok) {
        free(ans);
        return NULL;
    }
    return ans;
}

/* Unbounded variant: trusts `buf` to contain a complete serialized bitmap. */
roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
    return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
}


/* Number of bytes a portable deserialization of `buf` would consume. */
size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) {
    return ra_portable_deserialize_size(buf, maxbytes);
}


/* Writes the portable (spec-compliant) serialized form into `buf`. */
size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra,
                                         char *buf) {
    return ra_portable_serialize(&ra->high_low_container, buf);
}

/* Counterpart of roaring_bitmap_serialize: dispatches on the 1-byte tag.
 * Trusts the buffer; only use on trusted input. Returns NULL on an
 * unrecognized tag. */
roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
    const char *bufaschar = (const char *)buf;
    if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) {
        /* This looks like a compressed set of uint32_t elements */
        uint32_t card;
        memcpy(&card, bufaschar + 1, sizeof(uint32_t));
        const uint32_t *elems =
            (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));

        return roaring_bitmap_of_ptr(card, elems);
    } else if (bufaschar[0] == SERIALIZATION_CONTAINER) {
        return roaring_bitmap_portable_deserialize(bufaschar + 1);
    } else
        return (NULL);
}

/* Calls `iterator(value, ptr)` for every value in increasing order; stops
 * early (returning false) if the callback returns false. */
bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator,
                     void *ptr) {
    for (int i = 0; i < ra->high_low_container.size; ++i)
        if (!container_iterate(ra->high_low_container.containers[i],
                               ra->high_low_container.typecodes[i],
                               ((uint32_t)ra->high_low_container.keys[i]) << 16,
                               iterator, ptr)) {
            return false;
        }
    return true;
}

/* Like roaring_iterate, but the callback receives 64-bit values formed by
 * OR-ing `high_bits` above each 32-bit value. */
bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator,
                       uint64_t high_bits, void *ptr) {
    for (int i = 0; i < ra->high_low_container.size; ++i)
        if (!container_iterate64(
                ra->high_low_container.containers[i],
                ra->high_low_container.typecodes[i],
                ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator,
                high_bits, ptr)) {
            return false;
        }
    return true;
}

/****
* begin roaring_uint32_iterator_t
*****/

// Partially initializes the roaring iterator when it begins looking at
// a new container.
+static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) { + newit->in_container_index = 0; + newit->run_index = 0; + newit->current_value = 0; + if (newit->container_index >= newit->parent->high_low_container.size || + newit->container_index < 0) { + newit->current_value = UINT32_MAX; + return (newit->has_value = false); + } + // assume not empty + newit->has_value = true; + // we precompute container, typecode and highbits so that successive + // iterators do not have to grab them from odd memory locations + // and have to worry about the (easily predicted) container_unwrap_shared + // call. + newit->container = + newit->parent->high_low_container.containers[newit->container_index]; + newit->typecode = + newit->parent->high_low_container.typecodes[newit->container_index]; + newit->highbits = + ((uint32_t) + newit->parent->high_low_container.keys[newit->container_index]) + << 16; + newit->container = + container_unwrap_shared(newit->container, &(newit->typecode)); + return newit->has_value; +} + +static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { + if (!iter_new_container_partial_init(newit)) + return newit->has_value; + + uint32_t wordindex; + uint64_t word; // used for bitsets + switch (newit->typecode) { + case BITSET_CONTAINER_TYPE_CODE: + wordindex = 0; + while ((word = ((const bitset_container_t *)(newit->container)) + ->array[wordindex]) == 0) + wordindex++; // advance + // here "word" is non-zero + newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); + newit->current_value = newit->highbits | newit->in_container_index; + break; + case ARRAY_CONTAINER_TYPE_CODE: + newit->current_value = + newit->highbits | + ((const array_container_t *)(newit->container))->array[0]; + break; + case RUN_CONTAINER_TYPE_CODE: + newit->current_value = + newit->highbits | + (((const run_container_t *)(newit->container))->runs[0].value); + break; + default: + // if this ever happens, bug! 
            assert(false);
    }  // switch (typecode)
    return true;
}

// Positions the iterator on the largest value of the current container
// (mirror image of loadfirstvalue).
static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
    if (!iter_new_container_partial_init(newit))
        return newit->has_value;

    switch(newit->typecode) {
        case BITSET_CONTAINER_TYPE_CODE: {
            // scan backward for the last non-empty 64-bit word
            uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
            uint64_t word;
            const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container;
            while ((word = bitset_container->array[wordindex]) == 0)
                --wordindex;

            int num_leading_zeros = __builtin_clzll(word);
            newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
            newit->current_value = newit->highbits | newit->in_container_index;
            break;
        }
        case ARRAY_CONTAINER_TYPE_CODE: {
            const array_container_t* array_container = (const array_container_t*)newit->container;
            newit->in_container_index = array_container->cardinality - 1;
            newit->current_value = newit->highbits | array_container->array[newit->in_container_index];
            break;
        }
        case RUN_CONTAINER_TYPE_CODE: {
            // last value of the last run = start value + run length
            const run_container_t* run_container = (const run_container_t*)newit->container;
            newit->run_index = run_container->n_runs - 1;
            const rle16_t* last_run = &run_container->runs[newit->run_index];
            newit->current_value = newit->highbits | (last_run->value + last_run->length);
            break;
        }
        default:
            // if this ever happens, bug!
            assert(false);
    }
    return true;
}

// prerequisite: the value should be in range of the container
static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
    // Don't have to check return value because of prerequisite
    iter_new_container_partial_init(newit);
    uint16_t lb = val & 0xFFFF;  // low 16 bits searched inside the container

    switch (newit->typecode) {
        case BITSET_CONTAINER_TYPE_CODE:
            newit->in_container_index = bitset_container_index_equalorlarger((const bitset_container_t *)(newit->container), lb);
            newit->current_value = newit->highbits | newit->in_container_index;
            break;
        case ARRAY_CONTAINER_TYPE_CODE:
            newit->in_container_index = array_container_index_equalorlarger((const array_container_t *)(newit->container), lb);
            newit->current_value =
                newit->highbits |
                ((const array_container_t *)(newit->container))->array[newit->in_container_index];
            break;
        case RUN_CONTAINER_TYPE_CODE:
            newit->run_index = run_container_index_equalorlarger((const run_container_t *)(newit->container), lb);
            // if the found run starts at or before lb, val itself is present;
            // otherwise the answer is the start of that run
            if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) {
                newit->current_value = val;
            } else {
                newit->current_value =
                    newit->highbits |
                    (((const run_container_t *)(newit->container))->runs[newit->run_index].value);
            }
            break;
        default:
            // if this ever happens, bug!
+ assert(false); + } // switch (typecode) + return true; +} + +void roaring_init_iterator(const roaring_bitmap_t *ra, + roaring_uint32_iterator_t *newit) { + newit->parent = ra; + newit->container_index = 0; + newit->has_value = loadfirstvalue(newit); +} + +void roaring_init_iterator_last(const roaring_bitmap_t *ra, + roaring_uint32_iterator_t *newit) { + newit->parent = ra; + newit->container_index = newit->parent->high_low_container.size - 1; + newit->has_value = loadlastvalue(newit); +} + +roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) { + roaring_uint32_iterator_t *newit = + (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); + if (newit == NULL) return NULL; + roaring_init_iterator(ra, newit); + return newit; +} + +roaring_uint32_iterator_t *roaring_copy_uint32_iterator( + const roaring_uint32_iterator_t *it) { + roaring_uint32_iterator_t *newit = + (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); + memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); + return newit; +} + +bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { + uint16_t hb = val >> 16; + const int i = ra_get_index(& it->parent->high_low_container, hb); + if (i >= 0) { + uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); + uint16_t lb = val & 0xFFFF; + if(lowvalue < lb ) { + it->container_index = i+1; // will have to load first value of next container + } else {// the value is necessarily within the range of the container + it->container_index = i; + it->has_value = loadfirstvalue_largeorequal(it, val); + return it->has_value; + } + } else { + // there is no matching, so we are going for the next container + it->container_index = -i-1; + } + it->has_value = loadfirstvalue(it); + return it->has_value; +} + + +bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { + if (it->container_index 
>= it->parent->high_low_container.size) {
        return (it->has_value = false);  // already past the end
    }
    if (it->container_index < 0) {
        // was positioned before the beginning: restart at the first value
        it->container_index = 0;
        return (it->has_value = loadfirstvalue(it));
    }

    uint32_t wordindex;  // used for bitsets
    uint64_t word;       // used for bitsets
    switch (it->typecode) {
        case BITSET_CONTAINER_TYPE_CODE:
            it->in_container_index++;
            wordindex = it->in_container_index / 64;
            if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break;
            // mask off bits below the new position in the current word
            word = ((const bitset_container_t *)(it->container))
                       ->array[wordindex] &
                   (UINT64_MAX << (it->in_container_index % 64));
            // next part could be optimized/simplified
            while ((word == 0) &&
                   (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
                wordindex++;
                word = ((const bitset_container_t *)(it->container))
                           ->array[wordindex];
            }
            if (word != 0) {
                it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
                it->current_value = it->highbits | it->in_container_index;
                return (it->has_value = true);
            }
            break;
        case ARRAY_CONTAINER_TYPE_CODE:
            it->in_container_index++;
            if (it->in_container_index <
                ((const array_container_t *)(it->container))->cardinality) {
                it->current_value = it->highbits |
                                    ((const array_container_t *)(it->container))
                                        ->array[it->in_container_index];
                return (it->has_value = true);
            }
            break;
        case RUN_CONTAINER_TYPE_CODE: {
            if(it->current_value == UINT32_MAX) {
                return (it->has_value = false); // without this, we risk an overflow to zero
            }

            const run_container_t* run_container = (const run_container_t*)it->container;
            // still inside the current run?
            if (++it->current_value <= (it->highbits | (run_container->runs[it->run_index].value +
                                                        run_container->runs[it->run_index].length))) {
                return (it->has_value = true);
            }

            if (++it->run_index < run_container->n_runs) {
                // Assume the run has a value
                it->current_value = it->highbits | run_container->runs[it->run_index].value;
                return (it->has_value = true);
            }
            break;
        }
        default:
            // if this ever happens, bug!
            assert(false);
    }  // switch (typecode)
    // moving to next container
    it->container_index++;
    return (it->has_value = loadfirstvalue(it));
}

/* Step back to the previous value in descending order; returns whether one
 * exists (mirror of roaring_advance_uint32_iterator). */
bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
    if (it->container_index < 0) {
        return (it->has_value = false);  // already before the beginning
    }
    if (it->container_index >= it->parent->high_low_container.size) {
        // was positioned past the end: restart at the last value
        it->container_index = it->parent->high_low_container.size - 1;
        return (it->has_value = loadlastvalue(it));
    }

    switch (it->typecode) {
        case BITSET_CONTAINER_TYPE_CODE: {
            if (--it->in_container_index < 0)
                break;

            const bitset_container_t* bitset_container = (const bitset_container_t*)it->container;
            int32_t wordindex = it->in_container_index / 64;
            // mask off bits above the new position in the current word
            uint64_t word = bitset_container->array[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64)));

            while (word == 0 && --wordindex >= 0) {
                word = bitset_container->array[wordindex];
            }
            if (word == 0)
                break;

            int num_leading_zeros = __builtin_clzll(word);
            it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
            it->current_value = it->highbits | it->in_container_index;
            return (it->has_value = true);
        }
        case ARRAY_CONTAINER_TYPE_CODE: {
            if (--it->in_container_index < 0)
                break;

            const array_container_t* array_container = (const array_container_t*)it->container;
            it->current_value = it->highbits | array_container->array[it->in_container_index];
            return (it->has_value = true);
        }
        case RUN_CONTAINER_TYPE_CODE: {
            if(it->current_value == 0)
                return (it->has_value = false);  // avoid wrap-around below zero

            const run_container_t* run_container = (const run_container_t*)it->container;
            // still inside the current run?
            if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) {
                return (it->has_value = true);
            }

            if (--it->run_index < 0)
                break;

            // jump to the last value of the previous run
            it->current_value = it->highbits | (run_container->runs[it->run_index].value +
                                                run_container->runs[it->run_index].length);
            return (it->has_value = true);
        }
        default:
            // if this ever happens, bug!
            assert(false);
    }  // switch (typecode)

    // moving to previous container
    it->container_index--;
    return (it->has_value = loadlastvalue(it));
}

/* Bulk-read up to `count` values into `buf`, advancing the iterator.
 * Returns the number of values written (< count only when exhausted). */
uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) {
    uint32_t ret = 0;
    uint32_t num_values;
    uint32_t wordindex;  // used for bitsets
    uint64_t word;       // used for bitsets
    const array_container_t* acont; //TODO remove
    const run_container_t* rcont; //TODO remove
    const bitset_container_t* bcont; //TODO remove

    while (it->has_value && ret < count) {
        switch (it->typecode) {
            case BITSET_CONTAINER_TYPE_CODE:
                bcont = (const bitset_container_t*)(it->container);
                wordindex = it->in_container_index / 64;
                word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
                do {
                    // emit every set bit of the current word
                    while (word != 0 && ret < count) {
                        buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
                        word = word & (word - 1);  // clear lowest set bit
                        buf++;
                        ret++;
                    }
                    while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
                        wordindex++;
                        word = bcont->array[wordindex];
                    }
                } while (word != 0 && ret < count);
                it->has_value = (word != 0);
                if (it->has_value) {
                    it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
                    it->current_value = it->highbits | it->in_container_index;
                }
                break;
            case ARRAY_CONTAINER_TYPE_CODE:
                acont = (const array_container_t *)(it->container);
                num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret);
                for (uint32_t i = 0; i < num_values; i++) {
                    buf[i] = it->highbits | acont->array[it->in_container_index + i];
                }
                buf += num_values;
                ret += num_values;
                it->in_container_index += num_values;
                it->has_value = (it->in_container_index < acont->cardinality);
                if (it->has_value) {
                    it->current_value = it->highbits | acont->array[it->in_container_index];
                }
                break;
            case RUN_CONTAINER_TYPE_CODE:
                rcont = (const run_container_t*)(it->container);
//"in_run_index" name is misleading, read it as "max_value_in_current_run" + do { + uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length); + num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret); + for (uint32_t i = 0; i < num_values; i++) { + buf[i] = it->current_value + i; + } + it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 + buf += num_values; + ret += num_values; + + if (it->current_value > largest_run_value || it->current_value == 0) { + it->run_index++; + if (it->run_index < rcont->n_runs) { + it->current_value = it->highbits | rcont->runs[it->run_index].value; + } else { + it->has_value = false; + } + } + } while ((ret < count) && it->has_value); + break; + default: + assert(false); + } + if (it->has_value) { + assert(ret == count); + return ret; + } + it->container_index++; + it->has_value = loadfirstvalue(it); + } + return ret; +} + + + +void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(it); } + +/**** +* end of roaring_uint32_iterator_t +*****/ + +bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, + const roaring_bitmap_t *ra2) { + if (ra1->high_low_container.size != ra2->high_low_container.size) { + return false; + } + for (int i = 0; i < ra1->high_low_container.size; ++i) { + if (ra1->high_low_container.keys[i] != + ra2->high_low_container.keys[i]) { + return false; + } + } + for (int i = 0; i < ra1->high_low_container.size; ++i) { + bool areequal = container_equals(ra1->high_low_container.containers[i], + ra1->high_low_container.typecodes[i], + ra2->high_low_container.containers[i], + ra2->high_low_container.typecodes[i]); + if (!areequal) { + return false; + } + } + return true; +} + +bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, + const roaring_bitmap_t *ra2) { + const int length1 = ra1->high_low_container.size, + length2 = ra2->high_low_container.size; + + int pos1 = 0, pos2 = 0; + + while 
(pos1 < length1 && pos2 < length2) {
        const uint16_t s1 = ra_get_key_at_index(&ra1->high_low_container, pos1);
        const uint16_t s2 = ra_get_key_at_index(&ra2->high_low_container, pos2);

        if (s1 == s2) {
            uint8_t container_type_1, container_type_2;
            void *c1 = ra_get_container_at_index(&ra1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&ra2->high_low_container, pos2,
                                                 &container_type_2);
            bool subset =
                container_is_subset(c1, container_type_1, c2, container_type_2);
            if (!subset) return false;
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {  // s1 < s2
            // ra1 has a key that ra2 lacks: cannot be a subset
            return false;
        } else {  // s1 > s2
            pos2 = ra_advance_until(&ra2->high_low_container, s1, pos2);
        }
    }
    // subset iff every container of ra1 was consumed
    if (pos1 == length1)
        return true;
    else
        return false;
}

/* Append to ans_arr the flip of x1_arr's container for key `hb`, restricted
 * to the low-bit range [lb_start, lb_end]. A missing source container flips
 * to a range of ones; an empty result is dropped. */
static void insert_flipped_container(roaring_array_t *ans_arr,
                                     const roaring_array_t *x1_arr, uint16_t hb,
                                     uint16_t lb_start, uint16_t lb_end) {
    const int i = ra_get_index(x1_arr, hb);
    const int j = ra_get_index(ans_arr, hb);
    uint8_t ctype_in, ctype_out;
    void *flipped_container = NULL;
    if (i >= 0) {
        void *container_to_flip =
            ra_get_container_at_index(x1_arr, i, &ctype_in);
        flipped_container =
            container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start,
                                (uint32_t)(lb_end + 1), &ctype_out);

        if (container_get_cardinality(flipped_container, ctype_out))
            ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
                                       ctype_out);
        else {
            container_free(flipped_container, ctype_out);
        }
    } else {
        flipped_container = container_range_of_ones(
            (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
        ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
                                   ctype_out);
    }
}

/* In-place variant of insert_flipped_container: flips x1_arr's container for
 * key `hb` over [lb_start, lb_end], removing it if it becomes empty. */
static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
                                   uint16_t lb_start, uint16_t lb_end) {
    const int i = ra_get_index(x1_arr, hb);
    uint8_t ctype_in, ctype_out;
    void *flipped_container = NULL;
    if (i >= 0) {
        void *container_to_flip =
ra_get_container_at_index(x1_arr, i, &ctype_in);
        flipped_container = container_inot_range(
            container_to_flip, ctype_in, (uint32_t)lb_start,
            (uint32_t)(lb_end + 1), &ctype_out);
        // if a new container was created, the old one was already freed
        if (container_get_cardinality(flipped_container, ctype_out)) {
            ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
        } else {
            container_free(flipped_container, ctype_out);
            ra_remove_at_index(x1_arr, i);
        }

    } else {
        flipped_container = container_range_of_ones(
            (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
        ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
                                   ctype_out);
    }
}

/* Append to ans_arr the complement of x1_arr's entire container for key `hb`
 * (full 16-bit range). A missing source container flips to all ones. */
static void insert_fully_flipped_container(roaring_array_t *ans_arr,
                                           const roaring_array_t *x1_arr,
                                           uint16_t hb) {
    const int i = ra_get_index(x1_arr, hb);
    const int j = ra_get_index(ans_arr, hb);
    uint8_t ctype_in, ctype_out;
    void *flipped_container = NULL;
    if (i >= 0) {
        void *container_to_flip =
            ra_get_container_at_index(x1_arr, i, &ctype_in);
        flipped_container =
            container_not(container_to_flip, ctype_in, &ctype_out);
        if (container_get_cardinality(flipped_container, ctype_out))
            ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
                                       ctype_out);
        else {
            container_free(flipped_container, ctype_out);
        }
    } else {
        flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
        ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
                                   ctype_out);
    }
}

/* In-place full-range complement of x1_arr's container for key `hb`,
 * removing it if it becomes empty. */
static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
    const int i = ra_get_index(x1_arr, hb);
    uint8_t ctype_in, ctype_out;
    void *flipped_container = NULL;
    if (i >= 0) {
        void *container_to_flip =
            ra_get_container_at_index(x1_arr, i, &ctype_in);
        flipped_container =
            container_inot(container_to_flip, ctype_in, &ctype_out);

        if (container_get_cardinality(flipped_container, ctype_out)) {
            ra_set_container_at_index(x1_arr, i,
flipped_container, ctype_out);
        } else {
            container_free(flipped_container, ctype_out);
            ra_remove_at_index(x1_arr, i);
        }

    } else {
        flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
        ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
                                   ctype_out);
    }
}

/* Return a new bitmap equal to x1 with [range_start, range_end) complemented.
 * An empty range yields a plain copy; the range is clamped to 2^32. */
roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
                                      uint64_t range_start,
                                      uint64_t range_end) {
    if (range_start >= range_end) {
        return roaring_bitmap_copy(x1);
    }
    if(range_end >= UINT64_C(0x100000000)) {
        range_end = UINT64_C(0x100000000);
    }

    roaring_bitmap_t *ans = roaring_bitmap_create();
    roaring_bitmap_set_copy_on_write(ans, is_cow(x1));

    // split the range into a high-16-bit container span and low-16-bit offsets
    uint16_t hb_start = (uint16_t)(range_start >> 16);
    const uint16_t lb_start = (uint16_t)range_start;  // & 0xFFFF;
    uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
    const uint16_t lb_end = (uint16_t)(range_end - 1);  // & 0xFFFF;

    // containers strictly before the range are copied through unchanged
    ra_append_copies_until(&ans->high_low_container, &x1->high_low_container,
                           hb_start, is_cow(x1));
    if (hb_start == hb_end) {
        insert_flipped_container(&ans->high_low_container,
                                 &x1->high_low_container, hb_start, lb_start,
                                 lb_end);
    } else {
        // start and end containers are distinct
        if (lb_start > 0) {
            // handle first (partial) container
            insert_flipped_container(&ans->high_low_container,
                                     &x1->high_low_container, hb_start,
                                     lb_start, 0xFFFF);
            ++hb_start;  // for the full containers. Can't wrap.
        }

        if (lb_end != 0xFFFF) --hb_end;  // later we'll handle the partial block

        // full containers are complemented wholesale
        for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
            insert_fully_flipped_container(&ans->high_low_container,
                                           &x1->high_low_container, hb);
        }

        // handle a partial final container
        if (lb_end != 0xFFFF) {
            insert_flipped_container(&ans->high_low_container,
                                     &x1->high_low_container, hb_end + 1, 0,
                                     lb_end);
            ++hb_end;
        }
    }
    // containers strictly after the range are copied through unchanged
    ra_append_copies_after(&ans->high_low_container, &x1->high_low_container,
                           hb_end, is_cow(x1));
    return ans;
}

/* In-place complement of [range_start, range_end) in x1; same partial/full
 * container decomposition as roaring_bitmap_flip. */
void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
                                 uint64_t range_end) {
    if (range_start >= range_end) {
        return;  // empty range
    }
    if(range_end >= UINT64_C(0x100000000)) {
        range_end = UINT64_C(0x100000000);
    }

    uint16_t hb_start = (uint16_t)(range_start >> 16);
    const uint16_t lb_start = (uint16_t)range_start;
    uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
    const uint16_t lb_end = (uint16_t)(range_end - 1);

    if (hb_start == hb_end) {
        inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
                               lb_end);
    } else {
        // start and end containers are distinct
        if (lb_start > 0) {
            // handle first (partial) container
            inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
                                   0xFFFF);
            ++hb_start;  // for the full containers. Can't wrap.
        }

        if (lb_end != 0xFFFF) --hb_end;

        for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
            inplace_fully_flip_container(&x1->high_low_container, hb);
        }
        // handle a partial final container
        if (lb_end != 0xFFFF) {
            inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
                                   lb_end);
            ++hb_end;
        }
    }
}

/* Union of x1 and x2 using "lazy" container ORs (cardinalities may be left
 * uncomputed); caller must run roaring_bitmap_repair_after_lazy before
 * using the result. `bitsetconversion` forces non-bitset operands to bitset
 * form before OR-ing. */
roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
                                         const roaring_bitmap_t *x2,
                                         const bool bitsetconversion) {
    uint8_t container_result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    // classic two-pointer merge over the sorted key arrays
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c;
            if (bitsetconversion && (get_container_type(c1, container_type_1) !=
                                     BITSET_CONTAINER_TYPE_CODE) &&
                (get_container_type(c2, container_type_2) !=
                 BITSET_CONTAINER_TYPE_CODE)) {
                void *newc1 =
                    container_mutable_unwrap_shared(c1, &container_type_1);
                newc1 = container_to_bitset(newc1, container_type_1);
                container_type_1 = BITSET_CONTAINER_TYPE_CODE;
                c = container_lazy_ior(newc1, container_type_1, c2,
                                       container_type_2,
                                       &container_result_type);
                if (c != newc1) {  // should not happen
                    container_free(newc1, container_type_1);
                }
            } else {
                c = container_lazy_or(c1, container_type_1, c2,
                                      container_type_2, &container_result_type);
            }
            // since we assume that the initial containers are non-empty,
            // the result here can only be non-empty
            ra_append(&answer->high_low_container, s1, c,
                      container_result_type);
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            c1 = get_copy_of_container(c1, &container_type_1, is_cow(x1));
            if (is_cow(x1)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          container_type_1);
            }
            ra_append(&answer->high_low_container, s1, c1, container_type_1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 = get_copy_of_container(c2, &container_type_2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }
            ra_append(&answer->high_low_container, s2, c2, container_type_2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    // copy over whichever side still has containers left
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             is_cow(x2));
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

/* In-place lazy union: x1 |= x2. Same lazy semantics as
 * roaring_bitmap_lazy_or; repair_after_lazy is required afterwards. */
void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
                                    const roaring_bitmap_t *x2,
                                    const bool bitsetconversion) {
    uint8_t container_result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }
    int pos1 = 0, pos2 = 0;
    uint8_t
container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    // two-pointer merge; matching keys OR in place, x2-only keys are inserted
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            // a full container ORed with anything stays full: skip the work
            if (!container_is_full(c1, container_type_1)) {
                if ((bitsetconversion == false) ||
                    (get_container_type(c1, container_type_1) ==
                     BITSET_CONTAINER_TYPE_CODE)) {
                    c1 = get_writable_copy_if_shared(c1, &container_type_1);
                } else {
                    // convert to bitset
                    void *oldc1 = c1;
                    uint8_t oldt1 = container_type_1;
                    c1 = container_mutable_unwrap_shared(c1, &container_type_1);
                    c1 = container_to_bitset(c1, container_type_1);
                    container_free(oldc1, oldt1);
                    container_type_1 = BITSET_CONTAINER_TYPE_CODE;
                }

                void *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                     pos2, &container_type_2);
                void *c = container_lazy_ior(c1, container_type_1, c2,
                                             container_type_2,
                                             &container_result_type);
                if (c != c1) {  // in this instance a new container was created,
                                // and we need to free the old one
                    container_free(c1, container_type_1);
                }

                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          container_result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            // void *c2_clone = container_clone(c2, container_type_2);
            c2 = get_copy_of_container(c2, &container_type_2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }
            ra_insert_new_key_value_at(&x1->high_low_container, pos1,
s2, c2,
                                       container_type_2);
            pos1++;
            length1++;  // x1 grew by one container
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    if (pos1 == length1) {
        // append x2's remaining containers
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, is_cow(x2));
    }
}

/* Symmetric difference of x1 and x2 using lazy container XORs; caller must
 * run roaring_bitmap_repair_after_lazy before using the result. */
roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
                                          const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) && is_cow(x2));
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    // two-pointer merge; unlike OR, an XOR of matching containers can be empty
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c =
                container_lazy_xor(c1, container_type_1, c2, container_type_2,
                                   &container_result_type);

            if (container_nonzero_cardinality(c, container_result_type)) {
                ra_append(&answer->high_low_container, s1, c,
                          container_result_type);
            } else {
                container_free(c, container_result_type);
            }

            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            c1 = get_copy_of_container(c1, &container_type_1, is_cow(x1));
            if (is_cow(x1)) {
ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          container_type_1);
            }
            ra_append(&answer->high_low_container, s1, c1, container_type_1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 = get_copy_of_container(c2, &container_type_2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }
            ra_append(&answer->high_low_container, s2, c2, container_type_2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    // copy over whichever side still has containers left
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             is_cow(x2));
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

/* In-place lazy symmetric difference: x1 ^= x2 (x1 must not alias x2).
 * repair_after_lazy is required afterwards. */
void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
                                     const roaring_bitmap_t *x2) {
    assert(x1 != x2);
    uint8_t container_result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            c1 = get_writable_copy_if_shared(c1, &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c =
                container_lazy_ixor(c1, container_type_1, c2, container_type_2,
                                    &container_result_type);
            if (container_nonzero_cardinality(c,
container_result_type)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          container_result_type);
                ++pos1;
            } else {
                // XOR cancelled out: drop the now-empty container
                container_free(c, container_result_type);
                ra_remove_at_index(&x1->high_low_container, pos1);
                --length1;
            }
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            // void *c2_clone = container_clone(c2, container_type_2);
            c2 = get_copy_of_container(c2, &container_type_2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }
            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       container_type_2);
            pos1++;
            length1++;  // x1 grew by one container
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    if (pos1 == length1) {
        // append x2's remaining containers
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, is_cow(x2));
    }
}

/* Normalize every container after a sequence of lazy operations (recomputes
 * whatever container_repair_after_lazy fixes, e.g. deferred bookkeeping),
 * updating the stored container pointer and typecode. */
void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) {
    for (int i = 0; i < ra->high_low_container.size; ++i) {
        const uint8_t original_typecode = ra->high_low_container.typecodes[i];
        void *container = ra->high_low_container.containers[i];
        uint8_t new_typecode = original_typecode;
        void *newcontainer =
            container_repair_after_lazy(container, &new_typecode);
        ra->high_low_container.containers[i] = newcontainer;
        ra->high_low_container.typecodes[i] = new_typecode;
    }
}



/**
* roaring_bitmap_rank returns the number of integers that are smaller or equal
* to x.
*/
uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
    uint64_t size = 0;
    uint32_t xhigh = x >> 16;
    // sum full cardinalities of containers below x's key, then rank within
    // the container holding x's key
    for (int i = 0; i < bm->high_low_container.size; i++) {
        uint32_t key = bm->high_low_container.keys[i];
        if (xhigh > key) {
            size +=
                container_get_cardinality(bm->high_low_container.containers[i],
                                          bm->high_low_container.typecodes[i]);
        } else if (xhigh == key) {
            return size + container_rank(bm->high_low_container.containers[i],
                                         bm->high_low_container.typecodes[i],
                                         x & 0xFFFF);
        } else {
            return size;
        }
    }
    return size;
}

/**
* roaring_bitmap_minimum returns the smallest value in the set.
* Returns UINT32_MAX if the set is empty.
*/
uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
    if (bm->high_low_container.size > 0) {
        // keys are sorted, so the first container holds the minimum
        void *container = bm->high_low_container.containers[0];
        uint8_t typecode = bm->high_low_container.typecodes[0];
        uint32_t key = bm->high_low_container.keys[0];
        uint32_t lowvalue = container_minimum(container, typecode);
        return lowvalue | (key << 16);
    }
    return UINT32_MAX;
}

/**
* roaring_bitmap_maximum returns the greatest value in the set.
* Returns 0 if the set is empty.
+*/ +uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { + if (bm->high_low_container.size > 0) { + void *container = + bm->high_low_container.containers[bm->high_low_container.size - 1]; + uint8_t typecode = + bm->high_low_container.typecodes[bm->high_low_container.size - 1]; + uint32_t key = + bm->high_low_container.keys[bm->high_low_container.size - 1]; + uint32_t lowvalue = container_maximum(container, typecode); + return lowvalue | (key << 16); + } + return 0; +} + +bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, + uint32_t *element) { + void *container; + uint8_t typecode; + uint16_t key; + uint32_t start_rank = 0; + int i = 0; + bool valid = false; + while (!valid && i < bm->high_low_container.size) { + container = bm->high_low_container.containers[i]; + typecode = bm->high_low_container.typecodes[i]; + valid = + container_select(container, typecode, &start_rank, rank, element); + i++; + } + + if (valid) { + key = bm->high_low_container.keys[i - 1]; + *element |= (key << 16); + return true; + } else + return false; +} + +bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint64_t answer = 0; + int pos1 = 0, pos2 = 0; + + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); + const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); + + if (s1 == s2) { + uint8_t container_type_1, container_type_2; + void *c1 = ra_get_container_at_index(& x1->high_low_container, pos1, + &container_type_1); + void *c2 = ra_get_container_at_index(& x2->high_low_container, pos2, + &container_type_2); + if( container_intersect(c1, container_type_1, c2, container_type_2) ) return true; + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = 
ra_advance_until(& x2->high_low_container, s1, pos2); + } + } + return answer; +} + + +uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const int length1 = x1->high_low_container.size, + length2 = x2->high_low_container.size; + uint64_t answer = 0; + int pos1 = 0, pos2 = 0; + + while (pos1 < length1 && pos2 < length2) { + const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + if (s1 == s2) { + uint8_t container_type_1, container_type_2; + void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, + &container_type_1); + void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, + &container_type_2); + answer += container_and_cardinality(c1, container_type_1, c2, + container_type_2); + ++pos1; + ++pos2; + } else if (s1 < s2) { // s1 < s2 + pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); + } else { // s1 > s2 + pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); + } + } + return answer; +} + +double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return (double)inter / (double)(c1 + c2 - inter); +} + +uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return c1 + c2 - inter; +} + +uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return c1 - inter; +} + +uint64_t 
roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2) { + const uint64_t c1 = roaring_bitmap_get_cardinality(x1); + const uint64_t c2 = roaring_bitmap_get_cardinality(x2); + const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); + return c1 + c2 - 2 * inter; +} + + +/** + * Check whether a range of values from range_start (included) to range_end (excluded) is present + */ +bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { + if(range_end >= UINT64_C(0x100000000)) { + range_end = UINT64_C(0x100000000); + } + if (range_start >= range_end) return true; // empty range are always contained! + if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); + uint16_t hb_rs = (uint16_t)(range_start >> 16); + uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); + const int32_t span = hb_re - hb_rs; + const int32_t hlc_sz = ra_get_size(&r->high_low_container); + if (hlc_sz < span + 1) { + return false; + } + int32_t is = ra_get_index(&r->high_low_container, hb_rs); + int32_t ie = ra_get_index(&r->high_low_container, hb_re); + ie = (ie < 0 ? 
-ie - 1 : ie); + if ((is < 0) || ((ie - is) != span)) { + return false; + } + const uint32_t lb_rs = range_start & 0xFFFF; + const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; + uint8_t typecode; + void *container = ra_get_container_at_index(&r->high_low_container, is, &typecode); + if (hb_rs == hb_re) { + return container_contains_range(container, lb_rs, lb_re, typecode); + } + if (!container_contains_range(container, lb_rs, 1 << 16, typecode)) { + return false; + } + assert(ie < hlc_sz); // would indicate an algorithmic bug + container = ra_get_container_at_index(&r->high_low_container, ie, &typecode); + if (!container_contains_range(container, 0, lb_re, typecode)) { + return false; + } + for (int32_t i = is + 1; i < ie; ++i) { + container = ra_get_container_at_index(&r->high_low_container, i, &typecode); + if (!container_is_full(container, typecode) ) { + return false; + } + } + return true; +} + + +bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, + const roaring_bitmap_t *ra2) { + return (roaring_bitmap_get_cardinality(ra2) > + roaring_bitmap_get_cardinality(ra1) && + roaring_bitmap_is_subset(ra1, ra2)); +} + + +/* + * FROZEN SERIALIZATION FORMAT DESCRIPTION + * + * -- (beginning must be aligned by 32 bytes) -- + * uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers] + * rle16_t[total number of rle elements in all run containers] + * uint16_t[total number of array elements in all array containers] + * uint16_t[num_containers] + * uint16_t[num_containers] + * uint8_t[num_containers] + *
uint32_t + * + *
is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits) + * and the number of containers (17 bits). + * + * stores number of elements for every container. + * Its meaning depends on container type. + * For array and bitset containers, this value is the container cardinality minus one. + * For run container, it is the number of rle_t elements (n_runs). + * + * ,, are flat arrays of elements of + * all containers of respective type. + * + * <*_data> and are kept close together because they are not accessed + * during deserilization. This may reduce IO in case of large mmaped bitmaps. + * All members have their native alignments during deserilization except
, + * which is not guaranteed to be aligned by 4 bytes. + */ + +size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) { + const roaring_array_t *ra = &rb->high_low_container; + size_t num_bytes = 0; + for (int32_t i = 0; i < ra->size; i++) { + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE_CODE: { + num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + } + case RUN_CONTAINER_TYPE_CODE: { + const run_container_t *run = + (const run_container_t *) ra->containers[i]; + num_bytes += run->n_runs * sizeof(rle16_t); + break; + } + case ARRAY_CONTAINER_TYPE_CODE: { + const array_container_t *array = + (const array_container_t *) ra->containers[i]; + num_bytes += array->cardinality * sizeof(uint16_t); + break; + } + default: + __builtin_unreachable(); + } + } + num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes + num_bytes += 4; // header + return num_bytes; +} + +inline static void *arena_alloc(char **arena, size_t num_bytes) { + char *res = *arena; + *arena += num_bytes; + return res; +} + +void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) { + /* + * Note: we do not require user to supply spicificly aligned buffer. + * Thus we have to use memcpy() everywhere. 
+ */ + + const roaring_array_t *ra = &rb->high_low_container; + + size_t bitset_zone_size = 0; + size_t run_zone_size = 0; + size_t array_zone_size = 0; + for (int32_t i = 0; i < ra->size; i++) { + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE_CODE: { + bitset_zone_size += + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + } + case RUN_CONTAINER_TYPE_CODE: { + const run_container_t *run = + (const run_container_t *) ra->containers[i]; + run_zone_size += run->n_runs * sizeof(rle16_t); + break; + } + case ARRAY_CONTAINER_TYPE_CODE: { + const array_container_t *array = + (const array_container_t *) ra->containers[i]; + array_zone_size += array->cardinality * sizeof(uint16_t); + break; + } + default: + __builtin_unreachable(); + } + } + + uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size); + rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size); + uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size); + uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); + uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size); + uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size); + uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4); + + for (int32_t i = 0; i < ra->size; i++) { + uint16_t count; + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE_CODE: { + const bitset_container_t *bitset = + (const bitset_container_t *) ra->containers[i]; + memcpy(bitset_zone, bitset->array, + BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); + bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; + if (bitset->cardinality != BITSET_UNKNOWN_CARDINALITY) { + count = bitset->cardinality - 1; + } else { + count = bitset_container_compute_cardinality(bitset) - 1; + } + break; + } + case RUN_CONTAINER_TYPE_CODE: { + const run_container_t *run = + (const run_container_t *) ra->containers[i]; + size_t num_bytes = run->n_runs * sizeof(rle16_t); + memcpy(run_zone, run->runs, num_bytes); + run_zone 
+= run->n_runs; + count = run->n_runs; + break; + } + case ARRAY_CONTAINER_TYPE_CODE: { + const array_container_t *array = + (const array_container_t *) ra->containers[i]; + size_t num_bytes = array->cardinality * sizeof(uint16_t); + memcpy(array_zone, array->array, num_bytes); + array_zone += array->cardinality; + count = array->cardinality - 1; + break; + } + default: + __builtin_unreachable(); + } + memcpy(&count_zone[i], &count, 2); + } + memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t)); + memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t)); + uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE; + memcpy(header_zone, &header, 4); +} + +const roaring_bitmap_t * +roaring_bitmap_frozen_view(const char *buf, size_t length) { + if ((uintptr_t)buf % 32 != 0) { + return NULL; + } + + // cookie and num_containers + if (length < 4) { + return NULL; + } + uint32_t header; + memcpy(&header, buf + length - 4, 4); // header may be misaligned + if ((header & 0x7FFF) != FROZEN_COOKIE) { + return NULL; + } + int32_t num_containers = (header >> 15); + + // typecodes, counts and keys + if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) { + return NULL; + } + uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5); + uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3); + uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1); + + // {bitset,array,run}_zone + int32_t num_bitset_containers = 0; + int32_t num_run_containers = 0; + int32_t num_array_containers = 0; + size_t bitset_zone_size = 0; + size_t run_zone_size = 0; + size_t array_zone_size = 0; + for (int32_t i = 0; i < num_containers; i++) { + switch (typecodes[i]) { + case BITSET_CONTAINER_TYPE_CODE: + num_bitset_containers++; + bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + break; + case RUN_CONTAINER_TYPE_CODE: + num_run_containers++; + run_zone_size += counts[i] * sizeof(rle16_t); + break; + case 
ARRAY_CONTAINER_TYPE_CODE: + num_array_containers++; + array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t); + break; + default: + return NULL; + } + } + if (length != bitset_zone_size + run_zone_size + array_zone_size + + 5 * num_containers + 4) { + return NULL; + } + uint64_t *bitset_zone = (uint64_t*) (buf); + rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size); + uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size); + + size_t alloc_size = 0; + alloc_size += sizeof(roaring_bitmap_t); + alloc_size += num_containers * sizeof(void *); + alloc_size += num_bitset_containers * sizeof(bitset_container_t); + alloc_size += num_run_containers * sizeof(run_container_t); + alloc_size += num_array_containers * sizeof(array_container_t); + + char *arena = (char *)malloc(alloc_size); + if (arena == NULL) { + return NULL; + } + + roaring_bitmap_t *rb = (roaring_bitmap_t *) + arena_alloc(&arena, sizeof(roaring_bitmap_t)); + rb->high_low_container.flags = ROARING_FLAG_FROZEN; + rb->high_low_container.allocation_size = num_containers; + rb->high_low_container.size = num_containers; + rb->high_low_container.keys = (uint16_t *)keys; + rb->high_low_container.typecodes = (uint8_t *)typecodes; + rb->high_low_container.containers = + (void **)arena_alloc(&arena, sizeof(void*) * num_containers); + for (int32_t i = 0; i < num_containers; i++) { + switch (typecodes[i]) { + case BITSET_CONTAINER_TYPE_CODE: { + bitset_container_t *bitset = (bitset_container_t *) + arena_alloc(&arena, sizeof(bitset_container_t)); + bitset->array = bitset_zone; + bitset->cardinality = counts[i] + UINT32_C(1); + rb->high_low_container.containers[i] = bitset; + bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS; + break; + } + case RUN_CONTAINER_TYPE_CODE: { + run_container_t *run = (run_container_t *) + arena_alloc(&arena, sizeof(run_container_t)); + run->capacity = counts[i]; + run->n_runs = counts[i]; + run->runs = run_zone; + rb->high_low_container.containers[i] = run; + 
run_zone += run->n_runs; + break; + } + case ARRAY_CONTAINER_TYPE_CODE: { + array_container_t *array = (array_container_t *) + arena_alloc(&arena, sizeof(array_container_t)); + array->capacity = counts[i] + UINT32_C(1); + array->cardinality = counts[i] + UINT32_C(1); + array->array = array_zone; + rb->high_low_container.containers[i] = array; + array_zone += counts[i] + UINT32_C(1); + break; + } + default: + free(arena); + return NULL; + } + } + + return rb; +} +/* end file src/roaring.c */ +/* begin file src/roaring_array.c */ +#include +#include +#include +#include +#include +#include + + +// Convention: [0,ra->size) all elements are initialized +// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing + +static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { + // because we combine the allocations, it is not possible to use realloc + /*ra->keys = + (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity); +ra->containers = + (void **)realloc(ra->containers, sizeof(void *) * new_capacity); +ra->typecodes = + (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity); +if (!ra->keys || !ra->containers || !ra->typecodes) { + free(ra->keys); + free(ra->containers); + free(ra->typecodes); + return false; +}*/ + + if ( new_capacity == 0 ) { + free(ra->containers); + ra->containers = NULL; + ra->keys = NULL; + ra->typecodes = NULL; + ra->allocation_size = 0; + return true; + } + const size_t memoryneeded = + new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); + void *bigalloc = malloc(memoryneeded); + if (!bigalloc) return false; + void *oldbigalloc = ra->containers; + void **newcontainers = (void **)bigalloc; + uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); + uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); + assert((char *)(newtypecodes + new_capacity) == + (char *)bigalloc + memoryneeded); + if(ra->size > 0) { + memcpy(newcontainers, ra->containers, sizeof(void *) * 
ra->size); + memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); + memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); + } + ra->containers = newcontainers; + ra->keys = newkeys; + ra->typecodes = newtypecodes; + ra->allocation_size = new_capacity; + free(oldbigalloc); + return true; +} + +bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { + if (!new_ra) return false; + ra_init(new_ra); + + if (cap > INT32_MAX) { return false; } + + if(cap > 0) { + void *bigalloc = + malloc(cap * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t))); + if( bigalloc == NULL ) return false; + new_ra->containers = (void **)bigalloc; + new_ra->keys = (uint16_t *)(new_ra->containers + cap); + new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); + // Narrowing is safe because of above check + new_ra->allocation_size = (int32_t)cap; + } + return true; +} + +int ra_shrink_to_fit(roaring_array_t *ra) { + int savings = (ra->allocation_size - ra->size) * + (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); + if (!realloc_array(ra, ra->size)) { + return 0; + } + ra->allocation_size = ra->size; + return savings; +} + +void ra_init(roaring_array_t *new_ra) { + if (!new_ra) { return; } + new_ra->keys = NULL; + new_ra->containers = NULL; + new_ra->typecodes = NULL; + + new_ra->allocation_size = 0; + new_ra->size = 0; + new_ra->flags = 0; +} + +bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, + bool copy_on_write) { + if (!ra_init_with_capacity(dest, source->size)) return false; + dest->size = source->size; + dest->allocation_size = source->size; + if(dest->size > 0) { + memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); + } + // we go through the containers, turning them into shared containers... 
+ if (copy_on_write) { + for (int32_t i = 0; i < dest->size; ++i) { + source->containers[i] = get_copy_of_container( + source->containers[i], &source->typecodes[i], copy_on_write); + } + // we do a shallow copy to the other bitmap + if(dest->size > 0) { + memcpy(dest->containers, source->containers, + dest->size * sizeof(void *)); + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + } + } else { + if(dest->size > 0) { + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + } + for (int32_t i = 0; i < dest->size; i++) { + dest->containers[i] = + container_clone(source->containers[i], source->typecodes[i]); + if (dest->containers[i] == NULL) { + for (int32_t j = 0; j < i; j++) { + container_free(dest->containers[j], dest->typecodes[j]); + } + ra_clear_without_containers(dest); + return false; + } + } + } + return true; +} + +bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, + bool copy_on_write) { + ra_clear_containers(dest); // we are going to overwrite them + if (dest->allocation_size < source->size) { + if (!realloc_array(dest, source->size)) { + return false; + } + } + dest->size = source->size; + memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); + // we go through the containers, turning them into shared containers... 
+ if (copy_on_write) { + for (int32_t i = 0; i < dest->size; ++i) { + source->containers[i] = get_copy_of_container( + source->containers[i], &source->typecodes[i], copy_on_write); + } + // we do a shallow copy to the other bitmap + memcpy(dest->containers, source->containers, + dest->size * sizeof(void *)); + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + } else { + memcpy(dest->typecodes, source->typecodes, + dest->size * sizeof(uint8_t)); + for (int32_t i = 0; i < dest->size; i++) { + dest->containers[i] = + container_clone(source->containers[i], source->typecodes[i]); + if (dest->containers[i] == NULL) { + for (int32_t j = 0; j < i; j++) { + container_free(dest->containers[j], dest->typecodes[j]); + } + ra_clear_without_containers(dest); + return false; + } + } + } + return true; +} + +void ra_clear_containers(roaring_array_t *ra) { + for (int32_t i = 0; i < ra->size; ++i) { + container_free(ra->containers[i], ra->typecodes[i]); + } +} + +void ra_reset(roaring_array_t *ra) { + ra_clear_containers(ra); + ra->size = 0; + ra_shrink_to_fit(ra); +} + +void ra_clear_without_containers(roaring_array_t *ra) { + free(ra->containers); // keys and typecodes are allocated with containers + ra->size = 0; + ra->allocation_size = 0; + ra->containers = NULL; + ra->keys = NULL; + ra->typecodes = NULL; +} + +void ra_clear(roaring_array_t *ra) { + ra_clear_containers(ra); + ra_clear_without_containers(ra); +} + +bool extend_array(roaring_array_t *ra, int32_t k) { + int32_t desired_size = ra->size + k; + assert(desired_size <= MAX_CONTAINERS); + if (desired_size > ra->allocation_size) { + int32_t new_capacity = + (ra->size < 1024) ? 
2 * desired_size : 5 * desired_size / 4; + if (new_capacity > MAX_CONTAINERS) { + new_capacity = MAX_CONTAINERS; + } + + return realloc_array(ra, new_capacity); + } + return true; +} + +void ra_append(roaring_array_t *ra, uint16_t key, void *container, + uint8_t typecode) { + extend_array(ra, 1); + const int32_t pos = ra->size; + + ra->keys[pos] = key; + ra->containers[pos] = container; + ra->typecodes[pos] = typecode; + ra->size++; +} + +void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t index, bool copy_on_write) { + extend_array(ra, 1); + const int32_t pos = ra->size; + + // old contents is junk not needing freeing + ra->keys[pos] = sa->keys[index]; + // the shared container will be in two bitmaps + if (copy_on_write) { + sa->containers[index] = get_copy_of_container( + sa->containers[index], &sa->typecodes[index], copy_on_write); + ra->containers[pos] = sa->containers[index]; + ra->typecodes[pos] = sa->typecodes[index]; + } else { + ra->containers[pos] = + container_clone(sa->containers[index], sa->typecodes[index]); + ra->typecodes[pos] = sa->typecodes[index]; + } + ra->size++; +} + +void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t stopping_key, bool copy_on_write) { + for (int32_t i = 0; i < sa->size; ++i) { + if (sa->keys[i] >= stopping_key) break; + ra_append_copy(ra, sa, i, copy_on_write); + } +} + +void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write) { + extend_array(ra, end_index - start_index); + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + ra->keys[pos] = sa->keys[i]; + if (copy_on_write) { + sa->containers[i] = get_copy_of_container( + sa->containers[i], &sa->typecodes[i], copy_on_write); + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + } else { + ra->containers[pos] = + container_clone(sa->containers[i], sa->typecodes[i]); + 
ra->typecodes[pos] = sa->typecodes[i]; + } + ra->size++; + } +} + +void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t before_start, bool copy_on_write) { + int start_location = ra_get_index(sa, before_start); + if (start_location >= 0) + ++start_location; + else + start_location = -start_location - 1; + ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); +} + +void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index) { + extend_array(ra, end_index - start_index); + + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + + ra->keys[pos] = sa->keys[i]; + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + ra->size++; + } +} + +void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write) { + extend_array(ra, end_index - start_index); + + for (int32_t i = start_index; i < end_index; ++i) { + const int32_t pos = ra->size; + ra->keys[pos] = sa->keys[i]; + if (copy_on_write) { + sa->containers[i] = get_copy_of_container( + sa->containers[i], &sa->typecodes[i], copy_on_write); + ra->containers[pos] = sa->containers[i]; + ra->typecodes[pos] = sa->typecodes[i]; + } else { + ra->containers[pos] = + container_clone(sa->containers[i], sa->typecodes[i]); + ra->typecodes[pos] = sa->typecodes[i]; + } + ra->size++; + } +} + +uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { + return ra->keys[i]; +} + +// everything skipped over is freed +int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { + while (pos < ra->size && ra->keys[pos] < x) { + container_free(ra->containers[pos], ra->typecodes[pos]); + ++pos; + } + return pos; +} + +void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, + void *container, uint8_t typecode) { + extend_array(ra, 1); + // May be an optimization 
opportunity with DIY memmove + memmove(&(ra->keys[i + 1]), &(ra->keys[i]), + sizeof(uint16_t) * (ra->size - i)); + memmove(&(ra->containers[i + 1]), &(ra->containers[i]), + sizeof(void *) * (ra->size - i)); + memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), + sizeof(uint8_t) * (ra->size - i)); + ra->keys[i] = key; + ra->containers[i] = container; + ra->typecodes[i] = typecode; + ra->size++; +} + +// note: Java routine set things to 0, enabling GC. +// Java called it "resize" but it was always used to downsize. +// Allowing upsize would break the conventions about +// valid containers below ra->size. + +void ra_downsize(roaring_array_t *ra, int32_t new_length) { + assert(new_length <= ra->size); + ra->size = new_length; +} + +void ra_remove_at_index(roaring_array_t *ra, int32_t i) { + memmove(&(ra->containers[i]), &(ra->containers[i + 1]), + sizeof(void *) * (ra->size - i - 1)); + memmove(&(ra->keys[i]), &(ra->keys[i + 1]), + sizeof(uint16_t) * (ra->size - i - 1)); + memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), + sizeof(uint8_t) * (ra->size - i - 1)); + ra->size--; +} + +void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { + container_free(ra->containers[i], ra->typecodes[i]); + ra_remove_at_index(ra, i); +} + +// used in inplace andNot only, to slide left the containers from +// the mutated RoaringBitmap that are after the largest container of +// the argument RoaringBitmap. In use it should be followed by a call to +// downsize. 
+// +void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, + uint32_t new_begin) { + assert(begin <= end); + assert(new_begin < begin); + + const int range = end - begin; + + // We ensure to previously have freed overwritten containers + // that are not copied elsewhere + + memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), + sizeof(void *) * range); + memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), + sizeof(uint16_t) * range); + memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), + sizeof(uint8_t) * range); +} + +void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { + if (distance > 0) { + extend_array(ra, distance); + } + int32_t srcpos = ra->size - count; + int32_t dstpos = srcpos + distance; + memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), + sizeof(uint16_t) * count); + memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), + sizeof(void *) * count); + memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), + sizeof(uint8_t) * count); + ra->size += distance; +} + + +void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { + size_t ctr = 0; + for (int32_t i = 0; i < ra->size; ++i) { + int num_added = container_to_uint32_array( + ans + ctr, ra->containers[i], ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + ctr += num_added; + } +} + +bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { + size_t ctr = 0; + size_t dtr = 0; + + size_t t_limit = 0; + + bool first = false; + size_t first_skip = 0; + + uint32_t *t_ans = NULL; + size_t cur_len = 0; + + for (int i = 0; i < ra->size; ++i) { + + const void *container = container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE_CODE: + t_limit = ((const bitset_container_t *)container)->cardinality; + break; + case ARRAY_CONTAINER_TYPE_CODE: + t_limit = ((const array_container_t *)container)->cardinality; + break; + case 
RUN_CONTAINER_TYPE_CODE: + t_limit = run_container_cardinality((const run_container_t *)container); + break; + case SHARED_CONTAINER_TYPE_CODE: + default: + __builtin_unreachable(); + } + if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ + if (!first){ + //first_skip = t_limit - (ctr + t_limit - offset); + first_skip = offset - ctr; + first = true; + t_ans = (uint32_t *)malloc(sizeof(*t_ans) * (first_skip + limit)); + if(t_ans == NULL) { + return false; + } + memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; + cur_len = first_skip + limit; + } + if (dtr + t_limit > cur_len){ + uint32_t * append_ans = (uint32_t *)malloc(sizeof(*append_ans) * (cur_len + t_limit)); + if(append_ans == NULL) { + if(t_ans != NULL) free(t_ans); + return false; + } + memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); + cur_len = cur_len + t_limit; + memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); + free(t_ans); + t_ans = append_ans; + } + switch (ra->typecodes[i]) { + case BITSET_CONTAINER_TYPE_CODE: + container_to_uint32_array( + t_ans + dtr, (const bitset_container_t *)container, ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case ARRAY_CONTAINER_TYPE_CODE: + container_to_uint32_array( + t_ans + dtr, (const array_container_t *)container, ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case RUN_CONTAINER_TYPE_CODE: + container_to_uint32_array( + t_ans + dtr, (const run_container_t *)container, ra->typecodes[i], + ((uint32_t)ra->keys[i]) << 16); + break; + case SHARED_CONTAINER_TYPE_CODE: + default: + __builtin_unreachable(); + } + dtr += t_limit; + } + ctr += t_limit; + if (dtr-first_skip >= limit) break; + } + if(t_ans != NULL) { + memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); + free(t_ans); + } + return true; +} + +bool ra_has_run_container(const roaring_array_t *ra) { + for (int32_t k = 0; k < ra->size; ++k) { + if (get_container_type(ra->containers[k], ra->typecodes[k]) == + RUN_CONTAINER_TYPE_CODE) + return 
true; + } + return false; +} + +uint32_t ra_portable_header_size(const roaring_array_t *ra) { + if (ra_has_run_container(ra)) { + if (ra->size < + NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets + return 4 + (ra->size + 7) / 8 + 4 * ra->size; + } + return 4 + (ra->size + 7) / 8 + + 8 * ra->size; // - 4 because we pack the size with the cookie + } else { + return 4 + 4 + 8 * ra->size; + } +} + +size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { + size_t count = ra_portable_header_size(ra); + + for (int32_t k = 0; k < ra->size; ++k) { + count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + } + return count; +} + +size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { + char *initbuf = buf; + uint32_t startOffset = 0; + bool hasrun = ra_has_run_container(ra); + if (hasrun) { + uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); + memcpy(buf, &cookie, sizeof(cookie)); + buf += sizeof(cookie); + uint32_t s = (ra->size + 7) / 8; + uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(s, 1); + assert(bitmapOfRunContainers != NULL); // todo: handle + for (int32_t i = 0; i < ra->size; ++i) { + if (get_container_type(ra->containers[i], ra->typecodes[i]) == + RUN_CONTAINER_TYPE_CODE) { + bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); + } + } + memcpy(buf, bitmapOfRunContainers, s); + buf += s; + free(bitmapOfRunContainers); + if (ra->size < NO_OFFSET_THRESHOLD) { + startOffset = 4 + 4 * ra->size + s; + } else { + startOffset = 4 + 8 * ra->size + s; + } + } else { // backwards compatibility + uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; + + memcpy(buf, &cookie, sizeof(cookie)); + buf += sizeof(cookie); + memcpy(buf, &ra->size, sizeof(ra->size)); + buf += sizeof(ra->size); + + startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; + } + for (int32_t k = 0; k < ra->size; ++k) { + memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); + buf += sizeof(ra->keys[k]); + // get_cardinality returns a value in [1,1<<16], 
subtracting one + // we get [0,1<<16 - 1] which fits in 16 bits + uint16_t card = (uint16_t)( + container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); + memcpy(buf, &card, sizeof(card)); + buf += sizeof(card); + } + if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { + // writing the containers offsets + for (int32_t k = 0; k < ra->size; k++) { + memcpy(buf, &startOffset, sizeof(startOffset)); + buf += sizeof(startOffset); + startOffset = + startOffset + + container_size_in_bytes(ra->containers[k], ra->typecodes[k]); + } + } + for (int32_t k = 0; k < ra->size; ++k) { + buf += container_write(ra->containers[k], ra->typecodes[k], buf); + } + return buf - initbuf; +} + +// Quickly checks whether there is a serialized bitmap at the pointer, +// not exceeding size "maxbytes" in bytes. This function does not allocate +// memory dynamically. +// +// This function returns 0 if and only if no valid bitmap is found. +// Otherwise, it returns how many bytes are occupied. +// +size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { + size_t bytestotal = sizeof(int32_t);// for cookie + if(bytestotal > maxbytes) return 0; + uint32_t cookie; + memcpy(&cookie, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && + cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + return 0; + } + int32_t size; + + if ((cookie & 0xFFFF) == SERIAL_COOKIE) + size = (cookie >> 16) + 1; + else { + bytestotal += sizeof(int32_t); + if(bytestotal > maxbytes) return 0; + memcpy(&size, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + } + if (size > (1<<16)) { + return 0; // logically impossible + } + char *bitmapOfRunContainers = NULL; + bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + int32_t s = (size + 7) / 8; + bytestotal += s; + if(bytestotal > maxbytes) return 0; + bitmapOfRunContainers = (char *)buf; + buf += s; + } + bytestotal += size * 2 * sizeof(uint16_t); + if(bytestotal > maxbytes) return 0; + 
uint16_t *keyscards = (uint16_t *)buf; + buf += size * 2 * sizeof(uint16_t); + if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { + // skipping the offsets + bytestotal += size * 4; + if(bytestotal > maxbytes) return 0; + buf += size * 4; + } + // Reading the containers + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); + uint32_t thiscard = tmp + 1; + bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); + bool isrun = false; + if(hasrun) { + if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + if (isbitmap) { + size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + bytestotal += containersize; + if(bytestotal > maxbytes) return 0; + buf += containersize; + } else if (isrun) { + bytestotal += sizeof(uint16_t); + if(bytestotal > maxbytes) return 0; + uint16_t n_runs; + memcpy(&n_runs, buf, sizeof(uint16_t)); + buf += sizeof(uint16_t); + size_t containersize = n_runs * sizeof(rle16_t); + bytestotal += containersize; + if(bytestotal > maxbytes) return 0; + buf += containersize; + } else { + size_t containersize = thiscard * sizeof(uint16_t); + bytestotal += containersize; + if(bytestotal > maxbytes) return 0; + buf += containersize; + } + } + return bytestotal; +} + + +// this function populates answer from the content of buf (reading up to maxbytes bytes). +// The function returns false if a properly serialized bitmap cannot be found. +// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. 
+bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { + *readbytes = sizeof(int32_t);// for cookie + if(*readbytes > maxbytes) { + fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n"); + return false; + } + uint32_t cookie; + memcpy(&cookie, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + if ((cookie & 0xFFFF) != SERIAL_COOKIE && + cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { + fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n", + cookie); + return false; + } + int32_t size; + + if ((cookie & 0xFFFF) == SERIAL_COOKIE) + size = (cookie >> 16) + 1; + else { + *readbytes += sizeof(int32_t); + if(*readbytes > maxbytes) { + fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n"); + return false; + } + memcpy(&size, buf, sizeof(int32_t)); + buf += sizeof(uint32_t); + } + if (size > (1<<16)) { + fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n", + size); + return false; // logically impossible + } + const char *bitmapOfRunContainers = NULL; + bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; + if (hasrun) { + int32_t s = (size + 7) / 8; + *readbytes += s; + if(*readbytes > maxbytes) {// data is corrupted? + fprintf(stderr, "Ran out of bytes while reading run bitmap.\n"); + return false; + } + bitmapOfRunContainers = buf; + buf += s; + } + uint16_t *keyscards = (uint16_t *)buf; + + *readbytes += size * 2 * sizeof(uint16_t); + if(*readbytes > maxbytes) { + fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n"); + return false; + } + buf += size * 2 * sizeof(uint16_t); + + bool is_ok = ra_init_with_capacity(answer, size); + if (!is_ok) { + fprintf(stderr, "Failed to allocate memory for roaring array. 
Bailing out.\n"); + return false; + } + + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); + answer->keys[k] = tmp; + } + if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { + *readbytes += size * 4; + if(*readbytes > maxbytes) {// data is corrupted? + fprintf(stderr, "Ran out of bytes while reading offsets.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + + // skipping the offsets + buf += size * 4; + } + // Reading the containers + for (int32_t k = 0; k < size; ++k) { + uint16_t tmp; + memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); + uint32_t thiscard = tmp + 1; + bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); + bool isrun = false; + if(hasrun) { + if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { + isbitmap = false; + isrun = true; + } + } + if (isbitmap) { + // we check that the read is allowed + size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + *readbytes += containersize; + if(*readbytes > maxbytes) { + fprintf(stderr, "Running out of bytes while reading a bitset container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + // it is now safe to read + bitset_container_t *c = bitset_container_create(); + if(c == NULL) {// memory allocation failure + fprintf(stderr, "Failed to allocate memory for a bitset container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + answer->size++; + buf += bitset_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE; + } else if (isrun) { + // we check that the read is allowed + *readbytes += sizeof(uint16_t); + if(*readbytes > maxbytes) { + fprintf(stderr, "Running out of bytes while reading a run container (header).\n"); + ra_clear(answer);// we need to clear the containers 
already allocated, and the roaring array + return false; + } + uint16_t n_runs; + memcpy(&n_runs, buf, sizeof(uint16_t)); + size_t containersize = n_runs * sizeof(rle16_t); + *readbytes += containersize; + if(*readbytes > maxbytes) {// data is corrupted? + fprintf(stderr, "Running out of bytes while reading a run container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + // it is now safe to read + + run_container_t *c = run_container_create(); + if(c == NULL) {// memory allocation failure + fprintf(stderr, "Failed to allocate memory for a run container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + answer->size++; + buf += run_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE; + } else { + // we check that the read is allowed + size_t containersize = thiscard * sizeof(uint16_t); + *readbytes += containersize; + if(*readbytes > maxbytes) {// data is corrupted? 
+ fprintf(stderr, "Running out of bytes while reading an array container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + // it is now safe to read + array_container_t *c = + array_container_create_given_capacity(thiscard); + if(c == NULL) {// memory allocation failure + fprintf(stderr, "Failed to allocate memory for an array container.\n"); + ra_clear(answer);// we need to clear the containers already allocated, and the roaring array + return false; + } + answer->size++; + buf += array_container_read(thiscard, c, buf); + answer->containers[k] = c; + answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE; + } + } + return true; +} +/* end file src/roaring_array.c */ +/* begin file src/roaring_priority_queue.c */ + +struct roaring_pq_element_s { + uint64_t size; + bool is_temporary; + roaring_bitmap_t *bitmap; +}; + +typedef struct roaring_pq_element_s roaring_pq_element_t; + +struct roaring_pq_s { + roaring_pq_element_t *elements; + uint64_t size; +}; + +typedef struct roaring_pq_s roaring_pq_t; + +static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { + return t1->size < t2->size; +} + +static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { + uint64_t i = pq->size; + pq->elements[pq->size++] = *t; + while (i > 0) { + uint64_t p = (i - 1) >> 1; + roaring_pq_element_t ap = pq->elements[p]; + if (!compare(t, &ap)) break; + pq->elements[i] = ap; + i = p; + } + pq->elements[i] = *t; +} + +static void pq_free(roaring_pq_t *pq) { + free(pq->elements); + pq->elements = NULL; // paranoid + free(pq); +} + +static void percolate_down(roaring_pq_t *pq, uint32_t i) { + uint32_t size = (uint32_t)pq->size; + uint32_t hsize = size >> 1; + roaring_pq_element_t ai = pq->elements[i]; + while (i < hsize) { + uint32_t l = (i << 1) + 1; + uint32_t r = l + 1; + roaring_pq_element_t bestc = pq->elements[l]; + if (r < size) { + if (compare(pq->elements + r, &bestc)) { + l = r; + bestc = 
pq->elements[r]; + } + } + if (!compare(&bestc, &ai)) { + break; + } + pq->elements[i] = bestc; + i = l; + } + pq->elements[i] = ai; +} + +static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { + roaring_pq_t *answer = (roaring_pq_t *)malloc(sizeof(roaring_pq_t)); + answer->elements = + (roaring_pq_element_t *)malloc(sizeof(roaring_pq_element_t) * length); + answer->size = length; + for (uint32_t i = 0; i < length; i++) { + answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; + answer->elements[i].is_temporary = false; + answer->elements[i].size = + roaring_bitmap_portable_size_in_bytes(arr[i]); + } + for (int32_t i = (length >> 1); i >= 0; i--) { + percolate_down(answer, i); + } + return answer; +} + +static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { + roaring_pq_element_t ans = *pq->elements; + if (pq->size > 1) { + pq->elements[0] = pq->elements[--pq->size]; + percolate_down(pq, 0); + } else + --pq->size; + // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; + return ans; +} + +// this function consumes and frees the inputs +static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, + roaring_bitmap_t *x2) { + uint8_t container_result_type = 0; + const int length1 = ra_get_size(&x1->high_low_container), + length2 = ra_get_size(&x2->high_low_container); + if (0 == length1) { + roaring_bitmap_free(x1); + return x2; + } + if (0 == length2) { + roaring_bitmap_free(x2); + return x1; + } + uint32_t neededcap = length1 > length2 ? 
length2 : length1; + roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); + int pos1 = 0, pos2 = 0; + uint8_t container_type_1, container_type_2; + uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + while (true) { + if (s1 == s2) { + // todo: unsharing can be inefficient as it may create a clone where + // none + // is needed, but it has the benefit of being easy to reason about. + ra_unshare_container_at_index(&x1->high_low_container, pos1); + void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, + &container_type_1); + assert(container_type_1 != SHARED_CONTAINER_TYPE_CODE); + ra_unshare_container_at_index(&x2->high_low_container, pos2); + void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, + &container_type_2); + assert(container_type_2 != SHARED_CONTAINER_TYPE_CODE); + void *c; + + if ((container_type_2 == BITSET_CONTAINER_TYPE_CODE) && + (container_type_1 != BITSET_CONTAINER_TYPE_CODE)) { + c = container_lazy_ior(c2, container_type_2, c1, + container_type_1, + &container_result_type); + container_free(c1, container_type_1); + if (c != c2) { + container_free(c2, container_type_2); + } + } else { + c = container_lazy_ior(c1, container_type_1, c2, + container_type_2, + &container_result_type); + container_free(c2, container_type_2); + if (c != c1) { + container_free(c1, container_type_1); + } + } + // since we assume that the initial containers are non-empty, the + // result here + // can only be non-empty + ra_append(&answer->high_low_container, s1, c, + container_result_type); + ++pos1; + ++pos2; + if (pos1 == length1) break; + if (pos2 == length2) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + + } else if (s1 < s2) { // s1 < s2 + void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, + &container_type_1); + 
ra_append(&answer->high_low_container, s1, c1, container_type_1); + pos1++; + if (pos1 == length1) break; + s1 = ra_get_key_at_index(&x1->high_low_container, pos1); + + } else { // s1 > s2 + void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, + &container_type_2); + ra_append(&answer->high_low_container, s2, c2, container_type_2); + pos2++; + if (pos2 == length2) break; + s2 = ra_get_key_at_index(&x2->high_low_container, pos2); + } + } + if (pos1 == length1) { + ra_append_move_range(&answer->high_low_container, + &x2->high_low_container, pos2, length2); + } else if (pos2 == length2) { + ra_append_move_range(&answer->high_low_container, + &x1->high_low_container, pos1, length1); + } + ra_clear_without_containers(&x1->high_low_container); + ra_clear_without_containers(&x2->high_low_container); + free(x1); + free(x2); + return answer; +} + +/** + * Compute the union of 'number' bitmaps using a heap. This can + * sometimes be faster than roaring_bitmap_or_many which uses + * a naive algorithm. Caller is responsible for freeing the + * result. 
+ */ +roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, + const roaring_bitmap_t **x) { + if (number == 0) { + return roaring_bitmap_create(); + } + if (number == 1) { + return roaring_bitmap_copy(x[0]); + } + roaring_pq_t *pq = create_pq(x, number); + while (pq->size > 1) { + roaring_pq_element_t x1 = pq_poll(pq); + roaring_pq_element_t x2 = pq_poll(pq); + + if (x1.is_temporary && x2.is_temporary) { + roaring_bitmap_t *newb = + lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); + // should normally return a fresh new bitmap *except* that + // it can return x1.bitmap or x2.bitmap in degenerate cases + bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); + uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); + roaring_pq_element_t newelement = { + .size = bsize, .is_temporary = temporary, .bitmap = newb}; + pq_add(pq, &newelement); + } else if (x2.is_temporary) { + roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); + x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); + pq_add(pq, &x2); + } else if (x1.is_temporary) { + roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); + x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); + + pq_add(pq, &x1); + } else { + roaring_bitmap_t *newb = + roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); + uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); + roaring_pq_element_t newelement = { + .size = bsize, .is_temporary = true, .bitmap = newb}; + + pq_add(pq, &newelement); + } + } + roaring_pq_element_t X = pq_poll(pq); + roaring_bitmap_t *answer = X.bitmap; + roaring_bitmap_repair_after_lazy(answer); + pq_free(pq); + return answer; +} +/* end file src/roaring_priority_queue.c */ diff --git a/gtk/roaring.h b/gtk/roaring.h new file mode 100644 index 0000000000..3837891781 --- /dev/null +++ b/gtk/roaring.h @@ -0,0 +1,7249 @@ +/* auto-generated on Fri 26 Jun 2020 06:06:51 AM CEST. Do not edit! 
*/ +/* begin file include/roaring/roaring_version.h */ +// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand +#ifndef ROARING_INCLUDE_ROARING_VERSION +#define ROARING_INCLUDE_ROARING_VERSION +#define ROARING_VERSION = 0.2.66, +enum { + ROARING_VERSION_MAJOR = 0, + ROARING_VERSION_MINOR = 2, + ROARING_VERSION_REVISION = 66 +}; +#endif // ROARING_INCLUDE_ROARING_VERSION +/* end file include/roaring/roaring_version.h */ +/* begin file include/roaring/portability.h */ +/* + * portability.h + * + */ + +#ifndef INCLUDE_PORTABILITY_H_ +#define INCLUDE_PORTABILITY_H_ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS 1 +#endif + +#if !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L) +#define _POSIX_C_SOURCE 200809L +#endif +#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700) +#define _XOPEN_SOURCE 700 +#endif + +#include +#include +#include // will provide posix_memalign with _POSIX_C_SOURCE as defined above +#if !(defined(__APPLE__)) && !(defined(__FreeBSD__)) +#include // this should never be needed but there are some reports that it is needed. +#endif + + +#if defined(_MSC_VER) && !defined(__clang__) && !defined(_WIN64) && !defined(ROARING_ACK_32BIT) +#pragma message( \ + "You appear to be attempting a 32-bit build under Visual Studio. We recommend a 64-bit build instead.") +#endif + +#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8 +#error This code assumes 64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported. 
+#endif + +#if defined(_MSC_VER) +#define __restrict__ __restrict +#endif + +#ifndef DISABLE_X64 // some users may want to compile as if they did not have + // an x64 processor + +/////////////////////// +/// We support X64 hardware in the following manner: +/// +/// if IS_X64 is defined then we have at least SSE and SSE2 +/// (All Intel processors sold in the recent past have at least SSE and SSE2 support, +/// going back to the Pentium 4.) +/// +/// if USESSE4 is defined then we assume at least SSE4.2, SSE4.1, +/// SSSE3, SSE3... + IS_X64 +/// if USEAVX is defined, then we assume AVX2, AVX + USESSE4 +/// +/// So if you have hardware that supports AVX but not AVX2, then "USEAVX" +/// won't be enabled. +/// If you have hardware that supports SSE4.1, but not SSE4.2, then USESSE4 +/// won't be defined. +////////////////////// + +// unless DISABLEAVX was defined, if we have __AVX2__, we enable AVX +#if (!defined(USEAVX)) && (!defined(DISABLEAVX)) && (defined(__AVX2__)) +#define USEAVX +#endif + +// if we have __SSE4_2__, we enable SSE4 +#if (defined(__POPCNT__)) && (defined(__SSE4_2__)) +#define USESSE4 +#endif + +#if defined(USEAVX) || defined(__x86_64__) || defined(_M_X64) +// we have an x64 processor +#define IS_X64 +// we include the intrinsic header +#ifndef _MSC_VER +/* Non-Microsoft C/C++-compatible compiler */ +#include // on some recent GCC, this will declare posix_memalign +#endif +#endif + +#if !defined(USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON) +# define USENEON +#endif +#if defined(USENEON) +# include +#endif + +#ifndef _MSC_VER +/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline + * assembly */ +#define ROARING_INLINE_ASM +#endif + +#ifdef USEAVX +#define USESSE4 // if we have AVX, then we have SSE4 +#define USE_BMI // we assume that AVX2 and BMI go hand and hand +#define USEAVX2FORDECODING // optimization +// vector operations should work on not just AVX +#define ROARING_VECTOR_OPERATIONS_ENABLED // vector unions 
(optimization) +#endif + +#endif // DISABLE_X64 + +#ifdef _MSC_VER +/* Microsoft C/C++-compatible compiler */ +#include + +#ifndef __clang__ // if one compiles with MSVC *with* clang, then these + // intrinsics are defined!!! +// sadly there is no way to check whether we are missing these intrinsics +// specifically. + +/* wrappers for Visual Studio built-ins that look like gcc built-ins */ +/* result might be undefined when input_num is zero */ +static inline int __builtin_ctzll(unsigned long long input_num) { + unsigned long index; +#ifdef _WIN64 // highly recommended!!! + _BitScanForward64(&index, input_num); +#else // if we must support 32-bit Windows + if ((uint32_t)input_num != 0) { + _BitScanForward(&index, (uint32_t)input_num); + } else { + _BitScanForward(&index, (uint32_t)(input_num >> 32)); + index += 32; + } +#endif + return index; +} + +/* result might be undefined when input_num is zero */ +static inline int __builtin_clzll(unsigned long long input_num) { + unsigned long index; +#ifdef _WIN64 // highly recommended!!! + _BitScanReverse64(&index, input_num); +#else // if we must support 32-bit Windows + if (input_num > 0xFFFFFFFF) { + _BitScanReverse(&index, (uint32_t)(input_num >> 32)); + index += 32; + } else { + _BitScanReverse(&index, (uint32_t)(input_num)); + } +#endif + return 63 - index; +} + +/* result might be undefined when input_num is zero */ +#ifdef USESSE4 +/* POPCNT support was added to processors around the release of SSE4.2 */ +/* USESSE4 flag guarantees POPCNT support */ +static inline int __builtin_popcountll(unsigned long long input_num) { +#ifdef _WIN64 // highly recommended!!! 
+ return (int)__popcnt64(input_num); +#else // if we must support 32-bit Windows + return (int)(__popcnt((uint32_t)input_num) + + __popcnt((uint32_t)(input_num >> 32))); +#endif +} +#else +/* software implementation avoids POPCNT */ +static inline int __builtin_popcountll(unsigned long long input_num) { + const uint64_t m1 = 0x5555555555555555; //binary: 0101... + const uint64_t m2 = 0x3333333333333333; //binary: 00110011.. + const uint64_t m4 = 0x0f0f0f0f0f0f0f0f; //binary: 4 zeros, 4 ones ... + const uint64_t h01 = 0x0101010101010101; //the sum of 256 to the power of 0,1,2,3... + + input_num -= (input_num >> 1) & m1; + input_num = (input_num & m2) + ((input_num >> 2) & m2); + input_num = (input_num + (input_num >> 4)) & m4; + return (input_num * h01) >> 56; +} +#endif + +/* Use #define so this is effective even under /Ob0 (no inline) */ +#define __builtin_unreachable() __assume(0) +#endif + +#endif + +// portable version of posix_memalign +static inline void *roaring_bitmap_aligned_malloc(size_t alignment, size_t size) { + void *p; +#ifdef _MSC_VER + p = _aligned_malloc(size, alignment); +#elif defined(__MINGW32__) || defined(__MINGW64__) + p = __mingw_aligned_malloc(size, alignment); +#else + // somehow, if this is used before including "x86intrin.h", it creates an + // implicit defined warning. 
+ if (posix_memalign(&p, alignment, size) != 0) return NULL; +#endif + return p; +} + +static inline void roaring_bitmap_aligned_free(void *memblock) { +#ifdef _MSC_VER + _aligned_free(memblock); +#elif defined(__MINGW32__) || defined(__MINGW64__) + __mingw_aligned_free(memblock); +#else + free(memblock); +#endif +} + +#if defined(_MSC_VER) +#define ALIGNED(x) __declspec(align(x)) +#else +#if defined(__GNUC__) +#define ALIGNED(x) __attribute__((aligned(x))) +#endif +#endif + +#ifdef __GNUC__ +#define WARN_UNUSED __attribute__((warn_unused_result)) +#else +#define WARN_UNUSED +#endif + +#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100) + +static inline int hamming(uint64_t x) { +#ifdef USESSE4 + return (int) _mm_popcnt_u64(x); +#else + // won't work under visual studio, but hopeful we have _mm_popcnt_u64 in + // many cases + return __builtin_popcountll(x); +#endif +} + +#ifndef UINT64_C +#define UINT64_C(c) (c##ULL) +#endif + +#ifndef UINT32_C +#define UINT32_C(c) (c##UL) +#endif + +#endif /* INCLUDE_PORTABILITY_H_ */ +/* end file include/roaring/portability.h */ +/* begin file include/roaring/containers/perfparameters.h */ +#ifndef PERFPARAMETERS_H_ +#define PERFPARAMETERS_H_ + +#include + +/** +During lazy computations, we can transform array containers into bitset +containers as +long as we can expect them to have ARRAY_LAZY_LOWERBOUND values. 
+*/ +enum { ARRAY_LAZY_LOWERBOUND = 1024 }; + +/* default initial size of a run container + setting it to zero delays the malloc.*/ +enum { RUN_DEFAULT_INIT_SIZE = 0 }; + +/* default initial size of an array container + setting it to zero delays the malloc */ +enum { ARRAY_DEFAULT_INIT_SIZE = 0 }; + +/* automatic bitset conversion during lazy or */ +#ifndef LAZY_OR_BITSET_CONVERSION +#define LAZY_OR_BITSET_CONVERSION true +#endif + +/* automatically attempt to convert a bitset to a full run during lazy + * evaluation */ +#ifndef LAZY_OR_BITSET_CONVERSION_TO_FULL +#define LAZY_OR_BITSET_CONVERSION_TO_FULL true +#endif + +/* automatically attempt to convert a bitset to a full run */ +#ifndef OR_BITSET_CONVERSION_TO_FULL +#define OR_BITSET_CONVERSION_TO_FULL true +#endif + +#endif +/* end file include/roaring/containers/perfparameters.h */ +/* begin file include/roaring/array_util.h */ +#ifndef ARRAY_UTIL_H +#define ARRAY_UTIL_H + +#include // for size_t +#include + + +/* + * Good old binary search. + * Assumes that array is sorted, has logarithmic complexity. + * if the result is x, then: + * if ( x>0 ) you have array[x] = ikey + * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that array[-x-1]=ikey) + * keys the array sorted. + */ +static inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, + uint16_t ikey) { + int32_t low = 0; + int32_t high = lenarray - 1; + while (low <= high) { + int32_t middleIndex = (low + high) >> 1; + uint16_t middleValue = array[middleIndex]; + if (middleValue < ikey) { + low = middleIndex + 1; + } else if (middleValue > ikey) { + high = middleIndex - 1; + } else { + return middleIndex; + } + } + return -(low + 1); +} + +/** + * Galloping search + * Assumes that array is sorted, has logarithmic complexity. + * if the result is x, then if x = length, you have that all values in array between pos and length + * are smaller than min. + * otherwise returns the first index x such that array[x] >= min. 
+ */ +static inline int32_t advanceUntil(const uint16_t *array, int32_t pos, + int32_t length, uint16_t min) { + int32_t lower = pos + 1; + + if ((lower >= length) || (array[lower] >= min)) { + return lower; + } + + int32_t spansize = 1; + + while ((lower + spansize < length) && (array[lower + spansize] < min)) { + spansize <<= 1; + } + int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; + + if (array[upper] == min) { + return upper; + } + if (array[upper] < min) { + // means + // array + // has no + // item + // >= min + // pos = array.length; + return length; + } + + // we know that the next-smallest span was too small + lower += (spansize >> 1); + + int32_t mid = 0; + while (lower + 1 != upper) { + mid = (lower + upper) >> 1; + if (array[mid] == min) { + return mid; + } else if (array[mid] < min) { + lower = mid; + } else { + upper = mid; + } + } + return upper; +} + +/** + * Returns number of elements which are less then $ikey. + * Array elements must be unique and sorted. + */ +static inline int32_t count_less(const uint16_t *array, int32_t lenarray, + uint16_t ikey) { + if (lenarray == 0) return 0; + int32_t pos = binarySearch(array, lenarray, ikey); + return pos >= 0 ? pos : -(pos+1); +} + +/** + * Returns number of elements which are greater then $ikey. + * Array elements must be unique and sorted. + */ +static inline int32_t count_greater(const uint16_t *array, int32_t lenarray, + uint16_t ikey) { + if (lenarray == 0) return 0; + int32_t pos = binarySearch(array, lenarray, ikey); + if (pos >= 0) { + return lenarray - (pos+1); + } else { + return lenarray - (-pos-1); + } +} + +/** + * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions + * Optimized by D. Lemire on May 3rd 2013 + * + * C should have capacity greater than the minimum of s_1 and s_b + 8 + * where 8 is sizeof(__m128i)/sizeof(uint16_t). 
+ */ +int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, + const uint16_t *__restrict__ B, size_t s_b, + uint16_t *C); + +/** + * Compute the cardinality of the intersection using SSE4 instructions + */ +int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, + size_t s_a, + const uint16_t *__restrict__ B, + size_t s_b); + +/* Computes the intersection between one small and one large set of uint16_t. + * Stores the result into buffer and return the number of elements. */ +int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, + const uint16_t *largearray, size_t size_l, + uint16_t *buffer); + +/* Computes the size of the intersection between one small and one large set of + * uint16_t. */ +int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, + size_t size_s, + const uint16_t *largearray, + size_t size_l); + + +/* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */ +bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, + const uint16_t *largearray, size_t size_l); +/** + * Generic intersection function. + */ +int32_t intersect_uint16(const uint16_t *A, const size_t lenA, + const uint16_t *B, const size_t lenB, uint16_t *out); +/** + * Compute the size of the intersection (generic). + */ +int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, + const uint16_t *B, const size_t lenB); + +/** + * Checking whether the size of the intersection is non-zero. + */ +bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, + const uint16_t *B, const size_t lenB); +/** + * Generic union function. + */ +size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, + size_t size_2, uint16_t *buffer); + +/** + * Generic XOR function. 
+ */ +int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, + const uint16_t *array_2, int32_t card_2, uint16_t *out); + +/** + * Generic difference function (ANDNOT). + */ +int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, + int length2, uint16_t *a_out); + +/** + * Generic intersection function. + */ +size_t intersection_uint32(const uint32_t *A, const size_t lenA, + const uint32_t *B, const size_t lenB, uint32_t *out); + +/** + * Generic intersection function, returns just the cardinality. + */ +size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, + const uint32_t *B, const size_t lenB); + +/** + * Generic union function. + */ +size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, + size_t size_2, uint32_t *buffer); + +/** + * A fast SSE-based union function. + */ +uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1, + const uint16_t *__restrict__ set_2, uint32_t size_2, + uint16_t *__restrict__ buffer); +/** + * A fast SSE-based XOR function. + */ +uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, + const uint16_t *__restrict__ array2, uint32_t length2, + uint16_t *__restrict__ output); + +/** + * A fast SSE-based difference function. + */ +int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, + const uint16_t *__restrict__ B, size_t s_b, + uint16_t *C); + +/** + * Generic union function, returns just the cardinality. 
+ */ +size_t union_uint32_card(const uint32_t *set_1, size_t size_1, + const uint32_t *set_2, size_t size_2); + +/** +* combines union_uint16 and union_vector16 optimally +*/ +size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, + size_t size_2, uint16_t *buffer); + + +bool memequals(const void *s1, const void *s2, size_t n); + +#endif +/* end file include/roaring/array_util.h */ +/* begin file include/roaring/roaring_types.h */ +/* + Typedefs used by various components +*/ + +#ifndef ROARING_TYPES_H +#define ROARING_TYPES_H + +typedef bool (*roaring_iterator)(uint32_t value, void *param); +typedef bool (*roaring_iterator64)(uint64_t value, void *param); + +/** +* (For advanced users.) +* The roaring_statistics_t can be used to collect detailed statistics about +* the composition of a roaring bitmap. +*/ +typedef struct roaring_statistics_s { + uint32_t n_containers; /* number of containers */ + + uint32_t n_array_containers; /* number of array containers */ + uint32_t n_run_containers; /* number of run containers */ + uint32_t n_bitset_containers; /* number of bitmap containers */ + + uint32_t + n_values_array_containers; /* number of values in array containers */ + uint32_t n_values_run_containers; /* number of values in run containers */ + uint32_t + n_values_bitset_containers; /* number of values in bitmap containers */ + + uint32_t n_bytes_array_containers; /* number of allocated bytes in array + containers */ + uint32_t n_bytes_run_containers; /* number of allocated bytes in run + containers */ + uint32_t n_bytes_bitset_containers; /* number of allocated bytes in bitmap + containers */ + + uint32_t + max_value; /* the maximal value, undefined if cardinality is zero */ + uint32_t + min_value; /* the minimal value, undefined if cardinality is zero */ + uint64_t sum_value; /* the sum of all values (could be used to compute + average) */ + + uint64_t cardinality; /* total number of values stored in the bitmap */ + + // and 
n_values_arrays, n_values_rle, n_values_bitmap +} roaring_statistics_t; + +#endif /* ROARING_TYPES_H */ +/* end file include/roaring/roaring_types.h */ +/* begin file include/roaring/utilasm.h */ +/* + * utilasm.h + * + */ + +#ifndef INCLUDE_UTILASM_H_ +#define INCLUDE_UTILASM_H_ + + +#if defined(USE_BMI) & defined(ROARING_INLINE_ASM) +#define ASMBITMANIPOPTIMIZATION // optimization flag + +#define ASM_SHIFT_RIGHT(srcReg, bitsReg, destReg) \ + __asm volatile("shrx %1, %2, %0" \ + : "=r"(destReg) \ + : /* write */ \ + "r"(bitsReg), /* read only */ \ + "r"(srcReg) /* read only */ \ + ) + +#define ASM_INPLACESHIFT_RIGHT(srcReg, bitsReg) \ + __asm volatile("shrx %1, %0, %0" \ + : "+r"(srcReg) \ + : /* read/write */ \ + "r"(bitsReg) /* read only */ \ + ) + +#define ASM_SHIFT_LEFT(srcReg, bitsReg, destReg) \ + __asm volatile("shlx %1, %2, %0" \ + : "=r"(destReg) \ + : /* write */ \ + "r"(bitsReg), /* read only */ \ + "r"(srcReg) /* read only */ \ + ) +// set bit at position testBit within testByte to 1 and +// copy cmovDst to cmovSrc if that bit was previously clear +#define ASM_SET_BIT_INC_WAS_CLEAR(testByte, testBit, count) \ + __asm volatile( \ + "bts %2, %0\n" \ + "sbb $-1, %1\n" \ + : "+r"(testByte), /* read/write */ \ + "+r"(count) \ + : /* read/write */ \ + "r"(testBit) /* read only */ \ + ) + +#define ASM_CLEAR_BIT_DEC_WAS_SET(testByte, testBit, count) \ + __asm volatile( \ + "btr %2, %0\n" \ + "sbb $0, %1\n" \ + : "+r"(testByte), /* read/write */ \ + "+r"(count) \ + : /* read/write */ \ + "r"(testBit) /* read only */ \ + ) + +#define ASM_BT64(testByte, testBit, count) \ + __asm volatile( \ + "bt %2,%1\n" \ + "sbb %0,%0" /*could use setb */ \ + : "=r"(count) \ + : /* write */ \ + "r"(testByte), /* read only */ \ + "r"(testBit) /* read only */ \ + ) + +#endif // USE_BMI +#endif /* INCLUDE_UTILASM_H_ */ +/* end file include/roaring/utilasm.h */ +/* begin file include/roaring/bitset_util.h */ +#ifndef BITSET_UTIL_H +#define BITSET_UTIL_H + +#include + + +/* + * Set 
all bits in indexes [begin,end) to true. + */ +static inline void bitset_set_range(uint64_t *bitmap, uint32_t start, + uint32_t end) { + if (start == end) return; + uint32_t firstword = start / 64; + uint32_t endword = (end - 1) / 64; + if (firstword == endword) { + bitmap[firstword] |= ((~UINT64_C(0)) << (start % 64)) & + ((~UINT64_C(0)) >> ((~end + 1) % 64)); + return; + } + bitmap[firstword] |= (~UINT64_C(0)) << (start % 64); + for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = ~UINT64_C(0); + bitmap[endword] |= (~UINT64_C(0)) >> ((~end + 1) % 64); +} + + +/* + * Find the cardinality of the bitset in [begin,begin+lenminusone] + */ +static inline int bitset_lenrange_cardinality(uint64_t *bitmap, uint32_t start, + uint32_t lenminusone) { + uint32_t firstword = start / 64; + uint32_t endword = (start + lenminusone) / 64; + if (firstword == endword) { + return hamming(bitmap[firstword] & + ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) + << (start % 64)); + } + int answer = hamming(bitmap[firstword] & ((~UINT64_C(0)) << (start % 64))); + for (uint32_t i = firstword + 1; i < endword; i++) { + answer += hamming(bitmap[i]); + } + answer += + hamming(bitmap[endword] & + (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)); + return answer; +} + +/* + * Check whether the cardinality of the bitset in [begin,begin+lenminusone] is 0 + */ +static inline bool bitset_lenrange_empty(uint64_t *bitmap, uint32_t start, + uint32_t lenminusone) { + uint32_t firstword = start / 64; + uint32_t endword = (start + lenminusone) / 64; + if (firstword == endword) { + return (bitmap[firstword] & ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) + << (start % 64)) == 0; + } + if(((bitmap[firstword] & ((~UINT64_C(0)) << (start%64)))) != 0) return false; + for (uint32_t i = firstword + 1; i < endword; i++) { + if(bitmap[i] != 0) return false; + } + if((bitmap[endword] & (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64)) != 0) return false; + return true; +} + + +/* + * Set 
all bits in indexes [begin,begin+lenminusone] to true. + */ +static inline void bitset_set_lenrange(uint64_t *bitmap, uint32_t start, + uint32_t lenminusone) { + uint32_t firstword = start / 64; + uint32_t endword = (start + lenminusone) / 64; + if (firstword == endword) { + bitmap[firstword] |= ((~UINT64_C(0)) >> ((63 - lenminusone) % 64)) + << (start % 64); + return; + } + uint64_t temp = bitmap[endword]; + bitmap[firstword] |= (~UINT64_C(0)) << (start % 64); + for (uint32_t i = firstword + 1; i < endword; i += 2) + bitmap[i] = bitmap[i + 1] = ~UINT64_C(0); + bitmap[endword] = + temp | (~UINT64_C(0)) >> (((~start + 1) - lenminusone - 1) % 64); +} + +/* + * Flip all the bits in indexes [begin,end). + */ +static inline void bitset_flip_range(uint64_t *bitmap, uint32_t start, + uint32_t end) { + if (start == end) return; + uint32_t firstword = start / 64; + uint32_t endword = (end - 1) / 64; + bitmap[firstword] ^= ~((~UINT64_C(0)) << (start % 64)); + for (uint32_t i = firstword; i < endword; i++) bitmap[i] = ~bitmap[i]; + bitmap[endword] ^= ((~UINT64_C(0)) >> ((~end + 1) % 64)); +} + +/* + * Set all bits in indexes [begin,end) to false. + */ +static inline void bitset_reset_range(uint64_t *bitmap, uint32_t start, + uint32_t end) { + if (start == end) return; + uint32_t firstword = start / 64; + uint32_t endword = (end - 1) / 64; + if (firstword == endword) { + bitmap[firstword] &= ~(((~UINT64_C(0)) << (start % 64)) & + ((~UINT64_C(0)) >> ((~end + 1) % 64))); + return; + } + bitmap[firstword] &= ~((~UINT64_C(0)) << (start % 64)); + for (uint32_t i = firstword + 1; i < endword; i++) bitmap[i] = UINT64_C(0); + bitmap[endword] &= ~((~UINT64_C(0)) >> ((~end + 1) % 64)); +} + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base". + * + * The "out" pointer should be sufficient to store the actual number of bits + * set. + * + * Returns how many values were actually decoded. 
+ * + * This function should only be expected to be faster than + * bitset_extract_setbits + * when the density of the bitset is high. + * + * This function uses AVX2 decoding. + */ +size_t bitset_extract_setbits_avx2(uint64_t *bitset, size_t length, void *vout, + size_t outcapacity, uint32_t base); + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base". + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + */ +size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout, + uint32_t base); + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out" as 16-bit integers, values start at "base" (can + *be set to zero) + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. + * + * This function should only be expected to be faster than + *bitset_extract_setbits_uint16 + * when the density of the bitset is high. + * + * This function uses SSE decoding. + */ +size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length, + uint16_t *out, size_t outcapacity, + uint16_t base); + +/* + * Given a bitset containing "length" 64-bit words, write out the position + * of all the set bits to "out", values start at "base" + * (can be set to zero) + * + * The "out" pointer should be sufficient to store the actual number of bits + *set. + * + * Returns how many values were actually decoded. 
+ */ +size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length, + uint16_t *out, uint16_t base); + +/* + * Given two bitsets containing "length" 64-bit words, write out the position + * of all the common set bits to "out", values start at "base" + * (can be set to zero) + * + * The "out" pointer should be sufficient to store the actual number of bits + * set. + * + * Returns how many values were actually decoded. + */ +size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1, + const uint64_t * __restrict__ bitset2, + size_t length, uint16_t *out, + uint16_t base); + +/* + * Given a bitset having cardinality card, set all bit values in the list (there + * are length of them) + * and return the updated cardinality. This evidently assumes that the bitset + * already contained data. + */ +uint64_t bitset_set_list_withcard(void *bitset, uint64_t card, + const uint16_t *list, uint64_t length); +/* + * Given a bitset, set all bit values in the list (there + * are length of them). + */ +void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length); + +/* + * Given a bitset having cardinality card, unset all bit values in the list + * (there are length of them) + * and return the updated cardinality. This evidently assumes that the bitset + * already contained data. + */ +uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list, + uint64_t length); + +/* + * Given a bitset having cardinality card, toggle all bit values in the list + * (there are length of them) + * and return the updated cardinality. This evidently assumes that the bitset + * already contained data. + */ + +uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card, + const uint16_t *list, uint64_t length); + +void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length); + +#ifdef USEAVX +/*** + * BEGIN Harley-Seal popcount functions. 
+ */ + +/** + * Compute the population count of a 256-bit word + * This is not especially fast, but it is convenient as part of other functions. + */ +static inline __m256i popcount256(__m256i v) { + const __m256i lookuppos = _mm256_setr_epi8( + /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, + /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, + /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, + /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4, + + /* 0 */ 4 + 0, /* 1 */ 4 + 1, /* 2 */ 4 + 1, /* 3 */ 4 + 2, + /* 4 */ 4 + 1, /* 5 */ 4 + 2, /* 6 */ 4 + 2, /* 7 */ 4 + 3, + /* 8 */ 4 + 1, /* 9 */ 4 + 2, /* a */ 4 + 2, /* b */ 4 + 3, + /* c */ 4 + 2, /* d */ 4 + 3, /* e */ 4 + 3, /* f */ 4 + 4); + const __m256i lookupneg = _mm256_setr_epi8( + /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, + /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, + /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, + /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4, + + /* 0 */ 4 - 0, /* 1 */ 4 - 1, /* 2 */ 4 - 1, /* 3 */ 4 - 2, + /* 4 */ 4 - 1, /* 5 */ 4 - 2, /* 6 */ 4 - 2, /* 7 */ 4 - 3, + /* 8 */ 4 - 1, /* 9 */ 4 - 2, /* a */ 4 - 2, /* b */ 4 - 3, + /* c */ 4 - 2, /* d */ 4 - 3, /* e */ 4 - 3, /* f */ 4 - 4); + const __m256i low_mask = _mm256_set1_epi8(0x0f); + + const __m256i lo = _mm256_and_si256(v, low_mask); + const __m256i hi = _mm256_and_si256(_mm256_srli_epi16(v, 4), low_mask); + const __m256i popcnt1 = _mm256_shuffle_epi8(lookuppos, lo); + const __m256i popcnt2 = _mm256_shuffle_epi8(lookupneg, hi); + return _mm256_sad_epu8(popcnt1, popcnt2); +} + +/** + * Simple CSA over 256 bits + */ +static inline void CSA(__m256i *h, __m256i *l, __m256i a, __m256i b, + __m256i c) { + const __m256i u = _mm256_xor_si256(a, b); + *h = _mm256_or_si256(_mm256_and_si256(a, b), _mm256_and_si256(u, c)); + *l = _mm256_xor_si256(u, c); +} + +/** + * Fast Harley-Seal AVX population count function + */ +inline static 
uint64_t avx2_harley_seal_popcount256(const __m256i *data, + const uint64_t size) { + __m256i total = _mm256_setzero_si256(); + __m256i ones = _mm256_setzero_si256(); + __m256i twos = _mm256_setzero_si256(); + __m256i fours = _mm256_setzero_si256(); + __m256i eights = _mm256_setzero_si256(); + __m256i sixteens = _mm256_setzero_si256(); + __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; + + const uint64_t limit = size - size % 16; + uint64_t i = 0; + + for (; i < limit; i += 16) { + CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i), + _mm256_lddqu_si256(data + i + 1)); + CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 2), + _mm256_lddqu_si256(data + i + 3)); + CSA(&foursA, &twos, twos, twosA, twosB); + CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 4), + _mm256_lddqu_si256(data + i + 5)); + CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 6), + _mm256_lddqu_si256(data + i + 7)); + CSA(&foursB, &twos, twos, twosA, twosB); + CSA(&eightsA, &fours, fours, foursA, foursB); + CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 8), + _mm256_lddqu_si256(data + i + 9)); + CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 10), + _mm256_lddqu_si256(data + i + 11)); + CSA(&foursA, &twos, twos, twosA, twosB); + CSA(&twosA, &ones, ones, _mm256_lddqu_si256(data + i + 12), + _mm256_lddqu_si256(data + i + 13)); + CSA(&twosB, &ones, ones, _mm256_lddqu_si256(data + i + 14), + _mm256_lddqu_si256(data + i + 15)); + CSA(&foursB, &twos, twos, twosA, twosB); + CSA(&eightsB, &fours, fours, foursA, foursB); + CSA(&sixteens, &eights, eights, eightsA, eightsB); + + total = _mm256_add_epi64(total, popcount256(sixteens)); + } + + total = _mm256_slli_epi64(total, 4); // * 16 + total = _mm256_add_epi64( + total, _mm256_slli_epi64(popcount256(eights), 3)); // += 8 * ... + total = _mm256_add_epi64( + total, _mm256_slli_epi64(popcount256(fours), 2)); // += 4 * ... + total = _mm256_add_epi64( + total, _mm256_slli_epi64(popcount256(twos), 1)); // += 2 * ... 
+ total = _mm256_add_epi64(total, popcount256(ones)); + for (; i < size; i++) + total = + _mm256_add_epi64(total, popcount256(_mm256_lddqu_si256(data + i))); + + return (uint64_t)(_mm256_extract_epi64(total, 0)) + + (uint64_t)(_mm256_extract_epi64(total, 1)) + + (uint64_t)(_mm256_extract_epi64(total, 2)) + + (uint64_t)(_mm256_extract_epi64(total, 3)); +} + +#define AVXPOPCNTFNC(opname, avx_intrinsic) \ + static inline uint64_t avx2_harley_seal_popcount256_##opname( \ + const __m256i *data1, const __m256i *data2, const uint64_t size) { \ + __m256i total = _mm256_setzero_si256(); \ + __m256i ones = _mm256_setzero_si256(); \ + __m256i twos = _mm256_setzero_si256(); \ + __m256i fours = _mm256_setzero_si256(); \ + __m256i eights = _mm256_setzero_si256(); \ + __m256i sixteens = _mm256_setzero_si256(); \ + __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ + __m256i A1, A2; \ + const uint64_t limit = size - size % 16; \ + uint64_t i = 0; \ + for (; i < limit; i += 16) { \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ + _mm256_lddqu_si256(data2 + i)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ + _mm256_lddqu_si256(data2 + i + 1)); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ + _mm256_lddqu_si256(data2 + i + 2)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ + _mm256_lddqu_si256(data2 + i + 3)); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursA, &twos, twos, twosA, twosB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ + _mm256_lddqu_si256(data2 + i + 4)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ + _mm256_lddqu_si256(data2 + i + 5)); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ + _mm256_lddqu_si256(data2 + i + 6)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ + _mm256_lddqu_si256(data2 + i + 7)); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursB, &twos, twos, twosA, 
twosB); \ + CSA(&eightsA, &fours, fours, foursA, foursB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ + _mm256_lddqu_si256(data2 + i + 8)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ + _mm256_lddqu_si256(data2 + i + 9)); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ + _mm256_lddqu_si256(data2 + i + 10)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ + _mm256_lddqu_si256(data2 + i + 11)); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursA, &twos, twos, twosA, twosB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ + _mm256_lddqu_si256(data2 + i + 12)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ + _mm256_lddqu_si256(data2 + i + 13)); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ + _mm256_lddqu_si256(data2 + i + 14)); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ + _mm256_lddqu_si256(data2 + i + 15)); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursB, &twos, twos, twosA, twosB); \ + CSA(&eightsB, &fours, fours, foursA, foursB); \ + CSA(&sixteens, &eights, eights, eightsA, eightsB); \ + total = _mm256_add_epi64(total, popcount256(sixteens)); \ + } \ + total = _mm256_slli_epi64(total, 4); \ + total = _mm256_add_epi64(total, \ + _mm256_slli_epi64(popcount256(eights), 3)); \ + total = \ + _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ + total = \ + _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ + total = _mm256_add_epi64(total, popcount256(ones)); \ + for (; i < size; i++) { \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ + _mm256_lddqu_si256(data2 + i)); \ + total = _mm256_add_epi64(total, popcount256(A1)); \ + } \ + return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ + (uint64_t)(_mm256_extract_epi64(total, 1)) + \ + (uint64_t)(_mm256_extract_epi64(total, 2)) + \ + (uint64_t)(_mm256_extract_epi64(total, 3)); \ + } 
\ + static inline uint64_t avx2_harley_seal_popcount256andstore_##opname( \ + const __m256i *__restrict__ data1, const __m256i *__restrict__ data2, \ + __m256i *__restrict__ out, const uint64_t size) { \ + __m256i total = _mm256_setzero_si256(); \ + __m256i ones = _mm256_setzero_si256(); \ + __m256i twos = _mm256_setzero_si256(); \ + __m256i fours = _mm256_setzero_si256(); \ + __m256i eights = _mm256_setzero_si256(); \ + __m256i sixteens = _mm256_setzero_si256(); \ + __m256i twosA, twosB, foursA, foursB, eightsA, eightsB; \ + __m256i A1, A2; \ + const uint64_t limit = size - size % 16; \ + uint64_t i = 0; \ + for (; i < limit; i += 16) { \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ + _mm256_lddqu_si256(data2 + i)); \ + _mm256_storeu_si256(out + i, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 1), \ + _mm256_lddqu_si256(data2 + i + 1)); \ + _mm256_storeu_si256(out + i + 1, A2); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 2), \ + _mm256_lddqu_si256(data2 + i + 2)); \ + _mm256_storeu_si256(out + i + 2, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 3), \ + _mm256_lddqu_si256(data2 + i + 3)); \ + _mm256_storeu_si256(out + i + 3, A2); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursA, &twos, twos, twosA, twosB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 4), \ + _mm256_lddqu_si256(data2 + i + 4)); \ + _mm256_storeu_si256(out + i + 4, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 5), \ + _mm256_lddqu_si256(data2 + i + 5)); \ + _mm256_storeu_si256(out + i + 5, A2); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 6), \ + _mm256_lddqu_si256(data2 + i + 6)); \ + _mm256_storeu_si256(out + i + 6, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 7), \ + _mm256_lddqu_si256(data2 + i + 7)); \ + _mm256_storeu_si256(out + i + 7, A2); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursB, &twos, twos, twosA, twosB); \ + 
CSA(&eightsA, &fours, fours, foursA, foursB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 8), \ + _mm256_lddqu_si256(data2 + i + 8)); \ + _mm256_storeu_si256(out + i + 8, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 9), \ + _mm256_lddqu_si256(data2 + i + 9)); \ + _mm256_storeu_si256(out + i + 9, A2); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 10), \ + _mm256_lddqu_si256(data2 + i + 10)); \ + _mm256_storeu_si256(out + i + 10, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 11), \ + _mm256_lddqu_si256(data2 + i + 11)); \ + _mm256_storeu_si256(out + i + 11, A2); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursA, &twos, twos, twosA, twosB); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 12), \ + _mm256_lddqu_si256(data2 + i + 12)); \ + _mm256_storeu_si256(out + i + 12, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 13), \ + _mm256_lddqu_si256(data2 + i + 13)); \ + _mm256_storeu_si256(out + i + 13, A2); \ + CSA(&twosA, &ones, ones, A1, A2); \ + A1 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 14), \ + _mm256_lddqu_si256(data2 + i + 14)); \ + _mm256_storeu_si256(out + i + 14, A1); \ + A2 = avx_intrinsic(_mm256_lddqu_si256(data1 + i + 15), \ + _mm256_lddqu_si256(data2 + i + 15)); \ + _mm256_storeu_si256(out + i + 15, A2); \ + CSA(&twosB, &ones, ones, A1, A2); \ + CSA(&foursB, &twos, twos, twosA, twosB); \ + CSA(&eightsB, &fours, fours, foursA, foursB); \ + CSA(&sixteens, &eights, eights, eightsA, eightsB); \ + total = _mm256_add_epi64(total, popcount256(sixteens)); \ + } \ + total = _mm256_slli_epi64(total, 4); \ + total = _mm256_add_epi64(total, \ + _mm256_slli_epi64(popcount256(eights), 3)); \ + total = \ + _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(fours), 2)); \ + total = \ + _mm256_add_epi64(total, _mm256_slli_epi64(popcount256(twos), 1)); \ + total = _mm256_add_epi64(total, popcount256(ones)); \ + for (; i < size; i++) { \ + A1 = 
avx_intrinsic(_mm256_lddqu_si256(data1 + i), \ + _mm256_lddqu_si256(data2 + i)); \ + _mm256_storeu_si256(out + i, A1); \ + total = _mm256_add_epi64(total, popcount256(A1)); \ + } \ + return (uint64_t)(_mm256_extract_epi64(total, 0)) + \ + (uint64_t)(_mm256_extract_epi64(total, 1)) + \ + (uint64_t)(_mm256_extract_epi64(total, 2)) + \ + (uint64_t)(_mm256_extract_epi64(total, 3)); \ + } + +AVXPOPCNTFNC(or, _mm256_or_si256) +AVXPOPCNTFNC(union, _mm256_or_si256) +AVXPOPCNTFNC(and, _mm256_and_si256) +AVXPOPCNTFNC(intersection, _mm256_and_si256) +AVXPOPCNTFNC (xor, _mm256_xor_si256) +AVXPOPCNTFNC(andnot, _mm256_andnot_si256) + +/*** + * END Harley-Seal popcount functions. + */ + +#endif // USEAVX + +#endif +/* end file include/roaring/bitset_util.h */ +/* begin file include/roaring/containers/array.h */ +/* + * array.h + * + */ + +#ifndef INCLUDE_CONTAINERS_ARRAY_H_ +#define INCLUDE_CONTAINERS_ARRAY_H_ + +#include + + +/* Containers with DEFAULT_MAX_SIZE or less integers should be arrays */ +enum { DEFAULT_MAX_SIZE = 4096 }; + +/* struct array_container - sparse representation of a bitmap + * + * @cardinality: number of indices in `array` (and the bitmap) + * @capacity: allocated size of `array` + * @array: sorted list of integers + */ +struct array_container_s { + int32_t cardinality; + int32_t capacity; + uint16_t *array; +}; + +typedef struct array_container_s array_container_t; + +/* Create a new array with default. Return NULL in case of failure. See also + * array_container_create_given_capacity. */ +array_container_t *array_container_create(void); + +/* Create a new array with a specified capacity size. Return NULL in case of + * failure. */ +array_container_t *array_container_create_given_capacity(int32_t size); + +/* Create a new array containing all values in [min,max). */ +array_container_t * array_container_create_range(uint32_t min, uint32_t max); + +/* + * Shrink the capacity to the actual size, return the number of bytes saved. 
+ */ +int array_container_shrink_to_fit(array_container_t *src); + +/* Free memory owned by `array'. */ +void array_container_free(array_container_t *array); + +/* Duplicate container */ +array_container_t *array_container_clone(const array_container_t *src); + +int32_t array_container_serialize(const array_container_t *container, + char *buf) WARN_UNUSED; + +uint32_t array_container_serialization_len(const array_container_t *container); + +void *array_container_deserialize(const char *buf, size_t buf_len); + +/* Get the cardinality of `array'. */ +static inline int array_container_cardinality(const array_container_t *array) { + return array->cardinality; +} + +static inline bool array_container_nonzero_cardinality( + const array_container_t *array) { + return array->cardinality > 0; +} + +/* Copy one container into another. We assume that they are distinct. */ +void array_container_copy(const array_container_t *src, array_container_t *dst); + +/* Add all the values in [min,max) (included) at a distance k*step from min. + The container must have a size less or equal to DEFAULT_MAX_SIZE after this + addition. */ +void array_container_add_from_range(array_container_t *arr, uint32_t min, + uint32_t max, uint16_t step); + +/* Set the cardinality to zero (does not release memory). */ +static inline void array_container_clear(array_container_t *array) { + array->cardinality = 0; +} + +static inline bool array_container_empty(const array_container_t *array) { + return array->cardinality == 0; +} + +/* check whether the cardinality is equal to the capacity (this does not mean +* that it contains 1<<16 elements) */ +static inline bool array_container_full(const array_container_t *array) { + return array->cardinality == array->capacity; +} + + +/* Compute the union of `src_1' and `src_2' and write the result to `dst' + * It is assumed that `dst' is distinct from both `src_1' and `src_2'. 
 */ +void array_container_union(const array_container_t *src_1, + const array_container_t *src_2, + array_container_t *dst); + +/* symmetric difference, see array_container_union */ +void array_container_xor(const array_container_t *array_1, + const array_container_t *array_2, + array_container_t *out); + +/* Computes the intersection of src_1 and src_2 and writes the result to + * dst. It is assumed that dst is distinct from both src_1 and src_2. */ +void array_container_intersection(const array_container_t *src_1, + const array_container_t *src_2, + array_container_t *dst); + +/* Check whether src_1 and src_2 intersect. */ +bool array_container_intersect(const array_container_t *src_1, + const array_container_t *src_2); + + +/* computes the size of the intersection between two arrays. + */ +int array_container_intersection_cardinality(const array_container_t *src_1, + const array_container_t *src_2); + +/* computes the intersection of array1 and array2 and writes the result to + * array1. + * */ +void array_container_intersection_inplace(array_container_t *src_1, + const array_container_t *src_2); + +/* + * Write out the 16-bit integers contained in this container as a list of 32-bit + * integers using base + * as the starting value (it might be expected that base has zeros in its 16 + * least significant bits). + * The function returns the number of values written. + * The caller is responsible for allocating enough memory in out. + */ +int array_container_to_uint32_array(void *vout, const array_container_t *cont, + uint32_t base); + +/* Compute the number of runs */ +int32_t array_container_number_of_runs(const array_container_t *a); + +/* + * Print this container using printf (useful for debugging). + */ +void array_container_printf(const array_container_t *v); + +/* + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. 
+ */ +void array_container_printf_as_uint32_array(const array_container_t *v, + uint32_t base); + +/** + * Return the serialized size in bytes of a container having cardinality "card". + */ +static inline int32_t array_container_serialized_size_in_bytes(int32_t card) { + return card * 2 + 2; +} + +/** + * Increase capacity to at least min. + * Whether the existing data needs to be copied over depends on the "preserve" + * parameter. If preserve is false, then the new content will be uninitialized, + * otherwise the old content is copied. + */ +void array_container_grow(array_container_t *container, int32_t min, + bool preserve); + +bool array_container_iterate(const array_container_t *cont, uint32_t base, + roaring_iterator iterator, void *ptr); +bool array_container_iterate64(const array_container_t *cont, uint32_t base, + roaring_iterator64 iterator, uint64_t high_bits, + void *ptr); + +/** + * Writes the underlying array to buf, outputs how many bytes were written. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes written should be + * array_container_size_in_bytes(container). + * + */ +int32_t array_container_write(const array_container_t *container, char *buf); +/** + * Reads the instance from buf, outputs how many bytes were read. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes read should be array_container_size_in_bytes(container). + * You need to provide the (known) cardinality. + */ +int32_t array_container_read(int32_t cardinality, array_container_t *container, + const char *buf); + +/** + * Return the serialized size in bytes of a container (see + * bitset_container_write) + * This is meant to be compatible with the Java and Go versions of Roaring and + * assumes + * that the cardinality of the container is already known. 
+ * + */ +static inline int32_t array_container_size_in_bytes( + const array_container_t *container) { + return container->cardinality * sizeof(uint16_t); +} + +/** + * Return true if the two arrays have the same content. + */ +static inline bool array_container_equals( + const array_container_t *container1, + const array_container_t *container2) { + + if (container1->cardinality != container2->cardinality) { + return false; + } + return memequals(container1->array, container2->array, container1->cardinality*2); +} + +/** + * Return true if container1 is a subset of container2. + */ +bool array_container_is_subset(const array_container_t *container1, + const array_container_t *container2); + +/** + * If the element of given rank is in this container, supposing that the first + * element has rank start_rank, then the function returns true and sets element + * accordingly. + * Otherwise, it returns false and update start_rank. + */ +static inline bool array_container_select(const array_container_t *container, + uint32_t *start_rank, uint32_t rank, + uint32_t *element) { + int card = array_container_cardinality(container); + if (*start_rank + card <= rank) { + *start_rank += card; + return false; + } else { + *element = container->array[rank - *start_rank]; + return true; + } +} + +/* Computes the difference of array1 and array2 and write the result + * to array out. + * Array out does not need to be distinct from array_1 + */ +void array_container_andnot(const array_container_t *array_1, + const array_container_t *array_2, + array_container_t *out); + +/* Append x to the set. Assumes that the value is larger than any preceding + * values. 
*/ +static inline void array_container_append(array_container_t *arr, + uint16_t pos) { + const int32_t capacity = arr->capacity; + + if (array_container_full(arr)) { + array_container_grow(arr, capacity + 1, true); + } + + arr->array[arr->cardinality++] = pos; +} + +/** + * Add value to the set if final cardinality doesn't exceed max_cardinality. + * Return code: + * 1 -- value was added + * 0 -- value was already present + * -1 -- value was not added because cardinality would exceed max_cardinality + */ +static inline int array_container_try_add(array_container_t *arr, uint16_t value, + int32_t max_cardinality) { + const int32_t cardinality = arr->cardinality; + + // best case, we can append. + if ((array_container_empty(arr) || arr->array[cardinality - 1] < value) && + cardinality < max_cardinality) { + array_container_append(arr, value); + return 1; + } + + const int32_t loc = binarySearch(arr->array, cardinality, value); + + if (loc >= 0) { + return 0; + } else if (cardinality < max_cardinality) { + if (array_container_full(arr)) { + array_container_grow(arr, arr->capacity + 1, true); + } + const int32_t insert_idx = -loc - 1; + memmove(arr->array + insert_idx + 1, arr->array + insert_idx, + (cardinality - insert_idx) * sizeof(uint16_t)); + arr->array[insert_idx] = value; + arr->cardinality++; + return 1; + } else { + return -1; + } +} + +/* Add value to the set. Returns true if x was not already present. */ +static inline bool array_container_add(array_container_t *arr, uint16_t value) { + return array_container_try_add(arr, value, INT32_MAX) == 1; +} + +/* Remove x from the set. Returns true if x was present. 
 */ +static inline bool array_container_remove(array_container_t *arr, + uint16_t pos) { + const int32_t idx = binarySearch(arr->array, arr->cardinality, pos); + const bool is_present = idx >= 0; + if (is_present) { + memmove(arr->array + idx, arr->array + idx + 1, + (arr->cardinality - idx - 1) * sizeof(uint16_t)); + arr->cardinality--; + } + + return is_present; +} + +/* Check whether x is present. */ +static inline bool array_container_contains(const array_container_t *arr, + uint16_t pos) { + // return binarySearch(arr->array, arr->cardinality, pos) >= 0; + // binary search with fallback to linear search for short ranges + int32_t low = 0; + const uint16_t * carr = (const uint16_t *) arr->array; + int32_t high = arr->cardinality - 1; + // while (high - low >= 0) { + while(high >= low + 16) { + int32_t middleIndex = (low + high)>>1; + uint16_t middleValue = carr[middleIndex]; + if (middleValue < pos) { + low = middleIndex + 1; + } else if (middleValue > pos) { + high = middleIndex - 1; + } else { + return true; + } + } + + for (int i=low; i <= high; i++) { + uint16_t v = carr[i]; + if (v == pos) { + return true; + } + if ( v > pos ) return false; + } + return false; + +} + +/* Check whether a range of values from range_start (included) to range_end (excluded) is present. 
 */ +static inline bool array_container_contains_range(const array_container_t *arr, + uint32_t range_start, uint32_t range_end) { + + const uint16_t rs_included = range_start; + const uint16_t re_included = range_end - 1; + + const uint16_t *carr = (const uint16_t *) arr->array; + + const int32_t start = advanceUntil(carr, -1, arr->cardinality, rs_included); + const int32_t end = advanceUntil(carr, start - 1, arr->cardinality, re_included); + + return (start < arr->cardinality) && (end < arr->cardinality) + && (((uint16_t)(end - start)) == re_included - rs_included) + && (carr[start] == rs_included) && (carr[end] == re_included); +} + +/* Returns the smallest value (assumes not empty) */ +static inline uint16_t array_container_minimum(const array_container_t *arr) { + if (arr->cardinality == 0) return 0; + return arr->array[0]; +} + +/* Returns the largest value (assumes not empty) */ +static inline uint16_t array_container_maximum(const array_container_t *arr) { + if (arr->cardinality == 0) return 0; + return arr->array[arr->cardinality - 1]; +} + +/* Returns the number of values equal or smaller than x */ +static inline int array_container_rank(const array_container_t *arr, uint16_t x) { + const int32_t idx = binarySearch(arr->array, arr->cardinality, x); + const bool is_present = idx >= 0; + if (is_present) { + return idx + 1; + } else { + return -idx - 1; + } +} + +/* Returns the index of the first value equal or larger than x, or -1 */ +static inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x) { + const int32_t idx = binarySearch(arr->array, arr->cardinality, x); + const bool is_present = idx >= 0; + if (is_present) { + return idx; + } else { + int32_t candidate = - idx - 1; + if(candidate < arr->cardinality) return candidate; + return -1; + } +} + +/* + * Adds all values in range [min,max] using hint: + * nvals_less is the number of array values less than $min + * nvals_greater is the number of array values greater than 
$max + */ +static inline void array_container_add_range_nvals(array_container_t *array, + uint32_t min, uint32_t max, + int32_t nvals_less, + int32_t nvals_greater) { + int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; + if (union_cardinality > array->capacity) { + array_container_grow(array, union_cardinality, true); + } + memmove(&(array->array[union_cardinality - nvals_greater]), + &(array->array[array->cardinality - nvals_greater]), + nvals_greater * sizeof(uint16_t)); + for (uint32_t i = 0; i <= max - min; i++) { + array->array[nvals_less + i] = min + i; + } + array->cardinality = union_cardinality; +} + +/** + * Adds all values in range [min,max]. + */ +static inline void array_container_add_range(array_container_t *array, + uint32_t min, uint32_t max) { + int32_t nvals_greater = count_greater(array->array, array->cardinality, max); + int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); + array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); +} + +/* + * Removes all elements array[pos] .. 
array[pos+count-1] + */ +static inline void array_container_remove_range(array_container_t *array, + uint32_t pos, uint32_t count) { + if (count != 0) { + memmove(&(array->array[pos]), &(array->array[pos+count]), + (array->cardinality - pos - count) * sizeof(uint16_t)); + array->cardinality -= count; + } +} + +#endif /* INCLUDE_CONTAINERS_ARRAY_H_ */ +/* end file include/roaring/containers/array.h */ +/* begin file include/roaring/containers/bitset.h */ +/* + * bitset.h + * + */ + +#ifndef INCLUDE_CONTAINERS_BITSET_H_ +#define INCLUDE_CONTAINERS_BITSET_H_ + +#include +#include + +#ifdef USEAVX +#define ALIGN_AVX __attribute__((aligned(sizeof(__m256i)))) +#else +#define ALIGN_AVX +#endif + +enum { + BITSET_CONTAINER_SIZE_IN_WORDS = (1 << 16) / 64, + BITSET_UNKNOWN_CARDINALITY = -1 +}; + +struct bitset_container_s { + int32_t cardinality; + uint64_t *array; +}; + +typedef struct bitset_container_s bitset_container_t; + +/* Create a new bitset. Return NULL in case of failure. */ +bitset_container_t *bitset_container_create(void); + +/* Free memory. */ +void bitset_container_free(bitset_container_t *bitset); + +/* Clear bitset (sets bits to 0). */ +void bitset_container_clear(bitset_container_t *bitset); + +/* Set all bits to 1. */ +void bitset_container_set_all(bitset_container_t *bitset); + +/* Duplicate bitset */ +bitset_container_t *bitset_container_clone(const bitset_container_t *src); + +int32_t bitset_container_serialize(const bitset_container_t *container, + char *buf) WARN_UNUSED; + +uint32_t bitset_container_serialization_len(void); + +void *bitset_container_deserialize(const char *buf, size_t buf_len); + +/* Set the bit in [begin,end). WARNING: as of April 2016, this method is slow + * and + * should not be used in performance-sensitive code. Ever. */ +void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, + uint32_t end); + +#ifdef ASMBITMANIPOPTIMIZATION +/* Set the ith bit. 
*/ +static inline void bitset_container_set(bitset_container_t *bitset, + uint16_t pos) { + uint64_t shift = 6; + uint64_t offset; + uint64_t p = pos; + ASM_SHIFT_RIGHT(p, shift, offset); + uint64_t load = bitset->array[offset]; + ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); + bitset->array[offset] = load; +} + +/* Unset the ith bit. */ +static inline void bitset_container_unset(bitset_container_t *bitset, + uint16_t pos) { + uint64_t shift = 6; + uint64_t offset; + uint64_t p = pos; + ASM_SHIFT_RIGHT(p, shift, offset); + uint64_t load = bitset->array[offset]; + ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); + bitset->array[offset] = load; +} + +/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower + * than bitset_container_set. */ +static inline bool bitset_container_add(bitset_container_t *bitset, + uint16_t pos) { + uint64_t shift = 6; + uint64_t offset; + uint64_t p = pos; + ASM_SHIFT_RIGHT(p, shift, offset); + uint64_t load = bitset->array[offset]; + // could be possibly slightly further optimized + const int32_t oldcard = bitset->cardinality; + ASM_SET_BIT_INC_WAS_CLEAR(load, p, bitset->cardinality); + bitset->array[offset] = load; + return bitset->cardinality - oldcard; +} + +/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be + * slower than bitset_container_unset. */ +static inline bool bitset_container_remove(bitset_container_t *bitset, + uint16_t pos) { + uint64_t shift = 6; + uint64_t offset; + uint64_t p = pos; + ASM_SHIFT_RIGHT(p, shift, offset); + uint64_t load = bitset->array[offset]; + // could be possibly slightly further optimized + const int32_t oldcard = bitset->cardinality; + ASM_CLEAR_BIT_DEC_WAS_SET(load, p, bitset->cardinality); + bitset->array[offset] = load; + return oldcard - bitset->cardinality; +} + +/* Get the value of the ith bit. 
*/ +static inline bool bitset_container_get(const bitset_container_t *bitset, + uint16_t pos) { + uint64_t word = bitset->array[pos >> 6]; + const uint64_t p = pos; + ASM_INPLACESHIFT_RIGHT(word, p); + return word & 1; +} + +#else + +/* Set the ith bit. */ +static inline void bitset_container_set(bitset_container_t *bitset, + uint16_t pos) { + const uint64_t old_word = bitset->array[pos >> 6]; + const int index = pos & 63; + const uint64_t new_word = old_word | (UINT64_C(1) << index); + bitset->cardinality += (uint32_t)((old_word ^ new_word) >> index); + bitset->array[pos >> 6] = new_word; +} + +/* Unset the ith bit. */ +static inline void bitset_container_unset(bitset_container_t *bitset, + uint16_t pos) { + const uint64_t old_word = bitset->array[pos >> 6]; + const int index = pos & 63; + const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); + bitset->cardinality -= (uint32_t)((old_word ^ new_word) >> index); + bitset->array[pos >> 6] = new_word; +} + +/* Add `pos' to `bitset'. Returns true if `pos' was not present. Might be slower + * than bitset_container_set. */ +static inline bool bitset_container_add(bitset_container_t *bitset, + uint16_t pos) { + const uint64_t old_word = bitset->array[pos >> 6]; + const int index = pos & 63; + const uint64_t new_word = old_word | (UINT64_C(1) << index); + const uint64_t increment = (old_word ^ new_word) >> index; + bitset->cardinality += (uint32_t)increment; + bitset->array[pos >> 6] = new_word; + return increment > 0; +} + +/* Remove `pos' from `bitset'. Returns true if `pos' was present. Might be + * slower than bitset_container_unset. 
 */ +static inline bool bitset_container_remove(bitset_container_t *bitset, + uint16_t pos) { + const uint64_t old_word = bitset->array[pos >> 6]; + const int index = pos & 63; + const uint64_t new_word = old_word & (~(UINT64_C(1) << index)); + const uint64_t increment = (old_word ^ new_word) >> index; + bitset->cardinality -= (uint32_t)increment; + bitset->array[pos >> 6] = new_word; + return increment > 0; +} + +/* Get the value of the ith bit. */ +static inline bool bitset_container_get(const bitset_container_t *bitset, + uint16_t pos) { + const uint64_t word = bitset->array[pos >> 6]; + return (word >> (pos & 63)) & 1; +} + +#endif + +/* +* Check if all bits are set in a range of positions from pos_start (included) to +* pos_end (excluded). +*/ +static inline bool bitset_container_get_range(const bitset_container_t *bitset, + uint32_t pos_start, uint32_t pos_end) { + + const uint32_t start = pos_start >> 6; + const uint32_t end = pos_end >> 6; + + const uint64_t first = ~((1ULL << (pos_start & 0x3F)) - 1); + const uint64_t last = (1ULL << (pos_end & 0x3F)) - 1; + + if (start == end) return ((bitset->array[end] & first & last) == (first & last)); + if ((bitset->array[start] & first) != first) return false; + + if ((end < BITSET_CONTAINER_SIZE_IN_WORDS) && ((bitset->array[end] & last) != last)){ + + return false; + } + + for (uint16_t i = start + 1; (i < BITSET_CONTAINER_SIZE_IN_WORDS) && (i < end); ++i){ + + if (bitset->array[i] != UINT64_C(0xFFFFFFFFFFFFFFFF)) return false; + } + + return true; +} + +/* Check whether `pos' is present in `bitset'. Calls bitset_container_get. */ +static inline bool bitset_container_contains(const bitset_container_t *bitset, + uint16_t pos) { + return bitset_container_get(bitset, pos); +} + +/* +* Check whether a range of bits from position `pos_start' (included) to `pos_end' (excluded) +* is present in `bitset'. Calls bitset_container_get_range. 
+*/ +static inline bool bitset_container_contains_range(const bitset_container_t *bitset, + uint32_t pos_start, uint32_t pos_end) { + return bitset_container_get_range(bitset, pos_start, pos_end); +} + +/* Get the number of bits set */ +static inline int bitset_container_cardinality( + const bitset_container_t *bitset) { + return bitset->cardinality; +} + + + + +/* Copy one container into another. We assume that they are distinct. */ +void bitset_container_copy(const bitset_container_t *source, + bitset_container_t *dest); + +/* Add all the values [min,max) at a distance k*step from min: min, + * min+step,.... */ +void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min, + uint32_t max, uint16_t step); + +/* Get the number of bits set (force computation). This does not modify bitset. + * To update the cardinality, you should do + * bitset->cardinality = bitset_container_compute_cardinality(bitset).*/ +int bitset_container_compute_cardinality(const bitset_container_t *bitset); + +/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), + when the cardinality is unknown, it is computed and stored in the struct */ +static inline bool bitset_container_nonzero_cardinality( + bitset_container_t *bitset) { + // account for laziness + if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { + // could bail early instead with a nonzero result + bitset->cardinality = bitset_container_compute_cardinality(bitset); + } + return bitset->cardinality > 0; +} + +/* Check whether this bitset is empty (see bitset_container_nonzero_cardinality for the reverse), + * it never modifies the bitset struct. 
*/ +static inline bool bitset_container_empty( + const bitset_container_t *bitset) { + if (bitset->cardinality == BITSET_UNKNOWN_CARDINALITY) { + for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { + if((bitset->array[i]) != 0) return false; + } + return true; + } + return bitset->cardinality == 0; +} + + +/* Get whether there is at least one bit set (see bitset_container_empty for the reverse), + the bitset is never modified */ +static inline bool bitset_container_const_nonzero_cardinality( + const bitset_container_t *bitset) { + return !bitset_container_empty(bitset); +} + +/* + * Check whether the two bitsets intersect + */ +bool bitset_container_intersect(const bitset_container_t *src_1, + const bitset_container_t *src_2); + +/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the + * cardinality. */ +int bitset_container_or(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the union of bitsets `src_1' and `src_2' and return the cardinality. + */ +int bitset_container_or_justcard(const bitset_container_t *src_1, + const bitset_container_t *src_2); + +/* Computes the union of bitsets `src_1' and `src_2' into `dst' and return the + * cardinality. Same as bitset_container_or. */ +int bitset_container_union(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the union of bitsets `src_1' and `src_2' and return the + * cardinality. Same as bitset_container_or_justcard. */ +int bitset_container_union_justcard(const bitset_container_t *src_1, + const bitset_container_t *src_2); + +/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not + * update the cardinality. Provided to optimize chained operations. 
*/ +int bitset_container_or_nocard(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the union of bitsets `src_1' and `src_2' into `dst', but does not + * update the cardinality. Same as bitset_container_or_nocard */ +int bitset_container_union_nocard(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and + * return the cardinality. */ +int bitset_container_and(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the intersection of bitsets `src_1' and `src_2' and return the + * cardinality. */ +int bitset_container_and_justcard(const bitset_container_t *src_1, + const bitset_container_t *src_2); + +/* Computes the intersection of bitsets `src_1' and `src_2' into `dst' and + * return the cardinality. Same as bitset_container_and. */ +int bitset_container_intersection(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the intersection of bitsets `src_1' and `src_2' and return the + * cardinality. Same as bitset_container_and_justcard. */ +int bitset_container_intersection_justcard(const bitset_container_t *src_1, + const bitset_container_t *src_2); + +/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. Provided to optimize chained operations. */ +int bitset_container_and_nocard(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the intersection of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. 
Same as bitset_container_and_nocard */ +int bitset_container_intersection_nocard(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst' and + * return the cardinality. */ +int bitset_container_xor(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the exclusive or of bitsets `src_1' and `src_2' and return the + * cardinality. */ +int bitset_container_xor_justcard(const bitset_container_t *src_1, + const bitset_container_t *src_2); + +/* Computes the exclusive or of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. Provided to optimize chained operations. */ +int bitset_container_xor_nocard(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the and not of bitsets `src_1' and `src_2' into `dst' and return the + * cardinality. */ +int bitset_container_andnot(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Computes the and not of bitsets `src_1' and `src_2' and return the + * cardinality. */ +int bitset_container_andnot_justcard(const bitset_container_t *src_1, + const bitset_container_t *src_2); + +/* Computes the and not or of bitsets `src_1' and `src_2' into `dst', but does + * not update the cardinality. Provided to optimize chained operations. */ +int bitset_container_andnot_nocard(const bitset_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* + * Write out the 16-bit integers contained in this container as a list of 32-bit + * integers using base + * as the starting value (it might be expected that base has zeros in its 16 + * least significant bits). + * The function returns the number of values written. + * The caller is responsible for allocating enough memory in out. 
+ * The out pointer should point to enough memory (the cardinality times 32 + * bits). + */ +int bitset_container_to_uint32_array(void *out, const bitset_container_t *cont, + uint32_t base); + +/* + * Print this container using printf (useful for debugging). + */ +void bitset_container_printf(const bitset_container_t *v); + +/* + * Print this container using printf as a comma-separated list of 32-bit + * integers starting at base. + */ +void bitset_container_printf_as_uint32_array(const bitset_container_t *v, + uint32_t base); + +/** + * Return the serialized size in bytes of a container. + */ +static inline int32_t bitset_container_serialized_size_in_bytes(void) { + return BITSET_CONTAINER_SIZE_IN_WORDS * 8; +} + +/** + * Return the the number of runs. + */ +int bitset_container_number_of_runs(bitset_container_t *b); + +bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, + roaring_iterator iterator, void *ptr); +bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, + roaring_iterator64 iterator, uint64_t high_bits, + void *ptr); + +/** + * Writes the underlying array to buf, outputs how many bytes were written. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes written should be + * bitset_container_size_in_bytes(container). + */ +int32_t bitset_container_write(const bitset_container_t *container, char *buf); + +/** + * Reads the instance from buf, outputs how many bytes were read. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes read should be bitset_container_size_in_bytes(container). + * You need to provide the (known) cardinality. + */ +int32_t bitset_container_read(int32_t cardinality, + bitset_container_t *container, const char *buf); +/** + * Return the serialized size in bytes of a container (see + * bitset_container_write). 
+ * This is meant to be compatible with the Java and Go versions of Roaring and + * assumes + * that the cardinality of the container is already known or can be computed. + */ +static inline int32_t bitset_container_size_in_bytes( + const bitset_container_t *container) { + (void)container; + return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); +} + +/** + * Return true if the two containers have the same content. + */ +bool bitset_container_equals(const bitset_container_t *container1, + const bitset_container_t *container2); + +/** +* Return true if container1 is a subset of container2. +*/ +bool bitset_container_is_subset(const bitset_container_t *container1, + const bitset_container_t *container2); + +/** + * If the element of given rank is in this container, supposing that the first + * element has rank start_rank, then the function returns true and sets element + * accordingly. + * Otherwise, it returns false and update start_rank. + */ +bool bitset_container_select(const bitset_container_t *container, + uint32_t *start_rank, uint32_t rank, + uint32_t *element); + +/* Returns the smallest value (assumes not empty) */ +uint16_t bitset_container_minimum(const bitset_container_t *container); + +/* Returns the largest value (assumes not empty) */ +uint16_t bitset_container_maximum(const bitset_container_t *container); + +/* Returns the number of values equal or smaller than x */ +int bitset_container_rank(const bitset_container_t *container, uint16_t x); + +/* Returns the index of the first value equal or larger than x, or -1 */ +int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x); +#endif /* INCLUDE_CONTAINERS_BITSET_H_ */ +/* end file include/roaring/containers/bitset.h */ +/* begin file include/roaring/containers/run.h */ +/* + * run.h + * + */ + +#ifndef INCLUDE_CONTAINERS_RUN_H_ +#define INCLUDE_CONTAINERS_RUN_H_ + +#include +#include +#include +#include + + +/* struct rle16_s - run length pair + * + * @value: start 
position of the run + * @length: length of the run is `length + 1` + * + * An RLE pair {v, l} would represent the integers between the interval + * [v, v+l+1], e.g. {3, 2} = [3, 4, 5]. + */ +struct rle16_s { + uint16_t value; + uint16_t length; +}; + +typedef struct rle16_s rle16_t; + +/* struct run_container_s - run container bitmap + * + * @n_runs: number of rle_t pairs in `runs`. + * @capacity: capacity in rle_t pairs `runs` can hold. + * @runs: pairs of rle_t. + * + */ +struct run_container_s { + int32_t n_runs; + int32_t capacity; + rle16_t *runs; +}; + +typedef struct run_container_s run_container_t; + +/* Create a new run container. Return NULL in case of failure. */ +run_container_t *run_container_create(void); + +/* Create a new run container with given capacity. Return NULL in case of + * failure. */ +run_container_t *run_container_create_given_capacity(int32_t size); + +/* + * Shrink the capacity to the actual size, return the number of bytes saved. + */ +int run_container_shrink_to_fit(run_container_t *src); + +/* Free memory owned by `run'. */ +void run_container_free(run_container_t *run); + +/* Duplicate container */ +run_container_t *run_container_clone(const run_container_t *src); + +int32_t run_container_serialize(const run_container_t *container, + char *buf) WARN_UNUSED; + +uint32_t run_container_serialization_len(const run_container_t *container); + +void *run_container_deserialize(const char *buf, size_t buf_len); + +/* + * Effectively deletes the value at index index, repacking data. 
+ */ +static inline void recoverRoomAtIndex(run_container_t *run, uint16_t index) { + memmove(run->runs + index, run->runs + (1 + index), + (run->n_runs - index - 1) * sizeof(rle16_t)); + run->n_runs--; +} + +/** + * Good old binary search through rle data + */ +static inline int32_t interleavedBinarySearch(const rle16_t *array, int32_t lenarray, + uint16_t ikey) { + int32_t low = 0; + int32_t high = lenarray - 1; + while (low <= high) { + int32_t middleIndex = (low + high) >> 1; + uint16_t middleValue = array[middleIndex].value; + if (middleValue < ikey) { + low = middleIndex + 1; + } else if (middleValue > ikey) { + high = middleIndex - 1; + } else { + return middleIndex; + } + } + return -(low + 1); +} + +/* + * Returns index of the run which contains $ikey + */ +static inline int32_t rle16_find_run(const rle16_t *array, int32_t lenarray, + uint16_t ikey) { + int32_t low = 0; + int32_t high = lenarray - 1; + while (low <= high) { + int32_t middleIndex = (low + high) >> 1; + uint16_t min = array[middleIndex].value; + uint16_t max = array[middleIndex].value + array[middleIndex].length; + if (ikey > max) { + low = middleIndex + 1; + } else if (ikey < min) { + high = middleIndex - 1; + } else { + return middleIndex; + } + } + return -(low + 1); +} + + +/** + * Returns number of runs which can't be merged with the key because they + * are less than the key. + * Note that [5,6,7,8] can be merged with the key 9 and won't be counted. 
+ */ +static inline int32_t rle16_count_less(const rle16_t* array, int32_t lenarray, + uint16_t key) { + if (lenarray == 0) return 0; + int32_t low = 0; + int32_t high = lenarray - 1; + while (low <= high) { + int32_t middleIndex = (low + high) >> 1; + uint16_t min_value = array[middleIndex].value; + uint16_t max_value = array[middleIndex].value + array[middleIndex].length; + if (max_value + UINT32_C(1) < key) { // uint32 arithmetic + low = middleIndex + 1; + } else if (key < min_value) { + high = middleIndex - 1; + } else { + return middleIndex; + } + } + return low; +} + +static inline int32_t rle16_count_greater(const rle16_t* array, int32_t lenarray, + uint16_t key) { + if (lenarray == 0) return 0; + int32_t low = 0; + int32_t high = lenarray - 1; + while (low <= high) { + int32_t middleIndex = (low + high) >> 1; + uint16_t min_value = array[middleIndex].value; + uint16_t max_value = array[middleIndex].value + array[middleIndex].length; + if (max_value < key) { + low = middleIndex + 1; + } else if (key + UINT32_C(1) < min_value) { // uint32 arithmetic + high = middleIndex - 1; + } else { + return lenarray - (middleIndex + 1); + } + } + return lenarray - low; +} + +/** + * increase capacity to at least min. Whether the + * existing data needs to be copied over depends on copy. If "copy" is false, + * then the new content will be uninitialized, otherwise a copy is made. + */ +void run_container_grow(run_container_t *run, int32_t min, bool copy); + +/** + * Moves the data so that we can write data at index + */ +static inline void makeRoomAtIndex(run_container_t *run, uint16_t index) { + /* This function calls realloc + memmove sequentially to move by one index. + * Potentially copying twice the array. + */ + if (run->n_runs + 1 > run->capacity) + run_container_grow(run, run->n_runs + 1, true); + memmove(run->runs + 1 + index, run->runs + index, + (run->n_runs - index) * sizeof(rle16_t)); + run->n_runs++; +} + +/* Add `pos' to `run'. 
Returns true if `pos' was not present. */ +bool run_container_add(run_container_t *run, uint16_t pos); + +/* Remove `pos' from `run'. Returns true if `pos' was present. */ +static inline bool run_container_remove(run_container_t *run, uint16_t pos) { + int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); + if (index >= 0) { + int32_t le = run->runs[index].length; + if (le == 0) { + recoverRoomAtIndex(run, (uint16_t)index); + } else { + run->runs[index].value++; + run->runs[index].length--; + } + return true; + } + index = -index - 2; // points to preceding value, possibly -1 + if (index >= 0) { // possible match + int32_t offset = pos - run->runs[index].value; + int32_t le = run->runs[index].length; + if (offset < le) { + // need to break in two + run->runs[index].length = (uint16_t)(offset - 1); + // need to insert + uint16_t newvalue = pos + 1; + int32_t newlength = le - offset - 1; + makeRoomAtIndex(run, (uint16_t)(index + 1)); + run->runs[index + 1].value = newvalue; + run->runs[index + 1].length = (uint16_t)newlength; + return true; + + } else if (offset == le) { + run->runs[index].length--; + return true; + } + } + // no match + return false; +} + +/* Check whether `pos' is present in `run'. */ +static inline bool run_container_contains(const run_container_t *run, uint16_t pos) { + int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); + if (index >= 0) return true; + index = -index - 2; // points to preceding value, possibly -1 + if (index != -1) { // possible match + int32_t offset = pos - run->runs[index].value; + int32_t le = run->runs[index].length; + if (offset <= le) return true; + } + return false; +} + +/* +* Check whether all positions in a range of positions from pos_start (included) +* to pos_end (excluded) is present in `run'. 
+*/ +static inline bool run_container_contains_range(const run_container_t *run, + uint32_t pos_start, uint32_t pos_end) { + uint32_t count = 0; + int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos_start); + if (index < 0) { + index = -index - 2; + if ((index == -1) || ((pos_start - run->runs[index].value) > run->runs[index].length)){ + return false; + } + } + for (int32_t i = index; i < run->n_runs; ++i) { + const uint32_t stop = run->runs[i].value + run->runs[i].length; + if (run->runs[i].value >= pos_end) break; + if (stop >= pos_end) { + count += (((pos_end - run->runs[i].value) > 0) ? (pos_end - run->runs[i].value) : 0); + break; + } + const uint32_t min = (stop - pos_start) > 0 ? (stop - pos_start) : 0; + count += (min < run->runs[i].length) ? min : run->runs[i].length; + } + return count >= (pos_end - pos_start - 1); +} + +#ifdef USEAVX + +/* Get the cardinality of `run'. Requires an actual computation. */ +static inline int run_container_cardinality(const run_container_t *run) { + const int32_t n_runs = run->n_runs; + const rle16_t *runs = run->runs; + + /* by initializing with n_runs, we omit counting the +1 for each pair. */ + int sum = n_runs; + int32_t k = 0; + const int32_t step = sizeof(__m256i) / sizeof(rle16_t); + if (n_runs > step) { + __m256i total = _mm256_setzero_si256(); + for (; k + step <= n_runs; k += step) { + __m256i ymm1 = _mm256_lddqu_si256((const __m256i *)(runs + k)); + __m256i justlengths = _mm256_srli_epi32(ymm1, 16); + total = _mm256_add_epi32(total, justlengths); + } + // a store might be faster than extract? + uint32_t buffer[sizeof(__m256i) / sizeof(rle16_t)]; + _mm256_storeu_si256((__m256i *)buffer, total); + sum += (buffer[0] + buffer[1]) + (buffer[2] + buffer[3]) + + (buffer[4] + buffer[5]) + (buffer[6] + buffer[7]); + } + for (; k < n_runs; ++k) { + sum += runs[k].length; + } + + return sum; +} + +#else + +/* Get the cardinality of `run'. Requires an actual computation. 
*/
+static inline int run_container_cardinality(const run_container_t *run) {
+    const int32_t n_runs = run->n_runs;
+    const rle16_t *runs = run->runs;
+
+    /* by initializing with n_runs, we omit counting the +1 for each pair. */
+    int sum = n_runs;
+    for (int k = 0; k < n_runs; ++k) {
+        sum += runs[k].length;
+    }
+
+    return sum;
+}
+#endif
+
+/* Card > 0?, see run_container_empty for the reverse */
+static inline bool run_container_nonzero_cardinality(
+    const run_container_t *run) {
+    return run->n_runs > 0;  // runs never empty: any run holds >= 1 value
+}
+
+/* Card == 0?, see run_container_nonzero_cardinality for the reverse */
+static inline bool run_container_empty(
+    const run_container_t *run) {
+    return run->n_runs == 0;  // runs never empty: no runs means no values
+}
+
+
+
+/* Copy one container into another. We assume that they are distinct. */
+void run_container_copy(const run_container_t *src, run_container_t *dst);
+
+/* Set the cardinality to zero (does not release memory). */
+static inline void run_container_clear(run_container_t *run) {
+    run->n_runs = 0;
+}
+
+/**
+ * Append run described by vl to the run container, possibly merging.
+ * It is assumed that the run would be inserted at the end of the container, no
+ * check is made.
+ * It is assumed that the run container has the necessary capacity: caller is
+ * responsible for checking memory capacity.
+ *
+ * `previousrl' is an in/out accumulator mirroring the last run of the
+ * container; it lets repeated appends merge without re-reading the array.
+ *
+ * This is not a safe function, it is meant for performance: use with care.
+ */
+static inline void run_container_append(run_container_t *run, rle16_t vl,
+                                        rle16_t *previousrl) {
+    const uint32_t previousend = previousrl->value + previousrl->length;
+    if (vl.value > previousend + 1) {  // we add a new one
+        run->runs[run->n_runs] = vl;
+        run->n_runs++;
+        *previousrl = vl;
+    } else {
+        // vl touches or overlaps the previous run
+        uint32_t newend = vl.value + vl.length + UINT32_C(1);
+        if (newend > previousend) {  // we merge
+            previousrl->length = (uint16_t)(newend - 1 - previousrl->value);
+            run->runs[run->n_runs - 1] = *previousrl;
+        }
+        // otherwise vl is fully contained in the previous run: nothing to do
+    }
+}
+
+/**
+ * Like run_container_append but it is assumed that the content of run is empty.
+ */
+static inline rle16_t run_container_append_first(run_container_t *run,
+                                                 rle16_t vl) {
+    run->runs[run->n_runs] = vl;
+    run->n_runs++;
+    return vl;  // returned value seeds `previousrl' for subsequent appends
+}
+
+/**
+ * append a single value given by val to the run container, possibly merging.
+ * It is assumed that the value would be inserted at the end of the container,
+ * no check is made.
+ * It is assumed that the run container has the necessary capacity: caller is
+ * responsible for checking memory capacity.
+ *
+ * This is not a safe function, it is meant for performance: use with care.
+ */
+static inline void run_container_append_value(run_container_t *run,
+                                              uint16_t val,
+                                              rle16_t *previousrl) {
+    const uint32_t previousend = previousrl->value + previousrl->length;
+    if (val > previousend + 1) {  // we add a new one
+        //*previousrl = (rle16_t){.value = val, .length = 0};// requires C99
+        previousrl->value = val;
+        previousrl->length = 0;
+
+        run->runs[run->n_runs] = *previousrl;
+        run->n_runs++;
+    } else if (val == previousend + 1) {  // we merge
+        previousrl->length++;
+        run->runs[run->n_runs - 1] = *previousrl;
+    }
+    // val <= previousend: already present, nothing to do
+}
+
+/**
+ * Like run_container_append_value but it is assumed that the content of run is
+ * empty.
+ */
+static inline rle16_t run_container_append_value_first(run_container_t *run,
+                                                       uint16_t val) {
+    // rle16_t newrle = (rle16_t){.value = val, .length = 0};// requires C99
+    rle16_t newrle;
+    newrle.value = val;
+    newrle.length = 0;
+
+    run->runs[run->n_runs] = newrle;
+    run->n_runs++;
+    return newrle;  // returned value seeds `previousrl' for later appends
+}
+
+/* Check whether the container spans the whole chunk (cardinality = 1<<16).
+ * This check can be done in constant time (inexpensive). */
+static inline bool run_container_is_full(const run_container_t *run) {
+    rle16_t vl = run->runs[0];
+    return (run->n_runs == 1) && (vl.value == 0) && (vl.length == 0xFFFF);
+}
+
+/* Compute the union of `src_1' and `src_2' and write the result to `dst'
+ * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
+void run_container_union(const run_container_t *src_1,
+                         const run_container_t *src_2, run_container_t *dst);
+
+/* Compute the union of `src_1' and `src_2' and write the result to `src_1' */
+void run_container_union_inplace(run_container_t *src_1,
+                                 const run_container_t *src_2);
+
+/* Compute the intersection of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2. */
+void run_container_intersection(const run_container_t *src_1,
+                                const run_container_t *src_2,
+                                run_container_t *dst);
+
+/* Compute the size of the intersection of src_1 and src_2 . */
+int run_container_intersection_cardinality(const run_container_t *src_1,
+                                           const run_container_t *src_2);
+
+/* Check whether src_1 and src_2 intersect. */
+bool run_container_intersect(const run_container_t *src_1,
+                             const run_container_t *src_2);
+
+/* Compute the symmetric difference of `src_1' and `src_2' and write the result
+ * to `dst'
+ * It is assumed that `dst' is distinct from both `src_1' and `src_2'.
+ */
+void run_container_xor(const run_container_t *src_1,
+                       const run_container_t *src_2, run_container_t *dst);
+
+/*
+ * Write out the 16-bit integers contained in this container as a list of 32-bit
+ * integers using base
+ * as the starting value (it might be expected that base has zeros in its 16
+ * least significant bits).
+ * The function returns the number of values written.
+ * The caller is responsible for allocating enough memory in out.
+ */
+int run_container_to_uint32_array(void *vout, const run_container_t *cont,
+                                  uint32_t base);
+
+/*
+ * Print this container using printf (useful for debugging).
+ */
+void run_container_printf(const run_container_t *v);
+
+/*
+ * Print this container using printf as a comma-separated list of 32-bit
+ * integers starting at base.
+ */
+void run_container_printf_as_uint32_array(const run_container_t *v,
+                                          uint32_t base);
+
+/**
+ * Return the serialized size in bytes of a container having "num_runs" runs.
+ */
+static inline int32_t run_container_serialized_size_in_bytes(int32_t num_runs) {
+    return sizeof(uint16_t) +
+           sizeof(rle16_t) * num_runs;  // each run requires 2 2-byte entries.
+}
+
+bool run_container_iterate(const run_container_t *cont, uint32_t base,
+                           roaring_iterator iterator, void *ptr);
+bool run_container_iterate64(const run_container_t *cont, uint32_t base,
+                             roaring_iterator64 iterator, uint64_t high_bits,
+                             void *ptr);
+
+/**
+ * Writes the underlying array to buf, outputs how many bytes were written.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions of
+ * Roaring.
+ * The number of bytes written should be run_container_size_in_bytes(container).
+ */
+int32_t run_container_write(const run_container_t *container, char *buf);
+
+/**
+ * Reads the instance from buf, outputs how many bytes were read.
+ * This is meant to be byte-by-byte compatible with the Java and Go versions of
+ * Roaring.
+ * The number of bytes read should be run_container_size_in_bytes(container).
+ * The cardinality parameter is provided for consistency with other containers,
+ * but
+ * it might be effectively ignored.
+ */
+int32_t run_container_read(int32_t cardinality, run_container_t *container,
+                           const char *buf);
+
+/**
+ * Return the serialized size in bytes of a container (see run_container_write).
+ * This is meant to be compatible with the Java and Go versions of Roaring.
+ */
+static inline int32_t run_container_size_in_bytes(
+    const run_container_t *container) {
+    return run_container_serialized_size_in_bytes(container->n_runs);
+}
+
+/**
+ * Return true if the two containers have the same content.
+ */
+static inline bool run_container_equals(const run_container_t *container1,
+                                        const run_container_t *container2) {
+    if (container1->n_runs != container2->n_runs) {
+        return false;
+    }
+    // equal run counts: compare the run arrays bytewise (canonical form makes
+    // equal sets have identical run encodings)
+    return memequals(container1->runs, container2->runs,
+                     container1->n_runs * sizeof(rle16_t));
+}
+
+/**
+* Return true if container1 is a subset of container2.
+*/
+bool run_container_is_subset(const run_container_t *container1,
+                             const run_container_t *container2);
+
+/**
+ * Used in a start-finish scan that appends segments, for XOR and NOT
+ */
+
+void run_container_smart_append_exclusive(run_container_t *src,
+                                          const uint16_t start,
+                                          const uint16_t length);
+
+/**
+* The new container consists of a single run [start,stop).
+* It is required that stop>start, the caller is responsible for this check.
+* It is required that stop <= (1<<16), the caller is responsible for this check.
+* The cardinality of the created container is stop - start.
+* Returns NULL on failure
+*/
+static inline run_container_t *run_container_create_range(uint32_t start,
+                                                          uint32_t stop) {
+    run_container_t *rc = run_container_create_given_capacity(1);
+    if (rc) {
+        rle16_t r;
+        r.value = (uint16_t)start;
+        r.length = (uint16_t)(stop - start - 1);  // length is inclusive extent
+        run_container_append_first(rc, r);
+    }
+    return rc;
+}
+
+/**
+ * If the element of given rank is in this container, supposing that the first
+ * element has rank start_rank, then the function returns true and sets element
+ * accordingly.
+ * Otherwise, it returns false and updates start_rank.
+ */
+bool run_container_select(const run_container_t *container,
+                          uint32_t *start_rank, uint32_t rank,
+                          uint32_t *element);
+
+/* Compute the difference of src_1 and src_2 and write the result to
+ * dst. It is assumed that dst is distinct from both src_1 and src_2. */
+
+void run_container_andnot(const run_container_t *src_1,
+                          const run_container_t *src_2, run_container_t *dst);
+
+/* Returns the smallest value (assumes not empty) */
+static inline uint16_t run_container_minimum(const run_container_t *run) {
+    if (run->n_runs == 0) return 0;
+    return run->runs[0].value;
+}
+
+/* Returns the largest value (assumes not empty) */
+static inline uint16_t run_container_maximum(const run_container_t *run) {
+    if (run->n_runs == 0) return 0;
+    return run->runs[run->n_runs - 1].value + run->runs[run->n_runs - 1].length;
+}
+
+/* Returns the number of values equal or smaller than x */
+int run_container_rank(const run_container_t *arr, uint16_t x);
+
+/* Returns the index of the first run containing a value at least as large as x, or -1 */
+static inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x) {
+    int32_t index = interleavedBinarySearch(arr->runs, arr->n_runs, x);
+    if (index >= 0) return index;
+    index = -index - 2;  // points to preceding run, possibly -1
+    if (index != -1) {   // possible match: x may lie inside the preceding run
+        int32_t offset = x - arr->runs[index].value;
+        int32_t le =
arr->runs[index].length;
+        if (offset <= le) return index;
+    }
+    // x falls in the gap after run `index': first run at or above x is next
+    index += 1;
+    if(index < arr->n_runs) {
+        return index;
+    }
+    return -1;
+}
+
+/*
+ * Add all values in range [min, max] using hint.
+ * Hints (derived by run_container_add_range): nruns_less = number of runs
+ * lying entirely below min, nruns_greater = number of runs lying entirely
+ * above max; the runs in between overlap or touch the new range.
+ */
+static inline void run_container_add_range_nruns(run_container_t* run,
+                                                 uint32_t min, uint32_t max,
+                                                 int32_t nruns_less,
+                                                 int32_t nruns_greater) {
+    int32_t nruns_common = run->n_runs - nruns_less - nruns_greater;
+    if (nruns_common == 0) {
+        // no overlapping run: insert [min, max] as a brand-new run
+        makeRoomAtIndex(run, nruns_less);
+        run->runs[nruns_less].value = min;
+        run->runs[nruns_less].length = max - min;
+    } else {
+        // merge all overlapping runs with [min, max] into a single run
+        uint32_t common_min = run->runs[nruns_less].value;
+        uint32_t common_max = run->runs[nruns_less + nruns_common - 1].value +
+                              run->runs[nruns_less + nruns_common - 1].length;
+        uint32_t result_min = (common_min < min) ? common_min : min;
+        uint32_t result_max = (common_max > max) ? common_max : max;
+
+        run->runs[nruns_less].value = result_min;
+        run->runs[nruns_less].length = result_max - result_min;
+
+        // close the gap left by the absorbed runs
+        memmove(&(run->runs[nruns_less + 1]),
+                &(run->runs[run->n_runs - nruns_greater]),
+                nruns_greater*sizeof(rle16_t));
+        run->n_runs = nruns_less + 1 + nruns_greater;
+    }
+}
+
+/**
+ * Add all values in range [min, max]
+ */
+static inline void run_container_add_range(run_container_t* run,
+                                           uint32_t min, uint32_t max) {
+    int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max);
+    int32_t nruns_less = rle16_count_less(run->runs, run->n_runs - nruns_greater, min);
+    run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater);
+}
+
+/**
+ * Shifts last $count elements either left (distance < 0) or right (distance > 0)
+ */
+static inline void run_container_shift_tail(run_container_t* run,
+                                            int32_t count, int32_t distance) {
+    if (distance > 0) {
+        // NOTE(review): the grow bound uses count+distance, which
+        // under-allocates when count < n_runs (the moved tail ends at index
+        // n_runs+distance-1). The only caller visible here (remove_range)
+        // passes distance < 0, so this path is unexercised — confirm before
+        // adding callers that grow.
+        if (run->capacity < count+distance) {
+            run_container_grow(run, count+distance, true);
+        }
+    }
+    int32_t srcpos = run->n_runs - count;
+    int32_t dstpos = srcpos + distance;
+    memmove(&(run->runs[dstpos]), &(run->runs[srcpos]),
+            sizeof(rle16_t) * count);
+    run->n_runs += distance;
+}
+
+/**
+ * Remove all elements in range [min, max]
+ */
+static inline void run_container_remove_range(run_container_t *run, uint32_t min, uint32_t max) {
+    int32_t first = rle16_find_run(run->runs, run->n_runs, min);
+    int32_t last = rle16_find_run(run->runs, run->n_runs, max);
+
+    // special case: [min, max] lies strictly inside a single run
+    if (first >= 0 && min > run->runs[first].value &&
+        max < ((uint32_t)run->runs[first].value + (uint32_t)run->runs[first].length)) {
+        // split this run into two adjacent runs
+
+        // right subinterval
+        makeRoomAtIndex(run, first+1);
+        run->runs[first+1].value = max + 1;
+        run->runs[first+1].length = (run->runs[first].value + run->runs[first].length) - (max + 1);
+
+        // left subinterval
+        run->runs[first].length = (min - 1) - run->runs[first].value;
+
+        return;
+    }
+
+    // update left-most partial run
+    if (first >= 0) {
+        if (min > run->runs[first].value) {
+            // keep the part of the run below min; it survives intact
+            run->runs[first].length = (min - 1) - run->runs[first].value;
+            first++;
+        }
+    } else {
+        first = -first-1;  // min is between runs: first run to delete
+    }
+
+    // update right-most run
+    if (last >= 0) {
+        uint16_t run_max = run->runs[last].value + run->runs[last].length;
+        if (run_max > max) {
+            // keep the part of the run above max; it survives intact
+            run->runs[last].value = max + 1;
+            run->runs[last].length = run_max - (max + 1);
+            last--;
+        }
+    } else {
+        last = (-last-1) - 1;  // max is between runs: last run to delete
+    }
+
+    // remove intermediate runs
+    if (first <= last) {
+        run_container_shift_tail(run, run->n_runs - (last+1), -(last-first+1));
+    }
+}
+
+
+#endif /* INCLUDE_CONTAINERS_RUN_H_ */
+/* end file include/roaring/containers/run.h */
+/* begin file include/roaring/containers/convert.h */
+/*
+ * convert.h
+ *
+ */
+
+#ifndef INCLUDE_CONTAINERS_CONVERT_H_
+#define INCLUDE_CONTAINERS_CONVERT_H_
+
+
+/* Convert an array into a bitset. The input container is not freed or modified.
+ */
+bitset_container_t *bitset_container_from_array(const array_container_t *arr);
+
+/* Convert a run into a bitset. The input container is not freed or modified.
*/
+bitset_container_t *bitset_container_from_run(const run_container_t *arr);
+
+/* Convert a run into an array. The input container is not freed or modified. */
+array_container_t *array_container_from_run(const run_container_t *arr);
+
+/* Convert a bitset into an array. The input container is not freed or modified.
+ */
+array_container_t *array_container_from_bitset(const bitset_container_t *bits);
+
+/* Convert an array into a run. The input container is not freed or modified.
+ */
+run_container_t *run_container_from_array(const array_container_t *c);
+
+/* convert a run into either an array or a bitset
+ * might free the container. This does not free the input run container. */
+void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card,
+                                           uint8_t *resulttype);
+
+/* convert containers to and from runcontainers, as is most space efficient.
+ * The container might be freed. */
+void *convert_run_optimize(void *c, uint8_t typecode_original,
+                           uint8_t *typecode_after);
+
+/* converts a run container to either an array or a bitset, IF it saves space.
+ */
+/* If a conversion occurs, the caller is responsible to free the original
+ * container and
+ * becomes responsible to free the new one. */
+void *convert_run_to_efficient_container(run_container_t *c,
+                                         uint8_t *typecode_after);
+// like convert_run_to_efficient_container but frees the old result if needed
+void *convert_run_to_efficient_container_and_free(run_container_t *c,
+                                                  uint8_t *typecode_after);
+
+/**
+ * Create new bitset container which is a union of run container and
+ * range [min, max]. Caller is responsible for freeing run container.
+ */ +bitset_container_t *bitset_container_from_run_range(const run_container_t *run, + uint32_t min, uint32_t max); + +#endif /* INCLUDE_CONTAINERS_CONVERT_H_ */ +/* end file include/roaring/containers/convert.h */ +/* begin file include/roaring/containers/mixed_equal.h */ +/* + * mixed_equal.h + * + */ + +#ifndef CONTAINERS_MIXED_EQUAL_H_ +#define CONTAINERS_MIXED_EQUAL_H_ + + +/** + * Return true if the two containers have the same content. + */ +bool array_container_equal_bitset(const array_container_t* container1, + const bitset_container_t* container2); + +/** + * Return true if the two containers have the same content. + */ +bool run_container_equals_array(const run_container_t* container1, + const array_container_t* container2); +/** + * Return true if the two containers have the same content. + */ +bool run_container_equals_bitset(const run_container_t* container1, + const bitset_container_t* container2); + +#endif /* CONTAINERS_MIXED_EQUAL_H_ */ +/* end file include/roaring/containers/mixed_equal.h */ +/* begin file include/roaring/containers/mixed_subset.h */ +/* + * mixed_subset.h + * + */ + +#ifndef CONTAINERS_MIXED_SUBSET_H_ +#define CONTAINERS_MIXED_SUBSET_H_ + + +/** + * Return true if container1 is a subset of container2. + */ +bool array_container_is_subset_bitset(const array_container_t* container1, + const bitset_container_t* container2); + +/** +* Return true if container1 is a subset of container2. + */ +bool run_container_is_subset_array(const run_container_t* container1, + const array_container_t* container2); + +/** +* Return true if container1 is a subset of container2. + */ +bool array_container_is_subset_run(const array_container_t* container1, + const run_container_t* container2); + +/** +* Return true if container1 is a subset of container2. + */ +bool run_container_is_subset_bitset(const run_container_t* container1, + const bitset_container_t* container2); + +/** +* Return true if container1 is a subset of container2. 
+*/ +bool bitset_container_is_subset_run(const bitset_container_t* container1, + const run_container_t* container2); + +#endif /* CONTAINERS_MIXED_SUBSET_H_ */ +/* end file include/roaring/containers/mixed_subset.h */ +/* begin file include/roaring/containers/mixed_andnot.h */ +/* + * mixed_andnot.h + */ +#ifndef INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ +#define INCLUDE_CONTAINERS_MIXED_ANDNOT_H_ + + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst, a valid array container that could be the same as dst.*/ +void array_bitset_container_andnot(const array_container_t *src_1, + const bitset_container_t *src_2, + array_container_t *dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * src_1 */ + +void array_bitset_container_iandnot(array_container_t *src_1, + const bitset_container_t *src_2); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst, which does not initially have a valid container. + * Return true for a bitset result; false for array + */ + +bool bitset_array_container_andnot(const bitset_container_t *src_1, + const array_container_t *src_2, void **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_array_container_iandnot(bitset_container_t *src_1, + const array_container_t *src_2, void **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. 
+ */ + +bool run_bitset_container_andnot(const run_container_t *src_1, + const bitset_container_t *src_2, void **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool run_bitset_container_iandnot(run_container_t *src_1, + const bitset_container_t *src_2, void **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool bitset_run_container_andnot(const bitset_container_t *src_1, + const run_container_t *src_2, void **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_run_container_iandnot(bitset_container_t *src_1, + const run_container_t *src_2, void **dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any type of container. + */ + +int run_array_container_andnot(const run_container_t *src_1, + const array_container_t *src_2, void **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. 
+ * Returns true iff dst is a bitset */ + +int run_array_container_iandnot(run_container_t *src_1, + const array_container_t *src_2, void **dst); + +/* dst must be a valid array container, allowed to be src_1 */ + +void array_run_container_andnot(const array_container_t *src_1, + const run_container_t *src_2, + array_container_t *dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +void array_run_container_iandnot(array_container_t *src_1, + const run_container_t *src_2); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int run_run_container_andnot(const run_container_t *src_1, + const run_container_t *src_2, void **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +int run_run_container_iandnot(run_container_t *src_1, + const run_container_t *src_2, void **dst); + +/* + * dst is a valid array container and may be the same as src_1 + */ + +void array_array_container_andnot(const array_container_t *src_1, + const array_container_t *src_2, + array_container_t *dst); + +/* inplace array-array andnot will always be able to reuse the space of + * src_1 */ +void array_array_container_iandnot(array_container_t *src_1, + const array_container_t *src_2); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). 
Return value is + * "dst is a bitset" + */ + +bool bitset_bitset_container_andnot(const bitset_container_t *src_1, + const bitset_container_t *src_2, + void **dst); + +/* Compute the andnot of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_bitset_container_iandnot(bitset_container_t *src_1, + const bitset_container_t *src_2, + void **dst); +#endif +/* end file include/roaring/containers/mixed_andnot.h */ +/* begin file include/roaring/containers/mixed_intersection.h */ +/* + * mixed_intersection.h + * + */ + +#ifndef INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ +#define INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ + +/* These functions appear to exclude cases where the + * inputs have the same type and the output is guaranteed + * to have the same type as the inputs. Eg, array intersection + */ + + +/* Compute the intersection of src_1 and src_2 and write the result to + * dst. It is allowed for dst to be equal to src_1. We assume that dst is a + * valid container. */ +void array_bitset_container_intersection(const array_container_t *src_1, + const bitset_container_t *src_2, + array_container_t *dst); + +/* Compute the size of the intersection of src_1 and src_2. */ +int array_bitset_container_intersection_cardinality( + const array_container_t *src_1, const bitset_container_t *src_2); + + + +/* Checking whether src_1 and src_2 intersect. */ +bool array_bitset_container_intersect(const array_container_t *src_1, + const bitset_container_t *src_2); + +/* + * Compute the intersection between src_1 and src_2 and write the result + * to *dst. If the return function is true, the result is a bitset_container_t + * otherwise is a array_container_t. We assume that dst is not pre-allocated. 
In + * case of failure, *dst will be NULL. + */ +bool bitset_bitset_container_intersection(const bitset_container_t *src_1, + const bitset_container_t *src_2, + void **dst); + +/* Compute the intersection between src_1 and src_2 and write the result to + * dst. It is allowed for dst to be equal to src_1. We assume that dst is a + * valid container. */ +void array_run_container_intersection(const array_container_t *src_1, + const run_container_t *src_2, + array_container_t *dst); + +/* Compute the intersection between src_1 and src_2 and write the result to + * *dst. If the result is true then the result is a bitset_container_t + * otherwise is a array_container_t. + * If *dst == src_2, then an in-place intersection is attempted + **/ +bool run_bitset_container_intersection(const run_container_t *src_1, + const bitset_container_t *src_2, + void **dst); + +/* Compute the size of the intersection between src_1 and src_2 . */ +int array_run_container_intersection_cardinality(const array_container_t *src_1, + const run_container_t *src_2); + +/* Compute the size of the intersection between src_1 and src_2 + **/ +int run_bitset_container_intersection_cardinality(const run_container_t *src_1, + const bitset_container_t *src_2); + + +/* Check that src_1 and src_2 intersect. */ +bool array_run_container_intersect(const array_container_t *src_1, + const run_container_t *src_2); + +/* Check that src_1 and src_2 intersect. + **/ +bool run_bitset_container_intersect(const run_container_t *src_1, + const bitset_container_t *src_2); + +/* + * Same as bitset_bitset_container_intersection except that if the output is to + * be a + * bitset_container_t, then src_1 is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. 
+ */ +bool bitset_bitset_container_intersection_inplace( + bitset_container_t *src_1, const bitset_container_t *src_2, void **dst); + +#endif /* INCLUDE_CONTAINERS_MIXED_INTERSECTION_H_ */ +/* end file include/roaring/containers/mixed_intersection.h */ +/* begin file include/roaring/containers/mixed_negation.h */ +/* + * mixed_negation.h + * + */ + +#ifndef INCLUDE_CONTAINERS_MIXED_NEGATION_H_ +#define INCLUDE_CONTAINERS_MIXED_NEGATION_H_ + + +/* Negation across the entire range of the container. + * Compute the negation of src and write the result + * to *dst. The complement of a + * sufficiently sparse set will always be dense and a hence a bitmap + * We assume that dst is pre-allocated and a valid bitset container + * There can be no in-place version. + */ +void array_container_negation(const array_container_t *src, + bitset_container_t *dst); + +/* Negation across the entire range of the container + * Compute the negation of src and write the result + * to *dst. A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation(const bitset_container_t *src, void **dst); + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. + */ +bool bitset_container_negation_inplace(bitset_container_t *src, void **dst); + +/* Negation across the entire range of container + * Compute the negation of src and write the result + * to *dst. + * Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. 
+ */ +int run_container_negation(const run_container_t *src, void **dst); + +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. + */ +int run_container_negation_inplace(run_container_t *src, void **dst); + +/* Negation across a range of the container. + * Compute the negation of src and write the result + * to *dst. Returns true if the result is a bitset container + * and false for an array container. *dst is not preallocated. + */ +bool array_container_negation_range(const array_container_t *src, + const int range_start, const int range_end, + void **dst); + +/* Even when the result would fit, it is unclear how to make an + * inplace version without inefficient copying. Thus this routine + * may be a wrapper for the non-in-place version + */ +bool array_container_negation_range_inplace(array_container_t *src, + const int range_start, + const int range_end, void **dst); + +/* Negation across a range of the container + * Compute the negation of src and write the result + * to *dst. A true return value indicates a bitset result, + * otherwise the result is an array container. + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool bitset_container_negation_range(const bitset_container_t *src, + const int range_start, const int range_end, + void **dst); + +/* inplace version */ +/* + * Same as bitset_container_negation except that if the output is to + * be a + * bitset_container_t, then src is modified and no allocation is made. + * If the output is to be an array_container_t, then caller is responsible + * to free the container. + * In all cases, the result is in *dst. 
+ */ +bool bitset_container_negation_range_inplace(bitset_container_t *src, + const int range_start, + const int range_end, void **dst); + +/* Negation across a range of container + * Compute the negation of src and write the result + * to *dst. Return values are the *_TYPECODES as defined * in containers.h + * We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +int run_container_negation_range(const run_container_t *src, + const int range_start, const int range_end, + void **dst); + +/* + * Same as run_container_negation except that if the output is to + * be a + * run_container_t, and has the capacity to hold the result, + * then src is modified and no allocation is made. + * In all cases, the result is in *dst. + */ +int run_container_negation_range_inplace(run_container_t *src, + const int range_start, + const int range_end, void **dst); + +#endif /* INCLUDE_CONTAINERS_MIXED_NEGATION_H_ */ +/* end file include/roaring/containers/mixed_negation.h */ +/* begin file include/roaring/containers/mixed_union.h */ +/* + * mixed_intersection.h + * + */ + +#ifndef INCLUDE_CONTAINERS_MIXED_UNION_H_ +#define INCLUDE_CONTAINERS_MIXED_UNION_H_ + +/* These functions appear to exclude cases where the + * inputs have the same type and the output is guaranteed + * to have the same type as the inputs. Eg, bitset unions + */ + + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. */ +void array_bitset_container_union(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). 
*/ +void array_bitset_container_lazy_union(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* + * Compute the union between src_1 and src_2 and write the result + * to *dst. If the return function is true, the result is a bitset_container_t + * otherwise is a array_container_t. We assume that dst is not pre-allocated. In + * case of failure, *dst will be NULL. + */ +bool array_array_container_union(const array_container_t *src_1, + const array_container_t *src_2, void **dst); + +/* + * Compute the union between src_1 and src_2 and write the result + * to *dst if it cannot be written to src_1. If the return function is true, + * the result is a bitset_container_t + * otherwise is a array_container_t. When the result is an array_container_t, it + * it either written to src_1 (if *dst is null) or to *dst. + * If the result is a bitset_container_t and *dst is null, then there was a failure. + */ +bool array_array_container_inplace_union(array_container_t *src_1, + const array_container_t *src_2, void **dst); + +/* + * Same as array_array_container_union except that it will more eagerly produce + * a bitset. + */ +bool array_array_container_lazy_union(const array_container_t *src_1, + const array_container_t *src_2, + void **dst); + +/* + * Same as array_array_container_inplace_union except that it will more eagerly produce + * a bitset. + */ +bool array_array_container_lazy_inplace_union(array_container_t *src_1, + const array_container_t *src_2, + void **dst); + +/* Compute the union of src_1 and src_2 and write the result to + * dst. We assume that dst is a + * valid container. The result might need to be further converted to array or + * bitset container, + * the caller is responsible for the eventual conversion. */ +void array_run_container_union(const array_container_t *src_1, + const run_container_t *src_2, + run_container_t *dst); + +/* Compute the union of src_1 and src_2 and write the result to + * src2. 
The result might need to be further converted to array or + * bitset container, + * the caller is responsible for the eventual conversion. */ +void array_run_container_inplace_union(const array_container_t *src_1, + run_container_t *src_2); + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for dst to be src_2. + * If run_container_is_full(src_1) is true, you must not be calling this + *function. + **/ +void run_bitset_container_union(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* Compute the union of src_1 and src_2 and write the result to + * dst. It is allowed for dst to be src_2. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). + * If run_container_is_full(src_1) is true, you must not be calling this + * function. + * */ +void run_bitset_container_lazy_union(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +#endif /* INCLUDE_CONTAINERS_MIXED_UNION_H_ */ +/* end file include/roaring/containers/mixed_union.h */ +/* begin file include/roaring/containers/mixed_xor.h */ +/* + * mixed_xor.h + * + */ + +#ifndef INCLUDE_CONTAINERS_MIXED_XOR_H_ +#define INCLUDE_CONTAINERS_MIXED_XOR_H_ + +/* These functions appear to exclude cases where the + * inputs have the same type and the output is guaranteed + * to have the same type as the inputs. Eg, bitset unions + */ + +/* + * Java implementation (as of May 2016) for array_run, run_run + * and bitset_run don't do anything different for inplace. + * (They are not truly in place.) + */ + + + +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). + * Result is true iff dst is a bitset */ +bool array_bitset_container_xor(const array_container_t *src_1, + const bitset_container_t *src_2, void **dst); + +/* Compute the xor of src_1 and src_2 and write the result to + * dst. 
It is allowed for src_2 to be dst. This version does not + * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). + */ + +void array_bitset_container_lazy_xor(const array_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). Return value is + * "dst is a bitset" + */ + +bool bitset_bitset_container_xor(const bitset_container_t *src_1, + const bitset_container_t *src_2, void **dst); + +/* Compute the xor of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. + */ + +bool run_bitset_container_xor(const run_container_t *src_1, + const bitset_container_t *src_2, void **dst); + +/* lazy xor. Dst is initialized and may be equal to src_2. + * Result is left as a bitset container, even if actual + * cardinality would dictate an array container. + */ + +void run_bitset_container_lazy_xor(const run_container_t *src_1, + const bitset_container_t *src_2, + bitset_container_t *dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int array_run_container_xor(const array_container_t *src_1, + const run_container_t *src_2, void **dst); + +/* dst does not initially have a valid container. Creates either + * an array or a bitset container, indicated by return code + */ + +bool array_array_container_xor(const array_container_t *src_1, + const array_container_t *src_2, void **dst); + +/* dst does not initially have a valid container. Creates either + * an array or a bitset container, indicated by return code. 
+ * A bitset container will not have a valid cardinality and the + * container type might not be correct for the actual cardinality + */ + +bool array_array_container_lazy_xor(const array_container_t *src_1, + const array_container_t *src_2, void **dst); + +/* Dst is a valid run container. (Can it be src_2? Let's say not.) + * Leaves result as run container, even if other options are + * smaller. + */ + +void array_run_container_lazy_xor(const array_container_t *src_1, + const run_container_t *src_2, + run_container_t *dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int run_run_container_xor(const run_container_t *src_1, + const run_container_t *src_2, void **dst); + +/* INPLACE versions (initial implementation may not exploit all inplace + * opportunities (if any...) + */ + +/* Compute the xor of src_1 and src_2 and write the result to + * dst (which has no container initially). It will modify src_1 + * to be dst if the result is a bitset. Otherwise, it will + * free src_1 and dst will be a new array container. In both + * cases, the caller is responsible for deallocating dst. + * Returns true iff dst is a bitset */ + +bool bitset_array_container_ixor(bitset_container_t *src_1, + const array_container_t *src_2, void **dst); + +bool bitset_bitset_container_ixor(bitset_container_t *src_1, + const bitset_container_t *src_2, void **dst); + +bool array_bitset_container_ixor(array_container_t *src_1, + const bitset_container_t *src_2, void **dst); + +/* Compute the xor of src_1 and src_2 and write the result to + * dst. Result may be either a bitset or an array container + * (returns "result is bitset"). dst does not initially have + * any container, but becomes either a bitset container (return + * result true) or an array container. 
+ */ + +bool run_bitset_container_ixor(run_container_t *src_1, + const bitset_container_t *src_2, void **dst); + +bool bitset_run_container_ixor(bitset_container_t *src_1, + const run_container_t *src_2, void **dst); + +/* dst does not indicate a valid container initially. Eventually it + * can become any kind of container. + */ + +int array_run_container_ixor(array_container_t *src_1, + const run_container_t *src_2, void **dst); + +int run_array_container_ixor(run_container_t *src_1, + const array_container_t *src_2, void **dst); + +bool array_array_container_ixor(array_container_t *src_1, + const array_container_t *src_2, void **dst); + +int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, + void **dst); +#endif +/* end file include/roaring/containers/mixed_xor.h */ +/* begin file include/roaring/containers/containers.h */ +#ifndef CONTAINERS_CONTAINERS_H +#define CONTAINERS_CONTAINERS_H + +#include +#include +#include + + +// would enum be possible or better? + +/** + * The switch case statements follow + * BITSET_CONTAINER_TYPE_CODE -- ARRAY_CONTAINER_TYPE_CODE -- + * RUN_CONTAINER_TYPE_CODE + * so it makes more sense to number them 1, 2, 3 (in the vague hope that the + * compiler might exploit this ordering). + */ + +#define BITSET_CONTAINER_TYPE_CODE 1 +#define ARRAY_CONTAINER_TYPE_CODE 2 +#define RUN_CONTAINER_TYPE_CODE 3 +#define SHARED_CONTAINER_TYPE_CODE 4 + +// macro for pairing container type codes +#define CONTAINER_PAIR(c1, c2) (4 * (c1) + (c2)) + +/** + * A shared container is a wrapper around a container + * with reference counting. + */ + +struct shared_container_s { + void *container; + uint8_t typecode; + uint32_t counter; // to be managed atomically +}; + +typedef struct shared_container_s shared_container_t; + +/* + * With copy_on_write = true + * Create a new shared container if the typecode is not SHARED_CONTAINER_TYPE, + * otherwise, increase the count + * If copy_on_write = false, then clone. 
+ * Return NULL in case of failure. + **/ +void *get_copy_of_container(void *container, uint8_t *typecode, + bool copy_on_write); + +/* Frees a shared container (actually decrement its counter and only frees when + * the counter falls to zero). */ +void shared_container_free(shared_container_t *container); + +/* extract a copy from the shared container, freeing the shared container if +there is just one instance left, +clone instances when the counter is higher than one +*/ +void *shared_container_extract_copy(shared_container_t *container, + uint8_t *typecode); + +/* access to container underneath */ +static inline const void *container_unwrap_shared( + const void *candidate_shared_container, uint8_t *type) { + if (*type == SHARED_CONTAINER_TYPE_CODE) { + *type = + ((const shared_container_t *)candidate_shared_container)->typecode; + assert(*type != SHARED_CONTAINER_TYPE_CODE); + return ((const shared_container_t *)candidate_shared_container)->container; + } else { + return candidate_shared_container; + } +} + + +/* access to container underneath */ +static inline void *container_mutable_unwrap_shared( + void *candidate_shared_container, uint8_t *type) { + if (*type == SHARED_CONTAINER_TYPE_CODE) { + *type = + ((shared_container_t *)candidate_shared_container)->typecode; + assert(*type != SHARED_CONTAINER_TYPE_CODE); + return ((shared_container_t *)candidate_shared_container)->container; + } else { + return candidate_shared_container; + } +} + +/* access to container underneath and queries its type */ +static inline uint8_t get_container_type(const void *container, uint8_t type) { + if (type == SHARED_CONTAINER_TYPE_CODE) { + return ((const shared_container_t *)container)->typecode; + } else { + return type; + } +} + +/** + * Copies a container, requires a typecode. This allocates new memory, caller + * is responsible for deallocation. If the container is not shared, then it is + * physically cloned. Sharable containers are not cloneable. 
+ */ +void *container_clone(const void *container, uint8_t typecode); + +/* access to container underneath, cloning it if needed */ +static inline void *get_writable_copy_if_shared( + void *candidate_shared_container, uint8_t *type) { + if (*type == SHARED_CONTAINER_TYPE_CODE) { + return shared_container_extract_copy( + (shared_container_t *)candidate_shared_container, type); + } else { + return candidate_shared_container; + } +} + +/** + * End of shared container code + */ + +static const char *container_names[] = {"bitset", "array", "run", "shared"}; +static const char *shared_container_names[] = { + "bitset (shared)", "array (shared)", "run (shared)"}; + +// no matter what the initial container was, convert it to a bitset +// if a new container is produced, caller responsible for freeing the previous +// one +// container should not be a shared container +static inline void *container_to_bitset(void *container, uint8_t typecode) { + bitset_container_t *result = NULL; + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return container; // nothing to do + case ARRAY_CONTAINER_TYPE_CODE: + result = + bitset_container_from_array((array_container_t *)container); + return result; + case RUN_CONTAINER_TYPE_CODE: + result = bitset_container_from_run((run_container_t *)container); + return result; + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + +/** + * Get the container name from the typecode + */ +static inline const char *get_container_name(uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return container_names[0]; + case ARRAY_CONTAINER_TYPE_CODE: + return container_names[1]; + case RUN_CONTAINER_TYPE_CODE: + return container_names[2]; + case SHARED_CONTAINER_TYPE_CODE: + return container_names[3]; + default: + assert(false); + __builtin_unreachable(); + return "unknown"; + } +} + +static inline const char *get_full_container_name(const void *container, + 
uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return container_names[0]; + case ARRAY_CONTAINER_TYPE_CODE: + return container_names[1]; + case RUN_CONTAINER_TYPE_CODE: + return container_names[2]; + case SHARED_CONTAINER_TYPE_CODE: + switch (((const shared_container_t *)container)->typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return shared_container_names[0]; + case ARRAY_CONTAINER_TYPE_CODE: + return shared_container_names[1]; + case RUN_CONTAINER_TYPE_CODE: + return shared_container_names[2]; + default: + assert(false); + __builtin_unreachable(); + return "unknown"; + } + break; + default: + assert(false); + __builtin_unreachable(); + return "unknown"; + } + __builtin_unreachable(); + return NULL; +} + +/** + * Get the container cardinality (number of elements), requires a typecode + */ +static inline int container_get_cardinality(const void *container, + uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_cardinality( + (const bitset_container_t *)container); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_cardinality( + (const array_container_t *)container); + case RUN_CONTAINER_TYPE_CODE: + return run_container_cardinality( + (const run_container_t *)container); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + + + +// returns true if a container is known to be full. 
Note that a lazy bitset +// container +// might be full without us knowing +static inline bool container_is_full(const void *container, uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_cardinality( + (const bitset_container_t *)container) == (1 << 16); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_cardinality( + (const array_container_t *)container) == (1 << 16); + case RUN_CONTAINER_TYPE_CODE: + return run_container_is_full((const run_container_t *)container); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + +static inline int container_shrink_to_fit(void *container, uint8_t typecode) { + container = container_mutable_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return 0; // no shrinking possible + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_shrink_to_fit( + (array_container_t *)container); + case RUN_CONTAINER_TYPE_CODE: + return run_container_shrink_to_fit((run_container_t *)container); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + + +/** + * make a container with a run of ones + */ +/* initially always use a run container, even if an array might be + * marginally + * smaller */ +static inline void *container_range_of_ones(uint32_t range_start, + uint32_t range_end, + uint8_t *result_type) { + assert(range_end >= range_start); + uint64_t cardinality = range_end - range_start + 1; + if(cardinality <= 2) { + *result_type = ARRAY_CONTAINER_TYPE_CODE; + return array_container_create_range(range_start, range_end); + } else { + *result_type = RUN_CONTAINER_TYPE_CODE; + return run_container_create_range(range_start, range_end); + } +} + + +/* Create a container with all the values between in [min,max) at a + distance k*step from 
min. */ +static inline void *container_from_range(uint8_t *type, uint32_t min, + uint32_t max, uint16_t step) { + if (step == 0) return NULL; // being paranoid + if (step == 1) { + return container_range_of_ones(min,max,type); + // Note: the result is not always a run (need to check the cardinality) + //*type = RUN_CONTAINER_TYPE_CODE; + //return run_container_create_range(min, max); + } + int size = (max - min + step - 1) / step; + if (size <= DEFAULT_MAX_SIZE) { // array container + *type = ARRAY_CONTAINER_TYPE_CODE; + array_container_t *array = array_container_create_given_capacity(size); + array_container_add_from_range(array, min, max, step); + assert(array->cardinality == size); + return array; + } else { // bitset container + *type = BITSET_CONTAINER_TYPE_CODE; + bitset_container_t *bitset = bitset_container_create(); + bitset_container_add_from_range(bitset, min, max, step); + assert(bitset->cardinality == size); + return bitset; + } +} + +/** + * "repair" the container after lazy operations. 
+ */ +static inline void *container_repair_after_lazy(void *container, + uint8_t *typecode) { + container = get_writable_copy_if_shared( + container, typecode); // TODO: this introduces unnecessary cloning + void *result = NULL; + switch (*typecode) { + case BITSET_CONTAINER_TYPE_CODE: + ((bitset_container_t *)container)->cardinality = + bitset_container_compute_cardinality( + (bitset_container_t *)container); + if (((bitset_container_t *)container)->cardinality <= + DEFAULT_MAX_SIZE) { + result = array_container_from_bitset( + (const bitset_container_t *)container); + bitset_container_free((bitset_container_t *)container); + *typecode = ARRAY_CONTAINER_TYPE_CODE; + return result; + } + return container; + case ARRAY_CONTAINER_TYPE_CODE: + return container; // nothing to do + case RUN_CONTAINER_TYPE_CODE: + return convert_run_to_efficient_container_and_free( + (run_container_t *)container, typecode); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + +/** + * Writes the underlying array to buf, outputs how many bytes were written. + * This is meant to be byte-by-byte compatible with the Java and Go versions of + * Roaring. + * The number of bytes written should be + * container_write(container, buf). 
+ * + */ +static inline int32_t container_write(const void *container, uint8_t typecode, + char *buf) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_write((const bitset_container_t *)container, buf); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_write((const array_container_t *)container, buf); + case RUN_CONTAINER_TYPE_CODE: + return run_container_write((const run_container_t *)container, buf); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + +/** + * Get the container size in bytes under portable serialization (see + * container_write), requires a + * typecode + */ +static inline int32_t container_size_in_bytes(const void *container, + uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_size_in_bytes( + (const bitset_container_t *)container); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_size_in_bytes( + (const array_container_t *)container); + case RUN_CONTAINER_TYPE_CODE: + return run_container_size_in_bytes((const run_container_t *)container); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + +/** + * print the container (useful for debugging), requires a typecode + */ +void container_printf(const void *container, uint8_t typecode); + +/** + * print the content of the container as a comma-separated list of 32-bit values + * starting at base, requires a typecode + */ +void container_printf_as_uint32_array(const void *container, uint8_t typecode, + uint32_t base); + +/** + * Checks whether a container is not empty, requires a typecode + */ +static inline bool container_nonzero_cardinality(const void *container, + uint8_t typecode) { + container = container_unwrap_shared(container, 
&typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_const_nonzero_cardinality( + (const bitset_container_t *)container); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_nonzero_cardinality( + (const array_container_t *)container); + case RUN_CONTAINER_TYPE_CODE: + return run_container_nonzero_cardinality( + (const run_container_t *)container); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + +/** + * Recover memory from a container, requires a typecode + */ +void container_free(void *container, uint8_t typecode); + +/** + * Convert a container to an array of values, requires a typecode as well as a + * "base" (most significant values) + * Returns number of ints added. + */ +static inline int container_to_uint32_array(uint32_t *output, + const void *container, + uint8_t typecode, uint32_t base) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_to_uint32_array( + output, (const bitset_container_t *)container, base); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_to_uint32_array( + output, (const array_container_t *)container, base); + case RUN_CONTAINER_TYPE_CODE: + return run_container_to_uint32_array( + output, (const run_container_t *)container, base); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return 0; // unreached + } +} + +/** + * Add a value to a container, requires a typecode, fills in new_typecode and + * return (possibly different) container. 
+ * This function may allocate a new container, and caller is responsible for + * memory deallocation + */ +static inline void *container_add(void *container, uint16_t val, + uint8_t typecode, uint8_t *new_typecode) { + container = get_writable_copy_if_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + bitset_container_set((bitset_container_t *)container, val); + *new_typecode = BITSET_CONTAINER_TYPE_CODE; + return container; + case ARRAY_CONTAINER_TYPE_CODE: { + array_container_t *ac = (array_container_t *)container; + if (array_container_try_add(ac, val, DEFAULT_MAX_SIZE) != -1) { + *new_typecode = ARRAY_CONTAINER_TYPE_CODE; + return ac; + } else { + bitset_container_t* bitset = bitset_container_from_array(ac); + bitset_container_add(bitset, val); + *new_typecode = BITSET_CONTAINER_TYPE_CODE; + return bitset; + } + } break; + case RUN_CONTAINER_TYPE_CODE: + // per Java, no container type adjustments are done (revisit?) + run_container_add((run_container_t *)container, val); + *new_typecode = RUN_CONTAINER_TYPE_CODE; + return container; + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +/** + * Remove a value from a container, requires a typecode, fills in new_typecode + * and + * return (possibly different) container. 
+ * This function may allocate a new container, and caller is responsible for + * memory deallocation + */ +static inline void *container_remove(void *container, uint16_t val, + uint8_t typecode, uint8_t *new_typecode) { + container = get_writable_copy_if_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + if (bitset_container_remove((bitset_container_t *)container, val)) { + if (bitset_container_cardinality( + (bitset_container_t *)container) <= DEFAULT_MAX_SIZE) { + *new_typecode = ARRAY_CONTAINER_TYPE_CODE; + return array_container_from_bitset( + (bitset_container_t *)container); + } + } + *new_typecode = typecode; + return container; + case ARRAY_CONTAINER_TYPE_CODE: + *new_typecode = typecode; + array_container_remove((array_container_t *)container, val); + return container; + case RUN_CONTAINER_TYPE_CODE: + // per Java, no container type adjustments are done (revisit?) + run_container_remove((run_container_t *)container, val); + *new_typecode = RUN_CONTAINER_TYPE_CODE; + return container; + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +/** + * Check whether a value is in a container, requires a typecode + */ +static inline bool container_contains(const void *container, uint16_t val, + uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_get((const bitset_container_t *)container, + val); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_contains( + (const array_container_t *)container, val); + case RUN_CONTAINER_TYPE_CODE: + return run_container_contains((const run_container_t *)container, + val); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +/** + * Check whether a range of values from range_start (included) to range_end (excluded) + * is in a container, requires a 
typecode + */ +static inline bool container_contains_range(const void *container, uint32_t range_start, + uint32_t range_end, uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_get_range((const bitset_container_t *)container, + range_start, range_end); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_contains_range((const array_container_t *)container, + range_start, range_end); + case RUN_CONTAINER_TYPE_CODE: + return run_container_contains_range((const run_container_t *)container, + range_start, range_end); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +int32_t container_serialize(const void *container, uint8_t typecode, + char *buf) WARN_UNUSED; + +uint32_t container_serialization_len(const void *container, uint8_t typecode); + +void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len); + +/** + * Returns true if the two containers have the same content. Note that + * two containers having different types can be "equal" in this sense. 
+ */ +static inline bool container_equals(const void *c1, uint8_t type1, + const void *c2, uint8_t type2) { + c1 = container_unwrap_shared(c1, &type1); + c2 = container_unwrap_shared(c2, &type2); + switch (CONTAINER_PAIR(type1, type2)) { + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + return bitset_container_equals((const bitset_container_t *)c1, + (const bitset_container_t *)c2); + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + RUN_CONTAINER_TYPE_CODE): + return run_container_equals_bitset((const run_container_t *)c2, + (const bitset_container_t *)c1); + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + return run_container_equals_bitset((const run_container_t *)c1, + (const bitset_container_t *)c2); + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + ARRAY_CONTAINER_TYPE_CODE): + // java would always return false? + return array_container_equal_bitset((const array_container_t *)c2, + (const bitset_container_t *)c1); + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + // java would always return false? 
+ return array_container_equal_bitset((const array_container_t *)c1, + (const bitset_container_t *)c2); + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): + return run_container_equals_array((const run_container_t *)c2, + (const array_container_t *)c1); + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): + return run_container_equals_array((const run_container_t *)c1, + (const array_container_t *)c2); + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, + ARRAY_CONTAINER_TYPE_CODE): + return array_container_equals((const array_container_t *)c1, + (const array_container_t *)c2); + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): + return run_container_equals((const run_container_t *)c1, + (const run_container_t *)c2); + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +/** + * Returns true if the container c1 is a subset of the container c2. Note that + * c1 can be a subset of c2 even if they have a different type. 
 */
static inline bool container_is_subset(const void *c1, uint8_t type1,
                                       const void *c2, uint8_t type2) {
    // Strip any shared-container indirection so the switch below only sees
    // the three concrete container kinds (bitset, array, run).
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return bitset_container_is_subset((const bitset_container_t *)c1,
                                              (const bitset_container_t *)c2);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            return bitset_container_is_subset_run(
                (const bitset_container_t *)c1, (const run_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return run_container_is_subset_bitset(
                (const run_container_t *)c1, (const bitset_container_t *)c2);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            return false;  // by construction, size(c1) > size(c2)
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return array_container_is_subset_bitset(
                (const array_container_t *)c1, (const bitset_container_t *)c2);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            return array_container_is_subset_run((const array_container_t *)c1,
                                                 (const run_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            return run_container_is_subset_array((const run_container_t *)c1,
                                                 (const array_container_t *)c2);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            return array_container_is_subset((const array_container_t *)c1,
                                             (const array_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            return run_container_is_subset((const run_container_t *)c1,
                                           (const run_container_t *)c2);
        default:
            // all valid (type1, type2) pairs are handled above
            assert(false);
            __builtin_unreachable();
            return false;
    }
}

// macro-izations possibilities for generic non-inplace binary-op dispatch

/**
 * Compute intersection between two containers, generate a new container
 * (having type result_type), requires a typecode. This allocates new memory,
 * caller is responsible for deallocation.
 */
static inline void *container_and(const void *c1, uint8_t type1, const void *c2,
                                  uint8_t type2, uint8_t *result_type) {
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            // helper reports (via its boolean return) whether the result
            // stayed a bitset or was shrunk to an array
            *result_type = bitset_bitset_container_intersection(
                               (const bitset_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            result = array_container_create();
            array_container_intersection((const array_container_t *)c1,
                                         (const array_container_t *)c2,
                                         (array_container_t *)result);
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            run_container_intersection((const run_container_t *)c1,
                                       (const run_container_t *)c2,
                                       (run_container_t *)result);
            // the temporary run result is converted to the most compact
            // representation and freed if a new container is produced
            return convert_run_to_efficient_container_and_free(
                (run_container_t *)result, result_type);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            result = array_container_create();
            array_bitset_container_intersection((const array_container_t *)c2,
                                                (const bitset_container_t *)c1,
                                                (array_container_t *)result);
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            result = array_container_create();
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            array_bitset_container_intersection((const array_container_t *)c1,
                                                (const bitset_container_t *)c2,
                                                (array_container_t *)result);
            return result;

        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            *result_type = run_bitset_container_intersection(
                               (const run_container_t *)c2,
                               (const bitset_container_t *)c1, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type = run_bitset_container_intersection(
                               (const run_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = array_container_create();
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            array_run_container_intersection((const array_container_t *)c1,
                                             (const run_container_t *)c2,
                                             (array_container_t *)result);
            return result;

        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            result = array_container_create();
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            array_run_container_intersection((const array_container_t *)c2,
                                             (const run_container_t *)c1,
                                             (array_container_t *)result);
            return result;
        default:
            assert(false);
            __builtin_unreachable();
            return NULL;
    }
}

/**
 * Compute the size of the intersection between two containers.
 */
static inline int container_and_cardinality(const void *c1, uint8_t type1,
                                            const void *c2, uint8_t type2) {
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return bitset_container_and_justcard(
                (const bitset_container_t *)c1, (const bitset_container_t *)c2);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            return array_container_intersection_cardinality(
                (const array_container_t *)c1, (const array_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            return run_container_intersection_cardinality(
                (const run_container_t *)c1, (const run_container_t *)c2);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            return array_bitset_container_intersection_cardinality(
                (const array_container_t *)c2, (const bitset_container_t *)c1);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return array_bitset_container_intersection_cardinality(
                (const array_container_t *)c1, (const bitset_container_t *)c2);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            return run_bitset_container_intersection_cardinality(
                (const run_container_t *)c2, (const bitset_container_t *)c1);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return run_bitset_container_intersection_cardinality(
                (const run_container_t *)c1, (const bitset_container_t *)c2);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            return array_run_container_intersection_cardinality(
                (const array_container_t *)c1, (const run_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            return array_run_container_intersection_cardinality(
                (const array_container_t *)c2, (const run_container_t *)c1);
        default:
            assert(false);
            __builtin_unreachable();
            return 0;
    }
}

/**
 * Check whether two containers intersect.
 */
static inline bool container_intersect(const void *c1, uint8_t type1, const void *c2,
                                       uint8_t type2) {
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return bitset_container_intersect(
                (const bitset_container_t *)c1,
                (const bitset_container_t *)c2);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            return array_container_intersect((const array_container_t *)c1,
                                             (const array_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            return run_container_intersect((const run_container_t *)c1,
                                           (const run_container_t *)c2);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            return array_bitset_container_intersect(
                (const array_container_t *)c2, (const bitset_container_t *)c1);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return array_bitset_container_intersect(
                (const array_container_t *)c1, (const bitset_container_t *)c2);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            return run_bitset_container_intersect(
                (const run_container_t *)c2,
                (const bitset_container_t *)c1);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            return run_bitset_container_intersect(
                (const run_container_t *)c1,
                (const bitset_container_t *)c2);
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            return array_run_container_intersect((const array_container_t *)c1,
                                                 (const run_container_t *)c2);
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            return array_run_container_intersect((const array_container_t *)c2,
                                                 (const run_container_t *)c1);
        default:
            assert(false);
            __builtin_unreachable();
            return 0;
    }
}

/**
 * Compute intersection between two containers, with result in the first
 container if possible. If the returned pointer is identical to c1,
 then the container has been modified. If the returned pointer is different
 from c1, then a new container has been created and the caller is responsible
 for freeing it.
 The type of the first container may change. Returns the modified
 (and possibly new) container.
*/
static inline void *container_iand(void *c1, uint8_t type1, const void *c2,
                                   uint8_t type2, uint8_t *result_type) {
    // c1 may be mutated: materialize a private copy if it is shared
    c1 = get_writable_copy_if_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type =
                bitset_bitset_container_intersection_inplace(
                    (bitset_container_t *)c1, (const bitset_container_t *)c2, &result)
                    ? BITSET_CONTAINER_TYPE_CODE
                    : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            array_container_intersection_inplace((array_container_t *)c1,
                                                 (const array_container_t *)c2);
            *result_type = ARRAY_CONTAINER_TYPE_CODE;
            return c1;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            run_container_intersection((const run_container_t *)c1,
                                       (const run_container_t *)c2,
                                       (run_container_t *)result);
            // as of January 2016, Java code used non-in-place intersection for
            // two runcontainers
            return convert_run_to_efficient_container_and_free(
                (run_container_t *)result, result_type);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            // c1 is a bitmap so no inplace possible
            result = array_container_create();
            array_bitset_container_intersection((const array_container_t *)c2,
                                                (const bitset_container_t *)c1,
                                                (array_container_t *)result);
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            array_bitset_container_intersection(
                (const array_container_t *)c1, (const bitset_container_t *)c2,
                (array_container_t *)c1);  // allowed
            return c1;

        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            // will attempt in-place computation
            *result_type = run_bitset_container_intersection(
                               (const run_container_t *)c2,
                               (const bitset_container_t *)c1, &c1)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return c1;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type = run_bitset_container_intersection(
                               (const run_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = array_container_create();
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            array_run_container_intersection((const array_container_t *)c1,
                                             (const run_container_t *)c2,
                                             (array_container_t *)result);
            return result;

        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            result = array_container_create();
            *result_type = ARRAY_CONTAINER_TYPE_CODE;  // never bitset
            array_run_container_intersection((const array_container_t *)c2,
                                             (const run_container_t *)c1,
                                             (array_container_t *)result);
            return result;
        default:
            assert(false);
            __builtin_unreachable();
            return NULL;
    }
}

/**
 * Compute union between two containers, generate a new container (having type
 * result_type), requires a typecode. This allocates new memory, caller
 * is responsible for deallocation.
 */
static inline void *container_or(const void *c1, uint8_t type1, const void *c2,
                                 uint8_t type2, uint8_t *result_type) {
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            bitset_container_or((const bitset_container_t *)c1,
                                (const bitset_container_t *)c2,
                                (bitset_container_t *)result);
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = array_array_container_union(
                               (const array_container_t *)c1,
                               (const array_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            run_container_union((const run_container_t *)c1,
                                (const run_container_t *)c2,
                                (run_container_t *)result);
            *result_type = RUN_CONTAINER_TYPE_CODE;
            // todo: could be optimized since will never convert to array
            result = convert_run_to_efficient_container_and_free(
                (run_container_t *)result, (uint8_t *)result_type);
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            array_bitset_container_union((const array_container_t *)c2,
                                         (const bitset_container_t *)c1,
                                         (bitset_container_t *)result);
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            array_bitset_container_union((const array_container_t *)c1,
                                         (const bitset_container_t *)c2,
                                         (bitset_container_t *)result);
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            // a full run container absorbs any union partner: just copy it
            if (run_container_is_full((const run_container_t *)c2)) {
                result = run_container_create();
                *result_type = RUN_CONTAINER_TYPE_CODE;
                run_container_copy((const run_container_t *)c2,
                                   (run_container_t *)result);
                return result;
            }
            result = bitset_container_create();
            run_bitset_container_union((const run_container_t *)c2,
                                       (const bitset_container_t *)c1,
                                       (bitset_container_t *)result);
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            if (run_container_is_full((const run_container_t *)c1)) {
                result = run_container_create();
                *result_type = RUN_CONTAINER_TYPE_CODE;
                run_container_copy((const run_container_t *)c1,
                                   (run_container_t *)result);
                return result;
            }
            result = bitset_container_create();
            run_bitset_container_union((const run_container_t *)c1,
                                       (const bitset_container_t *)c2,
                                       (bitset_container_t *)result);
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            array_run_container_union((const array_container_t *)c1,
                                      (const run_container_t *)c2,
                                      (run_container_t *)result);
            result = convert_run_to_efficient_container_and_free(
                (run_container_t *)result, (uint8_t *)result_type);
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            result = run_container_create();
            array_run_container_union((const array_container_t *)c2,
                                      (const run_container_t *)c1,
                                      (run_container_t *)result);
            result = convert_run_to_efficient_container_and_free(
                (run_container_t *)result, (uint8_t *)result_type);
            return result;
        default:
            assert(false);
            __builtin_unreachable();
            return NULL;  // unreached
    }
}

/**
 * Compute union between two containers, generate a new container (having type
 * result_type), requires a typecode. This allocates new memory, caller
 * is responsible for deallocation.
 *
 * This lazy version delays some operations such as the maintenance of the
 * cardinality. It requires repair later on the generated containers.
 */
static inline void *container_lazy_or(const void *c1, uint8_t type1,
                                      const void *c2, uint8_t type2,
                                      uint8_t *result_type) {
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            bitset_container_or_nocard(
                (const bitset_container_t *)c1, (const bitset_container_t *)c2,
                (bitset_container_t *)result);  // is lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = array_array_container_lazy_union(
                               (const array_container_t *)c1,
                               (const array_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            run_container_union((const run_container_t *)c1,
                                (const run_container_t *)c2,
                                (run_container_t *)result);
            *result_type = RUN_CONTAINER_TYPE_CODE;
            // we are being lazy
            result = convert_run_to_efficient_container(
                (run_container_t *)result, result_type);
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            array_bitset_container_lazy_union(
                (const array_container_t *)c2, (const bitset_container_t *)c1,
                (bitset_container_t *)result);  // is lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            array_bitset_container_lazy_union(
                (const array_container_t *)c1, (const bitset_container_t *)c2,
                (bitset_container_t *)result);  // is lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            if (run_container_is_full((const run_container_t *)c2)) {
                result = run_container_create();
                *result_type = RUN_CONTAINER_TYPE_CODE;
                run_container_copy((const run_container_t *)c2,
                                   (run_container_t *)result);
                return result;
            }
            result = bitset_container_create();
            run_bitset_container_lazy_union(
                (const run_container_t *)c2, (const bitset_container_t *)c1,
                (bitset_container_t *)result);  // is lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            if (run_container_is_full((const run_container_t *)c1)) {
                result = run_container_create();
                *result_type = RUN_CONTAINER_TYPE_CODE;
                run_container_copy((const run_container_t *)c1,
                                   (run_container_t *)result);
                return result;
            }
            result = bitset_container_create();
            run_bitset_container_lazy_union(
                (const run_container_t *)c1, (const bitset_container_t *)c2,
                (bitset_container_t *)result);  // is lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            array_run_container_union((const array_container_t *)c1,
                                      (const run_container_t *)c2,
                                      (run_container_t *)result);
            *result_type = RUN_CONTAINER_TYPE_CODE;
            // next line skipped since we are lazy
            // result = convert_run_to_efficient_container(result, result_type);
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            result = run_container_create();
            array_run_container_union(
                (const array_container_t *)c2, (const run_container_t *)c1,
                (run_container_t *)result);  // TODO make lazy
            *result_type = RUN_CONTAINER_TYPE_CODE;
            // next line skipped since we are lazy
            // result = convert_run_to_efficient_container(result, result_type);
            return result;
        default:
            assert(false);
            __builtin_unreachable();
            return NULL;  // unreached
    }
}

/**
 * Compute the union between two containers, with result in the first container.
 * If the returned pointer is identical to c1, then the container has been
 * modified.
 * If the returned pointer is different from c1, then a new container has been
 * created and the caller is responsible for freeing it.
 * The type of the first container may change. Returns the modified
 * (and possibly new) container
*/
static inline void *container_ior(void *c1, uint8_t type1, const void *c2,
                                  uint8_t type2, uint8_t *result_type) {
    c1 = get_writable_copy_if_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            bitset_container_or((const bitset_container_t *)c1,
                                (const bitset_container_t *)c2,
                                (bitset_container_t *)c1);
#ifdef OR_BITSET_CONVERSION_TO_FULL
            // optional: a saturated bitset (all 2^16 values) is replaced by
            // a single full run container
            if (((bitset_container_t *)c1)->cardinality ==
                (1 << 16)) {  // we convert
                result = run_container_create_range(0, (1 << 16));
                *result_type = RUN_CONTAINER_TYPE_CODE;
                return result;
            }
#endif
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return c1;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = array_array_container_inplace_union(
                               (array_container_t *)c1,
                               (const array_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            if((result == NULL)
               && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) {
                return c1;  // the computation was done in-place!
            }
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            run_container_union_inplace((run_container_t *)c1,
                                        (const run_container_t *)c2);
            return convert_run_to_efficient_container((run_container_t *)c1,
                                                      result_type);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            array_bitset_container_union((const array_container_t *)c2,
                                         (const bitset_container_t *)c1,
                                         (bitset_container_t *)c1);
            *result_type = BITSET_CONTAINER_TYPE_CODE;  // never array
            return c1;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            // c1 is an array, so no in-place possible
            result = bitset_container_create();
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            array_bitset_container_union((const array_container_t *)c1,
                                         (const bitset_container_t *)c2,
                                         (bitset_container_t *)result);
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            if (run_container_is_full((const run_container_t *)c2)) {
                result = run_container_create();
                *result_type = RUN_CONTAINER_TYPE_CODE;
                run_container_copy((const run_container_t *)c2,
                                   (run_container_t *)result);
                return result;
            }
            run_bitset_container_union((const run_container_t *)c2,
                                       (const bitset_container_t *)c1,
                                       (bitset_container_t *)c1);  // allowed
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return c1;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            // a full run c1 already contains everything: nothing to do
            if (run_container_is_full((const run_container_t *)c1)) {
                *result_type = RUN_CONTAINER_TYPE_CODE;

                return c1;
            }
            result = bitset_container_create();
            run_bitset_container_union((const run_container_t *)c1,
                                       (const bitset_container_t *)c2,
                                       (bitset_container_t *)result);
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            array_run_container_union((const array_container_t *)c1,
                                      (const run_container_t *)c2,
                                      (run_container_t *)result);
            result = convert_run_to_efficient_container_and_free(
                (run_container_t *)result, result_type);
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            array_run_container_inplace_union((const array_container_t *)c2,
                                              (run_container_t *)c1);
            c1 = convert_run_to_efficient_container((run_container_t *)c1,
                                                    result_type);
            return c1;
        default:
            assert(false);
            __builtin_unreachable();
            return NULL;
    }
}

/**
 * Compute the union between two containers, with result in the first container.
 * If the returned pointer is identical to c1, then the container has been
 * modified.
 * If the returned pointer is different from c1, then a new container has been
 * created and the caller is responsible for freeing it.
 * The type of the first container may change. Returns the modified
 * (and possibly new) container
 *
 * This lazy version delays some operations such as the maintenance of the
 * cardinality. It requires repair later on the generated containers.
*/
static inline void *container_lazy_ior(void *c1, uint8_t type1, const void *c2,
                                       uint8_t type2, uint8_t *result_type) {
    // lazy in-place union requires the caller to have already unshared c1
    assert(type1 != SHARED_CONTAINER_TYPE_CODE);
    // c1 = get_writable_copy_if_shared(c1,&type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
#ifdef LAZY_OR_BITSET_CONVERSION_TO_FULL
            // if we have two bitsets, we might as well compute the cardinality
            bitset_container_or((const bitset_container_t *)c1,
                                (const bitset_container_t *)c2,
                                (bitset_container_t *)c1);
            // it is possible that two bitsets can lead to a full container
            if (((bitset_container_t *)c1)->cardinality ==
                (1 << 16)) {  // we convert
                result = run_container_create_range(0, (1 << 16));
                *result_type = RUN_CONTAINER_TYPE_CODE;
                return result;
            }
#else
            bitset_container_or_nocard((const bitset_container_t *)c1,
                                       (const bitset_container_t *)c2,
                                       (bitset_container_t *)c1);

#endif
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return c1;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = array_array_container_lazy_inplace_union(
                               (array_container_t *)c1,
                               (const array_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            if((result == NULL)
               && (*result_type == ARRAY_CONTAINER_TYPE_CODE)) {
                return c1;  // the computation was done in-place!
            }
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            run_container_union_inplace((run_container_t *)c1,
                                        (const run_container_t *)c2);
            *result_type = RUN_CONTAINER_TYPE_CODE;
            return convert_run_to_efficient_container((run_container_t *)c1,
                                                      result_type);
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            array_bitset_container_lazy_union(
                (const array_container_t *)c2, (const bitset_container_t *)c1,
                (bitset_container_t *)c1);              // is lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;  // never array
            return c1;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            // c1 is an array, so no in-place possible
            result = bitset_container_create();
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            array_bitset_container_lazy_union(
                (const array_container_t *)c1, (const bitset_container_t *)c2,
                (bitset_container_t *)result);  // is lazy
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            if (run_container_is_full((const run_container_t *)c2)) {
                result = run_container_create();
                *result_type = RUN_CONTAINER_TYPE_CODE;
                run_container_copy((const run_container_t *)c2,
                                   (run_container_t *)result);
                return result;
            }
            run_bitset_container_lazy_union(
                (const run_container_t *)c2, (const bitset_container_t *)c1,
                (bitset_container_t *)c1);  // allowed // lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return c1;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            if (run_container_is_full((const run_container_t *)c1)) {
                *result_type = RUN_CONTAINER_TYPE_CODE;
                return c1;
            }
            result = bitset_container_create();
            run_bitset_container_lazy_union(
                (const run_container_t *)c1, (const bitset_container_t *)c2,
                (bitset_container_t *)result);  // lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            array_run_container_union((const array_container_t *)c1,
                                      (const run_container_t *)c2,
                                      (run_container_t *)result);
            *result_type = RUN_CONTAINER_TYPE_CODE;
            // next line skipped since we are lazy
            // result = convert_run_to_efficient_container_and_free(result,
            // result_type);
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            array_run_container_inplace_union((const array_container_t *)c2,
                                              (run_container_t *)c1);
            *result_type = RUN_CONTAINER_TYPE_CODE;
            // next line skipped since we are lazy
            // result = convert_run_to_efficient_container_and_free(result,
            // result_type);
            return c1;
        default:
            assert(false);
            __builtin_unreachable();
            return NULL;
    }
}

/**
 * Compute symmetric difference (xor) between two containers, generate a new
 * container (having type result_type), requires a typecode. This allocates new
 * memory, caller is responsible for deallocation.
 */
static inline void *container_xor(const void *c1, uint8_t type1, const void *c2,
                                  uint8_t type2, uint8_t *result_type) {
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type = bitset_bitset_container_xor(
                               (const bitset_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = array_array_container_xor(
                               (const array_container_t *)c1,
                               (const array_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            // run/run xor helper chooses the result type itself
            *result_type =
                run_run_container_xor((const run_container_t *)c1,
                                      (const run_container_t *)c2, &result);
            return result;

        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = array_bitset_container_xor(
                               (const array_container_t *)c2,
                               (const bitset_container_t *)c1, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type = array_bitset_container_xor(
                               (const array_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            *result_type = run_bitset_container_xor(
                               (const run_container_t *)c2,
                               (const bitset_container_t *)c1, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;

        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):

            *result_type = run_bitset_container_xor(
                               (const run_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;

        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            *result_type =
                array_run_container_xor((const array_container_t *)c1,
                                        (const run_container_t *)c2, &result);
            return result;

        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            *result_type =
                array_run_container_xor((const array_container_t *)c2,
                                        (const run_container_t *)c1, &result);
            return result;

        default:
            assert(false);
            __builtin_unreachable();
            return NULL;  // unreached
    }
}

/**
 * Compute xor between two containers, generate a new container (having type
 * result_type), requires a typecode. This allocates new memory, caller
 * is responsible for deallocation.
 *
 * This lazy version delays some operations such as the maintenance of the
 * cardinality. It requires repair later on the generated containers.
 */
static inline void *container_lazy_xor(const void *c1, uint8_t type1,
                                       const void *c2, uint8_t type2,
                                       uint8_t *result_type) {
    c1 = container_unwrap_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            bitset_container_xor_nocard(
                (const bitset_container_t *)c1, (const bitset_container_t *)c2,
                (bitset_container_t *)result);  // is lazy
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = array_array_container_lazy_xor(
                               (const array_container_t *)c1,
                               (const array_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            // nothing special done yet.
            *result_type =
                run_run_container_xor((const run_container_t *)c1,
                                      (const run_container_t *)c2, &result);
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            array_bitset_container_lazy_xor((const array_container_t *)c2,
                                            (const bitset_container_t *)c1,
                                            (bitset_container_t *)result);
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            array_bitset_container_lazy_xor((const array_container_t *)c1,
                                            (const bitset_container_t *)c2,
                                            (bitset_container_t *)result);
            return result;
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            run_bitset_container_lazy_xor((const run_container_t *)c2,
                                          (const bitset_container_t *)c1,
                                          (bitset_container_t *)result);
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            result = bitset_container_create();
            run_bitset_container_lazy_xor((const run_container_t *)c1,
                                          (const bitset_container_t *)c2,
                                          (bitset_container_t *)result);
            *result_type = BITSET_CONTAINER_TYPE_CODE;
            return result;

        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            result = run_container_create();
            array_run_container_lazy_xor((const array_container_t *)c1,
                                         (const run_container_t *)c2,
                                         (run_container_t *)result);
            *result_type = RUN_CONTAINER_TYPE_CODE;
            // next line skipped since we are lazy
            // result = convert_run_to_efficient_container(result, result_type);
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            result = run_container_create();
            array_run_container_lazy_xor((const array_container_t *)c2,
                                         (const run_container_t *)c1,
                                         (run_container_t *)result);
            *result_type = RUN_CONTAINER_TYPE_CODE;
            // next line skipped since we are lazy
            // result = convert_run_to_efficient_container(result, result_type);
            return result;
        default:
            assert(false);
            __builtin_unreachable();
            return NULL;  // unreached
    }
}

/**
 * Compute the xor between two containers, with result in the first container.
 * If the returned pointer is identical to c1, then the container has been
 * modified.
 * If the returned pointer is different from c1, then a new container has been
 * created and the caller is responsible for freeing it.
 * The type of the first container may change. Returns the modified
 * (and possibly new) container
*/
static inline void *container_ixor(void *c1, uint8_t type1, const void *c2,
                                   uint8_t type2, uint8_t *result_type) {
    c1 = get_writable_copy_if_shared(c1, &type1);
    c2 = container_unwrap_shared(c2, &type2);
    void *result = NULL;
    switch (CONTAINER_PAIR(type1, type2)) {
        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type = bitset_bitset_container_ixor(
                               (bitset_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = array_array_container_ixor(
                               (array_container_t *)c1,
                               (const array_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;

        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            *result_type = run_run_container_ixor(
                (run_container_t *)c1, (const run_container_t *)c2, &result);
            return result;

        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            ARRAY_CONTAINER_TYPE_CODE):
            *result_type = bitset_array_container_ixor(
                               (bitset_container_t *)c1,
                               (const array_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;
            return result;
        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type = array_bitset_container_ixor(
                               (array_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;

            return result;

        case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE,
                            RUN_CONTAINER_TYPE_CODE):
            *result_type =
                bitset_run_container_ixor((bitset_container_t *)c1,
                                          (const run_container_t *)c2, &result)
                    ? BITSET_CONTAINER_TYPE_CODE
                    : ARRAY_CONTAINER_TYPE_CODE;

            return result;

        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE,
                            BITSET_CONTAINER_TYPE_CODE):
            *result_type = run_bitset_container_ixor(
                               (run_container_t *)c1,
                               (const bitset_container_t *)c2, &result)
                               ? BITSET_CONTAINER_TYPE_CODE
                               : ARRAY_CONTAINER_TYPE_CODE;

            return result;

        case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE):
            *result_type = array_run_container_ixor(
                (array_container_t *)c1, (const run_container_t *)c2, &result);
            return result;
        case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE):
            *result_type = run_array_container_ixor(
                (run_container_t *)c1, (const array_container_t *)c2, &result);
            return result;
        default:
            assert(false);
            __builtin_unreachable();
            return NULL;
    }
}

/**
 * Compute the xor between two containers, with result in the first container.
 * If the returned pointer is identical to c1, then the container has been
 * modified.
 * If the returned pointer is different from c1, then a new container has been
 * created and the caller is responsible for freeing it.
 * The type of the first container may change. Returns the modified
 * (and possibly new) container
 *
 * This lazy version delays some operations such as the maintenance of the
 * cardinality. It requires repair later on the generated containers.
+*/ +static inline void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2, + uint8_t type2, uint8_t *result_type) { + assert(type1 != SHARED_CONTAINER_TYPE_CODE); + // c1 = get_writable_copy_if_shared(c1,&type1); + c2 = container_unwrap_shared(c2, &type2); + switch (CONTAINER_PAIR(type1, type2)) { + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + bitset_container_xor_nocard((bitset_container_t *)c1, + (const bitset_container_t *)c2, + (bitset_container_t *)c1); // is lazy + *result_type = BITSET_CONTAINER_TYPE_CODE; + return c1; + // TODO: other cases being lazy, esp. when we know inplace not likely + // could see the corresponding code for union + default: + // we may have a dirty bitset (without a precomputed cardinality) and + // calling container_ixor on it might be unsafe. + if( (type1 == BITSET_CONTAINER_TYPE_CODE) + && (((const bitset_container_t *)c1)->cardinality == BITSET_UNKNOWN_CARDINALITY)) { + ((bitset_container_t *)c1)->cardinality = bitset_container_compute_cardinality((bitset_container_t *)c1); + } + return container_ixor(c1, type1, c2, type2, result_type); + } +} + +/** + * Compute difference (andnot) between two containers, generate a new + * container (having type result_type), requires a typecode. This allocates new + * memory, caller is responsible for deallocation. + */ +static inline void *container_andnot(const void *c1, uint8_t type1, + const void *c2, uint8_t type2, + uint8_t *result_type) { + c1 = container_unwrap_shared(c1, &type1); + c2 = container_unwrap_shared(c2, &type2); + void *result = NULL; + switch (CONTAINER_PAIR(type1, type2)) { + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + *result_type = bitset_bitset_container_andnot( + (const bitset_container_t *)c1, + (const bitset_container_t *)c2, &result) + ? 
BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, + ARRAY_CONTAINER_TYPE_CODE): + result = array_container_create(); + array_array_container_andnot((const array_container_t *)c1, + (const array_container_t *)c2, + (array_container_t *)result); + *result_type = ARRAY_CONTAINER_TYPE_CODE; + return result; + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): + if (run_container_is_full((const run_container_t *)c2)) { + result = array_container_create(); + *result_type = ARRAY_CONTAINER_TYPE_CODE; + return result; + } + *result_type = + run_run_container_andnot((const run_container_t *)c1, + (const run_container_t *)c2, &result); + return result; + + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + ARRAY_CONTAINER_TYPE_CODE): + *result_type = bitset_array_container_andnot( + (const bitset_container_t *)c1, + (const array_container_t *)c2, &result) + ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + result = array_container_create(); + array_bitset_container_andnot((const array_container_t *)c1, + (const bitset_container_t *)c2, + (array_container_t *)result); + *result_type = ARRAY_CONTAINER_TYPE_CODE; + return result; + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + RUN_CONTAINER_TYPE_CODE): + if (run_container_is_full((const run_container_t *)c2)) { + result = array_container_create(); + *result_type = ARRAY_CONTAINER_TYPE_CODE; + return result; + } + *result_type = bitset_run_container_andnot( + (const bitset_container_t *)c1, + (const run_container_t *)c2, &result) + ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + + *result_type = run_bitset_container_andnot( + (const run_container_t *)c1, + (const bitset_container_t *)c2, &result) + ? 
BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): + if (run_container_is_full((const run_container_t *)c2)) { + result = array_container_create(); + *result_type = ARRAY_CONTAINER_TYPE_CODE; + return result; + } + result = array_container_create(); + array_run_container_andnot((const array_container_t *)c1, + (const run_container_t *)c2, + (array_container_t *)result); + *result_type = ARRAY_CONTAINER_TYPE_CODE; + return result; + + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): + *result_type = run_array_container_andnot( + (const run_container_t *)c1, (const array_container_t *)c2, + &result); + return result; + + default: + assert(false); + __builtin_unreachable(); + return NULL; // unreached + } +} + +/** + * Compute the andnot between two containers, with result in the first + * container. + * If the returned pointer is identical to c1, then the container has been + * modified. + * If the returned pointer is different from c1, then a new container has been + * created and the caller is responsible for freeing it. + * The type of the first container may change. Returns the modified + * (and possibly new) container +*/ +static inline void *container_iandnot(void *c1, uint8_t type1, const void *c2, + uint8_t type2, uint8_t *result_type) { + c1 = get_writable_copy_if_shared(c1, &type1); + c2 = container_unwrap_shared(c2, &type2); + void *result = NULL; + switch (CONTAINER_PAIR(type1, type2)) { + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + *result_type = bitset_bitset_container_iandnot( + (bitset_container_t *)c1, + (const bitset_container_t *)c2, &result) + ? 
BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, + ARRAY_CONTAINER_TYPE_CODE): + array_array_container_iandnot((array_container_t *)c1, + (const array_container_t *)c2); + *result_type = ARRAY_CONTAINER_TYPE_CODE; + return c1; + + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): + *result_type = run_run_container_iandnot( + (run_container_t *)c1, (const run_container_t *)c2, &result); + return result; + + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + ARRAY_CONTAINER_TYPE_CODE): + *result_type = bitset_array_container_iandnot( + (bitset_container_t *)c1, + (const array_container_t *)c2, &result) + ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + *result_type = ARRAY_CONTAINER_TYPE_CODE; + + array_bitset_container_iandnot((array_container_t *)c1, + (const bitset_container_t *)c2); + return c1; + + case CONTAINER_PAIR(BITSET_CONTAINER_TYPE_CODE, + RUN_CONTAINER_TYPE_CODE): + *result_type = bitset_run_container_iandnot( + (bitset_container_t *)c1, + (const run_container_t *)c2, &result) + ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + + return result; + + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, + BITSET_CONTAINER_TYPE_CODE): + *result_type = run_bitset_container_iandnot( + (run_container_t *)c1, + (const bitset_container_t *)c2, &result) + ? 
BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + + return result; + + case CONTAINER_PAIR(ARRAY_CONTAINER_TYPE_CODE, RUN_CONTAINER_TYPE_CODE): + *result_type = ARRAY_CONTAINER_TYPE_CODE; + array_run_container_iandnot((array_container_t *)c1, + (const run_container_t *)c2); + return c1; + case CONTAINER_PAIR(RUN_CONTAINER_TYPE_CODE, ARRAY_CONTAINER_TYPE_CODE): + *result_type = run_array_container_iandnot( + (run_container_t *)c1, (const array_container_t *)c2, &result); + return result; + default: + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +/** + * Visit all values x of the container once, passing (base+x,ptr) + * to iterator. You need to specify a container and its type. + * Returns true if the iteration should continue. + */ +static inline bool container_iterate(const void *container, uint8_t typecode, + uint32_t base, roaring_iterator iterator, + void *ptr) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_iterate( + (const bitset_container_t *)container, base, iterator, ptr); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_iterate((const array_container_t *)container, + base, iterator, ptr); + case RUN_CONTAINER_TYPE_CODE: + return run_container_iterate((const run_container_t *)container, + base, iterator, ptr); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +static inline bool container_iterate64(const void *container, uint8_t typecode, + uint32_t base, + roaring_iterator64 iterator, + uint64_t high_bits, void *ptr) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_iterate64( + (const bitset_container_t *)container, base, iterator, + high_bits, ptr); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_iterate64( + (const array_container_t *)container, 
base, iterator, high_bits, + ptr); + case RUN_CONTAINER_TYPE_CODE: + return run_container_iterate64((const run_container_t *)container, + base, iterator, high_bits, ptr); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +static inline void *container_not(const void *c, uint8_t typ, + uint8_t *result_type) { + c = container_unwrap_shared(c, &typ); + void *result = NULL; + switch (typ) { + case BITSET_CONTAINER_TYPE_CODE: + *result_type = bitset_container_negation( + (const bitset_container_t *)c, &result) + ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case ARRAY_CONTAINER_TYPE_CODE: + result = bitset_container_create(); + *result_type = BITSET_CONTAINER_TYPE_CODE; + array_container_negation((const array_container_t *)c, + (bitset_container_t *)result); + return result; + case RUN_CONTAINER_TYPE_CODE: + *result_type = + run_container_negation((const run_container_t *)c, &result); + return result; + + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +static inline void *container_not_range(const void *c, uint8_t typ, + uint32_t range_start, + uint32_t range_end, + uint8_t *result_type) { + c = container_unwrap_shared(c, &typ); + void *result = NULL; + switch (typ) { + case BITSET_CONTAINER_TYPE_CODE: + *result_type = + bitset_container_negation_range((const bitset_container_t *)c, + range_start, range_end, &result) + ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case ARRAY_CONTAINER_TYPE_CODE: + *result_type = + array_container_negation_range((const array_container_t *)c, + range_start, range_end, &result) + ? 
BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case RUN_CONTAINER_TYPE_CODE: + *result_type = run_container_negation_range( + (const run_container_t *)c, range_start, range_end, &result); + return result; + + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +static inline void *container_inot(void *c, uint8_t typ, uint8_t *result_type) { + c = get_writable_copy_if_shared(c, &typ); + void *result = NULL; + switch (typ) { + case BITSET_CONTAINER_TYPE_CODE: + *result_type = bitset_container_negation_inplace( + (bitset_container_t *)c, &result) + ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case ARRAY_CONTAINER_TYPE_CODE: + // will never be inplace + result = bitset_container_create(); + *result_type = BITSET_CONTAINER_TYPE_CODE; + array_container_negation((array_container_t *)c, + (bitset_container_t *)result); + array_container_free((array_container_t *)c); + return result; + case RUN_CONTAINER_TYPE_CODE: + *result_type = + run_container_negation_inplace((run_container_t *)c, &result); + return result; + + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +static inline void *container_inot_range(void *c, uint8_t typ, + uint32_t range_start, + uint32_t range_end, + uint8_t *result_type) { + c = get_writable_copy_if_shared(c, &typ); + void *result = NULL; + switch (typ) { + case BITSET_CONTAINER_TYPE_CODE: + *result_type = + bitset_container_negation_range_inplace( + (bitset_container_t *)c, range_start, range_end, &result) + ? BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case ARRAY_CONTAINER_TYPE_CODE: + *result_type = + array_container_negation_range_inplace( + (array_container_t *)c, range_start, range_end, &result) + ? 
BITSET_CONTAINER_TYPE_CODE + : ARRAY_CONTAINER_TYPE_CODE; + return result; + case RUN_CONTAINER_TYPE_CODE: + *result_type = run_container_negation_range_inplace( + (run_container_t *)c, range_start, range_end, &result); + return result; + + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return NULL; + } +} + +/** + * If the element of given rank is in this container, supposing that + * the first + * element has rank start_rank, then the function returns true and + * sets element + * accordingly. + * Otherwise, it returns false and update start_rank. + */ +static inline bool container_select(const void *container, uint8_t typecode, + uint32_t *start_rank, uint32_t rank, + uint32_t *element) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_select((const bitset_container_t *)container, + start_rank, rank, element); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_select((const array_container_t *)container, + start_rank, rank, element); + case RUN_CONTAINER_TYPE_CODE: + return run_container_select((const run_container_t *)container, + start_rank, rank, element); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +static inline uint16_t container_maximum(const void *container, + uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_maximum((const bitset_container_t *)container); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_maximum((const array_container_t *)container); + case RUN_CONTAINER_TYPE_CODE: + return run_container_maximum((const run_container_t *)container); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +static inline uint16_t container_minimum(const void 
*container, + uint8_t typecode) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_minimum((const bitset_container_t *)container); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_minimum((const array_container_t *)container); + case RUN_CONTAINER_TYPE_CODE: + return run_container_minimum((const run_container_t *)container); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +// number of values smaller or equal to x +static inline int container_rank(const void *container, uint8_t typecode, + uint16_t x) { + container = container_unwrap_shared(container, &typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE_CODE: + return bitset_container_rank((const bitset_container_t *)container, x); + case ARRAY_CONTAINER_TYPE_CODE: + return array_container_rank((const array_container_t *)container, x); + case RUN_CONTAINER_TYPE_CODE: + return run_container_rank((const run_container_t *)container, x); + case SHARED_CONTAINER_TYPE_CODE: + default: + assert(false); + __builtin_unreachable(); + return false; + } +} + +/** + * Add all values in range [min, max] to a given container. + * + * If the returned pointer is different from $container, then a new container + * has been created and the caller is responsible for freeing it. + * The type of the first container may change. Returns the modified + * (and possibly new) container. 
+ */ +static inline void *container_add_range(void *container, uint8_t type, + uint32_t min, uint32_t max, + uint8_t *result_type) { + // NB: when selecting new container type, we perform only inexpensive checks + switch (type) { + case BITSET_CONTAINER_TYPE_CODE: { + bitset_container_t *bitset = (bitset_container_t *) container; + + int32_t union_cardinality = 0; + union_cardinality += bitset->cardinality; + union_cardinality += max - min + 1; + union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); + + if (union_cardinality == INT32_C(0x10000)) { + *result_type = RUN_CONTAINER_TYPE_CODE; + return run_container_create_range(0, INT32_C(0x10000)); + } else { + *result_type = BITSET_CONTAINER_TYPE_CODE; + bitset_set_lenrange(bitset->array, min, max - min); + bitset->cardinality = union_cardinality; + return bitset; + } + } + case ARRAY_CONTAINER_TYPE_CODE: { + array_container_t *array = (array_container_t *) container; + + int32_t nvals_greater = count_greater(array->array, array->cardinality, max); + int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); + int32_t union_cardinality = nvals_less + (max - min + 1) + nvals_greater; + + if (union_cardinality == INT32_C(0x10000)) { + *result_type = RUN_CONTAINER_TYPE_CODE; + return run_container_create_range(0, INT32_C(0x10000)); + } else if (union_cardinality <= DEFAULT_MAX_SIZE) { + *result_type = ARRAY_CONTAINER_TYPE_CODE; + array_container_add_range_nvals(array, min, max, nvals_less, nvals_greater); + return array; + } else { + *result_type = BITSET_CONTAINER_TYPE_CODE; + bitset_container_t *bitset = bitset_container_from_array(array); + bitset_set_lenrange(bitset->array, min, max - min); + bitset->cardinality = union_cardinality; + return bitset; + } + } + case RUN_CONTAINER_TYPE_CODE: { + run_container_t *run = (run_container_t *) container; + + int32_t nruns_greater = rle16_count_greater(run->runs, run->n_runs, max); + int32_t nruns_less = 
rle16_count_less(run->runs, run->n_runs - nruns_greater, min); + + int32_t run_size_bytes = (nruns_less + 1 + nruns_greater) * sizeof(rle16_t); + int32_t bitset_size_bytes = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + + if (run_size_bytes <= bitset_size_bytes) { + run_container_add_range_nruns(run, min, max, nruns_less, nruns_greater); + *result_type = RUN_CONTAINER_TYPE_CODE; + return run; + } else { + *result_type = BITSET_CONTAINER_TYPE_CODE; + return bitset_container_from_run_range(run, min, max); + } + } + case SHARED_CONTAINER_TYPE_CODE: + default: + __builtin_unreachable(); + } +} + +/* + * Removes all elements in range [min, max]. + * Returns one of: + * - NULL if no elements left + * - pointer to the original container + * - pointer to a newly-allocated container (if it is more efficient) + * + * If the returned pointer is different from $container, then a new container + * has been created and the caller is responsible for freeing the original container. + */ +static inline void *container_remove_range(void *container, uint8_t type, + uint32_t min, uint32_t max, + uint8_t *result_type) { + switch (type) { + case BITSET_CONTAINER_TYPE_CODE: { + bitset_container_t *bitset = (bitset_container_t *) container; + + int32_t result_cardinality = bitset->cardinality - + bitset_lenrange_cardinality(bitset->array, min, max-min); + + if (result_cardinality == 0) { + return NULL; + } else if (result_cardinality < DEFAULT_MAX_SIZE) { + *result_type = ARRAY_CONTAINER_TYPE_CODE; + bitset_reset_range(bitset->array, min, max+1); + bitset->cardinality = result_cardinality; + return array_container_from_bitset(bitset); + } else { + *result_type = BITSET_CONTAINER_TYPE_CODE; + bitset_reset_range(bitset->array, min, max+1); + bitset->cardinality = result_cardinality; + return bitset; + } + } + case ARRAY_CONTAINER_TYPE_CODE: { + array_container_t *array = (array_container_t *) container; + + int32_t nvals_greater = count_greater(array->array, array->cardinality, max); 
+ int32_t nvals_less = count_less(array->array, array->cardinality - nvals_greater, min); + int32_t result_cardinality = nvals_less + nvals_greater; + + if (result_cardinality == 0) { + return NULL; + } else { + *result_type = ARRAY_CONTAINER_TYPE_CODE; + array_container_remove_range(array, nvals_less, + array->cardinality - result_cardinality); + return array; + } + } + case RUN_CONTAINER_TYPE_CODE: { + run_container_t *run = (run_container_t *) container; + + if (run->n_runs == 0) { + return NULL; + } + if (min <= run_container_minimum(run) && max >= run_container_maximum(run)) { + return NULL; + } + + run_container_remove_range(run, min, max); + + if (run_container_serialized_size_in_bytes(run->n_runs) <= + bitset_container_serialized_size_in_bytes()) { + *result_type = RUN_CONTAINER_TYPE_CODE; + return run; + } else { + *result_type = BITSET_CONTAINER_TYPE_CODE; + return bitset_container_from_run(run); + } + } + case SHARED_CONTAINER_TYPE_CODE: + default: + __builtin_unreachable(); + } +} + +#endif +/* end file include/roaring/containers/containers.h */ +/* begin file include/roaring/roaring_array.h */ +#ifndef INCLUDE_ROARING_ARRAY_H +#define INCLUDE_ROARING_ARRAY_H +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define MAX_CONTAINERS 65536 + +#define SERIALIZATION_ARRAY_UINT32 1 +#define SERIALIZATION_CONTAINER 2 + +#define ROARING_FLAG_COW UINT8_C(0x1) +#define ROARING_FLAG_FROZEN UINT8_C(0x2) + +enum { + SERIAL_COOKIE_NO_RUNCONTAINER = 12346, + SERIAL_COOKIE = 12347, + FROZEN_COOKIE = 13766, + NO_OFFSET_THRESHOLD = 4 +}; + +/** + * Roaring arrays are array-based key-value pairs having containers as values + * and 16-bit integer keys. A roaring bitmap might be implemented as such. + */ + +// parallel arrays. Element sizes quite different. +// Alternative is array +// of structs. Which would have better +// cache performance through binary searches? 
+ +typedef struct roaring_array_s { + int32_t size; + int32_t allocation_size; + void **containers; + uint16_t *keys; + uint8_t *typecodes; + uint8_t flags; +} roaring_array_t; + +/** + * Create a new roaring array + */ +roaring_array_t *ra_create(void); + +/** + * Initialize an existing roaring array with the specified capacity (in number + * of containers) + */ +bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap); + +/** + * Initialize with zero capacity + */ +void ra_init(roaring_array_t *t); + +/** + * Copies this roaring array, we assume that dest is not initialized + */ +bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, + bool copy_on_write); + +/* + * Shrinks the capacity, returns the number of bytes saved. + */ +int ra_shrink_to_fit(roaring_array_t *ra); + +/** + * Copies this roaring array, we assume that dest is initialized + */ +bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, + bool copy_on_write); + +/** + * Frees the memory used by a roaring array + */ +void ra_clear(roaring_array_t *r); + +/** + * Frees the memory used by a roaring array, but does not free the containers + */ +void ra_clear_without_containers(roaring_array_t *r); + +/** + * Frees just the containers + */ +void ra_clear_containers(roaring_array_t *ra); + +/** + * Get the index corresponding to a 16-bit key + */ +static inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x) { + if ((ra->size == 0) || ra->keys[ra->size - 1] == x) return ra->size - 1; + return binarySearch(ra->keys, (int32_t)ra->size, x); +} + +/** + * Retrieves the container at index i, filling in the typecode + */ +static inline void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, + uint8_t *typecode) { + *typecode = ra->typecodes[i]; + return ra->containers[i]; +} + +/** + * Retrieves the key at index i + */ +uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i); + +/** + * Add a new key-value pair at index i + */ +void 
ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, + void *container, uint8_t typecode); + +/** + * Append a new key-value pair + */ +void ra_append(roaring_array_t *ra, uint16_t s, void *c, uint8_t typecode); + +/** + * Append a new key-value pair to ra, cloning (in COW sense) a value from sa + * at index index + */ +void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t index, bool copy_on_write); + +/** + * Append new key-value pairs to ra, cloning (in COW sense) values from sa + * at indexes + * [start_index, end_index) + */ +void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write); + +/** appends from sa to ra, ending with the greatest key that is + * is less or equal stopping_key + */ +void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t stopping_key, bool copy_on_write); + +/** appends from sa to ra, starting with the smallest key that is + * is strictly greater than before_start + */ + +void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, + uint16_t before_start, bool copy_on_write); + +/** + * Move the key-value pairs to ra from sa at indexes + * [start_index, end_index), old array should not be freed + * (use ra_clear_without_containers) + **/ +void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index); +/** + * Append new key-value pairs to ra, from sa at indexes + * [start_index, end_index) + */ +void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, + int32_t start_index, int32_t end_index, + bool copy_on_write); + +/** + * Set the container at the corresponding index using the specified + * typecode. 
+ */ +static inline void ra_set_container_at_index(const roaring_array_t *ra, int32_t i, + void *c, uint8_t typecode) { + assert(i < ra->size); + ra->containers[i] = c; + ra->typecodes[i] = typecode; +} + +/** + * If needed, increase the capacity of the array so that it can fit k values + * (at + * least); + */ +bool extend_array(roaring_array_t *ra, int32_t k); + +static inline int32_t ra_get_size(const roaring_array_t *ra) { return ra->size; } + +static inline int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, + int32_t pos) { + return advanceUntil(ra->keys, pos, ra->size, x); +} + +int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos); + +void ra_downsize(roaring_array_t *ra, int32_t new_length); + +static inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, + int32_t i, uint16_t key, + void *c, uint8_t typecode) { + assert(i < ra->size); + + ra->keys[i] = key; + ra->containers[i] = c; + ra->typecodes[i] = typecode; +} + +// write set bits to an array +void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans); + +bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans); + +/** + * write a bitmap to a buffer. This is meant to be compatible with + * the + * Java and Go versions. Return the size in bytes of the serialized + * output (which should be ra_portable_size_in_bytes(ra)). + */ +size_t ra_portable_serialize(const roaring_array_t *ra, char *buf); + +/** + * read a bitmap from a serialized version. This is meant to be compatible + * with the Java and Go versions. + * maxbytes indicates how many bytes available from buf. + * When the function returns true, roaring_array_t is populated with the data + * and *readbytes indicates how many bytes were read. In all cases, if the function + * returns true, then maxbytes >= *readbytes. 
+ */ +bool ra_portable_deserialize(roaring_array_t *ra, const char *buf, const size_t maxbytes, size_t * readbytes); + +/** + * Quickly checks whether there is a serialized bitmap at the pointer, + * not exceeding size "maxbytes" in bytes. This function does not allocate + * memory dynamically. + * + * This function returns 0 if and only if no valid bitmap is found. + * Otherwise, it returns how many bytes are occupied by the bitmap data. + */ +size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes); + +/** + * How many bytes are required to serialize this bitmap (meant to be + * compatible + * with Java and Go versions) + */ +size_t ra_portable_size_in_bytes(const roaring_array_t *ra); + +/** + * return true if it contains at least one run container. + */ +bool ra_has_run_container(const roaring_array_t *ra); + +/** + * Size of the header when serializing (meant to be compatible + * with Java and Go versions) + */ +uint32_t ra_portable_header_size(const roaring_array_t *ra); + +/** + * If the container at the index i is share, unshare it (creating a local + * copy if needed). + */ +static inline void ra_unshare_container_at_index(roaring_array_t *ra, + uint16_t i) { + assert(i < ra->size); + ra->containers[i] = + get_writable_copy_if_shared(ra->containers[i], &ra->typecodes[i]); +} + +/** + * remove at index i, sliding over all entries after i + */ +void ra_remove_at_index(roaring_array_t *ra, int32_t i); + + +/** +* clears all containers, sets the size at 0 and shrinks the memory usage. +*/ +void ra_reset(roaring_array_t *ra); + +/** + * remove at index i, sliding over all entries after i. Free removed container. 
+ */ +void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i); + +/** + * remove a chunk of indices, sliding over entries after it + */ +// void ra_remove_index_range(roaring_array_t *ra, int32_t begin, int32_t end); + +// used in inplace andNot only, to slide left the containers from +// the mutated RoaringBitmap that are after the largest container of +// the argument RoaringBitmap. It is followed by a call to resize. +// +void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, + uint32_t new_begin); + +/** + * Shifts rightmost $count containers to the left (distance < 0) or + * to the right (distance > 0). + * Allocates memory if necessary. + * This function doesn't free or create new containers. + * Caller is responsible for that. + */ +void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance); + +#ifdef __cplusplus +} +#endif + +#endif +/* end file include/roaring/roaring_array.h */ +/* begin file include/roaring/misc/configreport.h */ +/* + * configreport.h + * + */ + +#ifndef INCLUDE_MISC_CONFIGREPORT_H_ +#define INCLUDE_MISC_CONFIGREPORT_H_ + +#include // for size_t +#include +#include + + +#ifdef IS_X64 +// useful for basic info (0) +static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { +#ifdef ROARING_INLINE_ASM + __asm volatile("cpuid" + : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx) + : "0"(*eax), "2"(*ecx)); +#endif /* not sure what to do when inline assembly is unavailable*/ +} + +// CPUID instruction takes no parameters as CPUID implicitly uses the EAX +// register. 
+// The EAX register should be loaded with a value specifying what information to +// return +static inline void cpuinfo(int code, int *eax, int *ebx, int *ecx, int *edx) { +#ifdef ROARING_INLINE_ASM + __asm__ volatile("cpuid;" // call cpuid instruction + : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), + "=d"(*edx) // output equal to "movl %%eax %1" + : "a"(code) // input equal to "movl %1, %%eax" + //:"%eax","%ebx","%ecx","%edx"// clobbered register + ); +#endif /* not sure what to do when inline assembly is unavailable*/ +} + +static inline int computecacheline(void) { + int eax = 0, ebx = 0, ecx = 0, edx = 0; + cpuinfo((int)0x80000006, &eax, &ebx, &ecx, &edx); + return ecx & 0xFF; +} + +// this is quite imperfect, but can be handy +static inline const char *guessprocessor(void) { + unsigned eax = 1, ebx = 0, ecx = 0, edx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + const char *codename; + switch (eax >> 4) { + case 0x506E: + codename = "Skylake"; + break; + case 0x406C: + codename = "CherryTrail"; + break; + case 0x306D: + codename = "Broadwell"; + break; + case 0x306C: + codename = "Haswell"; + break; + case 0x306A: + codename = "IvyBridge"; + break; + case 0x206A: + case 0x206D: + codename = "SandyBridge"; + break; + case 0x2065: + case 0x206C: + case 0x206F: + codename = "Westmere"; + break; + case 0x106E: + case 0x106A: + case 0x206E: + codename = "Nehalem"; + break; + case 0x1067: + case 0x106D: + codename = "Penryn"; + break; + case 0x006F: + case 0x1066: + codename = "Merom"; + break; + case 0x0066: + codename = "Presler"; + break; + case 0x0063: + case 0x0064: + codename = "Prescott"; + break; + case 0x006D: + codename = "Dothan"; + break; + case 0x0366: + codename = "Cedarview"; + break; + case 0x0266: + codename = "Lincroft"; + break; + case 0x016C: + codename = "Pineview"; + break; + default: + codename = "UNKNOWN"; + break; + } + return codename; +} + +static inline void tellmeall(void) { + printf("Intel processor: %s\t", guessprocessor()); + +#ifdef 
__VERSION__ + printf(" compiler version: %s\t", __VERSION__); +#endif + printf("\tBuild option USEAVX "); +#ifdef USEAVX + printf("enabled\n"); +#else + printf("disabled\n"); +#endif +#ifndef __AVX2__ + printf("AVX2 is NOT available.\n"); +#endif + + if ((sizeof(int) != 4) || (sizeof(long) != 8)) { + printf("number of bytes: int = %lu long = %lu \n", + (long unsigned int)sizeof(size_t), + (long unsigned int)sizeof(int)); + } +#if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__ +// This is what we expect! +// printf("you have little endian machine"); +#endif +#if defined(__BIG_ENDIAN__) && __BIG_ENDIAN__ + printf("you have a big endian machine"); +#endif +#if __CHAR_BIT__ + if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); +#endif + if (computecacheline() != 64) + printf("cache line: %d bytes\n", computecacheline()); +} +#else + +static inline void tellmeall(void) { + printf("Non-X64 processor\n"); +#ifdef __arm__ + printf("ARM processor detected\n"); +#endif +#ifdef __VERSION__ + printf(" compiler version: %s\t", __VERSION__); +#endif + if ((sizeof(int) != 4) || (sizeof(long) != 8)) { + printf("number of bytes: int = %lu long = %lu \n", + (long unsigned int)sizeof(size_t), + (long unsigned int)sizeof(int)); + } +#if __LITTLE_ENDIAN__ +// This is what we expect! +// printf("you have little endian machine"); +#endif +#if __BIG_ENDIAN__ + printf("you have a big endian machine"); +#endif +#if __CHAR_BIT__ + if (__CHAR_BIT__ != 8) printf("on your machine, chars don't have 8bits???"); +#endif +} + +#endif + +#endif /* INCLUDE_MISC_CONFIGREPORT_H_ */ +/* end file include/roaring/misc/configreport.h */ +/* begin file include/roaring/roaring.h */ +/* +An implementation of Roaring Bitmaps in C. 
+*/ + +#ifndef ROARING_H +#define ROARING_H +#ifdef __cplusplus +extern "C" { +#endif + +#include + +typedef struct roaring_bitmap_s { + roaring_array_t high_low_container; +} roaring_bitmap_t; + +/** + * Creates a new bitmap (initially empty) + */ +roaring_bitmap_t *roaring_bitmap_create(void); + +/** + * Add all the values between min (included) and max (excluded) that are at a + * distance k*step from min. +*/ +roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, + uint32_t step); + +/** + * Creates a new bitmap (initially empty) with a provided + * container-storage capacity (it is a performance hint). + */ +roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap); + +/** + * Creates a new bitmap from a pointer of uint32_t integers + */ +roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals); + +/* + * Whether you want to use copy-on-write. + * Saves memory and avoids copies but needs more care in a threaded context. + * Most users should ignore this flag. + * Note: if you do turn this flag to 'true', enabling COW, + * then ensure that you do so for all of your bitmaps since + * interactions between bitmaps with and without COW is unsafe. + */ +static inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r) { + return r->high_low_container.flags & ROARING_FLAG_COW; +} +static inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow) { + if (cow) { + r->high_low_container.flags |= ROARING_FLAG_COW; + } else { + r->high_low_container.flags &= ~ROARING_FLAG_COW; + } +} + +/** + * Describe the inner structure of the bitmap. + */ +void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra); + +/** + * Creates a new bitmap from a list of uint32_t integers + */ +roaring_bitmap_t *roaring_bitmap_of(size_t n, ...); + +/** + * Copies a bitmap. This does memory allocation. The caller is responsible for + * memory management. 
+ * + */ +roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r); + + +/** + * Copies a bitmap from src to dest. It is assumed that the pointer dest + * is to an already allocated bitmap. The content of the dest bitmap is + * freed/deleted. + * + * It might be preferable and simpler to call roaring_bitmap_copy except + * that roaring_bitmap_overwrite can save on memory allocations. + * + */ +bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, + const roaring_bitmap_t *src); + +/** + * Print the content of the bitmap. + */ +void roaring_bitmap_printf(const roaring_bitmap_t *ra); + +/** + * Computes the intersection between two bitmaps and returns new bitmap. The + * caller is + * responsible for memory management. + * + */ +roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * Computes the size of the intersection between two bitmaps. + * + */ +uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + + +/** + * Check whether two bitmaps intersect. + * + */ +bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * Computes the Jaccard index between two bitmaps. (Also known as the Tanimoto + * distance, + * or the Jaccard similarity coefficient) + * + * The Jaccard index is undefined if both bitmaps are empty. + * + */ +double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * Computes the size of the union between two bitmaps. + * + */ +uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * Computes the size of the difference (andnot) between two bitmaps. + * + */ +uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * Computes the size of the symmetric difference (andnot) between two bitmaps. 
+ *
+ */
+uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
+                                        const roaring_bitmap_t *x2);
+
+/**
+ * Inplace version modifies x1, x1 == x2 is allowed
+ */
+void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
+                                const roaring_bitmap_t *x2);
+
+/**
+ * Computes the union between two bitmaps and returns new bitmap. The caller is
+ * responsible for memory management.
+ */
+roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
+                                    const roaring_bitmap_t *x2);
+
+/**
+ * Inplace version of roaring_bitmap_or, modifies x1. TODO: decide whether x1 ==
+ *x2 ok
+ *
+ */
+void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
+                               const roaring_bitmap_t *x2);
+
+/**
+ * Compute the union of 'number' bitmaps. See also roaring_bitmap_or_many_heap.
+ * Caller is responsible for freeing the
+ * result.
+ *
+ */
+roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
+                                         const roaring_bitmap_t **x);
+
+/**
+ * Compute the union of 'number' bitmaps using a heap. This can
+ * sometimes be faster than roaring_bitmap_or_many which uses
+ * a naive algorithm. Caller is responsible for freeing the
+ * result.
+ *
+ */
+roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
+                                              const roaring_bitmap_t **x);
+
+/**
+ * Computes the symmetric difference (xor) between two bitmaps
+ * and returns new bitmap. The caller is responsible for memory management.
+ */
+roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
+                                     const roaring_bitmap_t *x2);
+
+/**
+ * Inplace version of roaring_bitmap_xor, modifies x1. x1 != x2.
+ *
+ */
+void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
+                                const roaring_bitmap_t *x2);
+
+/**
+ * Compute the xor of 'number' bitmaps.
+ * Caller is responsible for freeing the
+ * result.
+ *
+ */
+roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
+                                          const roaring_bitmap_t **x);
+
+/**
+ * Computes the difference (andnot) between two bitmaps
+ * and returns new bitmap. The caller is responsible for memory management.
+ */ +roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * Inplace version of roaring_bitmap_andnot, modifies x1. x1 != x2. + * + */ +void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * TODO: consider implementing: + * Compute the xor of 'number' bitmaps using a heap. This can + * sometimes be faster than roaring_bitmap_xor_many which uses + * a naive algorithm. Caller is responsible for freeing the + * result. + * + * roaring_bitmap_t *roaring_bitmap_xor_many_heap(uint32_t number, + * const roaring_bitmap_t **x); + */ + +/** + * Frees the memory. + */ +void roaring_bitmap_free(const roaring_bitmap_t *r); + +/** + * Add value n_args from pointer vals, faster than repeatedly calling + * roaring_bitmap_add + * + */ +void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals); + +/** + * Add value x + * + */ +void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t x); + +/** + * Add value x + * Returns true if a new value was added, false if the value was already existing. 
+ */ +bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t x); + +/** + * Add all values in range [min, max] + */ +void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max); + +/** + * Add all values in range [min, max) + */ +static inline void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) { + if(max == min) return; + roaring_bitmap_add_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1)); +} + +/** + * Remove value x + * + */ +void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t x); + +/** Remove all values in range [min, max] */ +void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max); + +/** Remove all values in range [min, max) */ +static inline void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max) { + if(max == min) return; + roaring_bitmap_remove_range_closed(ra, (uint32_t)min, (uint32_t)(max - 1)); +} + +/** Remove multiple values */ +void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, + const uint32_t *vals); + +/** + * Remove value x + * Returns true if a new value was removed, false if the value was not existing. + */ +bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t x); + +/** + * Check if value x is present + */ +static inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) { + const uint16_t hb = val >> 16; + /* + * the next function call involves a binary search and lots of branching. 
+ */ + int32_t i = ra_get_index(&r->high_low_container, hb); + if (i < 0) return false; + + uint8_t typecode; + // next call ought to be cheap + void *container = + ra_get_container_at_index(&r->high_low_container, i, &typecode); + // rest might be a tad expensive, possibly involving another round of binary search + return container_contains(container, val & 0xFFFF, typecode); +} + +/** + * Check whether a range of values from range_start (included) to range_end (excluded) is present + */ +bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end); + +/** + * Get the cardinality of the bitmap (number of elements). + */ +uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra); + +/** + * Returns the number of elements in the range [range_start, range_end). + */ +uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, + uint64_t range_start, uint64_t range_end); + +/** +* Returns true if the bitmap is empty (cardinality is zero). +*/ +bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra); + + +/** +* Empties the bitmap +*/ +void roaring_bitmap_clear(roaring_bitmap_t *ra); + +/** + * Convert the bitmap to an array. Write the output to "ans", + * caller is responsible to ensure that there is enough memory + * allocated + * (e.g., ans = malloc(roaring_bitmap_get_cardinality(mybitmap) + * * sizeof(uint32_t)) + */ +void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans); + + +/** + * Convert the bitmap to an array from "offset" by "limit". Write the output to "ans". + * so, you can get data in paging. 
+ * caller is responsible to ensure that there is enough memory + * allocated + * (e.g., ans = malloc(roaring_bitmap_get_cardinality(limit) + * * sizeof(uint32_t)) + * Return false in case of failure (e.g., insufficient memory) + */ +bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans); + +/** + * Remove run-length encoding even when it is more space efficient + * return whether a change was applied + */ +bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r); + +/** convert array and bitmap containers to run containers when it is more + * efficient; + * also convert from run containers when more space efficient. Returns + * true if the result has at least one run container. + * Additional savings might be possible by calling shrinkToFit(). + */ +bool roaring_bitmap_run_optimize(roaring_bitmap_t *r); + +/** + * If needed, reallocate memory to shrink the memory usage. Returns + * the number of bytes saved. +*/ +size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r); + +/** +* write the bitmap to an output pointer, this output buffer should refer to +* at least roaring_bitmap_size_in_bytes(ra) allocated bytes. +* +* see roaring_bitmap_portable_serialize if you want a format that's compatible +* with Java and Go implementations +* +* this format has the benefit of being sometimes more space efficient than +* roaring_bitmap_portable_serialize +* e.g., when the data is sparse. +* +* Returns how many bytes were written which should be +* roaring_bitmap_size_in_bytes(ra). 
+*/ +size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf); + +/** use with roaring_bitmap_serialize +* see roaring_bitmap_portable_deserialize if you want a format that's +* compatible with Java and Go implementations +*/ +roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf); + +/** + * How many bytes are required to serialize this bitmap (NOT compatible + * with Java and Go versions) + */ +size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra); + +/** + * read a bitmap from a serialized version. This is meant to be compatible with + * the Java and Go versions. See format specification at + * https://github.com/RoaringBitmap/RoaringFormatSpec + * In case of failure, a null pointer is returned. + * This function is unsafe in the sense that if there is no valid serialized + * bitmap at the pointer, then many bytes could be read, possibly causing a buffer + * overflow. For a safer approach, + * call roaring_bitmap_portable_deserialize_safe. + */ +roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf); + +/** + * read a bitmap from a serialized version in a safe manner (reading up to maxbytes). + * This is meant to be compatible with + * the Java and Go versions. See format specification at + * https://github.com/RoaringBitmap/RoaringFormatSpec + * In case of failure, a null pointer is returned. + */ +roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); + +/** + * Check how many bytes would be read (up to maxbytes) at this pointer if there + * is a bitmap, returns zero if there is no valid bitmap. + * This is meant to be compatible with + * the Java and Go versions. See format specification at + * https://github.com/RoaringBitmap/RoaringFormatSpec + */ +size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes); + + +/** + * How many bytes are required to serialize this bitmap (meant to be compatible + * with Java and Go versions). 
See format specification at + * https://github.com/RoaringBitmap/RoaringFormatSpec + */ +size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra); + +/** + * write a bitmap to a char buffer. The output buffer should refer to at least + * roaring_bitmap_portable_size_in_bytes(ra) bytes of allocated memory. + * This is meant to be compatible with + * the + * Java and Go versions. Returns how many bytes were written which should be + * roaring_bitmap_portable_size_in_bytes(ra). See format specification at + * https://github.com/RoaringBitmap/RoaringFormatSpec + */ +size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, char *buf); + +/* + * "Frozen" serialization format imitates memory layout of roaring_bitmap_t. + * Deserialized bitmap is a constant view of the underlying buffer. + * This significantly reduces amount of allocations and copying required during + * deserialization. + * It can be used with memory mapped files. + * Example can be found in benchmarks/frozen_benchmark.c + * + * [#####] const roaring_bitmap_t * + * | | | + * +----+ | +-+ + * | | | + * [#####################################] underlying buffer + * + * Note that because frozen serialization format imitates C memory layout + * of roaring_bitmap_t, it is not fixed. It is different on big/little endian + * platforms and can be changed in future. + */ + +/** + * Returns number of bytes required to serialize bitmap using frozen format. + */ +size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *ra); + +/** + * Serializes bitmap using frozen format. + * Buffer size must be at least roaring_bitmap_frozen_size_in_bytes(). + */ +void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *ra, char *buf); + +/** + * Creates constant bitmap that is a view of a given buffer. + * Buffer must contain data previously written by roaring_bitmap_frozen_serialize(), + * and additionally its beginning must be aligned by 32 bytes. 
+ * Length must be equal exactly to roaring_bitmap_frozen_size_in_bytes(). + * + * On error, NULL is returned. + * + * Bitmap returned by this function can be used in all readonly contexts. + * Bitmap must be freed as usual, by calling roaring_bitmap_free(). + * Underlying buffer must not be freed or modified while it backs any bitmaps. + */ +const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf, size_t length); + + +/** + * Iterate over the bitmap elements. The function iterator is called once for + * all the values with ptr (can be NULL) as the second parameter of each call. + * + * roaring_iterator is simply a pointer to a function that returns bool + * (true means that the iteration should continue while false means that it + * should stop), + * and takes (uint32_t,void*) as inputs. + * + * Returns true if the roaring_iterator returned true throughout (so that + * all data points were necessarily visited). + */ +bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, + void *ptr); + +bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, + uint64_t high_bits, void *ptr); + +/** + * Return true if the two bitmaps contain the same elements. + */ +bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, + const roaring_bitmap_t *ra2); + +/** + * Return true if all the elements of ra1 are also in ra2. + */ +bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, + const roaring_bitmap_t *ra2); + +/** + * Return true if all the elements of ra1 are also in ra2 and ra2 is strictly + * greater + * than ra1. + */ +bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, + const roaring_bitmap_t *ra2); + +/** + * (For expert users who seek high performance.) + * + * Computes the union between two bitmaps and returns new bitmap. The caller is + * responsible for memory management. + * + * The lazy version defers some computations such as the maintenance of the + * cardinality counts. 
Thus you need + * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations. + * It is safe to repeatedly call roaring_bitmap_lazy_or_inplace on the result. + * The bitsetconversion conversion is a flag which determines + * whether container-container operations force a bitset conversion. + **/ +roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2, + const bool bitsetconversion); + +/** + * (For expert users who seek high performance.) + * Inplace version of roaring_bitmap_lazy_or, modifies x1 + * The bitsetconversion conversion is a flag which determines + * whether container-container operations force a bitset conversion. + */ +void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2, + const bool bitsetconversion); + +/** + * (For expert users who seek high performance.) + * + * Execute maintenance operations on a bitmap created from + * roaring_bitmap_lazy_or + * or modified with roaring_bitmap_lazy_or_inplace. + */ +void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *x1); + +/** + * Computes the symmetric difference between two bitmaps and returns new bitmap. + *The caller is + * responsible for memory management. + * + * The lazy version defers some computations such as the maintenance of the + * cardinality counts. Thus you need + * to call roaring_bitmap_repair_after_lazy after executing "lazy" computations. + * It is safe to repeatedly call roaring_bitmap_lazy_xor_inplace on the result. + * + */ +roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * (For expert users who seek high performance.) + * Inplace version of roaring_bitmap_lazy_xor, modifies x1. x1 != x2 + * + */ +void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, + const roaring_bitmap_t *x2); + +/** + * compute the negation of the roaring bitmap within a specified + * interval: [range_start, range_end). 
The number of negated values is
+ * range_end - range_start.
+ * Areas outside the range are passed through unchanged.
+ */
+
+roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
+                                      uint64_t range_start, uint64_t range_end);
+
+/**
+ * compute (in place) the negation of the roaring bitmap within a specified
+ * interval: [range_start, range_end). The number of negated values is
+ * range_end - range_start.
+ * Areas outside the range are passed through unchanged.
+ */
+
+void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
+                                 uint64_t range_end);
+
+/**
+ * Selects the element at index 'rank' where the smallest element is at index 0.
+ * If the size of the roaring bitmap is strictly greater than rank, then this
+   function returns true and sets element to the element of given rank.
+   Otherwise, it returns false.
+ */
+bool roaring_bitmap_select(const roaring_bitmap_t *ra, uint32_t rank,
+                           uint32_t *element);
+/**
+* roaring_bitmap_rank returns the number of integers that are smaller or equal
+* to x. Thus if x is the first element, this function will return 1. If
+* x is smaller than the smallest element, this function will return 0.
+*
+* The indexing convention differs between roaring_bitmap_select and
+* roaring_bitmap_rank: roaring_bitmap_select refers to the smallest value
+* as having index 0, whereas roaring_bitmap_rank returns 1 when ranking
+* the smallest value.
+*/
+uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x);
+
+/**
+* roaring_bitmap_minimum returns the smallest value in the set.
+* Returns UINT32_MAX if the set is empty.
+*/
+uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm);
+
+/**
+* roaring_bitmap_maximum returns the greatest value in the set.
+* Returns 0 if the set is empty.
+*/
+uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm);
+
+/**
+* (For advanced users.)
+* Collect statistics about the bitmap, see roaring_types.h for +* a description of roaring_statistics_t +*/ +void roaring_bitmap_statistics(const roaring_bitmap_t *ra, + roaring_statistics_t *stat); + +/********************* +* What follows is code use to iterate through values in a roaring bitmap + +roaring_bitmap_t *ra =... +roaring_uint32_iterator_t i; +roaring_create_iterator(ra, &i); +while(i.has_value) { + printf("value = %d\n", i.current_value); + roaring_advance_uint32_iterator(&i); +} + +Obviously, if you modify the underlying bitmap, the iterator +becomes invalid. So don't. +*/ + +typedef struct roaring_uint32_iterator_s { + const roaring_bitmap_t *parent; // owner + int32_t container_index; // point to the current container index + int32_t in_container_index; // for bitset and array container, this is out + // index + int32_t run_index; // for run container, this points at the run + + uint32_t current_value; + bool has_value; + + const void + *container; // should be: + // parent->high_low_container.containers[container_index]; + uint8_t typecode; // should be: + // parent->high_low_container.typecodes[container_index]; + uint32_t highbits; // should be: + // parent->high_low_container.keys[container_index]) << + // 16; + +} roaring_uint32_iterator_t; + +/** +* Initialize an iterator object that can be used to iterate through the +* values. If there is a value, then this iterator points to the first value +* and it->has_value is true. The value is in it->current_value. +*/ +void roaring_init_iterator(const roaring_bitmap_t *ra, + roaring_uint32_iterator_t *newit); + +/** +* Initialize an iterator object that can be used to iterate through the +* values. If there is a value, then this iterator points to the last value +* and it->has_value is true. The value is in it->current_value. 
+*/ +void roaring_init_iterator_last(const roaring_bitmap_t *ra, + roaring_uint32_iterator_t *newit); + +/** +* Create an iterator object that can be used to iterate through the +* values. Caller is responsible for calling roaring_free_iterator. +* The iterator is initialized. If there is a value, then this iterator +* points to the first value and it->has_value is true. +* The value is in it->current_value. +* +* This function calls roaring_init_iterator. +*/ +roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra); + +/** +* Advance the iterator. If there is a new value, then it->has_value is true. +* The new value is in it->current_value. Values are traversed in increasing +* orders. For convenience, returns it->has_value. +*/ +bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it); + +/** +* Decrement the iterator. If there is a new value, then it->has_value is true. +* The new value is in it->current_value. Values are traversed in decreasing +* orders. For convenience, returns it->has_value. +*/ +bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it); + +/** +* Move the iterator to the first value >= val. If there is a such a value, then it->has_value is true. +* The new value is in it->current_value. For convenience, returns it->has_value. +*/ +bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) ; +/** +* Creates a copy of an iterator. +* Caller must free it. +*/ +roaring_uint32_iterator_t *roaring_copy_uint32_iterator( + const roaring_uint32_iterator_t *it); + +/** +* Free memory following roaring_create_iterator +*/ +void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it); + +/* + * Reads next ${count} values from iterator into user-supplied ${buf}. + * Returns the number of read elements. + * This number can be smaller than ${count}, which means that iterator is drained. 
+ * + * This function satisfies semantics of iteration and can be used together with + * other iterator functions. + * - first value is copied from ${it}->current_value + * - after function returns, iterator is positioned at the next element + */ +uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count); + +#ifdef __cplusplus +} +#endif + +#endif +/* end file include/roaring/roaring.h */ diff --git a/po-properties/POTFILES.in b/po-properties/POTFILES.in index a89edb2ce4..64513074de 100644 --- a/po-properties/POTFILES.in +++ b/po-properties/POTFILES.in @@ -255,7 +255,6 @@ gtk/gtkprintoperation-win32.c gtk/gtkprintunixdialog.c gtk/gtkprogressbar.c gtk/gtkpropertylookuplistmodel.c -gtk/gtkpropertyselection.c gtk/gtkradiobutton.c gtk/gtkrange.c gtk/gtkrecentmanager.c diff --git a/po/POTFILES.in b/po/POTFILES.in index 7ca93df204..604d5c8397 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -257,7 +257,6 @@ gtk/gtkprintoperation-win32.c gtk/gtkprintunixdialog.c gtk/gtkprogressbar.c gtk/gtkpropertylookuplistmodel.c -gtk/gtkpropertyselection.c gtk/gtkradiobutton.c gtk/gtkrange.c gtk/gtkrecentmanager.c diff --git a/testsuite/gtk/bitset.c b/testsuite/gtk/bitset.c new file mode 100644 index 0000000000..5c7d0d16d9 --- /dev/null +++ b/testsuite/gtk/bitset.c @@ -0,0 +1,369 @@ +/* GtkRBTree tests. + * + * Copyright (C) 2011, Red Hat, Inc. + * Authors: Benjamin Otte + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see . + */ + +#include + +#include + +#define LARGE_VALUE (1000 * 1000) + +static GtkBitset * +create_powers_of_10 (void) +{ + GtkBitset *set; + guint i; + + set = gtk_bitset_new_empty (); + for (i = 1; i <= LARGE_VALUE; i *= 10) + gtk_bitset_add (set, i); + + return set; +} + +static GtkBitset * +create_powers_of_10_ranges (void) +{ + GtkBitset *set; + guint i, j; + + set = gtk_bitset_new_empty (); + for (i = 1, j = 0; i <= LARGE_VALUE; i *= 10, j++) + gtk_bitset_add_range (set, i - j, 2 * j); + + return set; +} + +static GtkBitset * +create_large_range (void) +{ + GtkBitset *set; + + set = gtk_bitset_new_empty (); + gtk_bitset_add_range (set, 0, LARGE_VALUE); + + return set; +} + +static GtkBitset * +create_large_rectangle (void) +{ + GtkBitset *set; + + set = gtk_bitset_new_empty (); + gtk_bitset_add_rectangle (set, 0, 900, 900, 1000); + + return set; +} + +static struct { + GtkBitset * (* create) (void); + guint n_elements; + guint minimum; + guint maximum; +} bitsets[] = +{ + { gtk_bitset_new_empty, 0, G_MAXUINT, 0 }, + { create_powers_of_10, 7, 1, LARGE_VALUE }, + { create_powers_of_10_ranges, 42, 9, LARGE_VALUE + 5, }, + { create_large_range, LARGE_VALUE, 0, LARGE_VALUE - 1 }, + { create_large_rectangle, 900 * 900, 0, 899899 } +}; + +/* UTILITIES */ + +/* TEST */ + +static void +test_is_empty (void) +{ + guint i; + GtkBitset *set; + + for (i = 0; i < G_N_ELEMENTS (bitsets); i++) + { + set = bitsets[i].create(); + + if (bitsets[i].n_elements == 0) + g_assert_true (gtk_bitset_is_empty (set)); + else + g_assert_false (gtk_bitset_is_empty (set)); + + gtk_bitset_unref (set); + } +} + +static void +test_minimum (void) +{ + guint i; + GtkBitset *set; + GtkBitsetIter iter; + gboolean success; + guint value; + + for (i = 0; i < G_N_ELEMENTS (bitsets); i++) + { + set = bitsets[i].create(); + + g_assert_cmpint (gtk_bitset_get_minimum (set), ==, 
bitsets[i].minimum); + + success = gtk_bitset_iter_init_first (&iter, set, &value); + if (success) + { + g_assert_false (bitsets[i].n_elements == 0); + g_assert_cmpint (value, ==, bitsets[i].minimum); + } + else + { + g_assert_true (bitsets[i].n_elements == 0); + g_assert_cmpint (value, ==, 0); + } + g_assert_cmpint (gtk_bitset_iter_is_valid (&iter), ==, success); + g_assert_cmpint (gtk_bitset_iter_get_value (&iter), ==, value); + + gtk_bitset_unref (set); + } +} + +static void +test_maximum (void) +{ + guint i; + GtkBitset *set; + GtkBitsetIter iter; + gboolean success; + guint value; + + for (i = 0; i < G_N_ELEMENTS (bitsets); i++) + { + set = bitsets[i].create(); + + g_assert_cmpint (gtk_bitset_get_maximum (set), ==, bitsets[i].maximum); + + success = gtk_bitset_iter_init_last (&iter, set, &value); + if (success) + { + g_assert_false (bitsets[i].n_elements == 0); + g_assert_cmpint (value, ==, bitsets[i].maximum); + } + else + { + g_assert_true (bitsets[i].n_elements == 0); + g_assert_cmpint (value, ==, 0); + } + g_assert_cmpint (gtk_bitset_iter_is_valid (&iter), ==, success); + g_assert_cmpint (gtk_bitset_iter_get_value (&iter), ==, value); + + gtk_bitset_unref (set); + } +} + +static void +test_equals (void) +{ + guint i, j; + GtkBitset *iset, *jset; + + for (i = 0; i < G_N_ELEMENTS (bitsets); i++) + { + iset = bitsets[i].create(); + + g_assert_true (gtk_bitset_equals (iset, iset)); + + for (j = 0; j < G_N_ELEMENTS (bitsets); j++) + { + jset = bitsets[j].create(); + + if (i == j) + g_assert_true (gtk_bitset_equals (iset, jset)); + else + g_assert_false (gtk_bitset_equals (iset, jset)); + + gtk_bitset_unref (jset); + } + + gtk_bitset_unref (iset); + } +} + +static void +test_union (void) +{ + guint i, j, k, min, max; + GtkBitset *iset, *jset, *testset; + + for (i = 0; i < G_N_ELEMENTS (bitsets); i++) + { + iset = bitsets[i].create(); + + g_assert_true (gtk_bitset_equals (iset, iset)); + + for (j = 0; j < G_N_ELEMENTS (bitsets); j++) + { + jset = 
bitsets[j].create(); + + testset = gtk_bitset_copy (iset); + gtk_bitset_union (testset, jset); + + min = MIN (gtk_bitset_get_minimum (iset), gtk_bitset_get_minimum (jset)); + g_assert_cmpint (min, <=, gtk_bitset_get_minimum (testset)); + max = MAX (gtk_bitset_get_maximum (iset), gtk_bitset_get_maximum (jset)); + g_assert_cmpint (max, >=, gtk_bitset_get_maximum (testset)); + + for (k = min; k <= max; k++) + { + g_assert_cmpint (gtk_bitset_contains (iset, k) || gtk_bitset_contains (jset, k), ==, gtk_bitset_contains (testset, k)); + } + + gtk_bitset_unref (testset); + gtk_bitset_unref (jset); + } + + gtk_bitset_unref (iset); + } +} + +static void +test_intersect (void) +{ + guint i, j, k, min, max; + GtkBitset *iset, *jset, *testset; + + for (i = 0; i < G_N_ELEMENTS (bitsets); i++) + { + iset = bitsets[i].create(); + + g_assert_true (gtk_bitset_equals (iset, iset)); + + for (j = 0; j < G_N_ELEMENTS (bitsets); j++) + { + jset = bitsets[j].create(); + + testset = gtk_bitset_copy (iset); + gtk_bitset_intersect (testset, jset); + + min = MIN (gtk_bitset_get_minimum (iset), gtk_bitset_get_minimum (jset)); + g_assert_cmpint (min, <=, gtk_bitset_get_minimum (testset)); + max = MAX (gtk_bitset_get_maximum (iset), gtk_bitset_get_maximum (jset)); + g_assert_cmpint (max, >=, gtk_bitset_get_maximum (testset)); + + for (k = min; k <= max; k++) + { + g_assert_cmpint (gtk_bitset_contains (iset, k) && gtk_bitset_contains (jset, k), ==, gtk_bitset_contains (testset, k)); + } + + gtk_bitset_unref (testset); + gtk_bitset_unref (jset); + } + + gtk_bitset_unref (iset); + } +} + +static void +test_difference (void) +{ + guint i, j, k, min, max; + GtkBitset *iset, *jset, *testset; + + for (i = 0; i < G_N_ELEMENTS (bitsets); i++) + { + iset = bitsets[i].create(); + + g_assert_true (gtk_bitset_equals (iset, iset)); + + for (j = 0; j < G_N_ELEMENTS (bitsets); j++) + { + jset = bitsets[j].create(); + + testset = gtk_bitset_copy (iset); + gtk_bitset_difference (testset, jset); + + min = MIN 
(gtk_bitset_get_minimum (iset), gtk_bitset_get_minimum (jset)); + g_assert_cmpint (min, <=, gtk_bitset_get_minimum (testset)); + max = MAX (gtk_bitset_get_maximum (iset), gtk_bitset_get_maximum (jset)); + g_assert_cmpint (max, >=, gtk_bitset_get_maximum (testset)); + + for (k = min; k <= max; k++) + { + g_assert_cmpint (gtk_bitset_contains (iset, k) ^ gtk_bitset_contains (jset, k), ==, gtk_bitset_contains (testset, k)); + } + + gtk_bitset_unref (testset); + gtk_bitset_unref (jset); + } + + gtk_bitset_unref (iset); + } +} + +static void +test_subtract (void) +{ + guint i, j, k, min, max; + GtkBitset *iset, *jset, *testset; + + for (i = 0; i < G_N_ELEMENTS (bitsets); i++) + { + iset = bitsets[i].create(); + + g_assert_true (gtk_bitset_equals (iset, iset)); + + for (j = 0; j < G_N_ELEMENTS (bitsets); j++) + { + jset = bitsets[j].create(); + + testset = gtk_bitset_copy (iset); + gtk_bitset_subtract (testset, jset); + + min = MIN (gtk_bitset_get_minimum (iset), gtk_bitset_get_minimum (jset)); + g_assert_cmpint (min, <=, gtk_bitset_get_minimum (testset)); + max = MAX (gtk_bitset_get_maximum (iset), gtk_bitset_get_maximum (jset)); + g_assert_cmpint (max, >=, gtk_bitset_get_maximum (testset)); + + for (k = min; k <= max; k++) + { + g_assert_cmpint (gtk_bitset_contains (iset, k) && !gtk_bitset_contains (jset, k), ==, gtk_bitset_contains (testset, k)); + } + + gtk_bitset_unref (testset); + gtk_bitset_unref (jset); + } + + gtk_bitset_unref (iset); + } +} + +int +main (int argc, char *argv[]) +{ + g_test_init (&argc, &argv, NULL); + setlocale (LC_ALL, "C"); + + g_test_add_func ("/bitset/is_empty", test_is_empty); + g_test_add_func ("/bitset/minimum", test_minimum); + g_test_add_func ("/bitset/maximum", test_maximum); + g_test_add_func ("/bitset/equals", test_equals); + g_test_add_func ("/bitset/union", test_union); + g_test_add_func ("/bitset/intersect", test_intersect); + g_test_add_func ("/bitset/difference", test_difference); + g_test_add_func ("/bitset/subtract", 
test_subtract); + + return g_test_run (); +} diff --git a/testsuite/gtk/defaultvalue.c b/testsuite/gtk/defaultvalue.c index 15c9047a93..34e6fb9f18 100644 --- a/testsuite/gtk/defaultvalue.c +++ b/testsuite/gtk/defaultvalue.c @@ -104,9 +104,6 @@ test_type (gconstpointer data) g_type_is_a (type, GTK_TYPE_SHORTCUT_ACTION)) return; - if (g_type_is_a (type, GTK_TYPE_PROPERTY_SELECTION)) - return; - klass = g_type_class_ref (type); if (g_type_is_a (type, GTK_TYPE_SETTINGS)) diff --git a/testsuite/gtk/meson.build b/testsuite/gtk/meson.build index 9859b11209..98b05f9871 100644 --- a/testsuite/gtk/meson.build +++ b/testsuite/gtk/meson.build @@ -12,6 +12,7 @@ tests = [ ['accessible'], ['action'], ['adjustment'], + ['bitset'], ['bitmask', ['../../gtk/gtkallocatedbitmask.c'], ['-DGTK_COMPILATION', '-UG_ENABLE_DEBUG']], ['builder', [], [], gtk_tests_export_dynamic_ldflag], ['builderparser'], diff --git a/testsuite/gtk/multiselection.c b/testsuite/gtk/multiselection.c index 3c59d307f3..6093c3dbf1 100644 --- a/testsuite/gtk/multiselection.c +++ b/testsuite/gtk/multiselection.c @@ -396,7 +396,7 @@ test_select_range (void) ret = gtk_selection_model_select_range (selection, 3, 2, FALSE); g_assert_true (ret); assert_selection (selection, "3 4 5"); - assert_selection_changes (selection, "3:2"); + assert_selection_changes (selection, "4:1"); ret = gtk_selection_model_select_range (selection, 0, 1, TRUE); g_assert_true (ret); @@ -408,7 +408,7 @@ test_select_range (void) } /* Test that removing and readding items - * clears the selected state. + * doesn't clear the selected state. 
*/ static void test_readd (void) @@ -432,62 +432,19 @@ test_readd (void) g_list_model_items_changed (G_LIST_MODEL (store), 1, 3, 3); assert_changes (selection, "1-3+3"); - assert_selection (selection, ""); + assert_selection (selection, "3 4"); g_object_unref (store); g_object_unref (selection); } -typedef struct { - guint start; - guint n; - gboolean in; -} SelectionData; - static void -select_some (guint position, - guint *start, - guint *n, - gboolean *selected, - gpointer data) -{ - SelectionData *sdata = data; - guint i; - - for (i = 0; sdata[i].n != 0; i++) - { - if (sdata[i].start <= position && - position < sdata[i].start + sdata[i].n) - break; - } - - *start = sdata[i].start; - *n = sdata[i].n; - *selected = sdata[i].in; -} - -static void -test_callback (void) +test_set_selection (void) { GtkSelectionModel *selection; gboolean ret; GListStore *store; - SelectionData data[] = { - { 0, 2, FALSE }, - { 2, 3, TRUE }, - { 5, 2, FALSE }, - { 6, 3, TRUE }, - { 9, 1, FALSE }, - { 0, 0, FALSE } - }; - - SelectionData more_data[] = { - { 0, 3, FALSE }, - { 3, 1, TRUE }, - { 4, 3, FALSE }, - { 7, 1, TRUE }, - { 0, 0, FALSE } - }; + GtkBitset *selected, *mask; store = new_store (1, 10, 1); @@ -496,13 +453,26 @@ test_callback (void) assert_selection (selection, ""); assert_selection_changes (selection, ""); - ret = gtk_selection_model_select_callback (selection, FALSE, select_some, data); + selected = gtk_bitset_new_empty (); + gtk_bitset_add_range (selected, 2, 3); + gtk_bitset_add_range (selected, 6, 3); + mask = gtk_bitset_new_empty (); + gtk_bitset_add_range (mask, 0, 100); /* too big on purpose */ + ret = gtk_selection_model_set_selection (selection, selected, mask); g_assert_true (ret); + gtk_bitset_unref (selected); + gtk_bitset_unref (mask); assert_selection (selection, "3 4 5 7 8 9"); assert_selection_changes (selection, "2:7"); - ret = gtk_selection_model_unselect_callback (selection, select_some, more_data); + selected = gtk_bitset_new_empty (); + mask = 
gtk_bitset_new_empty (); + gtk_bitset_add (mask, 3); + gtk_bitset_add (mask, 7); + ret = gtk_selection_model_set_selection (selection, selected, mask); g_assert_true (ret); + gtk_bitset_unref (selected); + gtk_bitset_unref (mask); assert_selection (selection, "3 5 7 9"); assert_selection_changes (selection, "3:5"); @@ -528,7 +498,7 @@ main (int argc, char *argv[]) g_test_add_func ("/multiselection/selection", test_selection); g_test_add_func ("/multiselection/select-range", test_select_range); g_test_add_func ("/multiselection/readd", test_readd); - g_test_add_func ("/multiselection/callback", test_callback); + g_test_add_func ("/multiselection/set_selection", test_set_selection); return g_test_run (); } diff --git a/testsuite/gtk/notify.c b/testsuite/gtk/notify.c index 209685ed38..ddd806948d 100644 --- a/testsuite/gtk/notify.c +++ b/testsuite/gtk/notify.c @@ -425,10 +425,6 @@ test_type (gconstpointer data) g_type_is_a (type, GTK_TYPE_NAMED_ACTION)) return; - /* needs special item type in underlying model */ - if (g_type_is_a (type, GTK_TYPE_PROPERTY_SELECTION)) - return; - klass = g_type_class_ref (type); if (g_type_is_a (type, GTK_TYPE_SETTINGS)) diff --git a/testsuite/gtk/objects-finalize.c b/testsuite/gtk/objects-finalize.c index bd26c83c23..e46f404424 100644 --- a/testsuite/gtk/objects-finalize.c +++ b/testsuite/gtk/objects-finalize.c @@ -80,8 +80,7 @@ test_finalize_object (gconstpointer data) NULL); g_object_unref (list_store); } - else if (g_type_is_a (test_type, GTK_TYPE_LAYOUT_CHILD) || - g_type_is_a (test_type, GTK_TYPE_PROPERTY_SELECTION)) + else if (g_type_is_a (test_type, GTK_TYPE_LAYOUT_CHILD)) { char *msg = g_strdup_printf ("Skipping %s", g_type_name (test_type)); g_test_skip (msg); diff --git a/testsuite/gtk/singleselection.c b/testsuite/gtk/singleselection.c index 24de458e1c..81b96e90d0 100644 --- a/testsuite/gtk/singleselection.c +++ b/testsuite/gtk/singleselection.c @@ -593,28 +593,30 @@ test_persistence (void) } static void -check_query_range 
(GtkSelectionModel *selection) +check_get_selection (GtkSelectionModel *selection) { - guint i, j; - guint position, n_items; - gboolean selected; + GtkBitset *set; + guint i, n_items; - /* check that range always contains position, and has uniform selection */ - for (i = 0; i < g_list_model_get_n_items (G_LIST_MODEL (selection)); i++) - { - gtk_selection_model_query_range (selection, i, &position, &n_items, &selected); - g_assert_cmpint (position, <=, i); - g_assert_cmpint (i, <, position + n_items); - for (j = position; j < position + n_items; j++) - g_assert_true (selected == gtk_selection_model_is_selected (selection, j)); - } + set = gtk_selection_model_get_selection (selection); - /* check that out-of-range returns the correct invalid values */ - i = MIN (i, g_random_int ()); - gtk_selection_model_query_range (selection, i, &position, &n_items, &selected); - g_assert_cmpint (position, ==, i); - g_assert_cmpint (n_items, ==, 0); - g_assert_true (!selected); + n_items = g_list_model_get_n_items (G_LIST_MODEL (selection)); + if (n_items == 0) + { + g_assert_true (gtk_bitset_is_empty (set)); + } + else + { + for (i = 0; i < n_items; i++) + { + g_assert_cmpint (gtk_bitset_contains (set, i), ==, gtk_selection_model_is_selected (selection, i)); + } + + /* check that out-of-range has no bits set */ + g_assert_cmpint (gtk_bitset_get_maximum (set), <, g_list_model_get_n_items (G_LIST_MODEL (selection))); + } + + gtk_bitset_unref (set); } static void @@ -625,16 +627,16 @@ test_query_range (void) store = new_store (1, 5, 1); selection = new_model (store, TRUE, TRUE); - check_query_range (selection); + check_get_selection (selection); gtk_selection_model_unselect_item (selection, 0); - check_query_range (selection); + check_get_selection (selection); gtk_selection_model_select_item (selection, 2, TRUE); - check_query_range (selection); + check_get_selection (selection); gtk_selection_model_select_item (selection, 4, TRUE); - check_query_range (selection); + 
check_get_selection (selection); ignore_selection_changes (selection);