From 5c071cb02eedc18dae13ce8cb548efdcef0c541b Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Wed, 7 Aug 2024 18:07:35 +0200 Subject: [PATCH 01/22] dmabuf: Fix a check We were comparing with destination stride, not with source stride, and in rare cases when those were different, this would trigger aborts in the testsuite. --- gdk/gdkdmabuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gdk/gdkdmabuf.c b/gdk/gdkdmabuf.c index 748d0aa5d3..a054bfaf7d 100644 --- a/gdk/gdkdmabuf.c +++ b/gdk/gdkdmabuf.c @@ -71,7 +71,7 @@ download_memcpy (guchar *dst_data, bpp = gdk_memory_format_bytes_per_pixel (dst_format); src_stride = dmabuf->planes[0].stride; src_data = src_datas[0] + dmabuf->planes[0].offset; - g_return_if_fail (sizes[0] >= dmabuf->planes[0].offset + gdk_memory_format_min_buffer_size (dst_format, dst_stride, width, height)); + g_return_if_fail (sizes[0] >= dmabuf->planes[0].offset + gdk_memory_format_min_buffer_size (dst_format, src_stride, width, height)); if (dst_stride == src_stride) memcpy (dst_data, src_data, (height - 1) * dst_stride + width * bpp); From 88c9a30f7753a35576d85d237dc958b15108bdbd Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Wed, 7 Aug 2024 23:25:17 +0200 Subject: [PATCH 02/22] testsuite: Halve the runtime of offscreens test It times out too much in CI. 
--- .../gsk/compare/lots-of-offscreens-nogl.node | 88 +++++++++---------- 1 file changed, 42 insertions(+), 46 deletions(-) diff --git a/testsuite/gsk/compare/lots-of-offscreens-nogl.node b/testsuite/gsk/compare/lots-of-offscreens-nogl.node index 013f84050b..0ba61a8ece 100644 --- a/testsuite/gsk/compare/lots-of-offscreens-nogl.node +++ b/testsuite/gsk/compare/lots-of-offscreens-nogl.node @@ -1,64 +1,60 @@ cross-fade { - start: cross-fade "foo15" { - start: cross-fade "foo14" { - start: cross-fade "foo13" { - start: cross-fade "foo12" { - start: cross-fade "foo11" { - start: cross-fade "foo10" { - start: cross-fade "foo9" { - start: cross-fade "foo8" { - start: cross-fade "foo7" { - start: cross-fade "foo6" { - start: cross-fade "foo5" { - start: cross-fade "foo4" { - start: cross-fade "foo3" { - start: cross-fade "foo2" { - start: color "foo1" { - bounds: 0 0 10 10; - color: red; - } - end: "foo1"; - progress: 0.4; + start: cross-fade "foo14" { + start: cross-fade "foo13" { + start: cross-fade "foo12" { + start: cross-fade "foo11" { + start: cross-fade "foo10" { + start: cross-fade "foo9" { + start: cross-fade "foo8" { + start: cross-fade "foo7" { + start: cross-fade "foo6" { + start: cross-fade "foo5" { + start: cross-fade "foo4" { + start: cross-fade "foo3" { + start: cross-fade "foo2" { + start: color "foo1" { + bounds: 0 0 10 10; + color: red; } - end: "foo2"; + end: "foo1"; progress: 0.4; } - end: "foo3"; - progress: 0.8; + end: "foo2"; + progress: 0.4; } - end: "foo4"; - progress: 0.6; + end: "foo3"; + progress: 0.8; } - end: "foo5"; - progress: 0.4; + end: "foo4"; + progress: 0.6; } - end: "foo6"; - progress: 0.2; + end: "foo5"; + progress: 0.4; } - end: "foo7"; - progress: 0.6; + end: "foo6"; + progress: 0.2; } - end: "foo8"; + end: "foo7"; progress: 0.6; } - end: "foo9"; - progress: 0.8; + end: "foo8"; + progress: 0.6; } - end: "foo10"; - progress: 0.6; + end: "foo9"; + progress: 0.8; } - end: "foo11"; - progress: 0.4; + end: "foo10"; + progress: 0.6; } - 
end: "foo12"; - progress: 0.6; + end: "foo11"; + progress: 0.4; } - end: "foo13"; - progress: 0.2; + end: "foo12"; + progress: 0.6; } - end: "foo14"; - progress: 0.4; + end: "foo13"; + progress: 0.2; } - end: "foo15"; - progress: 0.2; + end: "foo14"; + progress: 0.4; } From 97b51dc070e0fa12b1286d804d9bfd9f394fbbaa Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Wed, 7 Aug 2024 04:08:58 +0200 Subject: [PATCH 03/22] inspector: Print opaque rect of nodes Also change the way rectangles are printed by including the bottom right coordinate, too. I'm still not sure what the best way is, but at least I no longer get confused and it has the infos I want. --- gtk/inspector/recorder.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/gtk/inspector/recorder.c b/gtk/inspector/recorder.c index 71aa73391c..7846e9332e 100644 --- a/gtk/inspector/recorder.c +++ b/gtk/inspector/recorder.c @@ -1033,7 +1033,7 @@ static void populate_render_node_properties (GListStore *store, GskRenderNode *node) { - graphene_rect_t bounds; + graphene_rect_t bounds, opaque; g_list_store_remove_all (store); @@ -1042,11 +1042,25 @@ populate_render_node_properties (GListStore *store, add_text_row (store, "Type", "%s", node_type_name (gsk_render_node_get_node_type (node))); add_text_row (store, "Bounds", - "%.2f x %.2f + %.2f + %.2f", - bounds.size.width, - bounds.size.height, + "(%.2f, %.2f) to (%.2f, %.2f) - %.2f x %.2f", bounds.origin.x, - bounds.origin.y); + bounds.origin.y, + bounds.origin.x + bounds.size.width, + bounds.origin.y + bounds.size.height, + bounds.size.width, + bounds.size.height); + + if (gsk_render_node_get_opaque_rect (node, &opaque)) + add_text_row (store, "Opaque", + "(%.2f, %.2f) to (%.2f, %.2f) - %.2f x %.2f", + opaque.origin.x, + opaque.origin.y, + opaque.origin.x + opaque.size.width, + opaque.origin.y + opaque.size.height, + opaque.size.width, + opaque.size.height); + else + add_text_row (store, "Opaque", "no"); switch 
(gsk_render_node_get_node_type (node)) { From add5dec4a949636c63a4a9a943a8ba6068cdc96e Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sat, 3 Aug 2024 13:57:38 +0200 Subject: [PATCH 04/22] rect: Add another utility function Add gsk_rect_to_cairo_shrink() to match gsk_rect_to_cairo_grow() --- gsk/gskrectprivate.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/gsk/gskrectprivate.h b/gsk/gskrectprivate.h index 37c198d388..e0e54f0d56 100644 --- a/gsk/gskrectprivate.h +++ b/gsk/gskrectprivate.h @@ -185,6 +185,16 @@ gsk_rect_to_cairo_grow (const graphene_rect_t *graphene, cairo->height = ceilf (graphene->origin.y + graphene->size.height) - cairo->y; } +static inline void +gsk_rect_to_cairo_shrink (const graphene_rect_t *graphene, + cairo_rectangle_int_t *cairo) +{ + cairo->x = ceilf (graphene->origin.x); + cairo->y = ceilf (graphene->origin.y); + cairo->width = floorf (graphene->origin.x + graphene->size.width) - cairo->x; + cairo->height = floorf (graphene->origin.y + graphene->size.height) - cairo->y; +} + static inline gboolean gsk_rect_equal (const graphene_rect_t *r1, const graphene_rect_t *r2) From dbeddd4417a77e6e9d907f855f7df1ff4335f6ad Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sat, 3 Aug 2024 14:06:56 +0200 Subject: [PATCH 05/22] gpu: Always pass a clip region to render() This way, we can remove the code that checks for its existence in that function, making the code simpler. --- gsk/gpu/gskgpuframe.c | 28 +++++----------------------- gsk/gpu/gskgpurenderer.c | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/gsk/gpu/gskgpuframe.c b/gsk/gpu/gskgpuframe.c index 8ac2b6edad..75248b206f 100644 --- a/gsk/gpu/gskgpuframe.c +++ b/gsk/gpu/gskgpuframe.c @@ -639,35 +639,17 @@ gsk_gpu_frame_record (GskGpuFrame *self, { GskGpuFramePrivate *priv = gsk_gpu_frame_get_instance_private (self); GskRenderPassType pass_type = texture ? 
GSK_RENDER_PASS_EXPORT : GSK_RENDER_PASS_PRESENT; + int i; priv->timestamp = timestamp; gsk_gpu_cache_set_time (gsk_gpu_device_get_cache (priv->device), timestamp); - if (clip) + for (i = 0; i < cairo_region_num_rectangles (clip); i++) { - int i; + cairo_rectangle_int_t rect; - for (i = 0; i < cairo_region_num_rectangles (clip); i++) - { - cairo_rectangle_int_t rect; - - cairo_region_get_rectangle (clip, i, &rect); - gsk_gpu_node_processor_process (self, target, target_color_state, &rect, node, viewport, pass_type); - } - } - else - { - gsk_gpu_node_processor_process (self, - target, - target_color_state, - &(cairo_rectangle_int_t) { - 0, 0, - gsk_gpu_image_get_width (target), - gsk_gpu_image_get_height (target) - }, - node, - viewport, - pass_type); + cairo_region_get_rectangle (clip, i, &rect); + gsk_gpu_node_processor_process (self, target, target_color_state, &rect, node, viewport, pass_type); } if (texture) diff --git a/gsk/gpu/gskgpurenderer.c b/gsk/gpu/gskgpurenderer.c index 2e35bb5d2f..0b6b410248 100644 --- a/gsk/gpu/gskgpurenderer.c +++ b/gsk/gpu/gskgpurenderer.c @@ -264,6 +264,7 @@ gsk_gpu_renderer_fallback_render_texture (GskGpuRenderer *self, guchar *data; GdkTexture *texture; GdkTextureDownloader downloader; + cairo_region_t *clip_region; GskGpuFrame *frame; max_size = gsk_gpu_device_get_max_image_size (priv->device); @@ -304,12 +305,17 @@ gsk_gpu_renderer_fallback_render_texture (GskGpuRenderer *self, else color_state = GDK_COLOR_STATE_SRGB; + clip_region = cairo_region_create_rectangle (&(cairo_rectangle_int_t) { + 0, 0, + gsk_gpu_image_get_width (image), + gsk_gpu_image_get_height (image) + }); frame = gsk_gpu_renderer_create_frame (self); gsk_gpu_frame_render (frame, g_get_monotonic_time (), image, color_state, - NULL, + clip_region, root, &GRAPHENE_RECT_INIT (rounded_viewport->origin.x + x, rounded_viewport->origin.y + y, @@ -317,6 +323,7 @@ gsk_gpu_renderer_fallback_render_texture (GskGpuRenderer *self, image_height), &texture); g_object_unref 
(frame); + cairo_region_destroy (clip_region); g_assert (texture); gdk_texture_downloader_init (&downloader, texture); @@ -352,6 +359,7 @@ gsk_gpu_renderer_render_texture (GskRenderer *renderer, GdkTexture *texture; graphene_rect_t rounded_viewport; GdkColorState *color_state; + cairo_region_t *clip_region; gsk_gpu_device_maybe_gc (priv->device); @@ -376,16 +384,23 @@ gsk_gpu_renderer_render_texture (GskRenderer *renderer, frame = gsk_gpu_renderer_create_frame (self); + clip_region = cairo_region_create_rectangle (&(cairo_rectangle_int_t) { + 0, 0, + gsk_gpu_image_get_width (image), + gsk_gpu_image_get_height (image) + }); + texture = NULL; gsk_gpu_frame_render (frame, g_get_monotonic_time (), image, color_state, - NULL, + clip_region, root, &rounded_viewport, &texture); + cairo_region_destroy (clip_region); g_object_unref (frame); g_object_unref (image); From 1328c1409aac87abec767fbe25161452d09a6489 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sat, 3 Aug 2024 15:18:02 +0200 Subject: [PATCH 06/22] gpu: Make the region argument transfer full I want to modify the region while using it, and everybody destroys it right after, so now there's no need to do a copy. 
--- gsk/gpu/gskgpuframe.c | 8 +++++--- gsk/gpu/gskgpuframeprivate.h | 2 +- gsk/gpu/gskgpurenderer.c | 4 ---- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/gsk/gpu/gskgpuframe.c b/gsk/gpu/gskgpuframe.c index 75248b206f..7c035521ef 100644 --- a/gsk/gpu/gskgpuframe.c +++ b/gsk/gpu/gskgpuframe.c @@ -632,7 +632,7 @@ gsk_gpu_frame_record (GskGpuFrame *self, gint64 timestamp, GskGpuImage *target, GdkColorState *target_color_state, - const cairo_region_t *clip, + cairo_region_t *clip, GskRenderNode *node, const graphene_rect_t *viewport, GdkTexture **texture) @@ -654,6 +654,8 @@ gsk_gpu_frame_record (GskGpuFrame *self, if (texture) gsk_gpu_download_op (self, target, TRUE, copy_texture, texture); + + cairo_region_destroy (clip); } static void @@ -692,7 +694,7 @@ gsk_gpu_frame_render (GskGpuFrame *self, gint64 timestamp, GskGpuImage *target, GdkColorState *target_color_state, - const cairo_region_t *region, + cairo_region_t *clip, GskRenderNode *node, const graphene_rect_t *viewport, GdkTexture **texture) @@ -701,7 +703,7 @@ gsk_gpu_frame_render (GskGpuFrame *self, gsk_gpu_frame_cleanup (self); - gsk_gpu_frame_record (self, timestamp, target, target_color_state, region, node, viewport, texture); + gsk_gpu_frame_record (self, timestamp, target, target_color_state, clip, node, viewport, texture); gsk_gpu_frame_submit (self, pass_type); } diff --git a/gsk/gpu/gskgpuframeprivate.h b/gsk/gpu/gskgpuframeprivate.h index 384977a4e1..8b993a0f21 100644 --- a/gsk/gpu/gskgpuframeprivate.h +++ b/gsk/gpu/gskgpuframeprivate.h @@ -102,7 +102,7 @@ void gsk_gpu_frame_render (GskGpuF gint64 timestamp, GskGpuImage *target, GdkColorState *target_color_state, - const cairo_region_t *region, + cairo_region_t *clip, GskRenderNode *node, const graphene_rect_t *viewport, GdkTexture **texture); diff --git a/gsk/gpu/gskgpurenderer.c b/gsk/gpu/gskgpurenderer.c index 0b6b410248..270d0168f9 100644 --- a/gsk/gpu/gskgpurenderer.c +++ b/gsk/gpu/gskgpurenderer.c @@ -323,7 +323,6 @@ 
gsk_gpu_renderer_fallback_render_texture (GskGpuRenderer *self, image_height), &texture); g_object_unref (frame); - cairo_region_destroy (clip_region); g_assert (texture); gdk_texture_downloader_init (&downloader, texture); @@ -400,7 +399,6 @@ gsk_gpu_renderer_render_texture (GskRenderer *renderer, &rounded_viewport, &texture); - cairo_region_destroy (clip_region); g_object_unref (frame); g_object_unref (image); @@ -461,8 +459,6 @@ gsk_gpu_renderer_render (GskRenderer *renderer, gsk_gpu_frame_end (frame, priv->context); gsk_gpu_device_queue_gc (priv->device); - - g_clear_pointer (&render_region, cairo_region_destroy); } static double From 292f54dd6008521aab2a66381eb3652c24bc7de0 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sat, 3 Aug 2024 17:55:14 +0200 Subject: [PATCH 07/22] gpu: Split out render function Makes the code easier to understand --- gsk/gpu/gskgpunodeprocessor.c | 182 +++++++++++++++++++--------------- 1 file changed, 103 insertions(+), 79 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 1f29b36d30..9b38c1d2fb 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -3886,6 +3886,47 @@ gsk_gpu_get_node_as_image (GskGpuFrame *frame, } } +static void +gsk_gpu_node_processor_render (GskGpuFrame *frame, + GskGpuImage *target, + GdkColorState *ccs, + const cairo_rectangle_int_t *clip, + GskRenderNode *node, + const graphene_rect_t *viewport, + GskRenderPassType pass_type) +{ + GskGpuNodeProcessor self; + + gsk_gpu_node_processor_init (&self, + frame, + target, + ccs, + clip, + viewport); + + if (!gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING) || + !gsk_gpu_node_processor_add_first_node (&self, + target, + clip, + pass_type, + node)) + { + gsk_gpu_render_pass_begin_op (frame, + target, + clip, + GSK_VEC4_TRANSPARENT, + pass_type); + + gsk_gpu_node_processor_add_node (&self, node); + } + + gsk_gpu_render_pass_end_op (frame, + target, + pass_type); + + 
gsk_gpu_node_processor_finish (&self); +} + void gsk_gpu_node_processor_process (GskGpuFrame *frame, GskGpuImage *target, @@ -3897,6 +3938,16 @@ gsk_gpu_node_processor_process (GskGpuFrame *frame, { GskGpuNodeProcessor self; GdkColorState *ccs; + GskGpuImage *image; + graphene_rect_t clip_bounds, tex_rect; + + ccs = gdk_color_state_get_rendering_color_state (target_color_state); + + if (gdk_color_state_equal (ccs, target_color_state)) + { + gsk_gpu_node_processor_render (frame, target, ccs, clip, node, viewport, pass_type); + return; + } gsk_gpu_node_processor_init (&self, frame, @@ -3905,86 +3956,46 @@ gsk_gpu_node_processor_process (GskGpuFrame *frame, clip, viewport); - ccs = gdk_color_state_get_rendering_color_state (target_color_state); - - if (gdk_color_state_equal (ccs, target_color_state)) + /* Can't use gsk_gpu_node_processor_get_node_as_image () because of colorspaces */ + if (!gsk_gpu_node_processor_clip_node_bounds (&self, node, &clip_bounds)) { - if (!gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING) || - !gsk_gpu_node_processor_add_first_node (&self, - target, - clip, - pass_type, - node)) - { - gsk_gpu_render_pass_begin_op (frame, - target, - clip, - GSK_VEC4_TRANSPARENT, - pass_type); - - gsk_gpu_node_processor_add_node (&self, node); - } - - gsk_gpu_render_pass_end_op (frame, - target, - pass_type); + gsk_gpu_node_processor_finish (&self); + return; } - else + + rect_round_to_pixels (&clip_bounds, &self.scale, &self.offset, &clip_bounds); + image = gsk_gpu_get_node_as_image (self.frame, + ccs, + &clip_bounds, + &self.scale, + node, + &tex_rect); + if (image == NULL) { - GskGpuImage *image; - graphene_rect_t clip_bounds, tex_rect; + gsk_gpu_node_processor_finish (&self); + return; + } - /* Can't use gsk_gpu_node_processor_get_node_as_image () because of colorspaces */ - if (gsk_gpu_node_processor_clip_node_bounds (&self, node, &clip_bounds)) - { - rect_round_to_pixels (&clip_bounds, &self.scale, &self.offset, &clip_bounds); - 
image = gsk_gpu_get_node_as_image (self.frame, - ccs, - &clip_bounds, - &self.scale, - node, - &tex_rect); - } - else - image = NULL; + gsk_gpu_render_pass_begin_op (frame, + target, + clip, + NULL, + pass_type); - if (image != NULL) - { - gsk_gpu_render_pass_begin_op (frame, - target, - clip, - NULL, - pass_type); + self.blend = GSK_GPU_BLEND_NONE; + self.pending_globals |= GSK_GPU_GLOBAL_BLEND; + gsk_gpu_node_processor_sync_globals (&self, 0); - self.blend = GSK_GPU_BLEND_NONE; - self.pending_globals |= GSK_GPU_GLOBAL_BLEND; - gsk_gpu_node_processor_sync_globals (&self, 0); + if (!GDK_IS_DEFAULT_COLOR_STATE (target_color_state)) + { + const GdkCicp *cicp = gdk_color_state_get_cicp (target_color_state); - if (!GDK_IS_DEFAULT_COLOR_STATE (target_color_state)) - { - const GdkCicp *cicp = gdk_color_state_get_cicp (target_color_state); + g_assert (cicp != NULL); - g_assert (cicp != NULL); - - gsk_gpu_convert_to_cicp_op (self.frame, - gsk_gpu_clip_get_shader_clip (&self.clip, &self.offset, &node->bounds), - cicp, - gsk_gpu_color_states_create_cicp (self.ccs, TRUE, TRUE), - self.opacity, - FALSE, - &self.offset, - &(GskGpuShaderImage) { - image, - GSK_GPU_SAMPLER_DEFAULT, - &node->bounds, - &tex_rect - }); - } - else - { - gsk_gpu_convert_op (self.frame, + gsk_gpu_convert_to_cicp_op (self.frame, gsk_gpu_clip_get_shader_clip (&self.clip, &self.offset, &node->bounds), - gsk_gpu_node_processor_color_states_explicit (&self, ccs, TRUE), + cicp, + gsk_gpu_color_states_create_cicp (self.ccs, TRUE, TRUE), self.opacity, FALSE, &self.offset, @@ -3994,16 +4005,29 @@ gsk_gpu_node_processor_process (GskGpuFrame *frame, &node->bounds, &tex_rect }); - } - - gsk_gpu_render_pass_end_op (frame, - target, - pass_type); - - g_object_unref (image); - } + } + else + { + gsk_gpu_convert_op (self.frame, + gsk_gpu_clip_get_shader_clip (&self.clip, &self.offset, &node->bounds), + gsk_gpu_node_processor_color_states_explicit (&self, ccs, TRUE), + self.opacity, + FALSE, + &self.offset, + 
&(GskGpuShaderImage) { + image, + GSK_GPU_SAMPLER_DEFAULT, + &node->bounds, + &tex_rect + }); } + gsk_gpu_render_pass_end_op (frame, + target, + pass_type); + gsk_gpu_node_processor_finish (&self); + + g_object_unref (image); } From b637c3e201bedf9540220131c4df265debbbd8a3 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sat, 3 Aug 2024 18:34:25 +0200 Subject: [PATCH 08/22] gpu: Pass the clip region even further down We are now handling the region inside the nodeprocessor. --- gsk/gpu/gskgpuframe.c | 11 +- gsk/gpu/gskgpunodeprocessor.c | 238 +++++++++++++++------------ gsk/gpu/gskgpunodeprocessorprivate.h | 2 +- 3 files changed, 131 insertions(+), 120 deletions(-) diff --git a/gsk/gpu/gskgpuframe.c b/gsk/gpu/gskgpuframe.c index 7c035521ef..90c789a005 100644 --- a/gsk/gpu/gskgpuframe.c +++ b/gsk/gpu/gskgpuframe.c @@ -639,23 +639,14 @@ gsk_gpu_frame_record (GskGpuFrame *self, { GskGpuFramePrivate *priv = gsk_gpu_frame_get_instance_private (self); GskRenderPassType pass_type = texture ? GSK_RENDER_PASS_EXPORT : GSK_RENDER_PASS_PRESENT; - int i; priv->timestamp = timestamp; gsk_gpu_cache_set_time (gsk_gpu_device_get_cache (priv->device), timestamp); - for (i = 0; i < cairo_region_num_rectangles (clip); i++) - { - cairo_rectangle_int_t rect; - - cairo_region_get_rectangle (clip, i, &rect); - gsk_gpu_node_processor_process (self, target, target_color_state, &rect, node, viewport, pass_type); - } + gsk_gpu_node_processor_process (self, target, target_color_state, clip, node, viewport, pass_type); if (texture) gsk_gpu_download_op (self, target, TRUE, copy_texture, texture); - - cairo_region_destroy (clip); } static void diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 9b38c1d2fb..81ed6b5cba 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -568,7 +568,7 @@ gsk_gpu_node_processor_create_offscreen (GskGpuFrame *frame, gsk_gpu_node_processor_process (frame, image, ccs, - &area, + cairo_region_create_rectangle 
(&area), node, viewport, GSK_RENDER_PASS_OFFSCREEN); @@ -3890,56 +3890,67 @@ static void gsk_gpu_node_processor_render (GskGpuFrame *frame, GskGpuImage *target, GdkColorState *ccs, - const cairo_rectangle_int_t *clip, + cairo_region_t *clip, GskRenderNode *node, const graphene_rect_t *viewport, GskRenderPassType pass_type) { GskGpuNodeProcessor self; + int i; - gsk_gpu_node_processor_init (&self, - frame, - target, - ccs, - clip, - viewport); - - if (!gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING) || - !gsk_gpu_node_processor_add_first_node (&self, - target, - clip, - pass_type, - node)) + for (i = 0; i < cairo_region_num_rectangles (clip); i++) { - gsk_gpu_render_pass_begin_op (frame, - target, - clip, - GSK_VEC4_TRANSPARENT, - pass_type); + cairo_rectangle_int_t rect; - gsk_gpu_node_processor_add_node (&self, node); + cairo_region_get_rectangle (clip, i, &rect); + + gsk_gpu_node_processor_init (&self, + frame, + target, + ccs, + &rect, + viewport); + + if (!gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING) || + !gsk_gpu_node_processor_add_first_node (&self, + target, + &rect, + pass_type, + node)) + { + gsk_gpu_render_pass_begin_op (frame, + target, + &rect, + GSK_VEC4_TRANSPARENT, + pass_type); + + gsk_gpu_node_processor_add_node (&self, node); + } + + gsk_gpu_render_pass_end_op (frame, + target, + pass_type); + + gsk_gpu_node_processor_finish (&self); } - gsk_gpu_render_pass_end_op (frame, - target, - pass_type); - - gsk_gpu_node_processor_finish (&self); + cairo_region_destroy (clip); } void -gsk_gpu_node_processor_process (GskGpuFrame *frame, - GskGpuImage *target, - GdkColorState *target_color_state, - const cairo_rectangle_int_t *clip, - GskRenderNode *node, - const graphene_rect_t *viewport, - GskRenderPassType pass_type) +gsk_gpu_node_processor_process (GskGpuFrame *frame, + GskGpuImage *target, + GdkColorState *target_color_state, + cairo_region_t *clip, + GskRenderNode *node, + const graphene_rect_t 
*viewport, + GskRenderPassType pass_type) { GskGpuNodeProcessor self; GdkColorState *ccs; GskGpuImage *image; graphene_rect_t clip_bounds, tex_rect; + int i; ccs = gdk_color_state_get_rendering_color_state (target_color_state); @@ -3949,85 +3960,94 @@ gsk_gpu_node_processor_process (GskGpuFrame *frame, return; } - gsk_gpu_node_processor_init (&self, - frame, - target, - target_color_state, - clip, - viewport); - - /* Can't use gsk_gpu_node_processor_get_node_as_image () because of colorspaces */ - if (!gsk_gpu_node_processor_clip_node_bounds (&self, node, &clip_bounds)) + for (i = 0; i < cairo_region_num_rectangles (clip); i++) { + cairo_rectangle_int_t rect; + + cairo_region_get_rectangle (clip, i, &rect); + + gsk_gpu_node_processor_init (&self, + frame, + target, + target_color_state, + &rect, + viewport); + + /* Can't use gsk_gpu_node_processor_get_node_as_image () because of colorspaces */ + if (!gsk_gpu_node_processor_clip_node_bounds (&self, node, &clip_bounds)) + { + gsk_gpu_node_processor_finish (&self); + return; + } + + rect_round_to_pixels (&clip_bounds, &self.scale, &self.offset, &clip_bounds); + image = gsk_gpu_get_node_as_image (self.frame, + ccs, + &clip_bounds, + &self.scale, + node, + &tex_rect); + if (image == NULL) + { + gsk_gpu_node_processor_finish (&self); + return; + } + + gsk_gpu_render_pass_begin_op (frame, + target, + &rect, + NULL, + pass_type); + + self.blend = GSK_GPU_BLEND_NONE; + self.pending_globals |= GSK_GPU_GLOBAL_BLEND; + gsk_gpu_node_processor_sync_globals (&self, 0); + + if (!GDK_IS_DEFAULT_COLOR_STATE (target_color_state)) + { + const GdkCicp *cicp = gdk_color_state_get_cicp (target_color_state); + + g_assert (cicp != NULL); + + gsk_gpu_convert_to_cicp_op (self.frame, + gsk_gpu_clip_get_shader_clip (&self.clip, &self.offset, &node->bounds), + cicp, + gsk_gpu_color_states_create_cicp (self.ccs, TRUE, TRUE), + self.opacity, + FALSE, + &self.offset, + &(GskGpuShaderImage) { + image, + GSK_GPU_SAMPLER_DEFAULT, + &node->bounds, + 
&tex_rect + }); + } + else + { + gsk_gpu_convert_op (self.frame, + gsk_gpu_clip_get_shader_clip (&self.clip, &self.offset, &node->bounds), + gsk_gpu_node_processor_color_states_explicit (&self, ccs, TRUE), + self.opacity, + FALSE, + &self.offset, + &(GskGpuShaderImage) { + image, + GSK_GPU_SAMPLER_DEFAULT, + &node->bounds, + &tex_rect + }); + } + + gsk_gpu_render_pass_end_op (frame, + target, + pass_type); + gsk_gpu_node_processor_finish (&self); - return; + + g_object_unref (image); } - rect_round_to_pixels (&clip_bounds, &self.scale, &self.offset, &clip_bounds); - image = gsk_gpu_get_node_as_image (self.frame, - ccs, - &clip_bounds, - &self.scale, - node, - &tex_rect); - if (image == NULL) - { - gsk_gpu_node_processor_finish (&self); - return; - } - - gsk_gpu_render_pass_begin_op (frame, - target, - clip, - NULL, - pass_type); - - self.blend = GSK_GPU_BLEND_NONE; - self.pending_globals |= GSK_GPU_GLOBAL_BLEND; - gsk_gpu_node_processor_sync_globals (&self, 0); - - if (!GDK_IS_DEFAULT_COLOR_STATE (target_color_state)) - { - const GdkCicp *cicp = gdk_color_state_get_cicp (target_color_state); - - g_assert (cicp != NULL); - - gsk_gpu_convert_to_cicp_op (self.frame, - gsk_gpu_clip_get_shader_clip (&self.clip, &self.offset, &node->bounds), - cicp, - gsk_gpu_color_states_create_cicp (self.ccs, TRUE, TRUE), - self.opacity, - FALSE, - &self.offset, - &(GskGpuShaderImage) { - image, - GSK_GPU_SAMPLER_DEFAULT, - &node->bounds, - &tex_rect - }); - } - else - { - gsk_gpu_convert_op (self.frame, - gsk_gpu_clip_get_shader_clip (&self.clip, &self.offset, &node->bounds), - gsk_gpu_node_processor_color_states_explicit (&self, ccs, TRUE), - self.opacity, - FALSE, - &self.offset, - &(GskGpuShaderImage) { - image, - GSK_GPU_SAMPLER_DEFAULT, - &node->bounds, - &tex_rect - }); - } - - gsk_gpu_render_pass_end_op (frame, - target, - pass_type); - - gsk_gpu_node_processor_finish (&self); - - g_object_unref (image); + cairo_region_destroy (clip); } diff --git 
a/gsk/gpu/gskgpunodeprocessorprivate.h b/gsk/gpu/gskgpunodeprocessorprivate.h index 56f43d60c7..26bde2f266 100644 --- a/gsk/gpu/gskgpunodeprocessorprivate.h +++ b/gsk/gpu/gskgpunodeprocessorprivate.h @@ -8,7 +8,7 @@ G_BEGIN_DECLS void gsk_gpu_node_processor_process (GskGpuFrame *frame, GskGpuImage *target, GdkColorState *target_color_state, - const cairo_rectangle_int_t *clip, + cairo_region_t *clip, GskRenderNode *node, const graphene_rect_t *viewport, GskRenderPassType pass_type); From 9a4d8453ed95c97938fedff964a91728a3ac2113 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sat, 3 Aug 2024 18:40:23 +0200 Subject: [PATCH 09/22] gpu: Remove unused argument The clip is already available via self->scissor, so there is no need to track that. --- gsk/gpu/gskgpunodeprocessor.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 81ed6b5cba..fd4d80379b 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -130,7 +130,6 @@ static void gsk_gpu_node_processor_add_node (GskGpuN GskRenderNode *node); static gboolean gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, GskGpuImage *target, - const cairo_rectangle_int_t *clip, GskRenderPassType pass_type, GskRenderNode *node); static GskGpuImage * gsk_gpu_get_node_as_image (GskGpuFrame *frame, @@ -998,13 +997,11 @@ gsk_gpu_node_processor_add_clip_node (GskGpuNodeProcessor *self, static gboolean gsk_gpu_node_processor_add_first_clip_node (GskGpuNodeProcessor *self, GskGpuImage *target, - const cairo_rectangle_int_t *clip, GskRenderPassType pass_type, GskRenderNode *node) { return gsk_gpu_node_processor_add_first_node (self, target, - clip, pass_type, gsk_clip_node_get_child (node)); } @@ -1131,7 +1128,6 @@ gsk_gpu_node_processor_add_rounded_clip_node (GskGpuNodeProcessor *self, static gboolean gsk_gpu_node_processor_add_first_rounded_clip_node (GskGpuNodeProcessor *self, GskGpuImage *target, 
- const cairo_rectangle_int_t *clip, GskRenderPassType pass_type, GskRenderNode *node) { @@ -1144,7 +1140,6 @@ gsk_gpu_node_processor_add_first_rounded_clip_node (GskGpuNodeProcessor return gsk_gpu_node_processor_add_first_node (self, target, - clip, pass_type, gsk_rounded_clip_node_get_child (node)); } @@ -1345,7 +1340,6 @@ gsk_gpu_node_processor_add_transform_node (GskGpuNodeProcessor *self, static gboolean gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *self, GskGpuImage *target, - const cairo_rectangle_int_t *clip, GskRenderPassType pass_type, GskRenderNode *node) { @@ -1368,7 +1362,6 @@ gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *se self->offset.y += dy; result = gsk_gpu_node_processor_add_first_node (self, target, - clip, pass_type, gsk_transform_node_get_child (node)); self->offset = old_offset; @@ -1391,7 +1384,6 @@ gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *se result = gsk_gpu_node_processor_add_first_node (self, target, - clip, pass_type, gsk_transform_node_get_child (node)); @@ -1434,7 +1426,6 @@ gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *se result = gsk_gpu_node_processor_add_first_node (self, target, - clip, pass_type, gsk_transform_node_get_child (node)); @@ -1600,7 +1591,6 @@ gsk_gpu_node_processor_add_color_node (GskGpuNodeProcessor *self, static gboolean gsk_gpu_node_processor_add_first_color_node (GskGpuNodeProcessor *self, GskGpuImage *target, - const cairo_rectangle_int_t *clip, GskRenderPassType pass_type, GskRenderNode *node) { @@ -1617,7 +1607,7 @@ gsk_gpu_node_processor_add_first_color_node (GskGpuNodeProcessor *self, gdk_color_to_float (gsk_color_node_get_color2 (node), self->ccs, clear_color); gsk_gpu_render_pass_begin_op (self->frame, target, - clip, + &self->scissor, clear_color, pass_type); @@ -3420,7 +3410,6 @@ gsk_gpu_node_processor_add_container_node (GskGpuNodeProcessor *self, static gboolean 
gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *self, GskGpuImage *target, - const cairo_rectangle_int_t *clip, GskRenderPassType pass_type, GskRenderNode *node) { @@ -3434,7 +3423,6 @@ gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *se { if (gsk_gpu_node_processor_add_first_node (self, target, - clip, pass_type, gsk_container_node_get_child (node, i))) break; @@ -3453,7 +3441,7 @@ gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *se gsk_gpu_render_pass_begin_op (self->frame, target, - clip, + &self->scissor, NULL, pass_type); } @@ -3499,7 +3487,6 @@ static const struct GskRenderNode *node); gboolean (* process_first_node) (GskGpuNodeProcessor *self, GskGpuImage *target, - const cairo_rectangle_int_t *clip, GskRenderPassType pass_type, GskRenderNode *node); GskGpuImage * (* get_node_as_image) (GskGpuFrame *self, @@ -3787,7 +3774,6 @@ clip_covered_by_rect (const GskGpuClip *self, static gboolean gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, GskGpuImage *target, - const cairo_rectangle_int_t *clip, GskRenderPassType pass_type, GskRenderNode *node) { @@ -3809,7 +3795,7 @@ gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, } if (nodes_vtable[node_type].process_first_node) - return nodes_vtable[node_type].process_first_node (self, target, clip, pass_type, node); + return nodes_vtable[node_type].process_first_node (self, target, pass_type, node); /* fallback starts here */ @@ -3822,7 +3808,7 @@ gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, gsk_gpu_render_pass_begin_op (self->frame, target, - clip, + &self->scissor, NULL, pass_type); @@ -3914,7 +3900,6 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, if (!gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING) || !gsk_gpu_node_processor_add_first_node (&self, target, - &rect, pass_type, node)) { From 1abe9760ab55372cdb02c8c8c4fb8423a3c84bd4 Mon Sep 17 00:00:00 2001 From: Benjamin 
Otte Date: Sun, 4 Aug 2024 01:21:29 +0200 Subject: [PATCH 10/22] gpu: Change the way clip rectangles are processed Instead of just iterating over all the rectangles of the region, always draw the first rectangle of the region and subtract it when done. This sounds more complicated, but it will allow us to modify the rectangle in future commits. --- gsk/gpu/gskgpunodeprocessor.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index fd4d80379b..0f4acb30df 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -3882,13 +3882,12 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, GskRenderPassType pass_type) { GskGpuNodeProcessor self; - int i; - for (i = 0; i < cairo_region_num_rectangles (clip); i++) + while (cairo_region_num_rectangles (clip) > 0) { cairo_rectangle_int_t rect; - cairo_region_get_rectangle (clip, i, &rect); + cairo_region_get_rectangle (clip, 0, &rect); gsk_gpu_node_processor_init (&self, frame, @@ -3916,6 +3915,8 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, target, pass_type); + cairo_region_subtract_rectangle (clip, &self.scissor); + gsk_gpu_node_processor_finish (&self); } From 08fcba63d0d5232f61b3539abe5d2c811f33e54a Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Wed, 7 Aug 2024 05:46:49 +0200 Subject: [PATCH 11/22] gpu: Split out a function gsk_gpu_node_processor_rect_to_device() is a useful function to have, even if it has to return FALSE sometimes when there is no simple 1:1 mapping - ie when the modelview contains a rotation. 
--- gsk/gpu/gskgpunodeprocessor.c | 43 +++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 0f4acb30df..9466258899 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -405,11 +405,11 @@ extract_scale_from_transform (GskTransform *transform, } static gboolean -gsk_gpu_node_processor_rect_is_integer (GskGpuNodeProcessor *self, - const graphene_rect_t *rect, - cairo_rectangle_int_t *int_rect) +gsk_gpu_node_processor_rect_clip_to_device (GskGpuNodeProcessor *self, + const graphene_rect_t *src, + graphene_rect_t *dest) { - graphene_rect_t transformed_rect; + graphene_rect_t transformed; float scale_x = graphene_vec2_get_x (&self->scale); float scale_y = graphene_vec2_get_y (&self->scale); @@ -425,8 +425,8 @@ gsk_gpu_node_processor_rect_is_integer (GskGpuNodeProcessor *self, case GSK_FINE_TRANSFORM_CATEGORY_2D_NEGATIVE_AFFINE: case GSK_FINE_TRANSFORM_CATEGORY_2D_AFFINE: case GSK_FINE_TRANSFORM_CATEGORY_2D_TRANSLATE: - gsk_transform_transform_bounds (self->modelview, rect, &transformed_rect); - rect = &transformed_rect; + gsk_transform_transform_bounds (self->modelview, src, &transformed); + src = &transformed; break; case GSK_FINE_TRANSFORM_CATEGORY_IDENTITY: @@ -434,15 +434,30 @@ gsk_gpu_node_processor_rect_is_integer (GskGpuNodeProcessor *self, break; } - int_rect->x = rect->origin.x * scale_x; - int_rect->y = rect->origin.y * scale_y; - int_rect->width = rect->size.width * scale_x; - int_rect->height = rect->size.height * scale_y; + dest->origin.x = src->origin.x * scale_x; + dest->origin.y = src->origin.y * scale_y; + dest->size.width = src->size.width * scale_x; + dest->size.height = src->size.height * scale_y; - return int_rect->x == rect->origin.x * scale_x - && int_rect->y == rect->origin.y * scale_y - && int_rect->width == rect->size.width * scale_x - && int_rect->height == rect->size.height * scale_y; + return TRUE; +} 
+ +static gboolean +gsk_gpu_node_processor_rect_is_integer (GskGpuNodeProcessor *self, + const graphene_rect_t *rect, + cairo_rectangle_int_t *int_rect) +{ + graphene_rect_t tmp; + + if (!gsk_gpu_node_processor_rect_clip_to_device (self, rect, &tmp)) + return FALSE; + + gsk_rect_to_cairo_shrink (&tmp, int_rect); + + return int_rect->x == tmp.origin.x + && int_rect->y == tmp.origin.y + && int_rect->width == tmp.size.width + && int_rect->height == tmp.size.height; } static void From 5976debfcd11023abe319fc9fbfe791388bee1e1 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sun, 4 Aug 2024 06:04:45 +0200 Subject: [PATCH 12/22] gpu: Change how occlusion passes work Instead of requiring an occlusion pass to cover the whole given scissor rect, allow using a smaller rect to start the pass. When starting such a pass, we adjust the scissor rect to the size of that pass and do not grow it again until the pass is done. The rectangle subtraction at the end will then take care of subtracting that rectangle from the remaining pixels. To not end up with lots of tiny occlusion passes, add a limit for how small such a pass may be. For now that limit is arbitrarily chosen at 100k pixels. 
--- gsk/gpu/gskgpunodeprocessor.c | 224 ++++++++++++++++++++++++++-------- gsk/gskrectprivate.h | 2 + 2 files changed, 175 insertions(+), 51 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 9466258899..71482d32bc 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -54,6 +54,11 @@ */ #define EPSILON 0.001 +/* the amount of pixels for us to potentially save to warrant + * carving out a rectangle for an extra render pass + */ +#define MIN_PIXELS_FOR_OCCLUSION_PASS 1000 * 100 + /* A note about coordinate systems * * The rendering code keeps track of multiple coordinate systems to optimize rendering as @@ -442,6 +447,68 @@ gsk_gpu_node_processor_rect_clip_to_device (GskGpuNodeProcessor *self, return TRUE; } +static gboolean +gsk_gpu_node_processor_rect_device_to_clip (GskGpuNodeProcessor *self, + const graphene_rect_t *src, + graphene_rect_t *dest) +{ + graphene_rect_t transformed; + float scale_x = graphene_vec2_get_x (&self->scale); + float scale_y = graphene_vec2_get_y (&self->scale); + + switch (gsk_transform_get_fine_category (self->modelview)) + { + case GSK_FINE_TRANSFORM_CATEGORY_UNKNOWN: + case GSK_FINE_TRANSFORM_CATEGORY_ANY: + case GSK_FINE_TRANSFORM_CATEGORY_3D: + case GSK_FINE_TRANSFORM_CATEGORY_2D: + return FALSE; + + case GSK_FINE_TRANSFORM_CATEGORY_2D_DIHEDRAL: + case GSK_FINE_TRANSFORM_CATEGORY_2D_NEGATIVE_AFFINE: + case GSK_FINE_TRANSFORM_CATEGORY_2D_AFFINE: + case GSK_FINE_TRANSFORM_CATEGORY_2D_TRANSLATE: + { + GskTransform *inverse = gsk_transform_invert (gsk_transform_ref (self->modelview)); + gsk_transform_transform_bounds (inverse, src, &transformed); + gsk_transform_unref (inverse); + src = &transformed; + } + break; + + case GSK_FINE_TRANSFORM_CATEGORY_IDENTITY: + default: + break; + } + + dest->origin.x = src->origin.x / scale_x; + dest->origin.y = src->origin.y / scale_y; + dest->size.width = src->size.width / scale_x; + dest->size.height = src->size.height / scale_y; + 
+ return TRUE; +} + +static gboolean +gsk_gpu_node_processor_rect_to_device_shrink (GskGpuNodeProcessor *self, + const graphene_rect_t *rect, + cairo_rectangle_int_t *int_rect) +{ + graphene_rect_t tmp; + + graphene_rect_offset_r (rect, + self->offset.x, + self->offset.y, + &tmp); + + if (!gsk_gpu_node_processor_rect_clip_to_device (self, &tmp, &tmp)) + return FALSE; + + gsk_rect_to_cairo_shrink (&tmp, int_rect); + + return int_rect->width > 0 && int_rect->height > 0; +} + static gboolean gsk_gpu_node_processor_rect_is_integer (GskGpuNodeProcessor *self, const graphene_rect_t *rect, @@ -1009,16 +1076,87 @@ gsk_gpu_node_processor_add_clip_node (GskGpuNodeProcessor *self, gsk_clip_node_get_clip (node)); } +/* + * gsk_gpu_node_processor_clip_first_node: + * @self: the nodeprocessor + * @opaque: an opaque rectangle to clip to + * + * Shrinks the clip during a first node determination to only cover + * the passed in opaque rect - or rather its intersection with the + * previous clip. + * + * This can fail if the resulting scissor rect would be too small to warrant + * an occlusion pass. + * + * Adjusts scissor rect and clip, when not starting a first node, + * you need to revert them. + * + * Returns: TRUE if the adjustment was successful. 
+ **/ static gboolean -gsk_gpu_node_processor_add_first_clip_node (GskGpuNodeProcessor *self, - GskGpuImage *target, - GskRenderPassType pass_type, - GskRenderNode *node) +gsk_gpu_node_processor_clip_first_node (GskGpuNodeProcessor *self, + const graphene_rect_t *opaque) { - return gsk_gpu_node_processor_add_first_node (self, - target, - pass_type, - gsk_clip_node_get_child (node)); + cairo_rectangle_int_t device_clip; + graphene_rect_t rect; + + if (!gsk_gpu_node_processor_rect_to_device_shrink (self, opaque, &device_clip) || + !gdk_rectangle_intersect (&device_clip, &self->scissor, &device_clip) || + device_clip.width * device_clip.height < MIN_PIXELS_FOR_OCCLUSION_PASS) + return FALSE; + + self->scissor = device_clip; + + gsk_gpu_node_processor_rect_device_to_clip (self, + &GSK_RECT_INIT_CAIRO (&device_clip), + &rect); + gsk_gpu_clip_init_empty (&self->clip, &rect); + + return TRUE; +} + +static gboolean +gsk_gpu_node_processor_add_first_node_clipped (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + const graphene_rect_t *clip, + GskRenderNode *node) +{ + GskGpuClip old_clip; + cairo_rectangle_int_t old_scissor; + + old_scissor = self->scissor; + gsk_gpu_clip_init_copy (&old_clip, &self->clip); + + if (!gsk_gpu_node_processor_clip_first_node (self, clip)) + return FALSE; + + if (gsk_gpu_node_processor_add_first_node (self, + target, + pass_type, + node)) + { + /* don't revert clip here, the add_first_node() adjusted it to a correct value */ + return TRUE; + } + + self->scissor = old_scissor; + gsk_gpu_clip_init_copy (&old_clip, &self->clip); + + return FALSE; +} + +static gboolean +gsk_gpu_node_processor_add_first_clip_node (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + GskRenderNode *node) +{ + return gsk_gpu_node_processor_add_first_node_clipped (self, + target, + pass_type, + &node->bounds, + gsk_clip_node_get_child (node)); } static void @@ -1141,22 +1279,23 @@ 
gsk_gpu_node_processor_add_rounded_clip_node (GskGpuNodeProcessor *self, } static gboolean -gsk_gpu_node_processor_add_first_rounded_clip_node (GskGpuNodeProcessor *self, - GskGpuImage *target, - GskRenderPassType pass_type, - GskRenderNode *node) +gsk_gpu_node_processor_add_first_rounded_clip_node (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + GskRenderNode *node) { - GskRoundedRect node_clip; + graphene_rect_t cover, clip; - node_clip = *gsk_rounded_clip_node_get_clip (node); - gsk_rounded_rect_offset (&node_clip, self->offset.x, self->offset.y); - if (!gsk_rounded_rect_contains_rect (&node_clip, &self->clip.rect.bounds)) - return FALSE; + gsk_gpu_node_processor_get_clip_bounds (self, &clip); + gsk_rounded_rect_get_largest_cover (gsk_rounded_clip_node_get_clip (node), + &clip, + &cover); - return gsk_gpu_node_processor_add_first_node (self, - target, - pass_type, - gsk_rounded_clip_node_get_child (node)); + return gsk_gpu_node_processor_add_first_node_clipped (self, + target, + pass_type, + &cover, + gsk_rounded_clip_node_get_child (node)); } static GskTransform * @@ -1604,19 +1743,17 @@ gsk_gpu_node_processor_add_color_node (GskGpuNodeProcessor *self, } static gboolean -gsk_gpu_node_processor_add_first_color_node (GskGpuNodeProcessor *self, - GskGpuImage *target, - GskRenderPassType pass_type, - GskRenderNode *node) +gsk_gpu_node_processor_add_first_color_node (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + GskRenderNode *node) { - graphene_rect_t clip_bounds; float clear_color[4]; if (!node->fully_opaque) return FALSE; - gsk_gpu_node_processor_get_clip_bounds (self, &clip_bounds); - if (!gsk_rect_contains_rect (&node->bounds, &clip_bounds)) + if (!gsk_gpu_node_processor_clip_first_node (self, &node->bounds)) return FALSE; gdk_color_to_float (gsk_color_node_get_color2 (node), self->ccs, clear_color); @@ -3445,13 +3582,12 @@ gsk_gpu_node_processor_add_first_container_node 
(GskGpuNodeProcessor *se if (i < 0) { - graphene_rect_t opaque, clip_bounds; + graphene_rect_t opaque; if (!gsk_render_node_get_opaque_rect (node, &opaque)) return FALSE; - gsk_gpu_node_processor_get_clip_bounds (self, &clip_bounds); - if (!gsk_rect_contains_rect (&opaque, &clip_bounds)) + if (!gsk_gpu_node_processor_clip_first_node (self, &opaque)) return FALSE; gsk_gpu_render_pass_begin_op (self->frame, @@ -3774,18 +3910,6 @@ gsk_gpu_node_processor_add_node (GskGpuNodeProcessor *self, } } -static gboolean -clip_covered_by_rect (const GskGpuClip *self, - const graphene_point_t *offset, - const graphene_rect_t *rect) -{ - graphene_rect_t r = *rect; - r.origin.x += offset->x; - r.origin.y += offset->y; - - return gsk_rect_contains_rect (&r, &self->rect.bounds); -} - static gboolean gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, GskGpuImage *target, @@ -3793,13 +3917,16 @@ gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, GskRenderNode *node) { GskRenderNodeType node_type; - graphene_rect_t opaque, clip_bounds; + graphene_rect_t opaque; /* This catches the corner cases of empty nodes, so after this check * there's quaranteed to be at least 1 pixel that needs to be drawn */ if (node->bounds.size.width == 0 || node->bounds.size.height == 0 || - !clip_covered_by_rect (&self->clip, &self->offset, &node->bounds)) + !gsk_render_node_get_opaque_rect (node, &opaque)) + return FALSE; + + if (!gsk_gpu_clip_may_intersect_rect (&self->clip, &self->offset, &node->bounds)) return FALSE; node_type = gsk_render_node_get_node_type (node); @@ -3813,12 +3940,7 @@ gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, return nodes_vtable[node_type].process_first_node (self, target, pass_type, node); /* fallback starts here */ - - if (!gsk_render_node_get_opaque_rect (node, &opaque)) - return FALSE; - - gsk_gpu_node_processor_get_clip_bounds (self, &clip_bounds); - if (!gsk_rect_contains_rect (&opaque, &clip_bounds)) + if 
(!gsk_gpu_node_processor_clip_first_node (self, &opaque)) return FALSE; gsk_gpu_render_pass_begin_op (self->frame, diff --git a/gsk/gskrectprivate.h b/gsk/gskrectprivate.h index e0e54f0d56..aad5e8812b 100644 --- a/gsk/gskrectprivate.h +++ b/gsk/gskrectprivate.h @@ -5,6 +5,8 @@ #include #include +#define GSK_RECT_INIT_CAIRO(cairo_rect) GRAPHENE_RECT_INIT((cairo_rect)->x, (cairo_rect)->y, (cairo_rect)->width, (cairo_rect)->height) + static inline void gsk_rect_init (graphene_rect_t *r, float x, From 852ecf7c205a24e21d636acd233b8005d06b16cd Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Wed, 7 Aug 2024 19:26:23 +0200 Subject: [PATCH 13/22] gpu: Consult scissor for clip bounds When querying clip bounds, also check the scissor rect, because sometimes that one is tighter than the clip bounds, because the clip bounds need to track some larger rounded corners. Makes a few tests harder to break. --- gsk/gpu/gskgpunodeprocessor.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 71482d32bc..5a7f8635fb 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -531,14 +531,25 @@ static void gsk_gpu_node_processor_get_clip_bounds (GskGpuNodeProcessor *self, graphene_rect_t *out_bounds) { - graphene_rect_offset_r (&self->clip.rect.bounds, - - self->offset.x, - - self->offset.y, - out_bounds); - - /* FIXME: We could try the scissor rect here. - * But how often is that smaller than the clip bounds? 
- */ + graphene_rect_t scissor; + + if (gsk_gpu_node_processor_rect_device_to_clip (self, + &GSK_RECT_INIT_CAIRO (&self->scissor), + &scissor)) + { + if (!gsk_rect_intersection (&scissor, &self->clip.rect.bounds, out_bounds)) + { + g_warning ("Clipping is broken, everything is clipped, but we didn't early-exit.\n"); + *out_bounds = self->clip.rect.bounds; + } + } + else + { + *out_bounds = self->clip.rect.bounds; + } + + out_bounds->origin.x -= self->offset.x; + out_bounds->origin.y -= self->offset.y; } static gboolean G_GNUC_WARN_UNUSED_RESULT From 57e21683a6eccea95129d11e9d5b5bbff62abe6a Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Sun, 4 Aug 2024 07:03:37 +0200 Subject: [PATCH 14/22] gpu: Try largest clip rect first When trying to cull, try culling from the largest rectangle of the remaining draw region first. That region has the biggest chance of containing a large area to skip. As a side effect, we can stop trying to cull once the largest rectangle isn't big enough anymore to contain anything worth culling. 
--- gsk/gpu/gskgpunodeprocessor.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 5a7f8635fb..acd84400af 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -4030,12 +4030,24 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, GskRenderPassType pass_type) { GskGpuNodeProcessor self; + int i, n, best, best_size; + cairo_rectangle_int_t rect; - while (cairo_region_num_rectangles (clip) > 0) + while ((n = cairo_region_num_rectangles (clip)) > 0) { - cairo_rectangle_int_t rect; + best = -1; + best_size = 0; + for (i = 0; i < n; i++) + { + cairo_region_get_rectangle (clip, i, &rect); + if (rect.width * rect.height > best_size) + { + best = i; + best_size = rect.width * rect.height; + } + } - cairo_region_get_rectangle (clip, 0, &rect); + cairo_region_get_rectangle (clip, best, &rect); gsk_gpu_node_processor_init (&self, frame, @@ -4044,7 +4056,8 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, &rect, viewport); - if (!gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING) || + if (best_size < MIN_PIXELS_FOR_OCCLUSION_PASS || + !gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING) || !gsk_gpu_node_processor_add_first_node (&self, target, pass_type, From 30e5bfcbf0047c960491e580fecad7982346fff6 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Mon, 5 Aug 2024 03:58:32 +0200 Subject: [PATCH 15/22] gpu: Refactor culling function Split the loop into 2: One for the culling and one for later, once we've decided to not try culling anymore. 
--- gsk/gpu/gskgpunodeprocessor.c | 41 ++++++++++++++++++++++++++++++----- 1 file changed, 36 insertions(+), 5 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index acd84400af..ff3ba0e964 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -4032,8 +4032,12 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, GskGpuNodeProcessor self; int i, n, best, best_size; cairo_rectangle_int_t rect; + gboolean do_culling; - while ((n = cairo_region_num_rectangles (clip)) > 0) + do_culling = gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING); + + while (do_culling && + (n = cairo_region_num_rectangles (clip)) > 0) { best = -1; best_size = 0; @@ -4049,6 +4053,9 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, cairo_region_get_rectangle (clip, best, &rect); + if (best_size < MIN_PIXELS_FOR_OCCLUSION_PASS) + break; + gsk_gpu_node_processor_init (&self, frame, target, @@ -4056,9 +4063,7 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, &rect, viewport); - if (best_size < MIN_PIXELS_FOR_OCCLUSION_PASS || - !gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING) || - !gsk_gpu_node_processor_add_first_node (&self, + if (!gsk_gpu_node_processor_add_first_node (&self, target, pass_type, node)) @@ -4068,8 +4073,8 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, &rect, GSK_VEC4_TRANSPARENT, pass_type); - gsk_gpu_node_processor_add_node (&self, node); + do_culling = FALSE; } gsk_gpu_render_pass_end_op (frame, @@ -4081,6 +4086,32 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, gsk_gpu_node_processor_finish (&self); } + for (i = 0; i < cairo_region_num_rectangles (clip); i++) + { + cairo_region_get_rectangle (clip, i, &rect); + + gsk_gpu_node_processor_init (&self, + frame, + target, + ccs, + &rect, + viewport); + + gsk_gpu_render_pass_begin_op (frame, + target, + &rect, + GSK_VEC4_TRANSPARENT, + pass_type); + + gsk_gpu_node_processor_add_node (&self, node); + 
+ + gsk_gpu_render_pass_end_op (frame, + target, + pass_type); + + gsk_gpu_node_processor_finish (&self); + } + cairo_region_destroy (clip); } From b9d868b8eb909435e72e55d8b0391c584f2c7f7c Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Mon, 5 Aug 2024 04:41:39 +0200 Subject: [PATCH 16/22] gpu: Pass min occlusion size as argument That allows us to vary the number. We don't do that yet, but we could now. --- gsk/gpu/gskgpunodeprocessor.c | 59 +++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 20 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index ff3ba0e964..0758a36fa8 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -136,6 +136,7 @@ static void gsk_gpu_node_processor_add_node (GskGpuN static gboolean gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, GskGpuImage *target, GskRenderPassType pass_type, + gsize min_occlusion_pixels, GskRenderNode *node); static GskGpuImage * gsk_gpu_get_node_as_image (GskGpuFrame *frame, GdkColorState *ccs, @@ -1090,14 +1091,15 @@ gsk_gpu_node_processor_add_clip_node (GskGpuNodeProcessor *self, /* * gsk_gpu_node_processor_clip_first_node: * @self: the nodeprocessor + * @min_occlusion_pixels: Minimum size of resulting scissor rect * @opaque: an opaque rectangle to clip to * * Shrinks the clip during a first node determination to only cover * the passed in opaque rect - or rather its intersection with the * previous clip. * - * This can fail if the resulting scissor rect would be too small to warrant - * an occlusion pass. + * This can fail if the resulting scissor rect would be smaller than + * min_occlusion_pixels and not warrant an occlusion pass. * * Adjusts scissor rect and clip, when not starting a first node, * you need to revert them. 
@@ -1106,6 +1108,7 @@ gsk_gpu_node_processor_add_clip_node (GskGpuNodeProcessor *self, **/ static gboolean gsk_gpu_node_processor_clip_first_node (GskGpuNodeProcessor *self, + gsize min_occlusion_pixels, const graphene_rect_t *opaque) { cairo_rectangle_int_t device_clip; @@ -1113,7 +1116,7 @@ gsk_gpu_node_processor_clip_first_node (GskGpuNodeProcessor *self, if (!gsk_gpu_node_processor_rect_to_device_shrink (self, opaque, &device_clip) || !gdk_rectangle_intersect (&device_clip, &self->scissor, &device_clip) || - device_clip.width * device_clip.height < MIN_PIXELS_FOR_OCCLUSION_PASS) + device_clip.width * device_clip.height < min_occlusion_pixels) return FALSE; self->scissor = device_clip; @@ -1131,6 +1134,7 @@ gsk_gpu_node_processor_add_first_node_clipped (GskGpuNodeProcessor *self, GskGpuImage *target, GskRenderPassType pass_type, const graphene_rect_t *clip, + gsize min_occlusion_pixels, GskRenderNode *node) { GskGpuClip old_clip; @@ -1139,12 +1143,13 @@ gsk_gpu_node_processor_add_first_node_clipped (GskGpuNodeProcessor *self, old_scissor = self->scissor; gsk_gpu_clip_init_copy (&old_clip, &self->clip); - if (!gsk_gpu_node_processor_clip_first_node (self, clip)) + if (!gsk_gpu_node_processor_clip_first_node (self, min_occlusion_pixels, clip)) return FALSE; if (gsk_gpu_node_processor_add_first_node (self, target, pass_type, + min_occlusion_pixels, node)) { /* don't revert clip here, the add_first_node() adjusted it to a correct value */ @@ -1161,12 +1166,14 @@ static gboolean gsk_gpu_node_processor_add_first_clip_node (GskGpuNodeProcessor *self, GskGpuImage *target, GskRenderPassType pass_type, + gsize min_occlusion_pixels, GskRenderNode *node) { return gsk_gpu_node_processor_add_first_node_clipped (self, target, pass_type, &node->bounds, + min_occlusion_pixels, gsk_clip_node_get_child (node)); } @@ -1293,6 +1300,7 @@ static gboolean gsk_gpu_node_processor_add_first_rounded_clip_node (GskGpuNodeProcessor *self, GskGpuImage *target, GskRenderPassType pass_type, + 
gsize min_occlusion_pixels, GskRenderNode *node) { graphene_rect_t cover, clip; @@ -1306,6 +1314,7 @@ gsk_gpu_node_processor_add_first_rounded_clip_node (GskGpuNodeProcessor *self, target, pass_type, &cover, + min_occlusion_pixels, gsk_rounded_clip_node_get_child (node)); } @@ -1503,10 +1512,11 @@ gsk_gpu_node_processor_add_transform_node (GskGpuNodeProcessor *self, } static gboolean -gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *self, - GskGpuImage *target, - GskRenderPassType pass_type, - GskRenderNode *node) +gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + gsize min_occlusion_pixels, + GskRenderNode *node) { GskTransform *transform; float dx, dy, scale_x, scale_y; @@ -1528,6 +1538,7 @@ gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *se result = gsk_gpu_node_processor_add_first_node (self, target, pass_type, + min_occlusion_pixels, gsk_transform_node_get_child (node)); self->offset = old_offset; return result; @@ -1550,6 +1561,7 @@ gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *se result = gsk_gpu_node_processor_add_first_node (self, target, pass_type, + min_occlusion_pixels, gsk_transform_node_get_child (node)); self->offset = old_offset; @@ -1592,6 +1604,7 @@ gsk_gpu_node_processor_add_first_transform_node (GskGpuNodeProcessor *se result = gsk_gpu_node_processor_add_first_node (self, target, pass_type, + min_occlusion_pixels, gsk_transform_node_get_child (node)); self->offset = old_offset; @@ -1757,6 +1770,7 @@ static gboolean gsk_gpu_node_processor_add_first_color_node (GskGpuNodeProcessor *self, GskGpuImage *target, GskRenderPassType pass_type, + gsize min_occlusion_pixels, GskRenderNode *node) { float clear_color[4]; @@ -1764,7 +1778,7 @@ gsk_gpu_node_processor_add_first_color_node (GskGpuNodeProcessor *self, if (!node->fully_opaque) return FALSE; - if (!gsk_gpu_node_processor_clip_first_node (self, 
&node->bounds)) + if (!gsk_gpu_node_processor_clip_first_node (self, min_occlusion_pixels, &node->bounds)) return FALSE; gdk_color_to_float (gsk_color_node_get_color2 (node), self->ccs, clear_color); @@ -3571,10 +3585,11 @@ gsk_gpu_node_processor_add_container_node (GskGpuNodeProcessor *self, } static gboolean -gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *self, - GskGpuImage *target, - GskRenderPassType pass_type, - GskRenderNode *node) +gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + gsize min_occlusion_pixels, + GskRenderNode *node) { int i, n; @@ -3587,6 +3602,7 @@ gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *se if (gsk_gpu_node_processor_add_first_node (self, target, pass_type, + min_occlusion_pixels, gsk_container_node_get_child (node, i))) break; } @@ -3598,7 +3614,7 @@ gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *se if (!gsk_render_node_get_opaque_rect (node, &opaque)) return FALSE; - if (!gsk_gpu_node_processor_clip_first_node (self, &opaque)) + if (!gsk_gpu_node_processor_clip_first_node (self, min_occlusion_pixels, &opaque)) return FALSE; gsk_gpu_render_pass_begin_op (self->frame, @@ -3650,6 +3666,7 @@ static const struct gboolean (* process_first_node) (GskGpuNodeProcessor *self, GskGpuImage *target, GskRenderPassType pass_type, + gsize min_occlusion_pixels, GskRenderNode *node); GskGpuImage * (* get_node_as_image) (GskGpuFrame *self, GdkColorState *ccs, @@ -3922,10 +3939,11 @@ gsk_gpu_node_processor_add_node (GskGpuNodeProcessor *self, } static gboolean -gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, - GskGpuImage *target, - GskRenderPassType pass_type, - GskRenderNode *node) +gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + gsize min_occlusion_pixels, + GskRenderNode *node) { GskRenderNodeType node_type; 
graphene_rect_t opaque; @@ -3948,10 +3966,10 @@ gsk_gpu_node_processor_add_first_node (GskGpuNodeProcessor *self, } if (nodes_vtable[node_type].process_first_node) - return nodes_vtable[node_type].process_first_node (self, target, pass_type, node); + return nodes_vtable[node_type].process_first_node (self, target, pass_type, min_occlusion_pixels, node); /* fallback starts here */ - if (!gsk_gpu_node_processor_clip_first_node (self, &opaque)) + if (!gsk_gpu_node_processor_clip_first_node (self, min_occlusion_pixels, &opaque)) return FALSE; gsk_gpu_render_pass_begin_op (self->frame, @@ -4066,6 +4084,7 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, if (!gsk_gpu_node_processor_add_first_node (&self, target, pass_type, + MIN_PIXELS_FOR_OCCLUSION_PASS, node)) { gsk_gpu_render_pass_begin_op (frame, From ac37b589b6b82f77dd3cab98a85f91f0adc94495 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Mon, 5 Aug 2024 07:04:32 +0200 Subject: [PATCH 17/22] gpu: Require an occlusion pass to be 10% of image That way we are guaranteed to run <=10 occlusion passes. 
--- gsk/gpu/gskgpunodeprocessor.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 0758a36fa8..a757850ff3 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -59,6 +59,11 @@ */ #define MIN_PIXELS_FOR_OCCLUSION_PASS 1000 * 100 +/* the amount of the whole image for us to potentially save to warrant + * carving out a rectangle for an extra render pass + */ +#define MIN_PERCENTAGE_FOR_OCCLUSION_PASS 10 + /* A note about coordinate systems * * The rendering code keeps track of multiple coordinate systems to optimize rendering as @@ -4049,10 +4054,14 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, { GskGpuNodeProcessor self; int i, n, best, best_size; + gsize min_occlusion_pixels; cairo_rectangle_int_t rect; gboolean do_culling; do_culling = gsk_gpu_frame_should_optimize (frame, GSK_GPU_OPTIMIZE_OCCLUSION_CULLING); + min_occlusion_pixels = gsk_gpu_image_get_width (target) * gsk_gpu_image_get_height (target) * + MIN_PERCENTAGE_FOR_OCCLUSION_PASS / 100; + min_occlusion_pixels = MAX (min_occlusion_pixels, MIN_PIXELS_FOR_OCCLUSION_PASS); while (do_culling && (n = cairo_region_num_rectangles (clip)) > 0) @@ -4084,7 +4093,7 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, if (!gsk_gpu_node_processor_add_first_node (&self, target, pass_type, - MIN_PIXELS_FOR_OCCLUSION_PASS, + min_occlusion_pixels, node)) { gsk_gpu_render_pass_begin_op (frame, From 55597d88a4d0cede6bf8ec5e270c6badff92b339 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Mon, 5 Aug 2024 07:11:54 +0200 Subject: [PATCH 18/22] gpu: Run full check for every clip rect Now that we can specify the min size for an occlusion pass, we can specify that we want the full clip rect to be occluded for occlusion to trigger. The benefit of this is that for partial redraws we almost always get the background color to cover the redrawn rectangle, so occlusion will kick in. 
--- gsk/gpu/gskgpunodeprocessor.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index a757850ff3..71813ac32e 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -4125,13 +4125,20 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, &rect, viewport); - gsk_gpu_render_pass_begin_op (frame, - target, - &rect, - GSK_VEC4_TRANSPARENT, - pass_type); + if (!gsk_gpu_node_processor_add_first_node (&self, + target, + pass_type, + rect.width * rect.height, + node)) + { + gsk_gpu_render_pass_begin_op (frame, + target, + &rect, + GSK_VEC4_TRANSPARENT, + pass_type); - gsk_gpu_node_processor_add_node (&self, node); + gsk_gpu_node_processor_add_node (&self, node); + } gsk_gpu_render_pass_end_op (frame, target, From afa4eb7d354301e21c827ab151970da2c661460b Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Mon, 5 Aug 2024 17:07:01 +0200 Subject: [PATCH 19/22] gpu: Make containers check opaque size for early exit Container nodes save their opaque region, so it's quick to access. Use that to check if the largest opaque region even qualifies for culling - and if not, just exit. Speeds up walking node trees by a lot. 
--- gsk/gpu/gskgpunodeprocessor.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 71813ac32e..3eb3e201b2 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -3596,12 +3596,17 @@ gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *self, gsize min_occlusion_pixels, GskRenderNode *node) { + graphene_rect_t opaque; int i, n; n = gsk_container_node_get_n_children (node); if (n == 0) return FALSE; + if (!gsk_render_node_get_opaque_rect (node, &opaque) || + !gsk_gpu_node_processor_clip_first_node (self, min_occlusion_pixels, &opaque)) + return FALSE; + for (i = n; i-->0; ) { if (gsk_gpu_node_processor_add_first_node (self, @@ -3614,14 +3619,6 @@ gsk_gpu_node_processor_add_first_container_node (GskGpuNodeProcessor *self, if (i < 0) { - graphene_rect_t opaque; - - if (!gsk_render_node_get_opaque_rect (node, &opaque)) - return FALSE; - - if (!gsk_gpu_node_processor_clip_first_node (self, min_occlusion_pixels, &opaque)) - return FALSE; - gsk_gpu_render_pass_begin_op (self->frame, target, &self->scissor, From e9944148d5bc4789b5db1d60d9f5f4780849e5b5 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Mon, 5 Aug 2024 22:11:22 +0200 Subject: [PATCH 20/22] gpu: Add GSK_DEBUG=occlusion Draws a semi-transparent white overlay over all regions that have been chosen for occlusion. 
--- gsk/gpu/gskgpunodeprocessor.c | 11 +++++++++++ gsk/gskdebug.c | 1 + gsk/gskdebugprivate.h | 1 + 3 files changed, 13 insertions(+) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 3eb3e201b2..646b9ec819 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -4101,6 +4101,17 @@ gsk_gpu_node_processor_render (GskGpuFrame *frame, gsk_gpu_node_processor_add_node (&self, node); do_culling = FALSE; } + else if (GSK_DEBUG_CHECK (OCCLUSION)) + { + gsk_gpu_node_processor_sync_globals (&self, 0); + gsk_gpu_color_op (self.frame, + GSK_GPU_SHADER_CLIP_NONE, + self.ccs, + 1.0, + &self.offset, + &GRAPHENE_RECT_INIT(0, 0, 10000, 10000), + &GDK_COLOR_SRGB (1.0, 1.0, 1.0, 0.6)); + } gsk_gpu_render_pass_end_op (frame, target, diff --git a/gsk/gskdebug.c b/gsk/gskdebug.c index 8db5090799..22070ec358 100644 --- a/gsk/gskdebug.c +++ b/gsk/gskdebug.c @@ -15,6 +15,7 @@ static const GdkDebugKey gsk_debug_keys[] = { { "staging", GSK_DEBUG_STAGING, "Use a staging image for texture upload (Vulkan only)" }, { "offload-disable", GSK_DEBUG_OFFLOAD_DISABLE, "Disable graphics offload" }, { "cairo", GSK_DEBUG_CAIRO, "Overlay error pattern over Cairo drawing (finds fallbacks)" }, + { "occlusion", GSK_DEBUG_OCCLUSION, "Overlay highlight over areas optimized via occlusion culling" }, }; static guint gsk_debug_flags; diff --git a/gsk/gskdebugprivate.h b/gsk/gskdebugprivate.h index d7b71c9df5..bbf2910998 100644 --- a/gsk/gskdebugprivate.h +++ b/gsk/gskdebugprivate.h @@ -18,6 +18,7 @@ typedef enum { GSK_DEBUG_STAGING = 1 << 10, GSK_DEBUG_OFFLOAD_DISABLE = 1 << 11, GSK_DEBUG_CAIRO = 1 << 12, + GSK_DEBUG_OCCLUSION = 1 << 13, } GskDebugFlags; #define GSK_DEBUG_ANY ((1 << 13) - 1) From 82aa2cb5c2019943d1b91e0ee43c8e12a82d9624 Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Wed, 7 Aug 2024 06:53:36 +0200 Subject: [PATCH 21/22] gpu: Implement add_first_node for debug nodes As always, we want to have no influence on any results from these nodes. 
--- gsk/gpu/gskgpunodeprocessor.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 646b9ec819..713818c13e 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -3639,6 +3639,20 @@ gsk_gpu_node_processor_add_debug_node (GskGpuNodeProcessor *self, gsk_gpu_node_processor_add_node (self, gsk_debug_node_get_child (node)); } +static gboolean +gsk_gpu_node_processor_add_first_debug_node (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + gsize min_occlusion_pixels, + GskRenderNode *node) +{ + return gsk_gpu_node_processor_add_first_node (self, + target, + pass_type, + min_occlusion_pixels, + gsk_debug_node_get_child (node)); +} + static GskGpuImage * gsk_gpu_get_debug_node_as_image (GskGpuFrame *frame, GdkColorState *ccs, @@ -3849,7 +3863,7 @@ static const struct GSK_GPU_GLOBAL_MATRIX | GSK_GPU_GLOBAL_SCALE | GSK_GPU_GLOBAL_CLIP | GSK_GPU_GLOBAL_SCISSOR | GSK_GPU_GLOBAL_BLEND, GSK_GPU_HANDLE_OPACITY, gsk_gpu_node_processor_add_debug_node, - NULL, + gsk_gpu_node_processor_add_first_debug_node, gsk_gpu_get_debug_node_as_image, }, [GSK_GL_SHADER_NODE] = { From 4e4ed1e2d56728557e3f79ca1b50ee1d33fd80ed Mon Sep 17 00:00:00 2001 From: Benjamin Otte Date: Wed, 7 Aug 2024 07:04:56 +0200 Subject: [PATCH 22/22] gpu: Implement occlusion for subsurface nodes In the case of no offloading, we want to pass through to the child (which is likely a big texture doing occlusion). In the case of punching a hole, we want to punch the hole and not draw anything behind it, so we start an occlusion pass with transparency. And in the final case with offloading active, where the subsurface is above the parent, the node itself draws nothing, so it cannot occlude anything and we don't start an occlusion pass. 
This should fix concerns about drawing the background behind the video as mentioned for example in https://github.com/Rafostar/clapper/issues/343#issuecomment-1445425004 --- gsk/gpu/gskgpunodeprocessor.c | 38 ++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/gsk/gpu/gskgpunodeprocessor.c b/gsk/gpu/gskgpunodeprocessor.c index 713818c13e..0169ef0fe3 100644 --- a/gsk/gpu/gskgpunodeprocessor.c +++ b/gsk/gpu/gskgpunodeprocessor.c @@ -3548,6 +3548,42 @@ gsk_gpu_node_processor_add_subsurface_node (GskGpuNodeProcessor *self, } } +static gboolean +gsk_gpu_node_processor_add_first_subsurface_node (GskGpuNodeProcessor *self, + GskGpuImage *target, + GskRenderPassType pass_type, + gsize min_occlusion_pixels, + GskRenderNode *node) +{ + GdkSubsurface *subsurface; + + subsurface = gsk_subsurface_node_get_subsurface (node); + if (subsurface == NULL || + gdk_subsurface_get_texture (subsurface) == NULL || + gdk_subsurface_get_parent (subsurface) != gdk_draw_context_get_surface (gsk_gpu_frame_get_context (self->frame))) + { + return gsk_gpu_node_processor_add_first_node (self, + target, + pass_type, + min_occlusion_pixels, + gsk_subsurface_node_get_child (node)); + } + + if (gdk_subsurface_is_above_parent (subsurface)) + return FALSE; + + if (!gsk_gpu_node_processor_clip_first_node (self, min_occlusion_pixels, &node->bounds)) + return FALSE; + + gsk_gpu_render_pass_begin_op (self->frame, + target, + &self->scissor, + GSK_VEC4_TRANSPARENT, + pass_type); + + return TRUE; +} + static GskGpuImage * gsk_gpu_get_subsurface_node_as_image (GskGpuFrame *frame, GdkColorState *ccs, @@ -3905,7 +3941,7 @@ static const struct GSK_GPU_GLOBAL_MATRIX | GSK_GPU_GLOBAL_SCALE | GSK_GPU_GLOBAL_CLIP | GSK_GPU_GLOBAL_SCISSOR | GSK_GPU_GLOBAL_BLEND, GSK_GPU_HANDLE_OPACITY, gsk_gpu_node_processor_add_subsurface_node, - NULL, + gsk_gpu_node_processor_add_first_subsurface_node, gsk_gpu_get_subsurface_node_as_image, }, };