From a01f310150a8e4d727e2b0260490cb88c089c41e Mon Sep 17 00:00:00 2001 From: Karl-Aksel Puulmann Date: Tue, 26 Nov 2024 12:48:54 +0200 Subject: [PATCH] Perf: Improve performance of top stats query with long time windows We usually don't read events/pageviews from sessions table as sessions can start before or after the time window of the query. This is particularly pronounced with realtime queries. After this change we start reading events/pageviews from sessions table for longer time periods as the inaccuracies introduced by the session timing is minimized this way. The end result is that queries for e.g. top stats on dashboard load for 7 days or longer periods only requires reading sessions table. --- CHANGELOG.md | 3 ++ lib/plausible/stats/table_decider.ex | 23 ++++++++--- test/plausible/stats/table_decider_test.exs | 42 ++++++++++++++++++--- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc0f3056f8bb..c1b76fc66df8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ All notable changes to this project will be documented in this file. ### Removed ### Changed + +- Improved performance of top stats query with longer time windows + ### Fixed - Fix returning filter suggestions for multiple custom property values in the dashboard Filter modal diff --git a/lib/plausible/stats/table_decider.ex b/lib/plausible/stats/table_decider.ex index 6820f7c607fd..f3916ca80c2d 100644 --- a/lib/plausible/stats/table_decider.ex +++ b/lib/plausible/stats/table_decider.ex @@ -62,10 +62,6 @@ defmodule Plausible.Stats.TableDecider do end end - # Note: This is inaccurate when filtering but required for old backwards compatibility - defp metric_partitioner(%Query{legacy_breakdown: true}, :pageviews), do: :either - defp metric_partitioner(%Query{legacy_breakdown: true}, :events), do: :either - defp metric_partitioner(_, :conversion_rate), do: :either defp metric_partitioner(_, :group_conversion_rate), do: :either defp metric_partitioner(_, :visitors), do: :either @@ -75,12 +71,24 @@ defmodule Plausible.Stats.TableDecider do defp metric_partitioner(_, :average_revenue), do: :event defp metric_partitioner(_, :total_revenue), do: :event defp metric_partitioner(_, :scroll_depth), do: :event - defp metric_partitioner(_, :pageviews), do: :event - defp metric_partitioner(_, :events), do: :event defp metric_partitioner(_, :bounce_rate), do: :session defp metric_partitioner(_, :visit_duration), do: :session defp metric_partitioner(_, :views_per_visit), do: :session + defp metric_partitioner(query, metric) when metric in [:pageviews, :events] do + query_days = DateTime.diff(query.utc_time_range.last, query.utc_time_range.first, :day) + + cond do + # Note: This is inaccurate when filtering but required for old backwards compatibility + legacy_breakdown?(query) -> :either + # When querying large time window aggregates, session-level event/pageview counts + # are accurate enough: the amount of sessions starting/ending outside of time window + # doesn't matter as much. + query_days > 5 and empty?(query.filters) and empty?(query.dimensions) -> :either + true -> :event + end + end + # Calculated metrics - handled on callsite separately from other metrics. defp metric_partitioner(_, :time_on_page), do: :other defp metric_partitioner(_, :total_visitors), do: :other @@ -97,6 +105,9 @@ defmodule Plausible.Stats.TableDecider do defp dimension_partitioner(_, _), do: :either + defp legacy_breakdown?(%Query{} = query), do: query.legacy_breakdown + defp legacy_breakdown?(query) when is_map(query), do: query[:legacy_breakdown] + @default %{event: [], session: [], either: [], other: [], sample_percent: []} defp partition(values, query, partitioner) do Enum.reduce(values, @default, fn value, acc -> diff --git a/test/plausible/stats/table_decider_test.exs b/test/plausible/stats/table_decider_test.exs index 479855489787..2e7e031caf3d 100644 --- a/test/plausible/stats/table_decider_test.exs +++ b/test/plausible/stats/table_decider_test.exs @@ -89,7 +89,8 @@ defmodule Plausible.Stats.TableDeciderTest do assert partition_metrics([:total_revenue, :visitors], query) == {[:total_revenue, :visitors], [], []} - assert partition_metrics([:pageviews, :visits], query) == {[:pageviews, :visits], [], []} + assert partition_metrics([:total_revenue, :visits], query) == + {[:total_revenue, :visits], [], []} end test "metrics that can be calculated on either when session-only metrics" do @@ -150,13 +151,42 @@ defmodule Plausible.Stats.TableDeciderTest do assert partition_metrics([:visitors], make_query([], ["visit:exit_page"])) == {[], [:visitors], []} end + + test "pageviews and events metrics are event-only when short time window" do + query = make_query([], [], %{"period" => "day"}) + + assert partition_metrics([:pageviews, :events, :bounce_rate], query) == + {[:pageviews, :events], [:bounce_rate], []} + end + + test "pageviews and events metrics can be calculated via either table with longer time windows" do + query = make_query([], [], %{"period" => "7d"}) + + assert partition_metrics([:pageviews, :events, :bounce_rate], query) == + {[], [:bounce_rate, :pageviews, :events], []} + end + + test "pageviews and events metrics are event-only when filters or dimensions" do + query1 = make_query(["visit:exit_page"], [], %{"period" => "7d"}) + query2 = make_query([], ["visit:exit_page"], %{"period" => "7d"}) + + assert partition_metrics([:pageviews, :events, :bounce_rate], query1) == + {[:pageviews, :events], [:bounce_rate], []} + + assert partition_metrics([:pageviews, :events, :bounce_rate], query2) == + {[:pageviews, :events], [:bounce_rate], []} + end end - defp make_query(filter_keys, dimensions \\ []) do - Query.from(build(:site), %{ - "filters" => Enum.map(filter_keys, fn filter_key -> ["is", filter_key, []] end), - "dimensions" => dimensions - }) + defp make_query(filter_keys, dimensions \\ [], extra \\ %{}) do + params = + %{ + "filters" => Enum.map(filter_keys, fn filter_key -> ["is", filter_key, []] end), + "dimensions" => dimensions + } + |> Map.merge(extra) + + Query.from(build(:site), params) end defp make_query_full_filters(filters) do