Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Use UDF everywhere #29216

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
368 changes: 174 additions & 194 deletions ee/clickhouse/views/test/funnel/__snapshots__/test_clickhouse_funnel.ambr

Large diffs are not rendered by default.

385 changes: 180 additions & 205 deletions posthog/api/test/__snapshots__/test_insight.ambr

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from posthog.hogql_queries.insights.funnels.utils import (
funnel_window_interval_unit_to_sql,
get_funnel_actor_class,
use_udf,
)
from posthog.hogql_queries.query_runner import QueryRunner
from posthog.models import Team
Expand Down Expand Up @@ -137,9 +136,7 @@ def __init__(
self.context.actorsQuery = self.actors_query

# Used for generating the funnel persons cte
funnel_order_actor_class = get_funnel_actor_class(
self.context.funnelsFilter, use_udf(self.context.funnelsFilter, self.team)
)(context=self.context)
funnel_order_actor_class = get_funnel_actor_class(self.context.funnelsFilter, True)(context=self.context)
assert isinstance(
funnel_order_actor_class, FunnelActors | FunnelStrictActors | FunnelUnorderedActors | FunnelUDF
) # for typings
Expand Down
16 changes: 5 additions & 11 deletions posthog/hogql_queries/insights/funnels/funnels_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from posthog.hogql_queries.insights.funnels.funnel_time_to_convert import FunnelTimeToConvert
from posthog.hogql_queries.insights.funnels.funnel_trends import FunnelTrends
from posthog.hogql_queries.insights.funnels.funnel_trends_udf import FunnelTrendsUDF
from posthog.hogql_queries.insights.funnels.utils import get_funnel_actor_class, get_funnel_order_class, use_udf
from posthog.hogql_queries.insights.funnels.utils import get_funnel_actor_class, get_funnel_order_class
from posthog.hogql_queries.query_runner import QueryRunner
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models import Team
Expand Down Expand Up @@ -105,17 +105,11 @@ def calculate(self):
if response.timings is not None:
timings.extend(response.timings)

return FunnelsQueryResponse(
isUdf=self._use_udf, results=results, timings=timings, hogql=hogql, modifiers=self.modifiers
)

@cached_property
def _use_udf(self):
return use_udf(self.context.funnelsFilter, self.team)
return FunnelsQueryResponse(isUdf=True, results=results, timings=timings, hogql=hogql, modifiers=self.modifiers)

@cached_property
def funnel_order_class(self):
return get_funnel_order_class(self.context.funnelsFilter, use_udf=self._use_udf)(context=self.context)
return get_funnel_order_class(self.context.funnelsFilter, use_udf=True)(context=self.context)

@cached_property
def funnel_class(self):
Expand All @@ -124,7 +118,7 @@ def funnel_class(self):
if funnelVizType == FunnelVizType.TRENDS:
return (
FunnelTrendsUDF(context=self.context, **self.kwargs)
if self._use_udf and self.context.funnelsFilter.funnelOrderType != StepOrderValue.UNORDERED
if self.context.funnelsFilter.funnelOrderType != StepOrderValue.UNORDERED
else FunnelTrends(context=self.context, **self.kwargs)
)
elif funnelVizType == FunnelVizType.TIME_TO_CONVERT:
Expand All @@ -134,7 +128,7 @@ def funnel_class(self):

@cached_property
def funnel_actor_class(self):
return get_funnel_actor_class(self.context.funnelsFilter, self._use_udf)(context=self.context)
return get_funnel_actor_class(self.context.funnelsFilter, True)(context=self.context)

@cached_property
def query_date_range(self):
Expand Down
2,874 changes: 1,048 additions & 1,826 deletions posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from posthog.models.cohort.cohort import Cohort
from posthog.models.filters import Filter
from posthog.queries.funnels.funnel_trends_persons import ClickhouseFunnelTrendsActors
from posthog.schema import FunnelsQuery, FunnelsQueryResponse
from posthog.schema import FunnelsQuery
from posthog.test.base import (
APIBaseTest,
ClickhouseTestMixin,
Expand Down Expand Up @@ -2540,27 +2540,3 @@ def test_breakdown_with_attribution_2(self):
assert [0, 1] == [x["reached_to_step_count"] for x in results if x["breakdown_value"] == ["Chrome"]]
assert [1, 1] == [x["reached_from_step_count"] for x in results if x["breakdown_value"] == ["Safari"]]
assert [1, 0] == [x["reached_to_step_count"] for x in results if x["breakdown_value"] == ["Safari"]]


class TestFunnelTrends(BaseTestFunnelTrends):
__test__ = True

def test_assert_udf_flag_is_working(self):
filters = {
"insight": INSIGHT_FUNNELS,
"funnel_viz_type": "trends",
"display": TRENDS_LINEAR,
"interval": "hour",
"date_from": "2021-05-01 00:00:00",
"funnel_window_interval": 7,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
{"id": "step three", "order": 2},
],
}

query = cast(FunnelsQuery, filter_to_query(filters))
results = cast(FunnelsQueryResponse, FunnelsQueryRunner(query=query, team=self.team).calculate())

self.assertFalse(results.isUdf)
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@
from unittest.mock import patch, Mock

from hogql_parser import parse_expr
from posthog.constants import INSIGHT_FUNNELS, TRENDS_LINEAR, FunnelOrderType
from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType
from posthog.hogql.constants import HogQLGlobalSettings, MAX_BYTES_BEFORE_EXTERNAL_GROUP_BY
from posthog.hogql.query import execute_hogql_query
from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner
from posthog.hogql_queries.insights.funnels.test.test_funnel_trends import BaseTestFunnelTrends
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.schema import (
FunnelsQuery,
FunnelsQueryResponse,
EventsNode,
BreakdownFilter,
FunnelsFilter,
Expand Down Expand Up @@ -77,45 +76,6 @@ def test_redundant_event_filtering(self):
# Make sure the events have been condensed down to two
self.assertEqual(2, len(response.results[0][-1]))

def test_assert_udf_flag_is_working(self):
filters = {
"insight": INSIGHT_FUNNELS,
"funnel_viz_type": "trends",
"display": TRENDS_LINEAR,
"interval": "hour",
"date_from": "2021-05-01 00:00:00",
"funnel_window_interval": 7,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
{"id": "step three", "order": 2},
],
}

query = cast(FunnelsQuery, filter_to_query(filters))
results = cast(FunnelsQueryResponse, FunnelsQueryRunner(query=query, team=self.team).calculate())

self.assertTrue(results.isUdf)

def test_assert_steps_flag_is_off(self):
filters = {
"insight": INSIGHT_FUNNELS,
"funnel_viz_type": "steps",
"interval": "hour",
"date_from": "2021-05-01 00:00:00",
"funnel_window_interval": 7,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
{"id": "step three", "order": 2},
],
}

query = cast(FunnelsQuery, filter_to_query(filters))
results = cast(FunnelsQueryResponse, FunnelsQueryRunner(query=query, team=self.team).calculate())

self.assertFalse(results.isUdf)

def test_different_prop_val_in_strict_filter(self):
funnels_query = FunnelsQuery(
series=[EventsNode(event="first"), EventsNode(event="second")],
Expand Down
19 changes: 0 additions & 19 deletions posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,25 +83,6 @@ def test_assert_flag_is_on(self):

self.assertTrue(results.isUdf)

def test_assert_trends_flag_is_off(self):
filters = {
"insight": INSIGHT_FUNNELS,
"funnel_viz_type": "trends",
"interval": "hour",
"date_from": "2021-05-01 00:00:00",
"funnel_window_interval": 7,
"events": [
{"id": "step one", "order": 0},
{"id": "step two", "order": 1},
{"id": "step three", "order": 2},
],
}

query = cast(FunnelsQuery, filter_to_query(filters))
results = cast(FunnelsQueryResponse, FunnelsQueryRunner(query=query, team=self.team).calculate())

self.assertFalse(results.isUdf)

# Old style funnels fails on this (not sure why)
def test_events_same_timestamp_no_exclusions(self):
_create_person(distinct_ids=["test"], team_id=self.team.pk)
Expand Down
16 changes: 0 additions & 16 deletions posthog/hogql_queries/insights/funnels/utils.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,10 @@
from posthog.constants import FUNNEL_WINDOW_INTERVAL_TYPES
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr
from posthog.hogql_queries.legacy_compatibility.feature_flag import (
insight_funnels_use_udf_trends,
insight_funnels_use_udf,
)
from posthog.models import Team
from posthog.schema import FunnelConversionWindowTimeUnit, FunnelVizType, FunnelsFilter, StepOrderValue
from rest_framework.exceptions import ValidationError


def use_udf(funnelsFilter: FunnelsFilter, team: Team):
if funnelsFilter.useUdf:
return True
funnelVizType = funnelsFilter.funnelVizType
if funnelVizType == FunnelVizType.TRENDS and insight_funnels_use_udf_trends(team):
return True
if funnelVizType == FunnelVizType.STEPS and insight_funnels_use_udf(team):
return True
return False


def get_funnel_order_class(funnelsFilter: FunnelsFilter, use_udf=False):
from posthog.hogql_queries.insights.funnels import (
Funnel,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,55 +7,38 @@
FROM
(SELECT aggregation_target AS actor_id
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
median(step_1_conversion_time) AS step_1_median_conversion_time_inner
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
arrayJoin(aggregate_funnel_array_v4(2, 1209600, 'first_touch', 'ordered', [[]], arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4)
and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4)
and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3)
and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3)
and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple,
af_tuple.1 AS step_reached,
plus(af_tuple.1, 1) AS steps,
af_tuple.2 AS breakdown,
af_tuple.3 AS timings,
aggregation_target AS aggregation_target
FROM
(SELECT aggregation_target AS aggregation_target,
steps AS steps,
max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
step_1_conversion_time AS step_1_conversion_time
FROM
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
latest_1 AS latest_1,
if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps,
if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
FROM
(SELECT aggregation_target AS aggregation_target,
timestamp AS timestamp,
step_0 AS step_0,
latest_0 AS latest_0,
step_1 AS step_1,
min(latest_1) OVER (PARTITION BY aggregation_target
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS latest_1
FROM
(SELECT toTimeZone(e.timestamp, 'US/Pacific') AS timestamp,
if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
if(equals(e.event, '$pageview'), 1, 0) AS step_0,
if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
if(equals(e.event, '$pageview'), 1, 0) AS step_1,
if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
FROM events AS e
LEFT OUTER JOIN
(SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 99999)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-19 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('$pageview'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
WHERE ifNull(equals(step_0, 1), 0)))
GROUP BY aggregation_target,
steps
HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
and isNull(max(max_steps))))
WHERE ifNull(in(steps, [2]), 0)
ORDER BY aggregation_target ASC) AS source
(SELECT toTimeZone(e.timestamp, 'US/Pacific') AS timestamp,
if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
e.uuid AS uuid,
e.`$session_id` AS `$session_id`,
e.`$window_id` AS `$window_id`,
if(equals(e.event, '$pageview'), 1, 0) AS step_0,
if(equals(e.event, '$pageview'), 1, 0) AS step_1
FROM events AS e
LEFT OUTER JOIN
(SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
person_distinct_id_overrides.distinct_id AS distinct_id
FROM person_distinct_id_overrides
WHERE equals(person_distinct_id_overrides.team_id, 99999)
GROUP BY person_distinct_id_overrides.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-19 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('$pageview'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))
GROUP BY aggregation_target
HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
WHERE ifNull(greaterOrEquals(step_reached, 1), 0)
ORDER BY aggregation_target ASC SETTINGS join_algorithm='auto') AS source
INNER JOIN
(SELECT person.id AS id,
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name
Expand Down
Loading