Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: query metadata to return index usage #29230

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -8607,6 +8607,9 @@
},
"type": "array"
},
"isUsingIndices": {
"$ref": "#/definitions/QueryIndexUsage"
},
"isValid": {
"type": "boolean"
},
Expand Down Expand Up @@ -10508,6 +10511,10 @@
],
"type": "string"
},
"QueryIndexUsage": {
"enum": ["undecisive", "no", "partial", "yes"],
"type": "string"
},
"QueryRequest": {
"additionalProperties": false,
"properties": {
Expand Down Expand Up @@ -10909,6 +10916,9 @@
},
"type": "array"
},
"isUsingIndices": {
"$ref": "#/definitions/QueryIndexUsage"
},
"isValid": {
"type": "boolean"
},
Expand Down
8 changes: 8 additions & 0 deletions frontend/src/queries/schema/schema-general.ts
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,18 @@ export interface HogQLNotice {
fix?: string
}

// Estimated index usage of a query, exposed as `isUsingIndices` on the metadata response.
// NOTE(review): member meanings below mirror the backend heuristic in
// posthog/clickhouse/explain.py from this change — confirm they stay in sync.
export enum QueryIndexUsage {
// Index usage could not be determined (also the fallback for unparsable plans).
Undecisive = 'undecisive',
// Every table read in the plan was judged not to use its indexes.
No = 'no',
// Some, but not all, table reads were judged to use their indexes.
Partial = 'partial',
// All table reads were judged to use their indexes.
Yes = 'yes',
}

export interface HogQLMetadataResponse {
query?: string
isValid?: boolean
isValidView?: boolean
isUsingIndices?: QueryIndexUsage
errors: HogQLNotice[]
warnings: HogQLNotice[]
notices: HogQLNotice[]
Expand Down
97 changes: 97 additions & 0 deletions posthog/clickhouse/explain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import json

from posthog.schema import QueryIndexUsage


def find_all_reads(explain: dict) -> list[dict]:
    """Collect every plan node that carries an ``"Indexes"`` entry.

    The plan tree is walked depth-first in pre-order (a node before its
    children, children in their listed order), so the returned list has a
    stable, document-like ordering.

    Args:
        explain: A plan node, or a wrapper dict holding the node under the
            ``"Plan"`` key. Child nodes are read from the ``"Plans"`` key and
            may themselves be wrapped the same way.

    Returns:
        The plan nodes (the same dict objects, not copies) that contain an
        ``"Indexes"`` key, even when that entry is empty.
    """
    reads: list[dict] = []
    # An explicit stack avoids both the recursion limit on deeply nested plans
    # and the repeated list copying of ``reads = reads + ...``.
    pending = [explain]
    while pending:
        node = pending.pop()
        plan = node.get("Plan", node)
        if "Indexes" in plan:
            reads.append(plan)
        # Push children reversed so they are popped in their original order.
        pending.extend(reversed(plan.get("Plans", [])))
    return reads


def selected_less_granules(index, tiny_data_granules=100) -> bool:
    """Tell whether an index entry narrowed the set of granules to read.

    Args:
        index: One index entry; ``"Initial Granules"`` and
            ``"Selected Granules"`` are read from it, each defaulting to 0
            when absent.
        tiny_data_granules: Tables with fewer initial granules than this are
            always treated as narrowed.

    Returns:
        True when fewer granules were selected than were initially available,
        or when the initial granule count is below ``tiny_data_granules``.
    """
    total = index.get("Initial Granules", 0)
    chosen = index.get("Selected Granules", 0)
    if chosen < total:
        return True
    # Nothing was pruned; only a tiny table still counts as narrowed.
    return total < tiny_data_granules


def _range_and_primary_key_used(indexes: list[dict]) -> bool:
    """Return True when a MinMax or Partition index AND a multi-key PrimaryKey index narrowed the read."""
    min_max = False
    partition = False
    primary_key = False
    for index in indexes:
        # A condition of "true" filters nothing, so that index is ignored.
        if index.get("Condition", "") == "true":
            continue
        index_type = index.get("Type", "")
        if index_type == "MinMax":
            min_max = selected_less_granules(index)
        elif index_type == "Partition":
            partition = selected_less_granules(index)
        elif index_type == "PrimaryKey":
            # A primary key matched on a single key column is not counted.
            primary_key = len(index.get("Keys", [])) > 1 and selected_less_granules(index)
    return (min_max or partition) and primary_key


def guestimate_index_use(plan_with_indexes: dict) -> dict:
    """Heuristically classify how well one table read uses its indexes.

    Args:
        plan_with_indexes: A plan node; its ``"Description"`` is used as the
            table name and its ``"Indexes"`` list is inspected.

    Returns:
        A dict with ``"table"`` (the node description, ``""`` when missing)
        and ``"use"`` (a ``QueryIndexUsage`` member):

        * no ``"Indexes"`` key -> ``NO``;
        * ``*.person_distinct_id_overrides`` -> ``YES`` only when there is
          exactly one index, its condition is not ``"true"``, its keys include
          ``team_id`` and it narrowed the granules; otherwise ``NO``;
        * ``*.sharded_events`` -> ``YES`` when a MinMax or Partition index and
          a multi-key PrimaryKey index both narrowed the granules; otherwise
          ``NO``;
        * any other table -> same rule as ``sharded_events``, but falling back
          to ``UNDECISIVE`` instead of ``NO``.
    """
    db_table = plan_with_indexes.get("Description", "")
    result = {"table": db_table}

    if "Indexes" not in plan_with_indexes:
        result["use"] = QueryIndexUsage.NO
        return result

    indexes = plan_with_indexes["Indexes"]

    if db_table.endswith(".person_distinct_id_overrides"):
        result["use"] = QueryIndexUsage.NO
        if len(indexes) == 1:
            index = indexes[0]
            if (
                index.get("Condition", "") != "true"
                and "team_id" in index.get("Keys", [])
                and selected_less_granules(index)
            ):
                result["use"] = QueryIndexUsage.YES
        return result

    # Known table: a miss is a definite NO. Unknown table: we cannot tell.
    fallback = QueryIndexUsage.NO if db_table.endswith(".sharded_events") else QueryIndexUsage.UNDECISIVE
    result["use"] = QueryIndexUsage.YES if _range_and_primary_key_used(indexes) else fallback
    return result


def extract_index_usage_from_plan(plan: str) -> QueryIndexUsage:
    """Summarise index usage across all table reads of a JSON explain plan.

    Args:
        plan: JSON text whose top level is a list; the first element is the
            plan tree passed to ``find_all_reads``.

    Returns:
        * ``YES`` when every read is classified ``YES`` (this is vacuously
          the case when the plan contains no reads at all);
        * ``PARTIAL`` when at least one, but not every, read is ``YES``;
        * ``NO`` when every read is ``NO``;
        * ``UNDECISIVE`` otherwise, including when ``plan`` is not valid JSON
          or does not have the expected list-of-dict shape.
    """
    try:
        explain = json.loads(plan)
    except (json.JSONDecodeError, TypeError):
        # Best-effort: an unparsable (or missing) plan is simply inconclusive.
        return QueryIndexUsage.UNDECISIVE

    # Valid JSON of the wrong shape (e.g. `[]`, `{}`, `null`) previously
    # escaped as IndexError/KeyError/TypeError; treat it as inconclusive too.
    if not isinstance(explain, list) or not explain or not isinstance(explain[0], dict):
        return QueryIndexUsage.UNDECISIVE

    uses = [guestimate_index_use(read)["use"] for read in find_all_reads(explain[0])]

    if all(use == QueryIndexUsage.YES for use in uses):
        return QueryIndexUsage.YES
    if any(use == QueryIndexUsage.YES for use in uses):
        return QueryIndexUsage.PARTIAL
    if all(use == QueryIndexUsage.NO for use in uses):
        return QueryIndexUsage.NO
    return QueryIndexUsage.UNDECISIVE
Loading
Loading