From a9d281dbbaf06c5bae67f738b91fe12e8d5b3189 Mon Sep 17 00:00:00 2001 From: LeaveMyYard Date: Mon, 4 Sep 2023 17:00:32 +0300 Subject: [PATCH 1/2] Aggregate in case of node change --- .../integrations/prometheus/metrics/cpu.py | 52 +++++++++++-------- .../integrations/prometheus/metrics/memory.py | 30 ++++++----- robusta_krr/strategies/simple.py | 2 +- 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/robusta_krr/core/integrations/prometheus/metrics/cpu.py b/robusta_krr/core/integrations/prometheus/metrics/cpu.py index b9c281dc..e61a9867 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/cpu.py +++ b/robusta_krr/core/integrations/prometheus/metrics/cpu.py @@ -8,14 +8,16 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: pods_selector = "|".join(pod.name for pod in object.pods) cluster_label = self.get_prometheus_cluster_label() return f""" - rate( - container_cpu_usage_seconds_total{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{step}] - ) + sum( + rate( + container_cpu_usage_seconds_total{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }}[{step}] + ) + ) by (container, pod, job) """ @@ -27,14 +29,17 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: return f""" quantile_over_time( {round(percentile / 100, 2)}, - rate( - container_cpu_usage_seconds_total{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{step}] - )[{duration}:{step}] + sum( + rate( + container_cpu_usage_seconds_total{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }}[{step}] + ) + ) by (container, pod, job) + [{duration}:{step}] ) """ @@ -47,11 +52,14 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: cluster_label = self.get_prometheus_cluster_label() return f""" count_over_time( - container_cpu_usage_seconds_total{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{duration}] + sum( + container_cpu_usage_seconds_total{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }} + ) by (container, pod, job) + [{duration}:{step}] ) """ diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory.py b/robusta_krr/core/integrations/prometheus/metrics/memory.py index 21843b90..dc75f918 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/memory.py +++ b/robusta_krr/core/integrations/prometheus/metrics/memory.py @@ -25,12 +25,15 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: cluster_label = self.get_prometheus_cluster_label() return f""" max_over_time( - container_memory_working_set_bytes{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{duration}:{step}] + sum( + container_memory_working_set_bytes{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }} + ) by (container, pod, job) + [{duration}:{step}] ) """ @@ -41,11 +44,14 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: cluster_label = self.get_prometheus_cluster_label() return f""" count_over_time( - container_memory_working_set_bytes{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{duration}:{step}] + sum( + container_memory_working_set_bytes{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }} + ) by (container, pod, job) + [{duration}:{step}] ) """ diff --git a/robusta_krr/strategies/simple.py b/robusta_krr/strategies/simple.py index 93efb5f1..465a6f88 100644 --- a/robusta_krr/strategies/simple.py +++ b/robusta_krr/strategies/simple.py @@ -88,7 +88,7 @@ def __calculate_cpu_proposal( if object_data.hpa is not None and object_data.hpa.target_cpu_utilization_percentage is not None: return ResourceRecommendation.undefined(info="HPA detected") - cpu_usage = self.settings.calculate_cpu_proposal(data) + cpu_usage = self.settings.calculate_cpu_proposal(filtered_data) return ResourceRecommendation(request=cpu_usage, limit=None) def __calculate_memory_proposal( From fc7f4ee436ae146ab8749ce9a00d0f5f436614fa Mon Sep 17 00:00:00 2001 From: LeaveMyYard Date: Tue, 5 Sep 2023 12:59:28 +0300 Subject: [PATCH 2/2] Replace sum aggregation to max aggregation --- robusta_krr/core/integrations/prometheus/metrics/cpu.py | 8 ++++---- .../core/integrations/prometheus/metrics/memory.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/robusta_krr/core/integrations/prometheus/metrics/cpu.py b/robusta_krr/core/integrations/prometheus/metrics/cpu.py index e61a9867..a965b3fc 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/cpu.py +++ b/robusta_krr/core/integrations/prometheus/metrics/cpu.py @@ -8,7 +8,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: pods_selector = "|".join(pod.name for pod in object.pods) cluster_label = self.get_prometheus_cluster_label() return f""" - sum( + max( rate( container_cpu_usage_seconds_total{{ namespace="{object.namespace}", @@ -16,7 +16,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: container="{object.container}" {cluster_label} }}[{step}] - ) + ) ) by (container, pod, job) """ @@ -29,7 +29,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: return f""" quantile_over_time( {round(percentile / 100, 2)}, - sum( + max( rate( container_cpu_usage_seconds_total{{ namespace="{object.namespace}", @@ -52,7 +52,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: cluster_label = self.get_prometheus_cluster_label() return f""" count_over_time( - sum( + max( container_cpu_usage_seconds_total{{ namespace="{object.namespace}", pod=~"{pods_selector}", diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory.py b/robusta_krr/core/integrations/prometheus/metrics/memory.py index dc75f918..bc474678 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/memory.py +++ b/robusta_krr/core/integrations/prometheus/metrics/memory.py @@ -8,7 +8,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: pods_selector = "|".join(pod.name for pod in object.pods) cluster_label = self.get_prometheus_cluster_label() return f""" - sum( + max( container_memory_working_set_bytes{{ namespace="{object.namespace}", pod=~"{pods_selector}", @@ -25,7 +25,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: cluster_label = self.get_prometheus_cluster_label() return f""" max_over_time( - sum( + max( container_memory_working_set_bytes{{ namespace="{object.namespace}", pod=~"{pods_selector}", @@ -44,7 +44,7 @@ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: cluster_label = self.get_prometheus_cluster_label() return f""" count_over_time( - sum( + max( container_memory_working_set_bytes{{ namespace="{object.namespace}", pod=~"{pods_selector}",