From d6bc28495cc28fee959c4ee19513b2184a179040 Mon Sep 17 00:00:00 2001 From: Andrey Bazhutkin Date: Mon, 1 Mar 2021 10:48:49 +1100 Subject: [PATCH 1/3] Fix Aws::CloudWatch::Errors::InvalidParameterValue on startup Edited to use a simpler spec. --- lib/sidekiq/cloudwatchmetrics.rb | 2 ++ spec/sidekiq/cloudwatchmetrics_spec.rb | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/lib/sidekiq/cloudwatchmetrics.rb b/lib/sidekiq/cloudwatchmetrics.rb index f8b94cd..65cb09c 100644 --- a/lib/sidekiq/cloudwatchmetrics.rb +++ b/lib/sidekiq/cloudwatchmetrics.rb @@ -217,6 +217,8 @@ def publish # Returns busy / concurrency averaged across processes (for scaling) private def calculate_utilization(processes) + return 0.0 if processes.empty? + processes.map do |process| process["busy"] / process["concurrency"].to_f end.sum / processes.size.to_f diff --git a/spec/sidekiq/cloudwatchmetrics_spec.rb b/spec/sidekiq/cloudwatchmetrics_spec.rb index 8bfea16..b7664d1 100644 --- a/spec/sidekiq/cloudwatchmetrics_spec.rb +++ b/spec/sidekiq/cloudwatchmetrics_spec.rb @@ -292,6 +292,25 @@ end end end + + context "when there are no processes yet" do + let(:processes) { [] } + + it "publishes metrics including a utilization of 0 (not NaN)" do + Timecop.freeze(now = Time.now) do + publisher.publish + + expect(client).to have_received(:put_metric_data) { |metrics| + expect(metrics[:metric_data]).to include({ + metric_name: "Utilization", + timestamp: now, + value: 0.0, + unit: "Percent", + }) + } + end + end + end end describe "#stop" do From 854f6f339127baa60aeeda7d8d15b5d069b7f86d Mon Sep 17 00:00:00 2001 From: Samuel Cochran Date: Tue, 7 Mar 2023 10:22:55 +1100 Subject: [PATCH 2/3] Skip Utilization when it would be NaN --- lib/sidekiq/cloudwatchmetrics.rb | 14 +++++++++----- spec/sidekiq/cloudwatchmetrics_spec.rb | 9 ++------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/lib/sidekiq/cloudwatchmetrics.rb b/lib/sidekiq/cloudwatchmetrics.rb index 65cb09c..e0b9ab2 100644 --- a/lib/sidekiq/cloudwatchmetrics.rb +++ b/lib/sidekiq/cloudwatchmetrics.rb @@ -150,13 +150,18 @@ def publish value: calculate_capacity(processes), unit: "Count", }, - { + ] + + # Only publish utilization once there is some capacity, otherwise we'll + # publish metrics with an invalid value of NaN. + if processes.any? + metrics << { metric_name: "Utilization", timestamp: now, value: calculate_utilization(processes) * 100.0, unit: "Percent", - }, - ] + } + end processes.each do |process| process_dimensions = [{name: "Hostname", value: process["hostname"]}] @@ -199,6 +204,7 @@ def publish metric[:dimensions] = (metric[:dimensions] || []) + @additional_dimensions end end + # We can only put 20 metrics at a time metrics.each_slice(20) do |some_metrics| @client.put_metric_data( @@ -217,8 +223,6 @@ def publish # Returns busy / concurrency averaged across processes (for scaling) private def calculate_utilization(processes) - return 0.0 if processes.empty? - processes.map do |process| process["busy"] / process["concurrency"].to_f end.sum / processes.size.to_f diff --git a/spec/sidekiq/cloudwatchmetrics_spec.rb b/spec/sidekiq/cloudwatchmetrics_spec.rb index b7664d1..8872623 100644 --- a/spec/sidekiq/cloudwatchmetrics_spec.rb +++ b/spec/sidekiq/cloudwatchmetrics_spec.rb @@ -296,17 +296,12 @@ context "when there are no processes yet" do let(:processes) { [] } - it "publishes metrics including a utilization of 0 (not NaN)" do + it "does not publish Utilization (to avoid NaN values)" do Timecop.freeze(now = Time.now) do publisher.publish expect(client).to have_received(:put_metric_data) { |metrics| - expect(metrics[:metric_data]).to include({ - metric_name: "Utilization", - timestamp: now, - value: 0.0, - unit: "Percent", - }) + expect(metrics[:metric_data]).not_to include(hash_including(metric_name: "Utilization")) } end end From 32d3ed56c03d36ed8606f7994b4feefe712a4dad Mon Sep 17 00:00:00 2001 From: Samuel Cochran Date: Tue, 7 Mar 2023 10:42:26 +1100 Subject: [PATCH 3/3] More comprehensively handle potential NaNs --- lib/sidekiq/cloudwatchmetrics.rb | 43 +++++++++++++---------- spec/sidekiq/cloudwatchmetrics_spec.rb | 47 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/lib/sidekiq/cloudwatchmetrics.rb b/lib/sidekiq/cloudwatchmetrics.rb index e0b9ab2..b4739f3 100644 --- a/lib/sidekiq/cloudwatchmetrics.rb +++ b/lib/sidekiq/cloudwatchmetrics.rb @@ -152,31 +152,35 @@ def publish }, ] - # Only publish utilization once there is some capacity, otherwise we'll - # publish metrics with an invalid value of NaN. - if processes.any? + utilization = calculate_utilization(processes) * 100.0 + + unless utilization.nan? metrics << { metric_name: "Utilization", timestamp: now, - value: calculate_utilization(processes) * 100.0, + value: utilization, unit: "Percent", } end processes.each do |process| - process_dimensions = [{name: "Hostname", value: process["hostname"]}] - - if process["tag"] - process_dimensions << {name: "Tag", value: process["tag"]} + process_utilization = process["busy"] / process["concurrency"].to_f * 100.0 + + unless process_utilization.nan? + process_dimensions = [{name: "Hostname", value: process["hostname"]}] + + if process["tag"] + process_dimensions << {name: "Tag", value: process["tag"]} + end + + metrics << { + metric_name: "Utilization", + dimensions: process_dimensions, + timestamp: now, + value: process_utilization, + unit: "Percent", + } end - - metrics << { - metric_name: "Utilization", - dimensions: process_dimensions, - timestamp: now, - value: process["busy"] / process["concurrency"].to_f * 100.0, - unit: "Percent", - } end queues.each do |(queue_name, queue_size)| @@ -222,10 +226,13 @@ def publish end # Returns busy / concurrency averaged across processes (for scaling) + # Avoid considering processes not yet running any threads private def calculate_utilization(processes) - processes.map do |process| + process_utilizations = processes.map do |process| process["busy"] / process["concurrency"].to_f - end.sum / processes.size.to_f + end.reject(&:nan?) + + process_utilizations.sum / process_utilizations.size.to_f end def quiet diff --git a/spec/sidekiq/cloudwatchmetrics_spec.rb b/spec/sidekiq/cloudwatchmetrics_spec.rb index 8872623..c1d52d5 100644 --- a/spec/sidekiq/cloudwatchmetrics_spec.rb +++ b/spec/sidekiq/cloudwatchmetrics_spec.rb @@ -306,6 +306,53 @@ end end end + + context "when the only process has no threads yet" do + let(:processes) { [Sidekiq::Process.new("busy" => 0, "concurrency" => 0, "hostname" => "foo")] } + + it "does not publish Utilization (to avoid NaN values)" do + Timecop.freeze(now = Time.now) do + publisher.publish + + expect(client).to have_received(:put_metric_data) { |metrics| + expect(metrics[:metric_data]).not_to include(hash_including(metric_name: "Utilization")) + } + end + end + end + + context "when only one process has no threads yet" do + let(:processes) { [ + Sidekiq::Process.new("busy" => 0, "concurrency" => 0, "hostname" => "foo"), + Sidekiq::Process.new("busy" => 2, "concurrency" => 4, "hostname" => "bar"), + ] } + + it "publishes partial Utilization (to avoid NaN values)" do + Timecop.freeze(now = Time.now) do + publisher.publish + + expect(client).to have_received(:put_metric_data) { |metrics| + utilization_data = metrics[:metric_data].select { |data| data[:metric_name] == "Utilization" } + + expect(utilization_data).to contain_exactly( + { + metric_name: "Utilization", + timestamp: now, + value: 50.0, + unit: "Percent", + }, + { + metric_name: "Utilization", + dimensions: [{name: "Hostname", value: "bar"}], + timestamp: now, + unit: "Percent", + value: 50.0, + }, + ) + } + end + end + end end describe "#stop" do