diff --git a/lib/sidekiq/cloudwatchmetrics.rb b/lib/sidekiq/cloudwatchmetrics.rb index f8b94cd..b4739f3 100644 --- a/lib/sidekiq/cloudwatchmetrics.rb +++ b/lib/sidekiq/cloudwatchmetrics.rb @@ -150,30 +150,39 @@ def publish value: calculate_capacity(processes), unit: "Count", }, - { - metric_name: "Utilization", - timestamp: now, - value: calculate_utilization(processes) * 100.0, - unit: "Percent", - }, ] - processes.each do |process| - process_dimensions = [{name: "Hostname", value: process["hostname"]}] - - if process["tag"] - process_dimensions << {name: "Tag", value: process["tag"]} - end + utilization = calculate_utilization(processes) * 100.0 + unless utilization.nan? metrics << { metric_name: "Utilization", - dimensions: process_dimensions, timestamp: now, - value: process["busy"] / process["concurrency"].to_f * 100.0, + value: utilization, unit: "Percent", } end + processes.each do |process| + process_utilization = process["busy"] / process["concurrency"].to_f * 100.0 + + unless process_utilization.nan? + process_dimensions = [{name: "Hostname", value: process["hostname"]}] + + if process["tag"] + process_dimensions << {name: "Tag", value: process["tag"]} + end + + metrics << { + metric_name: "Utilization", + dimensions: process_dimensions, + timestamp: now, + value: process_utilization, + unit: "Percent", + } + end + end + queues.each do |(queue_name, queue_size)| metrics << { metric_name: "QueueSize", @@ -199,6 +208,7 @@ def publish metric[:dimensions] = (metric[:dimensions] || []) + @additional_dimensions end end + # We can only put 20 metrics at a time metrics.each_slice(20) do |some_metrics| @client.put_metric_data( @@ -216,10 +226,13 @@ def publish end # Returns busy / concurrency averaged across processes (for scaling) + # Avoid considering processes not yet running any threads private def calculate_utilization(processes) - processes.map do |process| + process_utilizations = processes.map do |process| process["busy"] / process["concurrency"].to_f - end.sum / processes.size.to_f + end.reject(&:nan?) + + process_utilizations.sum / process_utilizations.size.to_f end def quiet diff --git a/spec/sidekiq/cloudwatchmetrics_spec.rb b/spec/sidekiq/cloudwatchmetrics_spec.rb index 8bfea16..c1d52d5 100644 --- a/spec/sidekiq/cloudwatchmetrics_spec.rb +++ b/spec/sidekiq/cloudwatchmetrics_spec.rb @@ -292,6 +292,67 @@ end end end + + context "when there are no processes yet" do + let(:processes) { [] } + + it "does not publish Utilization (to avoid NaN values)" do + Timecop.freeze(now = Time.now) do + publisher.publish + + expect(client).to have_received(:put_metric_data) { |metrics| + expect(metrics[:metric_data]).not_to include(hash_including(metric_name: "Utilization")) + } + end + end + end + + context "when the only process has no threads yet" do + let(:processes) { [Sidekiq::Process.new("busy" => 0, "concurrency" => 0, "hostname" => "foo")] } + + it "does not publish Utilization (to avoid NaN values)" do + Timecop.freeze(now = Time.now) do + publisher.publish + + expect(client).to have_received(:put_metric_data) { |metrics| + expect(metrics[:metric_data]).not_to include(hash_including(metric_name: "Utilization")) + } + end + end + end + + context "when only one process has no threads yet" do + let(:processes) { [ + Sidekiq::Process.new("busy" => 0, "concurrency" => 0, "hostname" => "foo"), + Sidekiq::Process.new("busy" => 2, "concurrency" => 4, "hostname" => "bar"), + ] } + + it "publishes partial Utilization (to avoid NaN values)" do + Timecop.freeze(now = Time.now) do + publisher.publish + + expect(client).to have_received(:put_metric_data) { |metrics| + utilization_data = metrics[:metric_data].select { |data| data[:metric_name] == "Utilization" } + + expect(utilization_data).to contain_exactly( + { + metric_name: "Utilization", + timestamp: now, + value: 50.0, + unit: "Percent", + }, + { + metric_name: "Utilization", + dimensions: [{name: "Hostname", value: "bar"}], + timestamp: now, + unit: "Percent", + value: 50.0, + }, + ) + } + end + end + end end describe "#stop" do