Skip to content

Commit

Permalink
Adds improved handling for Metrics over Time (#772)
Browse files Browse the repository at this point in the history
This adds new and improved handling for Metrics over Time.

The overall execution time of the background tasks has been reduced, and memory management is now far more efficient.

In addition to the improved core handling of the feature, performance metrics for metrics over time will NO LONGER BE WRITTEN.

This significantly improves the performance of graphing solutions like InfluxDB, while monitoring with the "-ThresholdInterval" argument remains possible.

```powershell
PS> Invoke-IcingaCheckCPU -Warning '5%' -ThresholdInterval '10m';
[WARNING] CPU Load [WARNING] Overall Load, Socket #0
\_ [WARNING] Overall Load: Value 6.546175% is greater than threshold 5% (10m Avg.) 
\_ [WARNING] Socket #0 
     \_ [WARNING] Core 0: Value 18.391566% is greater than threshold 5% (10m Avg.) 
     \_ [WARNING] Core 1: Value 14.100505% is greater than threshold 5% (10m Avg.)
     \_ [WARNING] Core Total: Value 6.546175% is greater than threshold 5% (10m Avg.)
| totalload::ifw_cpu::load=5.804053;5;;0;100 0_0::ifw_cpu::load=18.03764;5;;0;100 0_1::ifw_cpu::load=9.36611;5;;0;100 0_2::ifw_cpu::load=5.830669;5;;0;100 0_3::ifw_cpu::load=0.646737;5;;0;100 0_4::ifw_cpu::load=0.926955;5;;0;100 0_5::ifw_cpu::load=0.016205;5;;0;100 0_total::ifw_cpu::load=5.804053;5;;0;100
```
  • Loading branch information
LordHepipud authored Jan 29, 2025
1 parent 08bea53 commit f5d9ac9
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 113 deletions.
4 changes: 4 additions & 0 deletions doc/100-General/10-Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ Released closed milestones can be found on [GitHub](https://github.com/Icinga/ic

* [#759](https://github.com/Icinga/icinga-powershell-framework/pull/759) Fixes maximum cache duration for service daemons to the right value

### Enhancements

* [#772](https://github.com/Icinga/icinga-powershell-framework/pull/772) Adds new Metric over Time handling

## 1.13.0 Beta-2 (2024-09-19)

[Issues and PRs](https://github.com/Icinga/icinga-powershell-framework/milestone/36)
Expand Down
7 changes: 5 additions & 2 deletions lib/core/framework/New-IcingaEnvironmentVariable.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,18 @@ function New-IcingaEnvironmentVariable()
$Global:Icinga.Private.Add(
'Scheduler',
@{
'CheckCommand' = '';
'CheckData' = @{ };
'ThresholdCache' = @{ };
'CheckResults' = @();
'PerformanceData' = '';
'PluginException' = $null;
'ExitCode' = $null;
'PerfDataWriter' = @{
'Cache' = @{};
'Storage' = (New-Object System.Text.StringBuilder);
'Cache' = @{ };
'Storage' = (New-Object System.Text.StringBuilder);
'Daemon' = @{ };
'MetricsOverTime' = '';
}
}
);
Expand Down
140 changes: 68 additions & 72 deletions lib/daemons/ServiceCheckDaemon/task/Add-IcingaServiceCheckTask.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,14 @@ function Add-IcingaServiceCheckTask()
# Read our check result store data from disk for this service check
Read-IcingaCheckResultStore -CheckCommand $CheckCommand;

[int]$CheckInterval = ConvertTo-Seconds $Interval;
$MetricCacheFile = Join-Path -Path (Join-Path -Path (Join-Path -Path (Get-IcingaCacheDir) -ChildPath 'service_check_cache') -ChildPath 'metrics') -ChildPath ([string]::Format('{0}.xml', $CheckCommand));
[int]$CheckInterval = ConvertTo-Seconds $Interval;
[hashtable]$CheckDataCache = @{ };
[array]$PerfDataEntries = @();

if (Test-Path -Path $MetricCacheFile) {
$CheckDataCache = [System.Management.Automation.PSSerializer]::Deserialize((Get-Content -Path $MetricCacheFile -Raw -Encoding UTF8));
}

while ($TRUE) {
if ($Global:Icinga.Private.Daemons.ServiceCheck.PassedTime -lt $CheckInterval) {
Expand All @@ -29,6 +36,9 @@ function Add-IcingaServiceCheckTask()

$Global:Icinga.Private.Daemons.ServiceCheck.PassedTime = 0;

# Clear possible previous performance data from the daemon cache
$Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon.Clear();

# Execute our check with possible arguments
try {
& $CheckCommand @Arguments | Out-Null;
Expand All @@ -45,93 +55,79 @@ function Add-IcingaServiceCheckTask()

$UnixTime = Get-IcingaUnixTime;

try {
foreach ($result in $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'].Keys) {
[string]$HashIndex = $result;
$Global:Icinga.Private.Daemons.ServiceCheck.SortedResult = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$HashIndex].GetEnumerator() | Sort-Object name -Descending;

Add-IcingaHashtableItem `
-Hashtable $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache `
-Key $HashIndex `
-Value @{ } | Out-Null;
foreach ($PerfLabel in $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon.Keys) {
$PerfValue = $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon[$PerfLabel].Value;
$PerfUnit = $Global:Icinga.Private.Scheduler.PerfDataWriter.Daemon[$PerfLabel].Unit;

foreach ($timeEntry in $Global:Icinga.Private.Daemons.ServiceCheck.SortedResult) {

if ((Test-Numeric $timeEntry.Value) -eq $FALSE) {
continue;
}
if ($CheckDataCache.ContainsKey($PerfLabel) -eq $FALSE) {
$CheckDataCache.Add($PerfLabel, (New-Object System.Collections.ArrayList));
}

foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) {
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time) -le [int]$timeEntry.Key) {
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum += $timeEntry.Value;
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count += 1;
}
}
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -le [int]$timeEntry.Key) {
Add-IcingaHashtableItem `
-Hashtable $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache[$HashIndex] `
-Key ([string]$timeEntry.Key) `
-Value ([string]$timeEntry.Value) | Out-Null;
}
$CheckDataCache[$PerfLabel].Add(
@{
'Time' = $UnixTime;
'Value' = $PerfValue;
'Unit' = $PerfUnit;
}
) | Out-Null;

[int]$IndexCount = $CheckDataCache[$PerfLabel].Count;
[int]$RemoveIndex = 0;
for ($i = 0; $i -lt $IndexCount; $i++) {
# In case we store more values than we require for our max time range, remove the oldest one
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -gt [int]($CheckDataCache[$PerfLabel][$i].Time)) {
$RemoveIndex += 1;
continue;
}

# Calculate the average value for our performance data based on the remaining data
foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) {
if ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count -ne 0) {
$AverageValue = ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum / $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count);
[string]$MetricMultiName = [string]::Format('::{0}::Interval{1}', (Format-IcingaPerfDataLabel -PerfData $HashIndex -MultiOutput), $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time);
$Global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average'] | Add-Member -MemberType NoteProperty -Name $MetricMultiName -Value $AverageValue -Force;
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time) -le [int]($CheckDataCache[$PerfLabel][$i].Time)) {
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum += $CheckDataCache[$PerfLabel][$i].Value;
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count += 1;
}
}
}

$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum = 0;
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count = 0;
# Remove older entries more efficiently. As we store the data in an ArrayList, the oldest entries are at the beginning
# Therefore we can just remove a range of entries from the beginning of the list or clear the list if we need to remove all entries
if ($RemoveIndex -gt 0) {
if ($RemoveIndex -ge $IndexCount) {
$CheckDataCache[$PerfLabel].Clear() | Out-Null;
} else {
$CheckDataCache[$PerfLabel].RemoveRange(0, $RemoveIndex) | Out-Null;
}
$RemoveIndex = 0;
}

Write-IcingaDebugMessage `
-Message 'Object dump of service check daemon' `
-Objects @(
$CheckCommand,
'Average Calc',
($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation | Out-String),
'PerformanceCache',
$Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache,
'Max Time in Seconds',
$Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds,
'Unix Time',
$UnixTime
);

# Flush data we no longer require in our cache to free memory
[array]$CheckStores = $Global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'].Keys;

foreach ($CheckStore in $CheckStores) {
[string]$CheckKey = $CheckStore;
[array]$CheckTimeStamps = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$CheckKey].Keys;

foreach ($TimeSample in $CheckTimeStamps) {
if (($UnixTime - $Global:Icinga.Private.Daemons.ServiceCheck.MaxTimeInSeconds) -gt [int]$TimeSample) {
Remove-IcingaHashtableItem -Hashtable $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['results'][$CheckKey] -Key ([string]$TimeSample);
}
# Now calculate the average values for our performance data
foreach ($calc in $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation.Keys) {
if ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count -ne 0) {
$AverageValue = ($Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum / $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count);
[string]$MetricMultiName = [string]::Format('{0}::Interval{1}={2}{3}', $PerfLabel, $Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Time, (Format-IcingaPerfDataValue $AverageValue), $PerfUnit);
# Write our performance data label
$PerfDataEntries += $MetricMultiName;
}

$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Sum = 0;
$Global:Icinga.Private.Daemons.ServiceCheck.AverageCalculation[$calc].Count = 0;
}
}

Set-IcingaCacheData -Space 'sc_daemon' -CacheStore 'checkresult' -KeyName $CheckCommand -Value $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average'];
$Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] = $PerfDataEntries -Join ' ';
$PerfDataEntries = @();

# Make the performance data available for all threads
$Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] = $global:Icinga.Private.Scheduler.CheckData[$CheckCommand]['average'];
# Write collected metrics to disk in case we reload the daemon. We will load them back into the module after reload then
Set-IcingaCacheData -Space 'sc_daemon' -CacheStore 'checkresult_store' -KeyName $CheckCommand -Value $Global:Icinga.Private.Daemons.ServiceCheck.PerformanceCache;
$PerformanceLabelFile = Join-Path -Path (Join-Path -Path (Join-Path -Path (Get-IcingaCacheDir) -ChildPath 'service_check_cache') -ChildPath 'performance_labels') -ChildPath ([string]::Format('{0}.db', $CheckCommand));
$CheckCacheXMLObj = [System.Management.Automation.PSSerializer]::Serialize($CheckDataCache);

} catch {
Write-IcingaEventMessage -EventId 1452 -Namespace 'Framework' -ExceptionObject $_ -Objects $CheckCommand, ($Arguments | Out-String), (Get-IcingaInternalPluginOutput);
if ((Test-Path -Path $PerformanceLabelFile) -eq $FALSE) {
New-Item -Path $PerformanceLabelFile -ItemType File -Force | Out-Null;
}
if ((Test-Path -Path $MetricCacheFile) -eq $FALSE) {
New-Item -Path $MetricCacheFile -ItemType File -Force | Out-Null;
}

# Always ensure our check data is cleared regardless of possible
# exceptions which might occur
Clear-IcingaCheckSchedulerEnvironment;
# Reset certain values from the scheduler environment
Clear-IcingaServiceCheckDaemonEnvironment;
# Force Icinga for Windows Garbage Collection
Optimize-IcingaForWindowsMemory -ClearErrorStack -SmartGC;
Set-Content -Path $PerformanceLabelFile -Value $Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache[$CheckCommand] -Force -Encoding UTF8;
Set-Content -Path $MetricCacheFile -Value $CheckCacheXMLObj -Force -Encoding UTF8;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ function New-IcingaServiceCheckDaemonEnvironment()

if ($Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache.ContainsKey($CheckCommand) -eq $FALSE) {
$Global:Icinga.Public.Daemons.ServiceCheck.PerformanceDataCache.Add(
$CheckCommand, @{ }
$CheckCommand, ''
);
}

Expand Down
16 changes: 10 additions & 6 deletions lib/icinga/plugin/Compare-IcingaPluginThresholds.psm1
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ function Compare-IcingaPluginThresholds()
# Fix possible numeric value comparison issues
$TestInput = Test-IcingaDecimal $InputValue;
$BaseInput = Test-IcingaDecimal $BaseValue;
$MoTData = @{
'Label' = $PerfDataLabel;
'Interval' = $TimeInterval;
};

if ($TestInput.Decimal) {
[decimal]$InputValue = [decimal]$TestInput.Value;
Expand Down Expand Up @@ -132,17 +136,17 @@ function Compare-IcingaPluginThresholds()
$CheckResult = $null;

if ($Matches) {
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Matches;
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Matches -MetricsOverTime $MoTData;
} elseif ($NotMatches) {
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.NotMatches;
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.NotMatches -MetricsOverTime $MoTData;
} elseif ($IsBetween) {
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Between;
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.Between -MetricsOverTime $MoTData;
} elseif ($IsLowerEqual) {
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.LowerEqual;
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.LowerEqual -MetricsOverTime $MoTData;
} elseif ($IsGreaterEqual) {
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.GreaterEqual;
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -OverrideMode $IcingaEnums.IcingaThresholdMethod.GreaterEqual -MetricsOverTime $MoTData;
} else {
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation;
$CheckResult = Compare-IcingaPluginValueToThreshold -Value $InputValue -BaseValue $IcingaThresholds.BaseValue -Threshold $IcingaThresholds.Threshold -Unit $Unit -Translation $Translation -MetricsOverTime $MoTData;
}

$IcingaThresholds.Message = $CheckResult.Message;
Expand Down
Loading

0 comments on commit f5d9ac9

Please sign in to comment.