-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmonitor
executable file
·238 lines (204 loc) · 9.49 KB
/
monitor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#!/bin/bash
# shellcheck disable=SC1091,SC2034
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Load the per-user engine-stressor configuration and the shared helpers.
# The constants file is mandatory: without it the script aborts with status 1.
CONFIG_DIR="$HOME/.config/engine-stressor"
CONSTANTS_FILE="$CONFIG_DIR/constants"

if [[ ! -f "$CONSTANTS_FILE" ]]; then
  # Diagnostics belong on stderr so they are not mixed into captured output.
  echo "Error: File $CONSTANTS_FILE does not exist." >&2
  exit 1
fi

source "$CONSTANTS_FILE"
# SHARE_DIR is not assigned in this file; presumably the constants file
# sourced above defines it -- TODO confirm.
source "$SHARE_DIR/common"
#######################################
# Sample vmstat for a fixed period and derive pressure thresholds from the
# averages observed during that period.
# Globals:
#   VMSTAT_INTERVAL, VMSTAT_COUNT (written)
#   avg_free, avg_si, avg_so, avg_bi, avg_bo, avg_wa (written)
#   MEMORY_PRESSURE_THRESHOLD, SWAP_IN_THRESHOLD, SWAP_OUT_THRESHOLD,
#   BLOCKS_IN_THRESHOLD, BLOCKS_OUT_THRESHOLD, IO_WAIT_THRESHOLD (written)
# Arguments:
#   $1 - baseline duration in whole seconds
# Outputs:
#   Progress and results on stdout, errors on stderr. vmstat output is piped
#   straight into awk, so no vmstat.log is left in the working directory.
# Returns:
#   0 on success. Exits the shell with status 1 when the duration is not a
#   non-negative integer or when no sample could be collected.
#######################################
system_baseline() {
  local baseline_time=${1:-}
  VMSTAT_INTERVAL=1 # Interval in seconds for vmstat sampling
  VMSTAT_COUNT=5    # Number of samples vmstat should take

  if [[ ! "$baseline_time" =~ ^[0-9]+$ ]]; then
    echo "Error: baseline time must be a non-negative integer (got '$baseline_time')." >&2
    exit 1
  fi

  echo "Establishing baseline metrics over $baseline_time seconds..."

  # Running totals across all sampling rounds
  local sum_free=0
  local sum_si=0
  local sum_so=0
  local sum_bi=0
  local sum_bo=0
  local sum_wa=0
  local count=0
  local stats d_free d_si d_so d_bi d_bo d_wa d_n

  # 10# forces base 10 so a value such as "08" is not parsed as octal.
  local deadline=$((SECONDS + 10#$baseline_time))

  while ((SECONDS < deadline)); do
    # One awk pass per round. Only rows whose first column is numeric are
    # samples; header lines (including repeated ones) and blank lines are
    # ignored instead of being counted as zero-valued samples.
    # "%.0f" keeps large sums in plain integer notation for $(( )).
    stats=$(vmstat "$VMSTAT_INTERVAL" "$VMSTAT_COUNT" | awk '
      $1 ~ /^[0-9]+$/ && NF >= 16 {
        free += $4; si += $7; so += $8; bi += $9; bo += $10; wa += $16; n++
      }
      END {
        printf "%.0f %.0f %.0f %.0f %.0f %.0f %d\n", free, si, so, bi, bo, wa, n
      }')
    read -r d_free d_si d_so d_bi d_bo d_wa d_n <<< "$stats"

    if ((${d_n:-0} > 0)); then
      sum_free=$((sum_free + d_free))
      sum_si=$((sum_si + d_si))
      sum_so=$((sum_so + d_so))
      sum_bi=$((sum_bi + d_bi))
      sum_bo=$((sum_bo + d_bo))
      sum_wa=$((sum_wa + d_wa))
      count=$((count + d_n))
    fi

    sleep "$VMSTAT_INTERVAL"
  done

  # Calculate average baseline values (integer division)
  if ((count == 0)); then
    echo "No valid data collected during baseline period." >&2
    exit 1
  fi
  avg_free=$((sum_free / count))
  avg_si=$((sum_si / count))
  avg_so=$((sum_so / count))
  avg_bi=$((sum_bi / count))
  avg_bo=$((sum_bo / count))
  avg_wa=$((sum_wa / count))

  echo "Baseline established:"
  echo "=============================="
  echo "Average Free Memory: $avg_free KB"
  echo "Average Swap In: $avg_si KB/s"
  echo "Average Swap Out: $avg_so KB/s"
  echo "Average Blocks In: $avg_bi blocks/s"
  echo "Average Blocks Out: $avg_bo blocks/s"
  echo "Average IO Wait: $avg_wa%"
  echo "=============================="

  # Set dynamic thresholds as deviations from the baseline:
  # half of the baseline free memory, twice every other baseline metric.
  MEMORY_PRESSURE_THRESHOLD=$((avg_free / 2))
  SWAP_IN_THRESHOLD=$((avg_si * 2))
  SWAP_OUT_THRESHOLD=$((avg_so * 2))
  BLOCKS_IN_THRESHOLD=$((avg_bi * 2))
  BLOCKS_OUT_THRESHOLD=$((avg_bo * 2))
  IO_WAIT_THRESHOLD=$((avg_wa * 2))

  echo "Dynamic thresholds set:"
  echo "=============================="
  echo "Free Memory Threshold: $MEMORY_PRESSURE_THRESHOLD KB"
  echo "Swap In Threshold: $SWAP_IN_THRESHOLD KB/s"
  echo "Swap Out Threshold: $SWAP_OUT_THRESHOLD KB/s"
  echo "Blocks In Threshold: $BLOCKS_IN_THRESHOLD blocks/s"
  echo "Blocks Out Threshold: $BLOCKS_OUT_THRESHOLD blocks/s"
  echo "IO Wait Threshold: $IO_WAIT_THRESHOLD%"
  echo "=============================="
}
#######################################
# Watch the system with vmstat for as long as any of the given processes is
# alive and report memory/disk pressure relative to the threshold globals.
# Globals:
#   VMSTAT_INTERVAL, VMSTAT_COUNT (read; fall back to 1 and 5 when unset)
#   MEMORY_PRESSURE_THRESHOLD, SWAP_IN_THRESHOLD, SWAP_OUT_THRESHOLD,
#   BLOCKS_IN_THRESHOLD, BLOCKS_OUT_THRESHOLD, IO_WAIT_THRESHOLD (read)
#   avg_free, avg_si, avg_so, avg_bi, avg_bo, avg_wa (written)
# Arguments:
#   $1    - seconds to sleep between monitoring rounds (default: 5)
#   $2... - PIDs to watch; monitoring stops once none of them is alive
# Outputs:
#   Status reports on stdout; a warning on stderr when a round yields no
#   samples. vmstat output is piped straight into awk, so no vmstat.log is
#   left in the working directory.
# Returns:
#   Does not return: calls `exit 0` once monitoring ends.
#######################################
system_ongoing() {
  # Default interval is 5 seconds if not provided
  local interval=${1:-5}
  local stress_pids=("${@:2}")
  local vmstat_interval=${VMSTAT_INTERVAL:-1}
  local vmstat_count=${VMSTAT_COUNT:-5}

  # Succeeds while at least one of the watched PIDs is still alive.
  is_stress_ng_running() {
    local pid
    for pid in "${stress_pids[@]}"; do
      if ps -p "$pid" > /dev/null 2>&1; then
        return 0
      fi
    done
    return 1
  }

  # Resolve the terminal attributes once; fall back to plain text when tput
  # cannot determine the terminal (e.g. TERM unset in a non-interactive run).
  local bold reset
  bold=$(tput bold 2> /dev/null) || bold=""
  reset=$(tput sgr0 2> /dev/null) || reset=""

  echo "Monitoring system performance every $interval seconds..."

  local memory_pressure_detected=0
  local disk_pressure_detected=0
  local memory_pressure disk_pressure system_state stats

  while is_stress_ng_running; do
    # One awk pass per round. Only rows whose first column is numeric are
    # samples; header lines (including repeated ones) and blank lines are
    # ignored. Nothing is printed when no sample was seen.
    stats=$(vmstat "$vmstat_interval" "$vmstat_count" | awk '
      $1 ~ /^[0-9]+$/ && NF >= 16 {
        free += $4; si += $7; so += $8; bi += $9; bo += $10; wa += $16; n++
      }
      END {
        if (n > 0)
          printf "%.0f %.0f %.0f %.0f %.0f %.0f\n",
            free / n, si / n, so / n, bi / n, bo / n, wa / n
      }')

    if [[ -z "$stats" ]]; then
      # Without data the averages would be empty and evaluate to 0 in
      # arithmetic, which would be misreported as memory pressure.
      echo "Warning: no vmstat samples collected; skipping this round." >&2
    else
      read -r avg_free avg_si avg_so avg_bi avg_bo avg_wa <<< "$stats"

      memory_pressure=0
      disk_pressure=0
      if ((avg_free < MEMORY_PRESSURE_THRESHOLD || avg_si > SWAP_IN_THRESHOLD || avg_so > SWAP_OUT_THRESHOLD)); then
        memory_pressure=1
      fi
      if ((avg_bi > BLOCKS_IN_THRESHOLD || avg_bo > BLOCKS_OUT_THRESHOLD || avg_wa > IO_WAIT_THRESHOLD)); then
        disk_pressure=1
      fi

      if ((memory_pressure || disk_pressure)); then
        # Memory pressure takes precedence in the headline when both apply.
        if ((memory_pressure)); then
          system_state="=== MEMORY PRESSURE DETECTED ==="
        else
          system_state="=== DISK PRESSURE DETECTED ==="
        fi
        # Print average values to the console with timestamp and system state
        echo "=============================="
        echo "System Performance Metrics"
        echo "This output includes a brief note about the current state of the system."
        date
        echo "$system_state"
        echo "Average Free Memory: ${bold}$avg_free KB${reset}"
        echo "Average Swap In: ${bold}$avg_si KB/s${reset}"
        echo "Average Swap Out: ${bold}$avg_so KB/s${reset}"
        echo "Average Blocks In: ${bold}$avg_bi blocks/s${reset}"
        echo "Average Blocks Out: ${bold}$avg_bo blocks/s${reset}"
        echo "Average IO Wait: ${bold}$avg_wa%${reset}"
        echo "=============================="
      else
        echo "==================================================="
        echo "$(date): === SYSTEM NORMAL: No memory or disk pressure detected ==="
        echo "==================================================="
      fi

      # Announce transitions into and out of memory pressure
      if ((memory_pressure && !memory_pressure_detected)); then
        echo "==================================================="
        echo "$(date): === MEMORY PRESSURE DETECTED ==="
        echo "==================================================="
        memory_pressure_detected=1
      elif ((!memory_pressure && memory_pressure_detected)); then
        echo "==================================================="
        echo "$(date): === SYSTEM RECOVERY: Memory pressure resolved ==="
        echo "==================================================="
        memory_pressure_detected=0
      fi

      # Announce transitions into and out of disk pressure
      if ((disk_pressure && !disk_pressure_detected)); then
        echo "================================================"
        echo "$(date): === DISK PRESSURE DETECTED ==="
        echo "================================================"
        disk_pressure_detected=1
      elif ((!disk_pressure && disk_pressure_detected)); then
        echo "================================================"
        echo "$(date): === SYSTEM RECOVERY: Disk pressure resolved ==="
        echo "================================================"
        disk_pressure_detected=0
      fi
    fi

    # Stop right away instead of sleeping once the watched processes are gone
    if ! is_stress_ng_running; then
      echo "Monitoring process terminating because all stress tests ended and system recovered."
      exit 0
    fi

    # Wait for the specified interval before the next iteration
    sleep "$interval"
  done

  echo "Monitoring process terminating because all stress-ng processes have exited."
  exit 0
}