Prometheus: Increase tolerable IoWait percentage
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Tobias Manske 2023-04-19 21:40:48 +02:00
parent c44c730dcc
commit a35dc12ba8
Signed by: tobias
GPG Key ID: 9164B527694A0709

View File

@ -132,13 +132,13 @@ groups:
summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }})
description: "CPU steal is > 10%. A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostCpuHighIowait
expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 10'
expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 15'
for: 0m
labels:
severity: warning
annotations:
summary: Host CPU high iowait (instance {{ $labels.instance }})
description: "CPU iowait > 10%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
description: "CPU iowait > 15%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostUnusualDiskIo
expr: 'rate(node_disk_io_time_seconds_total[1m]) > 0.5'
for: 5m