Disable the IO wait and unusual disk IO alerts, as we have no means of influencing them
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Tobias Manske 2023-11-12 15:57:45 +01:00
parent 5d220348b7
commit fad6404c15
Signed by: tobias
GPG Key ID: 9164B527694A0709

View File

@@ -131,22 +131,22 @@ groups:
annotations: annotations:
summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }}) summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }})
description: "CPU steal is > 10%. A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" description: "CPU steal is > 10%. A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostCpuHighIowait # - alert: HostCpuHighIowait
expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 15' # expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 15'
for: 0m # for: 0m
labels: # labels:
severity: warning # severity: warning
annotations: # annotations:
summary: Host CPU high iowait (instance {{ $labels.instance }}) # summary: Host CPU high iowait (instance {{ $labels.instance }})
description: "CPU iowait > 15%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" # description: "CPU iowait > 15%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: HostUnusualDiskIo # - alert: HostUnusualDiskIo
expr: 'rate(node_disk_io_time_seconds_total[1m]) > 0.5' # expr: 'rate(node_disk_io_time_seconds_total[1m]) > 0.5'
for: 5m # for: 5m
labels: # labels:
severity: warning # severity: warning
annotations: # annotations:
summary: Host unusual disk IO (instance {{ $labels.instance }}) # summary: Host unusual disk IO (instance {{ $labels.instance }})
description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" # description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# - alert: HostContextSwitching # - alert: HostContextSwitching
# expr: '(rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000' # expr: '(rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000'
# for: 0m # for: 0m