From fad6404c156fdc60b889796af839892213a947b5 Mon Sep 17 00:00:00 2001 From: Tobias Manske Date: Sun, 12 Nov 2023 15:57:45 +0100 Subject: [PATCH] Disable IO Wait warnings as we do not have means of influencing them --- .../plays/services/prometheus/rules/node.yaml | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/ansible/plays/services/prometheus/rules/node.yaml b/ansible/plays/services/prometheus/rules/node.yaml index f49bfce..b2822d6 100644 --- a/ansible/plays/services/prometheus/rules/node.yaml +++ b/ansible/plays/services/prometheus/rules/node.yaml @@ -131,22 +131,22 @@ groups: annotations: summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }}) description: "CPU steal is > 10%. A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - - alert: HostCpuHighIowait - expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 15' - for: 0m - labels: - severity: warning - annotations: - summary: Host CPU high iowait (instance {{ $labels.instance }}) - description: "CPU iowait > 15%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - - alert: HostUnusualDiskIo - expr: 'rate(node_disk_io_time_seconds_total[1m]) > 0.5' - for: 5m - labels: - severity: warning - annotations: - summary: Host unusual disk IO (instance {{ $labels.instance }}) - description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + # - alert: HostCpuHighIowait + # expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 15' + # for: 0m + # labels: + # severity: warning + # annotations: + # summary: Host CPU high iowait (instance {{ $labels.instance }}) + # description: "CPU iowait > 15%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + # - alert: HostUnusualDiskIo + # expr: 'rate(node_disk_io_time_seconds_total[1m]) > 0.5' + # for: 5m + # labels: + # severity: warning + # annotations: + # summary: Host unusual disk IO (instance {{ $labels.instance }}) + # description: "Time spent in IO is too high on {{ $labels.instance }}. Check storage for issues.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" # - alert: HostContextSwitching # expr: '(rate(node_context_switches_total[5m])) / (count without(cpu, mode) (node_cpu_seconds_total{mode="idle"})) > 1000' # for: 0m