From a35dc12ba870387cfac63ca465e65cf5db904713 Mon Sep 17 00:00:00 2001 From: Tobias Manske Date: Wed, 19 Apr 2023 21:40:48 +0200 Subject: [PATCH] Prometheus: Increase tolerable IoWait percentage --- .../compose_project/templates/prometheus/rules/node.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/coreos-config/roles/compose_project/templates/prometheus/rules/node.yaml b/coreos-config/roles/compose_project/templates/prometheus/rules/node.yaml index 5066531..f49bfce 100644 --- a/coreos-config/roles/compose_project/templates/prometheus/rules/node.yaml +++ b/coreos-config/roles/compose_project/templates/prometheus/rules/node.yaml @@ -132,13 +132,13 @@ groups: summary: Host CPU steal noisy neighbor (instance {{ $labels.instance }}) description: "CPU steal is > 10%. A noisy neighbor is killing VM performances or a spot instance may be out of credit.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: HostCpuHighIowait - expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 10' + expr: 'avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) * 100 > 15' for: 0m labels: severity: warning annotations: summary: Host CPU high iowait (instance {{ $labels.instance }}) - description: "CPU iowait > 10%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + description: "CPU iowait > 15%. A high iowait means that you are disk or network bound.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" - alert: HostUnusualDiskIo expr: 'rate(node_disk_io_time_seconds_total[1m]) > 0.5' for: 5m