Adjust Prometheus rules to leave time for backups
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -12,7 +12,7 @@ groups:
|
||||
summary: Host out of memory (instance {{ $labels.instance }})
|
||||
description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
- alert: HostMemoryUnderMemoryPressure
|
||||
expr: 'rate(node_vmstat_pgmajfault[1m]) > 1000'
|
||||
expr: 'rate(node_vmstat_pgmajfault[1m]) > 1000 unless on() hour()>=0 <=3'
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
@@ -108,7 +108,7 @@ groups:
|
||||
summary: Host unusual disk write latency (instance {{ $labels.instance }})
|
||||
description: "Disk latency is growing (write operations > 100ms)\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
|
||||
- alert: HostHighCpuLoad
|
||||
expr: '(100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100)) > 80'
|
||||
expr: '(100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100)) > 80 unless on() hour()>=0 <=3'
|
||||
for: 2m
|
||||
labels:
|
||||
severity: warning
|
||||
|
Reference in New Issue
Block a user