Split monitoring/logging stack so clients can federate onto the server

This commit is contained in:
2024-01-09 23:31:44 +01:00
parent f9aeac3285
commit 1379ccb2eb
37 changed files with 822 additions and 771 deletions

@ -0,0 +1 @@
COMPOSE_PROJECT_NAME=grafana

@ -0,0 +1,43 @@
version: "3.4"
services:
grafana:
image: grafana/grafana:latest
restart: unless-stopped
labels:
- "traefik.enable=true"
- "traefik.http.routers.grafana.rule=Host(`grafana.tobiasmanske.de`)"
- "traefik.http.routers.grafana.entryPoints=websecure"
- "traefik.http.services.grafana.loadbalancer.server.port=3000"
networks:
- backend
- default
environment:
- "GF_SERVER_ROOT_URL=https://grafana.tobiasmanske.de"
- "GF_SECURITY_ADMIN_USER={{ grafana.admin.user }}"
- "GF_SECURITY_ADMIN_PASSWORD={{ grafana.admin.password }}"
- "GF_AUTH_GENERIC_OAUTH_NAME=Keycloak"
- "GF_AUTH_GENERIC_OAUTH_ENABLED=true"
- "GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP=true"
- "GF_AUTH_GENERIC_OAUTH_CLIENT_ID={{ grafana.oidc.client_id }}"
- "GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET={{ grafana.oidc.client_secret }}"
- "GF_AUTH_GENERIC_OAUTH_SCOPES=openid email profile offline_access roles"
- "GF_AUTH_GENERIC_OAUTH_GROUP_ATTRIBUTE_PATH=groups"
- "GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH=email"
- "GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH=preferred_username"
- "GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH=full_name"
- "GF_AUTH_GENERIC_OAUTH_AUTH_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/auth"
- "GF_AUTH_GENERIC_OAUTH_TOKEN_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/token"
- "GF_AUTH_GENERIC_OAUTH_API_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/userinfo"
- "GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH=contains(resource_access.grafana.roles[*], 'serveradmin') && 'GrafanaAdmin' || contains(resource_access.grafana.roles[*], 'admin') && 'Admin' || contains(resource_access.grafana.roles[*], 'editor') && 'Editor' || 'Viewer'"
- "GF_AUTH_GENERIC_OAUTH_ALLOW_ASSIGN_GRAFANA_ADMIN=true"
volumes:
- data:/var/lib/grafana
- ./grafana-ds.yml:/etc/grafana/provisioning/datasources/datasource.yml:ro,Z
- ./grafana-db.yml:/etc/grafana/provisioning/dashboards/datasource.yml:ro,Z
- ./grafana-dashboards:/var/lib/grafana/dashboards:ro,Z
volumes:
data:
networks:
backend:
internal: true
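
The Jinja2 placeholders in this unit are filled from Ansible vars. A hypothetical sketch of the structure they expect (key names taken from the template, values purely illustrative):

grafana:
  admin:
    user: admin
    password: "change-me"          # local fallback login; OAuth is the normal path
  oidc:
    client_id: grafana
    client_secret: "<keycloak client secret>"
    url: keycloak.example.org      # bare host; the template prepends https:// and appends /realms/...
    realm_name: example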

@ -0,0 +1,28 @@
apiVersion: 1
datasources:
- name: Mimir Netcup
type: prometheus
basicAuth: true
basicAuthUser: {{ common.mimir.username }}
jsonData:
httpHeaderName1: "X-Scope-OrgID"
secureJsonData:
basicAuthPassword: {{ common.mimir.password }}
httpHeaderValue1: "{{ groups['prometheus']|map('extract', hostvars, 'inventory_hostname')|join('|')|replace('.','-') }}"
url: https://{{ common.mimir.host }}/prometheus
isDefault: false
access: proxy
editable: true
- name: Loki
type: loki
access: proxy
orgId: 1
url: https://{{ common.loki.host }}
basicAuth: true
basicAuthUser: {{ common.loki.username }}
secureJsonData:
basicAuthPassword: {{ common.loki.password }}
isDefault: false
version: 1
editable: true
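
Mimir's tenant federation accepts several tenant IDs in a single X-Scope-OrgID header, separated by "|". The Jinja expression above joins every host of the Ansible "prometheus" group (dots replaced by dashes, matching the tenant ID each client writes under), so this one datasource queries all clients at once. With two hypothetical hosts alpha.example.org and beta.example.org it would render roughly to:

secureJsonData:
  basicAuthPassword: "<mimir password>"
  httpHeaderValue1: "alpha-example-org|beta-example-org"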

@ -0,0 +1 @@
COMPOSE_PROJECT_NAME=loki

@ -0,0 +1,27 @@
version: "3.4"
services:
loki:
image: grafana/loki:latest
restart: unless-stopped
command: -config.file=/etc/loki/loki.yaml
volumes:
- ./loki.yml:/etc/loki/loki.yaml:ro,Z
- loki_data:/loki
labels:
- "traefik.enable=true"
- "traefik.http.routers.loki.rule=Host(`loki.tobiasmanske.de`)"
- "traefik.http.middlewares.loki-auth.basicauth.users={{ common.loki.username }}:{{ common.loki.password_hash | mandatory }}"
- "traefik.http.routers.loki.entryPoints=websecure"
- "traefik.http.services.loki.loadbalancer.server.port=3100"
- "traefik.http.routers.loki.middlewares=loki-auth"
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=3100"
networks:
- metrics
- default
volumes:
loki_data:
networks:
metrics:
external: true
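
Client and server template their credentials from a shared common block. A hypothetical sketch of its shape (key names from the templates, values illustrative): promtail, Prometheus and Grafana authenticate with the clear-text password, while the Traefik basicauth labels above only ever see the htpasswd-style hash.

common:
  loki:
    host: loki.tobiasmanske.de
    username: loki
    password: "<clear-text password for promtail/Grafana basic auth>"
    password_hash: "<htpasswd-style hash used in the Traefik basicauth label>"
  mimir:
    host: mimir.tobiasmanske.de
    username: mimir
    password: "<clear-text password for remote_write/Grafana basic auth>"
    password_hash: "<htpasswd-style hash>"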

@ -78,6 +78,10 @@ rc_joins:
local:
per_second: 100
burst_count: 100
rc_login:
address:
per_second: 1000
burst_count: 1000
server_notices:
system_mxid_localpart: "server"
system_mxid_display_name: "Server Notices"

@ -0,0 +1 @@
COMPOSE_PROJECT_NAME=metrics

@ -0,0 +1,103 @@
version: "3.4"
services:
prometheus:
image: prom/prometheus:latest
restart: unless-stopped
command:
- '--config.file=/etc/prometheus/prometheus.yml'
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro,Z
- prom_data:/prometheus
- label_discovery:/label_discovery:ro
labels:
- "traefik.enable=false"
depends_on:
- prometheus-docker-sd
- cadvisor
- node-exporter
networks:
- default # send
- backend
- metrics
prometheus-docker-sd:
image: registry.tobiasmanske.de/prometheus-docker-sd:latest
restart: unless-stopped
privileged: true
networks:
- backend
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro,Z
- label_discovery:/prometheus-docker-sd:rw
logging: # this service generates a HUGE amount of logs.
driver: "none"
node-exporter:
image: quay.io/prometheus/node-exporter:latest
container_name: "{{ inventory_hostname | replace('.', '-') }}-node-exporter"
privileged: true
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=9100"
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
- /:/host:ro,rslave
- /run/dbus/system_bus_socket:/var/run/dbus/system_bus_socket:ro
command:
- '--path.rootfs=/host'
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.ignored-mount-points'
- "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
- '--collector.systemd'
networks:
- backend
restart: unless-stopped
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
privileged: true
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=8080"
command:
- "-docker_only=true"
- "-housekeeping_interval=10s"
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
networks:
- backend
restart: unless-stopped
promtail:
image: grafana/promtail:latest
security_opt:
- label:disable
restart: unless-stopped
volumes:
- ./promtail.yml:/etc/promtail/config.yml:ro
- /var/log:/var/log:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /var/run/docker.sock:/var/run/docker.sock
command: -config.file=/etc/promtail/config.yml
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=8080"
networks:
- default # send
- backend
- metrics
volumes:
prom_data:
label_discovery:
networks:
backend:
internal: true
metrics:
external: true

@ -0,0 +1,30 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
scrape_configs:
- job_name: prometheus
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
static_configs:
- targets:
- localhost:9090
- job_name: 'service_discovery'
metric_relabel_configs:
- source_labels:
- "container_name"
target_label: "instance"
action: replace
file_sd_configs:
- files:
- /label_discovery/docker-targets.json
remote_write:
- url: https://{{ common.mimir.host | mandatory }}/api/v1/push
headers:
X-Scope-OrgID: "{{ inventory_hostname | replace('.', '-') }}"
basic_auth:
username: "{{ common.mimir.username | mandatory }}"
password: "{{ common.mimir.password | mandatory }}"

@ -4,7 +4,11 @@ server:
http_listen_port: 8080
clients:
- url: http://loki:3100/loki/api/v1/push
- url: https://{{ common.loki.host | mandatory }}/loki/api/v1/push
tenant_id: "{{ inventory_hostname | replace('.', '-') }}"
basic_auth:
username: "{{ common.loki.username | mandatory }}"
password: "{{ common.loki.password | mandatory }}"
scrape_configs:
- job_name: flog_scrape

@ -0,0 +1 @@
COMPOSE_PROJECT_NAME=mimir

@ -31,20 +31,20 @@ route:
receivers:
- name: 'email'
email_configs:
- to: '{{ prometheus.alertmanager.smtp.target }}'
from: '"Alertmanager" <{{ prometheus.alertmanager.smtp.username }}>'
- to: '{{ mimir.alertmanager.smtp.target }}'
from: '"Alertmanager" <{{ mimir.alertmanager.smtp.username }}>'
smarthost: 'mxe8cf.netcup.net:587'
auth_username: '{{ prometheus.alertmanager.smtp.username }}'
auth_identity: '{{ prometheus.alertmanager.smtp.username }}'
auth_password: '{{ prometheus.alertmanager.smtp.password }}'
auth_username: '{{ mimir.alertmanager.smtp.username }}'
auth_identity: '{{ mimir.alertmanager.smtp.username }}'
auth_password: '{{ mimir.alertmanager.smtp.password }}'
- name: 'hcio'
email_configs:
- to: '{{ prometheus.alertmanager.hcio.mail }}'
from: '"Alertmanager" <{{ prometheus.alertmanager.smtp.username }}>'
- to: '{{ mimir.alertmanager.hcio.mail }}'
from: '"Alertmanager" <{{ mimir.alertmanager.smtp.username }}>'
smarthost: 'mxe8cf.netcup.net:587'
auth_username: '{{ prometheus.alertmanager.smtp.username }}'
auth_identity: '{{ prometheus.alertmanager.smtp.username }}'
auth_password: '{{ prometheus.alertmanager.smtp.password }}'
auth_username: '{{ mimir.alertmanager.smtp.username }}'
auth_identity: '{{ mimir.alertmanager.smtp.username }}'
auth_password: '{{ mimir.alertmanager.smtp.password }}'
- name: 'matrix-monitoring'
webhook_configs:
- url: 'http://alertmanager-matrix:3000/alerts?secret={{ prometheus.alertmanager.matrix.alertmanager_token }}'
- url: 'http://alertmanager-matrix:3000/alerts?secret={{ mimir.alertmanager.matrix.alertmanager_token }}'
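
The receivers were re-templated from the old prometheus.* variable tree to mimir.*, matching the new project layout. A hypothetical sketch of the vars block they now expect (key names from the templates, values illustrative):

mimir:
  alertmanager:
    smtp:
      target: alerts@example.org           # where regular alert mail goes
      username: alertmanager@example.org   # mailbox used as sender and SMTP login
      password: "<smtp password>"
    hcio:
      mail: "<dead-man's-switch ping address, e.g. a healthchecks.io email endpoint>"
    matrix:
      alertmanager_token: "<shared secret embedded in the webhook URL>"
      matrix_token: "<access token of the @alertmanager user>"
      rooms:                               # joined with "|" into MATRIX_ROOMS
        - "<room mapping>"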

@ -0,0 +1,80 @@
services:
mimir:
image: grafana/mimir:latest
restart: unless-stopped
volumes:
- data:/mimir
- ./mimir.yml:/etc/mimir-config/mimir.yaml:ro,Z
{% for tenant in groups['prometheus']|map('extract', hostvars, 'inventory_hostname') %}
- ./rules:/data/rules/{{ tenant|replace('.', '-') }}:ro,Z
{% endfor %}
entrypoint:
- /bin/mimir
- -config.file=/etc/mimir-config/mimir.yaml
- -validation.max-label-names-per-series=60
- -tenant-federation.enabled=true
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=8080"
- "traefik.enable=true"
- "traefik.http.routers.mimir.rule=Host(`mimir.tobiasmanske.de`)"
- "traefik.http.middlewares.mimir-auth.basicauth.users={{ common.mimir.username }}:{{ common.mimir.password_hash | mandatory }}"
- "traefik.http.routers.mimir.entryPoints=websecure"
- "traefik.http.services.mimir.loadbalancer.server.port=8080"
- "traefik.http.routers.mimir.middlewares=mimir-auth"
networks:
- backend
- default
alertmanager:
image: prom/alertmanager:latest
labels:
- "traefik.enable=true"
- "traefik.http.routers.alertmanager.rule=Host(`alertmanager.tobiasmanske.de`)"
- "traefik.http.routers.alertmanager.entryPoints=websecure"
- "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
- "traefik.http.routers.alertmanager.middlewares=oauth@file"
volumes:
- ./alertmanager.yml:/etc/alertmanager/config.yml:ro,Z
- alertmanager_data:/data
networks:
- backend
- alertmanager
- default
restart: unless-stopped
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--web.external-url=https://alertmanager.tobiasmanske.de'
- '--storage.path=/data'
alertmanager-matrix:
image: jaywink/matrix-alertmanager:latest
restart: unless-stopped
labels:
- "traefik.enable=true"
- "traefik.http.routers.alertmanager-matrix.rule=Host(`alertmanager.tobiasmanske.de`) && PathPrefix(`/matrix/`)"
- "traefik.http.routers.alertmanager-matrix.middlewares=matrix-strip"
- "traefik.http.middlewares.matrix-strip.stripprefix.prefixes=/matrix"
- "traefik.http.middlewares.matrix-strip.stripprefix.forceslash=false"
- "traefik.http.routers.alertmanager-matrix.entryPoints=websecure"
- "traefik.http.services.alertmanager-matrix.loadbalancer.server.port=3000"
environment:
- APP_PORT=3000
- APP_ALERTMANAGER_SECRET={{ mimir.alertmanager.matrix.alertmanager_token | mandatory }}
- MATRIX_HOMESERVER_URL=http://pantalaimon:8008
- MATRIX_ROOMS={{ mimir.alertmanager.matrix.rooms | join('|') }}
- MATRIX_TOKEN={{ mimir.alertmanager.matrix.matrix_token }}
- MATRIX_USER=@alertmanager:{{ matrix.baseurl }}
- MENTION_ROOM=1
networks:
- alertmanager
- pantalaimon
volumes:
data:
alertmanager_data:
networks:
pantalaimon:
external: true
backend:
internal: true
alertmanager:
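
Mimir's local ruler storage expects one sub-directory per tenant below the configured rules directory, so the Jinja loop at the top of this unit mounts the same ./rules folder once per client host, named after that host's tenant ID. With the hypothetical hosts used earlier the volume list would render to:

volumes:
  - data:/mimir
  - ./mimir.yml:/etc/mimir-config/mimir.yaml:ro,Z
  - ./rules:/data/rules/alpha-example-org:ro,Z
  - ./rules:/data/rules/beta-example-org:ro,Z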

@ -30,13 +30,20 @@ store_gateway:
sharding_ring:
replication_factor: 1
# ruler:
# rule_path: /data/ruler
# alertmanager_url: http://127.0.0.1:8080/alertmanager
# ring:
# # Quickly detect unhealthy rulers to speed up the tutorial.
# heartbeat_period: 2s
# heartbeat_timeout: 10s
ruler:
rule_path: /tmp/ruler
alertmanager_url: http://alertmanager:9093/
tenant_federation:
enabled: true
ring:
# Quickly detect unhealthy rulers to speed up the tutorial.
heartbeat_period: 2s
heartbeat_timeout: 10s
ruler_storage:
backend: local
local:
directory: /data/rules
#
# alertmanager:
# data_dir: /data/alertmanager

@ -83,14 +83,14 @@ groups:
annotations:
summary: Prometheus AlertManager E2E dead man switch (instance {{ $labels.instance }})
description: "Prometheus DeadManSwitch is an always-firing alert. It's used as an end-to-end test of Prometheus through the Alertmanager.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusNotConnectedToAlertmanager
expr: 'prometheus_notifications_alertmanagers_discovered < 1'
for: 0m
labels:
severity: critical
annotations:
summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }})
description: "Prometheus cannot connect the alertmanager\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
# - alert: PrometheusNotConnectedToAlertmanager
# expr: 'prometheus_notifications_alertmanagers_discovered < 1'
# for: 0m
# labels:
# severity: critical
# annotations:
# summary: Prometheus not connected to alertmanager (instance {{ $labels.instance }})
# description: "Prometheus cannot connect the alertmanager\n VALUE = {{ $value }}\n LABELS = {{ $labels }}"
- alert: PrometheusRuleEvaluationFailures
expr: 'increase(prometheus_rule_evaluation_failures_total[3m]) > 0'
for: 0m

@ -1 +0,0 @@
COMPOSE_PROJECT_NAME=prometheus

@ -1,224 +0,0 @@
version: "3.4"
services:
prometheus:
image: prom/prometheus:latest
restart: unless-stopped
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--web.external-url=https://prometheus.tobiasmanske.de'
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro,Z
- prom_data:/prometheus
- label_discovery:/label_discovery:ro
- ./rules:/rules:ro,Z
labels:
- "traefik.enable=true"
- "traefik.http.routers.prometheus.rule=Host(`prometheus.tobiasmanske.de`)"
- "traefik.http.routers.prometheus.entryPoints=websecure"
- "traefik.http.services.prometheus.loadbalancer.server.port=9090"
- "traefik.http.routers.prometheus.middlewares=oauth@file"
depends_on:
- prometheus-docker-sd
- cadvisor
- node-exporter
networks:
- backend
- alertmanager
- metrics
prometheus-docker-sd:
image: registry.tobiasmanske.de/prometheus-docker-sd:latest
restart: unless-stopped
privileged: true
networks:
- backend
volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro,Z
- label_discovery:/prometheus-docker-sd:rw
logging: # this service generates a HUGE amount of logs.
driver: "none"
alertmanager:
image: prom/alertmanager:latest
labels:
- "traefik.enable=true"
- "traefik.http.routers.alertmanager.rule=Host(`alertmanager.tobiasmanske.de`)"
- "traefik.http.routers.alertmanager.entryPoints=websecure"
- "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
- "traefik.http.routers.alertmanager.middlewares=oauth@file"
volumes:
- ./alertmanager.yml:/etc/alertmanager/config.yml:ro,Z
- alertmanager_data:/data
networks:
- alertmanager
restart: unless-stopped
command:
- '--config.file=/etc/alertmanager/config.yml'
- '--web.external-url=https://alertmanager.tobiasmanske.de'
- '--storage.path=/data'
alertmanager-matrix:
image: jaywink/matrix-alertmanager:latest
restart: unless-stopped
labels:
- "traefik.enable=true"
- "traefik.http.routers.alertmanager-matrix.rule=Host(`alertmanager.tobiasmanske.de`) && PathPrefix(`/matrix/`)"
- "traefik.http.routers.alertmanager-matrix.middlewares=matrix-strip"
- "traefik.http.middlewares.matrix-strip.stripprefix.prefixes=/matrix"
- "traefik.http.middlewares.matrix-strip.stripprefix.forceslash=false"
- "traefik.http.routers.alertmanager-matrix.entryPoints=websecure"
- "traefik.http.services.alertmanager-matrix.loadbalancer.server.port=3000"
environment:
- APP_PORT=3000
- APP_ALERTMANAGER_SECRET={{ prometheus.alertmanager.matrix.alertmanager_token }}
- MATRIX_HOMESERVER_URL=http://pantalaimon:8008
- MATRIX_ROOMS={{ prometheus.alertmanager.matrix.rooms | join('|') }}
- MATRIX_TOKEN={{ prometheus.alertmanager.matrix.matrix_token }}
- MATRIX_USER=@alertmanager:{{ matrix.baseurl }}
- MENTION_ROOM=1
networks:
- alertmanager
- pantalaimon
grafana:
image: grafana/grafana:latest
restart: unless-stopped
labels:
- "traefik.enable=true"
- "traefik.http.routers.grafana.rule=Host(`grafana.tobiasmanske.de`)"
- "traefik.http.routers.grafana.entryPoints=websecure"
- "traefik.http.services.grafana.loadbalancer.server.port=3000"
networks:
- backend
- default
environment:
- "GF_SERVER_ROOT_URL=https://grafana.tobiasmanske.de"
- "GF_SECURITY_ADMIN_USER={{ grafana.admin.user }}"
- "GF_SECURITY_ADMIN_PASSWORD={{ grafana.admin.password }}"
- "GF_AUTH_GENERIC_OAUTH_NAME=Keycloak"
- "GF_AUTH_GENERIC_OAUTH_ENABLED=true"
- "GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP=true"
- "GF_AUTH_GENERIC_OAUTH_CLIENT_ID={{ grafana.oidc.client_id }}"
- "GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET={{ grafana.oidc.client_secret }}"
- "GF_AUTH_GENERIC_OAUTH_SCOPES=openid email profile offline_access roles"
- "GF_AUTH_GENERIC_OAUTH_GROUP_ATTRIBUTE_PATH=groups"
- "GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH=email"
- "GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH=preferred_username"
- "GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH=full_name"
- "GF_AUTH_GENERIC_OAUTH_AUTH_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/auth"
- "GF_AUTH_GENERIC_OAUTH_TOKEN_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/token"
- "GF_AUTH_GENERIC_OAUTH_API_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/userinfo"
- "GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH=contains(resource_access.grafana.roles[*], 'serveradmin') && 'GrafanaAdmin' || contains(resource_access.grafana.roles[*], 'admin') && 'Admin' || contains(resource_access.grafana.roles[*], 'editor') && 'Editor' || 'Viewer'"
- "GF_AUTH_GENERIC_OAUTH_ALLOW_ASSIGN_GRAFANA_ADMIN=true"
volumes:
- grafana_data:/var/lib/grafana
- ./grafana-ds.yml:/etc/grafana/provisioning/datasources/datasource.yml:ro,Z
- ./grafana-db.yml:/etc/grafana/provisioning/dashboards/datasource.yml:ro,Z
- ./grafana-dashboards:/var/lib/grafana/dashboards:ro,Z
node-exporter:
image: quay.io/prometheus/node-exporter:latest
container_name: host-nc-chaoswg-org-node-exporter
privileged: true
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=9100"
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
- /:/host:ro,rslave
- /run/dbus/system_bus_socket:/var/run/dbus/system_bus_socket:ro
command:
- '--path.rootfs=/host'
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.ignored-mount-points'
- "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
- '--collector.systemd'
networks:
- metrics
restart: unless-stopped
cadvisor:
image: gcr.io/cadvisor/cadvisor:latest
privileged: true
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=8080"
command:
- "-docker_only=true"
- "-housekeeping_interval=10s"
volumes:
- /:/rootfs:ro
- /var/run:/var/run:rw
- /sys:/sys:ro
- /var/lib/docker/:/var/lib/docker:ro
networks:
- metrics
restart: unless-stopped
loki:
image: grafana/loki:latest
restart: unless-stopped
command: -config.file=/etc/loki/loki.yaml
volumes:
- ./loki.yml:/etc/loki/loki.yaml:ro,Z
- loki_data:/loki
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=3100"
networks:
- backend
promtail:
image: grafana/promtail:latest
security_opt:
- label:disable
restart: unless-stopped
volumes:
- ./promtail.yml:/etc/promtail/config.yml:ro
- /var/log:/var/log:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- /var/run/docker.sock:/var/run/docker.sock
command: -config.file=/etc/promtail/config.yml
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=8080"
networks:
- backend
- metrics
mimir:
image: grafana/mimir:latest
restart: unless-stopped
volumes:
- mimir_data:/mimir
- ./mimir.yml:/etc/mimir-config/mimir.yaml:ro,Z
entrypoint:
- /bin/mimir
- -config.file=/etc/mimir-config/mimir.yaml
- -validation.max-label-names-per-series=60
labels:
- "prometheus-scrape.enabled=true"
- "prometheus-scrape.port=8080"
networks:
- backend
- metrics
volumes:
prom_data:
grafana_data:
loki_data:
label_discovery:
alertmanager_data:
mimir_data:
networks:
pantalaimon:
external: true
backend:
internal: true
alertmanager:
metrics:
external: true

@ -1,28 +0,0 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true
- name: Mimir Netcup
type: prometheus
jsonData:
httpHeaderName1: "X-Scope-OrgID"
secureJsonData:
httpHeaderValue1: "host-nc-chaoswg-org"
url: http://mimir:8080/prometheus
isDefault: false
access: proxy
editable: true
- name: Loki
type: loki
access: proxy
orgId: 1
url: http://loki:3100
basicAuth: false
isDefault: false
version: 1
editable: true

@ -1,58 +0,0 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- scheme: http
static_configs:
- targets: [ 'alertmanager:9093' ]
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v1
rule_files:
- "/rules/*.yaml"
scrape_configs:
- job_name: prometheus
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /metrics
scheme: http
static_configs:
- targets:
- localhost:9090
- job_name: 'service_discovery'
metric_relabel_configs:
- source_labels:
- "container_name"
target_label: "instance"
action: replace
file_sd_configs:
- files:
- /label_discovery/docker-targets.json
- job_name: minio-job
bearer_token: "{{ prometheus.scrape.s3.bearer_token }}"
metrics_path: /minio/v2/metrics/cluster
scheme: https
static_configs:
- targets: [s3.tobiasmanske.de]
- job_name: drone-job
bearer_token: "{{ prometheus.scrape.drone.bearer_token }}"
scheme: https
static_configs:
- targets: [drone.tobiasmanske.de]
- job_name: 'uptime-kuma-job'
scrape_interval: 30s
scheme: https
static_configs:
- targets: [status.tobiasmanske.de]
basic_auth:
username: "{{ prometheus.scrape.kuma.user }}"
password: "{{ prometheus.scrape.kuma.password }}"
remote_write:
- url: http://mimir:8080/api/v1/push
headers:
X-Scope-OrgID: host-nc-chaoswg-org