# File: infrastructure/ansible/plays/services/prometheus/docker-compose.yaml
# Listing metadata: 224 lines, 7.9 KiB, YAML

# Compose file format version. Some values further down in this file are
# Jinja2 placeholders, so the file has to be rendered before Compose reads it
# (presumably by Ansible, judging by the file's path - confirm).
version: "3.4"
# Monitoring stack: Prometheus, Alertmanager (plus a Matrix bridge), Grafana,
# host/container exporters, Loki with Promtail, and Mimir.
# Services labelled `prometheus-scrape.*` are presumably picked up by
# prometheus-docker-sd - confirm against that image's documentation.
services:
  # Prometheus server. Routed by Traefik on the `websecure` entrypoint at
  # prometheus.tobiasmanske.de behind the `oauth@file` middleware.
  prometheus:
    image: prom/prometheus:latest
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--web.external-url=https://prometheus.tobiasmanske.de'
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro,Z
      - prom_data:/prometheus
      # Same named volume that prometheus-docker-sd mounts read-write.
      - label_discovery:/label_discovery:ro
      - ./rules:/rules:ro,Z
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.tobiasmanske.de`)"
      - "traefik.http.routers.prometheus.entryPoints=websecure"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
      - "traefik.http.routers.prometheus.middlewares=oauth@file"
    depends_on:
      - prometheus-docker-sd
      - cadvisor
      - node-exporter
    networks:
      - backend
      - alertmanager
      - metrics

  # Discovery sidecar: gets the Docker socket (read-only mount) and writes
  # into the `label_discovery` volume that prometheus mounts read-only.
  prometheus-docker-sd:
    image: registry.tobiasmanske.de/prometheus-docker-sd:latest
    restart: unless-stopped
    privileged: true
    networks:
      - backend
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock:ro,Z
      - label_discovery:/prometheus-docker-sd:rw
    logging:  # this service generates a HUGE amount of logs.
      driver: "none"

  # Alertmanager. Routed by Traefik at alertmanager.tobiasmanske.de behind
  # the `oauth@file` middleware; state is kept in `alertmanager_data`.
  alertmanager:
    image: prom/alertmanager:latest
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.alertmanager.rule=Host(`alertmanager.tobiasmanske.de`)"
      - "traefik.http.routers.alertmanager.entryPoints=websecure"
      - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
      - "traefik.http.routers.alertmanager.middlewares=oauth@file"
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/config.yml:ro,Z
      - alertmanager_data:/data
    networks:
      - alertmanager
    restart: unless-stopped
    command:
      - '--config.file=/etc/alertmanager/config.yml'
      - '--web.external-url=https://alertmanager.tobiasmanske.de'
      - '--storage.path=/data'

  # Alertmanager-to-Matrix bridge. Served under /matrix/ on the Alertmanager
  # host name; the `matrix-strip` middleware removes the /matrix prefix.
  # Unlike the other routers here, this one does not use `oauth@file`.
  alertmanager-matrix:
    image: jaywink/matrix-alertmanager:latest
    restart: unless-stopped
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.alertmanager-matrix.rule=Host(`alertmanager.tobiasmanske.de`) && PathPrefix(`/matrix/`)"
      - "traefik.http.routers.alertmanager-matrix.middlewares=matrix-strip"
      - "traefik.http.middlewares.matrix-strip.stripprefix.prefixes=/matrix"
      - "traefik.http.middlewares.matrix-strip.stripprefix.forceslash=false"
      - "traefik.http.routers.alertmanager-matrix.entryPoints=websecure"
      - "traefik.http.services.alertmanager-matrix.loadbalancer.server.port=3000"
    # Entries are quoted (as in the grafana service) so that a rendered
    # template value containing YAML-significant characters stays one string.
    environment:
      - "APP_PORT=3000"
      - "APP_ALERTMANAGER_SECRET={{ prometheus.alertmanager.matrix.alertmanager_token }}"
      - "MATRIX_HOMESERVER_URL=http://pantalaimon:8008"
      - "MATRIX_ROOMS={{ prometheus.alertmanager.matrix.rooms | join('|') }}"
      - "MATRIX_TOKEN={{ prometheus.alertmanager.matrix.matrix_token }}"
      - "MATRIX_USER=@alertmanager:{{ matrix.baseurl }}"
      - "MENTION_ROOM=1"
    networks:
      - alertmanager
      - pantalaimon

  # Grafana. Routed by Traefik at grafana.tobiasmanske.de; sign-in goes
  # through the generic OAuth provider that is displayed as "Keycloak".
  grafana:
    image: grafana/grafana:latest
    restart: unless-stopped
    labels:
      - "traefik.enable=true"
      - "traefik.http.routers.grafana.rule=Host(`grafana.tobiasmanske.de`)"
      - "traefik.http.routers.grafana.entryPoints=websecure"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"
    networks:
      - backend
    environment:
      - "GF_SERVER_ROOT_URL=https://grafana.tobiasmanske.de"
      - "GF_SECURITY_ADMIN_USER={{ grafana.admin.user }}"
      - "GF_SECURITY_ADMIN_PASSWORD={{ grafana.admin.password }}"
      - "GF_AUTH_GENERIC_OAUTH_NAME=Keycloak"
      - "GF_AUTH_GENERIC_OAUTH_ENABLED=true"
      - "GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP=true"
      - "GF_AUTH_GENERIC_OAUTH_CLIENT_ID={{ grafana.oidc.client_id }}"
      - "GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET={{ grafana.oidc.client_secret }}"
      - "GF_AUTH_GENERIC_OAUTH_SCOPES=openid email profile offline_access roles"
      # Grafana's option is `groups_attribute_path`, hence GROUPS (plural).
      - "GF_AUTH_GENERIC_OAUTH_GROUPS_ATTRIBUTE_PATH=groups"
      - "GF_AUTH_GENERIC_OAUTH_EMAIL_ATTRIBUTE_PATH=email"
      - "GF_AUTH_GENERIC_OAUTH_LOGIN_ATTRIBUTE_PATH=preferred_username"
      - "GF_AUTH_GENERIC_OAUTH_NAME_ATTRIBUTE_PATH=full_name"
      - "GF_AUTH_GENERIC_OAUTH_AUTH_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/auth"
      - "GF_AUTH_GENERIC_OAUTH_TOKEN_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/token"
      - "GF_AUTH_GENERIC_OAUTH_API_URL=https://{{ grafana.oidc.url }}/realms/{{ grafana.oidc.realm_name }}/protocol/openid-connect/userinfo"
      # Role mapping, first match wins: serveradmin -> GrafanaAdmin,
      # admin -> Admin, editor -> Editor, anything else -> Viewer.
      - "GF_AUTH_GENERIC_OAUTH_ROLE_ATTRIBUTE_PATH=contains(resource_access.grafana.roles[*], 'serveradmin') && 'GrafanaAdmin' || contains(resource_access.grafana.roles[*], 'admin') && 'Admin' || contains(resource_access.grafana.roles[*], 'editor') && 'Editor' || 'Viewer'"
      - "GF_AUTH_GENERIC_OAUTH_ALLOW_ASSIGN_GRAFANA_ADMIN=true"
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana-ds.yml:/etc/grafana/provisioning/datasources/datasource.yml:ro,Z
      - ./grafana-db.yml:/etc/grafana/provisioning/dashboards/datasource.yml:ro,Z
      - ./grafana-dashboards:/var/lib/grafana/dashboards:ro,Z

  # Host metrics exporter. The host's /, /proc and /sys are mounted
  # read-only and handed to the exporter through the --path.* flags.
  node-exporter:
    image: quay.io/prometheus/node-exporter:latest
    container_name: host-nc-chaoswg-org-node-exporter
    privileged: true
    labels:
      - "prometheus-scrape.enabled=true"
      - "prometheus-scrape.port=9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
      - /:/host:ro,rslave
      - /run/dbus/system_bus_socket:/var/run/dbus/system_bus_socket:ro
    command:
      - '--path.rootfs=/host'
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # `mount-points-exclude` is the current name of the deprecated
      # `ignored-mount-points` flag. The regex is the next list item;
      # `$$` is Compose's escape for a literal `$`.
      # NOTE(review): the `rootfs/...` alternatives were presumably written
      # for the /rootfs mount; with --path.rootfs=/host they may never
      # match - confirm.
      - '--collector.filesystem.mount-points-exclude'
      - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)"
      - '--collector.systemd'
    networks:
      - metrics
    restart: unless-stopped

  # Container metrics exporter, started with `-docker_only=true` and a
  # 10s housekeeping interval.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:latest
    privileged: true
    labels:
      - "prometheus-scrape.enabled=true"
      - "prometheus-scrape.port=8080"
    command:
      - "-docker_only=true"
      - "-housekeeping_interval=10s"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
    networks:
      - metrics
    restart: unless-stopped

  # Loki, configured from ./loki.yml, with its data in `loki_data`.
  loki:
    image: grafana/loki:latest
    restart: unless-stopped
    command: -config.file=/etc/loki/loki.yaml
    volumes:
      - ./loki.yml:/etc/loki/loki.yaml:ro,Z
      - loki_data:/loki
    labels:
      - "prometheus-scrape.enabled=true"
      - "prometheus-scrape.port=3100"
    networks:
      - backend

  # Promtail. Gets the host's /var/log, the Docker container log directory
  # and the Docker socket; SELinux labelling is disabled for this container.
  promtail:
    image: grafana/promtail:latest
    security_opt:
      - label:disable
    restart: unless-stopped
    volumes:
      - ./promtail.yml:/etc/promtail/config.yml:ro
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock
    command: -config.file=/etc/promtail/config.yml
    labels:
      - "prometheus-scrape.enabled=true"
      # NOTE(review): has to match the HTTP listen port set in promtail.yml,
      # which is not visible from this file - confirm.
      - "prometheus-scrape.port=8080"
    networks:
      - backend
      - metrics

  # Mimir. The entrypoint is overridden to pass the config file and to set
  # the per-series label-name limit to 60.
  mimir:
    image: grafana/mimir:latest
    restart: unless-stopped
    volumes:
      - mimir_data:/mimir
      - ./mimir.yml:/etc/mimir-config/mimir.yaml:ro,Z
    entrypoint:
      - /bin/mimir
      - -config.file=/etc/mimir-config/mimir.yaml
      - -validation.max-label-names-per-series=60
    labels:
      - "prometheus-scrape.enabled=true"
      - "prometheus-scrape.port=8080"
    networks:
      - backend
      - metrics
# Named volumes used by the services in this file. None of them sets any
# option, so each is written as an explicit empty mapping.
volumes:
  # Alertmanager state (mounted at /data).
  alertmanager_data: {}
  # Grafana state (mounted at /var/lib/grafana).
  grafana_data: {}
  # Shared between prometheus-docker-sd (rw) and prometheus (ro).
  label_discovery: {}
  # Loki data (mounted at /loki).
  loki_data: {}
  # Mimir data (mounted at /mimir).
  mimir_data: {}
  # Prometheus data (mounted at /prometheus).
  prom_data: {}
# Networks referenced by the services in this file.
networks:
  # No options set: created by Compose with its defaults.
  alertmanager: {}
  # Created by Compose as an internal (externally isolated) network.
  backend:
    internal: true
  # `external: true`: these two must already exist; Compose does not
  # create them.
  metrics:
    external: true
  pantalaimon:
    external: true