change prometheus_server role variable structure
This commit is contained in:
parent
0363813807
commit
1b30ca221e
@ -28,77 +28,177 @@ users:
|
|||||||
openpgp:0xAD2BFD7F"
|
openpgp:0xAD2BFD7F"
|
||||||
opendoas_settings: "permit nopass ansible"
|
opendoas_settings: "permit nopass ansible"
|
||||||
|
|
||||||
prometheus_options:
|
prometheus_settings:
|
||||||
global:
|
config:
|
||||||
|
global:
|
||||||
|
alerting:
|
||||||
|
alertmanagers:
|
||||||
|
- static_configs:
|
||||||
|
- targets:
|
||||||
|
- 192.168.0.252:9093
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: prometheus
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- 192.168.0.252:9090
|
||||||
|
- job_name: node
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
# main-page
|
||||||
|
- 192.168.0.10:9100
|
||||||
|
labels:
|
||||||
|
env: dev
|
||||||
|
hostname: main-page
|
||||||
|
- targets:
|
||||||
|
# searxng
|
||||||
|
- 192.168.0.15:9100
|
||||||
|
labels:
|
||||||
|
env: dev
|
||||||
|
hostname: searxng
|
||||||
|
- targets:
|
||||||
|
# forgejo
|
||||||
|
- 192.168.0.20:9100
|
||||||
|
labels:
|
||||||
|
env: dev
|
||||||
|
hostname: forgejo
|
||||||
|
- targets:
|
||||||
|
# forgejo-runner
|
||||||
|
- 192.168.0.21:9100
|
||||||
|
labels:
|
||||||
|
env: dev
|
||||||
|
hostname: forgejo-runner
|
||||||
|
- targets:
|
||||||
|
# bastion
|
||||||
|
- 192.168.0.254:9100
|
||||||
|
labels:
|
||||||
|
env: common
|
||||||
|
hostname: bastion
|
||||||
|
- targets:
|
||||||
|
# load-balancer
|
||||||
|
- 192.168.0.253:9100
|
||||||
|
labels:
|
||||||
|
env: common
|
||||||
|
hostname: load-balancer
|
||||||
|
- targets:
|
||||||
|
# monitoring
|
||||||
|
- 192.168.0.252:9100
|
||||||
|
labels:
|
||||||
|
env: common
|
||||||
|
hostname: monitoring
|
||||||
|
- job_name: nginx
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
# load-balancer
|
||||||
|
- 192.168.0.253:9113
|
||||||
|
labels:
|
||||||
|
env: common
|
||||||
|
hostname: monitoring
|
||||||
|
|
||||||
alerting:
|
alerting_rules:
|
||||||
alertmanagers:
|
groups:
|
||||||
- static_configs:
|
- name: DefaultMetrics
|
||||||
- targets:
|
rules:
|
||||||
- 192.168.0.252:9093
|
- alert: HostOutOfMemory
|
||||||
|
expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < .10)
|
||||||
rule_files:
|
for: 2m
|
||||||
- alerting_rules/*.yml
|
labels:
|
||||||
|
severity: warning
|
||||||
scrape_configs:
|
annotations:
|
||||||
- job_name: prometheus
|
summary: Host out of memory (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
static_configs:
|
description: "Node memory is filling up (< 10% left)\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
- targets:
|
# You may want to increase the alert manager 'repeat_interval' for this type of alert to daily or weekly
|
||||||
- 192.168.0.252:9090
|
- alert: HostMemoryIsUnderutilized
|
||||||
|
expr: min_over_time(node_memory_MemFree_bytes[1w]) > node_memory_MemTotal_bytes * .8
|
||||||
- job_name: node
|
for: 0m
|
||||||
static_configs:
|
labels:
|
||||||
- targets:
|
severity: info
|
||||||
# main-page
|
annotations:
|
||||||
- 192.168.0.10:9100
|
summary: Host Memory is underutilized (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
labels:
|
description: "Node memory usage is < 20% for 1 week. Consider reducing memory space. (instance {{'{{'}} $labels.instance {{'}}'}})\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
env: dev
|
# Please add ignored mountpoints in node_exporter parameters like
|
||||||
hostname: main-page
|
# "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
|
||||||
- targets:
|
# Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
|
||||||
# searxng
|
- alert: HostOutOfDiskSpace
|
||||||
- 192.168.0.15:9100
|
expr: (node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"} / node_filesystem_size_bytes < .10 and on (instance, device, mountpoint) node_filesystem_readonly == 0)
|
||||||
labels:
|
for: 2m
|
||||||
env: dev
|
labels:
|
||||||
hostname: searxng
|
severity: critical
|
||||||
- targets:
|
annotations:
|
||||||
# forgejo
|
summary: Host out of disk space (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
- 192.168.0.20:9100
|
description: "Disk is almost full (< 10% left)\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
labels:
|
- alert: HostOutOfInodes
|
||||||
env: dev
|
expr: (node_filesystem_files_free / node_filesystem_files < .10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0)
|
||||||
hostname: forgejo
|
for: 2m
|
||||||
- targets:
|
labels:
|
||||||
# forgejo
|
severity: critical
|
||||||
- 192.168.0.21:9100
|
annotations:
|
||||||
labels:
|
summary: Host out of inodes (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
env: dev
|
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
hostname: forgejo-runner
|
- alert: HostHighCpuLoad
|
||||||
- targets:
|
expr: (avg by (instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m]))) > .90
|
||||||
# bastion
|
for: 10m
|
||||||
- 192.168.0.254:9100
|
labels:
|
||||||
labels:
|
severity: warning
|
||||||
env: common
|
annotations:
|
||||||
hostname: bastion
|
summary: Host high CPU load (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
- targets:
|
description: "CPU load is > 80%\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
# load-balancer
|
# You may want to increase the alert manager 'repeat_interval' for this type of alert to daily or weekly
|
||||||
- 192.168.0.253:9100
|
- alert: HostCpuIsUnderutilized
|
||||||
labels:
|
expr: (min by (instance) (rate(node_cpu_seconds_total{mode="idle"}[1h]))) > 0.8
|
||||||
env: common
|
for: 1w
|
||||||
hostname: load-balancer
|
labels:
|
||||||
- targets:
|
severity: info
|
||||||
# monitoring
|
annotations:
|
||||||
- 192.168.0.252:9100
|
summary: Host CPU is underutilized (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
labels:
|
description: "CPU load has been < 20% for 1 week. Consider reducing the number of CPUs.\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
env: common
|
- alert: HostCpuHighIowait
|
||||||
hostname: monitoring
|
expr: avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) > .10
|
||||||
|
for: 0m
|
||||||
- job_name: nginx
|
labels:
|
||||||
static_configs:
|
severity: warning
|
||||||
- targets:
|
annotations:
|
||||||
# load-balancer
|
summary: Host CPU high iowait (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
- 192.168.0.253:9113
|
description: "CPU iowait > 10%. Your CPU is idling waiting for storage to respond.\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
labels:
|
- alert: HostSwapIsFillingUp
|
||||||
env: common
|
expr: ((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80)
|
||||||
hostname: monitoring
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host swap is filling up (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
|
description: "Swap is filling up (>80%)\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
|
# - alert: HostSystemdServiceCrashed
|
||||||
|
# expr: (node_systemd_unit_state{state="failed"} == 1)
|
||||||
|
# for: 0m
|
||||||
|
# labels:
|
||||||
|
# severity: warning
|
||||||
|
# annotations:
|
||||||
|
# summary: Host systemd service crashed (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
|
# description: "systemd service crashed\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
|
- alert: HostOomKillDetected
|
||||||
|
expr: (increase(node_vmstat_oom_kill[1m]) > 0)
|
||||||
|
for: 0m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host OOM kill detected (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
|
description: "OOM kill detected\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
|
- alert: HostClockSkew
|
||||||
|
expr: ((node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0))
|
||||||
|
for: 10m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host clock skew (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
|
description: "Clock skew detected. Clock is out of sync. Ensure NTP is configured correctly on this host.\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
|
- alert: HostClockNotSynchronising
|
||||||
|
expr: (min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16)
|
||||||
|
for: 2m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: Host clock not synchronising (instance {{'{{'}} $labels.instance {{'}}'}})
|
||||||
|
description: "Clock not synchronising. Ensure NTP is configured on this host.\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -135,114 +235,6 @@ prometheus_alertmanager_options:
|
|||||||
- to: notifications@cuqmbr.xyz
|
- to: notifications@cuqmbr.xyz
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
prometheus_alerting_rules:
|
|
||||||
groups:
|
|
||||||
- name: DefaultMetrics
|
|
||||||
rules:
|
|
||||||
- alert: HostOutOfMemory
|
|
||||||
expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < .10)
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host out of memory (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "Node memory is filling up (< 10% left)\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
# You may want to increase the alert manager 'repeat_interval' for this type of alert to daily or weekly
|
|
||||||
- alert: HostMemoryIsUnderutilized
|
|
||||||
expr: min_over_time(node_memory_MemFree_bytes[1w]) > node_memory_MemTotal_bytes * .8
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: info
|
|
||||||
annotations:
|
|
||||||
summary: Host Memory is underutilized (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "Node memory usage is < 20% for 1 week. Consider reducing memory space. (instance {{'{{'}} $labels.instance {{'}}'}})\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
# Please add ignored mountpoints in node_exporter parameters like
|
|
||||||
# "--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|run)($|/)".
|
|
||||||
# Same rule using "node_filesystem_free_bytes" will fire when disk fills for non-root users.
|
|
||||||
- alert: HostOutOfDiskSpace
|
|
||||||
expr: (node_filesystem_avail_bytes{fstype!~"^(fuse.*|tmpfs|cifs|nfs)"} / node_filesystem_size_bytes < .10 and on (instance, device, mountpoint) node_filesystem_readonly == 0)
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: Host out of disk space (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "Disk is almost full (< 10% left)\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
- alert: HostOutOfInodes
|
|
||||||
expr: (node_filesystem_files_free / node_filesystem_files < .10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0)
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: critical
|
|
||||||
annotations:
|
|
||||||
summary: Host out of inodes (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
- alert: HostHighCpuLoad
|
|
||||||
expr: (avg by (instance) (rate(node_cpu_seconds_total{mode!="idle"}[2m]))) > .90
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host high CPU load (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "CPU load is > 80%\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
# You may want to increase the alert manager 'repeat_interval' for this type of alert to daily or weekly
|
|
||||||
- alert: HostCpuIsUnderutilized
|
|
||||||
expr: (min by (instance) (rate(node_cpu_seconds_total{mode="idle"}[1h]))) > 0.8
|
|
||||||
for: 1w
|
|
||||||
labels:
|
|
||||||
severity: info
|
|
||||||
annotations:
|
|
||||||
summary: Host CPU is underutilized (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "CPU load has been < 20% for 1 week. Consider reducing the number of CPUs.\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
- alert: HostCpuHighIowait
|
|
||||||
expr: avg by (instance) (rate(node_cpu_seconds_total{mode="iowait"}[5m])) > .10
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host CPU high iowait (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "CPU iowait > 10%. Your CPU is idling waiting for storage to respond.\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
- alert: HostSwapIsFillingUp
|
|
||||||
expr: ((1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80)
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host swap is filling up (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "Swap is filling up (>80%)\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
# - alert: HostSystemdServiceCrashed
|
|
||||||
# expr: (node_systemd_unit_state{state="failed"} == 1)
|
|
||||||
# for: 0m
|
|
||||||
# labels:
|
|
||||||
# severity: warning
|
|
||||||
# annotations:
|
|
||||||
# summary: Host systemd service crashed (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
# description: "systemd service crashed\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
- alert: HostOomKillDetected
|
|
||||||
expr: (increase(node_vmstat_oom_kill[1m]) > 0)
|
|
||||||
for: 0m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host OOM kill detected (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "OOM kill detected\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
- alert: HostClockSkew
|
|
||||||
expr: ((node_timex_offset_seconds > 0.05 and deriv(node_timex_offset_seconds[5m]) >= 0) or (node_timex_offset_seconds < -0.05 and deriv(node_timex_offset_seconds[5m]) <= 0))
|
|
||||||
for: 10m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host clock skew (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "Clock skew detected. Clock is out of sync. Ensure NTP is configured correctly on this host.\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
- alert: HostClockNotSynchronising
|
|
||||||
expr: (min_over_time(node_timex_sync_status[1m]) == 0 and node_timex_maxerror_seconds >= 16)
|
|
||||||
for: 2m
|
|
||||||
labels:
|
|
||||||
severity: warning
|
|
||||||
annotations:
|
|
||||||
summary: Host clock not synchronising (instance {{'{{'}} $labels.instance {{'}}'}})
|
|
||||||
description: "Clock not synchronising. Ensure NTP is configured on this host.\n VALUE = {{'{{'}} $value {{'}}'}}\n LABELS = {{'{{'}} $labels {{'}}'}}"
|
|
||||||
|
|
||||||
|
|
||||||
loki_settings:
|
loki_settings:
|
||||||
config:
|
config:
|
||||||
auth_enabled: false
|
auth_enabled: false
|
||||||
|
@ -1,59 +1,53 @@
|
|||||||
---
|
---
|
||||||
|
|
||||||
prometheus_options:
|
prometheus_settings:
|
||||||
global:
|
|
||||||
# Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
|
||||||
scrape_interval: 15s
|
|
||||||
# Evaluate rules every 15 seconds. The default is every 1 minute.
|
|
||||||
evaluation_interval: 15s
|
|
||||||
# scrape_timeout is set to the global default (10s).
|
|
||||||
|
|
||||||
# Attach these labels to any time series or alerts when communicating with
|
prometheus_default_settings:
|
||||||
# external systems (federation, remote storage, Alertmanager).
|
config:
|
||||||
external_labels:
|
global:
|
||||||
monitor: 'example'
|
# Set the scrape interval to every 15 seconds. Default is every 1 minute.
|
||||||
|
scrape_interval: 15s
|
||||||
# Alertmanager configuration
|
# Evaluate rules every 15 seconds. The default is every 1 minute.
|
||||||
alerting:
|
evaluation_interval: 15s
|
||||||
alertmanagers:
|
# scrape_timeout is set to the global default (10s).
|
||||||
- static_configs:
|
# Attach these labels to any time series or alerts when communicating with
|
||||||
- targets: ['localhost:9093']
|
# external systems (federation, remote storage, Alertmanager).
|
||||||
|
# external_labels:
|
||||||
# Load rules and evaluate them according to the global 'evaluation_interval'.
|
# monitor: 'example'
|
||||||
rule_files:
|
# Alertmanager configuration
|
||||||
- alerting_rules/*.yml
|
# alerting:
|
||||||
|
# alertmanagers:
|
||||||
# A scrape configuration containing exactly one endpoint to scrape:
|
# - static_configs:
|
||||||
# Here it's Prometheus itself.
|
# - targets: ['localhost:9093']
|
||||||
scrape_configs:
|
# Load rules and evaluate them according to the global 'evaluation_interval'.
|
||||||
# The job name is added as a label `job=<job_name>`.
|
rule_files:
|
||||||
- job_name: 'prometheus'
|
- alerting_rules/*.yml
|
||||||
|
# A scrape configuration containing exactly one endpoint to scrape:
|
||||||
# Override the global default and scrape targets from this job.
|
# Here it's Prometheus itself.
|
||||||
scrape_interval: 5s
|
scrape_configs:
|
||||||
scrape_timeout: 5s
|
# The job name is added as a label `job=<job_name>`.
|
||||||
|
- job_name: 'prometheus'
|
||||||
# metrics_path defaults to '/metrics'
|
# Override the global default and scrape targets from this job.
|
||||||
# scheme defaults to 'http'.
|
scrape_interval: 5s
|
||||||
|
scrape_timeout: 5s
|
||||||
static_configs:
|
# metrics_path defaults to '/metrics'
|
||||||
- targets: ['localhost:9090']
|
# scheme defaults to 'http'.
|
||||||
|
static_configs:
|
||||||
- job_name: node
|
- targets: ['localhost:9090']
|
||||||
# If prometheus-node-exporter is installed, grab stats about the local
|
- job_name: node
|
||||||
# machine by default.
|
# If prometheus-node-exporter is installed, grab stats about the local
|
||||||
static_configs:
|
# machine by default.
|
||||||
- targets: ['localhost:9100']
|
static_configs:
|
||||||
|
- targets: ['localhost:9100']
|
||||||
prometheus_alerting_rules:
|
alerting_rules:
|
||||||
groups:
|
# groups:
|
||||||
- name: ExampleRedisGroup
|
# - name: ExampleRedisGroup
|
||||||
rules:
|
# rules:
|
||||||
- alert: ExampleRedisDown
|
# - alert: ExampleRedisDown
|
||||||
expr: redis_up{} == 0
|
# expr: redis_up{} == 0
|
||||||
for: 2m
|
# for: 2m
|
||||||
labels:
|
# labels:
|
||||||
severity: critical
|
# severity: critical
|
||||||
annotations:
|
# annotations:
|
||||||
summary: "Redis instance down"
|
# summary: "Redis instance down"
|
||||||
description: "Whatever"
|
# description: "Whatever"
|
||||||
|
@ -1,5 +1,11 @@
|
|||||||
---
|
---
|
||||||
|
|
||||||
|
- name: Combine default and user settings, decrypt vault.
|
||||||
|
ansible.builtin.set_fact:
|
||||||
|
prometheus_settings: "{{ prometheus_default_settings |
|
||||||
|
ansible.builtin.combine(prometheus_settings, recursive=true) }}"
|
||||||
|
no_log: true
|
||||||
|
|
||||||
- name: Install prometheus apt package.
|
- name: Install prometheus apt package.
|
||||||
ansible.builtin.apt:
|
ansible.builtin.apt:
|
||||||
name: prometheus
|
name: prometheus
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
---
|
---
|
||||||
# Managed with Ansible
|
# Managed with Ansible
|
||||||
|
|
||||||
{{ prometheus_alerting_rules | ansible.builtin.to_nice_yaml(indent=2) }}
|
{{
|
||||||
|
prometheus_settings.alerting_rules |
|
||||||
|
ansible.builtin.to_nice_yaml(indent=2)
|
||||||
|
}}
|
||||||
|
@ -1,4 +1,7 @@
|
|||||||
---
|
---
|
||||||
# Managed with Ansible
|
# Managed with Ansible
|
||||||
|
|
||||||
{{ prometheus_options | ansible.builtin.to_nice_yaml(indent=2) }}
|
{{
|
||||||
|
prometheus_settings.config |
|
||||||
|
ansible.builtin.to_nice_yaml(indent=2, width=80)
|
||||||
|
}}
|
||||||
|
Loading…
Reference in New Issue
Block a user