- Created by Sansae, last modified on Feb 23, 2022
You are viewing an old version of this page. View the current version.
Compare with Current View Page History
« Previous Version 3 Current »
Prometheus
# Docker Compose definition for a single Prometheus server that joins the
# pre-existing "monitoring" network.
version: '3.8'

services:
  prometheus:
    container_name: prom
    # NOTE(review): ":latest" is unpinned, so flag compatibility depends on
    # whatever release is pulled - consider pinning a specific version.
    image: prom/prometheus:latest
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # "--storage.tsdb.retention" is the deprecated spelling of this flag and
      # is rejected by newer Prometheus releases; ".time" is the supported one.
      - '--storage.tsdb.retention.time=1h'
      - '--storage.tsdb.retention.size=1TB'
      - '--storage.tsdb.wal-compression'
      # NOTE(review): enables the HTTP reload/quit endpoints; port 9090 is
      # published below, so confirm the host is not reachable by untrusted
      # clients.
      - '--web.enable-lifecycle'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    volumes:
      # Main configuration file.
      - ../../config/prometheus-ec2/prometheus.yml:/etc/prometheus/prometheus.yml
      # Rule files; they are only loaded if prometheus.yml lists this
      # directory under "rule_files".
      - ./rules:/etc/prometheus/rules
    ports:
      - "9090:9090"

networks:
  default:
    # Attach to the already-existing "monitoring" network instead of creating
    # one. "external: {name: ...}" is deprecated for file format 3.5+.
    external: true
    name: monitoring
config/prometheus-ec2/prometheus.yml (mounted into the container as /etc/prometheus/prometheus.yml)
# Prometheus server configuration: scrapes Prometheus itself plus
# node-exporter on EC2 instances discovered by tag.
global:
  scrape_interval: 15s
  evaluation_interval: 15s
  external_labels:
    monitor: 'devops-monitoring'

# The compose file mounts ./rules at /etc/prometheus/rules; without an entry
# here none of those alerting/recording rules are loaded.
# Assumes the rule files use the ".yaml" extension (alerts.yaml, rules.yaml).
rule_files:
  - /etc/prometheus/rules/*.yaml

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    scrape_interval: 5s
    static_configs:
      - targets: ['localhost:9090']

  # node-exporter on every EC2 instance tagged monitoring=true.
  - job_name: 'node'
    metrics_path: /metrics
    ec2_sd_configs:
      - region: ap-northeast-2
        port: 9100  # node-exporter
        refresh_interval: 10s  # default 60s
        filters:
          - name: tag:monitoring
            values:
              - 'true'
    relabel_configs:
      # Scrape each instance on its private IP, port 9100.
      - source_labels: [__meta_ec2_private_ip]
        regex: '(.*)'
        replacement: '${1}:9100'
        action: replace
        target_label: __address__
      # Copy EC2 metadata into regular labels on every scraped series.
      - source_labels: [__meta_ec2_tag_Name]
        target_label: instance
      - source_labels: [__meta_ec2_tag_group]
        target_label: group
      - source_labels: [__meta_ec2_instance_type]
        target_label: instance_type
      - source_labels: [__meta_ec2_architecture]
        target_label: arch
Rules - alerts.yaml
# Alerting rules: Prometheus CPU usage and blackbox probe failures.
groups:
  - name: prometheus-cpu
    interval: 10s
    rules:
      - alert: alerts:cpu_usage:prometheus:80
        # Per-second CPU time over the last minute, as a percentage of one
        # core. The threshold matches the "80" in the alert name and title;
        # the previous "> 0" fired on any CPU usage at all.
        expr: rate(process_cpu_seconds_total{job=~"prometheus"}[1m]) * 100 > 80
        for: 5s
        labels:
          service: prometheus
          # Was misspelled "serverity". NOTE(review): if any Alertmanager
          # route matches on the misspelled key, update it together with this.
          severity: critical
        annotations:
          title: "Prometheus CPU alert 80%"
          summary: "{{ $labels.instance }}: {{ $value }} ( {{ $labels.job }} )"
          description: "{{ $labels.instance }}: {{ $value }} ( {{ $labels.job }} )"

  - name: blackbox-monitor
    interval: 10s
    limit: 0
    rules:
      - alert: alerts:probe_success
        # Fires for every target whose probe reports failure.
        expr: probe_success == 0
        for: 5s
        labels:
          # Was misspelled "serverity" (see the note on the CPU alert).
          severity: critical
        annotations:
          title: "black-box monitoring alert"
          summary: "black-box monitoring alert ( {{ $labels.job }} )"
          description: "{{ $labels.instance }} is down"
Rules - rules.yaml
# Recording rules.
groups:
  - name: dj-rule
    interval: 10s
    limit: 0
    rules:
      # Query duration sum divided by query count, per series.
      # Both operands carry the same label set, so default one-to-one
      # matching pairs them correctly. The former "on (slice) group_left"
      # gave the same result for a single Prometheus instance but fails with
      # a duplicate-series error once more than one instance exposes these
      # metrics.
      - record: rules:prometheus_engine_query:speed
        expr: >-
          prometheus_engine_query_duration_seconds_sum
          / prometheus_engine_query_duration_seconds_count
        labels:
          dj: test
Grafana
# Docker Compose definition for Grafana plus its image-renderer sidecar, both
# attached to the pre-existing "monitoring" network.
version: '3.8'

services:
  grafana:
    container_name: grafana
    # NOTE(review): ":latest" is unpinned - consider pinning a version.
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      # Datasource provisioning file.
      - ../../config/grafana/datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml
    environment:
      # Rendering is delegated to the "renderer" service below, which calls
      # back into Grafana at the callback URL.
      GF_RENDERING_SERVER_URL: "http://renderer:8081/render"
      GF_RENDERING_CALLBACK_URL: "http://grafana:3000/"
      GF_LOG_FILTERS: "rendering:debug"
      GF_USERS_DEFAULT_THEME: "light"
      # NOTE(review): anonymous access with the Admin role gives every
      # unauthenticated visitor on port 3000 full admin rights. Confirm this
      # is intentional (e.g. a sandbox) or lower the role to "Viewer".
      GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin"
      GF_AUTH_ANONYMOUS_ENABLED: "true"

  renderer:
    image: grafana/grafana-image-renderer:latest
    ports:
      - "8081:8081"
    environment:
      IGNORE_HTTPS_ERRORS: "true"

networks:
  default:
    # Attach to the already-existing "monitoring" network instead of creating
    # one. "external: {name: ...}" is deprecated for file format 3.5+.
    external: true
    name: monitoring
config/grafana/datasource.yml (mounted into the container as /etc/grafana/provisioning/datasources/datasource.yml)
# Grafana datasource provisioning: registers one Prometheus datasource.

# Version of this provisioning file format.
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    # "proxy": requests to the datasource go through the Grafana backend.
    access: proxy
    orgId: 1
    url: http://prometheus:9090
    basicAuth: false
    isDefault: false
    version: 1
    # Not editable from the Grafana UI.
    editable: false
- No labels