├── 01-monitoreo-recursos ├── docker-compose.yml ├── img │ └── arquitectura-monitoreo.png └── prometheus.yml ├── 02-monitoreo-logs ├── docker-compose.yml ├── img │ └── arquitectura-monitoreo-logs.png ├── loki-config.yml └── plugin loki.sh ├── 03-monitoreo-archivos ├── docker-compose.yml ├── guardar_comandos_tiempo_real.sh ├── img │ └── arquitectura-monitoreo-archivos.jpg ├── loki-config.yml └── promtail.yml ├── 04-monitoreo-centralizado ├── img │ └── docker-Monitoreo Centralizado.png ├── master │ ├── docker-compose.yml │ ├── loki-config.yml │ └── prometheus.yml └── slave │ ├── .env │ ├── docker-compose.yml │ ├── positions.yaml │ ├── prometheus.yml │ ├── promtail.yml │ └── startup.sh ├── 05-grafana-provisioning ├── agent │ ├── .env │ ├── docker-compose.yml │ ├── positions.yaml │ ├── prometheus.yml │ ├── promtail.yml │ └── startup.sh └── master │ ├── docker-compose.yml │ ├── grafana │ └── provisioning │ │ ├── dashboards │ │ ├── dashboard.yml │ │ └── docker-dashboad.json │ │ └── datasources │ │ └── datasource.yml │ └── prometheus.yml ├── 06-alertas-logs-loki ├── README.md ├── docker-compose.yml ├── grafana │ └── provisioning │ │ ├── dashboards │ │ ├── dashboard.yml │ │ └── python-dashboad.json │ │ └── datasources │ │ └── datasource.yml └── server.py ├── 08-blackbox-exporter ├── blackbox.yml ├── docker-compose.yml └── prometheus.yml └── README.md /01-monitoreo-recursos/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | volumes: 4 | grafana-data: 5 | prometheus-data: 6 | 7 | services: 8 | grafana: 9 | image: grafana/grafana:8.0.6 10 | container_name: grafana 11 | restart: unless-stopped 12 | volumes: 13 | - grafana-data:/var/lib/grafana 14 | ports: 15 | - 3000:3000 16 | 17 | prometheus: 18 | image: prom/prometheus:v2.28.1 19 | container_name: prometheus 20 | restart: unless-stopped 21 | volumes: 22 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 23 | - prometheus-data:/prometheus 24 | 
ports: 25 | - 9090:9090 26 | command: 27 | - '--config.file=/etc/prometheus/prometheus.yml' 28 | - '--storage.tsdb.path=/prometheus' 29 | - '--storage.tsdb.retention.time=1y' 30 | - '--web.enable-lifecycle' 31 | 32 | node_exporter: 33 | image: quay.io/prometheus/node-exporter:latest 34 | container_name: node_exporter 35 | restart: unless-stopped 36 | ports: 37 | - 9100:9100 38 | ###### linux 39 | # command: 40 | # - '--path.rootfs=/host' 41 | # pid: host 42 | # volumes: 43 | # - '/:/host:ro,rslave' 44 | ###### windows 45 | volumes: 46 | - /proc:/host/proc:ro 47 | - /sys:/host/sys:ro 48 | command: 49 | - '--path.procfs=/host/proc' 50 | - '--path.sysfs=/host/sys' 51 | - --collector.filesystem.ignored-mount-points 52 | - "^/(sys|proc|dev|host|etc|rootfs/var/lib/docker/containers|rootfs/var/lib/docker/overlay2|rootfs/run/docker/netns|rootfs/var/lib/docker/aufs)($$|/)" 53 | 54 | cadvisor: 55 | image: gcr.io/cadvisor/cadvisor:latest 56 | container_name: cadvisor 57 | restart: unless-stopped 58 | expose: 59 | - 8080 60 | volumes: 61 | - /:/rootfs:ro 62 | - /var/run:/var/run:rw 63 | - /sys:/sys:ro 64 | - /var/lib/docker/:/var/lib/docker:ro 65 | 66 | app_example: 67 | image: quay.io/brancz/prometheus-example-app:v0.3.0 68 | container_name: app_example 69 | restart: unless-stopped 70 | ports: 71 | - 80:8080 72 | 73 | -------------------------------------------------------------------------------- /01-monitoreo-recursos/img/arquitectura-monitoreo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caosbinario/observability/7e6197e2c92fb90b71f1244f4b5f8a942f802529/01-monitoreo-recursos/img/arquitectura-monitoreo.png -------------------------------------------------------------------------------- /01-monitoreo-recursos/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s # Set the scrape interval to every 15 seconds. 
Default is every 1 minute. 3 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. 4 | 5 | scrape_configs: 6 | - job_name: 'prometheus' 7 | static_configs: 8 | - targets: ['prometheus:9090'] 9 | 10 | - job_name: 'cadvisor' 11 | static_configs: 12 | - targets: ['cadvisor:8080'] 13 | 14 | - job_name: 'node_exporter' 15 | static_configs: 16 | - targets: ['node_exporter:9100'] 17 | 18 | - job_name: 'app_example' 19 | static_configs: 20 | - targets: ['app_example:8080'] 21 | #metrics_path: '/metrics' 22 | #metrics_path: '/prometheus' -------------------------------------------------------------------------------- /02-monitoreo-logs/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | volumes: 4 | grafana-data: 5 | loki-data: 6 | 7 | services: 8 | grafana: 9 | image: grafana/grafana:8.0.6 10 | container_name: grafana 11 | restart: unless-stopped 12 | volumes: 13 | - grafana-data:/var/lib/grafana 14 | ports: 15 | - 3000:3000 16 | 17 | loki: 18 | image: grafana/loki:2.0.0 19 | container_name: loki 20 | restart: unless-stopped 21 | volumes: 22 | - ./loki-config.yml:/mnt/config/loki-config.yml 23 | - loki-data:/loki 24 | ports: 25 | - 3100:3100 26 | command: 27 | - '-config.file=/mnt/config/loki-config.yml' 28 | 29 | nginx: 30 | image: nginx 31 | container_name: nginx 32 | restart: unless-stopped 33 | logging: 34 | driver: loki 35 | options: 36 | loki-url: "http://localhost:3100/loki/api/v1/push" 37 | ports: 38 | - 80:80 39 | 40 | -------------------------------------------------------------------------------- /02-monitoreo-logs/img/arquitectura-monitoreo-logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caosbinario/observability/7e6197e2c92fb90b71f1244f4b5f8a942f802529/02-monitoreo-logs/img/arquitectura-monitoreo-logs.png 
-------------------------------------------------------------------------------- /02-monitoreo-logs/loki-config.yml: -------------------------------------------------------------------------------- 1 | auth_enabled: false 2 | 3 | server: 4 | http_listen_port: 3100 5 | 6 | ingester: 7 | lifecycler: 8 | address: 127.0.0.1 9 | ring: 10 | kvstore: 11 | store: inmemory 12 | replication_factor: 1 13 | final_sleep: 0s 14 | chunk_idle_period: 1h # Any chunk not receiving new logs in this time will be flushed 15 | max_chunk_age: 1h # All chunks will be flushed when they hit this age, default is 1h 16 | chunk_target_size: 1048576 # Loki will attempt to build chunks up to 1MB, flushing first if chunk_idle_period or max_chunk_age is reached first 17 | chunk_retain_period: 30s # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m) 18 | max_transfer_retries: 0 # Chunk transfers disabled 19 | 20 | schema_config: 21 | configs: 22 | - from: 2020-10-24 23 | store: boltdb-shipper 24 | object_store: filesystem 25 | schema: v11 26 | index: 27 | prefix: index_ 28 | period: 24h 29 | 30 | storage_config: 31 | boltdb_shipper: 32 | active_index_directory: /loki/boltdb-shipper-active 33 | cache_location: /loki/boltdb-shipper-cache 34 | cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space 35 | shared_store: filesystem 36 | filesystem: 37 | directory: /loki/chunks 38 | 39 | compactor: 40 | working_directory: /loki/boltdb-shipper-compactor 41 | shared_store: filesystem 42 | 43 | limits_config: 44 | reject_old_samples: true 45 | reject_old_samples_max_age: 168h 46 | 47 | chunk_store_config: 48 | max_look_back_period: 0s 49 | 50 | table_manager: 51 | retention_deletes_enabled: true # true para que limite storage 52 | retention_period: 2160h #3 meses 53 | 54 | ruler: 55 | storage: 56 | type: local 57 | local: 58 | directory: /loki/rules 59 | rule_path: /loki/rules-temp 60 | alertmanager_url: 
http://localhost:9093 61 | ring: 62 | kvstore: 63 | store: inmemory 64 | enable_api: true 65 | -------------------------------------------------------------------------------- /02-monitoreo-logs/plugin loki.sh: -------------------------------------------------------------------------------- 1 | # descargar plugin 2 | docker plugin install grafana/loki-docker-driver:latest --alias loki --grant-all-permissions -------------------------------------------------------------------------------- /03-monitoreo-archivos/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | 3 | volumes: 4 | grafana-data: 5 | loki-data: 6 | promtail-data: 7 | 8 | services: 9 | grafana: 10 | image: grafana/grafana:8.0.6 11 | container_name: grafana 12 | restart: unless-stopped 13 | volumes: 14 | - grafana-data:/var/lib/grafana 15 | ports: 16 | - 3000:3000 17 | 18 | loki: 19 | image: grafana/loki:2.0.0 20 | container_name: loki 21 | restart: unless-stopped 22 | volumes: 23 | - ./loki-config.yml:/mnt/config/loki-config.yml 24 | - loki-data:/loki 25 | ports: 26 | - 3100:3100 27 | command: 28 | - '-config.file=/mnt/config/loki-config.yml' 29 | 30 | promtail: 31 | image: grafana/promtail:2.3.0 32 | container_name: promtail 33 | restart: unless-stopped 34 | volumes: 35 | - /var/log/:/var/log/:ro 36 | - /home/:/home_server:ro 37 | - /root/:/root_user:ro 38 | - ./promtail.yml:/etc/promtail/promtail.yml 39 | - promtail-data:/tmp 40 | command: -config.file=/etc/promtail/promtail.yml 41 | -------------------------------------------------------------------------------- /03-monitoreo-archivos/guardar_comandos_tiempo_real.sh: -------------------------------------------------------------------------------- 1 | # en comandos de "echo" 2 | echo '' >> ~/.bashrc 3 | echo '#guardar los comandos en tiempo real' >> ~/.bashrc 4 | echo 'shopt -s histappend' >> ~/.bashrc 5 | echo 'PROMPT_COMMAND="history -a;$PROMPT_COMMAND"' >> ~/.bashrc 6 | 
-------------------------------------------------------------------------------- /03-monitoreo-archivos/img/arquitectura-monitoreo-archivos.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caosbinario/observability/7e6197e2c92fb90b71f1244f4b5f8a942f802529/03-monitoreo-archivos/img/arquitectura-monitoreo-archivos.jpg -------------------------------------------------------------------------------- /03-monitoreo-archivos/loki-config.yml: -------------------------------------------------------------------------------- 1 | auth_enabled: false 2 | 3 | server: 4 | http_listen_port: 3100 5 | 6 | ingester: 7 | lifecycler: 8 | address: 127.0.0.1 9 | ring: 10 | kvstore: 11 | store: inmemory 12 | replication_factor: 1 13 | final_sleep: 0s 14 | chunk_idle_period: 1h # Any chunk not receiving new logs in this time will be flushed 15 | max_chunk_age: 1h # All chunks will be flushed when they hit this age, default is 1h 16 | chunk_target_size: 1048576 # Loki will attempt to build chunks up to 1MB, flushing first if chunk_idle_period or max_chunk_age is reached first 17 | chunk_retain_period: 30s # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m) 18 | max_transfer_retries: 0 # Chunk transfers disabled 19 | 20 | schema_config: 21 | configs: 22 | - from: 2020-10-24 23 | store: boltdb-shipper 24 | object_store: filesystem 25 | schema: v11 26 | index: 27 | prefix: index_ 28 | period: 24h 29 | 30 | storage_config: 31 | boltdb_shipper: 32 | active_index_directory: /loki/boltdb-shipper-active 33 | cache_location: /loki/boltdb-shipper-cache 34 | cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space 35 | shared_store: filesystem 36 | filesystem: 37 | directory: /loki/chunks 38 | 39 | compactor: 40 | working_directory: /loki/boltdb-shipper-compactor 41 | shared_store: filesystem 42 | 43 | limits_config: 44 | 
reject_old_samples: true 45 | reject_old_samples_max_age: 168h 46 | 47 | chunk_store_config: 48 | max_look_back_period: 0s 49 | 50 | table_manager: 51 | retention_deletes_enabled: true # true para que limite storage 52 | retention_period: 2160h #3 meses 53 | 54 | ruler: 55 | storage: 56 | type: local 57 | local: 58 | directory: /loki/rules 59 | rule_path: /loki/rules-temp 60 | alertmanager_url: http://localhost:9093 61 | ring: 62 | kvstore: 63 | store: inmemory 64 | enable_api: true 65 | -------------------------------------------------------------------------------- /03-monitoreo-archivos/promtail.yml: -------------------------------------------------------------------------------- 1 | server: 2 | http_listen_port: 9080 3 | grpc_listen_port: 0 4 | 5 | positions: 6 | filename: /tmp/positions.yml 7 | 8 | clients: 9 | - url: http://loki:3100/loki/api/v1/push 10 | 11 | scrape_configs: 12 | # Logs sistema 13 | - job_name: system 14 | static_configs: 15 | - targets: 16 | - localhost 17 | labels: 18 | job: varlogs 19 | __path__: /var/log/*log 20 | 21 | # Comandos usuarios 22 | - job_name: history 23 | static_configs: 24 | - targets: 25 | - localhost 26 | labels: 27 | job: history 28 | __path__: /home_server/*/.bash_history 29 | 30 | # Comandos Root 31 | - job_name: history_root 32 | static_configs: 33 | - targets: 34 | - localhost 35 | labels: 36 | job: history 37 | __path__: /root_user/.bash_history 38 | -------------------------------------------------------------------------------- /04-monitoreo-centralizado/img/docker-Monitoreo Centralizado.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caosbinario/observability/7e6197e2c92fb90b71f1244f4b5f8a942f802529/04-monitoreo-centralizado/img/docker-Monitoreo Centralizado.png -------------------------------------------------------------------------------- /04-monitoreo-centralizado/master/docker-compose.yml: 
-------------------------------------------------------------------------------- 1 | version: "3.3" 2 | 3 | volumes: 4 | grafana-data: 5 | prometheus-data: 6 | loki-data: 7 | 8 | services: 9 | loki: 10 | image: grafana/loki:2.0.0 11 | container_name: master_loki 12 | restart: unless-stopped 13 | volumes: 14 | - ./loki-config.yml:/mnt/config/loki-config.yml 15 | - loki-data:/loki 16 | ports: 17 | - 3100:3100 18 | command: 19 | - '-config.file=/mnt/config/loki-config.yml' 20 | 21 | grafana: 22 | image: grafana/grafana:8.0.6 23 | container_name: master_grafana 24 | restart: unless-stopped 25 | volumes: 26 | - grafana-data:/var/lib/grafana 27 | ports: 28 | - 3000:3000 29 | 30 | prometheus: 31 | image: prom/prometheus:v2.30.0 32 | container_name: master_prometheus 33 | restart: unless-stopped 34 | volumes: 35 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 36 | - prometheus-data:/prometheus 37 | ports: 38 | - 9090:9090 39 | command: 40 | - '--config.file=/etc/prometheus/prometheus.yml' 41 | - '--storage.tsdb.path=/prometheus' 42 | - '--storage.tsdb.retention.time=90d' 43 | - '--storage.tsdb.retention.size=100GB' 44 | - '--web.enable-lifecycle' 45 | -------------------------------------------------------------------------------- /04-monitoreo-centralizado/master/loki-config.yml: -------------------------------------------------------------------------------- 1 | auth_enabled: false 2 | 3 | server: 4 | http_listen_port: 3100 5 | 6 | ingester: 7 | lifecycler: 8 | address: 127.0.0.1 9 | ring: 10 | kvstore: 11 | store: inmemory 12 | replication_factor: 1 13 | final_sleep: 0s 14 | chunk_idle_period: 1h # Any chunk not receiving new logs in this time will be flushed 15 | max_chunk_age: 1h # All chunks will be flushed when they hit this age, default is 1h 16 | chunk_target_size: 1048576 # Loki will attempt to build chunks up to 1MB, flushing first if chunk_idle_period or max_chunk_age is reached first 17 | chunk_retain_period: 30s # Must be greater than index read cache 
TTL if using an index cache (Default index read cache TTL is 5m) 18 | max_transfer_retries: 0 # Chunk transfers disabled 19 | 20 | schema_config: 21 | configs: 22 | - from: 2020-10-24 23 | store: boltdb-shipper 24 | object_store: filesystem 25 | schema: v11 26 | index: 27 | prefix: index_ 28 | period: 24h 29 | 30 | storage_config: 31 | boltdb_shipper: 32 | active_index_directory: /loki/boltdb-shipper-active 33 | cache_location: /loki/boltdb-shipper-cache 34 | cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space 35 | shared_store: filesystem 36 | filesystem: 37 | directory: /loki/chunks 38 | 39 | compactor: 40 | working_directory: /loki/boltdb-shipper-compactor 41 | shared_store: filesystem 42 | 43 | limits_config: 44 | reject_old_samples: true 45 | reject_old_samples_max_age: 168h 46 | 47 | chunk_store_config: 48 | max_look_back_period: 0s 49 | 50 | table_manager: 51 | retention_deletes_enabled: true # true para que limite storage 52 | retention_period: 2160h #3 meses 53 | 54 | ruler: 55 | storage: 56 | type: local 57 | local: 58 | directory: /loki/rules 59 | rule_path: /loki/rules-temp 60 | alertmanager_url: http://localhost:9093 61 | ring: 62 | kvstore: 63 | store: inmemory 64 | enable_api: true 65 | -------------------------------------------------------------------------------- /04-monitoreo-centralizado/master/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 30s # El intervalo en el que se traerá las métricas 3 | evaluation_interval: 30s # Evaluará las reglas cada 30 segundos, por defecto es cada minuto 4 | 5 | scrape_configs: 6 | 7 | - job_name: 'Prometheus_master' 8 | honor_labels: true 9 | metrics_path: '/federate' 10 | params: 11 | 'match[]': 12 | - '{__name__=~".+"}' 13 | static_configs: 14 | # ------------------------------------- # 15 | - targets: [ '192.168.0.151:9090'] # DEV 16 | labels: 17 | pais: AR 18 | entorno: DEV 
19 | cliente: caosbinario 20 | hostname: dev-caosbinario-ar 21 | # ------------------------------------- # 22 | - targets: [ '192.168.0.152:9090'] # QA 23 | labels: 24 | pais: AR 25 | entorno: QA 26 | cliente: caosbinario 27 | hostname: qa-caosbinario-ar 28 | # ------------------------------------- # 29 | 30 | 31 | -------------------------------------------------------------------------------- /04-monitoreo-centralizado/slave/.env: -------------------------------------------------------------------------------- 1 | pais= #AR/BR/CH 2 | entorno= #DEV/QA/PRD 3 | cliente= #caosbinario/cliente2/cliente3 4 | hostname= #hostname 5 | lokiURL=http://lokiURL:lokiPort -------------------------------------------------------------------------------- /04-monitoreo-centralizado/slave/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | volumes: 4 | prometheus-data: 5 | 6 | services: 7 | node_exporter: 8 | image: quay.io/prometheus/node-exporter:v1.2.2 9 | container_name: slave_node_exporter 10 | command: 11 | - '--path.rootfs=/host' 12 | restart: unless-stopped 13 | env_file: 14 | - .env 15 | pid: host 16 | expose: 17 | - 9100 18 | volumes: 19 | - '/:/host:ro,rslave' 20 | logging: 21 | driver: loki 22 | options: 23 | loki-url: "${lokiURL}/loki/api/v1/push" 24 | 25 | prometheus: 26 | image: prom/prometheus:v2.28.1 27 | container_name: slave_prometheus 28 | restart: unless-stopped 29 | env_file: 30 | - .env 31 | ports: 32 | - 9090:9090 33 | volumes: 34 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 35 | - prometheus-data:/prometheus 36 | logging: 37 | driver: loki 38 | options: 39 | loki-url: "${lokiURL}/loki/api/v1/push" 40 | 41 | cadvisor: 42 | image: gcr.io/cadvisor/cadvisor:v0.38.6 43 | container_name: slave_cAdvisor 44 | restart: unless-stopped 45 | env_file: 46 | - .env 47 | expose: 48 | - 8080 49 | volumes: 50 | - /:/rootfs:ro 51 | - /var/run:/var/run:rw 52 | - /sys:/sys:ro 53 | - 
/var/lib/docker/:/var/lib/docker:ro 54 | logging: 55 | driver: loki 56 | options: 57 | loki-url: "${lokiURL}/loki/api/v1/push" 58 | 59 | promtail: 60 | image: grafana/promtail:2.3.0 61 | container_name: slave_promtail 62 | restart: unless-stopped 63 | env_file: 64 | - .env 65 | volumes: 66 | - /var/log/:/var/log/:ro 67 | - /home/:/home_server:ro 68 | - /root/:/root_user:ro 69 | - ./promtail.yml:/etc/promtail/promtail.yml 70 | - ./positions.yaml:/tmp/positions.yaml 71 | command: -config.file=/etc/promtail/promtail.yml -config.expand-env=true # env vars 72 | logging: 73 | driver: loki 74 | options: 75 | loki-url: "${lokiURL}/loki/api/v1/push" 76 | 77 | -------------------------------------------------------------------------------- /04-monitoreo-centralizado/slave/positions.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/caosbinario/observability/7e6197e2c92fb90b71f1244f4b5f8a942f802529/04-monitoreo-centralizado/slave/positions.yaml -------------------------------------------------------------------------------- /04-monitoreo-centralizado/slave/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 30s # El intervalo en el que se traerá las métricas 3 | evaluation_interval: 30s # Evaluará las reglas cada 30 segundos, por defecto es cada minuto 4 | 5 | scrape_configs: 6 | 7 | - job_name: node_exporter 8 | static_configs: 9 | - targets: ['node_exporter:9100'] 10 | 11 | - job_name: cadvisor 12 | scrape_interval: 30s 13 | static_configs: 14 | - targets: 15 | - cadvisor:8080 16 | 17 | -------------------------------------------------------------------------------- /04-monitoreo-centralizado/slave/promtail.yml: -------------------------------------------------------------------------------- 1 | server: 2 | http_listen_port: 9080 3 | grpc_listen_port: 0 4 | 5 | positions: 6 | filename: /tmp/positions.yaml 7 | 8 | clients: 9 | - 
url: ${lokiURL}/loki/api/v1/push 10 | 11 | scrape_configs: 12 | # Logs sistema 13 | - job_name: system 14 | static_configs: 15 | - targets: 16 | - localhost 17 | labels: 18 | job: varlogs 19 | pais: ${pais} 20 | entorno: ${entorno} 21 | cliente: ${cliente} 22 | hostname: ${hostname} 23 | __path__: /var/log/*log 24 | 25 | # Comandos usuarios 26 | - job_name: history 27 | static_configs: 28 | - targets: 29 | - localhost 30 | labels: 31 | job: history 32 | pais: ${pais} 33 | entorno: ${entorno} 34 | cliente: ${cliente} 35 | hostname: ${hostname} 36 | __path__: /home_server/*/.bash_history 37 | 38 | # Comandos Root 39 | - job_name: history_root 40 | static_configs: 41 | - targets: 42 | - localhost 43 | labels: 44 | job: history 45 | pais: ${pais} 46 | entorno: ${entorno} 47 | cliente: ${cliente} 48 | hostname: ${hostname} 49 | __path__: /root_user/.bash_history 50 | -------------------------------------------------------------------------------- /04-monitoreo-centralizado/slave/startup.sh: -------------------------------------------------------------------------------- 1 | # comandos en tiempo real 2 | echo '' >> ~/.bashrc 3 | echo '#guardar los comandos en tiempo real' >> ~/.bashrc 4 | echo 'shopt -s histappend' >> ~/.bashrc 5 | echo 'PROMPT_COMMAND="history -a;$PROMPT_COMMAND"' >> ~/.bashrc 6 | 7 | # descargar plugin 8 | docker plugin install grafana/loki-docker-driver:latest --alias loki --grant-all-permissions 9 | 10 | -------------------------------------------------------------------------------- /05-grafana-provisioning/agent/.env: -------------------------------------------------------------------------------- 1 | pais=AR #AR/BR/CH 2 | entorno=DEV #DEV/QA/PRD 3 | cliente=caosbinario #caosbinario/cliente2/cliente3 4 | hostname=agent #hostname 5 | lokiURL=http://192.168.56.101:3100 -------------------------------------------------------------------------------- /05-grafana-provisioning/agent/docker-compose.yml: 
-------------------------------------------------------------------------------- 1 | version: "3.7" 2 | 3 | volumes: 4 | prometheus-data: 5 | 6 | services: 7 | node_exporter: 8 | image: prom/node-exporter 9 | container_name: slave-node-exporter 10 | command: 11 | - '--path.rootfs=/host' 12 | restart: unless-stopped 13 | env_file: 14 | - .env 15 | pid: host 16 | expose: 17 | - 9100 18 | volumes: 19 | - '/:/host:ro,rslave' 20 | logging: 21 | driver: loki 22 | options: 23 | loki-url: "${lokiURL}/loki/api/v1/push" 24 | 25 | prometheus: 26 | image: prom/prometheus:latest 27 | container_name: slave-prometheus 28 | restart: unless-stopped 29 | env_file: 30 | - .env 31 | ports: 32 | - 9090:9090 33 | volumes: 34 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 35 | - prometheus-data:/prometheus 36 | logging: 37 | driver: loki 38 | options: 39 | loki-url: "${lokiURL}/loki/api/v1/push" 40 | 41 | cadvisor: 42 | image: gcr.io/cadvisor/cadvisor:v0.38.6 43 | container_name: slave-cAdvisor 44 | restart: unless-stopped 45 | env_file: 46 | - .env 47 | expose: 48 | - 8080 49 | volumes: 50 | - /:/rootfs:ro 51 | - /var/run:/var/run:rw 52 | - /sys:/sys:ro 53 | - /var/lib/docker/:/var/lib/docker:ro 54 | logging: 55 | driver: loki 56 | options: 57 | loki-url: "${lokiURL}/loki/api/v1/push" 58 | 59 | promtail: 60 | image: grafana/promtail:2.3.0 61 | container_name: slave-promtail 62 | restart: unless-stopped 63 | env_file: 64 | - .env 65 | volumes: 66 | - /var/log/:/var/log/:ro 67 | - /home/:/home_server:ro 68 | - /root/:/root_user:ro 69 | - ./promtail.yml:/etc/promtail/promtail.yml 70 | - ./positions.yaml:/tmp/positions.yaml 71 | command: -config.file=/etc/promtail/promtail.yml -config.expand-env=true # env vars 72 | logging: 73 | driver: loki 74 | options: 75 | loki-url: "${lokiURL}/loki/api/v1/push" 76 | 77 | -------------------------------------------------------------------------------- /05-grafana-provisioning/agent/positions.yaml: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/caosbinario/observability/7e6197e2c92fb90b71f1244f4b5f8a942f802529/05-grafana-provisioning/agent/positions.yaml -------------------------------------------------------------------------------- /05-grafana-provisioning/agent/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 30s # El intervalo en el que se traerá las métricas 3 | evaluation_interval: 30s # Evaluará las reglas cada 30 segundos, por defecto es cada minuto 4 | 5 | scrape_configs: 6 | 7 | - job_name: node_exporter 8 | static_configs: 9 | - targets: ['node_exporter:9100'] 10 | 11 | - job_name: cadvisor 12 | scrape_interval: 30s 13 | static_configs: 14 | - targets: 15 | - cadvisor:8080 16 | 17 | -------------------------------------------------------------------------------- /05-grafana-provisioning/agent/promtail.yml: -------------------------------------------------------------------------------- 1 | server: 2 | http_listen_port: 9080 3 | grpc_listen_port: 0 4 | 5 | positions: 6 | filename: /tmp/positions.yaml 7 | 8 | clients: 9 | - url: ${lokiURL}/loki/api/v1/push 10 | 11 | scrape_configs: 12 | # Logs sistema 13 | - job_name: system 14 | static_configs: 15 | - targets: 16 | - localhost 17 | labels: 18 | job: varlogs 19 | pais: ${pais} 20 | entorno: ${entorno} 21 | cliente: ${cliente} 22 | hostname: ${hostname} 23 | __path__: /var/log/*log 24 | 25 | # Comandos usuarios 26 | - job_name: history 27 | static_configs: 28 | - targets: 29 | - localhost 30 | labels: 31 | job: history 32 | pais: ${pais} 33 | entorno: ${entorno} 34 | cliente: ${cliente} 35 | hostname: ${hostname} 36 | __path__: /home_server/*/.bash_history 37 | 38 | # Comandos Root 39 | - job_name: history_root 40 | static_configs: 41 | - targets: 42 | - localhost 43 | labels: 44 | job: history 45 | pais: ${pais} 46 | entorno: ${entorno} 47 | cliente: 
${cliente} 48 | hostname: ${hostname} 49 | __path__: /root_user/.bash_history 50 | -------------------------------------------------------------------------------- /05-grafana-provisioning/agent/startup.sh: -------------------------------------------------------------------------------- 1 | # comandos en tiempo real 2 | echo '' >> ~/.bashrc 3 | echo '#guardar los comandos en tiempo real' >> ~/.bashrc 4 | echo 'shopt -s histappend' >> ~/.bashrc 5 | echo 'PROMPT_COMMAND="history -a;$PROMPT_COMMAND"' >> ~/.bashrc 6 | 7 | # descargar plugin 8 | docker plugin install grafana/loki-docker-driver:latest --alias loki --grant-all-permissions 9 | 10 | -------------------------------------------------------------------------------- /05-grafana-provisioning/master/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | 3 | volumes: 4 | grafana-data: 5 | prometheus-data: 6 | loki-data: 7 | 8 | services: 9 | loki: 10 | image: grafana/loki:latest 11 | container_name: master-loki 12 | restart: unless-stopped 13 | volumes: 14 | - loki-data:/loki 15 | ports: 16 | - 3100:3100 17 | command: -config.file=/etc/loki/local-config.yaml 18 | 19 | grafana: 20 | image: grafana/grafana:latest 21 | container_name: master-grafana 22 | restart: unless-stopped 23 | volumes: 24 | - grafana-data:/var/lib/grafana 25 | - ./grafana/provisioning/:/etc/grafana/provisioning/ 26 | ports: 27 | - 3000:3000 28 | 29 | prometheus: 30 | image: prom/prometheus:latest 31 | container_name: master-prometheus 32 | restart: unless-stopped 33 | volumes: 34 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 35 | - prometheus-data:/prometheus 36 | ports: 37 | - 9090:9090 38 | command: 39 | - '--config.file=/etc/prometheus/prometheus.yml' 40 | - '--storage.tsdb.path=/prometheus' 41 | - '--storage.tsdb.retention.time=90d' 42 | - '--storage.tsdb.retention.size=100GB' 43 | - '--web.enable-lifecycle' 44 | 
-------------------------------------------------------------------------------- /05-grafana-provisioning/master/grafana/provisioning/dashboards/dashboard.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'Prometheus' 5 | orgId: 1 6 | folder: '' 7 | type: file 8 | disableDeletion: false 9 | editable: true 10 | options: 11 | path: /etc/grafana/provisioning/dashboards -------------------------------------------------------------------------------- /05-grafana-provisioning/master/grafana/provisioning/dashboards/docker-dashboad.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": "-- Grafana --", 7 | "enable": true, 8 | "hide": true, 9 | "iconColor": "rgba(0, 211, 255, 1)", 10 | "name": "Annotations & Alerts", 11 | "type": "dashboard" 12 | } 13 | ] 14 | }, 15 | "description": "Docker Monitoring Template", 16 | "editable": true, 17 | "gnetId": 179, 18 | "graphTooltip": 1, 19 | "id": 1, 20 | "iteration": 1571330223815, 21 | "links": [], 22 | "panels": [ 23 | { 24 | "collapsed": false, 25 | "datasource": null, 26 | "gridPos": { 27 | "h": 1, 28 | "w": 24, 29 | "x": 0, 30 | "y": 0 31 | }, 32 | "id": 17, 33 | "panels": [], 34 | "title": "Host Info", 35 | "type": "row" 36 | }, 37 | { 38 | "cacheTimeout": null, 39 | "colorBackground": false, 40 | "colorValue": false, 41 | "colors": [ 42 | "#299c46", 43 | "rgba(237, 129, 40, 0.89)", 44 | "#d44a3a" 45 | ], 46 | "datasource": "Prometheus", 47 | "decimals": null, 48 | "format": "s", 49 | "gauge": { 50 | "maxValue": 100, 51 | "minValue": 0, 52 | "show": false, 53 | "thresholdLabels": false, 54 | "thresholdMarkers": true 55 | }, 56 | "gridPos": { 57 | "h": 7, 58 | "w": 3, 59 | "x": 0, 60 | "y": 1 61 | }, 62 | "id": 15, 63 | "interval": null, 64 | "links": [], 65 | "mappingType": 1, 66 | "mappingTypes": [ 67 | { 68 | "name": "value to 
text", 69 | "value": 1 70 | }, 71 | { 72 | "name": "range to text", 73 | "value": 2 74 | } 75 | ], 76 | "maxDataPoints": 100, 77 | "nullPointMode": "connected", 78 | "nullText": null, 79 | "options": {}, 80 | "postfix": "", 81 | "postfixFontSize": "50%", 82 | "prefix": "", 83 | "prefixFontSize": "50%", 84 | "rangeMaps": [ 85 | { 86 | "from": "null", 87 | "text": "N/A", 88 | "to": "null" 89 | } 90 | ], 91 | "sparkline": { 92 | "fillColor": "rgba(31, 118, 189, 0.18)", 93 | "full": false, 94 | "lineColor": "rgb(31, 120, 193)", 95 | "show": false 96 | }, 97 | "tableColumn": "", 98 | "targets": [ 99 | { 100 | "expr": "time() - process_start_time_seconds{job=\"prometheus\"}", 101 | "format": "time_series", 102 | "intervalFactor": 1, 103 | "refId": "A" 104 | } 105 | ], 106 | "thresholds": "", 107 | "title": "Uptime", 108 | "type": "singlestat", 109 | "valueFontSize": "80%", 110 | "valueMaps": [ 111 | { 112 | "op": "=", 113 | "text": "N/A", 114 | "value": "null" 115 | } 116 | ], 117 | "valueName": "current" 118 | }, 119 | { 120 | "cacheTimeout": null, 121 | "colorBackground": false, 122 | "colorValue": false, 123 | "colors": [ 124 | "#299c46", 125 | "rgba(237, 129, 40, 0.89)", 126 | "#d44a3a" 127 | ], 128 | "datasource": "Prometheus", 129 | "format": "short", 130 | "gauge": { 131 | "maxValue": 100, 132 | "minValue": 0, 133 | "show": false, 134 | "thresholdLabels": false, 135 | "thresholdMarkers": true 136 | }, 137 | "gridPos": { 138 | "h": 4, 139 | "w": 3, 140 | "x": 3, 141 | "y": 1 142 | }, 143 | "id": 35, 144 | "interval": null, 145 | "links": [], 146 | "mappingType": 1, 147 | "mappingTypes": [ 148 | { 149 | "name": "value to text", 150 | "value": 1 151 | }, 152 | { 153 | "name": "range to text", 154 | "value": 2 155 | } 156 | ], 157 | "maxDataPoints": 100, 158 | "nullPointMode": "connected", 159 | "nullText": null, 160 | "options": {}, 161 | "postfix": "", 162 | "postfixFontSize": "50%", 163 | "prefix": "", 164 | "prefixFontSize": "50%", 165 | "rangeMaps": [ 166 | { 167 
| "from": "null", 168 | "text": "N/A", 169 | "to": "null" 170 | } 171 | ], 172 | "sparkline": { 173 | "fillColor": "rgba(31, 118, 189, 0.18)", 174 | "full": false, 175 | "lineColor": "rgb(31, 120, 193)", 176 | "show": false, 177 | "ymax": null, 178 | "ymin": null 179 | }, 180 | "tableColumn": "", 181 | "targets": [ 182 | { 183 | "expr": "count(count(node_cpu_seconds_total{instance=~\"$node\", mode='system'}) by (cpu))", 184 | "instant": true, 185 | "refId": "A" 186 | } 187 | ], 188 | "thresholds": "", 189 | "timeFrom": null, 190 | "timeShift": null, 191 | "title": "CPU Cores", 192 | "type": "singlestat", 193 | "valueFontSize": "80%", 194 | "valueMaps": [ 195 | { 196 | "op": "=", 197 | "text": "N/A", 198 | "value": "null" 199 | } 200 | ], 201 | "valueName": "current" 202 | }, 203 | { 204 | "cacheTimeout": null, 205 | "colorBackground": true, 206 | "colorValue": false, 207 | "colors": [ 208 | "#299c46", 209 | "rgba(237, 129, 40, 0.89)", 210 | "#d44a3a" 211 | ], 212 | "datasource": "Prometheus", 213 | "format": "none", 214 | "gauge": { 215 | "maxValue": 100, 216 | "minValue": 0, 217 | "show": false, 218 | "thresholdLabels": false, 219 | "thresholdMarkers": true 220 | }, 221 | "gridPos": { 222 | "h": 7, 223 | "w": 5, 224 | "x": 6, 225 | "y": 1 226 | }, 227 | "id": 13, 228 | "interval": null, 229 | "links": [], 230 | "mappingType": 1, 231 | "mappingTypes": [ 232 | { 233 | "name": "value to text", 234 | "value": 1 235 | }, 236 | { 237 | "name": "range to text", 238 | "value": 2 239 | } 240 | ], 241 | "maxDataPoints": 100, 242 | "nullPointMode": "connected", 243 | "nullText": null, 244 | "options": {}, 245 | "postfix": "", 246 | "postfixFontSize": "50%", 247 | "prefix": "", 248 | "prefixFontSize": "50%", 249 | "rangeMaps": [ 250 | { 251 | "from": "null", 252 | "text": "N/A", 253 | "to": "null" 254 | } 255 | ], 256 | "sparkline": { 257 | "fillColor": "rgba(31, 118, 189, 0.18)", 258 | "full": false, 259 | "lineColor": "rgb(31, 120, 193)", 260 | "show": false 261 | }, 262 | 
"tableColumn": "", 263 | "targets": [ 264 | { 265 | "expr": "sum(ALERTS)", 266 | "format": "time_series", 267 | "intervalFactor": 1, 268 | "refId": "A" 269 | } 270 | ], 271 | "thresholds": "0,1", 272 | "title": "Alerts", 273 | "type": "singlestat", 274 | "valueFontSize": "80%", 275 | "valueMaps": [ 276 | { 277 | "op": "=", 278 | "text": "N/A", 279 | "value": "0" 280 | } 281 | ], 282 | "valueName": "avg" 283 | }, 284 | { 285 | "cacheTimeout": null, 286 | "colorBackground": true, 287 | "colorValue": false, 288 | "colors": [ 289 | "#d44a3a", 290 | "rgba(237, 129, 40, 0.89)", 291 | "#299c46" 292 | ], 293 | "datasource": "Prometheus", 294 | "format": "none", 295 | "gauge": { 296 | "maxValue": 100, 297 | "minValue": 0, 298 | "show": false, 299 | "thresholdLabels": false, 300 | "thresholdMarkers": true 301 | }, 302 | "gridPos": { 303 | "h": 7, 304 | "w": 4, 305 | "x": 11, 306 | "y": 1 307 | }, 308 | "id": 11, 309 | "interval": null, 310 | "links": [], 311 | "mappingType": 1, 312 | "mappingTypes": [ 313 | { 314 | "name": "value to text", 315 | "value": 1 316 | }, 317 | { 318 | "name": "range to text", 319 | "value": 2 320 | } 321 | ], 322 | "maxDataPoints": 100, 323 | "nullPointMode": "connected", 324 | "nullText": null, 325 | "options": {}, 326 | "postfix": "", 327 | "postfixFontSize": "50%", 328 | "prefix": "", 329 | "prefixFontSize": "50%", 330 | "rangeMaps": [ 331 | { 332 | "from": "null", 333 | "text": "N/A", 334 | "to": "null" 335 | } 336 | ], 337 | "sparkline": { 338 | "fillColor": "rgba(31, 118, 189, 0.18)", 339 | "full": false, 340 | "lineColor": "rgb(31, 120, 193)", 341 | "show": false 342 | }, 343 | "tableColumn": "", 344 | "targets": [ 345 | { 346 | "expr": "sum(up)", 347 | "format": "time_series", 348 | "intervalFactor": 1, 349 | "refId": "A" 350 | } 351 | ], 352 | "thresholds": "0,1", 353 | "title": "Targets Online", 354 | "type": "singlestat", 355 | "valueFontSize": "80%", 356 | "valueMaps": [ 357 | { 358 | "op": "=", 359 | "text": "N/A", 360 | "value": 
"null" 361 | } 362 | ], 363 | "valueName": "current" 364 | }, 365 | { 366 | "cacheTimeout": null, 367 | "colorBackground": false, 368 | "colorValue": false, 369 | "colors": [ 370 | "#d44a3a", 371 | "rgba(237, 129, 40, 0.89)", 372 | "#299c46" 373 | ], 374 | "datasource": "Prometheus", 375 | "format": "none", 376 | "gauge": { 377 | "maxValue": 100, 378 | "minValue": 0, 379 | "show": false, 380 | "thresholdLabels": false, 381 | "thresholdMarkers": true 382 | }, 383 | "gridPos": { 384 | "h": 7, 385 | "w": 4, 386 | "x": 15, 387 | "y": 1 388 | }, 389 | "id": 31, 390 | "interval": null, 391 | "links": [], 392 | "mappingType": 1, 393 | "mappingTypes": [ 394 | { 395 | "name": "value to text", 396 | "value": 1 397 | }, 398 | { 399 | "name": "range to text", 400 | "value": 2 401 | } 402 | ], 403 | "maxDataPoints": 100, 404 | "nullPointMode": "connected", 405 | "nullText": null, 406 | "options": {}, 407 | "postfix": "", 408 | "postfixFontSize": "50%", 409 | "prefix": "", 410 | "prefixFontSize": "50%", 411 | "rangeMaps": [ 412 | { 413 | "from": "null", 414 | "text": "N/A", 415 | "to": "null" 416 | } 417 | ], 418 | "sparkline": { 419 | "fillColor": "rgba(31, 118, 189, 0.18)", 420 | "full": false, 421 | "lineColor": "rgb(31, 120, 193)", 422 | "show": true 423 | }, 424 | "tableColumn": "", 425 | "targets": [ 426 | { 427 | "expr": "count(rate(container_last_seen{job=\"cadvisor\", name!=\"\"}[5m]))", 428 | "format": "time_series", 429 | "intervalFactor": 1, 430 | "refId": "A" 431 | } 432 | ], 433 | "thresholds": "0,1", 434 | "title": "Running Containers", 435 | "type": "singlestat", 436 | "valueFontSize": "80%", 437 | "valueMaps": [ 438 | { 439 | "op": "=", 440 | "text": "N/A", 441 | "value": "null" 442 | } 443 | ], 444 | "valueName": "current" 445 | }, 446 | { 447 | "cacheTimeout": null, 448 | "colorBackground": false, 449 | "colorValue": false, 450 | "colors": [ 451 | "#299c46", 452 | "rgba(237, 129, 40, 0.89)", 453 | "#d44a3a" 454 | ], 455 | "datasource": null, 456 | "decimals": 
null, 457 | "format": "decbytes", 458 | "gauge": { 459 | "maxValue": 100, 460 | "minValue": 0, 461 | "show": false, 462 | "thresholdLabels": false, 463 | "thresholdMarkers": true 464 | }, 465 | "gridPos": { 466 | "h": 3, 467 | "w": 3, 468 | "x": 3, 469 | "y": 5 470 | }, 471 | "id": 37, 472 | "interval": null, 473 | "links": [], 474 | "mappingType": 1, 475 | "mappingTypes": [ 476 | { 477 | "name": "value to text", 478 | "value": 1 479 | }, 480 | { 481 | "name": "range to text", 482 | "value": 2 483 | } 484 | ], 485 | "maxDataPoints": 100, 486 | "nullPointMode": "connected", 487 | "nullText": null, 488 | "options": {}, 489 | "postfix": "", 490 | "postfixFontSize": "50%", 491 | "prefix": "", 492 | "prefixFontSize": "50%", 493 | "rangeMaps": [ 494 | { 495 | "from": "null", 496 | "text": "N/A", 497 | "to": "null" 498 | } 499 | ], 500 | "sparkline": { 501 | "fillColor": "rgba(31, 118, 189, 0.18)", 502 | "full": false, 503 | "lineColor": "rgb(31, 120, 193)", 504 | "show": false, 505 | "ymax": null, 506 | "ymin": null 507 | }, 508 | "tableColumn": "", 509 | "targets": [ 510 | { 511 | "expr": "node_memory_MemTotal_bytes{instance=~\"$node\"}", 512 | "refId": "A" 513 | } 514 | ], 515 | "thresholds": "", 516 | "timeFrom": null, 517 | "timeShift": null, 518 | "title": "Host Memory", 519 | "type": "singlestat", 520 | "valueFontSize": "80%", 521 | "valueMaps": [ 522 | { 523 | "op": "=", 524 | "text": "N/A", 525 | "value": "null" 526 | } 527 | ], 528 | "valueName": "current" 529 | }, 530 | { 531 | "cacheTimeout": null, 532 | "colorBackground": false, 533 | "colorValue": false, 534 | "colors": [ 535 | "rgba(50, 172, 45, 0.97)", 536 | "rgba(237, 129, 40, 0.89)", 537 | "rgba(245, 54, 54, 0.9)" 538 | ], 539 | "datasource": "Prometheus", 540 | "editable": true, 541 | "error": false, 542 | "format": "percent", 543 | "gauge": { 544 | "maxValue": 100, 545 | "minValue": 0, 546 | "show": true, 547 | "thresholdLabels": false, 548 | "thresholdMarkers": true 549 | }, 550 | "gridPos": { 551 | 
"h": 6, 552 | "w": 6, 553 | "x": 0, 554 | "y": 8 555 | }, 556 | "id": 4, 557 | "interval": null, 558 | "isNew": true, 559 | "links": [], 560 | "mappingType": 1, 561 | "mappingTypes": [ 562 | { 563 | "name": "value to text", 564 | "value": 1 565 | }, 566 | { 567 | "name": "range to text", 568 | "value": 2 569 | } 570 | ], 571 | "maxDataPoints": 100, 572 | "nullPointMode": "connected", 573 | "nullText": null, 574 | "options": {}, 575 | "postfix": "", 576 | "postfixFontSize": "50%", 577 | "prefix": "", 578 | "prefixFontSize": "50%", 579 | "rangeMaps": [ 580 | { 581 | "from": "null", 582 | "text": "N/A", 583 | "to": "null" 584 | } 585 | ], 586 | "sparkline": { 587 | "fillColor": "rgba(31, 118, 189, 0.18)", 588 | "full": false, 589 | "lineColor": "rgb(31, 120, 193)", 590 | "show": false 591 | }, 592 | "tableColumn": "", 593 | "targets": [ 594 | { 595 | "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes +node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", 596 | "format": "time_series", 597 | "interval": "10s", 598 | "intervalFactor": 1, 599 | "refId": "A", 600 | "step": 10 601 | } 602 | ], 603 | "thresholds": "65, 90", 604 | "title": "Memory usage", 605 | "type": "singlestat", 606 | "valueFontSize": "80%", 607 | "valueMaps": [ 608 | { 609 | "op": "=", 610 | "text": "N/A", 611 | "value": "null" 612 | } 613 | ], 614 | "valueName": "current" 615 | }, 616 | { 617 | "cacheTimeout": null, 618 | "colorBackground": false, 619 | "colorValue": false, 620 | "colors": [ 621 | "rgba(50, 172, 45, 0.97)", 622 | "rgba(237, 129, 40, 0.89)", 623 | "rgba(245, 54, 54, 0.9)" 624 | ], 625 | "datasource": "Prometheus", 626 | "decimals": 2, 627 | "editable": true, 628 | "error": false, 629 | "format": "percent", 630 | "gauge": { 631 | "maxValue": 100, 632 | "minValue": 0, 633 | "show": true, 634 | "thresholdLabels": false, 635 | "thresholdMarkers": true 636 | }, 637 | "gridPos": { 638 | "h": 6, 639 | "w": 6, 640 | "x": 6, 641 
| "y": 8 642 | }, 643 | "id": 6, 644 | "interval": null, 645 | "isNew": true, 646 | "links": [], 647 | "mappingType": 1, 648 | "mappingTypes": [ 649 | { 650 | "name": "value to text", 651 | "value": 1 652 | }, 653 | { 654 | "name": "range to text", 655 | "value": 2 656 | } 657 | ], 658 | "maxDataPoints": 100, 659 | "nullPointMode": "connected", 660 | "nullText": null, 661 | "options": {}, 662 | "postfix": "", 663 | "postfixFontSize": "50%", 664 | "prefix": "", 665 | "prefixFontSize": "50%", 666 | "rangeMaps": [ 667 | { 668 | "from": "null", 669 | "text": "N/A", 670 | "to": "null" 671 | } 672 | ], 673 | "sparkline": { 674 | "fillColor": "rgba(31, 118, 189, 0.18)", 675 | "full": false, 676 | "lineColor": "rgb(31, 120, 193)", 677 | "show": false 678 | }, 679 | "tableColumn": "", 680 | "targets": [ 681 | { 682 | "expr": "100 - (avg(irate(node_cpu_seconds_total{instance=~\"$node\",mode=\"idle\"}[5m])) * 100)", 683 | "format": "time_series", 684 | "interval": "1m", 685 | "intervalFactor": 1, 686 | "legendFormat": "", 687 | "refId": "A", 688 | "step": 10 689 | } 690 | ], 691 | "thresholds": "65, 90", 692 | "title": "CPU usage", 693 | "type": "singlestat", 694 | "valueFontSize": "80%", 695 | "valueMaps": [ 696 | { 697 | "op": "=", 698 | "text": "N/A", 699 | "value": "null" 700 | } 701 | ], 702 | "valueName": "current" 703 | }, 704 | { 705 | "cacheTimeout": null, 706 | "colorBackground": false, 707 | "colorValue": false, 708 | "colors": [ 709 | "rgba(50, 172, 45, 0.97)", 710 | "rgba(237, 129, 40, 0.89)", 711 | "rgba(245, 54, 54, 0.9)" 712 | ], 713 | "datasource": "Prometheus", 714 | "decimals": 2, 715 | "editable": true, 716 | "error": false, 717 | "format": "percent", 718 | "gauge": { 719 | "maxValue": 100, 720 | "minValue": 0, 721 | "show": true, 722 | "thresholdLabels": false, 723 | "thresholdMarkers": true 724 | }, 725 | "gridPos": { 726 | "h": 6, 727 | "w": 7, 728 | "x": 12, 729 | "y": 8 730 | }, 731 | "id": 7, 732 | "interval": null, 733 | "isNew": true, 734 | 
"links": [], 735 | "mappingType": 1, 736 | "mappingTypes": [ 737 | { 738 | "name": "value to text", 739 | "value": 1 740 | }, 741 | { 742 | "name": "range to text", 743 | "value": 2 744 | } 745 | ], 746 | "maxDataPoints": 100, 747 | "nullPointMode": "connected", 748 | "nullText": null, 749 | "options": {}, 750 | "postfix": "", 751 | "postfixFontSize": "50%", 752 | "prefix": "", 753 | "prefixFontSize": "50%", 754 | "rangeMaps": [ 755 | { 756 | "from": "null", 757 | "text": "N/A", 758 | "to": "null" 759 | } 760 | ], 761 | "sparkline": { 762 | "fillColor": "rgba(31, 118, 189, 0.18)", 763 | "full": false, 764 | "lineColor": "rgb(31, 120, 193)", 765 | "show": false 766 | }, 767 | "tableColumn": "", 768 | "targets": [ 769 | { 770 | "expr": "avg( node_filesystem_avail_bytes {mountpoint=\"/\"} / node_filesystem_size_bytes{mountpoint=\"/\"})", 771 | "interval": "10s", 772 | "intervalFactor": 1, 773 | "metric": "", 774 | "refId": "A", 775 | "step": 10 776 | } 777 | ], 778 | "thresholds": "65, 90", 779 | "title": "Filesystem usage", 780 | "type": "singlestat", 781 | "valueFontSize": "80%", 782 | "valueMaps": [ 783 | { 784 | "op": "=", 785 | "text": "N/A", 786 | "value": "null" 787 | } 788 | ], 789 | "valueName": "current" 790 | }, 791 | { 792 | "aliasColors": { 793 | "RECEIVE": "#ea6460", 794 | "SENT": "#1f78c1", 795 | "TRANSMIT": "#1f78c1" 796 | }, 797 | "bars": false, 798 | "dashLength": 10, 799 | "dashes": false, 800 | "datasource": "Prometheus", 801 | "fill": 4, 802 | "fillGradient": 0, 803 | "gridPos": { 804 | "h": 9, 805 | "w": 6, 806 | "x": 0, 807 | "y": 14 808 | }, 809 | "id": 25, 810 | "legend": { 811 | "avg": false, 812 | "current": false, 813 | "max": false, 814 | "min": false, 815 | "show": true, 816 | "total": false, 817 | "values": false 818 | }, 819 | "lines": true, 820 | "linewidth": 1, 821 | "links": [], 822 | "nullPointMode": "null", 823 | "options": { 824 | "dataLinks": [] 825 | }, 826 | "percentage": false, 827 | "pointradius": 5, 828 | "points": false, 
829 | "renderer": "flot", 830 | "seriesOverrides": [], 831 | "spaceLength": 10, 832 | "stack": false, 833 | "steppedLine": false, 834 | "targets": [ 835 | { 836 | "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", 837 | "format": "time_series", 838 | "interval": "2m", 839 | "intervalFactor": 2, 840 | "legendFormat": "RECEIVE", 841 | "refId": "A" 842 | }, 843 | { 844 | "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", 845 | "format": "time_series", 846 | "interval": "2m", 847 | "intervalFactor": 2, 848 | "legendFormat": "TRANSMIT", 849 | "refId": "B" 850 | } 851 | ], 852 | "thresholds": [], 853 | "timeFrom": null, 854 | "timeRegions": [], 855 | "timeShift": null, 856 | "title": "Node Network Traffic", 857 | "tooltip": { 858 | "shared": true, 859 | "sort": 0, 860 | "value_type": "cumulative" 861 | }, 862 | "type": "graph", 863 | "xaxis": { 864 | "buckets": null, 865 | "mode": "time", 866 | "name": null, 867 | "show": true, 868 | "values": [] 869 | }, 870 | "yaxes": [ 871 | { 872 | "format": "Bps", 873 | "label": null, 874 | "logBase": 1, 875 | "max": null, 876 | "min": null, 877 | "show": true 878 | }, 879 | { 880 | "format": "s", 881 | "label": null, 882 | "logBase": 1, 883 | "max": null, 884 | "min": null, 885 | "show": true 886 | } 887 | ], 888 | "yaxis": { 889 | "align": false, 890 | "alignLevel": null 891 | } 892 | }, 893 | { 894 | "aliasColors": { 895 | "Available Memory": "#508642", 896 | "Used Memory": "#bf1b00" 897 | }, 898 | "bars": false, 899 | "dashLength": 10, 900 | "dashes": false, 901 | "datasource": "Prometheus", 902 | "fill": 3, 903 | "fillGradient": 0, 904 | "gridPos": { 905 | "h": 9, 906 | "w": 6, 907 | "x": 6, 908 | "y": 14 909 | }, 910 | "id": 27, 911 | "legend": { 912 | "avg": false, 913 | "current": false, 914 | "max": false, 915 | "min": false, 916 | "show": true, 917 | "total": false, 918 | "values": false 919 | }, 920 | "lines": true, 921 | "linewidth": 1, 922 | 
"links": [], 923 | "nullPointMode": "null", 924 | "options": { 925 | "dataLinks": [] 926 | }, 927 | "percentage": false, 928 | "pointradius": 5, 929 | "points": false, 930 | "renderer": "flot", 931 | "seriesOverrides": [], 932 | "spaceLength": 10, 933 | "stack": true, 934 | "steppedLine": false, 935 | "targets": [ 936 | { 937 | "expr": "sum(node_memory_MemTotal_bytes) - sum(node_memory_MemAvailable_bytes)", 938 | "format": "time_series", 939 | "interval": "2m", 940 | "intervalFactor": 2, 941 | "legendFormat": "Used Memory", 942 | "refId": "B" 943 | }, 944 | { 945 | "expr": "sum(node_memory_MemAvailable_bytes)", 946 | "format": "time_series", 947 | "interval": "2m", 948 | "intervalFactor": 2, 949 | "legendFormat": "Available Memory", 950 | "refId": "A" 951 | } 952 | ], 953 | "thresholds": [], 954 | "timeFrom": null, 955 | "timeRegions": [], 956 | "timeShift": null, 957 | "title": "Node Mermory", 958 | "tooltip": { 959 | "shared": true, 960 | "sort": 0, 961 | "value_type": "individual" 962 | }, 963 | "type": "graph", 964 | "xaxis": { 965 | "buckets": null, 966 | "mode": "time", 967 | "name": null, 968 | "show": true, 969 | "values": [] 970 | }, 971 | "yaxes": [ 972 | { 973 | "format": "decbytes", 974 | "label": null, 975 | "logBase": 1, 976 | "max": null, 977 | "min": null, 978 | "show": true 979 | }, 980 | { 981 | "format": "s", 982 | "label": null, 983 | "logBase": 1, 984 | "max": null, 985 | "min": null, 986 | "show": true 987 | } 988 | ], 989 | "yaxis": { 990 | "align": false, 991 | "alignLevel": null 992 | } 993 | }, 994 | { 995 | "aliasColors": { 996 | "Available Memory": "#508642", 997 | "Free Storage": "#447ebc", 998 | "Total Storage Available": "#508642", 999 | "Used Memory": "#bf1b00", 1000 | "Used Storage": "#bf1b00" 1001 | }, 1002 | "bars": false, 1003 | "dashLength": 10, 1004 | "dashes": false, 1005 | "datasource": "Prometheus", 1006 | "fill": 3, 1007 | "fillGradient": 0, 1008 | "gridPos": { 1009 | "h": 9, 1010 | "w": 7, 1011 | "x": 12, 1012 | "y": 14 
1013 | }, 1014 | "id": 28, 1015 | "legend": { 1016 | "avg": false, 1017 | "current": false, 1018 | "max": false, 1019 | "min": false, 1020 | "show": true, 1021 | "total": false, 1022 | "values": false 1023 | }, 1024 | "lines": true, 1025 | "linewidth": 1, 1026 | "links": [], 1027 | "nullPointMode": "null", 1028 | "options": { 1029 | "dataLinks": [] 1030 | }, 1031 | "percentage": false, 1032 | "pointradius": 5, 1033 | "points": false, 1034 | "renderer": "flot", 1035 | "seriesOverrides": [], 1036 | "spaceLength": 10, 1037 | "stack": true, 1038 | "steppedLine": false, 1039 | "targets": [ 1040 | { 1041 | "expr": "sum(node_filesystem_free_bytes {job=\"node-exporter\", instance=~\".*9100\", device=~\"/dev/.*\", mountpoint!=\"/var/lib/docker/aufs\"}) ", 1042 | "format": "time_series", 1043 | "interval": "2m", 1044 | "intervalFactor": 2, 1045 | "legendFormat": "Free Storage", 1046 | "refId": "A" 1047 | } 1048 | ], 1049 | "thresholds": [], 1050 | "timeFrom": null, 1051 | "timeRegions": [], 1052 | "timeShift": null, 1053 | "title": "Filesystem Available", 1054 | "tooltip": { 1055 | "shared": true, 1056 | "sort": 0, 1057 | "value_type": "individual" 1058 | }, 1059 | "type": "graph", 1060 | "xaxis": { 1061 | "buckets": null, 1062 | "mode": "time", 1063 | "name": null, 1064 | "show": true, 1065 | "values": [] 1066 | }, 1067 | "yaxes": [ 1068 | { 1069 | "decimals": null, 1070 | "format": "decbytes", 1071 | "label": null, 1072 | "logBase": 1, 1073 | "max": null, 1074 | "min": null, 1075 | "show": true 1076 | }, 1077 | { 1078 | "format": "s", 1079 | "label": null, 1080 | "logBase": 1, 1081 | "max": null, 1082 | "min": null, 1083 | "show": true 1084 | } 1085 | ], 1086 | "yaxis": { 1087 | "align": false, 1088 | "alignLevel": null 1089 | } 1090 | }, 1091 | { 1092 | "collapsed": false, 1093 | "datasource": null, 1094 | "gridPos": { 1095 | "h": 1, 1096 | "w": 24, 1097 | "x": 0, 1098 | "y": 23 1099 | }, 1100 | "id": 19, 1101 | "panels": [], 1102 | "repeat": null, 1103 | "title": 
"Container Performance", 1104 | "type": "row" 1105 | }, 1106 | { 1107 | "aliasColors": {}, 1108 | "bars": false, 1109 | "dashLength": 10, 1110 | "dashes": false, 1111 | "datasource": "Prometheus", 1112 | "decimals": 3, 1113 | "editable": true, 1114 | "error": false, 1115 | "fill": 0, 1116 | "fillGradient": 0, 1117 | "grid": {}, 1118 | "gridPos": { 1119 | "h": 10, 1120 | "w": 6, 1121 | "x": 0, 1122 | "y": 24 1123 | }, 1124 | "id": 3, 1125 | "isNew": true, 1126 | "legend": { 1127 | "alignAsTable": true, 1128 | "avg": true, 1129 | "current": true, 1130 | "max": false, 1131 | "min": false, 1132 | "rightSide": false, 1133 | "show": true, 1134 | "sort": "current", 1135 | "sortDesc": true, 1136 | "total": false, 1137 | "values": true 1138 | }, 1139 | "lines": true, 1140 | "linewidth": 2, 1141 | "links": [], 1142 | "nullPointMode": "connected", 1143 | "options": { 1144 | "dataLinks": [] 1145 | }, 1146 | "percentage": false, 1147 | "pointradius": 5, 1148 | "points": false, 1149 | "renderer": "flot", 1150 | "seriesOverrides": [], 1151 | "spaceLength": 10, 1152 | "stack": false, 1153 | "steppedLine": false, 1154 | "targets": [ 1155 | { 1156 | "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\"}[1m])) by (id,name)", 1157 | "format": "time_series", 1158 | "interval": "10s", 1159 | "intervalFactor": 1, 1160 | "legendFormat": "{{ name }}", 1161 | "metric": "container_cpu_user_seconds_total", 1162 | "refId": "A", 1163 | "step": 10 1164 | } 1165 | ], 1166 | "thresholds": [], 1167 | "timeFrom": null, 1168 | "timeRegions": [], 1169 | "timeShift": null, 1170 | "title": "Container CPU usage", 1171 | "tooltip": { 1172 | "msResolution": true, 1173 | "shared": true, 1174 | "sort": 0, 1175 | "value_type": "cumulative" 1176 | }, 1177 | "type": "graph", 1178 | "xaxis": { 1179 | "buckets": null, 1180 | "mode": "time", 1181 | "name": null, 1182 | "show": true, 1183 | "values": [] 1184 | }, 1185 | "yaxes": [ 1186 | { 1187 | "format": "percentunit", 1188 | "label": null, 1189 | 
"logBase": 1, 1190 | "max": null, 1191 | "min": null, 1192 | "show": true 1193 | }, 1194 | { 1195 | "format": "short", 1196 | "label": null, 1197 | "logBase": 1, 1198 | "max": null, 1199 | "min": null, 1200 | "show": true 1201 | } 1202 | ], 1203 | "yaxis": { 1204 | "align": false, 1205 | "alignLevel": null 1206 | } 1207 | }, 1208 | { 1209 | "aliasColors": {}, 1210 | "bars": false, 1211 | "dashLength": 10, 1212 | "dashes": false, 1213 | "datasource": "Prometheus", 1214 | "decimals": 2, 1215 | "editable": true, 1216 | "error": false, 1217 | "fill": 0, 1218 | "fillGradient": 0, 1219 | "grid": {}, 1220 | "gridPos": { 1221 | "h": 10, 1222 | "w": 6, 1223 | "x": 6, 1224 | "y": 24 1225 | }, 1226 | "id": 2, 1227 | "isNew": true, 1228 | "legend": { 1229 | "alignAsTable": true, 1230 | "avg": true, 1231 | "current": true, 1232 | "max": false, 1233 | "min": false, 1234 | "rightSide": false, 1235 | "show": true, 1236 | "sort": "current", 1237 | "sortDesc": true, 1238 | "total": false, 1239 | "values": true 1240 | }, 1241 | "lines": true, 1242 | "linewidth": 2, 1243 | "links": [], 1244 | "nullPointMode": "connected", 1245 | "options": { 1246 | "dataLinks": [] 1247 | }, 1248 | "percentage": false, 1249 | "pointradius": 5, 1250 | "points": false, 1251 | "renderer": "flot", 1252 | "seriesOverrides": [], 1253 | "spaceLength": 10, 1254 | "stack": false, 1255 | "steppedLine": false, 1256 | "targets": [ 1257 | { 1258 | "expr": "container_memory_max_usage_bytes{image!=\"\"}", 1259 | "format": "time_series", 1260 | "interval": "10s", 1261 | "intervalFactor": 1, 1262 | "legendFormat": "{{ name }}", 1263 | "metric": "container_memory_usage:sort_desc", 1264 | "refId": "A", 1265 | "step": 10 1266 | } 1267 | ], 1268 | "thresholds": [], 1269 | "timeFrom": null, 1270 | "timeRegions": [], 1271 | "timeShift": null, 1272 | "title": "Container Memory Usage", 1273 | "tooltip": { 1274 | "msResolution": false, 1275 | "shared": true, 1276 | "sort": 0, 1277 | "value_type": "cumulative" 1278 | }, 1279 | 
"type": "graph", 1280 | "xaxis": { 1281 | "buckets": null, 1282 | "mode": "time", 1283 | "name": null, 1284 | "show": true, 1285 | "values": [] 1286 | }, 1287 | "yaxes": [ 1288 | { 1289 | "format": "bytes", 1290 | "label": null, 1291 | "logBase": 1, 1292 | "max": null, 1293 | "min": null, 1294 | "show": true 1295 | }, 1296 | { 1297 | "format": "short", 1298 | "label": null, 1299 | "logBase": 1, 1300 | "max": null, 1301 | "min": null, 1302 | "show": true 1303 | } 1304 | ], 1305 | "yaxis": { 1306 | "align": false, 1307 | "alignLevel": null 1308 | } 1309 | }, 1310 | { 1311 | "columns": [], 1312 | "datasource": "Prometheus", 1313 | "fontSize": "100%", 1314 | "gridPos": { 1315 | "h": 13, 1316 | "w": 10, 1317 | "x": 12, 1318 | "y": 24 1319 | }, 1320 | "id": 23, 1321 | "links": [], 1322 | "options": {}, 1323 | "pageSize": null, 1324 | "scroll": true, 1325 | "showHeader": true, 1326 | "sort": { 1327 | "col": 0, 1328 | "desc": true 1329 | }, 1330 | "styles": [ 1331 | { 1332 | "alias": "Time", 1333 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1334 | "pattern": "Time", 1335 | "type": "date" 1336 | }, 1337 | { 1338 | "alias": "", 1339 | "colorMode": null, 1340 | "colors": [ 1341 | "rgba(245, 54, 54, 0.9)", 1342 | "rgba(237, 129, 40, 0.89)", 1343 | "rgba(50, 172, 45, 0.97)" 1344 | ], 1345 | "decimals": 2, 1346 | "pattern": "/.*/", 1347 | "thresholds": [], 1348 | "type": "number", 1349 | "unit": "short" 1350 | } 1351 | ], 1352 | "targets": [ 1353 | { 1354 | "expr": "ALERTS", 1355 | "format": "table", 1356 | "intervalFactor": 1, 1357 | "refId": "A" 1358 | } 1359 | ], 1360 | "title": "Alerts", 1361 | "transform": "table", 1362 | "type": "table" 1363 | }, 1364 | { 1365 | "aliasColors": {}, 1366 | "bars": false, 1367 | "dashLength": 10, 1368 | "dashes": false, 1369 | "datasource": "Prometheus", 1370 | "decimals": 2, 1371 | "editable": true, 1372 | "error": false, 1373 | "fill": 0, 1374 | "fillGradient": 0, 1375 | "grid": {}, 1376 | "gridPos": { 1377 | "h": 14, 1378 | "w": 6, 1379 | "x": 
0, 1380 | "y": 34 1381 | }, 1382 | "id": 8, 1383 | "isNew": true, 1384 | "legend": { 1385 | "alignAsTable": true, 1386 | "avg": true, 1387 | "current": true, 1388 | "max": false, 1389 | "min": false, 1390 | "rightSide": false, 1391 | "show": true, 1392 | "sort": "current", 1393 | "sortDesc": true, 1394 | "total": false, 1395 | "values": true 1396 | }, 1397 | "lines": true, 1398 | "linewidth": 2, 1399 | "links": [], 1400 | "nullPointMode": "connected", 1401 | "options": { 1402 | "dataLinks": [] 1403 | }, 1404 | "percentage": false, 1405 | "pointradius": 5, 1406 | "points": false, 1407 | "renderer": "flot", 1408 | "seriesOverrides": [], 1409 | "spaceLength": 10, 1410 | "stack": false, 1411 | "steppedLine": false, 1412 | "targets": [ 1413 | { 1414 | "expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) ))", 1415 | "interval": "10s", 1416 | "intervalFactor": 1, 1417 | "legendFormat": "{{ name }}", 1418 | "metric": "container_network_receive_bytes_total", 1419 | "refId": "A", 1420 | "step": 10 1421 | } 1422 | ], 1423 | "thresholds": [], 1424 | "timeFrom": null, 1425 | "timeRegions": [], 1426 | "timeShift": null, 1427 | "title": "Container Network Input", 1428 | "tooltip": { 1429 | "msResolution": false, 1430 | "shared": true, 1431 | "sort": 0, 1432 | "value_type": "cumulative" 1433 | }, 1434 | "type": "graph", 1435 | "xaxis": { 1436 | "buckets": null, 1437 | "mode": "time", 1438 | "name": null, 1439 | "show": true, 1440 | "values": [] 1441 | }, 1442 | "yaxes": [ 1443 | { 1444 | "format": "bytes", 1445 | "label": null, 1446 | "logBase": 1, 1447 | "max": null, 1448 | "min": null, 1449 | "show": true 1450 | }, 1451 | { 1452 | "format": "short", 1453 | "label": null, 1454 | "logBase": 1, 1455 | "max": null, 1456 | "min": null, 1457 | "show": true 1458 | } 1459 | ], 1460 | "yaxis": { 1461 | "align": false, 1462 | "alignLevel": null 1463 | } 1464 | }, 1465 | { 1466 | "aliasColors": {}, 1467 | "bars": false, 1468 | "dashLength": 10, 
1469 | "dashes": false, 1470 | "datasource": "Prometheus", 1471 | "decimals": 2, 1472 | "editable": true, 1473 | "error": false, 1474 | "fill": 0, 1475 | "fillGradient": 0, 1476 | "grid": {}, 1477 | "gridPos": { 1478 | "h": 14, 1479 | "w": 6, 1480 | "x": 6, 1481 | "y": 34 1482 | }, 1483 | "id": 9, 1484 | "isNew": true, 1485 | "legend": { 1486 | "alignAsTable": true, 1487 | "avg": true, 1488 | "current": true, 1489 | "max": false, 1490 | "min": false, 1491 | "rightSide": false, 1492 | "show": true, 1493 | "sort": "current", 1494 | "sortDesc": true, 1495 | "total": false, 1496 | "values": true 1497 | }, 1498 | "lines": true, 1499 | "linewidth": 2, 1500 | "links": [], 1501 | "nullPointMode": "connected", 1502 | "options": { 1503 | "dataLinks": [] 1504 | }, 1505 | "percentage": false, 1506 | "pointradius": 5, 1507 | "points": false, 1508 | "renderer": "flot", 1509 | "seriesOverrides": [], 1510 | "spaceLength": 10, 1511 | "stack": false, 1512 | "steppedLine": false, 1513 | "targets": [ 1514 | { 1515 | "expr": "sort_desc(sum by (name) (rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) ))", 1516 | "format": "time_series", 1517 | "intervalFactor": 2, 1518 | "legendFormat": "{{ name }}", 1519 | "metric": "container_network_transmit_bytes_total", 1520 | "refId": "B", 1521 | "step": 4 1522 | } 1523 | ], 1524 | "thresholds": [], 1525 | "timeFrom": null, 1526 | "timeRegions": [], 1527 | "timeShift": null, 1528 | "title": "Container Network Output", 1529 | "tooltip": { 1530 | "msResolution": false, 1531 | "shared": true, 1532 | "sort": 0, 1533 | "value_type": "cumulative" 1534 | }, 1535 | "type": "graph", 1536 | "xaxis": { 1537 | "buckets": null, 1538 | "mode": "time", 1539 | "name": null, 1540 | "show": true, 1541 | "values": [] 1542 | }, 1543 | "yaxes": [ 1544 | { 1545 | "format": "bytes", 1546 | "label": null, 1547 | "logBase": 1, 1548 | "max": null, 1549 | "min": null, 1550 | "show": true 1551 | }, 1552 | { 1553 | "format": "short", 1554 | "label": null, 1555 | 
"logBase": 1, 1556 | "max": null, 1557 | "min": null, 1558 | "show": false 1559 | } 1560 | ], 1561 | "yaxis": { 1562 | "align": false, 1563 | "alignLevel": null 1564 | } 1565 | }, 1566 | { 1567 | "columns": [], 1568 | "datasource": "Prometheus", 1569 | "fontSize": "100%", 1570 | "gridPos": { 1571 | "h": 10, 1572 | "w": 10, 1573 | "x": 12, 1574 | "y": 37 1575 | }, 1576 | "id": 30, 1577 | "links": [], 1578 | "options": {}, 1579 | "pageSize": 10, 1580 | "scroll": true, 1581 | "showHeader": true, 1582 | "sort": { 1583 | "col": 0, 1584 | "desc": true 1585 | }, 1586 | "styles": [ 1587 | { 1588 | "alias": "Time", 1589 | "dateFormat": "YYYY-MM-DD HH:mm:ss", 1590 | "link": false, 1591 | "linkUrl": "", 1592 | "pattern": "Time", 1593 | "type": "date" 1594 | }, 1595 | { 1596 | "alias": "", 1597 | "colorMode": null, 1598 | "colors": [ 1599 | "rgba(245, 54, 54, 0.9)", 1600 | "rgba(237, 129, 40, 0.89)", 1601 | "rgba(50, 172, 45, 0.97)" 1602 | ], 1603 | "decimals": 2, 1604 | "pattern": "/.*/", 1605 | "thresholds": [], 1606 | "type": "number", 1607 | "unit": "short" 1608 | } 1609 | ], 1610 | "targets": [ 1611 | { 1612 | "expr": "cadvisor_version_info", 1613 | "format": "table", 1614 | "instant": false, 1615 | "interval": "15m", 1616 | "intervalFactor": 2, 1617 | "legendFormat": "cAdvisor Version: {{cadvisorVersion}}", 1618 | "refId": "A" 1619 | }, 1620 | { 1621 | "expr": "prometheus_build_info", 1622 | "format": "table", 1623 | "interval": "15m", 1624 | "intervalFactor": 2, 1625 | "legendFormat": "Prometheus Version: {{version}}", 1626 | "refId": "B" 1627 | }, 1628 | { 1629 | "expr": "node_exporter_build_info", 1630 | "format": "table", 1631 | "interval": "15m", 1632 | "intervalFactor": 2, 1633 | "legendFormat": "Node-Exporter Version: {{version}}", 1634 | "refId": "C" 1635 | } 1636 | ], 1637 | "title": "Running Versions", 1638 | "transform": "table", 1639 | "type": "table" 1640 | } 1641 | ], 1642 | "refresh": "10s", 1643 | "schemaVersion": 20, 1644 | "style": "dark", 1645 | 
"tags": [ 1646 | "docker", 1647 | "prometheus, ", 1648 | "node-exporter", 1649 | "cadvisor" 1650 | ], 1651 | "templating": { 1652 | "list": [ 1653 | { 1654 | "auto": false, 1655 | "auto_count": 30, 1656 | "auto_min": "10s", 1657 | "current": { 1658 | "text": "1m", 1659 | "value": "1m" 1660 | }, 1661 | "hide": 0, 1662 | "label": "interval", 1663 | "name": "interval", 1664 | "options": [ 1665 | { 1666 | "selected": true, 1667 | "text": "1m", 1668 | "value": "1m" 1669 | }, 1670 | { 1671 | "selected": false, 1672 | "text": "10m", 1673 | "value": "10m" 1674 | }, 1675 | { 1676 | "selected": false, 1677 | "text": "30m", 1678 | "value": "30m" 1679 | }, 1680 | { 1681 | "selected": false, 1682 | "text": "1h", 1683 | "value": "1h" 1684 | }, 1685 | { 1686 | "selected": false, 1687 | "text": "6h", 1688 | "value": "6h" 1689 | }, 1690 | { 1691 | "selected": false, 1692 | "text": "12h", 1693 | "value": "12h" 1694 | }, 1695 | { 1696 | "selected": false, 1697 | "text": "1d", 1698 | "value": "1d" 1699 | }, 1700 | { 1701 | "selected": false, 1702 | "text": "7d", 1703 | "value": "7d" 1704 | }, 1705 | { 1706 | "selected": false, 1707 | "text": "14d", 1708 | "value": "14d" 1709 | }, 1710 | { 1711 | "selected": false, 1712 | "text": "30d", 1713 | "value": "30d" 1714 | } 1715 | ], 1716 | "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", 1717 | "refresh": 2, 1718 | "skipUrlSync": false, 1719 | "type": "interval" 1720 | }, 1721 | { 1722 | "allValue": null, 1723 | "current": { 1724 | "text": "All", 1725 | "value": "$__all" 1726 | }, 1727 | "datasource": "Prometheus", 1728 | "definition": "label_values(node_exporter_build_info{name=~'$name'},instance)", 1729 | "hide": 0, 1730 | "includeAll": true, 1731 | "label": "IP", 1732 | "multi": true, 1733 | "name": "node", 1734 | "options": [], 1735 | "query": "label_values(node_exporter_build_info{name=~'$name'},instance)", 1736 | "refresh": 2, 1737 | "regex": "", 1738 | "skipUrlSync": false, 1739 | "sort": 1, 1740 | "tagValuesQuery": "", 1741 | "tags": 
[], 1742 | "tagsQuery": "", 1743 | "type": "query", 1744 | "useTags": false 1745 | }, 1746 | { 1747 | "allValue": null, 1748 | "current": { 1749 | "text": "All", 1750 | "value": "$__all" 1751 | }, 1752 | "datasource": "Prometheus", 1753 | "definition": "label_values(node_exporter_build_info,env)", 1754 | "hide": 0, 1755 | "includeAll": true, 1756 | "label": "Env", 1757 | "multi": true, 1758 | "name": "env", 1759 | "options": [], 1760 | "query": "label_values(node_exporter_build_info,env)", 1761 | "refresh": 2, 1762 | "regex": "", 1763 | "skipUrlSync": false, 1764 | "sort": 0, 1765 | "tagValuesQuery": "", 1766 | "tags": [], 1767 | "tagsQuery": "", 1768 | "type": "query", 1769 | "useTags": false 1770 | }, 1771 | { 1772 | "allValue": null, 1773 | "current": { 1774 | "text": "All", 1775 | "value": "$__all" 1776 | }, 1777 | "datasource": "Prometheus", 1778 | "definition": "label_values(node_exporter_build_info{env=~'$env'},name)", 1779 | "hide": 0, 1780 | "includeAll": true, 1781 | "label": "CPU Name", 1782 | "multi": true, 1783 | "name": "name", 1784 | "options": [], 1785 | "query": "label_values(node_exporter_build_info{env=~'$env'},name)", 1786 | "refresh": 2, 1787 | "regex": "", 1788 | "skipUrlSync": false, 1789 | "sort": 0, 1790 | "tagValuesQuery": "", 1791 | "tags": [], 1792 | "tagsQuery": "", 1793 | "type": "query", 1794 | "useTags": false 1795 | } 1796 | ] 1797 | }, 1798 | "time": { 1799 | "from": "now-5m", 1800 | "to": "now" 1801 | }, 1802 | "timepicker": { 1803 | "refresh_intervals": [ 1804 | "5s", 1805 | "10s", 1806 | "30s", 1807 | "1m", 1808 | "5m", 1809 | "15m", 1810 | "30m", 1811 | "1h", 1812 | "2h", 1813 | "1d" 1814 | ], 1815 | "time_options": [ 1816 | "5m", 1817 | "15m", 1818 | "1h", 1819 | "6h", 1820 | "12h", 1821 | "24h", 1822 | "2d", 1823 | "7d", 1824 | "30d" 1825 | ] 1826 | }, 1827 | "timezone": "browser", 1828 | "title": "Docker Prometheus Monitoring", 1829 | "uid": "64nrElFmk", 1830 | "version": 2 1831 | } 
-------------------------------------------------------------------------------- /05-grafana-provisioning/master/grafana/provisioning/datasources/datasource.yml: -------------------------------------------------------------------------------- 1 | # config file version 2 | apiVersion: 1 3 | 4 | # list of datasources that should be deleted from the database 5 | deleteDatasources: 6 | - name: Loki 7 | orgId: 1 8 | - name: Prometheus 9 | orgId: 1 10 | 11 | # list of datasources to insert/update depending on 12 | # what's available in the database 13 | datasources: 14 | # name of the datasource. Required 15 | - name: Loki 16 | # datasource type. Required 17 | type: loki 18 | # access mode. direct or proxy. Required 19 | access: proxy 20 | # org id. will default to orgId 1 if not specified 21 | orgId: 1 22 | # url 23 | url: http://loki:3100 24 | # database password, if used 25 | password: 26 | # database user, if used 27 | user: 28 | # database name, if used 29 | database: 30 | # enable/disable basic auth 31 | basicAuth: false 32 | # basic auth username, if used 33 | basicAuthUser: 34 | # basic auth password, if used 35 | basicAuthPassword: 36 | # enable/disable with credentials headers 37 | withCredentials: 38 | # mark as default datasource. Max one per org 39 | isDefault: true 40 | # fields that will be converted to json and stored in json_data. NOTE(review): graphiteVersion looks like a leftover from a Graphite sample - confirm it applies to a loki datasource 41 | jsonData: 42 | graphiteVersion: "1.1" 43 | tlsAuth: false 44 | tlsAuthWithCACert: false 45 | # json object of data that will be encrypted. NOTE(review): the "..." values are placeholders and tlsAuth is false above - confirm this section is needed 46 | secureJsonData: 47 | tlsCACert: "..." 48 | tlsClientCert: "..." 49 | tlsClientKey: "..." 50 | version: 1 51 | # allow users to edit datasources from the UI. 52 | editable: true 53 | 54 | - name: Prometheus 55 | # datasource type. Required 56 | type: prometheus 57 | # access mode. direct or proxy. Required 58 | access: proxy 59 | # org id.
will default to orgId 1 if not specified 60 | orgId: 1 61 | # url 62 | url: http://prometheus:9090 63 | # database password, if used 64 | password: 65 | # database user, if used 66 | user: 67 | # database name, if used 68 | database: 69 | # enable/disable basic auth 70 | basicAuth: false 71 | # basic auth username, if used 72 | basicAuthUser: 73 | # basic auth password, if used 74 | basicAuthPassword: 75 | # enable/disable with credentials headers 76 | withCredentials: 77 | # mark as default datasource. Max one per org 78 | isDefault: false 79 | # fields that will be converted to json and stored in json_data. NOTE(review): graphiteVersion looks like a leftover from a Graphite sample - confirm it applies to a prometheus datasource 80 | jsonData: 81 | graphiteVersion: "1.1" 82 | tlsAuth: false 83 | tlsAuthWithCACert: false 84 | # json object of data that will be encrypted. NOTE(review): the "..." values are placeholders and tlsAuth is false above - confirm this section is needed 85 | secureJsonData: 86 | tlsCACert: "..." 87 | tlsClientCert: "..." 88 | tlsClientKey: "..." 89 | version: 1 90 | # allow users to edit datasources from the UI. 91 | editable: true -------------------------------------------------------------------------------- /05-grafana-provisioning/master/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 30s # How often metrics are scraped 3 | evaluation_interval: 30s # Evaluate rules every 30 seconds (the default is every minute) 4 | 5 | scrape_configs: 6 | 7 | - job_name: 'servers-dev' 8 | honor_labels: true 9 | metrics_path: '/federate' 10 | params: 11 | 'match[]': 12 | - '{__name__=~".+"}' 13 | static_configs: 14 | - targets: [ '192.168.56.102:9090'] # DEV 15 | labels: 16 | pais: AR 17 | entorno: DEV 18 | cliente: caosbinario 19 | hostname: dev-caosbinario-ar 20 | 21 | -------------------------------------------------------------------------------- /06-alertas-logs-loki/README.md: -------------------------------------------------------------------------------- 1 | # Alertas logs con loki 2 | 3 | Para ejecutar lo visto en el video deben hacer lo siguiente: 4 | 5 | Levantamos
grafana y loki: 6 | ``` 7 | docker-compose up -d 8 | ``` 9 | Por defecto, grafana va a levantarse en el puerto 3000 y genera el usuario y contraseña "admin" "admin" 10 | 11 | Luego vamos al archivo [server.py](server.py) y editamos la variable `url` (línea 39) con la URL del servidor de Loki, por ejemplo `http://localhost:3100/api/prom/push`. 12 | Y levantamos el servicio de python. 13 | Recuerden instalar las dependencias que no vienen con Python: `requests` y `pytz` (`pip install requests pytz`). 14 | ``` 15 | python server.py 16 | ``` 17 | 18 | Para tirar logs desde la app a loki, tienen que pegarle a la URL del servidor con el parámetro "txt" y el valor del log. 19 | Ejemplo: 20 | http://localhost:8080?txt="app-log" 21 | 22 | La aplicación es bastante básica: cuando le llega una petición genera un código random entre 200 y 202 y, según cuál sea, escribe el log como INFO (200), WARNING (201) o ERROR (202). Además le agrega un `userId` random del 1 al 3. 23 | -------------------------------------------------------------------------------- /06-alertas-logs-loki/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.3" 2 | 3 | services: 4 | loki: 5 | image: grafana/loki:latest 6 | container_name: master-loki 7 | restart: unless-stopped 8 | ports: 9 | - 3100:3100 10 | command: -config.file=/etc/loki/local-config.yaml 11 | 12 | grafana: 13 | image: grafana/grafana:latest 14 | container_name: master-grafana 15 | restart: unless-stopped 16 | volumes: 17 | - ./grafana/provisioning/:/etc/grafana/provisioning/ 18 | ports: 19 | - 3000:3000 -------------------------------------------------------------------------------- /06-alertas-logs-loki/grafana/provisioning/dashboards/dashboard.yml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | 3 | providers: 4 | - name: 'Prometheus' 5 | orgId: 1 6 | folder: '' 7 | type: file 8 | disableDeletion: false 9 | editable: true 10 | options: 11 | path: /etc/grafana/provisioning/dashboards
-------------------------------------------------------------------------------- /06-alertas-logs-loki/grafana/provisioning/dashboards/python-dashboad.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "grafana", 8 | "uid": "-- Grafana --" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "target": { 15 | "limit": 100, 16 | "matchAny": false, 17 | "tags": [], 18 | "type": "dashboard" 19 | }, 20 | "type": "dashboard" 21 | } 22 | ] 23 | }, 24 | "editable": true, 25 | "fiscalYearStartMonth": 0, 26 | "graphTooltip": 0, 27 | "id": 1, 28 | "links": [], 29 | "liveNow": false, 30 | "panels": [ 31 | { 32 | "datasource": { 33 | "type": "loki", 34 | "uid": "P8E80F9AEF21F6940" 35 | }, 36 | "gridPos": { 37 | "h": 30, 38 | "w": 11, 39 | "x": 0, 40 | "y": 0 41 | }, 42 | "id": 2, 43 | "options": { 44 | "dedupStrategy": "none", 45 | "enableLogDetails": true, 46 | "prettifyLogMessage": false, 47 | "showCommonLabels": false, 48 | "showLabels": false, 49 | "showTime": false, 50 | "sortOrder": "Descending", 51 | "wrapLogMessage": false 52 | }, 53 | "targets": [ 54 | { 55 | "datasource": { 56 | "type": "loki", 57 | "uid": "P8E80F9AEF21F6940" 58 | }, 59 | "expr": "{source=\"PythonApp\"}", 60 | "refId": "A" 61 | } 62 | ], 63 | "title": "Logs", 64 | "type": "logs" 65 | }, 66 | { 67 | "datasource": { 68 | "type": "loki", 69 | "uid": "P8E80F9AEF21F6940" 70 | }, 71 | "description": "", 72 | "fieldConfig": { 73 | "defaults": { 74 | "color": { 75 | "mode": "thresholds" 76 | }, 77 | "mappings": [], 78 | "thresholds": { 79 | "mode": "absolute", 80 | "steps": [ 81 | { 82 | "color": "red", 83 | "value": null 84 | } 85 | ] 86 | } 87 | }, 88 | "overrides": [] 89 | }, 90 | "gridPos": { 91 | "h": 10, 92 | "w": 4, 93 | "x": 11, 94 | "y": 0 95 | }, 96 | "id": 3, 97 | "options": { 98 | 
"colorMode": "value", 99 | "graphMode": "none", 100 | "justifyMode": "auto", 101 | "orientation": "auto", 102 | "reduceOptions": { 103 | "calcs": [ 104 | "lastNotNull" 105 | ], 106 | "fields": "", 107 | "values": false 108 | }, 109 | "textMode": "auto" 110 | }, 111 | "pluginVersion": "8.5.0", 112 | "targets": [ 113 | { 114 | "datasource": { 115 | "type": "loki", 116 | "uid": "tvqsxKw7z" 117 | }, 118 | "expr": "sum(\n count_over_time(\n {source=\"PythonApp\"} \n | logfmt \n | code = 202\n [1h]\n )\n)", 119 | "refId": "A" 120 | } 121 | ], 122 | "title": "ERRORS", 123 | "type": "stat" 124 | }, 125 | { 126 | "datasource": { 127 | "type": "loki", 128 | "uid": "P8E80F9AEF21F6940" 129 | }, 130 | "fieldConfig": { 131 | "defaults": { 132 | "color": { 133 | "mode": "thresholds" 134 | }, 135 | "mappings": [], 136 | "thresholds": { 137 | "mode": "absolute", 138 | "steps": [ 139 | { 140 | "color": "yellow", 141 | "value": null 142 | } 143 | ] 144 | } 145 | }, 146 | "overrides": [] 147 | }, 148 | "gridPos": { 149 | "h": 10, 150 | "w": 4, 151 | "x": 15, 152 | "y": 0 153 | }, 154 | "id": 6, 155 | "options": { 156 | "colorMode": "value", 157 | "graphMode": "none", 158 | "justifyMode": "auto", 159 | "orientation": "auto", 160 | "reduceOptions": { 161 | "calcs": [ 162 | "lastNotNull" 163 | ], 164 | "fields": "", 165 | "values": false 166 | }, 167 | "textMode": "auto" 168 | }, 169 | "pluginVersion": "8.5.0", 170 | "targets": [ 171 | { 172 | "datasource": { 173 | "type": "loki", 174 | "uid": "tvqsxKw7z" 175 | }, 176 | "expr": "sum(\n count_over_time(\n {source=\"PythonApp\"} \n | logfmt \n | code = 201 \n [1h]\n )\n)", 177 | "refId": "A" 178 | } 179 | ], 180 | "title": "WARNING", 181 | "type": "stat" 182 | }, 183 | { 184 | "datasource": {}, 185 | "fieldConfig": { 186 | "defaults": { 187 | "color": { 188 | "mode": "thresholds" 189 | }, 190 | "mappings": [], 191 | "thresholds": { 192 | "mode": "absolute", 193 | "steps": [ 194 | { 195 | "color": "green", 196 | "value": null 197 | } 198 | 
] 199 | } 200 | }, 201 | "overrides": [] 202 | }, 203 | "gridPos": { 204 | "h": 10, 205 | "w": 4, 206 | "x": 19, 207 | "y": 0 208 | }, 209 | "id": 4, 210 | "options": { 211 | "colorMode": "value", 212 | "graphMode": "none", 213 | "justifyMode": "auto", 214 | "orientation": "auto", 215 | "reduceOptions": { 216 | "calcs": [ 217 | "lastNotNull" 218 | ], 219 | "fields": "", 220 | "values": false 221 | }, 222 | "textMode": "auto" 223 | }, 224 | "pluginVersion": "8.5.0", 225 | "targets": [ 226 | { 227 | "datasource": { 228 | "type": "loki", 229 | "uid": "tvqsxKw7z" 230 | }, 231 | "expr": "sum(\n count_over_time(\n {source=\"PythonApp\"} \n | logfmt \n | code = 200 \n [1h]\n )\n)", 232 | "refId": "A" 233 | } 234 | ], 235 | "title": "INFO", 236 | "type": "stat" 237 | }, 238 | { 239 | "datasource": { 240 | "type": "loki", 241 | "uid": "P8E80F9AEF21F6940" 242 | }, 243 | "gridPos": { 244 | "h": 9, 245 | "w": 12, 246 | "x": 11, 247 | "y": 10 248 | }, 249 | "id": 5, 250 | "options": { 251 | "dedupStrategy": "none", 252 | "enableLogDetails": true, 253 | "prettifyLogMessage": false, 254 | "showCommonLabels": false, 255 | "showLabels": false, 256 | "showTime": false, 257 | "sortOrder": "Descending", 258 | "wrapLogMessage": false 259 | }, 260 | "pluginVersion": "8.5.0", 261 | "targets": [ 262 | { 263 | "datasource": { 264 | "type": "loki", 265 | "uid": "tvqsxKw7z" 266 | }, 267 | "expr": "{source=\"PythonApp\"}\n| logfmt \n| code = 202", 268 | "refId": "A" 269 | } 270 | ], 271 | "title": "ERRORS", 272 | "type": "logs" 273 | }, 274 | { 275 | "datasource": { 276 | "type": "loki", 277 | "uid": "P8E80F9AEF21F6940" 278 | }, 279 | "fieldConfig": { 280 | "defaults": { 281 | "color": { 282 | "mode": "thresholds" 283 | }, 284 | "mappings": [], 285 | "thresholds": { 286 | "mode": "absolute", 287 | "steps": [ 288 | { 289 | "color": "blue", 290 | "value": null 291 | } 292 | ] 293 | } 294 | }, 295 | "overrides": [] 296 | }, 297 | "gridPos": { 298 | "h": 11, 299 | "w": 4, 300 | "x": 11, 301 | 
"y": 19 302 | }, 303 | "id": 7, 304 | "options": { 305 | "colorMode": "value", 306 | "graphMode": "none", 307 | "justifyMode": "auto", 308 | "orientation": "auto", 309 | "reduceOptions": { 310 | "calcs": [ 311 | "lastNotNull" 312 | ], 313 | "fields": "", 314 | "values": false 315 | }, 316 | "textMode": "auto" 317 | }, 318 | "pluginVersion": "8.5.0", 319 | "targets": [ 320 | { 321 | "datasource": { 322 | "type": "loki", 323 | "uid": "tvqsxKw7z" 324 | }, 325 | "expr": "sum(\n count_over_time(\n {source=\"PythonApp\"} \n | logfmt \n | userId = 1\n [1h]\n )\n)", 326 | "refId": "A" 327 | } 328 | ], 329 | "title": "USER 1", 330 | "type": "stat" 331 | }, 332 | { 333 | "datasource": { 334 | "type": "loki", 335 | "uid": "P8E80F9AEF21F6940" 336 | }, 337 | "fieldConfig": { 338 | "defaults": { 339 | "color": { 340 | "mode": "thresholds" 341 | }, 342 | "mappings": [], 343 | "thresholds": { 344 | "mode": "absolute", 345 | "steps": [ 346 | { 347 | "color": "blue", 348 | "value": null 349 | } 350 | ] 351 | } 352 | }, 353 | "overrides": [] 354 | }, 355 | "gridPos": { 356 | "h": 11, 357 | "w": 4, 358 | "x": 15, 359 | "y": 19 360 | }, 361 | "id": 8, 362 | "options": { 363 | "colorMode": "value", 364 | "graphMode": "none", 365 | "justifyMode": "auto", 366 | "orientation": "auto", 367 | "reduceOptions": { 368 | "calcs": [ 369 | "lastNotNull" 370 | ], 371 | "fields": "", 372 | "values": false 373 | }, 374 | "textMode": "auto" 375 | }, 376 | "pluginVersion": "8.5.0", 377 | "targets": [ 378 | { 379 | "datasource": { 380 | "type": "loki", 381 | "uid": "tvqsxKw7z" 382 | }, 383 | "expr": "sum(\n count_over_time(\n {source=\"PythonApp\"} \n | logfmt \n | userId = 2\n [1h]\n )\n)", 384 | "refId": "A" 385 | } 386 | ], 387 | "title": "USER 2", 388 | "type": "stat" 389 | }, 390 | { 391 | "datasource": { 392 | "type": "loki", 393 | "uid": "P8E80F9AEF21F6940" 394 | }, 395 | "fieldConfig": { 396 | "defaults": { 397 | "color": { 398 | "mode": "thresholds" 399 | }, 400 | "mappings": [], 401 | 
"thresholds": { 402 | "mode": "absolute", 403 | "steps": [ 404 | { 405 | "color": "blue", 406 | "value": null 407 | } 408 | ] 409 | } 410 | }, 411 | "overrides": [] 412 | }, 413 | "gridPos": { 414 | "h": 11, 415 | "w": 4, 416 | "x": 19, 417 | "y": 19 418 | }, 419 | "id": 9, 420 | "options": { 421 | "colorMode": "value", 422 | "graphMode": "none", 423 | "justifyMode": "auto", 424 | "orientation": "auto", 425 | "reduceOptions": { 426 | "calcs": [ 427 | "lastNotNull" 428 | ], 429 | "fields": "", 430 | "values": false 431 | }, 432 | "textMode": "auto" 433 | }, 434 | "pluginVersion": "8.5.0", 435 | "targets": [ 436 | { 437 | "datasource": { 438 | "type": "loki", 439 | "uid": "tvqsxKw7z" 440 | }, 441 | "expr": "sum(\n count_over_time(\n {source=\"PythonApp\"} \n | logfmt \n | userId = 3\n [1h]\n )\n)", 442 | "refId": "A" 443 | } 444 | ], 445 | "title": "USER 3", 446 | "type": "stat" 447 | } 448 | ], 449 | "refresh": "5s", 450 | "schemaVersion": 36, 451 | "style": "dark", 452 | "tags": [], 453 | "templating": { 454 | "list": [] 455 | }, 456 | "time": { 457 | "from": "now-15m", 458 | "to": "now" 459 | }, 460 | "timepicker": {}, 461 | "timezone": "", 462 | "title": "Python Dashboard", 463 | "uid": "32Q0icw7z", 464 | "version": 1, 465 | "weekStart": "" 466 | } -------------------------------------------------------------------------------- /06-alertas-logs-loki/grafana/provisioning/datasources/datasource.yml: -------------------------------------------------------------------------------- 1 | # config file version 2 | apiVersion: 1 3 | 4 | # list of datasources that should be deleted from the database 5 | deleteDatasources: 6 | - name: Loki 7 | orgId: 1 8 | 9 | # list of datasources to insert/update depending 10 | # whats available in the database 11 | datasources: 12 | # name of the datasource. Required 13 | - name: Loki 14 | # datasource type. Required 15 | type: loki 16 | # access mode. direct or proxy. Required 17 | access: proxy 18 | # org id. 
will default to orgId 1 if not specified 19 | orgId: 1 20 | # url 21 | url: http://loki:3100 22 | # database password, if used 23 | password: 24 | # database user, if used 25 | user: 26 | # database name, if used 27 | database: 28 | # enable/disable basic auth 29 | basicAuth: false 30 | # basic auth username, if used 31 | basicAuthUser: 32 | # basic auth password, if used 33 | basicAuthPassword: 34 | # enable/disable with credentials headers 35 | withCredentials: 36 | # mark as default datasource. Max one per org 37 | isDefault: true 38 | # fields that will be converted to json and stored in json_data 39 | jsonData: 40 | graphiteVersion: "1.1" 41 | tlsAuth: false 42 | tlsAuthWithCACert: false 43 | # json object of data that will be encrypted. 44 | secureJsonData: 45 | tlsCACert: "..." 46 | tlsClientCert: "..." 47 | tlsClientKey: "..." 48 | version: 1 49 | # allow users to edit datasources from the UI. 50 | editable: true 51 | -------------------------------------------------------------------------------- /06-alertas-logs-loki/server.py: -------------------------------------------------------------------------------- 1 | # Python 3 server example 2 | import os 3 | from http.server import BaseHTTPRequestHandler, HTTPServer 4 | from urllib.parse import urlparse, parse_qs 5 | import time 6 | import requests 7 | import json 8 | import datetime 9 | import pytz 10 | import random 11 | 12 | def info(): 13 | return 'INFO' 14 | 15 | def warning(): 16 | return 'WARNING' 17 | 18 | def error(): 19 | return 'ERROR' 20 | 21 | logType = { 22 | 200: info, 23 | 201: warning, 24 | 202: error 25 | } 26 | 27 | def pushToLoki(msg): 28 | host = 'ariel-pc' 29 | curr_datetime = datetime.datetime.now(pytz.timezone('Asia/Yekaterinburg')) 30 | curr_datetime = curr_datetime.isoformat('T') 31 | 32 | code = random.randint(200, 202) 33 | userid = random.randint(1, 3) 34 | 35 | logTypeTxt = logType.get(code, error)() 36 | logToPush = '[{logTypeTxt}] code={code} userId={userId} 
log={msg}'.format(logTypeTxt=logTypeTxt,code=code,userId=userid,msg=msg) 37 | 38 | # push msg log into grafana-loki 39 | url = 'http://:3100/api/prom/push' 40 | headers = { 41 | 'Content-type': 'application/json' 42 | } 43 | payload = { 44 | 'streams': [ 45 | { 46 | 'labels': '{source=\"PythonApp\",job=\"Cliente-01\", host=\"' + host + '\"}', 47 | 'entries': [ 48 | { 49 | 'ts': curr_datetime, 50 | 'line': logToPush 51 | } 52 | ] 53 | } 54 | ] 55 | } 56 | payload = json.dumps(payload) 57 | answer = requests.post(url, data=payload, headers=headers) 58 | print(answer) 59 | response = answer 60 | print(response) 61 | 62 | hostName = "localhost" 63 | serverPort = 8080 64 | 65 | class MyServer(BaseHTTPRequestHandler): 66 | def do_GET(self): 67 | query_string = parse_qs(urlparse(self.path).query) 68 | print("query_string " , query_string) 69 | try: 70 | log = query_string["txt"][0] 71 | print("log= ", log) 72 | pushToLoki(log) 73 | except: 74 | pass 75 | self.send_response(200) 76 | self.send_header("Content-type", "text/html") 77 | self.end_headers() 78 | self.wfile.write(bytes("https://pythonbasics.org", "utf-8")) 79 | self.wfile.write(bytes("

Request: %s

" % self.path, "utf-8")) 80 | self.wfile.write(bytes("", "utf-8")) 81 | self.wfile.write(bytes("

This is an example web server.

", "utf-8")) 82 | self.wfile.write(bytes("", "utf-8")) 83 | 84 | if __name__ == "__main__": 85 | webServer = HTTPServer((hostName, serverPort), MyServer) 86 | print("Server started http://%s:%s" % (hostName, serverPort)) 87 | 88 | try: 89 | webServer.serve_forever() 90 | except KeyboardInterrupt: 91 | pass 92 | 93 | webServer.server_close() 94 | print("Server stopped.") -------------------------------------------------------------------------------- /08-blackbox-exporter/blackbox.yml: -------------------------------------------------------------------------------- 1 | modules: 2 | http_2xx: 3 | prober: http 4 | http: 5 | preferred_ip_protocol: "ip4" -------------------------------------------------------------------------------- /08-blackbox-exporter/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | prometheus: 5 | image: prom/prometheus:latest 6 | privileged: true 7 | volumes: 8 | - ./prometheus.yml:/etc/prometheus/prometheus.yml 9 | ports: 10 | - '9090:9090' 11 | 12 | blackbox: 13 | image: prom/blackbox-exporter:latest 14 | privileged: true 15 | volumes: 16 | - ./blackbox.yml:/etc/blackbox_exporter/config.yml 17 | ports: 18 | - '9115:9115' 19 | 20 | grafana: 21 | image: grafana/grafana:latest 22 | environment: 23 | - GF_SECURITY_ADMIN_PASSWORD=pass 24 | depends_on: 25 | - prometheus 26 | ports: 27 | - "3000:3000" -------------------------------------------------------------------------------- /08-blackbox-exporter/prometheus.yml: -------------------------------------------------------------------------------- 1 | # my global config 2 | global: 3 | scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. 4 | evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. 5 | # scrape_timeout is set to the global default (10s). 
6 | 7 | # A single scrape job: the targets listed below are probed through the 8 | # blackbox exporter instead of being scraped directly. 9 | scrape_configs: 10 | 11 | - job_name: 'blackbox' 12 | # scheme: http 13 | metrics_path: /probe 14 | params: 15 | module: [http_2xx] # Probe with the http_2xx module. 16 | static_configs: 17 | - targets: 18 | - http://prometheus.io # Target to probe with http. 19 | - https://prometheus.io # Target to probe with https. 20 | - http://0.0.0.0:8080 # fake local web service. 21 | - http://caosbinario.com 22 | relabel_configs: 23 | - source_labels: [__address__] 24 | target_label: __param_target 25 | - source_labels: [__param_target] 26 | target_label: instance 27 | - target_label: __address__ 28 | replacement: blackbox:9115 # The blackbox exporter's real hostname:port. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Observability 2 | Código relacionado a los videos de Observabilidad: https://www.youtube.com/playlist?list=PLC-jxfv-8E7L-w6bdX61qa4ehrrgCIh4R 3 | --------------------------------------------------------------------------------