├── .gitignore ├── README.md ├── ansible.cfg ├── group_vars └── monitor.yml ├── main.yml ├── prometheus_config.yml ├── roles ├── alert_manager │ ├── files │ │ └── alertmanager.service │ ├── tasks │ │ └── main.yml │ └── templates │ │ └── alertmanager.yml.j2 ├── grafana │ ├── files │ │ └── grafana.nginx.conf │ └── tasks │ │ └── main.yml ├── loki │ ├── files │ │ ├── loki.service │ │ └── loki.yml │ └── tasks │ │ └── main.yml ├── nginx │ └── tasks │ │ └── main.yml ├── node_exporter │ ├── files │ │ └── node_exporter.service │ └── tasks │ │ └── main.yml ├── prepare │ └── tasks │ │ └── main.yml ├── prometheus │ ├── files │ │ ├── prometheus.nginx.conf │ │ ├── prometheus.service │ │ ├── prometheus.yml │ │ └── rules.yml │ └── tasks │ │ ├── install_prometheus.yml │ │ ├── main.yml │ │ └── secure_nginx.yml ├── prometheus_config │ └── tasks │ │ └── main.yml └── promtail │ ├── files │ ├── promtail.service │ └── promtail.yml │ └── tasks │ └── main.yml └── samples ├── hosts.sample ├── inventory.sample └── prometheus.yml.sample /.gitignore: -------------------------------------------------------------------------------- 1 | inventory.ini 2 | /roles/prometheus_config/files/hosts 3 | /roles/prometheus_config/files/prometheus.yml 4 | /roles/prometheus_config/templates/prometheus.yml.j2 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Prometheus, Grafana and Alert Manager Setup 2 | 3 | This repo sets up Prometheus, Grafana and Alert Manager for a group of networked servers. 4 | 5 | ## Step 1: Set up your own inventory file 6 | 7 | Copy the sample inventory file, and make your edits. 8 | 9 | ```bash 10 | cp samples/inventory.sample inventory 11 | ``` 12 | 13 | ## Step 2: Customize several key files 14 | 15 | Copy the two configuration files, and edit them to reflect how your network is set up and which Prometheus jobs you want to run. 
16 | 17 | ```bash 18 | cp samples/hosts.sample roles/prometheus_config/files/hosts 19 | cp samples/prometheus.yml.sample roles/prometheus_config/templates/prometheus.yml.j2 20 | ``` 21 | 22 | The hosts file is how we set up internal name lookup: it is copied over /etc/hosts on the monitor. This makes the Grafana dashboard easier to read, as each server is shown by its human-readable name rather than an IP address like "10.0.0.1". Both files are applied by the separate prometheus_config.yml playbook, not by main.yml. 23 | 24 | ## Step 3: Run main playbook to set up a fresh monitor 25 | 26 | The main playbook is main.yml, which sets up a fresh monitor from scratch. It configures the firewall and installs Nginx, Node Exporter, Prometheus, Grafana, Alert Manager, Loki and Promtail. 27 | 28 | ```bash 29 | ansible-playbook -i inventory main.yml 30 | ``` 31 | 32 | That's it! 33 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | inventory=inventory.ini -------------------------------------------------------------------------------- /group_vars/monitor.yml: -------------------------------------------------------------------------------- 1 | --- 2 | prometheus_version: 2.43.0 3 | grafana_version: 9.4.7 4 | alert_manager_version: 0.25.0 5 | loki_version: 2.8.0 6 | promtail_version: 2.8.0 7 | node_exporter_version: 1.5.0 8 | -------------------------------------------------------------------------------- /main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Set up a monitor 3 | hosts: monitor 4 | become: true 5 | gather_facts: false 6 | roles: 7 | - prepare 8 | - nginx 9 | - node_exporter 10 | - prometheus 11 | - grafana 12 | - alert_manager 13 | - loki 14 | - promtail 15 | -------------------------------------------------------------------------------- /prometheus_config.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update prometheus config 3 | 
hosts: monitor 4 | become: true 5 | gather_facts: false 6 | roles: 7 | - prometheus_config 8 | -------------------------------------------------------------------------------- /roles/alert_manager/files/alertmanager.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=AlertManager Server Service 3 | Wants=network-online.target 4 | After=network-online.target 5 | 6 | [Service] 7 | User=root 8 | Group=root 9 | Type=simple 10 | ExecStart=/usr/local/bin/alertmanager --config.file /etc/alertmanager/alertmanager.yml --web.external-url=http://localhost:9093 --cluster.advertise-address='0.0.0.0:9093' 11 | 12 | [Install] 13 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /roles/alert_manager/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: download Alert Manager 3 | get_url: 4 | url: 'https://github.com/prometheus/alertmanager/releases/download/v{{ alert_manager_version }}/alertmanager-{{ alert_manager_version }}.linux-amd64.tar.gz' 5 | dest: '/tmp/alertmanager-{{ alert_manager_version }}.linux-amd64.tar.gz' 6 | mode: '0700' 7 | 8 | - name: unarchive Alert Manager 9 | unarchive: 10 | src: '/tmp/alertmanager-{{ alert_manager_version }}.linux-amd64.tar.gz' 11 | remote_src: yes 12 | dest: '/tmp' 13 | # Install the alertmanager binary from the unpacked release into /usr/local/bin. 14 | - name: Copy binary files 15 | copy: 16 | src: '/tmp/alertmanager-{{ alert_manager_version }}.linux-amd64/alertmanager' 17 | dest: '/usr/local/bin/' 18 | remote_src: true 19 | owner: prometheus 20 | group: prometheus 21 | mode: '0755' # quoted octal: a bare 755 is read as a decimal number and yields wrong permissions 22 | 23 | - name: Creates alert manager directory 24 | file: 25 | path: '/etc/alertmanager' 26 | state: directory 27 | owner: prometheus 28 | group: prometheus 29 | recurse: true 30 | mode: '0755' # quoted for consistency with the other mode values 31 | changed_when: false 32 | 33 | - name: Copy Alert Manager config 34 | template: 35 | src: 'alertmanager.yml.j2' 36 | dest: '/etc/alertmanager/alertmanager.yml' 
37 | owner: prometheus 38 | group: prometheus 39 | 40 | - name: Copy Alert Manager service file 41 | copy: 42 | src: 'alertmanager.service' 43 | dest: '/etc/systemd/system/alertmanager.service' 44 | owner: root 45 | group: root 46 | mode: '0600' # quoted octal: a bare 600 is read as a decimal number and yields wrong permissions 47 | 48 | - name: start Alert Manager service 49 | systemd: 50 | name: alertmanager 51 | state: restarted 52 | daemon_reload: true 53 | enabled: true 54 | changed_when: false 55 | 56 | - name: Install Grafana plugin 57 | shell: 'sudo grafana-cli plugins install camptocamp-prometheus-alertmanager-datasource' 58 | changed_when: false 59 | 60 | - name: Restart Grafana 61 | systemd: 62 | name: grafana-server 63 | state: restarted 64 | changed_when: false 65 | -------------------------------------------------------------------------------- /roles/alert_manager/templates/alertmanager.yml.j2: -------------------------------------------------------------------------------- 1 | global: 2 | resolve_timeout: 1m 3 | 4 | route: 5 | receiver: "gmail-notifications" 6 | 7 | receivers: 8 | - name: "gmail-notifications" 9 | email_configs: 10 | - to: {{ receiving_email }} 11 | from: {{ notifications_email }} 12 | smarthost: smtp.gmail.com:587 13 | auth_username: {{ notifications_email }} 14 | auth_identity: {{ notifications_email }} 15 | auth_password: {{ notifications_email_password | to_json }} # to_json renders a quoted string, so characters such as '#' or ': ' in the password cannot break the YAML 16 | send_resolved: true 17 | -------------------------------------------------------------------------------- /roles/grafana/files/grafana.nginx.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80 default_server; 3 | server_name monitor.polkachu.com; 4 | 5 | root /usr/share/nginx/html; 6 | index index.html index.htm; 7 | 8 | location / { 9 | proxy_pass http://localhost:3000/; 10 | proxy_set_header Host $http_host; 11 | } 12 | 13 | location /loki/ { 14 | proxy_pass http://localhost:3100/; 15 | auth_basic "Prometheus"; 16 | auth_basic_user_file ".loki"; 17 | } 18 | 19 | location /metrics { 20 | deny 
all; 21 | } 22 | } -------------------------------------------------------------------------------- /roles/grafana/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install adduser and libfontconfig1 3 | apt: 4 | name: 5 | - adduser 6 | - libfontconfig1 7 | state: latest 8 | # Fetch the Grafana .deb for the pinned grafana_version into /tmp. 9 | - name: download Grafana 10 | get_url: 11 | url: 'https://dl.grafana.com/oss/release/grafana_{{grafana_version}}_amd64.deb' 12 | dest: '/tmp/grafana_{{grafana_version}}_amd64.deb' 13 | # Install the package that was just downloaded. 14 | - name: install Grafana 15 | apt: 16 | deb: '/tmp/grafana_{{grafana_version}}_amd64.deb' 17 | # Restart Grafana on every run and enable it at boot; changed_when: false keeps the restart out of the change report. 18 | - name: start grafana service 19 | systemd: 20 | name: grafana-server 21 | state: restarted 22 | daemon_reload: true 23 | enabled: true 24 | changed_when: false 25 | # Remove the stock nginx site from sites-enabled. 26 | - name: Disable the default nginx site 27 | file: 28 | path: '/etc/nginx/sites-enabled/default' 29 | state: absent 30 | # Install this role's grafana.nginx.conf as an enabled nginx site. 31 | - name: create nginx config for grafana 32 | copy: 33 | src: grafana.nginx.conf 34 | dest: /etc/nginx/sites-enabled/grafana.conf 35 | # Restart nginx so the site changes above take effect. 36 | - name: restart nginx service 37 | systemd: 38 | name: nginx 39 | state: restarted 40 | daemon_reload: yes 41 | enabled: yes 42 | changed_when: false 43 | -------------------------------------------------------------------------------- /roles/loki/files/loki.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Loki service 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | User=loki 8 | ExecStart=/usr/local/bin/loki -config.file /etc/loki/loki.yml 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /roles/loki/files/loki.yml: -------------------------------------------------------------------------------- 1 | auth_enabled: false 2 | 3 | server: 4 | http_listen_port: 3100 5 | grpc_listen_port: 9096 6 | 7 | ingester: 8 | wal: 9 | enabled: true 10 | dir: /tmp/wal 
11 | lifecycler: 12 | address: 127.0.0.1 13 | ring: 14 | kvstore: 15 | store: inmemory 16 | replication_factor: 1 17 | final_sleep: 0s 18 | chunk_idle_period: 1h # Any chunk not receiving new logs in this time will be flushed 19 | max_chunk_age: 1h # All chunks will be flushed when they hit this age, default is 1h 20 | chunk_target_size: 1048576 # Loki will attempt to build chunks up to 1 MiB (1048576 bytes), flushing first if chunk_idle_period or max_chunk_age is reached first 21 | chunk_retain_period: 30s # Must be greater than index read cache TTL if using an index cache (Default index read cache TTL is 5m) 22 | max_transfer_retries: 0 # Chunk transfers disabled 23 | 24 | schema_config: 25 | configs: 26 | - from: 2020-10-24 27 | store: boltdb-shipper 28 | object_store: filesystem 29 | schema: v11 30 | index: 31 | prefix: index_ 32 | period: 24h 33 | # NOTE(review): index, cache and chunks all live under /tmp/loki; presumably lost if /tmp is cleared on reboot - confirm this is intended 34 | storage_config: 35 | boltdb_shipper: 36 | active_index_directory: /tmp/loki/boltdb-shipper-active 37 | cache_location: /tmp/loki/boltdb-shipper-cache 38 | cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space 39 | shared_store: filesystem 40 | filesystem: 41 | directory: /tmp/loki/chunks 42 | 43 | compactor: 44 | working_directory: /tmp/loki/boltdb-shipper-compactor 45 | shared_store: filesystem 46 | 47 | limits_config: 48 | reject_old_samples: true 49 | reject_old_samples_max_age: 168h 50 | 51 | chunk_store_config: 52 | max_look_back_period: 0s 53 | 54 | table_manager: 55 | retention_deletes_enabled: false 56 | retention_period: 0s 57 | 58 | ruler: 59 | storage: 60 | type: local 61 | local: 62 | directory: /tmp/loki/rules 63 | rule_path: /tmp/loki/rules-temp 64 | alertmanager_url: http://localhost:9093 65 | ring: 66 | kvstore: 67 | store: inmemory 68 | enable_api: true 69 | -------------------------------------------------------------------------------- /roles/loki/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - 
name: Create Loki user 3 | user: 4 | name: loki 5 | comment: 'Loki user' 6 | shell: /usr/sbin/nologin 7 | state: present 8 | # Write the basic-auth file for nginx; the quote filter shell-escapes the credentials so special characters survive. 9 | - name: generate .loki file 10 | shell: | 11 | set -o pipefail 12 | htpasswd -bc /etc/nginx/.loki {{ loki_user | quote }} {{ loki_password | quote }} 13 | chmod 600 /etc/nginx/.loki 14 | chown www-data:www-data /etc/nginx/.loki 15 | args: 16 | executable: /bin/bash 17 | changed_when: false 18 | 19 | - name: Creates Loki directory 20 | file: 21 | path: '/etc/loki' 22 | state: directory 23 | owner: loki 24 | group: loki 25 | recurse: true 26 | mode: '0755' # quoted for consistency with the other mode values 27 | changed_when: false 28 | 29 | - name: install unzip 30 | apt: 31 | name: unzip 32 | state: present 33 | 34 | - name: download Loki 35 | get_url: 36 | url: 'https://github.com/grafana/loki/releases/download/v{{ loki_version }}/loki-linux-amd64.zip' 37 | dest: '/tmp/loki-linux-amd64.zip' 38 | mode: '0700' 39 | 40 | - name: unarchive Loki 41 | unarchive: 42 | src: '/tmp/loki-linux-amd64.zip' 43 | remote_src: yes 44 | dest: '/tmp' 45 | 46 | - name: Copy binary file 47 | copy: 48 | src: '/tmp/loki-linux-amd64' 49 | dest: '/usr/local/bin/loki' 50 | remote_src: true 51 | owner: loki 52 | group: loki 53 | mode: '0755' # quoted octal: a bare 755 is read as a decimal number and yields wrong permissions 54 | 55 | - name: Copy loki config 56 | copy: 57 | src: 'loki.yml' 58 | dest: '/etc/loki/loki.yml' 59 | owner: loki 60 | group: loki 61 | 62 | - name: Copy loki service file 63 | copy: 64 | src: 'loki.service' 65 | dest: '/etc/systemd/system/loki.service' 66 | owner: root 67 | group: root 68 | mode: '0600' # quoted octal: a bare 600 is read as a decimal number and yields wrong permissions 69 | 70 | - name: start loki service 71 | systemd: 72 | name: loki 73 | state: restarted 74 | daemon_reload: true 75 | enabled: true 76 | changed_when: false 77 | -------------------------------------------------------------------------------- /roles/nginx/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install Nginx 3 | apt: 4 | name: nginx 5 | state: latest 6 | 7 | - name: Start Nginx 8 | service: 9 | name: nginx 10 | state: 
started 11 | enabled: true 12 | -------------------------------------------------------------------------------- /roles/node_exporter/files/node_exporter.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Node Exporter 3 | 4 | [Service] 5 | User=root 6 | Group=root 7 | ExecStart=/usr/local/bin/node_exporter 8 | 9 | Restart=always 10 | 11 | [Install] 12 | WantedBy=multi-user.target 13 | -------------------------------------------------------------------------------- /roles/node_exporter/tasks/main.yml: -------------------------------------------------------------------------------- 1 | - name: download node_exporter 2 | get_url: 3 | url: 'https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_version }}/node_exporter-{{ node_exporter_version }}.linux-amd64.tar.gz' 4 | dest: '/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64.tar.gz' 5 | mode: '0700' 6 | 7 | - name: unarchive node_exporter 8 | unarchive: 9 | src: '/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64.tar.gz' 10 | remote_src: yes 11 | dest: '/tmp' 12 | # Install the node_exporter binary from the unpacked release into /usr/local/bin. 13 | - name: copy node_exporter binary 14 | copy: 15 | src: '/tmp/node_exporter-{{ node_exporter_version }}.linux-amd64/node_exporter' 16 | dest: '/usr/local/bin/' 17 | remote_src: true 18 | owner: root 19 | group: root 20 | mode: '0755' # quoted octal: a bare 755 is read as a decimal number and yields wrong permissions 21 | 22 | - name: create node_exporter systemd unit 23 | copy: 24 | src: node_exporter.service 25 | dest: /etc/systemd/system/node_exporter.service 26 | owner: root 27 | group: root 28 | mode: '0600' # same permissions as before, written with the leading zero like the other modes 29 | 30 | - name: start node_exporter service 31 | systemd: 32 | name: node_exporter 33 | state: restarted 34 | daemon_reload: yes 35 | enabled: yes 36 | changed_when: false 37 | -------------------------------------------------------------------------------- /roles/prepare/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: install packages 3 | apt: 4 | name: 
ufw 5 | state: present 6 | update_cache: yes 7 | 8 | - name: ufw already enabled 9 | command: ufw status verbose 10 | register: ufw_status_result 11 | changed_when: false 12 | 13 | # Allow the SSH port before the firewall is enabled so that later Ansible connections are not blocked. 14 | - name: open ssh port 15 | command: ufw allow {{ ansible_port | default(22) }}/tcp 16 | 17 | - name: open http port 80 18 | command: ufw allow 80/tcp 19 | 20 | - name: enable firewall 21 | shell: | 22 | set -o pipefail 23 | echo "y" | ufw enable 24 | args: 25 | executable: /bin/bash 26 | -------------------------------------------------------------------------------- /roles/prometheus/files/prometheus.nginx.conf: -------------------------------------------------------------------------------- 1 | server { 2 | listen 80; 3 | server_name prometheus.polkachu.com; 4 | 5 | root /usr/share/nginx/html; 6 | index index.html index.htm; 7 | 8 | location / { 9 | proxy_pass http://localhost:9090/; 10 | 11 | auth_basic "Prometheus"; 12 | auth_basic_user_file ".prometheus"; 13 | } 14 | } -------------------------------------------------------------------------------- /roles/prometheus/files/prometheus.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Prometheus Monitoring 3 | Wants=network-online.target 4 | After=network-online.target 5 | 6 | [Service] 7 | User=prometheus 8 | Group=prometheus 9 | Type=simple 10 | ExecStart=/usr/local/bin/prometheus \ 11 | --config.file /etc/prometheus/prometheus.yml \ 12 | --storage.tsdb.path /var/lib/prometheus/ \ 13 | --web.console.templates=/etc/prometheus/consoles \ 14 | --web.console.libraries=/etc/prometheus/console_libraries 15 | ExecReload=/bin/kill -HUP $MAINPID 16 | 17 | [Install] 18 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /roles/prometheus/files/prometheus.yml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | evaluation_interval: 15s 4 | 5 | rule_files: 6 | - 'rules.yml' 7 | 8 | alerting: 9 | alertmanagers: 10 | - static_configs: 11 | - 
targets: 12 | - localhost:9093 13 | -------------------------------------------------------------------------------- /roles/prometheus/files/rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: alert_rules 3 | rules: 4 | - alert: InstanceDown 5 | expr: up == 0 6 | for: 5m 7 | labels: 8 | severity: critical 9 | annotations: 10 | summary: 'Instance [{{ $labels.instance }}] down' 11 | description: '[{{ $labels.instance }}] of job [{{ $labels.job }}] has been down for more than 5 minutes.' 12 | # The description below is double-quoted so that \n becomes a real newline, as in the other rules. 13 | - alert: HostOutOfMemory 14 | expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10 15 | for: 2m 16 | labels: 17 | severity: warning 18 | annotations: 19 | summary: Host out of memory (instance {{ $labels.instance }}) 20 | description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}" 21 | 22 | - alert: HostHighCpuLoad 23 | expr: 100 - (avg by(instance)(rate(node_cpu_seconds_total{mode="idle"}[2m])) * 100) > 80 24 | for: 0m 25 | labels: 26 | severity: warning 27 | annotations: 28 | summary: Host high CPU load 29 | description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" 30 | 31 | - alert: HighStorage 32 | expr: 100 - (100 * ((node_filesystem_avail_bytes{mountpoint="/",fstype!="rootfs"} ) / (node_filesystem_size_bytes{mountpoint="/",fstype!="rootfs"}) )) > 80 33 | for: 0m 34 | labels: 35 | severity: warning 36 | annotations: 37 | summary: High Storage 38 | description: "Storage utilization is > 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}" 39 | -------------------------------------------------------------------------------- /roles/prometheus/tasks/install_prometheus.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Create Prometheus user 3 | user: 4 | name: prometheus 5 | comment: 'Prometheus user' 6 | shell: /usr/sbin/nologin 7 | state: present 8 | 9 | - name: Creates Prometheus directory 10 | 
file: 11 | path: '{{ item }}' 12 | state: directory 13 | owner: prometheus 14 | group: prometheus 15 | recurse: true 16 | mode: '0755' # quoted for consistency with the other mode values 17 | changed_when: false 18 | with_items: 19 | - /etc/prometheus 20 | - /var/lib/prometheus 21 | 22 | - name: download Prometheus 23 | get_url: 24 | url: 'https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz' 25 | dest: '/tmp/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz' 26 | mode: '0700' 27 | 28 | - name: unarchive Prometheus 29 | unarchive: 30 | src: '/tmp/prometheus-{{ prometheus_version }}.linux-amd64.tar.gz' 31 | remote_src: yes 32 | dest: '/tmp' 33 | # Install the prometheus and promtool binaries from the unpacked release. 34 | - name: Copy binary files 35 | copy: 36 | src: '/tmp/prometheus-{{ prometheus_version }}.linux-amd64/{{ item }}' 37 | dest: '/usr/local/bin/' 38 | remote_src: true 39 | owner: prometheus 40 | group: prometheus 41 | mode: '0755' # quoted octal: a bare 755 is read as a decimal number and yields wrong permissions 42 | with_items: 43 | - prometheus 44 | - promtool 45 | 46 | - name: Copy folders files 47 | copy: 48 | src: '/tmp/prometheus-{{ prometheus_version }}.linux-amd64/{{ item }}' 49 | dest: '/etc/prometheus/' 50 | remote_src: true 51 | owner: prometheus 52 | group: prometheus 53 | with_items: 54 | - consoles 55 | - console_libraries 56 | 57 | - name: Copy prometheus config 58 | copy: 59 | src: 'prometheus.yml' 60 | dest: '/etc/prometheus/prometheus.yml' 61 | owner: prometheus 62 | group: prometheus 63 | 64 | - name: Copy prometheus service file 65 | copy: 66 | src: 'prometheus.service' 67 | dest: '/etc/systemd/system/prometheus.service' 68 | owner: root 69 | group: root 70 | mode: '0600' # quoted octal: a bare 600 is read as a decimal number and yields wrong permissions 71 | 72 | - name: Copy prometheus rule file 73 | copy: 74 | src: 'rules.yml' 75 | dest: '/etc/prometheus/rules.yml' 76 | owner: root 77 | group: root 78 | changed_when: false 79 | 80 | - name: start prometheus service 81 | systemd: 82 | name: prometheus 83 | state: restarted 84 | daemon_reload: true 85 | enabled: true 86 | changed_when: false 87 | 
-------------------------------------------------------------------------------- /roles/prometheus/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Install Prometheus 3 | import_tasks: install_prometheus.yml 4 | 5 | - name: Secure Nginx 6 | import_tasks: secure_nginx.yml 7 | -------------------------------------------------------------------------------- /roles/prometheus/tasks/secure_nginx.yml: -------------------------------------------------------------------------------- 1 | - name: check if .prometheus file already exists 2 | stat: 3 | path: /etc/nginx/.prometheus 4 | register: htpasswd_file 5 | 6 | - name: remove libapr related packages 7 | apt: 8 | pkg: 9 | - libapr1 10 | - libaprutil1 11 | state: absent 12 | update_cache: yes 13 | autoremove: yes 14 | when: not htpasswd_file.stat.exists 15 | 16 | - name: install apache2-utils 17 | apt: 18 | name: apache2-utils 19 | state: present 20 | update_cache: yes 21 | when: not htpasswd_file.stat.exists 22 | # Write the basic-auth file for nginx; the quote filter shell-escapes the credentials so special characters survive. 23 | - name: generate .prometheus file 24 | shell: | 25 | set -o pipefail 26 | htpasswd -bc /etc/nginx/.prometheus {{ prometheus_user | quote }} {{ prometheus_password | quote }} 27 | chmod 600 /etc/nginx/.prometheus 28 | chown www-data:www-data /etc/nginx/.prometheus 29 | args: 30 | executable: /bin/bash 31 | changed_when: false 32 | 33 | - name: create nginx config for prometheus 34 | copy: 35 | src: prometheus.nginx.conf 36 | dest: /etc/nginx/sites-enabled/prometheus.conf 37 | 38 | - name: restart nginx service 39 | systemd: 40 | name: nginx 41 | state: restarted 42 | daemon_reload: yes 43 | enabled: yes 44 | changed_when: false 45 | -------------------------------------------------------------------------------- /roles/prometheus_config/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Update the /etc/hosts file 3 | copy: 4 | src: 'hosts' 5 | dest: '/etc/hosts' 6 | owner: root 7 | group: 
prometheus 8 | mode: '0644' 9 | 10 | - name: Copy prometheus config 11 | template: 12 | src: 'prometheus.yml.j2' 13 | dest: '/etc/prometheus/prometheus.yml' 14 | owner: prometheus 15 | group: prometheus 16 | 17 | - name: start prometheus service 18 | systemd: 19 | name: prometheus 20 | state: restarted 21 | daemon_reload: true 22 | enabled: true 23 | changed_when: false 24 | -------------------------------------------------------------------------------- /roles/promtail/files/promtail.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Promtail service 3 | After=network.target 4 | 5 | [Service] 6 | Type=simple 7 | User=promtail 8 | ExecStart=/usr/local/bin/promtail -config.file /etc/promtail/promtail.yml 9 | 10 | [Install] 11 | WantedBy=multi-user.target -------------------------------------------------------------------------------- /roles/promtail/files/promtail.yml: -------------------------------------------------------------------------------- 1 | server: 2 | http_listen_port: 9080 3 | grpc_listen_port: 0 4 | 5 | positions: 6 | filename: /tmp/positions.yaml 7 | 8 | clients: 9 | - url: http://127.0.0.1:3100/loki/api/v1/push 10 | 11 | scrape_configs: 12 | - job_name: journal 13 | journal: 14 | max_age: 12h 15 | labels: 16 | job: systemd-journal 17 | host: monitor 18 | relabel_configs: 19 | - source_labels: ['__journal__systemd_unit'] 20 | target_label: 'unit' 21 | # Tail the nginx logs; the regex stage below needs named capture groups, one per field listed under the labels stage. 22 | - job_name: nginx 23 | static_configs: 24 | - targets: 25 | - localhost 26 | labels: 27 | job: nginx 28 | host: monitor 29 | __path__: /var/log/nginx/*log 30 | pipeline_stages: 31 | - match: 32 | selector: '{job="nginx"}' 33 | stages: 34 | - regex: 35 | expression: '^(?P<remote_addr>[\w\.]+) - (?P<remote_user>[^ ]*) \[(?P<time_local>.*)\] "(?P<method>[^ ]*) (?P<request>[^ ]*) (?P<protocol>[^ ]*)" (?P<status>[\d]+) (?P<body_bytes_sent>[\d]+) "(?P<http_referer>[^"]*)" "(?P<http_user_agent>[^"]*)"?' 
36 | - labels: 37 | #remote_addr: 38 | #remote_user: 39 | #time_local: 40 | method: 41 | #request: 42 | #protocol: 43 | status: 44 | #body_bytes_sent: 45 | #http_referer: 46 | #http_user_agent: 47 | -------------------------------------------------------------------------------- /roles/promtail/tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - name: Create Promtail user 3 | user: 4 | name: promtail 5 | comment: 'Promtail user' 6 | shell: /usr/sbin/nologin 7 | groups: ['systemd-journal', 'adm'] 8 | state: present 9 | 10 | - name: Creates Promtail directory 11 | file: 12 | path: '/etc/promtail' 13 | state: directory 14 | owner: promtail 15 | group: promtail 16 | recurse: true 17 | mode: '0755' # quoted for consistency with the other mode values 18 | changed_when: false 19 | 20 | - name: install unzip 21 | apt: 22 | name: unzip 23 | state: present 24 | 25 | - name: download Promtail 26 | get_url: 27 | url: 'https://github.com/grafana/loki/releases/download/v{{ promtail_version }}/promtail-linux-amd64.zip' 28 | dest: '/tmp/promtail-linux-amd64.zip' 29 | mode: '0700' 30 | 31 | - name: unarchive promtail 32 | unarchive: 33 | src: '/tmp/promtail-linux-amd64.zip' 34 | remote_src: yes 35 | dest: '/tmp' 36 | # Install the unpacked promtail binary as /usr/local/bin/promtail. 37 | - name: Copy binary file 38 | copy: 39 | src: '/tmp/promtail-linux-amd64' 40 | dest: '/usr/local/bin/promtail' 41 | remote_src: true 42 | owner: promtail 43 | group: promtail 44 | mode: '0755' # quoted octal: a bare 755 is read as a decimal number and yields wrong permissions 45 | 46 | - name: Copy promtail config 47 | copy: 48 | src: 'promtail.yml' 49 | dest: '/etc/promtail/promtail.yml' 50 | owner: promtail 51 | group: promtail 52 | 53 | - name: Copy promtail service file 54 | copy: 55 | src: 'promtail.service' 56 | dest: '/etc/systemd/system/promtail.service' 57 | owner: root 58 | group: root 59 | mode: '0600' # quoted octal: a bare 600 is read as a decimal number and yields wrong permissions 60 | 61 | - name: start promtail service 62 | systemd: 63 | name: promtail 64 | state: restarted 65 | daemon_reload: true 66 | enabled: true 67 | changed_when: false 68 | 
-------------------------------------------------------------------------------- /samples/hosts.sample: -------------------------------------------------------------------------------- 1 | 127.0.0.1 localhost 2 | ::1 ip6-localhost ip6-loopback 3 | fe00::0 ip6-localnet 4 | ff00::0 ip6-mcastprefix 5 | ff02::1 ip6-allnodes 6 | ff02::2 ip6-allrouters 7 | ff02::3 ip6-allhosts 8 | 9 | 10.0.0.2 kusama01 kusama01 10 | -------------------------------------------------------------------------------- /samples/inventory.sample: -------------------------------------------------------------------------------- 1 | [monitor] 2 | 10.10.10.10 3 | 4 | [monitor:vars] 5 | receiving_email=receive@gmail.com 6 | notifications_email=someone@gmail.com 7 | notifications_email_password=password 8 | prometheus_user=prometheus_user 9 | prometheus_password=prometheus_password 10 | # Basic-auth credentials for the /loki/ nginx location; required by the loki role. 11 | loki_user=loki_user 12 | loki_password=loki_password 13 | 14 | [all:vars] 15 | ansible_user=ansible_user 16 | ansible_port=22 17 | ansible_ssh_private_key_file="~/.ssh/id_rsa" -------------------------------------------------------------------------------- /samples/prometheus.yml.sample: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 15s 3 | evaluation_interval: 15s 4 | 5 | rule_files: 6 | - "rules.yml" 7 | 8 | alerting: 9 | alertmanagers: 10 | - static_configs: 11 | - targets: 12 | - localhost:9093 13 | 14 | scrape_configs: 15 | - job_name: "prometheus" 16 | scrape_interval: 5s 17 | static_configs: 18 | - targets: ["localhost:9090"] 19 | --------------------------------------------------------------------------------