├── config ├── alertmanager.yml ├── datasources.yaml ├── prometheus.yaml ├── promtail.yaml ├── nginx.conf └── loki.yaml ├── README.md ├── scripts ├── Dockerfile └── log_to_metrics.py ├── rules └── docker │ └── rules.yml ├── docker-compose.yaml └── grafana-dashboard.json /config/alertmanager.yml: -------------------------------------------------------------------------------- 1 | route: 2 | receiver: 'default-receiver' 3 | group_wait: 30s 4 | group_interval: 30m 5 | group_by: [ alertname ] 6 | 7 | receivers: 8 | - name: 'default-receiver' -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Loki Logs and Prometheus Metrics 2 | 3 | This repo contains all the code needed to follow along with our [YouTube Tutorial](https://youtu.be/IdWD-lHTurY) 4 | 5 | ## Become a Cloud and DevOps Engineer 6 | 7 | Learn every tool that matters: https://rayanslim.com 8 | -------------------------------------------------------------------------------- /scripts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9-slim 2 | 3 | WORKDIR /app 4 | 5 | # Install required package 6 | RUN pip install --no-cache-dir prometheus-client 7 | 8 | # Copy the script 9 | COPY log_to_metrics.py . 10 | 11 | # Set environment variables 12 | ENV PYTHONUNBUFFERED=1 13 | 14 | # Expose the metrics port 15 | EXPOSE 8082 16 | 17 | # Run the script 18 | CMD ["python", "log_to_metrics.py"] -------------------------------------------------------------------------------- /config/datasources.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 1 2 | datasources: 3 | - access: proxy 4 | basicAuth: false 5 | jsonData: 6 | httpHeaderName1: "X-Scope-OrgID" 7 | secureJsonData: 8 | httpHeaderValue1: "docker" 9 | editable: true 10 | isDefault: true 11 | name: loki 12 | type: loki 13 | uid: loki 14 | url: http://loki-gateway 15 | version: 1 16 | 17 | - access: proxy 18 | basicAuth: false 19 | editable: true 20 | isDefault: false 21 | name: prometheus 22 | type: prometheus 23 | uid: prometheus 24 | url: http://prometheus:9090 25 | version: 1 -------------------------------------------------------------------------------- /rules/docker/rules.yml: -------------------------------------------------------------------------------- 1 | groups: 2 | - name: Sample Rule Group 3 | interval: 5s 4 | rules: 5 | - record: generated_logs:rate1m 6 | expr: sum by (http_method) (rate({job="generated-logs"}[1m])) 7 | labels: 8 | source: "recording rule" 9 | - record: scalar 10 | expr: 10 11 | labels: 12 | source: "static" 13 | - alert: NoGeneratedLogs 14 | expr: absent_over_time({job="generated-logs"}[1m]) 15 | labels: 16 | source: "alerting rule" 17 | - alert: AlwaysFiring 18 | expr: absent_over_time({job="blah"}[1m]) 19 | labels: 20 | source: "alerting rule" -------------------------------------------------------------------------------- /config/prometheus.yaml: -------------------------------------------------------------------------------- 1 | global: 2 | scrape_interval: 5s 3 | 4 | scrape_configs: 5 | - job_name: 'prometheus' 6 | static_configs: 7 | - targets: 8 | - 'prometheus:9090' 9 | 10 | - job_name: 'loki' 11 | dns_sd_configs: 12 | - names: 13 | - loki-read 14 | - loki-write 15 | - loki-backend 16 | type: A 17 | port: 3100 18 | 19 | - job_name: 'promtail' 20 | dns_sd_configs: 21 | - names: 22 | - promtail 23 | type: A 24 | port: 9080 25 | 26 | # Add this new job to scrape metrics from our exporter 27 | - job_name: 'http-metrics' 28 | static_configs: 29 | - targets: 30 | - 'metrics-exporter:8082' -------------------------------------------------------------------------------- /config/promtail.yaml: -------------------------------------------------------------------------------- 1 | server: 2 | http_listen_port: 9080 3 | grpc_listen_port: 0 4 | log_level: "info" 5 | 6 | positions: 7 | filename: /tmp/positions.yaml 8 | 9 | clients: 10 | - url: http://loki-gateway:80/loki/api/v1/push 11 | tenant_id: docker 12 | 13 | scrape_configs: 14 | - job_name: generated-logs 15 | static_configs: 16 | - targets: 17 | - localhost 18 | labels: 19 | job: generated-logs 20 | __path__: /var/log/generated-logs.txt 21 | pipeline_stages: 22 | - json: 23 | expressions: 24 | http_method: 'method' 25 | http_status: "status" 26 | - labels: 27 | http_method: 28 | http_status: 29 | -------------------------------------------------------------------------------- /config/nginx.conf: -------------------------------------------------------------------------------- 1 | error_log /dev/stderr; 2 | pid /tmp/nginx.pid; 3 | worker_rlimit_nofile 8192; 4 | 5 | events { 6 | worker_connections 4096; ## Default: 1024 7 | } 8 | 9 | http { 10 | default_type application/octet-stream; 11 | log_format main '$remote_addr - $remote_user [$time_local] $status ' 12 | '"$request" $body_bytes_sent "$http_referer" ' 13 | '"$http_user_agent" "$http_x_forwarded_for"'; 14 | access_log /dev/stderr main; 15 | sendfile on; 16 | tcp_nopush on; 17 | 18 | upstream read { 19 | server loki-read:3100; 20 | } 21 | 22 | upstream write { 23 | server loki-write:3100; 24 | } 25 | 26 | upstream cluster { 27 | server loki-read:3100; 28 | server loki-write:3100; 29 | } 30 | 31 | server { 32 | listen 80; 33 | listen 3100; 34 | 35 | location = /ring { 36 | proxy_pass http://cluster$request_uri; 37 | } 38 | 39 | location = /memberlist { 40 | proxy_pass http://cluster$request_uri; 41 | } 42 | 43 | location = /config { 44 | proxy_pass http://cluster$request_uri; 45 | } 46 | 47 | location = /metrics { 48 | proxy_pass http://cluster$request_uri; 49 | } 50 | 51 | location = /ready { 52 | proxy_pass http://cluster$request_uri; 53 | } 54 | 55 | location = /loki/api/v1/push { 56 | proxy_pass http://write$request_uri; 57 | } 58 | 59 | location = /loki/api/v1/tail { 60 | proxy_pass http://read$request_uri; 61 | proxy_set_header Upgrade $http_upgrade; 62 | proxy_set_header Connection "upgrade"; 63 | } 64 | 65 | location ~ /loki/api/.* { 66 | proxy_pass http://read$request_uri; 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /config/loki.yaml: -------------------------------------------------------------------------------- 1 | auth_enabled: true 2 | 3 | server: 4 | http_listen_address: 0.0.0.0 5 | grpc_listen_address: 0.0.0.0 6 | http_listen_port: 3100 7 | grpc_listen_port: 9095 8 | log_level: info 9 | 10 | common: 11 | path_prefix: /loki 12 | compactor_address: http://loki-backend:3100 13 | replication_factor: 3 14 | 15 | storage_config: 16 | aws: 17 | endpoint: minio:9000 18 | insecure: true 19 | bucketnames: loki-data 20 | access_key_id: loki 21 | secret_access_key: supersecret 22 | s3forcepathstyle: true 23 | 24 | memberlist: 25 | join_members: ["loki-read", "loki-write", "loki-backend"] 26 | dead_node_reclaim_time: 30s 27 | gossip_to_dead_nodes_time: 15s 28 | left_ingesters_timeout: 30s 29 | bind_addr: ['0.0.0.0'] 30 | bind_port: 7946 31 | gossip_interval: 2s 32 | 33 | ingester: 34 | lifecycler: 35 | join_after: 10s 36 | observe_period: 5s 37 | ring: 38 | replication_factor: 3 39 | kvstore: 40 | store: memberlist 41 | final_sleep: 0s 42 | chunk_idle_period: 1m 43 | wal: 44 | enabled: true 45 | dir: /loki/wal 46 | max_chunk_age: 1m 47 | chunk_retain_period: 30s 48 | chunk_encoding: snappy 49 | chunk_target_size: 1.572864e+06 50 | chunk_block_size: 262144 51 | flush_op_timeout: 10s 52 | 53 | ruler: 54 | enable_api: true 55 | enable_sharding: true 56 | wal: 57 | dir: /loki/ruler-wal 58 | evaluation: 59 | mode: remote 60 | query_frontend: 61 | address: dns:///loki-read:9095 62 | storage: 63 | type: local 64 | local: 65 | directory: /loki/rules 66 | rule_path: /loki/prom-rules 67 | remote_write: 68 | enabled: true 69 | clients: 70 | local: 71 | url: http://prometheus:9090/api/v1/write 72 | queue_config: 73 | # send immediately as soon as a sample is generated 74 | capacity: 1 75 | batch_send_deadline: 0s 76 | 77 | schema_config: 78 | configs: 79 | - from: 2020-08-01 80 | store: boltdb-shipper 81 | object_store: s3 82 | schema: v11 83 | index: 84 | prefix: index_ 85 | period: 24h 86 | - from: 2023-07-11 87 | store: tsdb 88 | object_store: s3 89 | schema: v12 90 | index: 91 | prefix: index_ 92 | period: 24h 93 | - from: 2024-01-10 94 | store: tsdb 95 | object_store: s3 96 | schema: v12 97 | index: 98 | prefix: index_ 99 | period: 24h 100 | - from: 2024-03-29 101 | store: tsdb 102 | object_store: s3 103 | schema: v13 104 | index: 105 | prefix: index_ 106 | period: 24h 107 | 108 | 109 | limits_config: 110 | max_cache_freshness_per_query: '10m' 111 | reject_old_samples: true 112 | reject_old_samples_max_age: 30m 113 | ingestion_rate_mb: 10 114 | ingestion_burst_size_mb: 20 115 | # parallelize queries in 15min intervals 116 | split_queries_by_interval: 15m 117 | volume_enabled: true 118 | 119 | table_manager: 120 | retention_deletes_enabled: true 121 | retention_period: 336h 122 | 123 | query_range: 124 | # make queries more cache-able by aligning them with their step intervals 125 | align_queries_with_step: true 126 | max_retries: 5 127 | parallelise_shardable_queries: true 128 | cache_results: true 129 | 130 | frontend: 131 | log_queries_longer_than: 5s 132 | compress_responses: true 133 | max_outstanding_per_tenant: 2048 134 | 135 | query_scheduler: 136 | max_outstanding_requests_per_tenant: 1024 137 | 138 | querier: 139 | query_ingesters_within: 2h 140 | 141 | compactor: 142 | working_directory: /tmp/compactor 143 | -------------------------------------------------------------------------------- /scripts/log_to_metrics.py: -------------------------------------------------------------------------------- 1 | import json 2 | import time 3 | import os 4 | import sys 5 | from prometheus_client import start_http_server, Counter, Histogram, Gauge 6 | 7 | # Create metrics 8 | http_requests_total = Counter('http_requests_total', 'Total HTTP Requests', ['method', 'status', 'path']) 9 | http_request_duration_seconds = Histogram('http_request_duration_seconds', 'HTTP request duration in seconds', ['method', 'path']) 10 | http_response_size_bytes = Counter('http_response_size_bytes', 'HTTP response size in bytes', ['method', 'path']) 11 | http_status_codes = Counter('http_status_codes', 'HTTP Status Codes', ['status']) 12 | 13 | # Path to the log file 14 | LOG_FILE = '/var/log/generated-logs.txt' 15 | 16 | # Initialize last position 17 | last_position = 0 18 | 19 | def parse_log_line(line): 20 | try: 21 | data = json.loads(line.strip()) 22 | # Extract relevant information 23 | method = data.get('method', 'UNKNOWN') 24 | status = str(data.get('status', '0')) 25 | path = data.get('request', '/unknown') 26 | bytes_sent = int(data.get('bytes', 0)) 27 | 28 | # Increment metrics 29 | http_requests_total.labels(method=method, status=status, path=path).inc() 30 | http_response_size_bytes.labels(method=method, path=path).inc(bytes_sent) 31 | http_status_codes.labels(status=status).inc() 32 | 33 | # Simulate duration based on response size (just for demo purposes) 34 | duration = max(0.001, min(bytes_sent / 1000000.0, 10.0)) # Between 1ms and 10s 35 | http_request_duration_seconds.labels(method=method, path=path).observe(duration) 36 | 37 | print(f"Processed log: {method} {path} {status} {bytes_sent}B") 38 | 39 | except json.JSONDecodeError: 40 | print(f"Failed to parse log line: {line}", file=sys.stderr) 41 | except Exception as e: 42 | print(f"Error processing log line: {str(e)}", file=sys.stderr) 43 | 44 | def tail_log_file(): 45 | global last_position 46 | 47 | try: 48 | if not os.path.exists(LOG_FILE): 49 | print(f"Waiting for log file {LOG_FILE} to be created...") 50 | time.sleep(1) 51 | return 52 | 53 | with open(LOG_FILE, 'r') as f: 54 | # Get file size 55 | f.seek(0, 2) 56 | file_size = f.tell() 57 | 58 | # If file has been truncated or overwritten, reset position 59 | if last_position > file_size: 60 | print("Log file was truncated, resetting position") 61 | last_position = 0 62 | 63 | # If there's new content 64 | if file_size > last_position: 65 | # Go to the last read position 66 | f.seek(last_position) 67 | 68 | # Read new lines 69 | new_lines = f.readlines() 70 | 71 | # Process each new line 72 | for line in new_lines: 73 | if line.strip(): 74 | parse_log_line(line) 75 | 76 | # Update position 77 | last_position = file_size 78 | except Exception as e: 79 | print(f"Error in tail_log_file: {str(e)}", file=sys.stderr) 80 | time.sleep(1) 81 | 82 | if __name__ == '__main__': 83 | print("Starting metrics exporter...") 84 | 85 | # Start up the server to expose the metrics 86 | start_http_server(8082) 87 | print("Metrics server started on port 8082") 88 | 89 | # Main loop 90 | while True: 91 | tail_log_file() 92 | time.sleep(0.1) # Check for new logs frequently -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | networks: 2 | loki: 3 | 4 | volumes: 5 | prometheus: 6 | grafana: 7 | alertmanager-data: 8 | loki: 9 | 10 | services: 11 | # Metrics exporter to convert logs to Prometheus metrics 12 | metrics-exporter: 13 | build: 14 | context: ./scripts 15 | dockerfile: Dockerfile 16 | volumes: 17 | - ./loki/:/var/log/ 18 | ports: 19 | - "8082:8082" 20 | networks: 21 | - loki 22 | depends_on: 23 | - log-generator 24 | 25 | 26 | 27 | # Since the Loki containers are running as user 10001 and the mounted data volume is owned by root, 28 | # Loki would not have permissions to create the directories. 29 | # Therefore the init container changes permissions of the mounted directory. 30 | init: 31 | image: &lokiImage grafana/loki:3.4.3 32 | user: root 33 | entrypoint: 34 | - "chown" 35 | - "10001:10001" 36 | - "/loki" 37 | volumes: 38 | - ./loki:/loki 39 | networks: 40 | - loki 41 | 42 | grafana: 43 | image: grafana/grafana:11.6.0 44 | ports: 45 | - "3000:3000" 46 | environment: 47 | GF_AUTH_ANONYMOUS_ENABLED: "true" 48 | GF_AUTH_DISABLE_LOGIN_FORM: "true" 49 | GF_AUTH_ANONYMOUS_ORG_ROLE: "Admin" 50 | volumes: 51 | - ./config/datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yml 52 | - grafana:/var/lib/grafana 53 | networks: 54 | - loki 55 | 56 | prometheus: 57 | image: prom/prometheus:v3.2.1 58 | ports: 59 | - "9090:9090" # Exposing on a fixed port for easier access 60 | volumes: 61 | - ./config/prometheus.yaml:/etc/prometheus/prometheus.yml 62 | - prometheus:/prometheus 63 | command: 64 | [ 65 | '--log.level=debug', 66 | '--config.file=/etc/prometheus/prometheus.yml', 67 | '--enable-feature=remote-write-receiver', 68 | '--query.lookback-delta=30s' 69 | ] 70 | networks: 71 | - loki 72 | 73 | # for testing purposes only, disable in production 74 | log-generator: 75 | image: mingrammer/flog:0.4.3 76 | command: 77 | - --loop 78 | - --format=json 79 | - --number=10 # number of log lines to generate per second 80 | - --delay=100ms # delay between log lines 81 | - --output=/var/log/generated-logs.txt 82 | - --overwrite 83 | - --type=log 84 | volumes: 85 | - ./loki/:/var/log/ 86 | networks: 87 | - loki 88 | 89 | promtail: 90 | image: grafana/promtail:3.4.3 91 | volumes: 92 | - ./loki/:/var/log/ 93 | - ./config:/etc/promtail/ 94 | ports: 95 | - 9080 96 | command: -config.file=/etc/promtail/promtail.yaml 97 | networks: 98 | - loki 99 | 100 | minio: 101 | image: minio/minio:RELEASE.2025-04-08T15-41-24Z 102 | entrypoint: 103 | - sh 104 | - -euc 105 | - | 106 | mkdir -p /data/loki-data && \ 107 | mkdir -p /data/loki-ruler && 108 | minio server --address "0.0.0.0:9000" --console-address "0.0.0.0:9001" /data 109 | environment: 110 | - MINIO_ROOT_USER=loki 111 | - MINIO_ROOT_PASSWORD=supersecret 112 | - MINIO_PROMETHEUS_AUTH_TYPE=public 113 | - MINIO_UPDATE=off 114 | ports: 115 | - "9000:9000" 116 | - "9001:9001" 117 | volumes: 118 | - ./.data/minio:/data 119 | networks: 120 | - loki 121 | 122 | loki-gateway: 123 | image: nginx:1.27.5 124 | volumes: 125 | - ./config/nginx.conf:/etc/nginx/nginx.conf 126 | ports: 127 | - "8080:80" 128 | - "3100" 129 | networks: 130 | - loki 131 | 132 | loki-read: 133 | image: *lokiImage 134 | volumes: 135 | - ./config:/etc/loki/ 136 | ports: 137 | - "3100" 138 | - "7946" 139 | command: "-config.file=/etc/loki/loki.yaml -target=read -legacy-read-mode=false" 140 | networks: 141 | - loki 142 | restart: always 143 | deploy: 144 | mode: replicated 145 | replicas: 3 146 | 147 | loki-write: 148 | image: *lokiImage 149 | volumes: 150 | - ./config:/etc/loki/ 151 | ports: 152 | - "3100" 153 | - "7946" 154 | command: "-config.file=/etc/loki/loki.yaml -target=write" 155 | networks: 156 | - loki 157 | restart: always 158 | deploy: 159 | mode: replicated 160 | replicas: 3 161 | 162 | loki-backend: 163 | image: *lokiImage 164 | volumes: 165 | - ./config:/etc/loki/ 166 | - ./rules:/loki/rules:ro 167 | ports: 168 | - "3100" 169 | - "7946" 170 | command: "-config.file=/etc/loki/loki.yaml -target=backend -legacy-read-mode=false" 171 | networks: 172 | - loki 173 | restart: always 174 | deploy: 175 | mode: replicated 176 | replicas: 3 177 | 178 | # alertmanager to enable receiving alerts 179 | alertmanager: 180 | image: prom/alertmanager:v0.28.1 181 | restart: unless-stopped 182 | ports: 183 | - "9093:9093" 184 | volumes: 185 | - "./config:/config" 186 | - alertmanager-data:/data 187 | command: --config.file=/config/alertmanager.yml --log.level=debug 188 | networks: 189 | - loki 190 | -------------------------------------------------------------------------------- /grafana-dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "annotations": { 3 | "list": [ 4 | { 5 | "builtIn": 1, 6 | "datasource": { 7 | "type": "grafana", 8 | "uid": "-- Grafana --" 9 | }, 10 | "enable": true, 11 | "hide": true, 12 | "iconColor": "rgba(0, 211, 255, 1)", 13 | "name": "Annotations & Alerts", 14 | "type": "dashboard" 15 | } 16 | ] 17 | }, 18 | "editable": true, 19 | "fiscalYearStartMonth": 0, 20 | "graphTooltip": 1, 21 | "id": null, 22 | "links": [], 23 | "liveNow": false, 24 | "panels": [ 25 | { 26 | "datasource": { 27 | "type": "prometheus", 28 | "uid": "prometheus" 29 | }, 30 | "fieldConfig": { 31 | "defaults": { 32 | "color": { 33 | "mode": "palette-classic" 34 | }, 35 | "custom": { 36 | "axisBorderShow": false, 37 | "axisCenteredZero": false, 38 | "axisColorMode": "text", 39 | "axisLabel": "", 40 | "axisPlacement": "auto", 41 | "barAlignment": 0, 42 | "drawStyle": "line", 43 | "fillOpacity": 10, 44 | "gradientMode": "none", 45 | "hideFrom": { 46 | "legend": false, 47 | "tooltip": false, 48 | "viz": false 49 | }, 50 | "insertNulls": false, 51 | "lineInterpolation": "linear", 52 | "lineWidth": 2, 53 | "pointSize": 5, 54 | "scaleDistribution": { 55 | "type": "linear" 56 | }, 57 | "showPoints": "never", 58 | "spanNulls": false, 59 | "stacking": { 60 | "group": "A", 61 | "mode": "none" 62 | }, 63 | "thresholdsStyle": { 64 | "mode": "off" 65 | } 66 | }, 67 | "mappings": [], 68 | "thresholds": { 69 | "mode": "absolute", 70 | "steps": [ 71 | { 72 | "color": "green", 73 | "value": null 74 | }, 75 | { 76 | "color": "red", 77 | "value": 80 78 | } 79 | ] 80 | }, 81 | "unit": "reqps" 82 | }, 83 | "overrides": [] 84 | }, 85 | "gridPos": { 86 | "h": 8, 87 | "w": 12, 88 | "x": 0, 89 | "y": 0 90 | }, 91 | "id": 1, 92 | "options": { 93 | "legend": { 94 | "calcs": [ 95 | "mean", 96 | "max", 97 | "lastNotNull" 98 | ], 99 | "displayMode": "table", 100 | "placement": "bottom", 101 | "showLegend": true 102 | }, 103 | "tooltip": { 104 | "mode": "multi", 105 | "sort": "desc" 106 | } 107 | }, 108 | "pluginVersion": "11.6.0", 109 | "targets": [ 110 | { 111 | "datasource": { 112 | "type": "prometheus", 113 | "uid": "prometheus" 114 | }, 115 | "editorMode": "code", 116 | "expr": "sum by(method) (rate(http_requests_total{method=~\"$method_filter\"}[5m]))", 117 | "legendFormat": "{{method}}", 118 | "range": true, 119 | "refId": "A" 120 | } 121 | ], 122 | "title": "Request Rate by Method", 123 | "type": "timeseries" 124 | }, 125 | { 126 | "datasource": { 127 | "type": "prometheus", 128 | "uid": "prometheus" 129 | }, 130 | "fieldConfig": { 131 | "defaults": { 132 | "color": { 133 | "mode": "palette-classic" 134 | }, 135 | "custom": { 136 | "hideFrom": { 137 | "legend": false, 138 | "tooltip": false, 139 | "viz": false 140 | } 141 | }, 142 | "mappings": [] 143 | }, 144 | "overrides": [] 145 | }, 146 | "gridPos": { 147 | "h": 8, 148 | "w": 12, 149 | "x": 12, 150 | "y": 0 151 | }, 152 | "id": 2, 153 | "options": { 154 | "displayLabels": ["percent", "name"], 155 | "legend": { 156 | "displayMode": "table", 157 | "placement": "bottom", 158 | "showLegend": true, 159 | "values": ["value", "percent"] 160 | }, 161 | "pieType": "pie", 162 | "reduceOptions": { 163 | "calcs": [ 164 | "lastNotNull" 165 | ], 166 | "fields": "", 167 | "values": false 168 | }, 169 | "tooltip": { 170 | "mode": "single", 171 | "sort": "none" 172 | } 173 | }, 174 | "targets": [ 175 | { 176 | "datasource": { 177 | "type": "prometheus", 178 | "uid": "prometheus" 179 | }, 180 | "editorMode": "code", 181 | "expr": "sum by(status) (rate(http_status_codes_total{status=~\"$status_filter\"}[5m]))", 182 | "legendFormat": "{{status}}", 183 | "range": true, 184 | "refId": "A" 185 | } 186 | ], 187 | "title": "Status Code Distribution", 188 | "type": "piechart" 189 | }, 190 | { 191 | "datasource": { 192 | "type": "prometheus", 193 | "uid": "prometheus" 194 | }, 195 | "fieldConfig": { 196 | "defaults": { 197 | "color": { 198 | "mode": "palette-classic" 199 | }, 200 | "custom": { 201 | "axisBorderShow": false, 202 | "axisCenteredZero": false, 203 | "axisColorMode": "text", 204 | "axisLabel": "", 205 | "axisPlacement": "auto", 206 | "barAlignment": 0, 207 | "drawStyle": "line", 208 | "fillOpacity": 20, 209 | "gradientMode": "none", 210 | "hideFrom": { 211 | "legend": false, 212 | "tooltip": false, 213 | "viz": false 214 | }, 215 | "insertNulls": false, 216 | "lineInterpolation": "smooth", 217 | "lineWidth": 2, 218 | "pointSize": 5, 219 | "scaleDistribution": { 220 | "type": "linear" 221 | }, 222 | "showPoints": "never", 223 | "spanNulls": false, 224 | "stacking": { 225 | "group": "A", 226 | "mode": "none" 227 | }, 228 | "thresholdsStyle": { 229 | "mode": "off" 230 | } 231 | }, 232 | "mappings": [], 233 | "thresholds": { 234 | "mode": "absolute", 235 | "steps": [ 236 | { 237 | "color": "green", 238 | "value": null 239 | } 240 | ] 241 | }, 242 | "unit": "s" 243 | }, 244 | "overrides": [] 245 | }, 246 | "gridPos": { 247 | "h": 8, 248 | "w": 12, 249 | "x": 0, 250 | "y": 8 251 | }, 252 | "id": 3, 253 | "options": { 254 | "legend": { 255 | "calcs": [ 256 | "mean", 257 | "max" 258 | ], 259 | "displayMode": "table", 260 | "placement": "bottom", 261 | "showLegend": true 262 | }, 263 | "tooltip": { 264 | "mode": "multi", 265 | "sort": "desc" 266 | } 267 | }, 268 | "targets": [ 269 | { 270 | "datasource": { 271 | "type": "prometheus", 272 | "uid": "prometheus" 273 | }, 274 | "editorMode": "code", 275 | "expr": "histogram_quantile(0.95, sum by(le, method) (rate(http_request_duration_seconds_bucket{method=~\"$method_filter\"}[5m])))", 276 | "legendFormat": "{{method}} (p95)", 277 | "range": true, 278 | "refId": "A" 279 | }, 280 | { 281 | "datasource": { 282 | "type": "prometheus", 283 | "uid": "prometheus" 284 | }, 285 | "editorMode": "code", 286 | "expr": "histogram_quantile(0.50, sum by(le, method) (rate(http_request_duration_seconds_bucket{method=~\"$method_filter\"}[5m])))", 287 | "hide": false, 288 | "legendFormat": "{{method}} (p50)", 289 | "range": true, 290 | "refId": "B" 291 | } 292 | ], 293 | "title": "Response Time by Method (p50 and p95)", 294 | "type": "timeseries" 295 | }, 296 | { 297 | "datasource": { 298 | "type": "prometheus", 299 | "uid": "prometheus" 300 | }, 301 | "fieldConfig": { 302 | "defaults": { 303 | "color": { 304 | "mode": "palette-classic" 305 | }, 306 | "custom": { 307 | "axisBorderShow": false, 308 | "axisCenteredZero": false, 309 | "axisColorMode": "text", 310 | "axisLabel": "", 311 | "axisPlacement": "auto", 312 | "barAlignment": 0, 313 | "drawStyle": "line", 314 | "fillOpacity": 20, 315 | "gradientMode": "scheme", 316 | "hideFrom": { 317 | "legend": false, 318 | "tooltip": false, 319 | "viz": false 320 | }, 321 | "insertNulls": false, 322 | "lineInterpolation": "smooth", 323 | "lineWidth": 2, 324 | "pointSize": 5, 325 | "scaleDistribution": { 326 | "type": "linear" 327 | }, 328 | "showPoints": "never", 329 | "spanNulls": false, 330 | "stacking": { 331 | "group": "A", 332 | "mode": "normal" 333 | }, 334 | "thresholdsStyle": { 335 | "mode": "off" 336 | } 337 | }, 338 | "mappings": [], 339 | "thresholds": { 340 | "mode": "absolute", 341 | "steps": [ 342 | { 343 | "color": "green", 344 | "value": null 345 | } 346 | ] 347 | }, 348 | "unit": "reqps" 349 | }, 350 | "overrides": [ 351 | { 352 | "matcher": { 353 | "id": "byRegexp", 354 | "options": "2.*" 355 | }, 356 | "properties": [ 357 | { 358 | "id": "color", 359 | "value": { 360 | "fixedColor": "green", 361 | "mode": "fixed" 362 | } 363 | } 364 | ] 365 | }, 366 | { 367 | "matcher": { 368 | "id": "byRegexp", 369 | "options": "3.*" 370 | }, 371 | "properties": [ 372 | { 373 | "id": "color", 374 | "value": { 375 | "fixedColor": "blue", 376 | "mode": "fixed" 377 | } 378 | } 379 | ] 380 | }, 381 | { 382 | "matcher": { 383 | "id": "byRegexp", 384 | "options": "4.*" 385 | }, 386 | "properties": [ 387 | { 388 | "id": "color", 389 | "value": { 390 | "fixedColor": "orange", 391 | "mode": "fixed" 392 | } 393 | } 394 | ] 395 | }, 396 | { 397 | "matcher": { 398 | "id": "byRegexp", 399 | "options": "5.*" 400 | }, 401 | "properties": [ 402 | { 403 | "id": "color", 404 | "value": { 405 | "fixedColor": "red", 406 | "mode": "fixed" 407 | } 408 | } 409 | ] 410 | } 411 | ] 412 | }, 413 | "gridPos": { 414 | "h": 8, 415 | "w": 12, 416 | "x": 12, 417 | "y": 8 418 | }, 419 | "id": 4, 420 | "options": { 421 | "legend": { 422 | "calcs": [ 423 | "sum", 424 | "lastNotNull" 425 | ], 426 | "displayMode": "table", 427 | "placement": "bottom", 428 | "showLegend": true 429 | }, 430 | "tooltip": { 431 | "mode": "multi", 432 | "sort": "desc" 433 | } 434 | }, 435 | "targets": [ 436 | { 437 | "datasource": { 438 | "type": "prometheus", 439 | "uid": "prometheus" 440 | }, 441 | "editorMode": "code", 442 | "expr": "sum by(status) (rate(http_status_codes_total{status=~\"$status_filter\"}[5m]))", 443 | "legendFormat": "{{status}}", 444 | "range": true, 445 | "refId": "A" 446 | } 447 | ], 448 | "title": "Status Codes Over Time", 449 | "type": "timeseries" 450 | }, 451 | { 452 | "datasource": { 453 | "type": "prometheus", 454 | "uid": "prometheus" 455 | }, 456 | "fieldConfig": { 457 | "defaults": { 458 | "color": { 459 | "mode": "palette-classic" 460 | }, 461 | "custom": { 462 | "axisBorderShow": false, 463 | "axisCenteredZero": false, 464 | "axisColorMode": "text", 465 | "axisLabel": "", 466 | "axisPlacement": "auto", 467 | "barAlignment": 0, 468 | "drawStyle": "bars", 469 | "fillOpacity": 80, 470 | "gradientMode": "none", 471 | "hideFrom": { 472 | "legend": false, 473 | "tooltip": false, 474 | "viz": false 475 | }, 476 | "insertNulls": false, 477 | "lineInterpolation": "linear", 478 | "lineWidth": 1, 479 | "pointSize": 5, 480 | "scaleDistribution": { 481 | "type": "linear" 482 | }, 483 | "showPoints": "never", 484 | "spanNulls": false, 485 | "stacking": { 486 | "group": "A", 487 | "mode": "normal" 488 | }, 489 | "thresholdsStyle": { 490 | "mode": "off" 491 | } 492 | }, 493 | "mappings": [], 494 | "thresholds": { 495 | "mode": "absolute", 496 | "steps": [ 497 | { 498 | "color": "green", 499 | "value": null 500 | } 501 | ] 502 | }, 503 | "unit": "bytes" 504 | }, 505 | "overrides": [] 506 | }, 507 | "gridPos": { 508 | "h": 8, 509 | "w": 12, 510 | "x": 0, 511 | "y": 16 512 | }, 513 | "id": 5, 514 | "options": { 515 | "legend": { 516 | "calcs": [ 517 | "mean", 518 | "max" 519 | ], 520 | "displayMode": "table", 521 | "placement": "bottom", 522 | "showLegend": true 523 | }, 524 | "tooltip": { 525 | "mode": "multi", 526 | "sort": "desc" 527 | } 528 | }, 529 | "targets": [ 530 | { 531 | "datasource": { 532 | "type": "prometheus", 533 | "uid": "prometheus" 534 | }, 535 | "editorMode": "code", 536 | "expr": "sum by(method) (rate(http_response_size_bytes_total{method=~\"$method_filter\"}[5m]))", 537 | "legendFormat": "{{method}}", 538 | "range": true, 539 | "refId": "A" 540 | } 541 | ], 542 | "title": "Response Size by Method", 543 | "type": "timeseries" 544 | }, 545 | { 546 | "datasource": { 547 | "type": "prometheus", 548 | "uid": "prometheus" 549 | }, 550 | "fieldConfig": { 551 | "defaults": { 552 | "color": { 553 | "mode": "thresholds" 554 | }, 555 | "custom": { 556 | "align": "auto", 557 | "cellOptions": { 558 | "type": "auto" 559 | }, 560 | "inspect": false 561 | }, 562 | "mappings": [], 563 | "thresholds": { 564 | "mode": "absolute", 565 | "steps": [ 566 | { 567 | "color": "green", 568 | "value": null 569 | }, 570 | { 571 | "color": "orange", 572 | "value": 1 573 | }, 574 | { 575 | "color": "red", 576 | "value": 3 577 | } 578 | ] 579 | }, 580 | "unit": "s" 581 | }, 582 | "overrides": [ 583 | { 584 | "matcher": { 585 | "id": "byName", 586 | "options": "path" 587 | }, 588 | "properties": [ 589 | { 590 | "id": "custom.width", 591 | "value": 300 592 | } 593 | ] 594 | }, 595 | { 596 | "matcher": { 597 | "id": "byName", 598 | "options": "Value" 599 | }, 600 | "properties": [ 601 | { 602 | "id": "custom.width", 603 | "value": 150 604 | }, 605 | { 606 | "id": "displayName", 607 | "value": "p95 Response Time" 608 | } 609 | ] 610 | } 611 | ] 612 | }, 613 | "gridPos": { 614 | "h": 8, 615 | "w": 12, 616 | "x": 12, 617 | "y": 16 618 | }, 619 | "id": 6, 620 | "options": { 621 | "cellHeight": "sm", 622 | "footer": { 623 | "countRows": false, 624 | "enablePagination": false, 625 | "fields": "", 626 | "reducer": [ 627 | "sum" 628 | ], 629 | "show": false 630 | }, 631 | "showHeader": true, 632 | "sortBy": [ 633 | { 634 | "desc": true, 635 | "displayName": "p95 Response Time" 636 | } 637 | ] 638 | }, 639 | "pluginVersion": "11.6.0", 640 | "targets": [ 641 | { 642 | "datasource": { 643 | "type": "prometheus", 644 | "uid": "prometheus" 645 | }, 646 | "editorMode": "code", 647 | "expr": "topk(10, histogram_quantile(0.95, sum by(le, path) (rate(http_request_duration_seconds_bucket[5m]))))", 648 | "format": "table", 649 | "instant": true, 650 | "legendFormat": "{{path}}", 651 | "range": false, 652 | "refId": "A" 653 | } 654 | ], 655 | "title": "Top 10 Slowest Endpoints (p95)", 656 | "type": "table" 657 | }, 658 | { 659 | "datasource": { 660 | "type": "prometheus", 661 | "uid": "prometheus" 662 | }, 663 | "fieldConfig": { 664 | "defaults": { 665 | "color": { 666 | "mode": "palette-classic" 667 | }, 668 | "custom": { 669 | "axisBorderShow": false, 670 | "axisCenteredZero": false, 671 | "axisColorMode": "text", 672 | "axisLabel": "", 673 | "axisPlacement": "auto", 674 | "barAlignment": 0, 675 | "drawStyle": "bars", 676 | "fillOpacity": 60, 677 | "gradientMode": "none", 678 | "hideFrom": { 679 | "legend": false, 680 | "tooltip": false, 681 | "viz": false 682 | }, 683 | "insertNulls": false, 684 | "lineInterpolation": "linear", 685 | "lineWidth": 1, 686 | "pointSize": 5, 687 | "scaleDistribution": { 688 | "type": "linear" 689 | }, 690 | "showPoints": "auto", 691 | "spanNulls": false, 692 | "stacking": { 693 | "group": "A", 694 | "mode": "none" 695 | }, 696 | "thresholdsStyle": { 697 | "mode": "off" 698 | } 699 | }, 700 | "mappings": [], 701 | "thresholds": { 702 | "mode": "absolute", 703 | "steps": [ 704 | { 705 | "color": "green", 706 | "value": null 707 | } 708 | ] 709 | }, 710 | "unit": "reqps" 711 | }, 712 | "overrides": [] 713 | }, 714 | "gridPos": { 715 | "h": 8, 716 | "w": 24, 717 | "x": 0, 718 | "y": 24 719 | }, 720 | "id": 7, 721 | "options": { 722 | "legend": { 723 | "calcs": [ 724 | "sum", 725 | "max" 726 | ], 727 | "displayMode": "table", 728 | "placement": "right", 729 | "showLegend": true 730 | }, 731 | "tooltip": { 732 | "mode": "multi", 733 | "sort": "desc" 734 | } 735 | }, 736 | "targets": [ 737 | { 738 | "datasource": { 739 | "type": "prometheus", 740 | "uid": "prometheus" 741 | }, 742 | "editorMode": "code", 743 | "expr": "topk(10, sum by(path) (rate(http_requests_total[5m])))", 744 | "legendFormat": "{{path}}", 745 | "range": true, 746 | "refId": "A" 747 | } 748 | ], 749 | "title": "Top 10 Endpoints by Request Volume", 750 | "type": "timeseries" 751 | }, 752 | { 753 | "datasource": { 754 | "type": "loki", 755 | "uid": "loki" 756 | }, 757 | "fieldConfig": { 758 | "defaults": { 759 | "color": { 760 | "mode": "palette-classic" 761 | }, 762 | "custom": { 763 | "hideFrom": { 764 | "legend": false, 765 | "tooltip": false, 766 | "viz": false 767 | } 768 | }, 769 | "mappings": [] 770 | }, 771 | "overrides": [] 772 | }, 773 | "gridPos": { 774 | "h": 8, 775 | "w": 12, 776 | "x": 0, 777 | "y": 32 778 | }, 779 | "id": 8, 780 | "options": { 781 | "displayLabels": ["percent"], 782 | "legend": { 783 | "displayMode": "table", 784 | "placement": "right", 785 | "showLegend": true, 786 | "values": ["count", "percent"] 787 | }, 788 | "pieType": "pie", 789 | "reduceOptions": { 790 | "calcs": [ 791 | "lastNotNull" 792 | ], 793 | "fields": "", 794 | "values": false 795 | }, 796 | "tooltip": { 797 | "mode": "single", 798 | "sort": "none" 799 | } 800 | }, 801 | "targets": [ 802 | { 803 | "datasource": { 804 | "type": "loki", 805 | "uid": "loki" 806 | }, 807 | "editorMode": "code", 808 | "expr": "sum by (http_status) (count_over_time({service_name=\"$service\"} |= \"$filter\" | json | __error__=\"\" [$__auto]))", 809 | "legendFormat": "{{http_status}}", 810 | "queryType": "range", 811 | "refId": "A" 812 | } 813 | ], 814 | "title": "Status Codes from Logs", 815 | "type": "piechart" 816 | }, 817 | { 818 | "datasource": { 819 | "type": "loki", 820 | "uid": "loki" 821 | }, 822 | "fieldConfig": { 823 | "defaults": { 824 | "color": { 825 | "mode": "palette-classic" 826 | }, 827 | "custom": { 828 | "axisBorderShow": false, 829 | "axisCenteredZero": false, 830 | "axisColorMode": "text", 831 | "axisLabel": "", 832 | "axisPlacement": "auto", 833 | "barAlignment": 0, 834 | "drawStyle": "bars", 835 | "fillOpacity": 70, 836 | "gradientMode": "none", 837 | "hideFrom": { 838 | "legend": false, 839 | "tooltip": false, 840 | "viz": false 841 | }, 842 | "insertNulls": false, 843 | "lineInterpolation": "linear", 844 | "lineWidth": 1, 845 | "pointSize": 5, 846 | "scaleDistribution": { 847 | "type": "linear" 848 | }, 849 | "showPoints": "never", 850 | "spanNulls": false, 851 | "stacking": { 852 | "group": "A", 853 | "mode": "normal" 854 | }, 855 | "thresholdsStyle": { 856 | "mode": "off" 857 | } 858 | }, 859 | "mappings": [], 860 | "thresholds": { 861 | "mode": "absolute", 862 | "steps": [ 863 | { 864 | "color": "green", 865 | "value": null 866 | } 867 | ] 868 | }, 869 | "unit": "logs" 870 | }, 871 | "overrides": [] 872 | }, 873 | "gridPos": { 874 | "h": 8, 875 | "w": 12, 876 | "x": 12, 877 | "y": 32 878 | }, 879 | "id": 9, 880 | "options": { 881 | "legend": { 882 | "calcs": [ 883 | "sum" 884 | ], 885 | "displayMode": "table", 886 | "placement": "right", 887 | "showLegend": true 888 | }, 889 | "tooltip": { 890 | "mode": "multi", 891 | "sort": "desc" 892 | } 893 | }, 894 | "targets": [ 895 | { 896 | "datasource": { 897 | "type": "loki", 898 | "uid": "loki" 899 | }, 900 | "editorMode": "code", 901 | "expr": "sum by (http_method) (count_over_time({service_name=\"$service\"} |= \"$filter\" | json | __error__=\"\" [$__auto]))", 902 | "legendFormat": "{{http_method}}", 903 | "queryType": "range", 904 | "refId": "A" 905 | } 906 | ], 907 | "title": "Log Volume by HTTP Method", 908 | "type": "timeseries" 909 | }, 910 | { 911 | "datasource": { 912 | "type": "loki", 913 | "uid": "loki" 914 | }, 915 | "gridPos": { 916 | "h": 10, 917 | "w": 24, 918 | "x": 0, 919 | "y": 40 920 | }, 921 | "id": 10, 922 | "options": { 923 | "dedupStrategy": "none", 924 | "enableLogDetails": true, 925 | "prettifyLogMessage": false, 926 | "showCommonLabels": false, 927 | "showLabels": false, 928 | "showTime": true, 929 | "sortOrder": "Descending", 930 | "wrapLogMessage": false 931 | }, 932 | "targets": [ 933 | { 934 | "datasource": { 935 | "type": "loki", 936 | "uid": "loki" 937 | }, 938 | "editorMode": "code", 939 | "expr": "{service_name=\"$service\"} |= \"$filter\" | json | http_status=~\"$status_filter\" | http_method=~\"$method_filter\"", 940 | "queryType": "range", 941 | "refId": "A" 942 | } 943 | ], 944 | "title": "Raw Logs", 945 | "type": "logs" 946 | } 947 | ], 948 | "refresh": "10s", 949 | "schemaVersion": 41, 950 | "tags": ["monitoring", "logs", "metrics", "prometheus", "loki"], 951 | "templating": { 952 | "list": [ 953 | { 954 | "current": { 955 | "selected": false, 956 | "text": "generated-logs", 957 | "value": "generated-logs" 958 | }, 959 | "datasource": { 960 | "type": "loki", 961 | "uid": "loki" 962 | }, 963 | "definition": "", 964 | "hide": 0, 965 | "includeAll": false, 966 | "label": "Service", 967 | "multi": false, 968 | "name": "service", 969 | "options": [], 970 | "query": { 971 | "label": "service_name", 972 | "refId": "LokiVariableQueryEditor-VariableQuery", 973 | "stream": "", 974 | "type": 1 975 | }, 976 | "refresh": 1, 977 | "regex": "", 978 | "skipUrlSync": false, 979 | "sort": 0, 980 | "type": "query" 981 | }, 982 | { 983 | "current": { 984 | "selected": false, 985 | "text": "", 986 | "value": "" 987 | }, 988 | "hide": 0, 989 | "name": "filter", 990 | "options": [ 991 | { 992 | "selected": true, 993 | "text": "", 994 | "value": "" 995 | } 996 | ], 997 | "query": "", 998 | "skipUrlSync": false, 999 | "type": "textbox" 1000 | }, 1001 | { 1002 | "current": { 1003 | "selected": true, 1004 | "text": [".*"], 1005 | "value": [".*"] 1006 | }, 1007 | "hide": 0, 1008 | "includeAll": false, 1009 | "name": "status_filter", 1010 | "options": [ 1011 | { 1012 | "selected": true, 1013 | "text": ".*", 1014 | "value": ".*" 1015 | }, 1016 | { 1017 | "selected": false, 1018 | "text": "2.*", 1019 | "value": "2.*" 1020 | }, 1021 | { 1022 | "selected": false, 1023 | "text": "3.*", 1024 | "value": "3.*" 1025 | }, 1026 | { 1027 | "selected": false, 1028 | "text": "4.*", 1029 | "value": "4.*" 1030 | }, 1031 | { 1032 | "selected": false, 1033 | "text": "5.*", 1034 | "value": "5.*" 1035 | } 1036 | ], 1037 | "query": ".*,2.*,3.*,4.*,5.*", 1038 | "queryValue": "", 1039 | "skipUrlSync": false, 1040 | "type": "custom" 1041 | }, 1042 | { 1043 | "current": { 1044 | "selected": true, 1045 | "text": [".*"], 1046 | "value": [".*"] 1047 | }, 1048 | "hide": 0, 1049 | "includeAll": false, 1050 | "name": "method_filter", 1051 | "options": [ 1052 | { 1053 | "selected": true, 1054 | "text": ".*", 1055 | "value": ".*" 1056 | }, 1057 | { 1058 | "selected": false, 1059 | "text": "GET", 1060 | "value": "GET" 1061 | }, 1062 | { 1063 | "selected": false, 1064 | "text": "POST", 1065 | "value": "POST" 1066 | }, 1067 | { 1068 | "selected": false, 1069 | "text": "PUT", 1070 | "value": "PUT" 1071 | }, 1072 | { 1073 | "selected": false, 1074 | "text": "DELETE", 1075 | "value": "DELETE" 1076 | }, 1077 | { 1078 | "selected": false, 1079 | "text": "PATCH", 1080 | "value": "PATCH" 1081 | }, 1082 | { 1083 | "selected": false, 1084 | "text": "HEAD", 1085 | "value": "HEAD" 1086 | } 1087 | ], 1088 | "query": ".*,GET,POST,PUT,DELETE,PATCH,HEAD", 1089 | "queryValue": "", 1090 | "skipUrlSync": false, 1091 | "type": "custom" 1092 | } 1093 | ] 1094 | }, 1095 | "time": { 1096 | "from": "now-1h", 1097 | "to": "now" 1098 | }, 1099 | "timepicker": { 1100 | "refresh_intervals": [ 1101 | "5s", 1102 | "10s", 1103 | "30s", 1104 | "1m", 1105 | "5m", 1106 | "15m", 1107 | "30m", 1108 | "1h", 1109 | "2h", 1110 | "1d" 1111 | ] 1112 | }, 1113 | "timezone": "", 1114 | "title": "Application Monitoring & Logging Dashboard", 1115 | "uid": "app-monitoring-logs", 1116 | "version": 1, 1117 | "weekStart": "" 1118 | } --------------------------------------------------------------------------------