├── images ├── ooms-num.png ├── yaml-dev.jpg ├── ooms-stress.png ├── yaml-dev-life.jpg ├── logviewer-chronograf.png └── exploring-syslog-chronograf.png ├── from-logs-to-metrics.pdf ├── namespace.yaml ├── kapacitor ├── tasks │ └── example.tick └── kapacitor.conf ├── go.mod ├── docker-compose.yaml ├── stress.yaml ├── Dockerfile ├── roles.yaml ├── chronograf.yaml ├── influxdb.yaml ├── go.sum ├── README.md ├── kapacitor.yaml ├── main.go └── telelog.yaml /images/ooms-num.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leodido/logs2metrics-talk/master/images/ooms-num.png -------------------------------------------------------------------------------- /images/yaml-dev.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leodido/logs2metrics-talk/master/images/yaml-dev.jpg -------------------------------------------------------------------------------- /images/ooms-stress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leodido/logs2metrics-talk/master/images/ooms-stress.png -------------------------------------------------------------------------------- /from-logs-to-metrics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leodido/logs2metrics-talk/master/from-logs-to-metrics.pdf -------------------------------------------------------------------------------- /images/yaml-dev-life.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leodido/logs2metrics-talk/master/images/yaml-dev-life.jpg -------------------------------------------------------------------------------- /namespace.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Section: logging namespace 3 | apiVersion: v1 4 | 
kind: Namespace 5 | metadata: 6 | name: logging -------------------------------------------------------------------------------- /images/logviewer-chronograf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leodido/logs2metrics-talk/master/images/logviewer-chronograf.png -------------------------------------------------------------------------------- /images/exploring-syslog-chronograf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/leodido/logs2metrics-talk/master/images/exploring-syslog-chronograf.png -------------------------------------------------------------------------------- /kapacitor/tasks/example.tick: -------------------------------------------------------------------------------- 1 | dbrp "telegraf"."autogen" 2 | 3 | stream 4 | |from() 5 | .measurement('syslog') 6 | .truncate(1ms) 7 | .where(lambda: "appname" == 'kernel') 8 | .where(lambda: "message" =~ /sacrifice/) 9 | @example() 10 | |influxDBOut() 11 | .database('telegraf') 12 | .measurement('k8s') -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/leodido/logs2metrics-talk 2 | 3 | require ( 4 | github.com/davecgh/go-spew v1.1.1 5 | github.com/golang/protobuf v1.2.0 // indirect 6 | github.com/influxdata/kapacitor v1.5.1 7 | github.com/sirupsen/logrus v1.1.0 8 | golang.org/x/net v0.0.0-20180926154720-4dfa2610cdf3 // indirect 9 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f // indirect 10 | ) 11 | -------------------------------------------------------------------------------- /docker-compose.yaml: -------------------------------------------------------------------------------- 1 | version: "3.0" 2 | services: 3 | kapacitor: 4 | network_mode: host 5 | image: kapacitor:1.5 6 | volumes: 7 | - 
./kapacitor:/opt/kapacitor 8 | - ./kapacitor/kapacitor.conf:/etc/kapacitor/kapacitor.conf 9 | - /tmp/example.sock:/tmp/example.sock 10 | environment: 11 | - KAPACITOR_LOAD_ENABLED=true 12 | - KAPACITOR_LOAD_DIR=/opt/kapacitor 13 | -------------------------------------------------------------------------------- /stress.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: Pod 4 | metadata: 5 | name: memory-stress 6 | namespace: logging 7 | spec: 8 | containers: 9 | - name: memory-stress 10 | image: polinux/stress 11 | resources: 12 | limits: 13 | memory: "200M" 14 | requests: 15 | memory: "50M" 16 | command: ["stress"] 17 | args: ["--vm", "1", "--vm-bytes", "250M", "--vm-hang", "1"] -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.io/golang:1.11.0-stretch as bbbuilder 2 | ADD main.go /go/src/github.com/leodido/logs2metrics-talk/ 3 | ADD go.mod /go/src/github.com/leodido/logs2metrics-talk/ 4 | ADD go.sum /go/src/github.com/leodido/logs2metrics-talk/ 5 | WORKDIR /go/src/github.com/leodido/logs2metrics-talk 6 | ENV GO111MODULE on 7 | RUN go mod download 8 | RUN CGO_ENABLED=0 GOOS=linux go build -o /example . 
9 | 10 | FROM scratch 11 | COPY --from=bbbuilder /example /example 12 | ENTRYPOINT ["/example"] -------------------------------------------------------------------------------- /kapacitor/kapacitor.conf: -------------------------------------------------------------------------------- 1 | data_dir = "/var/lib/kapacitor" 2 | 3 | [replay] 4 | dir = "/var/lib/kapacitor/replay" 5 | 6 | [storage] 7 | boltdb = "/var/lib/kapacitor/kapacitor.db" 8 | 9 | [udf] 10 | [udf.functions] 11 | [udf.functions.example] 12 | socket = "/tmp/example.sock" 13 | timeout = "10s" 14 | 15 | [logging] 16 | file = "STDOUT" 17 | level = "ERROR" 18 | 19 | [[influxdb]] 20 | enabled = true 21 | default = true 22 | name = "logging" 23 | urls = ["http://localhost:8086"] 24 | timeout = 0 25 | startup-timeout = "5m" 26 | 27 | [influxdb.subscriptions] 28 | telegraf = ["autogen"] -------------------------------------------------------------------------------- /roles.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Section: cluster role for mmkubernetes to be able to call some k8s enpoints 3 | apiVersion: rbac.authorization.k8s.io/v1 4 | kind: ClusterRole 5 | metadata: 6 | name: logging-clusterrole 7 | rules: 8 | - apiGroups: [""] 9 | resources: ["namespaces", "pods"] 10 | verbs: ["get", "list", "read"] 11 | --- 12 | # Section: cluster role binding 13 | apiVersion: rbac.authorization.k8s.io/v1 14 | kind: ClusterRoleBinding 15 | metadata: 16 | name: logging-custerrolebinding 17 | roleRef: 18 | apiGroup: rbac.authorization.k8s.io 19 | kind: ClusterRole 20 | name: logging-clusterrole 21 | subjects: 22 | - kind: ServiceAccount 23 | name: default 24 | namespace: logging -------------------------------------------------------------------------------- /chronograf.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Section: chronograf service 3 | apiVersion: v1 4 | kind: Service 5 | metadata: 6 | namespace: 
logging 7 | name: chronograf 8 | labels: 9 | component: chronograf 10 | app: chronograf 11 | spec: 12 | ports: 13 | - port: 80 14 | targetPort: 8888 15 | name: server 16 | selector: 17 | component: chronograf 18 | --- 19 | # Section: chronograf deployment 20 | apiVersion: apps/v1 21 | kind: Deployment 22 | metadata: 23 | namespace: logging 24 | name: chronograf 25 | labels: 26 | app: chronograf 27 | component: chronograf 28 | spec: 29 | strategy: 30 | type: "Recreate" 31 | selector: 32 | matchLabels: 33 | component: chronograf 34 | replicas: 1 35 | template: 36 | metadata: 37 | name: chronograf 38 | labels: 39 | app: chronograf 40 | component: chronograf 41 | spec: 42 | containers: 43 | - name: chronograf 44 | image: quay.io/influxdb/chronograf:nightly 45 | env: 46 | - name: RESOURCES_PATH 47 | value: "/usr/share/chronograf/resources" 48 | - name: LOG_LEVEL 49 | value: "error" 50 | ports: 51 | - containerPort: 8888 52 | name: server 53 | volumeMounts: 54 | - name: data 55 | mountPath: /var/lib/chronograf 56 | volumes: 57 | - name: data 58 | persistentVolumeClaim: 59 | claimName: chronograf 60 | --- 61 | # Section: chronograf persistent volume claim 62 | kind: PersistentVolumeClaim 63 | apiVersion: v1 64 | metadata: 65 | namespace: logging 66 | name: chronograf 67 | labels: 68 | app: chronograf 69 | component: chronograf 70 | spec: 71 | # storageClassName: "dynamic" 72 | accessModes: 73 | - "ReadWriteOnce" 74 | resources: 75 | requests: 76 | storage: 1Gi -------------------------------------------------------------------------------- /influxdb.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Section: influxdb service 3 | apiVersion: v1 4 | kind: Service 5 | metadata: 6 | namespace: logging 7 | name: influxdb 8 | labels: 9 | component: influxdb 10 | app: influxdb 11 | annotations: 12 | service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" 13 | spec: 14 | clusterIP: None 15 | ports: 16 | - port: 8086 17 | 
name: server 18 | selector: 19 | component: influxdb 20 | --- 21 | # Section: influxdb statefulset 22 | apiVersion: apps/v1 23 | kind: StatefulSet 24 | metadata: 25 | namespace: logging 26 | name: influxdb 27 | labels: 28 | component: influxdb 29 | app: influxdb 30 | spec: 31 | serviceName: influxdb 32 | selector: 33 | matchLabels: 34 | component: influxdb 35 | replicas: 1 36 | template: 37 | metadata: 38 | name: influxdb 39 | labels: 40 | component: influxdb 41 | app: influxdb 42 | spec: 43 | containers: 44 | - name: influxdb 45 | image: quay.io/influxdb/influxdb:nightly 46 | imagePullPolicy: Always 47 | resources: 48 | limits: 49 | memory: 2G 50 | requests: 51 | memory: 1G 52 | env: 53 | - name: INFLUXDB_IFQL_ENABLED 54 | value: "true" 55 | - name: INFLUXDB_LOGGING_LEVEL 56 | value: "error" 57 | - name: INFLUXDB_HTTP_LOG_ENABLED 58 | value: "false" 59 | volumeMounts: 60 | - name: data 61 | mountPath: /var/lib/influxdb 62 | ports: 63 | - containerPort: 8086 64 | name: server 65 | - containerPort: 8082 66 | name: ifql 67 | volumeClaimTemplates: 68 | - metadata: 69 | namespace: logging 70 | name: data 71 | spec: 72 | # storageClassName: "dynamic" 73 | accessModes: 74 | - "ReadWriteOnce" 75 | resources: 76 | requests: 77 | storage: 2Gi -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= 4 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 5 | github.com/influxdata/kapacitor v1.5.1 h1:zjG7iH1cnVYJvHg4WPSo4fdpj3rkkXeg7hALs8lpk/o= 6 | github.com/influxdata/kapacitor v1.5.1/go.mod h1:vv15yTwFBi1kUhCUrM/PGdfsbh5Y5F8BaCrdOg2S+d8= 7 | 
github.com/konsorten/go-windows-terminal-sequences v0.0.0-20180402223658-b729f2633dfe h1:CHRGQ8V7OlCYtwaKPJi3iA7J+YdNKdo8j7nG5IgDhjs= 8 | github.com/konsorten/go-windows-terminal-sequences v0.0.0-20180402223658-b729f2633dfe/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= 9 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 10 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 11 | github.com/sirupsen/logrus v1.1.0 h1:65VZabgUiV9ktjGM5nTq0+YurgTyX+YI2lSSfDjI+qU= 12 | github.com/sirupsen/logrus v1.1.0/go.mod h1:zrgwTnHtNr00buQ1vSptGe8m1f/BbgsPukg8qsT7A+A= 13 | github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1w= 14 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 15 | golang.org/x/crypto v0.0.0-20180904163835-0709b304e793 h1:u+LnwYTOOW7Ukr/fppxEb1Nwz0AtPflrblfvUudpo+I= 16 | golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 17 | golang.org/x/net v0.0.0-20180926154720-4dfa2610cdf3 h1:dgd4x4kJt7G4k4m93AYLzM8Ni6h2qLTfh9n9vXJT3/0= 18 | golang.org/x/net v0.0.0-20180926154720-4dfa2610cdf3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 19 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f h1:wMNYb4v58l5UBM7MYRLPG6ZhfOqbKu7X5eyFl8ZhKvA= 20 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 21 | golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33 h1:I6FyU15t786LL7oL/hn43zqTuEGr4PN7F4XJ1p4E3Y8= 22 | golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Talk: From logs to metric with the TICK stack 2 | 3 | [**Slides**](http://bit.ly/from-logs-to-metrics-tick). 
4 | 5 | This repository contains the PoC associated with the talk "From logs to metric with the TICK stack". 6 | 7 | Its main goal is to show how to extract (structured) value from the huge amount of (unstructured) information that logs contain. 8 | 9 | In brief, the steps are as follows: parsing of syslog messages into structured data, ingesting/collecting them via Telegraf syslog input plugin, visualizing and plot them via Chronograf's log viewer, and eliciting new meaningful metrics (eg. number of process OOM killed) to plot processing them via a Kapacitor [UDF](https://docs.influxdata.com/kapacitor/v1.5/guides/socket_udf/). 10 | 11 | The stack used to achieve this is: 12 | 13 | - [Telegraf](https://github.com/influxdata/telegraf) with [syslog input plugin](https://github.com/influxdata/telegraf/tree/master/plugins/inputs/syslog), which uses this blazing fast [go-syslog](https://github.com/influxdata/go-syslog) parser 14 | - Chronograf 15 | - InfluxDB 16 | - [Kapacitor](https://github.com/influxdata/kapacitor) 17 | 18 | ![Chronograf Log Viewer](images/logviewer-chronograf.png "Chronograf Log Viewer") 19 | 20 | ![Exploring RFC5425 syslog messages with Chronograf](images/exploring-syslog-chronograf.png "Exploring RFC5425 syslog messages with Chronograf") 21 | 22 | ![Couting OOMs](images/ooms-num.png "Couting OOMs") 23 | 24 | ![Counting OOMs of stress pod](images/ooms-stress.png "Counting OOMs of stress pod") 25 | 26 | ## Setup 27 | 28 | First of all we need a local k8s environment. 29 | 30 | Let's proceed with minikube. 31 | 32 | ```bash 33 | minikube start --docker-opt log-driver=journald 34 | ``` 35 | 36 | Note that we need the **journald log driver** for the inner docker since the rsyslog's mmkubernetes module [only works with it](https://www.rsyslog.com/doc/master/configuration/modules/mmkubernetes.html) (or with **json-file docker log driver**). 
37 | 38 | The following step is to become a YAML developer :hear_no_evil: :speak_no_evil:, applying all the YAML files describing our setup. 39 | 40 | | ![YAML meme](images/yaml-dev.jpg) | ![The life of a YAML developer](images/yaml-dev-life.jpg) | 41 | |:---:|:---:| 42 | 43 | Assuming your minikube setup is capable of provisioning volumes, execute the following commands. 44 | 45 | ```bash 46 | kubectl apply -f namespace.yaml 47 | kubectl apply -f roles.yaml 48 | kubectl apply -f influxdb.yaml 49 | kubectl apply -f telelog.yaml 50 | kubectl apply -f chronograf.yaml 51 | kubectl apply -f kapacitor.yaml 52 | kubectl apply -f stress.yaml 53 | ``` 54 | 55 | Finally, to access Chronograf from within our local browser we need the following port forward. 56 | 57 | ```bash 58 | kubectl port-forward svc/chronograf -n logging 8888:80 59 | ``` 60 | 61 | Go to [localhost:8888](http://localhost:8888) now! 62 | 63 | ## Run with local up cluster 64 | 65 | _TBD_. 66 | 67 | ## Developing the Kapacitor UDF 68 | 69 | File `docker-compose.yaml` is useful during the development and debugging of the Kapacitor UDF. 70 | 71 | To make it work, do not forget to forward the port of the InfluxDB instance running within minikube.
72 | 73 | ```bash 74 | kubectl port-forward svc/influxdb -n logging 8086:8086 75 | ``` 76 | 77 | Then run 78 | 79 | ```bash 80 | docker-compose up -d 81 | ``` 82 | 83 | ## Other suitable docker log drivers 84 | 85 | It is possible to use this with the **[syslog docker log driver](https://docs.docker.com/config/containers/logging/syslog/#options)** with the following log options: 86 | 87 | - `syslog-format=rfc5424micro` 88 | - `syslog-address=udp://1.2.3.4:1111` (telegraf syslog plugin) 89 | 90 | In such case: 91 | 92 | - there is no need for rsyslog 93 | - telegraf syslog plugin in UDP mode (at the moment in TCP/TLS mode there is no way to disable the octet framing requirement - i.e., RFC5425) 94 | - syslog facility will be fixed (depending on the `syslog-facility` option) 95 | 96 | _TBD_: create an alternative setup based on this approach. 97 | 98 | --- 99 | 100 | [![Analytics](https://ga-beacon.appspot.com/UA-49657176-1/logs2metrics-talk?flat)](https://github.com/igrigorik/ga-beacon) 101 | -------------------------------------------------------------------------------- /kapacitor.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: kapacitor-task 6 | namespace: logging 7 | data: 8 | example.tick: |+ 9 | dbrp "telegraf"."autogen" 10 | stream 11 | |from() 12 | .measurement('syslog') 13 | .truncate(1ms) 14 | .where(lambda: "appname" == 'kernel') 15 | .where(lambda: "message" =~ /sacrifice/) 16 | @example() 17 | |influxDBOut() 18 | .database('telegraf') 19 | .measurement('k8s') 20 | --- 21 | apiVersion: v1 22 | kind: ConfigMap 23 | metadata: 24 | name: kapacitor-config 25 | namespace: logging 26 | data: 27 | kapacitor.conf: |+ 28 | data_dir = "/var/lib/kapacitor" 29 | [replay] 30 | dir = "/var/lib/kapacitor/replay" 31 | [storage] 32 | boltdb = "/var/lib/kapacitor/kapacitor.db" 33 | [load] 34 | enabled = true 35 | dir = "/opt/kapacitor" 36 | [udf] 37 | [udf.functions]
[udf.functions.example] 39 | socket = "/var/run/example.sock" 40 | timeout = "10s" 41 | [logging] 42 | file = "STDOUT" 43 | level = "ERROR" 44 | [[influxdb]] 45 | enabled = true 46 | default = true 47 | name = "logging" 48 | urls = ["http://localhost:8086"] 49 | timeout = 0 50 | startup-timeout = "5m" 51 | [influxdb.subscriptions] 52 | telegraf = ["autogen"] 53 | --- 54 | apiVersion: v1 55 | kind: Service 56 | metadata: 57 | name: kapacitor-example 58 | namespace: logging 59 | labels: 60 | app: kapacitor-example 61 | component: kapacitor-example 62 | spec: 63 | selector: 64 | component: kapacitor-example 65 | ports: 66 | - name: backend 67 | port: 9092 68 | protocol: TCP 69 | --- 70 | apiVersion: apps/v1 71 | kind: Deployment 72 | metadata: 73 | name: kapacitor-example 74 | namespace: logging 75 | labels: 76 | app: kapacitor-example 77 | annotations: 78 | kubernetes.io/hostname: kapacitor-example.logging.svc 79 | spec: 80 | replicas: 1 81 | strategy: 82 | type: Recreate 83 | selector: 84 | matchLabels: 85 | component: kapacitor-example 86 | template: 87 | metadata: 88 | labels: 89 | app: kapacitor-example 90 | component: kapacitor-example 91 | spec: 92 | volumes: 93 | - name: shared-socket 94 | emptyDir: {} 95 | - name: kapacitor-task 96 | configMap: 97 | name: kapacitor-task 98 | - name: kapacitor-config 99 | configMap: 100 | name: kapacitor-config 101 | - name: kapacitor-volclaim 102 | persistentVolumeClaim: 103 | claimName: kapacitor-volclaim 104 | containers: 105 | - name: kapacitor 106 | image: docker.io/kapacitor:1.5 107 | env: 108 | - name: "KAPACITOR_HOSTNAME" 109 | value: "kapacitor-example.logging.svc" 110 | - name: "KAPACITOR_INFLUXDB_0_URLS_0" 111 | value: "http://influxdb.logging.svc:8086" 112 | resources: 113 | limits: 114 | memory: 500M 115 | requests: 116 | memory: 250M 117 | volumeMounts: 118 | - name: shared-socket 119 | mountPath: /var/run 120 | - name: kapacitor-config 121 | mountPath: /etc/kapacitor 122 | - name: kapacitor-task 123 | mountPath: 
/opt/kapacitor/tasks 124 | - name: kapacitor-volclaim 125 | mountPath: /var/lib/kapacitor 126 | ports: 127 | - containerPort: 9092 128 | - name: example-udf 129 | imagePullPolicy: Always 130 | image: quay.io/leodido/example-udf:latest 131 | args: ["--socket", "/var/run/example.sock"] 132 | volumeMounts: 133 | - name: shared-socket 134 | mountPath: /var/run 135 | --- 136 | kind: PersistentVolumeClaim 137 | apiVersion: v1 138 | metadata: 139 | name: kapacitor-volclaim 140 | namespace: logging 141 | spec: 142 | # storageClassName: "dynamic" 143 | accessModes: 144 | - ReadWriteOnce 145 | resources: 146 | requests: 147 | storage: 1Gi -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/gob" 6 | "errors" 7 | "flag" 8 | "net" 9 | "os" 10 | "regexp" 11 | "strings" 12 | "syscall" 13 | "time" 14 | 15 | log "github.com/sirupsen/logrus" 16 | 17 | "github.com/influxdata/kapacitor/udf/agent" 18 | ) 19 | 20 | type myState struct { 21 | Counter int64 22 | } 23 | 24 | // Update the moving average with the next data point. 25 | func (s *myState) update() { 26 | s.Counter = s.Counter + 1 27 | } 28 | 29 | // An Agent.Handler that ... 30 | type handler struct { 31 | agent *agent.Agent 32 | state map[string]*myState 33 | } 34 | 35 | func newHandler(a *agent.Agent) *handler { 36 | return &handler{ 37 | state: make(map[string]*myState), 38 | agent: a, 39 | } 40 | } 41 | 42 | // Return the InfoResponse to describe this UDF agent. 43 | // 44 | // Note that it does not have any option. 45 | func (h *handler) Info() (*agent.InfoResponse, error) { 46 | info := &agent.InfoResponse{ 47 | Wants: agent.EdgeType_STREAM, 48 | Provides: agent.EdgeType_STREAM, 49 | Options: map[string]*agent.OptionInfo{}, 50 | } 51 | return info, nil 52 | } 53 | 54 | // Initialze the handler. 
55 | func (h *handler) Init(r *agent.InitRequest) (*agent.InitResponse, error) { 56 | init := &agent.InitResponse{ 57 | Success: true, 58 | Error: "", 59 | } 60 | return init, nil 61 | } 62 | 63 | // This handler does not do batching. 64 | func (h *handler) BeginBatch(*agent.BeginBatch) error { 65 | return errors.New("batching not supported") 66 | } 67 | 68 | // This handler does not do batching. 69 | func (h *handler) EndBatch(*agent.EndBatch) error { 70 | return errors.New("batching not supported") 71 | } 72 | 73 | // Stop the handler gracefully. 74 | func (h *handler) Stop() { 75 | close(h.agent.Responses) 76 | } 77 | 78 | // Create a snapshot of the running state of the process. 79 | func (h *handler) Snapshot() (*agent.SnapshotResponse, error) { 80 | var buf bytes.Buffer 81 | enc := gob.NewEncoder(&buf) 82 | enc.Encode(h.state) 83 | 84 | return &agent.SnapshotResponse{ 85 | Snapshot: buf.Bytes(), 86 | }, nil 87 | } 88 | 89 | // Restore a previous snapshot. 90 | func (h *handler) Restore(req *agent.RestoreRequest) (*agent.RestoreResponse, error) { 91 | buf := bytes.NewReader(req.Snapshot) 92 | dec := gob.NewDecoder(buf) 93 | err := dec.Decode(&h.state) 94 | msg := "" 95 | if err != nil { 96 | msg = err.Error() 97 | } 98 | return &agent.RestoreResponse{ 99 | Success: err == nil, 100 | Error: msg, 101 | }, nil 102 | } 103 | 104 | func mapSubexpNames(m, n []string) map[string]string { 105 | m, n = m[1:], n[1:] 106 | r := make(map[string]string, len(m)) 107 | for i := range n { 108 | r[n[i]] = m[i] 109 | } 110 | return r 111 | } 112 | 113 | // Receive a point and do something with it. 114 | // Send a response with the average value. 
115 | func (h *handler) Point(p *agent.Point) error { 116 | var r = regexp.MustCompile(`(?m).*Kill process (?P\d+) (?P\(.*\)) score (?P\d+)`) 117 | message, ok := p.FieldsString["message"] 118 | if ok { 119 | m := r.FindStringSubmatch(message) 120 | data := mapSubexpNames(m, r.SubexpNames()) 121 | 122 | proc := strings.Trim(data["proc"], "()") 123 | state := h.state[proc] 124 | if state == nil { 125 | state := &myState{Counter: 0} 126 | h.state[proc] = state 127 | } 128 | h.state[proc].update() 129 | 130 | newpoint := &agent.Point{ 131 | Time: time.Now().UnixNano(), 132 | Tags: map[string]string{ 133 | "proc": string(proc), 134 | "pid": string(data["pid"]), 135 | }, 136 | FieldsInt: map[string]int64{ 137 | "count": h.state[proc].Counter, 138 | }, 139 | } 140 | 141 | // Send point 142 | h.agent.Responses <- &agent.Response{ 143 | Message: &agent.Response_Point{ 144 | Point: newpoint, 145 | }, 146 | } 147 | } 148 | 149 | return nil 150 | } 151 | 152 | type accepter struct { 153 | count int64 154 | } 155 | 156 | // Create a new agent/handler for each new connection. 157 | // Count and log each new connection and termination. 
158 | func (acc *accepter) Accept(conn net.Conn) { 159 | count := acc.count 160 | acc.count++ 161 | a := agent.New(conn, conn) 162 | h := newHandler(a) 163 | a.Handler = h 164 | 165 | log.WithField("connections", count).Info("Starting agent for connection") 166 | a.Start() 167 | go func() { 168 | err := a.Wait() 169 | if err != nil { 170 | log.Fatal(err) 171 | } 172 | log.WithField("connections", count).Info("Agent for connection finished") 173 | }() 174 | } 175 | 176 | var socketPath = flag.String("socket", "/tmp/example.sock", "Where to create the unix socket.") 177 | 178 | func main() { 179 | flag.Parse() 180 | addr, err := net.ResolveUnixAddr("unix", *socketPath) 181 | if err != nil { 182 | log.Fatal(err) 183 | } 184 | 185 | syscall.Unlink(*socketPath) 186 | 187 | l, err := net.ListenUnix("unix", addr) 188 | if err != nil { 189 | log.Fatal(err) 190 | } 191 | 192 | s := agent.NewServer(l, &accepter{}) 193 | 194 | s.StopOnSignals(os.Interrupt, syscall.SIGTERM) 195 | 196 | log.WithField("address", addr.String()).Infoln("Server listening") 197 | err = s.Serve() 198 | if err != nil { 199 | log.Fatal(err) 200 | } 201 | log.WithField("address", addr.String()).Infoln("Server stopped") 202 | } 203 | -------------------------------------------------------------------------------- /telelog.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Section: telegraf config map 3 | apiVersion: v1 4 | kind: ConfigMap 5 | metadata: 6 | name: telegraf 7 | namespace: logging 8 | labels: 9 | component: telegraf 10 | app: telegraf 11 | data: 12 | telegraf.conf: |+ 13 | [global_tags] 14 | env = "logging" 15 | [agent] 16 | interval = "10s" 17 | round_interval = true 18 | metric_batch_size = 1000 19 | metric_buffer_limit = 10000 20 | collection_jitter = "0s" 21 | flush_interval = "10s" 22 | flush_jitter = "0s" 23 | precision = "" 24 | debug = false 25 | quiet = false 26 | logfile = "" 27 | omit_hostname = true 28 | [[outputs.influxdb]] 29 | 
urls = ["http://influxdb:8086"] # required 30 | database = "telegraf" # required 31 | retention_policy = "autogen" 32 | write_consistency = "any" 33 | timeout = "1m" 34 | [[inputs.syslog]] 35 | server = "tcp://:6514" 36 | best_effort = true 37 | --- 38 | # Section: rsyslog config map 39 | apiVersion: v1 40 | kind: ConfigMap 41 | metadata: 42 | name: rsyslog 43 | namespace: logging 44 | labels: 45 | component: rsyslog 46 | app: rsyslog 47 | data: 48 | rsyslog.conf: |+ 49 | # This only works with the following docker logging drivers currently: journald, json-file, and CRI-O log files 50 | global(processInternalMessages="on") 51 | global(parser.permitSlashInProgramName="on") 52 | global(workDirectory="/var/spool/rsyslog") # default location for work (spool) files 53 | 54 | # Raise limits within /etc/systemd/journald.conf on the host(s) - ie., RateLimitIntervalSec=30s + RateLimitBurst=1000000 55 | module(load="imjournal" ignorepreviousmessages="on" ratelimit.interval="60" ratelimit.burst="2000000" persiststateinterval="10000" statefile="/var/spool/rsyslog/imjournal.state") 56 | 57 | module(load="mmutf8fix") 58 | module(load="mmkubernetes" 59 | tls.cacert="/run/secrets/kubernetes.io/serviceaccount/ca.crt" 60 | tokenfile="/run/secrets/kubernetes.io/serviceaccount/token" 61 | annotation_match=["."]) 62 | 63 | # Extracts k8s metadata 64 | action(type="mmkubernetes") 65 | 66 | # Compose k8s namespace and pod name into an app-name only when they are available 67 | template(name="k8s_app" type="list") { 68 | property(name="!kubernetes!namespace_name") 69 | constant(value="/") 70 | property(name="!kubernetes!pod_name") 71 | } 72 | set $!custom_appname = exec_template("k8s_app"); 73 | # Otherwise use the default app-name for journal entries not regarding k8s 74 | template(name="appname" type="list") { 75 | property(name="APP-NAME") 76 | } 77 | if $!custom_appname == "/" then { 78 | set $!custom_appname = exec_template("appname"); 79 | } 80 | if $!custom_appname startswith 
"rsyslogd-" then { 81 | set $!custom_appname = "rsyslogd"; 82 | } 83 | 84 | # Use the hostname for journal entries not regarding k8s 85 | template(name="hostname" type="list") { 86 | property(name="!_HOSTNAME") 87 | } 88 | set $!custom_hostname = exec_template("hostname"); 89 | # When empty it's because message does not come from journald but directly from rsyslogd 90 | if $!custom_hostname == "" then { 91 | set $!custom_hostname = "FROM-RSYSLOGD"; 92 | } 93 | 94 | # Create structured data containing k8s metadata 95 | template(name="k8s_cid" type="list") { 96 | property(name="!docker!container_id" position.from="1" position.to="12") 97 | } 98 | set $!custom_cid = exec_template("k8s_cid"); 99 | template(name="k8s_nid" type="list") { 100 | property(name="!kubernetes!namespace_id" position.from="1" position.to="12") 101 | } 102 | set $!custom_nid = exec_template("k8s_nid"); 103 | template(name="k8s_pid" type="list") { 104 | property(name="!kubernetes!pod_id" position.from="1" position.to="12") 105 | } 106 | set $!custom_pid = exec_template("k8s_pid"); 107 | 108 | template(name="k8s_component" type="list") { 109 | property(name="!kubernetes!labels!component" position.from="1" position.to="32") 110 | } 111 | set $!custom_component = exec_template("k8s_component"); 112 | template(name="k8s_crevision" type="list") { 113 | property(name="!kubernetes!labels!controller-revision-hash" position.from="1" position.to="32") 114 | } 115 | set $!custom_crevision = exec_template("k8s_crevision"); 116 | 117 | set $!custom_ids = ""; 118 | if $!custom_cid != "" then { 119 | set $!custom_ids = 'container="' & $!custom_cid & '"'; 120 | } 121 | if $!custom_nid != "" then { 122 | set $!custom_ids = $!custom_ids & ' namespace="' & $!custom_nid & '"'; 123 | } 124 | if $!custom_pid != "" then { 125 | set $!custom_ids = $!custom_ids & ' pod="' & $!custom_pid & '"'; 126 | } 127 | if $!custom_ids != "" then { 128 | set $!custom_ids = "[id " & $!custom_ids & "]"; 129 | } 130 | 131 | set 
$!custom_labels = ""; 132 | if $!custom_component != "" then { 133 | set $!custom_labels = 'component="' & $!custom_component & '"'; 134 | } 135 | if $!custom_crevision != "" then { 136 | set $!custom_labels = $!custom_labels & ' controller-revision-hash="' & $!custom_crevision & '"'; 137 | } 138 | if $!custom_labels != "" then { 139 | set $!custom_labels = "[label " & $!custom_labels & "]"; 140 | } 141 | 142 | template(name="c_sddata" type="list") { 143 | property(name="!custom_ids" compressspace="on") 144 | property(name="!custom_labels" compressspace="on") 145 | } 146 | template(name="sddata" type="list") { 147 | property(name="STRUCTURED-DATA") 148 | } 149 | if $!custom_labels == "" and $!custom_ids == "" then { 150 | set $!custom_sddata = exec_template("sddata"); 151 | } else { 152 | set $!custom_sddata = exec_template("c_sddata"); 153 | } 154 | 155 | # Compose RFC5424 message 156 | template(name="rfc5424" type="list") { 157 | constant(value="<") 158 | property(name="PRI") 159 | constant(value=">1 ") 160 | property(name="TIMESTAMP" dateFormat="rfc3339" date.inUTC="on") 161 | constant(value=" ") 162 | property(name="!custom_hostname" position.from="1" position.to="255" caseConversion="lower") 163 | constant(value=" ") 164 | property(name="!custom_appname" position.from="1" position.to="48" caseConversion="lower") 165 | constant(value=" ") 166 | property(name="PROCID" position.from="1" position.to="128") 167 | constant(value=" ") 168 | property(name="MSGID" position.from="1" position.to="32") 169 | constant(value=" ") 170 | property(name="!custom_sddata") 171 | constant(value=" ") 172 | property(name="msg" droplastlf="on") 173 | constant(value="\n") 174 | } 175 | 176 | action(type="mmutf8fix") 177 | 178 | action(type="omfwd" 179 | target="127.0.0.1" 180 | port="6514" 181 | protocol="tcp" 182 | tcp_framing="octet-counted" 183 | template="rfc5424" 184 | queue.type="LinkedList" 185 | queue.size="5000000" 186 | queue.filename="forwarding" 187 | 
queue.maxdiskspace="1g") 188 | 189 | # Uncomment do enable debug 190 | # action(type="omfile" file="/var/log/debuglog" template="RSYSLOG_DebugFormat") 191 | # action(type="omfile" file="/var/log/rfc_5424" template="rfc5424") 192 | --- 193 | # Section: telegraf + rsyslog daemon set 194 | apiVersion: apps/v1 195 | kind: DaemonSet 196 | metadata: 197 | name: telegraf 198 | namespace: logging 199 | labels: 200 | app: telegraf 201 | component: telegraf 202 | spec: 203 | selector: 204 | matchLabels: 205 | name: telegraf 206 | template: 207 | metadata: 208 | labels: 209 | name: telegraf 210 | spec: 211 | tolerations: 212 | - key: node-role.kubernetes.io/master 213 | effect: NoSchedule 214 | containers: 215 | - name: telegraf 216 | image: docker.io/telegraf:1.8.0-alpine 217 | resources: 218 | limits: 219 | memory: 500Mi 220 | requests: 221 | cpu: 500m 222 | memory: 500Mi 223 | env: 224 | - name: HOSTNAME 225 | valueFrom: 226 | fieldRef: 227 | fieldPath: spec.nodeName 228 | ports: 229 | - containerPort: 6514 230 | name: receiver 231 | volumeMounts: 232 | - name: docker-socket 233 | mountPath: /var/run/docker.sock 234 | - name: telegraf-config 235 | mountPath: /etc/telegraf 236 | - name: rsyslog 237 | image: quay.io/leodido/rsyslog:latest 238 | command: ["bash"] 239 | args: ["-c", "rsyslogd -n -f /etc/rsyslog/rsyslog.conf >/dev/null 2>&1"] 240 | volumeMounts: 241 | - name: journal-var 242 | mountPath: /var/log/journal 243 | readOnly: true 244 | - name: journal-run 245 | mountPath: /run/log/journal 246 | readOnly: true 247 | - name: journal-sys 248 | mountPath: /run/systemd/journal 249 | readOnly: true 250 | - name: machine-id 251 | mountPath: /etc/machine-id 252 | readOnly: true 253 | - name: rsyslog-config 254 | mountPath: /etc/rsyslog 255 | terminationGracePeriodSeconds: 30 256 | volumes: 257 | - hostPath: 258 | path: /var/run/docker.sock 259 | type: "" 260 | name: docker-socket 261 | - hostPath: 262 | path: /var/log/journal 263 | type: "" 264 | name: journal-var 265 | - 
hostPath: 266 | path: /run/log/journal 267 | type: "" 268 | name: journal-run 269 | - hostPath: 270 | path: /run/systemd/journal 271 | type: "" 272 | name: journal-sys 273 | - hostPath: 274 | path: /etc/machine-id 275 | type: "" 276 | name: machine-id 277 | - configMap: 278 | defaultMode: 420 279 | name: telegraf 280 | name: telegraf-config 281 | - configMap: 282 | defaultMode: 420 283 | name: rsyslog 284 | name: rsyslog-config --------------------------------------------------------------------------------