├── .github
│   └── workflows
│       ├── build.yml
│       ├── image.yml
│       └── release.yml
├── .gitignore
├── .goreleaser.yml
├── Dockerfile
├── Dockerfile.cloudbuild
├── LICENSE
├── Makefile
├── README.md
├── cloudbuild.release.yaml
├── cloudbuild.yaml
├── cmd
│   ├── integration-tester
│   │   └── main.go
│   ├── load-generator
│   │   └── main.go
│   └── process-exporter
│       └── main.go
├── collector
│   └── process_collector.go
├── common.go
├── config
│   ├── base_test.go
│   ├── config.go
│   └── config_test.go
├── fixtures
│   ├── 14804
│   │   ├── cgroup
│   │   ├── cmdline
│   │   ├── comm
│   │   ├── exe
│   │   ├── fd
│   │   │   ├── 0
│   │   │   ├── 1
│   │   │   ├── 2
│   │   │   ├── 3
│   │   │   └── 10
│   │   ├── io
│   │   ├── limits
│   │   ├── stat
│   │   └── status
│   ├── stat
│   └── symlinktargets
│       ├── README
│       ├── abc
│       ├── def
│       ├── ghi
│       ├── uvw
│       └── xyz
├── go.mod
├── go.sum
├── packaging
│   ├── conf
│   │   └── all.yaml
│   ├── default
│   │   └── process-exporter
│   ├── process-exporter.service
│   └── scripts
│       ├── postinstall.sh
│       ├── postremove.sh
│       └── preremove.sh
└── proc
    ├── base_test.go
    ├── grouper.go
    ├── grouper_test.go
    ├── read.go
    ├── read_test.go
    ├── tracker.go
    └── tracker_test.go

--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------

1 | on: [push]
2 | jobs:
3 |   build:
4 |     runs-on: ubuntu-latest
5 |     steps:
6 |       - name: setup Go
7 |         uses: actions/setup-go@v2
8 |         with:
9 |           go-version: '1.23.8'
10 | 
11 |       - uses: actions/checkout@v4
12 |         with:
13 |           fetch-depth: 0
14 | 
15 |       - run: make style
16 |       - run: make vet
17 |       - run: make test
18 |       - run: make BRANCH=${{ github.head_ref || github.ref_name }} build
19 |       - run: make integ

--------------------------------------------------------------------------------
/.github/workflows/image.yml:
--------------------------------------------------------------------------------

1 | on:
2 |   push:
3 |     branches: [master]
4 |     tags: ['v*']
5 |   pull_request:
6 | 
7 | jobs:
8 |   image:
9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - name: setup buildx
12 |         id: buildx
13 |         uses: docker/setup-buildx-action@v3
14 |         with:
15 |           version: latest
16 | 
17 |       - name: login to docker hub
18 |         if: github.event_name != 'pull_request'
19 |         uses: docker/login-action@v1
20 |         with:
21 |           registry: docker.io
22 |           username: ${{ secrets.DOCKER_USERNAME }}
23 |           password: ${{ secrets.DOCKER_PASSWORD }}
24 | 
25 |       - uses: actions/checkout@v4
26 |         with:
27 |           fetch-depth: 0
28 | 
29 |       - name: generate docker metadata
30 |         id: meta
31 |         uses: docker/metadata-action@v5
32 |         with:
33 |           # list of Docker images to use as base name for tags
34 |           images: |
35 |             ncabatoff/process-exporter
36 |           # generate Docker tags based on the following events/attributes
37 |           tags: |
38 |             type=ref,event=tag
39 |             type=ref,event=branch
40 |             type=ref,event=pr
41 |             type=semver,pattern={{version}}
42 |             type=sha
43 | 
44 |       - name: build docker image and, if not PR, push
45 |         uses: docker/build-push-action@v5
46 |         with:
47 |           file: ./Dockerfile
48 |           context: .
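          # multi-arch targets; the Dockerfile cross-compiles via BUILDPLATFORM/TARGETARCH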
49 | platforms: linux/amd64,linux/arm/v6,linux/arm/v7,linux/arm64 50 | push: ${{ github.event_name != 'pull_request' }} 51 | tags: ${{ steps.meta.outputs.tags }} 52 | labels: ${{ steps.meta.outputs.labels }} -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - v[0-9].* 5 | 6 | permissions: 7 | contents: write 8 | 9 | jobs: 10 | release: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: setup Go 19 | uses: actions/setup-go@v4 20 | with: 21 | go-version: '1.23.8' 22 | 23 | - run: make test 24 | 25 | - name: Run GoReleaser 26 | uses: goreleaser/goreleaser-action@v5 27 | with: 28 | distribution: goreleaser 29 | version: "~> 1.25" 30 | args: release --rm-dist 31 | env: 32 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | /process-exporter 3 | load-generator 4 | integration-tester 5 | dist 6 | /vendor 7 | /.vscode 8 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | builds: 2 | - main: cmd/process-exporter/main.go 3 | binary: process-exporter 4 | ldflags: 5 | - -s -w 6 | - -X github.com/prometheus/common/version.BuildDate={{.Date}} 7 | - -X github.com/prometheus/common/version.BuildUser=goreleaser 8 | - -X github.com/prometheus/common/version.Revision={{.FullCommit}} 9 | - -X main.version={{.Version}} 10 | env: 11 | - CGO_ENABLED=0 12 | goos: 13 | - linux 14 | goarch: 15 | - amd64 16 | - 386 17 | - arm 18 | - arm64 19 | - ppc64 20 | - ppc64le 21 | goarm: 22 | - 6 23 | - 7 24 | archives: 25 | - name_template: "process-exporter-{{ .Version }}.{{ .Os }}-{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}" 26 | wrap_in_directory: true 27 | checksum: 28 | name_template: checksums.txt 29 | nfpms: 30 | - homepage: https://github.com/ncabatoff/process-exporter 31 | maintainer: nick.cabatoff+procexp@gmail.com 32 | description: Prometheus exporter to report on processes running 33 | license: MIT 34 | formats: 35 | - deb 36 | - rpm 37 | bindir: /usr/bin 38 | contents: 39 | - src: packaging/process-exporter.service 40 | dst: /lib/systemd/system/process-exporter.service 41 | - src: packaging/conf/all.yaml 42 | dst: /etc/process-exporter/all.yaml 43 | type: config 44 | - src: packaging/default/process-exporter 45 | dst: /etc/default/process-exporter 46 | type: config 47 | 48 | scripts: 49 | postinstall: "packaging/scripts/postinstall.sh" 50 | postremove: "packaging/scripts/postremove.sh" 51 | preremove: "packaging/scripts/preremove.sh" 52 | release: 53 | github: 54 | owner: ncabatoff 55 | name: process-exporter 56 | draft: false 57 | prerelease: true 58 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Start from a Debian image with the latest version of Go installed 2 | # and a workspace (GOPATH) configured at /go. 3 | FROM --platform=$BUILDPLATFORM golang:1.23.8 AS build 4 | ARG TARGETARCH 5 | ARG BUILDPLATFORM 6 | WORKDIR /go/src/github.com/ncabatoff/process-exporter 7 | ADD . . 
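# Note: FROM --platform=$BUILDPLATFORM above, combined with GOARCH=$TARGETARCH below,
# lets a single natively-running build stage cross-compile for each target architecture.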
8 | 9 | # Build the process-exporter command inside the container. 10 | RUN CGO_ENABLED=0 GOARCH=$TARGETARCH make build 11 | 12 | FROM scratch 13 | 14 | COPY --from=build /go/src/github.com/ncabatoff/process-exporter/process-exporter /bin/process-exporter 15 | 16 | # Run the process-exporter command by default when the container starts. 17 | ENTRYPOINT ["/bin/process-exporter"] 18 | 19 | # Document that the service listens on port 9256. 20 | EXPOSE 9256 21 | -------------------------------------------------------------------------------- /Dockerfile.cloudbuild: -------------------------------------------------------------------------------- 1 | FROM scratch 2 | COPY gopath/bin/process-exporter /process-exporter 3 | ENTRYPOINT ["/process-exporter"] 4 | EXPOSE 9256 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 ncabatoff 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | pkgs = $(shell go list ./...) 2 | 3 | PREFIX ?= $(shell pwd) 4 | BIN_DIR ?= $(shell pwd) 5 | DOCKER_IMAGE_NAME ?= ncabatoff/process-exporter 6 | 7 | BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD) 8 | BUILDDATE ?= $(shell date --iso-8601=seconds) 9 | BUILDUSER ?= $(shell whoami)@$(shell hostname) 10 | REVISION ?= $(shell git rev-parse HEAD) 11 | TAG_VERSION ?= $(shell git describe --tags --abbrev=0) 12 | 13 | VERSION_LDFLAGS := \ 14 | -X github.com/prometheus/common/version.Branch=$(BRANCH) \ 15 | -X github.com/prometheus/common/version.BuildDate=$(BUILDDATE) \ 16 | -X github.com/prometheus/common/version.BuildUser=$(BUILDUSER) \ 17 | -X github.com/prometheus/common/version.Revision=$(REVISION) \ 18 | -X main.version=$(TAG_VERSION) 19 | 20 | SMOKE_TEST = -config.path packaging/conf/all.yaml -once-to-stdout-delay 1s |grep -q 'namedprocess_namegroup_memory_bytes{groupname="process-exporte",memtype="virtual"}' 21 | 22 | all: format vet test build smoke 23 | 24 | style: 25 | @echo ">> checking code style" 26 | @! gofmt -d $(shell find . 
-name '*.go' -print) | grep '^'
27 | 
28 | test:
29 | 	@echo ">> running short tests"
30 | 	go test -short $(pkgs)
31 | 
32 | format:
33 | 	@echo ">> formatting code"
34 | 	go fmt $(pkgs)
35 | 
36 | vet:
37 | 	@echo ">> vetting code"
38 | 	go vet $(pkgs)
39 | 
40 | build:
41 | 	@echo ">> building code"
42 | 	cd cmd/process-exporter; CGO_ENABLED=0 go build -ldflags "$(VERSION_LDFLAGS)" -o ../../process-exporter -a -tags netgo
43 | 
44 | smoke:
45 | 	@echo ">> smoke testing process-exporter"
46 | 	./process-exporter $(SMOKE_TEST)
47 | 
48 | integ:
49 | 	@echo ">> integration testing process-exporter"
50 | 	go build -o integration-tester cmd/integration-tester/main.go
51 | 	go build -o load-generator cmd/load-generator/main.go
52 | 	./integration-tester -write-size-bytes 65536
53 | 
54 | install:
55 | 	@echo ">> installing binary"
56 | 	cd cmd/process-exporter; CGO_ENABLED=0 go install -a -tags netgo
57 | 
58 | docker:
59 | 	@echo ">> building docker image"
60 | 	docker build -t "$(DOCKER_IMAGE_NAME):$(TAG_VERSION)" .
61 | 	docker rm configs
62 | 	docker create -v /packaging --name configs alpine:3.4 /bin/true
63 | 	docker cp packaging/conf configs:/packaging/conf
64 | 	docker run --rm --volumes-from configs "$(DOCKER_IMAGE_NAME):$(TAG_VERSION)" $(SMOKE_TEST)
65 | 
66 | dockertest:
67 | 	docker run --rm -it -v `pwd`:/go/src/github.com/ncabatoff/process-exporter golang:1.23.8 make -C /go/src/github.com/ncabatoff/process-exporter test
68 | 
69 | dockerinteg:
70 | 	docker run --rm -it -v `pwd`:/go/src/github.com/ncabatoff/process-exporter golang:1.23.8 make -C /go/src/github.com/ncabatoff/process-exporter build integ
71 | 
72 | .PHONY: update-go-deps
73 | update-go-deps:
74 | 	@echo ">> updating Go dependencies"
75 | 	@for m in $$(go list -mod=readonly -m -f '{{ if and (not .Indirect) (not .Main)}}{{.Path}}{{end}}' all); do \
76 | 		go get $$m; \
77 | 	done
78 | 	go mod tidy
79 | 
80 | .PHONY: all style format test vet build integ docker
81 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

1 | # process-exporter
2 | Prometheus exporter that mines /proc to report on selected processes.
3 | 
4 | [release]: https://github.com/ncabatoff/process-exporter/releases/latest
5 | 
6 | [![Release](https://img.shields.io/github/release/ncabatoff/process-exporter.svg?style=flat-square)][release]
7 | [![Powered By: GoReleaser](https://img.shields.io/badge/powered%20by-goreleaser-green.svg?branch=master)](https://github.com/goreleaser)
8 | ![Build](https://github.com/ncabatoff/process-exporter/actions/workflows/build.yml/badge.svg)
9 | 
10 | Some apps are impractical to instrument directly, either because you
11 | don't control the code or they're written in a language that isn't easy to
12 | instrument with Prometheus. We must instead resort to mining /proc.
13 | 
14 | ## Installation
15 | 
16 | Either grab a package for your OS from the [Releases][release] page, or
17 | install via [docker](https://hub.docker.com/r/ncabatoff/process-exporter/).
18 | 
19 | ## Running
20 | 
21 | Usage:
22 | 
23 | ```
24 | process-exporter [options] -config.path filename.yml
25 | ```
26 | 
27 | or via docker:
28 | 
29 | ```
30 | docker run -d --rm -p 9256:9256 --privileged -v /proc:/host/proc -v `pwd`:/config ncabatoff/process-exporter --procfs /host/proc -config.path /config/filename.yml
31 | 
32 | ```
33 | 
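Once the exporter is running, point Prometheus at it. A minimal scrape-config sketch (job name and target are illustrative):

```
scrape_configs:
  - job_name: process-exporter
    static_configs:
      - targets: ['localhost:9256']
```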
34 | Important options (run process-exporter --help for the full list):
35 | 
36 | -children (default:true) makes it so that any process that otherwise
37 | isn't part of its own group becomes part of the first group found (if any) when
38 | walking the process tree upwards. In other words, resource usage of
39 | subprocesses is added to their parent's usage unless the subprocess identifies
40 | as a different group name.
41 | 
42 | -threads (default:true) means that metrics will be broken down by thread name
43 | as well as group name.
44 | 
45 | -recheck (default:false) means that on each scrape the process names are
46 | re-evaluated. This is disabled by default as an optimization, but since
47 | processes can choose to change their names, this may result in a process
48 | falling into the wrong group if we happen to see it for the first time before
49 | it has assumed its proper name. You can use -recheck-with-time-limit to enable this
50 | feature only for a specific duration after a process starts.
51 | 
52 | -procnames is intended as a quick alternative to using a config file. Details
53 | in the following section.
54 | 
55 | -remove-empty-groups (default:false) forgets process groups with no processes.
56 | This is particularly useful if you have some process groups that you expect will
57 | never return (e.g. if you have process groups named "scan-", and once
58 | the scan is completed no more processes will ever run for that scan again).
59 | 
60 | To disable any of these boolean options, use `-option=false`.
61 | 
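For example, to keep the packaged config but turn off per-thread metrics and child accounting (a sketch; the config path shown is the one installed by the deb/rpm packages):

```
process-exporter -config.path /etc/process-exporter/all.yaml -children=false -threads=false
```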
62 | ## Configuration and group naming
63 | 
64 | To select and group the processes to monitor, either provide command-line
65 | arguments or use a YAML configuration file.
66 | 
67 | The recommended option is to use a config file via -config.path, but for
68 | convenience and backwards compatibility the -procnames/-namemapping options
69 | exist as an alternative.
70 | 
71 | ### Using a config file
72 | 
73 | The general format of the -config.path YAML file is a top-level
74 | `process_names` section, containing a list of name matchers:
75 | 
76 | ```
77 | process_names:
78 |   - matcher1
79 |   - matcher2
80 |   ...
81 |   - matcherN
82 | ```
83 | 
84 | The default config shipped with the deb/rpm packages is:
85 | 
86 | ```
87 | process_names:
88 |   - name: "{{.Comm}}"
89 |     cmdline:
90 |     - '.+'
91 | ```
92 | 
93 | A process may only belong to one group: even if multiple items would match, the
94 | first one listed in the file wins.
95 | 
96 | (Side note: to avoid confusion with the cmdline YAML element, we'll refer to
97 | the command-line arguments of a process `/proc/<pid>/cmdline` as the array
98 | `argv[]`.)
99 | 
100 | #### Using a config file: group name
101 | 
102 | Each item in `process_names` gives a recipe for identifying and naming
103 | processes. The optional `name` tag defines a template to use to name
104 | matching processes; if not specified, `name` defaults to `{{.ExeBase}}`.
105 | 
106 | Template variables available:
107 | - `{{.Comm}}` contains the basename of the original executable, i.e. the 2nd field in `/proc/<pid>/stat`
108 | - `{{.ExeBase}}` contains the basename of the executable
109 | - `{{.ExeFull}}` contains the fully qualified path of the executable
110 | - `{{.Username}}` contains the username of the effective user
111 | - `{{.Matches}}` map contains all the matches resulting from applying cmdline regexps
112 | - `{{.PID}}` contains the PID of the process. Note that using PID means the group
113 |   will only contain a single process.
114 | - `{{.StartTime}}` contains the start time of the process. This can be useful
115 |   in conjunction with PID because PIDs get reused over time.
116 | - `{{.Cgroups}}` contains (if supported) the cgroups of the process
117 |   (`/proc/self/cgroup`). This is particularly useful for identifying to which container
118 |   a process belongs.
119 | 
120 | Using `PID` or `StartTime` is discouraged: this is almost never what you want,
121 | and is likely to result in high cardinality metrics which Prometheus will have
122 | trouble with.
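As an illustration of name templates (a sketch, not part of the shipped config), the following names nginx processes by effective user, yielding groups like `nginx:root` for the master and `nginx:www-data` for the workers:

```
process_names:
  - name: "{{.Comm}}:{{.Username}}"
    comm:
    - nginx
```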
123 | 
124 | #### Using a config file: process selectors
125 | 
126 | Each item in `process_names` must contain one or more selectors (`comm`, `exe`
127 | or `cmdline`); if more than one selector is present, they must all match. Each
128 | selector is a list of strings to match against a process's `comm`, `argv[0]`,
129 | or in the case of `cmdline`, a regexp to apply to the command line. The cmdline
130 | regexp uses the [Go syntax](https://golang.org/pkg/regexp).
131 | 
132 | For `comm` and `exe`, the list of strings is an OR, meaning any process
133 | matching any of the strings will be added to the item's group.
134 | 
135 | For `cmdline`, the list of regexes is an AND, meaning they all must match. Any
136 | capturing groups in a regexp must use the `?P<name>` option to assign a name to
137 | the capture, which is used to populate `.Matches`.
138 | 
139 | Performance tip: give an exe or comm clause in addition to any cmdline
140 | clause, so you avoid executing the regexp when the executable name doesn't
141 | match.
142 | 
143 | ```
144 | 
145 | process_names:
146 |   # comm is the second field of /proc/<pid>/stat minus parens.
147 |   # It is the base executable name, truncated at 15 chars.
148 |   # It cannot be modified by the program, unlike exe.
149 |   - comm:
150 |     - bash
151 | 
152 |   # exe is argv[0]. If no slashes, only basename of argv[0] need match.
153 |   # If exe contains slashes, argv[0] must match exactly.
154 |   - exe:
155 |     - postgres
156 |     - /usr/local/bin/prometheus
157 | 
158 |   # cmdline is a list of regexps applied to argv.
159 |   # Each must match, and any captures are added to the .Matches map.
160 |   - name: "{{.ExeFull}}:{{.Matches.Cfgfile}}"
161 |     exe:
162 |     - /usr/local/bin/process-exporter
163 |     cmdline:
164 |     - -config.path\s+(?P<Cfgfile>\S+)
165 | 
166 | ```
167 | 
168 | Here's the config I use on my home machine:
169 | 
170 | ```
171 | 
172 | process_names:
173 |   - comm:
174 |     - chromium-browse
175 |     - bash
176 |     - prometheus
177 |     - gvim
178 |   - exe:
179 |     - /sbin/upstart
180 |     cmdline:
181 |     - --user
182 |     name: upstart:-user
183 | 
184 | ```
185 | 
186 | ### Using -procnames/-namemapping instead of config.path
187 | 
188 | Every name in the procnames list becomes a process group. The default name of
189 | a process is the value found in the second field of /proc/<pid>/stat
190 | ("comm"), which is truncated at 15 chars. Usually this is the same as the
191 | name of the executable.
192 | 
193 | If -namemapping isn't provided, every process with a comm value present
194 | in -procnames is assigned to a group based on that name, and any other
195 | processes are ignored.
196 | 
197 | The -namemapping option is a comma-separated list of alternating
198 | name,regexp values. It allows assigning a name to a process based on a
199 | combination of the process name and command line. For example, using
200 | 
201 |     -namemapping "python2,([^/]+)\.py,java,-jar\s+([^/]+).jar"
202 | 
203 | will make it so that each different python2 and java -jar invocation will be
204 | tracked with distinct metrics. Processes whose remapped name is absent from
205 | the procnames list will be ignored. On an Ubuntu Xenial machine being used as
206 | a workstation, here's a good way of tracking resource usage for a few
207 | different key user apps:
208 | 
209 |     process-exporter -namemapping "upstart,(--user)" \
210 |       -procnames chromium-browse,bash,gvim,prometheus,process-exporter,upstart:-user
211 | 
212 | Since upstart --user is the parent process of the X11 session, this will
213 | make all apps started by the user fall into the group named "upstart:-user",
214 | unless they're one of the others named explicitly with -procnames, like gvim.
215 | 
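For comparison, the python2 half of the -namemapping example above corresponds roughly to this config-file matcher (a sketch; the capture name `Script` is arbitrary):

```
process_names:
  - name: "python2:{{.Matches.Script}}"
    comm:
    - python2
    cmdline:
    - '(?P<Script>[^/]+)\.py'
```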
263 | and nonvoluntary_ctxt_switches. The extra label `ctxswitchtype` can have two values:
264 | `voluntary` and `nonvoluntary`.
265 | 
266 | ### memory_bytes gauge
267 | 
268 | Number of bytes of memory used. The extra label `memtype` can have three values:
269 | 
270 | *resident*: Field rss(24) from /proc/[pid]/stat, whose doc says:
271 | 
272 | > This is just the pages which count toward text, data, or stack space. This does not include pages which have not been demand-loaded in, or which are swapped out.
273 | 
274 | *virtual*: Field vsize(23) from /proc/[pid]/stat, virtual memory size.
275 | 
276 | *swapped*: Field VmSwap from /proc/[pid]/status, translated from KB to bytes.
277 | 
278 | If smaps gathering is enabled (see the -gather-smaps option), two additional values for `memtype` are added:
279 | 
280 | *proportionalResident*: Sum of "Pss" fields from /proc/[pid]/smaps, whose doc says:
281 | 
282 | > The "proportional set size" (PSS) of a process is the count of pages it has
283 | > in memory, where each page is divided by the number of processes sharing it.
284 | 
285 | *proportionalSwapped*: Sum of "SwapPss" fields from /proc/[pid]/smaps
286 | 
287 | ### open_filedesc gauge
288 | 
289 | Number of file descriptors, based on counting how many entries are in the directory
290 | /proc/[pid]/fd.
291 | 
292 | ### worst_fd_ratio gauge
293 | 
294 | Worst ratio of open filedescs to filedesc limit, amongst all the procs in the
295 | group. The limit is the fd soft limit based on /proc/[pid]/limits.
296 | 
297 | Normally Prometheus metrics ought to be as "basic" as possible (i.e. the raw
298 | values rather than a derived ratio), but we use a ratio here because nothing
299 | else makes sense. Suppose there are 10 procs in a given group, each with a
300 | soft limit of 4096; one of them has 4000 open fds and the others all have
301 | 40. Their total fd count is 4360 against a total soft limit of 40960, a
302 | ratio of about 0.10, yet one of the procs is about to run out of fds. With
303 | worst_fd_ratio we're able to know this: in the above example it would be
304 | 0.97 (4000/4096), rather than the 0.10 you'd see if you computed sum(open_filedesc) /
305 | sum(limit_filedesc).
306 | 
307 | ### oldest_start_time_seconds gauge
308 | 
309 | Epoch time (seconds since 1970/1/1) at which the oldest process in the group
310 | started. This is derived from field starttime(22) from /proc/[pid]/stat, added
311 | to boot time to make it relative to epoch.
312 | 
313 | ### num_threads gauge
314 | 
315 | Sum of the number of threads of all processes in the group. Based on field num_threads(20)
316 | from /proc/[pid]/stat.
317 | 
318 | ### states gauge
319 | 
320 | Number of threads in the group in each of various states, based on the field
321 | state(3) from /proc/[pid]/stat.
322 | 
323 | The extra label `state` can have these values: `Running`, `Sleeping`, `Waiting`, `Zombie`, `Other`.
324 | 
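On the Prometheus side, the counters above are normally wrapped in rate(). A recording-rule sketch (group and rule names are illustrative):

```
groups:
  - name: process-exporter
    rules:
      - record: namedprocess:cpu_seconds:rate5m
        expr: rate(namedprocess_namegroup_cpu_seconds_total[5m])
```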
325 | ## Group Thread Metrics
326 | 
327 | Since publishing thread metrics adds a lot of overhead, use `-threads=false` to disable them
328 | if necessary.
329 | 
330 | All these metrics start with `namedprocess_namegroup_` and have at minimum
331 | the labels `groupname` and `threadname`. `threadname` is field comm(2) from
332 | /proc/[pid]/stat. Just as groupname breaks the set of processes down into
333 | groups, threadname breaks a given process group down into subgroups.
334 | 
335 | ### thread_count gauge
336 | 
337 | Number of threads in this thread subgroup.
338 | 
339 | ### thread_cpu_seconds_total counter
340 | 
341 | Same as cpu_seconds_total, but broken down
342 | per-thread subgroup. As with cpu_seconds_total,
343 | the label `mode` is used to distinguish between `user` and `system` time.
344 | 
345 | ### thread_io_bytes_total counter
346 | 
347 | Same as read_bytes_total and write_bytes_total, but broken down
348 | per-thread subgroup and combined into a single metric:
349 | the label `iomode` is used to distinguish between `read` and `write` bytes.
350 | 
351 | ### thread_major_page_faults_total counter
352 | 
353 | Same as major_page_faults_total, but broken down per-thread subgroup.
354 | 
355 | ### thread_minor_page_faults_total counter
356 | 
357 | Same as minor_page_faults_total, but broken down per-thread subgroup.
358 | 
359 | ### thread_context_switches_total counter
360 | 
361 | Same as context_switches_total, but broken down per-thread subgroup.
362 | 
363 | ## Instrumentation cost
364 | 
365 | process-exporter will consume CPU in proportion to the number of processes in
366 | the system and the rate at which new ones are created. The most expensive
367 | parts - applying regexps and executing templates - are only applied once per
368 | process seen, unless the command-line option -recheck is provided.
369 | 
370 | If you have mostly long-running processes, process-exporter's overhead should be
371 | minimal: each time a scrape occurs, it will parse /proc/$pid/stat and
372 | /proc/$pid/cmdline for every process being monitored and add up a few numbers.
373 | 
374 | ## Dashboards
375 | 
376 | An example Grafana dashboard to view the metrics is available at https://grafana.net/dashboards/249
377 | 
378 | ## Building
379 | 
380 | Requires Go 1.21 or later.
381 | ```
382 | make
383 | ```
384 | 
385 | ## Exposing metrics through HTTPS
386 | 
387 | web-config.yml:
388 | ```
389 | # Minimal TLS configuration example. Additionally, a certificate and a key file
390 | # are needed.
391 | tls_server_config:
392 |   cert_file: server.crt
393 |   key_file: server.key
394 | ```
395 | Running:
396 | ```
397 | $ ./process-exporter -web.config.file web-config.yml &
398 | $ curl -sk https://localhost:9256/metrics | grep process
399 | 
400 | # HELP namedprocess_scrape_errors general scrape errors: no proc metrics collected during a cycle
401 | # TYPE namedprocess_scrape_errors counter
402 | namedprocess_scrape_errors 0
403 | # HELP namedprocess_scrape_partial_errors incremented each time a tracked proc's metrics collection fails partially, e.g. unreadable I/O stats
404 | # TYPE namedprocess_scrape_partial_errors counter
405 | namedprocess_scrape_partial_errors 0
406 | # HELP namedprocess_scrape_procread_errors incremented each time a proc's metrics collection fails
407 | # TYPE namedprocess_scrape_procread_errors counter
408 | namedprocess_scrape_procread_errors 0
409 | # HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
410 | # TYPE process_cpu_seconds_total counter
411 | process_cpu_seconds_total 0.21
412 | # HELP process_exporter_build_info A metric with a constant '1' value labeled by version, revision, branch, and goversion from which process_exporter was built.
413 | # TYPE process_exporter_build_info gauge
414 | process_exporter_build_info{branch="",goversion="go1.17.3",revision="",version=""} 1
415 | # HELP process_max_fds Maximum number of open file descriptors.
416 | # TYPE process_max_fds gauge 417 | process_max_fds 1.048576e+06 418 | # HELP process_open_fds Number of open file descriptors. 419 | # TYPE process_open_fds gauge 420 | process_open_fds 10 421 | ``` 422 | 423 | For further information about TLS configuration, please visit: [exporter-toolkit](https://github.com/prometheus/exporter-toolkit/blob/master/docs/web-configuration.md) 424 | -------------------------------------------------------------------------------- /cloudbuild.release.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | # - name: string 3 | # args: string 4 | # env: string 5 | # dir: string 6 | # id: string 7 | # waitFor: string 8 | # entrypoint: string 9 | # secretEnv: string 10 | 11 | # Setup the workspace 12 | - name: gcr.io/cloud-builders/go 13 | env: ['PROJECT_ROOT=github.com/ncabatoff/process-exporter'] 14 | args: ['env'] 15 | 16 | # Build project 17 | - name: gcr.io/cloud-builders/docker 18 | entrypoint: 'bash' 19 | args: ['-c', 'docker build -t ncabatoff/process-exporter:`echo $TAG_NAME|sed s/^v//` .'] 20 | 21 | # Login to docker hub 22 | - name: gcr.io/cloud-builders/docker 23 | entrypoint: 'bash' 24 | args: ['-c', 'docker login --username=ncabatoff --password=$$DOCKER_PASSWORD'] 25 | secretEnv: ['DOCKER_PASSWORD'] 26 | 27 | # Push to docker hub 28 | - name: gcr.io/cloud-builders/docker 29 | entrypoint: 'bash' 30 | args: ['-c', 'docker push ncabatoff/process-exporter:`echo $TAG_NAME|sed s/^v//`'] 31 | 32 | # Create github release 33 | - name: goreleaser/goreleaser 34 | entrypoint: /bin/sh 35 | dir: gopath/src/github.com 36 | env: ['GOPATH=/workspace/gopath'] 37 | args: ['-c', 'cd ncabatoff/process-exporter && git tag $TAG_NAME && /goreleaser' ] 38 | secretEnv: ['GITHUB_TOKEN'] 39 | 40 | secrets: 41 | - kmsKeyName: projects/process-exporter/locations/global/keyRings/cloudbuild/cryptoKeys/mykey 42 | secretEnv: 43 | DOCKER_PASSWORD: | 44 | CiQAeHUuEinm1h2j9mp8r0NjPw1l1bBwzDG+JHPUPf3GvtmdjXESMAD3wUauaxWrxid/zPunG67x 45 | 5+1CYedV5exh0XwQ32eu4UkniS7HHJNWBudklaG0JA== 46 | GITHUB_TOKEN: | 47 | CiQAeHUuEhEKAvfIHlUZrCgHNScm0mDKI8Z1w/N3OzDk8Ql6kAUSUQD3wUau7qRc+H7OnTUo6b2Z 48 | DKA1eMKHNg729KfHj2ZMqZXinrJloYMbZcZRXP9xv91xCq6QJB5UoFoyYDnXGdvgXC08YUstR6UB 49 | H0bwHhe1GQ== 50 | -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | # - name: string 3 | # args: string 4 | # env: string 5 | # dir: string 6 | # id: string 7 | # waitFor: string 8 | # entrypoint: string 9 | # secretEnv: string 10 | # - name: gcr.io/cloud-builders/curl 11 | # args: ['-L', '-s', '-o', 'dep', 'https://github.com/golang/dep/releases/download/v0.5.0/dep-linux-amd64'] 12 | # - name: ubuntu 13 | # args: ['chmod', '+x', 'dep'] 14 | # Setup the workspace 15 | - name: gcr.io/cloud-builders/go 16 | env: ['PROJECT_ROOT=github.com/ncabatoff/process-exporter'] 17 | args: ['env'] 18 | # Run dep in the workspace created in previous step 19 | # - name: gcr.io/cloud-builders/go 20 | # entrypoint: /bin/sh 21 | # dir: gopath/src/github.com 22 | # env: ['GOPATH=/workspace/gopath'] 23 | # args: ['-c', 'cd ncabatoff/process-exporter && /workspace/dep ensure -vendor-only' ] 24 | - name: gcr.io/cloud-builders/go 25 | entrypoint: /bin/sh 26 | dir: gopath/src/github.com 27 | env: ['GOPATH=/workspace/gopath'] 28 | args: ['-c', 'make -C ncabatoff/process-exporter style vet test build integ install' ] 29 | - name: 
gcr.io/cloud-builders/docker 30 | args: ['build', '--tag=gcr.io/$PROJECT_ID/process-exporter', '.', '-f', 'Dockerfile.cloudbuild'] 31 | images: ['gcr.io/$PROJECT_ID/process-exporter'] 32 | -------------------------------------------------------------------------------- /cmd/integration-tester/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "context" 7 | "flag" 8 | "fmt" 9 | "log" 10 | "os" 11 | "os/exec" 12 | "path/filepath" 13 | "strconv" 14 | "strings" 15 | "time" 16 | ) 17 | 18 | // You wouldn't think our child could start before us, but I have observed it; maybe due to rounding? 19 | var start = time.Now().Unix() - 1 20 | 21 | func main() { 22 | var ( 23 | flagProcessExporter = flag.String("process-exporter", "./process-exporter", "path to process-exporter") 24 | flagLoadGenerator = flag.String("load-generator", "./load-generator", "path to load-generator") 25 | flagAttempts = flag.Int("attempts", 3, "try this many times before returning failure") 26 | flagWriteSizeBytes = flag.Int("write-size-bytes", 1024*1024, "how many bytes to write each cycle") 27 | ) 28 | flag.Parse() 29 | 30 | ctx, cancel := context.WithCancel(context.Background()) 31 | defer cancel() 32 | cmdlg := exec.CommandContext(ctx, *flagLoadGenerator, "-write-size-bytes", strconv.Itoa(*flagWriteSizeBytes)) 33 | var buf = &bytes.Buffer{} 34 | cmdlg.Stdout = buf 35 | err := cmdlg.Start() 36 | if err != nil { 37 | log.Fatalf("Error launching load generator %q: %v", *flagLoadGenerator, err) 38 | } 39 | for !strings.HasPrefix(buf.String(), "ready") { 40 | time.Sleep(time.Second) 41 | } 42 | 43 | success := false 44 | for i := 0; i < *flagAttempts; i++ { 45 | comm := filepath.Base(*flagLoadGenerator) 46 | cmdpe := exec.CommandContext(ctx, *flagProcessExporter, "-once-to-stdout-delay", "20s", 47 | "-procnames", comm, "-threads=true") 48 | out, err := cmdpe.Output() 49 | if err != nil { 50 | log.Fatalf("Error launching process-exporter %q: %v", *flagProcessExporter, err) 51 | } 52 | log.Println(string(out)) 53 | 54 | results := getResults(comm, string(out)) 55 | if verify(results) { 56 | success = true 57 | break 58 | } 59 | log.Printf("try %d/%d failed", i+1, *flagAttempts) 60 | } 61 | 62 | cancel() 63 | cmdlg.Wait() 64 | 65 | if !success { 66 | os.Exit(1) 67 | } 68 | } 69 | 70 | type result struct { 71 | name string 72 | labels map[string]string 73 | value float64 74 | } 75 | 76 | func getResults(group string, out string) map[string][]result { 77 | results := make(map[string][]result) 78 | 79 | skiplabel := fmt.Sprintf(`groupname="%s"`, group) 80 | lines := bufio.NewScanner(strings.NewReader(out)) 81 | lines.Split(bufio.ScanLines) 82 | for lines.Scan() { 83 | line := lines.Text() 84 | metric, value := "", 0.0 85 | _, err := fmt.Sscanf(line, "namedprocess_namegroup_%s %f", &metric, &value) 86 | if err != nil { 87 | continue 88 | } 89 | 90 | pos := strings.IndexByte(metric, '{') 91 | if pos == -1 { 92 | log.Fatalf("cannot parse metric %q, no open curly found", metric) 93 | } 94 | 95 | name, labelstr := metric[:pos], metric[pos+1:] 96 | labelstr = labelstr[:len(labelstr)-1] 97 | labels := make(map[string]string) 98 | for _, kv := range strings.Split(labelstr, ",") { 99 | if kv != skiplabel { 100 | pieces := strings.SplitN(kv, "=", 2) 101 | labelname, labelvalue := pieces[0], pieces[1][1:len(pieces[1])-1] 102 | labels[labelname] = labelvalue 103 | } 104 | } 105 | 106 | results[name] = append(results[name], result{name, labels, 
value}) 107 | } 108 | return results 109 | } 110 | 111 | func verify(results map[string][]result) bool { 112 | success := true 113 | 114 | assertExact := func(name string, got, want float64) { 115 | if got != want { 116 | success = false 117 | log.Printf("expected %s to be %f, got %f", name, want, got) 118 | } 119 | } 120 | 121 | assertGreaterOrEqual := func(name string, got, want float64) { 122 | if got < want { 123 | success = false 124 | log.Printf("expected %s to have at least %f, got %f", name, want, got) 125 | } 126 | } 127 | 128 | assertExact("num_procs", results["num_procs"][0].value, 1) 129 | 130 | // Four locked threads plus go runtime means more than 7, but we'll say 7 to play it safe. 131 | assertGreaterOrEqual("num_threads", results["num_threads"][0].value, 7) 132 | 133 | // Our child must have started later than us. 134 | assertGreaterOrEqual("oldest_start_time_seconds", 135 | results["oldest_start_time_seconds"][0].value, float64(start)) 136 | 137 | for _, result := range results["states"] { 138 | switch state := result.labels["state"]; state { 139 | case "Other", "Zombie": 140 | assertExact("state "+state, result.value, 0) 141 | case "Running": 142 | assertGreaterOrEqual("state "+state, result.value, 2) 143 | case "Waiting": 144 | assertGreaterOrEqual("state "+state, result.value, 0) 145 | case "Sleeping": 146 | assertGreaterOrEqual("state "+state, result.value, 4) 147 | } 148 | } 149 | 150 | for _, result := range results["thread_count"] { 151 | switch tname := result.labels["threadname"]; tname { 152 | case "blocking", "sysbusy", "userbusy", "waiting": 153 | assertExact("thread_count "+tname, result.value, 1) 154 | case "main": 155 | assertGreaterOrEqual("thread_count "+tname, result.value, 3) 156 | } 157 | } 158 | 159 | for _, result := range results["thread_cpu_seconds_total"] { 160 | if result.labels["mode"] == "system" { 161 | switch tname := result.labels["threadname"]; tname { 162 | case "sysbusy", "blocking": 163 | assertGreaterOrEqual("thread_cpu_seconds_total system "+tname, result.value, 0.00001) 164 | default: 165 | assertGreaterOrEqual("thread_cpu_seconds_total system "+tname, result.value, 0) 166 | } 167 | } else if result.labels["mode"] == "user" { 168 | switch tname := result.labels["threadname"]; tname { 169 | case "userbusy": 170 | assertGreaterOrEqual("thread_cpu_seconds_total user "+tname, result.value, 0.00001) 171 | default: 172 | assertGreaterOrEqual("thread_cpu_seconds_total user "+tname, result.value, 0) 173 | } 174 | } 175 | } 176 | 177 | for _, result := range results["thread_io_bytes_total"] { 178 | tname, iomode := result.labels["threadname"], result.labels["iomode"] 179 | if iomode == "read" { 180 | continue 181 | } 182 | rname := fmt.Sprintf("%s %s %s", "thread_io_bytes_total", iomode, tname) 183 | 184 | switch tname { 185 | case "blocking", "sysbusy": 186 | assertGreaterOrEqual(rname, result.value, 0.00001) 187 | default: 188 | assertExact(rname, result.value, 0) 189 | } 190 | } 191 | 192 | otherwchan := 0.0 193 | for _, result := range results["threads_wchan"] { 194 | switch wchan := result.labels["wchan"]; wchan { 195 | case "poll_schedule_timeout": 196 | assertGreaterOrEqual(wchan, result.value, 1) 197 | case "futex_wait_queue_me": 198 | assertGreaterOrEqual(wchan, result.value, 4) 199 | default: 200 | // The specific wchan involved for the blocking thread varies by filesystem. 
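			// Whatever the wchan is, tally it as "other" instead of asserting on a name we can't predict.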
201 | otherwchan++ 202 | } 203 | } 204 | // assertGreaterOrEqual("other wchan", otherwchan, 1) 205 | 206 | return success 207 | } 208 | -------------------------------------------------------------------------------- /cmd/load-generator/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io/ioutil" 7 | "math/rand" 8 | "runtime" 9 | "syscall" 10 | "unsafe" 11 | ) 12 | 13 | var ready = make(chan struct{}) 14 | 15 | func init() { 16 | var ( 17 | flagWaiting = flag.Int("waiting", 1, "minimum number of waiting threads") 18 | flagUserBusy = flag.Int("userbusy", 1, "minimum number of userbusy threads") 19 | flagSysBusy = flag.Int("sysbusy", 1, "minimum number of sysbusy threads") 20 | flagBlocking = flag.Int("blocking", 1, "minimum number of io blocking threads") 21 | flagWriteSizeBytes = flag.Int("write-size-bytes", 1024*1024, "how many bytes to write each cycle") 22 | ) 23 | flag.Parse() 24 | runtime.LockOSThread() 25 | for i := 0; i < *flagWaiting; i++ { 26 | go waiting() 27 | <-ready 28 | } 29 | for i := 0; i < *flagUserBusy; i++ { 30 | go userbusy() 31 | <-ready 32 | } 33 | for i := 0; i < *flagSysBusy; i++ { 34 | go diskio(false, *flagWriteSizeBytes) 35 | <-ready 36 | } 37 | for i := 0; i < *flagBlocking; i++ { 38 | go diskio(true, *flagWriteSizeBytes) 39 | <-ready 40 | } 41 | } 42 | 43 | func main() { 44 | c := make(chan struct{}) 45 | fmt.Println("ready") 46 | <-c 47 | } 48 | 49 | func setPrName(name string) error { 50 | bytes := append([]byte(name), 0) 51 | ptr := unsafe.Pointer(&bytes[0]) 52 | 53 | _, _, errno := syscall.RawSyscall6(syscall.SYS_PRCTL, syscall.PR_SET_NAME, uintptr(ptr), 0, 0, 0, 0) 54 | if errno != 0 { 55 | return syscall.Errno(errno) 56 | } 57 | return nil 58 | } 59 | 60 | func waiting() { 61 | runtime.LockOSThread() 62 | setPrName("waiting") 63 | ready <- struct{}{} 64 | 65 | c := make(chan struct{}) 66 | <-c 67 | } 68 | 69 | func userbusy() { 70 | runtime.LockOSThread() 71 | setPrName("userbusy") 72 | ready <- struct{}{} 73 | 74 | i := 1.0000001 75 | for { 76 | i *= i 77 | } 78 | } 79 | 80 | func diskio(sync bool, writesize int) { 81 | runtime.LockOSThread() 82 | if sync { 83 | setPrName("blocking") 84 | } else { 85 | setPrName("sysbusy") 86 | } 87 | 88 | // Use random data because if we're on a filesystem that does compression like ZFS, 89 | // using zeroes is almost a no-op. 
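	// math/rand is sufficient here: the data only needs to defeat compression,
	// not be cryptographically random.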
90 | 	b := make([]byte, writesize)
91 | 	_, err := rand.Read(b)
92 | 	if err != nil {
93 | 		panic("unable to get rands: " + err.Error())
94 | 	}
95 | 
96 | 	f, err := ioutil.TempFile("", "loadgen")
97 | 	if err != nil {
98 | 		panic("unable to create tempfile: " + err.Error())
99 | 	}
100 | 	defer f.Close()
101 | 
102 | 	sentready := false
103 | 
104 | 	offset := int64(0)
105 | 	for {
106 | 		_, err = f.WriteAt(b, offset)
107 | 		if err != nil {
108 | 			panic("unable to write tempfile: " + err.Error())
109 | 		}
110 | 
111 | 		if sync {
112 | 			err = f.Sync()
113 | 			if err != nil {
114 | 				panic("unable to sync tempfile: " + err.Error())
115 | 			}
116 | 		}
117 | 
118 | 		_, err = f.ReadAt(b, 0)
119 | 		if err != nil {
120 | 			panic("unable to read tempfile: " + err.Error())
121 | 		}
122 | 		if !sentready {
123 | 			ready <- struct{}{}
124 | 			sentready = true
125 | 		}
126 | 		offset++
127 | 	}
128 | }
129 | 

--------------------------------------------------------------------------------
/cmd/process-exporter/main.go:
--------------------------------------------------------------------------------

1 | package main
2 | 
3 | import (
4 | 	"context"
5 | 	"flag"
6 | 	"fmt"
7 | 	"log"
8 | 	"net/http"
9 | 	_ "net/http/pprof"
10 | 	"os"
11 | 	"os/signal"
12 | 	"regexp"
13 | 	"strings"
14 | 	"syscall"
15 | 	"time"
16 | 
17 | 	"github.com/ncabatoff/fakescraper"
18 | 	common "github.com/ncabatoff/process-exporter"
19 | 	"github.com/ncabatoff/process-exporter/collector"
20 | 	"github.com/ncabatoff/process-exporter/config"
21 | 	"github.com/prometheus/client_golang/prometheus"
22 | 	verCollector "github.com/prometheus/client_golang/prometheus/collectors/version"
23 | 	"github.com/prometheus/client_golang/prometheus/promhttp"
24 | 	"github.com/prometheus/common/promlog"
25 | 	promVersion "github.com/prometheus/common/version"
26 | 	"github.com/prometheus/exporter-toolkit/web"
27 | )
28 | 
29 | // Version is set at build time via ldflags.
30 | var version string
31 | 
32 | func printManual() {
33 | 	fmt.Print(`Usage:
34 |   process-exporter [options] -config.path filename.yml
35 | 
36 | or
37 | 
38 |   process-exporter [options] -procnames name1,...,nameN [-namemapping k1,v1,...,kN,vN]
39 | 
40 | The recommended option is to use a config file, but for convenience and
41 | backwards compatibility the -procnames/-namemapping options exist as an
42 | alternative.
43 | 
44 | The -children option (default:true) makes it so that any process that otherwise
45 | isn't part of its own group becomes part of the first group found (if any) when
46 | walking the process tree upwards. In other words, resource usage of
47 | subprocesses is added to their parent's usage unless the subprocess identifies
48 | as a different group name.
49 | 
50 | Command-line process selection (procnames/namemapping):
51 | 
52 | Every process not in the procnames list is ignored. Otherwise, all processes
53 | found are reported on as a group based on the process name they share.
54 | Here 'process name' refers to the value found in the second field of
55 | /proc/<pid>/stat, which is truncated at 15 chars.
56 | 
57 | The -namemapping option allows assigning a group name based on a combination of
58 | the process name and command line. For example, using
59 | 
60 |   -namemapping "python2,([^/]+)\.py,java,-jar\s+([^/]+).jar"
61 | 
62 | will make it so that each different python2 and java -jar invocation will be
63 | tracked with distinct metrics. Processes whose remapped name is absent from
64 | the procnames list will be ignored.
Here's an example that I run on my home
65 | machine (Ubuntu Xenial):
66 | 
67 |   process-exporter -namemapping "upstart,(--user)" \
68 |     -procnames chromium-browse,bash,prometheus,prombench,gvim,upstart:-user
69 | 
70 | Since it appears that upstart --user is the parent process of my X11 session,
71 | this will make all apps I start count against it, unless they're one of the
72 | others named explicitly with -procnames.
73 | 
74 | Config file process selection (filename.yml):
75 | 
76 | See README.md.
77 | ` + "\n")
78 | 
79 | }
80 | 
81 | type (
82 | 	prefixRegex struct {
83 | 		prefix string
84 | 		regex  *regexp.Regexp
85 | 	}
86 | 
87 | 	nameMapperRegex struct {
88 | 		mapping map[string]*prefixRegex
89 | 	}
90 | )
91 | 
92 | func (nmr *nameMapperRegex) String() string {
93 | 	return fmt.Sprintf("%+v", nmr.mapping)
94 | }
95 | 
96 | // Create a nameMapperRegex based on a string given as the -namemapping argument.
97 | func parseNameMapper(s string) (*nameMapperRegex, error) {
98 | 	mapper := make(map[string]*prefixRegex)
99 | 	if s == "" {
100 | 		return &nameMapperRegex{mapper}, nil
101 | 	}
102 | 
103 | 	toks := strings.Split(s, ",")
104 | 	if len(toks)%2 == 1 {
105 | 		return nil, fmt.Errorf("bad namemapper: odd number of tokens")
106 | 	}
107 | 
108 | 	for i, tok := range toks {
109 | 		if tok == "" {
110 | 			return nil, fmt.Errorf("bad namemapper: token %d is empty", i)
111 | 		}
112 | 		if i%2 == 1 {
113 | 			name, regexstr := toks[i-1], tok
114 | 			matchName := name
115 | 			prefix := name + ":"
116 | 
117 | 			if r, err := regexp.Compile(regexstr); err != nil {
118 | 				return nil, fmt.Errorf("error compiling regexp '%s': %v", regexstr, err)
119 | 			} else {
120 | 				mapper[matchName] = &prefixRegex{prefix: prefix, regex: r}
121 | 			}
122 | 		}
123 | 	}
124 | 
125 | 	return &nameMapperRegex{mapper}, nil
126 | }
127 | 
128 | func (nmr *nameMapperRegex) MatchAndName(nacl common.ProcAttributes) (bool, string) {
129 | 	if pregex, ok := nmr.mapping[nacl.Name]; ok {
130 | 		if pregex == nil {
131 | 			return true, nacl.Name
132 | 		}
133 | 		matches := pregex.regex.FindStringSubmatch(strings.Join(nacl.Cmdline, " "))
134 | 		if len(matches) > 1 {
135 | 			for _, matchstr := range matches[1:] {
136 | 				if matchstr != "" {
137 | 					return true, pregex.prefix + matchstr
138 | 				}
139 | 			}
140 | 		}
141 | 	}
142 | 
143 | 	return false, ""
144 | }
145 | 
146 | func init() {
147 | 	promVersion.Version = version
148 | 	prometheus.MustRegister(verCollector.NewCollector("process_exporter"))
149 | }
150 | 
151 | func main() {
152 | 	var (
153 | 		listenAddress = flag.String("web.listen-address", ":9256",
154 | 			"Address on which to expose metrics and web interface.")
155 | 		metricsPath = flag.String("web.telemetry-path", "/metrics",
156 | 			"Path under which to expose metrics.")
157 | 		onceToStdoutDelay = flag.Duration("once-to-stdout-delay", 0,
158 | 			"Don't bind, just wait this much time, print the metrics once to stdout, and exit")
159 | 		procNames = flag.String("procnames", "",
160 | 			"comma-separated list of process names to monitor")
161 | 		procfsPath = flag.String("procfs", "/proc",
162 | 			"path to read proc data from")
163 | 		nameMapping = flag.String("namemapping", "",
164 | 			"comma-separated list, alternating process name and capturing regex to apply to cmdline")
165 | 		children = flag.Bool("children", true,
166 | 			"if a proc is tracked, track with it any children that aren't part of their own group")
167 | 		threads = flag.Bool("threads", true,
168 | 			"report on per-threadname metrics as well")
169 | 		smaps = flag.Bool("gather-smaps", true,
170 | 			"gather metrics from smaps file, which contains
proportional resident memory size") 171 | man = flag.Bool("man", false, 172 | "print manual") 173 | configPath = flag.String("config.path", "", 174 | "path to YAML config file") 175 | tlsConfigFile = flag.String("web.config.file", "", 176 | "path to YAML web config file") 177 | recheck = flag.Bool("recheck", false, 178 | "recheck process names on each scrape") 179 | recheckTimeLimit = flag.Duration("recheck-with-time-limit", 0, 180 | "recheck processes only this much time after their start, but no longer.") 181 | debug = flag.Bool("debug", false, 182 | "log debugging information to stdout") 183 | showVersion = flag.Bool("version", false, 184 | "print version information and exit") 185 | removeEmptyGroups = flag.Bool("remove-empty-groups", false, "forget process groups with no processes") 186 | ) 187 | flag.Parse() 188 | 189 | promlogConfig := &promlog.Config{} 190 | logger := promlog.New(promlogConfig) 191 | 192 | if *showVersion { 193 | fmt.Printf("%s\n", promVersion.Print("process-exporter")) 194 | os.Exit(0) 195 | } 196 | 197 | if *man { 198 | printManual() 199 | return 200 | } 201 | 202 | var matchnamer common.MatchNamer 203 | 204 | if *configPath != "" { 205 | if *nameMapping != "" || *procNames != "" { 206 | log.Fatalf("-config.path cannot be used with -namemapping or -procnames") 207 | } 208 | 209 | cfg, err := config.ReadFile(*configPath, *debug) 210 | if err != nil { 211 | log.Fatalf("error reading config file %q: %v", *configPath, err) 212 | } 213 | log.Printf("Reading metrics from %s based on %q", *procfsPath, *configPath) 214 | matchnamer = cfg.MatchNamers 215 | if *debug { 216 | log.Printf("using config matchnamer: %v", cfg.MatchNamers) 217 | } 218 | } else { 219 | namemapper, err := parseNameMapper(*nameMapping) 220 | if err != nil { 221 | log.Fatalf("Error parsing -namemapping argument '%s': %v", *nameMapping, err) 222 | } 223 | 224 | var names []string 225 | for _, s := range strings.Split(*procNames, ",") { 226 | if s != "" { 227 | if _, ok := namemapper.mapping[s]; !ok { 228 | namemapper.mapping[s] = nil 229 | } 230 | names = append(names, s) 231 | } 232 | } 233 | 234 | log.Printf("Reading metrics from %s for procnames: %v", *procfsPath, names) 235 | if *debug { 236 | log.Printf("using cmdline matchnamer: %v", namemapper) 237 | } 238 | matchnamer = namemapper 239 | } 240 | 241 | if *recheckTimeLimit != 0 { 242 | *recheck = true 243 | } 244 | 245 | pc, err := collector.NewProcessCollector( 246 | collector.ProcessCollectorOption{ 247 | ProcFSPath: *procfsPath, 248 | Children: *children, 249 | Threads: *threads, 250 | GatherSMaps: *smaps, 251 | Namer: matchnamer, 252 | Recheck: *recheck, 253 | RecheckTimeLimit: *recheckTimeLimit, 254 | Debug: *debug, 255 | RemoveEmptyGroups: *removeEmptyGroups, 256 | }, 257 | ) 258 | if err != nil { 259 | log.Fatalf("Error initializing: %v", err) 260 | } 261 | 262 | prometheus.MustRegister(pc) 263 | 264 | if *onceToStdoutDelay != 0 { 265 | // We throw away the first result because that first collection primes the pump, and 266 | // otherwise we won't see our counter metrics. This is specific to the implementation 267 | // of NamedProcessCollector.Collect(). 
268 | 		fscraper := fakescraper.NewFakeScraper()
269 | 		fscraper.Scrape()
270 | 		time.Sleep(*onceToStdoutDelay)
271 | 		fmt.Print(fscraper.Scrape())
272 | 		return
273 | 	}
274 | 
275 | 	sigs := make(chan os.Signal, 1)
276 | 	signal.Notify(sigs, syscall.SIGTERM, syscall.SIGINT)
277 | 
278 | 	http.Handle(*metricsPath, promhttp.Handler())
279 | 
280 | 	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
281 | 		w.Write([]byte(`<html>
282 | 			<head><title>Named Process Exporter</title></head>
283 | 			<body>
284 | 			<h1>Named Process Exporter</h1>
285 | 			<p><a href="` + *metricsPath + `">Metrics</a></p>
286 | 			</body>
287 | 			</html>`))
288 | 	})
289 | 	server := &http.Server{Addr: *listenAddress}
290 | 
291 | 	go func() {
292 | 		<-sigs
293 | 		log.Printf("Shutting down the server\n")
294 | 
295 | 		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
296 | 		defer cancel()
297 | 
298 | 		if err := server.Shutdown(ctx); err != nil {
299 | 			log.Fatalf("Server Shutdown Failed: %v", err)
300 | 		}
301 | 	}()
302 | 
303 | 	if err := web.ListenAndServe(server, &web.FlagConfig{
304 | 		WebListenAddresses: &[]string{*listenAddress},
305 | 		WebConfigFile:      tlsConfigFile,
306 | 	}, logger); err != nil && err != http.ErrServerClosed {
307 | 		log.Fatalf("Failed to start the server: %v", err)
308 | 		os.Exit(1)
309 | 	}
310 | }
311 | 

--------------------------------------------------------------------------------
/collector/process_collector.go:
--------------------------------------------------------------------------------

1 | package collector
2 | 
3 | import (
4 | 	"log"
5 | 	"time"
6 | 
7 | 	common "github.com/ncabatoff/process-exporter"
8 | 	"github.com/ncabatoff/process-exporter/proc"
9 | 	"github.com/prometheus/client_golang/prometheus"
10 | )
11 | 
12 | var (
13 | 	numprocsDesc = prometheus.NewDesc(
14 | 		"namedprocess_namegroup_num_procs",
15 | 		"number of processes in this group",
16 | 		[]string{"groupname"},
17 | 		nil)
18 | 
19 | 	cpuSecsDesc = prometheus.NewDesc(
20 | 		"namedprocess_namegroup_cpu_seconds_total",
21 | 		"Cpu user/system usage in seconds",
22 | 		[]string{"groupname", "mode"},
23 | 		nil)
24 | 
25 | 	readBytesDesc = prometheus.NewDesc(
26 | 		"namedprocess_namegroup_read_bytes_total",
27 | 		"number of bytes read by this group",
28 | 		[]string{"groupname"},
29 | 		nil)
30 | 
31 | 	writeBytesDesc = prometheus.NewDesc(
32 | 		"namedprocess_namegroup_write_bytes_total",
33 | 		"number of bytes written by this group",
34 | 		[]string{"groupname"},
35 | 		nil)
36 | 
37 | 	majorPageFaultsDesc = prometheus.NewDesc(
38 | 		"namedprocess_namegroup_major_page_faults_total",
39 | 		"Major page faults",
40 | 		[]string{"groupname"},
41 | 		nil)
42 | 
43 | 	minorPageFaultsDesc = prometheus.NewDesc(
44 | 		"namedprocess_namegroup_minor_page_faults_total",
45 | 		"Minor page faults",
46 | 		[]string{"groupname"},
47 | 		nil)
48 | 
49 | 	contextSwitchesDesc = prometheus.NewDesc(
50 | 		"namedprocess_namegroup_context_switches_total",
51 | 		"Context switches",
52 | 		[]string{"groupname", "ctxswitchtype"},
53 | 		nil)
54 | 
55 | 	membytesDesc = prometheus.NewDesc(
56 | 		"namedprocess_namegroup_memory_bytes",
57 | 		"number of bytes of memory in use",
58 | 		[]string{"groupname", "memtype"},
59 | 		nil)
60 | 
61 | 	openFDsDesc = prometheus.NewDesc(
62 | 		"namedprocess_namegroup_open_filedesc",
63 | 		"number of open file descriptors for this group",
64 | 		[]string{"groupname"},
65 | 		nil)
66 | 
67 | 	worstFDRatioDesc = prometheus.NewDesc(
68 | 		"namedprocess_namegroup_worst_fd_ratio",
69 | 		"the worst (closest to 1) ratio between open fds and max fds among all procs in this group",
70 | 		[]string{"groupname"},
71 | 		nil)
72 | 
73 | 	startTimeDesc = prometheus.NewDesc(
74 | 		"namedprocess_namegroup_oldest_start_time_seconds",
75 | 		"start time in seconds since 1970/01/01 of oldest process in group",
76 | 		[]string{"groupname"},
77 | 		nil)
78 | 
79 | 	numThreadsDesc = prometheus.NewDesc(
80 | 		"namedprocess_namegroup_num_threads",
81 | 		"Number of threads",
82 | 		[]string{"groupname"},
83 | 		nil)
84 | 
85 | 	statesDesc = prometheus.NewDesc(
86 | 		"namedprocess_namegroup_states",
87 | 		"Number of processes in states Running, Sleeping, Waiting, Zombie, or Other",
88 | 		[]string{"groupname", "state"},
89 | 		nil)
90 | 
91 | 	scrapeErrorsDesc
= prometheus.NewDesc( 92 | "namedprocess_scrape_errors", 93 | "general scrape errors: no proc metrics collected during a cycle", 94 | nil, 95 | nil) 96 | 97 | scrapeProcReadErrorsDesc = prometheus.NewDesc( 98 | "namedprocess_scrape_procread_errors", 99 | "incremented each time a proc's metrics collection fails", 100 | nil, 101 | nil) 102 | 103 | scrapePartialErrorsDesc = prometheus.NewDesc( 104 | "namedprocess_scrape_partial_errors", 105 | "incremented each time a tracked proc's metrics collection fails partially, e.g. unreadable I/O stats", 106 | nil, 107 | nil) 108 | 109 | threadWchanDesc = prometheus.NewDesc( 110 | "namedprocess_namegroup_threads_wchan", 111 | "Number of threads in this group waiting on each wchan", 112 | []string{"groupname", "wchan"}, 113 | nil) 114 | 115 | threadCountDesc = prometheus.NewDesc( 116 | "namedprocess_namegroup_thread_count", 117 | "Number of threads in this group with same threadname", 118 | []string{"groupname", "threadname"}, 119 | nil) 120 | 121 | threadCpuSecsDesc = prometheus.NewDesc( 122 | "namedprocess_namegroup_thread_cpu_seconds_total", 123 | "Cpu user/system usage in seconds", 124 | []string{"groupname", "threadname", "mode"}, 125 | nil) 126 | 127 | threadIoBytesDesc = prometheus.NewDesc( 128 | "namedprocess_namegroup_thread_io_bytes_total", 129 | "number of bytes read/written by these threads", 130 | []string{"groupname", "threadname", "iomode"}, 131 | nil) 132 | 133 | threadMajorPageFaultsDesc = prometheus.NewDesc( 134 | "namedprocess_namegroup_thread_major_page_faults_total", 135 | "Major page faults for these threads", 136 | []string{"groupname", "threadname"}, 137 | nil) 138 | 139 | threadMinorPageFaultsDesc = prometheus.NewDesc( 140 | "namedprocess_namegroup_thread_minor_page_faults_total", 141 | "Minor page faults for these threads", 142 | []string{"groupname", "threadname"}, 143 | nil) 144 | 145 | threadContextSwitchesDesc = prometheus.NewDesc( 146 | "namedprocess_namegroup_thread_context_switches_total", 147 | "Context switches for these threads", 148 | []string{"groupname", "threadname", "ctxswitchtype"}, 149 | nil) 150 | ) 151 | 152 | type ( 153 | scrapeRequest struct { 154 | results chan<- prometheus.Metric 155 | done chan struct{} 156 | } 157 | 158 | ProcessCollectorOption struct { 159 | ProcFSPath string 160 | Children bool 161 | Threads bool 162 | GatherSMaps bool 163 | Namer common.MatchNamer 164 | Recheck bool 165 | RecheckTimeLimit time.Duration 166 | Debug bool 167 | RemoveEmptyGroups bool 168 | } 169 | 170 | NamedProcessCollector struct { 171 | scrapeChan chan scrapeRequest 172 | *proc.Grouper 173 | threads bool 174 | smaps bool 175 | source proc.Source 176 | scrapeErrors int 177 | scrapeProcReadErrors int 178 | scrapePartialErrors int 179 | debug bool 180 | } 181 | ) 182 | 183 | func NewProcessCollector(options ProcessCollectorOption) (*NamedProcessCollector, error) { 184 | fs, err := proc.NewFS(options.ProcFSPath, options.Debug) 185 | if err != nil { 186 | return nil, err 187 | } 188 | 189 | fs.GatherSMaps = options.GatherSMaps 190 | p := &NamedProcessCollector{ 191 | scrapeChan: make(chan scrapeRequest), 192 | Grouper: proc.NewGrouper(options.Namer, options.Children, options.Threads, options.Recheck, options.RecheckTimeLimit, options.Debug, options.RemoveEmptyGroups), 193 | source: fs, 194 | threads: options.Threads, 195 | smaps: options.GatherSMaps, 196 | debug: options.Debug, 197 | } 198 | 199 | colErrs, _, err := p.Update(p.source.AllProcs()) 200 | if err != nil { 201 | if options.Debug { 202 | log.Print(err) 203 | } 
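// Failing the initial Update is fatal for construction: without one successful
// baseline pass over /proc, the grouper has nothing to compute deltas against.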
204 | return nil, err 205 | } 206 | p.scrapePartialErrors += colErrs.Partial 207 | p.scrapeProcReadErrors += colErrs.Read 208 | 209 | go p.start() 210 | 211 | return p, nil 212 | } 213 | 214 | // Describe implements prometheus.Collector. 215 | func (p *NamedProcessCollector) Describe(ch chan<- *prometheus.Desc) { 216 | ch <- cpuSecsDesc 217 | ch <- numprocsDesc 218 | ch <- readBytesDesc 219 | ch <- writeBytesDesc 220 | ch <- membytesDesc 221 | ch <- openFDsDesc 222 | ch <- worstFDRatioDesc 223 | ch <- startTimeDesc 224 | ch <- majorPageFaultsDesc 225 | ch <- minorPageFaultsDesc 226 | ch <- contextSwitchesDesc 227 | ch <- numThreadsDesc 228 | ch <- statesDesc 229 | ch <- scrapeErrorsDesc 230 | ch <- scrapeProcReadErrorsDesc 231 | ch <- scrapePartialErrorsDesc 232 | ch <- threadWchanDesc 233 | ch <- threadCountDesc 234 | ch <- threadCpuSecsDesc 235 | ch <- threadIoBytesDesc 236 | ch <- threadMajorPageFaultsDesc 237 | ch <- threadMinorPageFaultsDesc 238 | ch <- threadContextSwitchesDesc 239 | } 240 | 241 | // Collect implements prometheus.Collector. 242 | func (p *NamedProcessCollector) Collect(ch chan<- prometheus.Metric) { 243 | req := scrapeRequest{results: ch, done: make(chan struct{})} 244 | p.scrapeChan <- req 245 | <-req.done 246 | } 247 | 248 | func (p *NamedProcessCollector) start() { 249 | for req := range p.scrapeChan { 250 | ch := req.results 251 | p.scrape(ch) 252 | req.done <- struct{}{} 253 | } 254 | } 255 | 256 | func (p *NamedProcessCollector) scrape(ch chan<- prometheus.Metric) { 257 | permErrs, groups, err := p.Update(p.source.AllProcs()) 258 | p.scrapePartialErrors += permErrs.Partial 259 | if err != nil { 260 | p.scrapeErrors++ 261 | log.Printf("error reading procs: %v", err) 262 | } else { 263 | for gname, gcounts := range groups { 264 | ch <- prometheus.MustNewConstMetric(numprocsDesc, 265 | prometheus.GaugeValue, float64(gcounts.Procs), gname) 266 | ch <- prometheus.MustNewConstMetric(membytesDesc, 267 | prometheus.GaugeValue, float64(gcounts.Memory.ResidentBytes), gname, "resident") 268 | ch <- prometheus.MustNewConstMetric(membytesDesc, 269 | prometheus.GaugeValue, float64(gcounts.Memory.VirtualBytes), gname, "virtual") 270 | ch <- prometheus.MustNewConstMetric(membytesDesc, 271 | prometheus.GaugeValue, float64(gcounts.Memory.VmSwapBytes), gname, "swapped") 272 | ch <- prometheus.MustNewConstMetric(startTimeDesc, 273 | prometheus.GaugeValue, float64(gcounts.OldestStartTime.Unix()), gname) 274 | ch <- prometheus.MustNewConstMetric(openFDsDesc, 275 | prometheus.GaugeValue, float64(gcounts.OpenFDs), gname) 276 | ch <- prometheus.MustNewConstMetric(worstFDRatioDesc, 277 | prometheus.GaugeValue, float64(gcounts.WorstFDratio), gname) 278 | ch <- prometheus.MustNewConstMetric(cpuSecsDesc, 279 | prometheus.CounterValue, gcounts.CPUUserTime, gname, "user") 280 | ch <- prometheus.MustNewConstMetric(cpuSecsDesc, 281 | prometheus.CounterValue, gcounts.CPUSystemTime, gname, "system") 282 | ch <- prometheus.MustNewConstMetric(readBytesDesc, 283 | prometheus.CounterValue, float64(gcounts.ReadBytes), gname) 284 | ch <- prometheus.MustNewConstMetric(writeBytesDesc, 285 | prometheus.CounterValue, float64(gcounts.WriteBytes), gname) 286 | ch <- prometheus.MustNewConstMetric(majorPageFaultsDesc, 287 | prometheus.CounterValue, float64(gcounts.MajorPageFaults), gname) 288 | ch <- prometheus.MustNewConstMetric(minorPageFaultsDesc, 289 | prometheus.CounterValue, float64(gcounts.MinorPageFaults), gname) 290 | ch <- prometheus.MustNewConstMetric(contextSwitchesDesc, 291 | 
prometheus.CounterValue, float64(gcounts.CtxSwitchVoluntary), gname, "voluntary") 292 | ch <- prometheus.MustNewConstMetric(contextSwitchesDesc, 293 | prometheus.CounterValue, float64(gcounts.CtxSwitchNonvoluntary), gname, "nonvoluntary") 294 | ch <- prometheus.MustNewConstMetric(numThreadsDesc, 295 | prometheus.GaugeValue, float64(gcounts.NumThreads), gname) 296 | ch <- prometheus.MustNewConstMetric(statesDesc, 297 | prometheus.GaugeValue, float64(gcounts.States.Running), gname, "Running") 298 | ch <- prometheus.MustNewConstMetric(statesDesc, 299 | prometheus.GaugeValue, float64(gcounts.States.Sleeping), gname, "Sleeping") 300 | ch <- prometheus.MustNewConstMetric(statesDesc, 301 | prometheus.GaugeValue, float64(gcounts.States.Waiting), gname, "Waiting") 302 | ch <- prometheus.MustNewConstMetric(statesDesc, 303 | prometheus.GaugeValue, float64(gcounts.States.Zombie), gname, "Zombie") 304 | ch <- prometheus.MustNewConstMetric(statesDesc, 305 | prometheus.GaugeValue, float64(gcounts.States.Other), gname, "Other") 306 | 307 | for wchan, count := range gcounts.Wchans { 308 | ch <- prometheus.MustNewConstMetric(threadWchanDesc, 309 | prometheus.GaugeValue, float64(count), gname, wchan) 310 | } 311 | 312 | if p.smaps { 313 | ch <- prometheus.MustNewConstMetric(membytesDesc, 314 | prometheus.GaugeValue, float64(gcounts.Memory.ProportionalBytes), gname, "proportionalResident") 315 | ch <- prometheus.MustNewConstMetric(membytesDesc, 316 | prometheus.GaugeValue, float64(gcounts.Memory.ProportionalSwapBytes), gname, "proportionalSwapped") 317 | } 318 | 319 | if p.threads { 320 | for _, thr := range gcounts.Threads { 321 | ch <- prometheus.MustNewConstMetric(threadCountDesc, 322 | prometheus.GaugeValue, float64(thr.NumThreads), 323 | gname, thr.Name) 324 | ch <- prometheus.MustNewConstMetric(threadCpuSecsDesc, 325 | prometheus.CounterValue, float64(thr.CPUUserTime), 326 | gname, thr.Name, "user") 327 | ch <- prometheus.MustNewConstMetric(threadCpuSecsDesc, 328 | prometheus.CounterValue, float64(thr.CPUSystemTime), 329 | gname, thr.Name, "system") 330 | ch <- prometheus.MustNewConstMetric(threadIoBytesDesc, 331 | prometheus.CounterValue, float64(thr.ReadBytes), 332 | gname, thr.Name, "read") 333 | ch <- prometheus.MustNewConstMetric(threadIoBytesDesc, 334 | prometheus.CounterValue, float64(thr.WriteBytes), 335 | gname, thr.Name, "write") 336 | ch <- prometheus.MustNewConstMetric(threadMajorPageFaultsDesc, 337 | prometheus.CounterValue, float64(thr.MajorPageFaults), 338 | gname, thr.Name) 339 | ch <- prometheus.MustNewConstMetric(threadMinorPageFaultsDesc, 340 | prometheus.CounterValue, float64(thr.MinorPageFaults), 341 | gname, thr.Name) 342 | ch <- prometheus.MustNewConstMetric(threadContextSwitchesDesc, 343 | prometheus.CounterValue, float64(thr.CtxSwitchVoluntary), 344 | gname, thr.Name, "voluntary") 345 | ch <- prometheus.MustNewConstMetric(threadContextSwitchesDesc, 346 | prometheus.CounterValue, float64(thr.CtxSwitchNonvoluntary), 347 | gname, thr.Name, "nonvoluntary") 348 | } 349 | } 350 | } 351 | } 352 | ch <- prometheus.MustNewConstMetric(scrapeErrorsDesc, 353 | prometheus.CounterValue, float64(p.scrapeErrors)) 354 | ch <- prometheus.MustNewConstMetric(scrapeProcReadErrorsDesc, 355 | prometheus.CounterValue, float64(p.scrapeProcReadErrors)) 356 | ch <- prometheus.MustNewConstMetric(scrapePartialErrorsDesc, 357 | prometheus.CounterValue, float64(p.scrapePartialErrors)) 358 | } 359 | -------------------------------------------------------------------------------- /common.go: 
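A minimal sketch of how the collector above is typically wired into a Prometheus registry. This is illustrative glue, not code from this repo; the config path and listen address merely echo the packaged defaults, and the error handling is deliberately crude.

package main

import (
	"log"
	"net/http"

	"github.com/ncabatoff/process-exporter/collector"
	"github.com/ncabatoff/process-exporter/config"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// Parse the matching rules; cfg.MatchNamers satisfies common.MatchNamer.
	cfg, err := config.ReadFile("/etc/process-exporter/all.yaml", false)
	if err != nil {
		log.Fatalf("reading config: %v", err)
	}
	pc, err := collector.NewProcessCollector(collector.ProcessCollectorOption{
		ProcFSPath: "/proc",
		Children:   true,
		Namer:      cfg.MatchNamers,
	})
	if err != nil {
		log.Fatalf("creating collector: %v", err)
	}
	prometheus.MustRegister(pc)
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":9256", nil))
}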
-------------------------------------------------------------------------------- 1 | package common 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | ) 7 | 8 | type ( 9 | ProcAttributes struct { 10 | Name string 11 | Cmdline []string 12 | Cgroups []string 13 | Username string 14 | PID int 15 | StartTime time.Time 16 | } 17 | 18 | MatchNamer interface { 19 | // MatchAndName returns false if the match failed, otherwise 20 | // true and the resulting name. 21 | MatchAndName(ProcAttributes) (bool, string) 22 | fmt.Stringer 23 | } 24 | ) 25 | -------------------------------------------------------------------------------- /config/base_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "testing" 5 | 6 | . "gopkg.in/check.v1" 7 | ) 8 | 9 | // Hook up gocheck into the "go test" runner. 10 | func Test(t *testing.T) { TestingT(t) } 11 | 12 | type MySuite struct{} 13 | 14 | var _ = Suite(&MySuite{}) 15 | -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io/ioutil" 7 | "log" 8 | "path/filepath" 9 | "regexp" 10 | "strings" 11 | "text/template" 12 | "time" 13 | 14 | common "github.com/ncabatoff/process-exporter" 15 | "gopkg.in/yaml.v2" 16 | ) 17 | 18 | type ( 19 | Matcher interface { 20 | // Match returns true if the given process attributes satisfy this matcher. 21 | Match(common.ProcAttributes) bool 22 | } 23 | 24 | FirstMatcher struct { 25 | matchers []common.MatchNamer 26 | } 27 | 28 | commMatcher struct { 29 | comms map[string]struct{} 30 | } 31 | 32 | exeMatcher struct { 33 | exes map[string]string 34 | } 35 | 36 | cmdlineMatcher struct { 37 | regexes []*regexp.Regexp 38 | captures map[string]string 39 | } 40 | 41 | andMatcher []Matcher 42 | 43 | templateNamer struct { 44 | template *template.Template 45 | } 46 | 47 | matchNamer struct { 48 | andMatcher 49 | templateNamer 50 | } 51 | 52 | templateParams struct { 53 | Cgroups []string 54 | Comm string 55 | ExeBase string 56 | ExeFull string 57 | Username string 58 | PID int 59 | StartTime time.Time 60 | Matches map[string]string 61 | } 62 | ) 63 | 64 | func (c *cmdlineMatcher) String() string { 65 | return fmt.Sprintf("cmdlines: %+v", c.regexes) 66 | } 67 | 68 | func (e *exeMatcher) String() string { 69 | return fmt.Sprintf("exes: %+v", e.exes) 70 | } 71 | 72 | func (c *commMatcher) String() string { 73 | var comms = make([]string, 0, len(c.comms)) 74 | for cm := range c.comms { 75 | comms = append(comms, cm) 76 | } 77 | return fmt.Sprintf("comms: %+v", comms) 78 | } 79 | 80 | func (f FirstMatcher) String() string { 81 | return fmt.Sprintf("%v", f.matchers) 82 | } 83 | 84 | func (f FirstMatcher) MatchAndName(nacl common.ProcAttributes) (bool, string) { 85 | for _, m := range f.matchers { 86 | if matched, name := m.MatchAndName(nacl); matched { 87 | return true, name 88 | } 89 | } 90 | return false, "" 91 | } 92 | 93 | func (m *matchNamer) String() string { 94 | return fmt.Sprintf("%+v", m.andMatcher) 95 | } 96 | 97 | func (m *matchNamer) MatchAndName(nacl common.ProcAttributes) (bool, string) { 98 | if !m.Match(nacl) { 99 | return false, "" 100 | } 101 | 102 | matches := make(map[string]string) 103 | for _, m := range m.andMatcher { 104 | if mc, ok := m.(*cmdlineMatcher); ok { 105 | for k, v := range mc.captures { 106 | matches[k] = v 107 | } 108 | } 109 | } 110 | 111 | exebase, exefull :=
nacl.Name, nacl.Name 112 | if len(nacl.Cmdline) > 0 { 113 | exefull = nacl.Cmdline[0] 114 | exebase = filepath.Base(exefull) 115 | } 116 | 117 | var buf bytes.Buffer 118 | m.template.Execute(&buf, &templateParams{ 119 | Comm: nacl.Name, 120 | Cgroups: nacl.Cgroups, 121 | ExeBase: exebase, 122 | ExeFull: exefull, 123 | Matches: matches, 124 | Username: nacl.Username, 125 | PID: nacl.PID, 126 | StartTime: nacl.StartTime, 127 | }) 128 | return true, buf.String() 129 | } 130 | 131 | func (m *commMatcher) Match(nacl common.ProcAttributes) bool { 132 | _, found := m.comms[nacl.Name] 133 | return found 134 | } 135 | 136 | func (m *exeMatcher) Match(nacl common.ProcAttributes) bool { 137 | if len(nacl.Cmdline) == 0 { 138 | return false 139 | } 140 | thisbase := filepath.Base(nacl.Cmdline[0]) 141 | fqpath, found := m.exes[thisbase] 142 | if !found { 143 | return false 144 | } 145 | if fqpath == "" { 146 | return true 147 | } 148 | 149 | return fqpath == nacl.Cmdline[0] 150 | } 151 | 152 | func (m *cmdlineMatcher) Match(nacl common.ProcAttributes) bool { 153 | for _, regex := range m.regexes { 154 | captures := regex.FindStringSubmatch(strings.Join(nacl.Cmdline, " ")) 155 | if captures == nil { 156 | return false 157 | } 158 | subexpNames := regex.SubexpNames() 159 | if len(subexpNames) != len(captures) { 160 | return false 161 | } 162 | 163 | for i, name := range subexpNames { 164 | m.captures[name] = captures[i] 165 | } 166 | } 167 | return true 168 | } 169 | 170 | func (m andMatcher) Match(nacl common.ProcAttributes) bool { 171 | for _, matcher := range m { 172 | if !matcher.Match(nacl) { 173 | return false 174 | } 175 | } 176 | return true 177 | } 178 | 179 | type Config struct { 180 | MatchNamers FirstMatcher 181 | } 182 | 183 | func (c *Config) UnmarshalYAML(unmarshal func(v interface{}) error) error { 184 | type ( 185 | root struct { 186 | Matchers MatcherRules `yaml:"process_names"` 187 | } 188 | ) 189 | 190 | var r root 191 | if err := unmarshal(&r); err != nil { 192 | return err 193 | } 194 | 195 | cfg, err := r.Matchers.ToConfig() 196 | if err != nil { 197 | return err 198 | } 199 | *c = *cfg 200 | return nil 201 | } 202 | 203 | type MatcherGroup struct { 204 | Name string `yaml:"name"` 205 | CommRules []string `yaml:"comm"` 206 | ExeRules []string `yaml:"exe"` 207 | CmdlineRules []string `yaml:"cmdline"` 208 | } 209 | 210 | type MatcherRules []MatcherGroup 211 | 212 | func (r MatcherRules) ToConfig() (*Config, error) { 213 | var cfg Config 214 | 215 | for _, matcher := range r { 216 | var matchers andMatcher 217 | 218 | if matcher.CommRules != nil { 219 | comms := make(map[string]struct{}) 220 | for _, c := range matcher.CommRules { 221 | comms[c] = struct{}{} 222 | } 223 | matchers = append(matchers, &commMatcher{comms}) 224 | } 225 | if matcher.ExeRules != nil { 226 | exes := make(map[string]string) 227 | for _, e := range matcher.ExeRules { 228 | if strings.Contains(e, "/") { 229 | exes[filepath.Base(e)] = e 230 | } else { 231 | exes[e] = "" 232 | } 233 | } 234 | matchers = append(matchers, &exeMatcher{exes}) 235 | } 236 | if matcher.CmdlineRules != nil { 237 | var rs []*regexp.Regexp 238 | for _, c := range matcher.CmdlineRules { 239 | r, err := regexp.Compile(c) 240 | if err != nil { 241 | return nil, fmt.Errorf("bad cmdline regex %q: %v", c, err) 242 | } 243 | rs = append(rs, r) 244 | } 245 | matchers = append(matchers, &cmdlineMatcher{ 246 | regexes: rs, 247 | captures: make(map[string]string), 248 | }) 249 | } 250 | if len(matchers) == 0 { 251 | return nil, fmt.Errorf("no
matchers provided") 252 | } 253 | 254 | nametmpl := matcher.Name 255 | if nametmpl == "" { 256 | nametmpl = "{{.ExeBase}}" 257 | } 258 | tmpl := template.New("cmdname") 259 | tmpl, err := tmpl.Parse(nametmpl) 260 | if err != nil { 261 | return nil, fmt.Errorf("bad name template %q: %v", nametmpl, err) 262 | } 263 | 264 | matchNamer := &matchNamer{matchers, templateNamer{tmpl}} 265 | cfg.MatchNamers.matchers = append(cfg.MatchNamers.matchers, matchNamer) 266 | } 267 | 268 | return &cfg, nil 269 | } 270 | 271 | // ReadRecipesFile opens the named file and extracts recipes from it. 272 | func ReadFile(cfgpath string, debug bool) (*Config, error) { 273 | content, err := ioutil.ReadFile(cfgpath) 274 | if err != nil { 275 | return nil, fmt.Errorf("error reading config file %q: %v", cfgpath, err) 276 | } 277 | if debug { 278 | log.Printf("Config file %q contents:\n%s", cfgpath, content) 279 | } 280 | return GetConfig(string(content), debug) 281 | } 282 | 283 | // GetConfig extracts Config from content by parsing it as YAML. 284 | func GetConfig(content string, debug bool) (*Config, error) { 285 | var cfg Config 286 | err := yaml.Unmarshal([]byte(content), &cfg) 287 | if err != nil { 288 | return nil, err 289 | } 290 | return &cfg, nil 291 | } 292 | -------------------------------------------------------------------------------- /config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | // "github.com/kylelemons/godebug/pretty" 5 | common "github.com/ncabatoff/process-exporter" 6 | . "gopkg.in/check.v1" 7 | "time" 8 | ) 9 | 10 | func (s MySuite) TestConfigBasic(c *C) { 11 | yml := ` 12 | process_names: 13 | - exe: 14 | - bash 15 | - exe: 16 | - sh 17 | - exe: 18 | - /bin/ksh 19 | ` 20 | cfg, err := GetConfig(yml, false) 21 | c.Assert(err, IsNil) 22 | c.Check(cfg.MatchNamers.matchers, HasLen, 3) 23 | 24 | bash := common.ProcAttributes{Name: "bash", Cmdline: []string{"/bin/bash"}} 25 | sh := common.ProcAttributes{Name: "sh", Cmdline: []string{"sh"}} 26 | ksh := common.ProcAttributes{Name: "ksh", Cmdline: []string{"/bin/ksh"}} 27 | 28 | found, name := cfg.MatchNamers.matchers[0].MatchAndName(bash) 29 | c.Check(found, Equals, true) 30 | c.Check(name, Equals, "bash") 31 | found, name = cfg.MatchNamers.matchers[0].MatchAndName(sh) 32 | c.Check(found, Equals, false) 33 | found, name = cfg.MatchNamers.matchers[0].MatchAndName(ksh) 34 | c.Check(found, Equals, false) 35 | 36 | found, name = cfg.MatchNamers.matchers[1].MatchAndName(bash) 37 | c.Check(found, Equals, false) 38 | found, name = cfg.MatchNamers.matchers[1].MatchAndName(sh) 39 | c.Check(found, Equals, true) 40 | c.Check(name, Equals, "sh") 41 | found, name = cfg.MatchNamers.matchers[1].MatchAndName(ksh) 42 | c.Check(found, Equals, false) 43 | 44 | found, name = cfg.MatchNamers.matchers[2].MatchAndName(bash) 45 | c.Check(found, Equals, false) 46 | found, name = cfg.MatchNamers.matchers[2].MatchAndName(sh) 47 | c.Check(found, Equals, false) 48 | found, name = cfg.MatchNamers.matchers[2].MatchAndName(ksh) 49 | c.Check(found, Equals, true) 50 | c.Check(name, Equals, "ksh") 51 | } 52 | 53 | func (s MySuite) TestConfigTemplates(c *C) { 54 | yml := ` 55 | process_names: 56 | - exe: 57 | - postmaster 58 | cmdline: 59 | - "-D\\s+.+?(?P[^/]+)(?:$|\\s)" 60 | name: "{{.ExeBase}}:{{.Matches.Path}}" 61 | - exe: 62 | - prometheus 63 | name: "{{.ExeFull}}:{{.PID}}" 64 | - comm: 65 | - cat 66 | name: "{{.StartTime}}" 67 | ` 68 | cfg, err := GetConfig(yml, false) 69 | c.Assert(err, 
IsNil) 70 | c.Check(cfg.MatchNamers.matchers, HasLen, 3) 71 | 72 | postgres := common.ProcAttributes{Name: "postmaster", Cmdline: []string{"/usr/bin/postmaster", "-D", "/data/pg"}} 73 | found, name := cfg.MatchNamers.matchers[0].MatchAndName(postgres) 74 | c.Check(found, Equals, true) 75 | c.Check(name, Equals, "postmaster:pg") 76 | 77 | pm := common.ProcAttributes{ 78 | Name: "prometheus", 79 | Cmdline: []string{"/usr/local/bin/prometheus"}, 80 | PID: 23, 81 | } 82 | found, name = cfg.MatchNamers.matchers[1].MatchAndName(pm) 83 | c.Check(found, Equals, true) 84 | c.Check(name, Equals, "/usr/local/bin/prometheus:23") 85 | 86 | now := time.Now() 87 | cat := common.ProcAttributes{ 88 | Name: "cat", 89 | Cmdline: []string{"/bin/cat"}, 90 | StartTime: now, 91 | } 92 | found, name = cfg.MatchNamers.matchers[2].MatchAndName(cat) 93 | c.Check(found, Equals, true) 94 | c.Check(name, Equals, now.String()) 95 | } 96 | -------------------------------------------------------------------------------- /fixtures/14804/cgroup: -------------------------------------------------------------------------------- 1 | 0::/system.slice/docker-8dde0b0d6e919baef8d635cd9399b22639ed1e400eaec1b1cb94ff3b216cf3c3.scope 2 | -------------------------------------------------------------------------------- /fixtures/14804/cmdline: -------------------------------------------------------------------------------- 1 | ./process-exporter-procnamesbash -------------------------------------------------------------------------------- /fixtures/14804/comm: -------------------------------------------------------------------------------- 1 | process-exporte 2 | -------------------------------------------------------------------------------- /fixtures/14804/exe: -------------------------------------------------------------------------------- 1 | /usr/bin/process-exporter -------------------------------------------------------------------------------- /fixtures/14804/fd/0: -------------------------------------------------------------------------------- 1 | ../../symlinktargets/abc -------------------------------------------------------------------------------- /fixtures/14804/fd/1: -------------------------------------------------------------------------------- 1 | ../../symlinktargets/def -------------------------------------------------------------------------------- /fixtures/14804/fd/10: -------------------------------------------------------------------------------- 1 | ../../symlinktargets/xyz -------------------------------------------------------------------------------- /fixtures/14804/fd/2: -------------------------------------------------------------------------------- 1 | ../../symlinktargets/ghi -------------------------------------------------------------------------------- /fixtures/14804/fd/3: -------------------------------------------------------------------------------- 1 | ../../symlinktargets/uvw -------------------------------------------------------------------------------- /fixtures/14804/io: -------------------------------------------------------------------------------- 1 | rchar: 1605958 2 | wchar: 69 3 | syscr: 5534 4 | syscw: 1 5 | read_bytes: 1814455 6 | write_bytes: 0 7 | cancelled_write_bytes: 0 8 | -------------------------------------------------------------------------------- /fixtures/14804/limits: -------------------------------------------------------------------------------- 1 | Limit Soft Limit Hard Limit Units 2 | Max cpu time unlimited unlimited seconds 3 | Max file size unlimited unlimited bytes 
4 | Max data size unlimited unlimited bytes 5 | Max stack size 8388608 unlimited bytes 6 | Max core file size 0 unlimited bytes 7 | Max resident set unlimited unlimited bytes 8 | Max processes 31421 31421 processes 9 | Max open files 1024 65536 files 10 | Max locked memory 65536 65536 bytes 11 | Max address space unlimited unlimited bytes 12 | Max file locks unlimited unlimited locks 13 | Max pending signals 31421 31421 signals 14 | Max msgqueue size 819200 819200 bytes 15 | Max nice priority 0 0 16 | Max realtime priority 0 0 17 | Max realtime timeout unlimited unlimited us 18 | -------------------------------------------------------------------------------- /fixtures/14804/stat: -------------------------------------------------------------------------------- 1 | 14804 (process-exporte) S 10884 14804 10884 34834 14895 1077936128 1603 0 767 0 10 4 0 0 20 0 7 0 324219 17174528 1969 18446744073709551615 4194304 7971236 140736389529632 140736389529064 4564099 0 0 0 2143420159 0 0 0 17 4 0 0 2 0 0 10805248 11036864 42311680 140736389534279 140736389534314 140736389534314 140736389537765 0 2 | -------------------------------------------------------------------------------- /fixtures/14804/status: -------------------------------------------------------------------------------- 1 | Name: process-exporte 2 | State: S (sleeping) 3 | Tgid: 14804 4 | Ngid: 0 5 | Pid: 14804 6 | PPid: 10884 7 | TracerPid: 0 8 | Uid: 1000 1000 1000 1000 9 | Gid: 1000 1000 1000 1000 10 | FDSize: 256 11 | Groups: 4 24 27 30 46 110 111 127 1000 12 | NStgid: 14804 13 | NSpid: 14804 14 | NSpgid: 14804 15 | NSsid: 10884 16 | VmPeak: 16772 kB 17 | VmSize: 16772 kB 18 | VmLck: 0 kB 19 | VmPin: 0 kB 20 | VmHWM: 7876 kB 21 | VmRSS: 7876 kB 22 | VmData: 9956 kB 23 | VmStk: 132 kB 24 | VmExe: 3692 kB 25 | VmLib: 0 kB 26 | VmPTE: 48 kB 27 | VmPMD: 20 kB 28 | VmSwap: 10 kB 29 | HugetlbPages: 0 kB 30 | Threads: 7 31 | SigQ: 0/31421 32 | SigPnd: 0000000000000000 33 | ShdPnd: 0000000000000000 34 | SigBlk: 0000000000000000 35 | SigIgn: 0000000000000000 36 | SigCgt: fffffffe7fc1feff 37 | CapInh: 0000000000000000 38 | CapPrm: 0000000000000000 39 | CapEff: 0000000000000000 40 | CapBnd: 0000003fffffffff 41 | CapAmb: 0000000000000000 42 | Seccomp: 0 43 | Cpus_allowed: ff 44 | Cpus_allowed_list: 0-7 45 | Mems_allowed: 00000000,00000001 46 | Mems_allowed_list: 0 47 | voluntary_ctxt_switches: 72 48 | nonvoluntary_ctxt_switches: 6 49 | -------------------------------------------------------------------------------- /fixtures/stat: -------------------------------------------------------------------------------- 1 | cpu 258072 10128 55919 2163830 6946 0 2336 0 0 0 2 | cpu0 44237 138 12166 358089 1410 0 306 0 0 0 3 | cpu1 39583 23 11894 363839 1027 0 230 0 0 0 4 | cpu2 44817 2670 9943 355700 1509 0 824 0 0 0 5 | cpu3 41434 3808 6188 363646 886 0 250 0 0 0 6 | cpu4 46320 2279 9630 356546 1342 0 312 0 0 0 7 | cpu5 41680 1209 6096 366008 769 0 412 0 0 0 8 | intr 16484556 45 2 0 0 0 0 0 2 1 0 0 0 4 0 0 988 219000 4 3 1601 0 0 247107 0 0 0 0 771839 691840 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 | ctxt 30119844 10 | btime 1508450329 11 | processes 28048 12 | procs_running 2 13 | procs_blocked 0 14 | softirq 5524311 18 1594113 712 780657 248302 0 24642 1420512 0 1455355 15 | -------------------------------------------------------------------------------- /fixtures/symlinktargets/README: -------------------------------------------------------------------------------- 1 | This directory contains some empty files that are the symlinks the files in the "fd" directory point to. 2 | They are otherwise ignored by the tests 3 | -------------------------------------------------------------------------------- /fixtures/symlinktargets/abc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncabatoff/process-exporter/e52ab0a1c03d6baf8ed60154c254ecc0008549f3/fixtures/symlinktargets/abc -------------------------------------------------------------------------------- /fixtures/symlinktargets/def: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncabatoff/process-exporter/e52ab0a1c03d6baf8ed60154c254ecc0008549f3/fixtures/symlinktargets/def -------------------------------------------------------------------------------- /fixtures/symlinktargets/ghi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncabatoff/process-exporter/e52ab0a1c03d6baf8ed60154c254ecc0008549f3/fixtures/symlinktargets/ghi -------------------------------------------------------------------------------- /fixtures/symlinktargets/uvw: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncabatoff/process-exporter/e52ab0a1c03d6baf8ed60154c254ecc0008549f3/fixtures/symlinktargets/uvw -------------------------------------------------------------------------------- /fixtures/symlinktargets/xyz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncabatoff/process-exporter/e52ab0a1c03d6baf8ed60154c254ecc0008549f3/fixtures/symlinktargets/xyz -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ncabatoff/process-exporter 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.23.8 6 | 7 | require ( 8 | github.com/google/go-cmp v0.6.0 9 | github.com/ncabatoff/fakescraper v0.0.0-20201102132415-4b37ba603d65 10 | github.com/ncabatoff/go-seq v0.0.0-20180805175032-b08ef85ed833 11 | github.com/prometheus/client_golang v1.19.0 12 | github.com/prometheus/common v0.52.3 13 | github.com/prometheus/exporter-toolkit v0.11.0 14 | github.com/prometheus/procfs v0.14.0 15 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c 16 | gopkg.in/yaml.v2 v2.4.0 17 | ) 18 | 19 | require ( 20 | github.com/beorn7/perks v1.0.1 // indirect 21 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 22 | github.com/coreos/go-systemd/v22 v22.5.0 // indirect 23 | github.com/go-kit/log v0.2.1 // indirect 24 | github.com/go-logfmt/logfmt 
v0.5.1 // indirect 25 | github.com/golang/protobuf v1.5.3 // indirect 26 | github.com/jpillora/backoff v1.0.0 // indirect 27 | github.com/kr/pretty v0.3.1 // indirect 28 | github.com/kr/text v0.2.0 // indirect 29 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect 30 | github.com/prometheus/client_model v0.6.0 // indirect 31 | github.com/rogpeppe/go-internal v1.10.0 // indirect 32 | github.com/stretchr/testify v1.9.0 // indirect 33 | golang.org/x/crypto v0.35.0 // indirect 34 | golang.org/x/net v0.36.0 // indirect 35 | golang.org/x/oauth2 v0.18.0 // indirect 36 | golang.org/x/sync v0.11.0 // indirect 37 | golang.org/x/sys v0.30.0 // indirect 38 | golang.org/x/text v0.22.0 // indirect 39 | google.golang.org/appengine v1.6.7 // indirect 40 | google.golang.org/protobuf v1.33.0 // indirect 41 | ) 42 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 2 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 3 | github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= 4 | github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 5 | github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= 6 | github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= 7 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 8 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 9 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 10 | github.com/go-kit/log v0.2.1 h1:MRVx0/zhvdseW+Gza6N9rVzU/IVzaeE1SFI4raAhmBU= 11 | github.com/go-kit/log v0.2.1/go.mod h1:NwTd00d/i8cPZ3xOwwiv2PO5MOcx78fFErGNcVmBjv0= 12 | github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= 13 | github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= 14 | github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 15 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 16 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 17 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 18 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 19 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 20 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 21 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 22 | github.com/jpillora/backoff v1.0.0 h1:uvFg412JmmHBHw7iwprIxkPMI+sGQ4kzOWsMeHnm2EA= 23 | github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= 24 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 25 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 26 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 27 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 28 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 29 | github.com/kr/text v0.2.0 
h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 30 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 31 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= 32 | github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= 33 | github.com/ncabatoff/fakescraper v0.0.0-20201102132415-4b37ba603d65 h1:Og+dVkxEQNvRGU2vUKeOwYT2UJ+pEaDMWB6tIQnIh6A= 34 | github.com/ncabatoff/fakescraper v0.0.0-20201102132415-4b37ba603d65/go.mod h1:Tx6UMSMyIsjLG/VU/F6xA1+0XI+/f9o1dGJnf1l+bPg= 35 | github.com/ncabatoff/go-seq v0.0.0-20180805175032-b08ef85ed833 h1:t4WWQ9I797y7QUgeEjeXnVb+oYuEDQc6gLvrZJTYo94= 36 | github.com/ncabatoff/go-seq v0.0.0-20180805175032-b08ef85ed833/go.mod h1:0CznHmXSjMEqs5Tezj/w2emQoM41wzYM9KpDKUHPYag= 37 | github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= 38 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 39 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 40 | github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU= 41 | github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k= 42 | github.com/prometheus/client_model v0.6.0 h1:k1v3CzpSRUTrKMppY35TLwPvxHqBu0bYgxZzqGIgaos= 43 | github.com/prometheus/client_model v0.6.0/go.mod h1:NTQHnmxFpouOD0DpvP4XujX3CdOAGQPoaGhyTchlyt8= 44 | github.com/prometheus/common v0.52.3 h1:5f8uj6ZwHSscOGNdIQg6OiZv/ybiK2CO2q2drVZAQSA= 45 | github.com/prometheus/common v0.52.3/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U= 46 | github.com/prometheus/exporter-toolkit v0.11.0 h1:yNTsuZ0aNCNFQ3aFTD2uhPOvr4iD7fdBvKPAEGkNf+g= 47 | github.com/prometheus/exporter-toolkit v0.11.0/go.mod h1:BVnENhnNecpwoTLiABx7mrPB/OLRIgN74qlQbV+FK1Q= 48 | github.com/prometheus/procfs v0.14.0 h1:Lw4VdGGoKEZilJsayHf0B+9YgLGREba2C6xr+Fdfq6s= 49 | github.com/prometheus/procfs v0.14.0/go.mod h1:XL+Iwz8k8ZabyZfMFHPiilCniixqQarAy5Mu67pHlNQ= 50 | github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= 51 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 52 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 53 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 54 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 55 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 56 | golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= 57 | golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= 58 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 59 | golang.org/x/net v0.36.0 h1:vWF2fRbw4qslQsQzgFqZff+BItCvGFQqKzKIzx1rmoA= 60 | golang.org/x/net v0.36.0/go.mod h1:bFmbeoIPfrw4sMHNhb4J9f6+tPziuGjq7Jk/38fxi1I= 61 | golang.org/x/oauth2 v0.18.0 h1:09qnuIAgzdx1XplqJvW6CQqMCtGZykZWcXzPMPUusvI= 62 | golang.org/x/oauth2 v0.18.0/go.mod h1:Wf7knwG0MPoWIMMBgFlEaSUDaKskp0dCfrlJRJXbBi8= 63 | golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= 64 | golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 65 | golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 66 | golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= 67 | golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 68 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 69 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 70 | golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= 71 | golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= 72 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 73 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 74 | google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= 75 | google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= 76 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 77 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 78 | google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= 79 | google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 80 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 81 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 82 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 83 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 84 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 85 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 86 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 87 | -------------------------------------------------------------------------------- /packaging/conf/all.yaml: -------------------------------------------------------------------------------- 1 | process_names: 2 | - name: "{{.Comm}}" 3 | cmdline: 4 | - '.+' -------------------------------------------------------------------------------- /packaging/default/process-exporter: -------------------------------------------------------------------------------- 1 | # process-exporter startup flags 2 | OPTS='--config.path /etc/process-exporter/all.yaml --web.listen-address=:9256' 3 | -------------------------------------------------------------------------------- /packaging/process-exporter.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description=Process Exporter for Prometheus 3 | 4 | [Service] 5 | User=root 6 | Type=simple 7 | EnvironmentFile=-/etc/default/process-exporter 8 | ExecStart=/usr/bin/process-exporter $OPTS 9 | KillMode=process 10 | Restart=always 11 | 12 | [Install] 13 | WantedBy=multi-user.target 14 | -------------------------------------------------------------------------------- /packaging/scripts/postinstall.sh: -------------------------------------------------------------------------------- 1 | systemctl daemon-reload 2 | systemctl enable process-exporter.service 3 | systemctl restart process-exporter.service 4 | -------------------------------------------------------------------------------- /packaging/scripts/postremove.sh: 
-------------------------------------------------------------------------------- 1 | systemctl daemon-reload 2 | -------------------------------------------------------------------------------- /packaging/scripts/preremove.sh: -------------------------------------------------------------------------------- 1 | systemctl stop process-exporter.service 2 | systemctl disable process-exporter.service 3 | -------------------------------------------------------------------------------- /proc/base_test.go: -------------------------------------------------------------------------------- 1 | package proc 2 | 3 | import ( 4 | "fmt" 5 | "time" 6 | 7 | common "github.com/ncabatoff/process-exporter" 8 | ) 9 | 10 | type msi map[string]int 11 | 12 | // procinfo reads the ProcIdInfo for a proc and returns it or a zero value plus 13 | // an error. 14 | func procinfo(p Proc) (IDInfo, error) { 15 | id, err := p.GetProcID() 16 | if err != nil { 17 | return IDInfo{}, err 18 | } 19 | static, err := p.GetStatic() 20 | if err != nil { 21 | return IDInfo{}, err 22 | } 23 | metrics, _, err := p.GetMetrics() 24 | if err != nil { 25 | return IDInfo{}, err 26 | } 27 | return IDInfo{id, static, metrics, nil}, nil 28 | } 29 | 30 | // read everything in the iterator 31 | func consumeIter(pi Iter) ([]IDInfo, error) { 32 | infos := []IDInfo{} 33 | for pi.Next() { 34 | info, err := procinfo(pi) 35 | if err != nil { 36 | return nil, err 37 | } 38 | infos = append(infos, info) 39 | } 40 | return infos, nil 41 | } 42 | 43 | type namer map[string]struct{} 44 | 45 | func newNamer(names ...string) namer { 46 | nr := make(namer, len(names)) 47 | for _, name := range names { 48 | nr[name] = struct{}{} 49 | } 50 | return nr 51 | } 52 | 53 | func (n namer) String() string { 54 | var ss = make([]string, 0, len(n)) 55 | for s := range n { 56 | ss = append(ss, s) 57 | } 58 | return fmt.Sprintf("%v", ss) 59 | } 60 | 61 | func (n namer) MatchAndName(nacl common.ProcAttributes) (bool, string) { 62 | if _, ok := n[nacl.Name]; ok { 63 | return true, nacl.Name 64 | } 65 | return false, "" 66 | } 67 | 68 | func newProcIDStatic(pid, ppid int, startTime uint64, name string, cmdline []string) (ID, Static) { 69 | return ID{pid, startTime}, 70 | Static{name, cmdline, []string{}, ppid, time.Unix(int64(startTime), 0).UTC(), 1000} 71 | } 72 | 73 | func newProc(pid int, name string, m Metrics) IDInfo { 74 | id, static := newProcIDStatic(pid, 0, 0, name, nil) 75 | return IDInfo{id, static, m, nil} 76 | } 77 | 78 | func newProcStart(pid int, name string, startTime uint64) IDInfo { 79 | id, static := newProcIDStatic(pid, 0, startTime, name, nil) 80 | return IDInfo{id, static, Metrics{}, nil} 81 | } 82 | 83 | func newProcParent(pid int, name string, ppid int) IDInfo { 84 | id, static := newProcIDStatic(pid, ppid, 0, name, nil) 85 | return IDInfo{id, static, Metrics{}, nil} 86 | } 87 | 88 | func piinfot(pid int, name string, c Counts, m Memory, f Filedesc, threads []Thread) IDInfo { 89 | pii := piinfo(pid, name, c, m, f, len(threads)) 90 | pii.Threads = threads 91 | return pii 92 | } 93 | 94 | func piinfo(pid int, name string, c Counts, m Memory, f Filedesc, t int) IDInfo { 95 | return piinfost(pid, name, c, m, f, t, States{}) 96 | } 97 | 98 | func piinfost(pid int, name string, c Counts, m Memory, f Filedesc, t int, s States) IDInfo { 99 | id, static := newProcIDStatic(pid, 0, 0, name, nil) 100 | return IDInfo{ 101 | ID: id, 102 | Static: static, 103 | Metrics: Metrics{c, m, f, uint64(t), s, ""}, 104 | } 105 | } 106 | 
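The namer test helper above exercises the whole common.MatchNamer contract. As a hedged, standalone illustration (invented for this note, not repo code), an implementation that groups processes by a comm prefix might look like:

package example

import (
	"fmt"
	"strings"

	common "github.com/ncabatoff/process-exporter"
)

// prefixNamer groups every proc whose comm starts with prefix under a single
// group named after the prefix itself.
type prefixNamer struct{ prefix string }

func (p prefixNamer) String() string { return fmt.Sprintf("prefix: %q", p.prefix) }

// MatchAndName reports whether the proc matches and, if so, the group name
// to file it under.
func (p prefixNamer) MatchAndName(attr common.ProcAttributes) (bool, string) {
	if strings.HasPrefix(attr.Name, p.prefix) {
		return true, p.prefix
	}
	return false, ""
}

// Compile-time check that prefixNamer implements common.MatchNamer.
var _ common.MatchNamer = prefixNamer{}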
-------------------------------------------------------------------------------- /proc/grouper.go: -------------------------------------------------------------------------------- 1 | package proc 2 | 3 | import ( 4 | "time" 5 | 6 | seq "github.com/ncabatoff/go-seq/seq" 7 | common "github.com/ncabatoff/process-exporter" 8 | ) 9 | 10 | type ( 11 | // Grouper is the top-level interface to the process metrics. All tracked 12 | // procs sharing the same group name are aggregated. 13 | Grouper struct { 14 | // groupAccum records the historical accumulation of a group so that 15 | // we can avoid ever decreasing the counts we return. 16 | groupAccum map[string]Counts 17 | tracker *Tracker 18 | threadAccum map[string]map[string]Threads 19 | debug bool 20 | removeEmptyGroups bool 21 | } 22 | 23 | // GroupByName maps group name to group metrics. 24 | GroupByName map[string]Group 25 | 26 | // Threads collects metrics for threads in a group sharing a thread name. 27 | Threads struct { 28 | Name string 29 | NumThreads int 30 | Counts 31 | } 32 | 33 | // Group describes the metrics of a single group. 34 | Group struct { 35 | Counts 36 | States 37 | Wchans map[string]int 38 | Procs int 39 | Memory 40 | OldestStartTime time.Time 41 | OpenFDs uint64 42 | WorstFDratio float64 43 | NumThreads uint64 44 | Threads []Threads 45 | } 46 | ) 47 | 48 | // Returns true if x < y. Test designers should ensure they always have 49 | // a unique name/numthreads combination for each group. 50 | func lessThreads(x, y Threads) bool { return seq.Compare(x, y) < 0 } 51 | 52 | // NewGrouper creates a grouper. 53 | func NewGrouper(namer common.MatchNamer, trackChildren, trackThreads, recheck bool, recheckTimeLimit time.Duration, debug bool, removeEmptyGroups bool) *Grouper { 54 | g := Grouper{ 55 | groupAccum: make(map[string]Counts), 56 | threadAccum: make(map[string]map[string]Threads), 57 | tracker: NewTracker(namer, trackChildren, recheck, recheckTimeLimit, debug), 58 | debug: debug, 59 | removeEmptyGroups: removeEmptyGroups, 60 | } 61 | return &g 62 | } 63 | 64 | func groupadd(grp Group, ts Update) Group { 65 | var zeroTime time.Time 66 | 67 | grp.Procs++ 68 | grp.Memory.ResidentBytes += ts.Memory.ResidentBytes 69 | grp.Memory.VirtualBytes += ts.Memory.VirtualBytes 70 | grp.Memory.VmSwapBytes += ts.Memory.VmSwapBytes 71 | grp.Memory.ProportionalBytes += ts.Memory.ProportionalBytes 72 | grp.Memory.ProportionalSwapBytes += ts.Memory.ProportionalSwapBytes 73 | if ts.Filedesc.Open != -1 { 74 | grp.OpenFDs += uint64(ts.Filedesc.Open) 75 | } 76 | openratio := float64(ts.Filedesc.Open) / float64(ts.Filedesc.Limit) 77 | if grp.WorstFDratio < openratio { 78 | grp.WorstFDratio = openratio 79 | } 80 | grp.NumThreads += ts.NumThreads 81 | grp.Counts.Add(ts.Latest) 82 | grp.States.Add(ts.States) 83 | if grp.OldestStartTime == zeroTime || ts.Start.Before(grp.OldestStartTime) { 84 | grp.OldestStartTime = ts.Start 85 | } 86 | 87 | if grp.Wchans == nil { 88 | grp.Wchans = make(map[string]int) 89 | } 90 | for wchan, count := range ts.Wchans { 91 | grp.Wchans[wchan] += count 92 | } 93 | 94 | return grp 95 | } 96 | 97 | // Update asks the tracker to report on each tracked process by name. 98 | // These are aggregated by groupname, augmented by accumulated counts 99 | // from the past, and returned. Note that while the Tracker reports 100 | // only what counts have changed since last cycle, Grouper.Update 101 | // returns counts that never decrease. 
If removeEmptyGroups is false, 102 | // then even once the last process with name X disappears, name X will 103 | // still appear in the results with the same counts as before; of course, 104 | // all non-count metrics will be zero. 105 | func (g *Grouper) Update(iter Iter) (CollectErrors, GroupByName, error) { 106 | cerrs, tracked, err := g.tracker.Update(iter) 107 | if err != nil { 108 | return cerrs, nil, err 109 | } 110 | return cerrs, g.groups(tracked), nil 111 | } 112 | 113 | // Translate the updates into a new GroupByName and update internal history. 114 | func (g *Grouper) groups(tracked []Update) GroupByName { 115 | groups := make(GroupByName) 116 | threadsByGroup := make(map[string][]ThreadUpdate) 117 | 118 | for _, update := range tracked { 119 | groups[update.GroupName] = groupadd(groups[update.GroupName], update) 120 | if update.Threads != nil { 121 | threadsByGroup[update.GroupName] = 122 | append(threadsByGroup[update.GroupName], update.Threads...) 123 | } 124 | } 125 | 126 | // Add any accumulated counts to what was just observed, 127 | // and update the accumulators. 128 | for gname, group := range groups { 129 | if oldcounts, ok := g.groupAccum[gname]; ok { 130 | group.Counts.Add(Delta(oldcounts)) 131 | } 132 | g.groupAccum[gname] = group.Counts 133 | group.Threads = g.threads(gname, threadsByGroup[gname]) 134 | groups[gname] = group 135 | } 136 | 137 | // Now add any groups that were observed in the past but aren't running now (or delete them, if removeEmptyGroups is true). 138 | for gname, gcounts := range g.groupAccum { 139 | if _, ok := groups[gname]; !ok { 140 | if g.removeEmptyGroups { 141 | delete(g.groupAccum, gname) 142 | delete(g.threadAccum, gname) 143 | } else { 144 | groups[gname] = Group{Counts: gcounts} 145 | } 146 | } 147 | } 148 | 149 | return groups 150 | } 151 | 152 | func (g *Grouper) threads(gname string, tracked []ThreadUpdate) []Threads { 153 | if len(tracked) == 0 { 154 | delete(g.threadAccum, gname) 155 | return nil 156 | } 157 | 158 | ret := make([]Threads, 0, len(tracked)) 159 | threads := make(map[string]Threads) 160 | 161 | // First aggregate the thread metrics by thread name. 162 | for _, nc := range tracked { 163 | curthr := threads[nc.ThreadName] 164 | curthr.NumThreads++ 165 | curthr.Counts.Add(nc.Latest) 166 | curthr.Name = nc.ThreadName 167 | threads[nc.ThreadName] = curthr 168 | } 169 | 170 | // Add any accumulated counts to what was just observed, 171 | // and update the accumulators. 
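// Worked example with made-up numbers: if threadAccum["g1"]["worker"] holds
// Counts{CPUUserTime: 5} from earlier cycles and this cycle's aggregation saw
// Counts{CPUUserTime: 2} for "worker", the Add(Delta(...)) below yields
// Counts{CPUUserTime: 7}, which is both returned and stored back, so the
// per-thread counters reported upstream never decrease.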
172 | if history := g.threadAccum[gname]; history != nil { 173 | for tname := range threads { 174 | if oldcounts, ok := history[tname]; ok { 175 | counts := threads[tname] 176 | counts.Add(Delta(oldcounts.Counts)) 177 | threads[tname] = counts 178 | } 179 | } 180 | } 181 | 182 | g.threadAccum[gname] = threads 183 | 184 | for _, thr := range threads { 185 | ret = append(ret, thr) 186 | } 187 | return ret 188 | } 189 | -------------------------------------------------------------------------------- /proc/grouper_test.go: -------------------------------------------------------------------------------- 1 | package proc 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | "github.com/google/go-cmp/cmp/cmpopts" 9 | ) 10 | 11 | type grouptest struct { 12 | grouper *Grouper 13 | procs Iter 14 | want GroupByName 15 | } 16 | 17 | //func (gt grouptest) run(c *C) { 18 | // _, err := gt.grouper.Update(gt.procs) 19 | // c.Assert(err, IsNil) 20 | // 21 | // got := gt.grouper.curgroups() 22 | // c.Check(got, DeepEquals, gt.want, Commentf("diff %s", pretty.Compare(got, gt.want))) 23 | //} 24 | 25 | func rungroup(t *testing.T, gr *Grouper, procs Iter) GroupByName { 26 | _, groups, err := gr.Update(procs) 27 | if err != nil { 28 | t.Fatalf("group.Update error: %v", err) 29 | } 30 | 31 | return groups 32 | } 33 | 34 | // TestGrouperBasic tests core Update/curgroups functionality on single-proc 35 | // groups: the grouper adds to counts and updates the other tracked metrics like 36 | // Memory. 37 | func TestGrouperBasic(t *testing.T) { 38 | p1, p2 := 1, 2 39 | n1, n2 := "g1", "g2" 40 | starttime := time.Unix(0, 0).UTC() 41 | 42 | tests := []struct { 43 | procs []IDInfo 44 | want GroupByName 45 | }{ 46 | { 47 | []IDInfo{ 48 | piinfost(p1, n1, Counts{1, 2, 3, 4, 5, 6, 0, 0}, Memory{7, 8, 0, 0, 0}, 49 | Filedesc{4, 400}, 2, States{Other: 1}), 50 | piinfost(p2, n2, Counts{2, 3, 4, 5, 6, 7, 0, 0}, Memory{8, 9, 0, 0, 0}, 51 | Filedesc{40, 400}, 3, States{Waiting: 1}), 52 | }, 53 | GroupByName{ 54 | "g1": Group{Counts{}, States{Other: 1}, msi{}, 1, Memory{7, 8, 0, 0, 0}, starttime, 55 | 4, 0.01, 2, nil}, 56 | "g2": Group{Counts{}, States{Waiting: 1}, msi{}, 1, Memory{8, 9, 0, 0, 0}, starttime, 57 | 40, 0.1, 3, nil}, 58 | }, 59 | }, 60 | { 61 | []IDInfo{ 62 | piinfost(p1, n1, Counts{2, 3, 4, 5, 6, 7, 0, 0}, 63 | Memory{6, 7, 0, 0, 0}, Filedesc{100, 400}, 4, States{Zombie: 1}), 64 | piinfost(p2, n2, Counts{4, 5, 6, 7, 8, 9, 0, 0}, 65 | Memory{9, 8, 0, 0, 0}, Filedesc{400, 400}, 2, States{Running: 1}), 66 | }, 67 | GroupByName{ 68 | "g1": Group{Counts{1, 1, 1, 1, 1, 1, 0, 0}, States{Zombie: 1}, msi{}, 1, 69 | Memory{6, 7, 0, 0, 0}, starttime, 100, 0.25, 4, nil}, 70 | "g2": Group{Counts{2, 2, 2, 2, 2, 2, 0, 0}, States{Running: 1}, msi{}, 1, 71 | Memory{9, 8, 0, 0, 0}, starttime, 400, 1, 2, nil}, 72 | }, 73 | }, 74 | } 75 | 76 | gr := NewGrouper(newNamer(n1, n2), false, false, false, 0, false, false) 77 | for i, tc := range tests { 78 | got := rungroup(t, gr, procInfoIter(tc.procs...)) 79 | if diff := cmp.Diff(got, tc.want); diff != "" { 80 | t.Errorf("%d: curgroups differs: (-got +want)\n%s", i, diff) 81 | } 82 | } 83 | } 84 | 85 | // TestGrouperProcJoin tests the appearance of a new process in a group, 86 | // and that all procs metrics contribute to a group. 
87 | func TestGrouperProcJoin(t *testing.T) { 88 | p1, p2 := 1, 2 89 | n1, n2 := "g1", "g1" 90 | starttime := time.Unix(0, 0).UTC() 91 | 92 | tests := []struct { 93 | procs []IDInfo 94 | want GroupByName 95 | }{ 96 | { 97 | []IDInfo{ 98 | piinfo(p1, n1, Counts{1, 2, 3, 4, 5, 6, 0, 0}, Memory{3, 4, 0, 0, 0}, Filedesc{4, 400}, 2), 99 | }, 100 | GroupByName{ 101 | "g1": Group{Counts{}, States{}, msi{}, 1, Memory{3, 4, 0, 0, 0}, starttime, 4, 0.01, 2, nil}, 102 | }, 103 | }, { 104 | // The counts for pid2 won't be factored into the total yet because we only add 105 | // to counts starting with the second time we see a proc. Memory and FDs are 106 | // affected though. 107 | []IDInfo{ 108 | piinfost(p1, n1, Counts{3, 4, 5, 6, 7, 8, 0, 0}, 109 | Memory{3, 4, 0, 0, 0}, Filedesc{4, 400}, 2, States{Running: 1}), 110 | piinfost(p2, n2, Counts{1, 1, 1, 1, 1, 1, 0, 0}, 111 | Memory{1, 2, 0, 0, 0}, Filedesc{40, 400}, 3, States{Sleeping: 1}), 112 | }, 113 | GroupByName{ 114 | "g1": Group{Counts{2, 2, 2, 2, 2, 2, 0, 0}, States{Running: 1, Sleeping: 1}, msi{}, 2, 115 | Memory{4, 6, 0, 0, 0}, starttime, 44, 0.1, 5, nil}, 116 | }, 117 | }, { 118 | []IDInfo{ 119 | piinfost(p1, n1, Counts{4, 5, 6, 7, 8, 9, 0, 0}, 120 | Memory{1, 5, 0, 0, 0}, Filedesc{4, 400}, 2, States{Running: 1}), 121 | piinfost(p2, n2, Counts{2, 2, 2, 2, 2, 2, 0, 0}, 122 | Memory{2, 4, 0, 0, 0}, Filedesc{40, 400}, 3, States{Running: 1}), 123 | }, 124 | GroupByName{ 125 | "g1": Group{Counts{4, 4, 4, 4, 4, 4, 0, 0}, States{Running: 2}, msi{}, 2, 126 | Memory{3, 9, 0, 0, 0}, starttime, 44, 0.1, 5, nil}, 127 | }, 128 | }, 129 | } 130 | 131 | gr := NewGrouper(newNamer(n1), false, false, false, 0, false, false) 132 | for i, tc := range tests { 133 | got := rungroup(t, gr, procInfoIter(tc.procs...)) 134 | if diff := cmp.Diff(got, tc.want); diff != "" { 135 | t.Errorf("%d: curgroups differs: (-got +want)\n%s", i, diff) 136 | } 137 | } 138 | } 139 | 140 | // TestGrouperNonDecreasing tests the disappearance of a process. Its previous 141 | // contribution to the counts should not go away when that happens if removeEmptyGroups is false. 142 | func TestGrouperNonDecreasing(t *testing.T) { 143 | p1, p2 := 1, 2 144 | n1, n2 := "g1", "g1" 145 | starttime := time.Unix(0, 0).UTC() 146 | 147 | tests := []struct { 148 | procs []IDInfo 149 | want GroupByName 150 | }{ 151 | { 152 | []IDInfo{ 153 | piinfo(p1, n1, Counts{3, 4, 5, 6, 7, 8, 0, 0}, Memory{3, 4, 0, 0, 0}, Filedesc{4, 400}, 2), 154 | piinfo(p2, n2, Counts{1, 1, 1, 1, 1, 1, 0, 0}, Memory{1, 2, 0, 0, 0}, Filedesc{40, 400}, 3), 155 | }, 156 | GroupByName{ 157 | "g1": Group{Counts{}, States{}, msi{}, 2, Memory{4, 6, 0, 0, 0}, starttime, 44, 0.1, 5, nil}, 158 | }, 159 | }, { 160 | []IDInfo{ 161 | piinfo(p1, n1, Counts{4, 5, 6, 7, 8, 9, 0, 0}, Memory{1, 5, 0, 0, 0}, Filedesc{4, 400}, 2), 162 | }, 163 | GroupByName{ 164 | "g1": Group{Counts{1, 1, 1, 1, 1, 1, 0, 0}, States{}, msi{}, 1, Memory{1, 5, 0, 0, 0}, starttime, 4, 0.01, 2, nil}, 165 | }, 166 | }, { 167 | []IDInfo{}, 168 | GroupByName{ 169 | "g1": Group{Counts{1, 1, 1, 1, 1, 1, 0, 0}, States{}, nil, 0, Memory{}, time.Time{}, 0, 0, 0, nil}, 170 | }, 171 | }, 172 | } 173 | 174 | gr := NewGrouper(newNamer(n1), false, false, false, 0, false, false) 175 | for i, tc := range tests { 176 | got := rungroup(t, gr, procInfoIter(tc.procs...)) 177 | if diff := cmp.Diff(got, tc.want); diff != "" { 178 | t.Errorf("%d: curgroups differs: (-got +want)\n%s", i, diff) 179 | } 180 | } 181 | } 182 | 183 | // TestGrouperRemoveEmptyGroups tests the disappearance of a process.
184 | // We want the group to disappear if removeEmptyGroups is true. 185 | func TestGrouperRemoveEmptyGroups(t *testing.T) { 186 | p1, p2 := 1, 2 187 | n1, n2 := "g1", "g2" 188 | starttime := time.Unix(0, 0).UTC() 189 | 190 | tests := []struct { 191 | procs []IDInfo 192 | want GroupByName 193 | }{ 194 | { 195 | []IDInfo{ 196 | piinfo(p1, n1, Counts{3, 4, 5, 6, 7, 8, 0, 0}, Memory{3, 4, 0, 0, 0}, Filedesc{4, 400}, 2), 197 | piinfo(p2, n2, Counts{1, 1, 1, 1, 1, 1, 0, 0}, Memory{1, 2, 0, 0, 0}, Filedesc{40, 400}, 3), 198 | }, 199 | GroupByName{ 200 | n1: Group{Counts{}, States{}, msi{}, 1, Memory{3, 4, 0, 0, 0}, starttime, 4, 0.01, 2, nil}, 201 | n2: Group{Counts{}, States{}, msi{}, 1, Memory{1, 2, 0, 0, 0}, starttime, 40, 0.1, 3, nil}, 202 | }, 203 | }, { 204 | []IDInfo{ 205 | piinfo(p1, n1, Counts{4, 5, 6, 7, 8, 9, 0, 0}, Memory{1, 5, 0, 0, 0}, Filedesc{4, 400}, 2), 206 | }, 207 | GroupByName{ 208 | n1: Group{Counts{1, 1, 1, 1, 1, 1, 0, 0}, States{}, msi{}, 1, Memory{1, 5, 0, 0, 0}, starttime, 4, 0.01, 2, nil}, 209 | }, 210 | }, { 211 | []IDInfo{}, 212 | GroupByName{}, 213 | }, 214 | } 215 | 216 | gr := NewGrouper(newNamer(n1, n2), false, false, false, 0, false, true) 217 | for i, tc := range tests { 218 | got := rungroup(t, gr, procInfoIter(tc.procs...)) 219 | if diff := cmp.Diff(got, tc.want); diff != "" { 220 | t.Errorf("%d: curgroups differs: (-got +want)\n%s", i, diff) 221 | } 222 | } 223 | } 224 | 225 | func TestGrouperThreads(t *testing.T) { 226 | p, n, tm := 1, "g1", time.Unix(0, 0).UTC() 227 | 228 | tests := []struct { 229 | proc IDInfo 230 | want GroupByName 231 | }{ 232 | { 233 | piinfot(p, n, Counts{}, Memory{}, Filedesc{1, 1}, []Thread{ 234 | {ThreadID(ID{p, 0}), "t1", Counts{1, 2, 3, 4, 5, 6, 0, 0}, "", States{}}, 235 | {ThreadID(ID{p + 1, 0}), "t2", Counts{1, 1, 1, 1, 1, 1, 0, 0}, "", States{}}, 236 | }), 237 | GroupByName{ 238 | "g1": Group{Counts{}, States{}, msi{}, 1, Memory{}, tm, 1, 1, 2, []Threads{ 239 | Threads{"t1", 1, Counts{}}, 240 | Threads{"t2", 1, Counts{}}, 241 | }}, 242 | }, 243 | }, { 244 | piinfot(p, n, Counts{}, Memory{}, Filedesc{1, 1}, []Thread{ 245 | {ThreadID(ID{p, 0}), "t1", Counts{2, 3, 4, 5, 6, 7, 0, 0}, "", States{}}, 246 | {ThreadID(ID{p + 1, 0}), "t2", Counts{2, 2, 2, 2, 2, 2, 0, 0}, "", States{}}, 247 | {ThreadID(ID{p + 2, 0}), "t2", Counts{1, 1, 1, 1, 1, 1, 0, 0}, "", States{}}, 248 | }), 249 | GroupByName{ 250 | "g1": Group{Counts{}, States{}, msi{}, 1, Memory{}, tm, 1, 1, 3, []Threads{ 251 | Threads{"t1", 1, Counts{1, 1, 1, 1, 1, 1, 0, 0}}, 252 | Threads{"t2", 2, Counts{1, 1, 1, 1, 1, 1, 0, 0}}, 253 | }}, 254 | }, 255 | }, { 256 | piinfot(p, n, Counts{}, Memory{}, Filedesc{1, 1}, []Thread{ 257 | {ThreadID(ID{p + 1, 0}), "t2", Counts{4, 4, 4, 4, 4, 4, 0, 0}, "", States{}}, 258 | {ThreadID(ID{p + 2, 0}), "t2", Counts{2, 3, 4, 5, 6, 7, 0, 0}, "", States{}}, 259 | }), 260 | GroupByName{ 261 | "g1": Group{Counts{}, States{}, msi{}, 1, Memory{}, tm, 1, 1, 2, []Threads{ 262 | Threads{"t2", 2, Counts{4, 5, 6, 7, 8, 9, 0, 0}}, 263 | }}, 264 | }, 265 | }, 266 | } 267 | 268 | opts := cmpopts.SortSlices(lessThreads) 269 | gr := NewGrouper(newNamer(n), false, true, false, 0, false, false) 270 | for i, tc := range tests { 271 | got := rungroup(t, gr, procInfoIter(tc.proc)) 272 | if diff := cmp.Diff(got, tc.want, opts); diff != "" { 273 | t.Errorf("%d: curgroups differs: (-got +want)\n%s", i, diff) 274 | } 275 | } 276 | } 277 | -------------------------------------------------------------------------------- /proc/read.go: 
-------------------------------------------------------------------------------- 1 | package proc 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "path/filepath" 7 | "strconv" 8 | "time" 9 | 10 | "github.com/prometheus/procfs" 11 | ) 12 | 13 | // ErrProcNotExist indicates a process couldn't be read because it doesn't exist, 14 | // typically because it disappeared while we were reading it. 15 | var ErrProcNotExist = fmt.Errorf("process does not exist") 16 | 17 | type ( 18 | // ID uniquely identifies a process. 19 | ID struct { 20 | // UNIX process id 21 | Pid int 22 | // The time the process started after system boot, expressed 23 | // in clock ticks. 24 | StartTimeRel uint64 25 | } 26 | 27 | ThreadID ID 28 | 29 | // Static contains data read from /proc/pid/* 30 | Static struct { 31 | Name string 32 | Cmdline []string 33 | Cgroups []string 34 | ParentPid int 35 | StartTime time.Time 36 | EffectiveUID int 37 | } 38 | 39 | // Counts are metric counters common to threads, processes, and groups. 40 | Counts struct { 41 | CPUUserTime float64 42 | CPUSystemTime float64 43 | ReadBytes uint64 44 | WriteBytes uint64 45 | MajorPageFaults uint64 46 | MinorPageFaults uint64 47 | CtxSwitchVoluntary uint64 48 | CtxSwitchNonvoluntary uint64 49 | } 50 | 51 | // Memory describes a proc's memory usage. 52 | Memory struct { 53 | ResidentBytes uint64 54 | VirtualBytes uint64 55 | VmSwapBytes uint64 56 | ProportionalBytes uint64 57 | ProportionalSwapBytes uint64 58 | } 59 | 60 | // Filedesc describes a proc's file descriptor usage and soft limit. 61 | Filedesc struct { 62 | // Open is the count of open file descriptors, -1 if unknown. 63 | Open int64 64 | // Limit is the fd soft limit for the process. 65 | Limit uint64 66 | } 67 | 68 | // States counts how many threads are in each state. 69 | States struct { 70 | Running int 71 | Sleeping int 72 | Waiting int 73 | Zombie int 74 | Other int 75 | } 76 | 77 | // Metrics contains data read from /proc/pid/* 78 | Metrics struct { 79 | Counts 80 | Memory 81 | Filedesc 82 | NumThreads uint64 83 | States 84 | Wchan string 85 | } 86 | 87 | // Thread contains per-thread data. 88 | Thread struct { 89 | ThreadID 90 | ThreadName string 91 | Counts 92 | Wchan string 93 | States 94 | } 95 | 96 | // IDInfo groups all info for a single process. 97 | IDInfo struct { 98 | ID 99 | Static 100 | Metrics 101 | Threads []Thread 102 | } 103 | 104 | // ProcIdInfoThreads struct { 105 | // ProcIdInfo 106 | // Threads []ProcThread 107 | // } 108 | 109 | // Proc wraps the details of the underlying procfs-reading library. 110 | // Any of these methods may fail if the process has disappeared. 111 | // We try to return as much as possible rather than an error, e.g. 112 | // if some /proc files are unreadable. 113 | Proc interface { 114 | // GetPid() returns the POSIX PID (process id). PIDs may be reused over time. 115 | GetPid() int 116 | // GetProcID() returns (pid,starttime), which can be considered a unique process id. 117 | GetProcID() (ID, error) 118 | // GetStatic() returns various details read from files under /proc/<pid>/. Technically 119 | // name may not be static, but we'll pretend it is. 120 | GetStatic() (Static, error) 121 | // GetMetrics() returns various metrics read from files under /proc/<pid>/. 122 | // It returns an error on complete failure. Otherwise, it returns metrics 123 | // and 0 on complete success, or 1 if some metrics (like I/O) couldn't be read.
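// (A sketch of typical use, mirroring what the tracker's handleProc does below;
// "partial" is a hypothetical soft-error counter, not part of this package:
//     m, soft, err := p.GetMetrics()
//     if err == nil { partial += soft }
// A nonzero soft count means m is usable but incomplete.)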
124 | GetMetrics() (Metrics, int, error) 125 | GetStates() (States, error) 126 | GetWchan() (string, error) 127 | GetCounts() (Counts, int, error) 128 | GetThreads() ([]Thread, error) 129 | } 130 | 131 | // proccache implements the Proc interface by acting as a wrapper for procfs.Proc 132 | // that caches results of some reads. 133 | proccache struct { 134 | procfs.Proc 135 | procid *ID 136 | stat *procfs.ProcStat 137 | status *procfs.ProcStatus 138 | cmdline []string 139 | cgroups []procfs.Cgroup 140 | io *procfs.ProcIO 141 | fs *FS 142 | wchan *string 143 | } 144 | 145 | proc struct { 146 | proccache 147 | } 148 | 149 | // procs is a fancier []Proc that saves on some copying. 150 | procs interface { 151 | get(int) Proc 152 | length() int 153 | } 154 | 155 | // procfsprocs implements procs using procfs. 156 | procfsprocs struct { 157 | Procs []procfs.Proc 158 | fs *FS 159 | } 160 | 161 | // Iter is an iterator over a sequence of procs. 162 | Iter interface { 163 | // Next returns true if the iterator is not exhausted. 164 | Next() bool 165 | // Close releases any resources the iterator uses. 166 | Close() error 167 | // The iterator satisfies the Proc interface. 168 | Proc 169 | } 170 | 171 | // procIterator implements the Iter interface. 172 | procIterator struct { 173 | // procs is the list of Proc we're iterating over. 174 | procs 175 | // idx is the current iteration, i.e. it's an index into procs. 176 | idx int 177 | // err is set with an error when Next() fails. It is not affected by failures accessing 178 | // the current iteration variable, e.g. with GetProcID. 179 | err error 180 | // Proc is the current iteration variable, or nil if Next() has never been called or the 181 | // iterator is exhausted. 182 | Proc 183 | } 184 | 185 | // Source is a source of procs. 186 | Source interface { 187 | // AllProcs returns all the processes in this source at this moment in time. 188 | AllProcs() Iter 189 | } 190 | 191 | // FS implements Source. 192 | FS struct { 193 | procfs.FS 194 | BootTime uint64 195 | MountPoint string 196 | GatherSMaps bool 197 | debug bool 198 | } 199 | ) 200 | 201 | func (ii IDInfo) String() string { 202 | return fmt.Sprintf("%+v:%+v", ii.ID, ii.Static) 203 | } 204 | 205 | // Add adds c2 to the counts. 206 | func (c *Counts) Add(c2 Delta) { 207 | c.CPUUserTime += c2.CPUUserTime 208 | c.CPUSystemTime += c2.CPUSystemTime 209 | c.ReadBytes += c2.ReadBytes 210 | c.WriteBytes += c2.WriteBytes 211 | c.MajorPageFaults += c2.MajorPageFaults 212 | c.MinorPageFaults += c2.MinorPageFaults 213 | c.CtxSwitchVoluntary += c2.CtxSwitchVoluntary 214 | c.CtxSwitchNonvoluntary += c2.CtxSwitchNonvoluntary 215 | } 216 | 217 | // Sub subtracts c2 from the counts. 218 | func (c Counts) Sub(c2 Counts) Delta { 219 | c.CPUUserTime -= c2.CPUUserTime 220 | c.CPUSystemTime -= c2.CPUSystemTime 221 | c.ReadBytes -= c2.ReadBytes 222 | c.WriteBytes -= c2.WriteBytes 223 | c.MajorPageFaults -= c2.MajorPageFaults 224 | c.MinorPageFaults -= c2.MinorPageFaults 225 | c.CtxSwitchVoluntary -= c2.CtxSwitchVoluntary 226 | c.CtxSwitchNonvoluntary -= c2.CtxSwitchNonvoluntary 227 | return Delta(c) 228 | } 229 | 230 | func (s *States) Add(s2 States) { 231 | s.Other += s2.Other 232 | s.Running += s2.Running 233 | s.Sleeping += s2.Sleeping 234 | s.Waiting += s2.Waiting 235 | s.Zombie += s2.Zombie 236 | } 237 | 238 | func (p IDInfo) GetThreads() ([]Thread, error) { 239 | return p.Threads, nil 240 | } 241 | 242 | // GetPid implements Proc.
243 | func (p IDInfo) GetPid() int { 244 | return p.ID.Pid 245 | } 246 | 247 | // GetProcID implements Proc. 248 | func (p IDInfo) GetProcID() (ID, error) { 249 | return p.ID, nil 250 | } 251 | 252 | // GetStatic implements Proc. 253 | func (p IDInfo) GetStatic() (Static, error) { 254 | return p.Static, nil 255 | } 256 | 257 | // GetCounts implements Proc. 258 | func (p IDInfo) GetCounts() (Counts, int, error) { 259 | return p.Metrics.Counts, 0, nil 260 | } 261 | 262 | // GetMetrics implements Proc. 263 | func (p IDInfo) GetMetrics() (Metrics, int, error) { 264 | return p.Metrics, 0, nil 265 | } 266 | 267 | // GetStates implements Proc. 268 | func (p IDInfo) GetStates() (States, error) { 269 | return p.States, nil 270 | } 271 | 272 | func (p IDInfo) GetWchan() (string, error) { 273 | return p.Wchan, nil 274 | } 275 | 276 | func (p *proccache) GetPid() int { 277 | return p.Proc.PID 278 | } 279 | 280 | func (p *proccache) getStat() (procfs.ProcStat, error) { 281 | if p.stat == nil { 282 | stat, err := p.Proc.NewStat() 283 | if err != nil { 284 | return procfs.ProcStat{}, err 285 | } 286 | p.stat = &stat 287 | } 288 | 289 | return *p.stat, nil 290 | } 291 | 292 | func (p *proccache) getStatus() (procfs.ProcStatus, error) { 293 | if p.status == nil { 294 | status, err := p.Proc.NewStatus() 295 | if err != nil { 296 | return procfs.ProcStatus{}, err 297 | } 298 | p.status = &status 299 | } 300 | 301 | return *p.status, nil 302 | } 303 | 304 | func (p *proccache) getCgroups() ([]procfs.Cgroup, error) { 305 | if p.cgroups == nil { 306 | cgroups, err := p.Proc.Cgroups() 307 | if err != nil { 308 | return nil, err 309 | } 310 | p.cgroups = cgroups 311 | } 312 | 313 | return p.cgroups, nil 314 | } 315 | 316 | // GetProcID implements Proc. 317 | func (p *proccache) GetProcID() (ID, error) { 318 | if p.procid == nil { 319 | stat, err := p.getStat() 320 | if err != nil { 321 | return ID{}, err 322 | } 323 | p.procid = &ID{Pid: p.GetPid(), StartTimeRel: stat.Starttime} 324 | } 325 | 326 | return *p.procid, nil 327 | } 328 | 329 | func (p *proccache) getCmdLine() ([]string, error) { 330 | if p.cmdline == nil { 331 | cmdline, err := p.Proc.CmdLine() 332 | if err != nil { 333 | return nil, err 334 | } 335 | p.cmdline = cmdline 336 | } 337 | return p.cmdline, nil 338 | } 339 | 340 | func (p *proccache) getWchan() (string, error) { 341 | if p.wchan == nil { 342 | wchan, err := p.Proc.Wchan() 343 | if err != nil { 344 | return "", err 345 | } 346 | p.wchan = &wchan 347 | } 348 | return *p.wchan, nil 349 | } 350 | 351 | func (p *proccache) getIo() (procfs.ProcIO, error) { 352 | if p.io == nil { 353 | io, err := p.Proc.IO() 354 | if err != nil { 355 | return procfs.ProcIO{}, err 356 | } 357 | p.io = &io 358 | } 359 | return *p.io, nil 360 | } 361 | 362 | // GetStatic returns the ProcStatic corresponding to this proc. 363 | func (p *proccache) GetStatic() (Static, error) { 364 | // /proc/<pid>/cmdline is normally world-readable. 365 | cmdline, err := p.getCmdLine() 366 | if err != nil { 367 | return Static{}, err 368 | } 369 | 370 | // /proc/<pid>/stat is normally world-readable. 371 | stat, err := p.getStat() 372 | if err != nil { 373 | return Static{}, err 374 | } 375 | startTime := time.Unix(int64(p.fs.BootTime), 0).UTC() 376 | startTime = startTime.Add(time.Second / userHZ * time.Duration(stat.Starttime)) 377 | 378 | // /proc/<pid>/status is normally world-readable. 379 | status, err := p.getStatus() 380 | if err != nil { 381 | return Static{}, err 382 | } 383 | 384 | // /proc/<pid>/cgroup(s) is normally world-readable.
385 | // However, cgroups aren't always supported, so we return an empty slice in 386 | // that case. 387 | cgroups, err := p.getCgroups() 388 | var cgroupsStr []string 389 | if err != nil { 390 | cgroupsStr = []string{} 391 | } else { 392 | for _, c := range cgroups { 393 | cgroupsStr = append(cgroupsStr, c.Path) 394 | } 395 | } 396 | 397 | return Static{ 398 | Name: stat.Comm, 399 | Cmdline: cmdline, 400 | Cgroups: cgroupsStr, 401 | ParentPid: stat.PPID, 402 | StartTime: startTime, 403 | EffectiveUID: int(status.UIDs[1]), 404 | }, nil 405 | } 406 | 407 | func (p proc) GetCounts() (Counts, int, error) { 408 | stat, err := p.getStat() 409 | if err != nil { 410 | if err == os.ErrNotExist { 411 | return Counts{}, 0, ErrProcNotExist 412 | } 413 | return Counts{}, 0, fmt.Errorf("error reading stat file: %v", err) 414 | } 415 | 416 | status, err := p.getStatus() 417 | if err != nil { 418 | if err == os.ErrNotExist { 419 | return Counts{}, 0, ErrProcNotExist 420 | } 421 | return Counts{}, 0, fmt.Errorf("error reading status file: %v", err) 422 | } 423 | 424 | io, err := p.getIo() 425 | softerrors := 0 426 | if err != nil { 427 | softerrors++ 428 | } 429 | return Counts{ 430 | CPUUserTime: float64(stat.UTime) / userHZ, 431 | CPUSystemTime: float64(stat.STime) / userHZ, 432 | ReadBytes: io.ReadBytes, 433 | WriteBytes: io.WriteBytes, 434 | MajorPageFaults: uint64(stat.MajFlt), 435 | MinorPageFaults: uint64(stat.MinFlt), 436 | CtxSwitchVoluntary: uint64(status.VoluntaryCtxtSwitches), 437 | CtxSwitchNonvoluntary: uint64(status.NonVoluntaryCtxtSwitches), 438 | }, softerrors, nil 439 | } 440 | 441 | func (p proc) GetWchan() (string, error) { 442 | return p.getWchan() 443 | } 444 | 445 | func (p proc) GetStates() (States, error) { 446 | stat, err := p.getStat() 447 | if err != nil { 448 | return States{}, err 449 | } 450 | 451 | var s States 452 | switch stat.State { 453 | case "R": 454 | s.Running++ 455 | case "S": 456 | s.Sleeping++ 457 | case "D": 458 | s.Waiting++ 459 | case "Z": 460 | s.Zombie++ 461 | default: 462 | s.Other++ 463 | } 464 | return s, nil 465 | } 466 | 467 | // GetMetrics returns the current metrics for the proc. The results are 468 | // not cached. 469 | func (p proc) GetMetrics() (Metrics, int, error) { 470 | counts, softerrors, err := p.GetCounts() 471 | if err != nil { 472 | return Metrics{}, 0, err 473 | } 474 | 475 | // We don't need to check for error here because p will have cached 476 | // the successful result of calling getStat in GetCounts. 477 | // Since GetMetrics isn't a pointer receiver method, our callers 478 | // won't see the effect of the caching between calls.
479 | stat, _ := p.getStat() 480 | 481 | // Ditto for states 482 | states, _ := p.GetStates() 483 | 484 | // Ditto for status 485 | status, _ := p.getStatus() 486 | 487 | numfds, err := p.Proc.FileDescriptorsLen() 488 | if err != nil { 489 | numfds = -1 490 | softerrors |= 1 491 | } 492 | 493 | limits, err := p.Proc.NewLimits() 494 | if err != nil { 495 | return Metrics{}, 0, err 496 | } 497 | 498 | wchan, err := p.getWchan() 499 | if err != nil { 500 | softerrors |= 1 501 | } 502 | 503 | memory := Memory{ 504 | ResidentBytes: uint64(stat.ResidentMemory()), 505 | VirtualBytes: uint64(stat.VirtualMemory()), 506 | VmSwapBytes: uint64(status.VmSwap), 507 | } 508 | 509 | if p.proccache.fs.GatherSMaps { 510 | smaps, err := p.Proc.ProcSMapsRollup() 511 | if err != nil { 512 | softerrors |= 1 513 | } else { 514 | memory.ProportionalBytes = smaps.Pss 515 | memory.ProportionalSwapBytes = smaps.SwapPss 516 | } 517 | } 518 | 519 | return Metrics{ 520 | Counts: counts, 521 | Memory: memory, 522 | Filedesc: Filedesc{ 523 | Open: int64(numfds), 524 | Limit: uint64(limits.OpenFiles), 525 | }, 526 | NumThreads: uint64(stat.NumThreads), 527 | States: states, 528 | Wchan: wchan, 529 | }, softerrors, nil 530 | } 531 | 532 | func (p proc) GetThreads() ([]Thread, error) { 533 | fs, err := p.fs.threadFs(p.PID) 534 | if err != nil { 535 | return nil, err 536 | } 537 | 538 | threads := []Thread{} 539 | iter := fs.AllProcs() 540 | for iter.Next() { 541 | var id ID 542 | id, err = iter.GetProcID() 543 | if err != nil { 544 | continue 545 | } 546 | 547 | var static Static 548 | static, err = iter.GetStatic() 549 | if err != nil { 550 | continue 551 | } 552 | 553 | var counts Counts 554 | counts, _, err = iter.GetCounts() 555 | if err != nil { 556 | continue 557 | } 558 | 559 | wchan, _ := iter.GetWchan() 560 | states, _ := iter.GetStates() 561 | 562 | threads = append(threads, Thread{ 563 | ThreadID: ThreadID(id), 564 | ThreadName: static.Name, 565 | Counts: counts, 566 | Wchan: wchan, 567 | States: states, 568 | }) 569 | } 570 | err = iter.Close() 571 | if err != nil { 572 | return nil, err 573 | } 574 | if len(threads) < 2 { 575 | return nil, nil 576 | } 577 | 578 | return threads, nil 579 | } 580 | 581 | // See https://github.com/prometheus/procfs/blob/master/proc_stat.go for details on userHZ. 582 | const userHZ = 100 583 | 584 | // NewFS returns a new FS mounted under the given mountPoint. It will error 585 | // if the mount point can't be read. 586 | func NewFS(mountPoint string, debug bool) (*FS, error) { 587 | fs, err := procfs.NewFS(mountPoint) 588 | if err != nil { 589 | return nil, err 590 | } 591 | stat, err := fs.NewStat() 592 | if err != nil { 593 | return nil, err 594 | } 595 | return &FS{fs, stat.BootTime, mountPoint, false, debug}, nil 596 | } 597 | 598 | func (fs *FS) threadFs(pid int) (*FS, error) { 599 | mountPoint := filepath.Join(fs.MountPoint, strconv.Itoa(pid), "task") 600 | tfs, err := procfs.NewFS(mountPoint) 601 | if err != nil { 602 | return nil, err 603 | } 604 | return &FS{tfs, fs.BootTime, mountPoint, fs.GatherSMaps, false}, nil 605 | } 606 | 607 | // AllProcs implements Source. 608 | func (fs *FS) AllProcs() Iter { 609 | procs, err := fs.FS.AllProcs() 610 | if err != nil { 611 | err = fmt.Errorf("Error reading procs: %v", err) 612 | } 613 | return &procIterator{procs: procfsprocs{procs, fs}, err: err, idx: -1} 614 | } 615 | 616 | // get implements procs. 
617 | func (p procfsprocs) get(i int) Proc { 618 | return &proc{proccache{Proc: p.Procs[i], fs: p.fs}} 619 | } 620 | 621 | // length implements procs. 622 | func (p procfsprocs) length() int { 623 | return len(p.Procs) 624 | } 625 | 626 | // Next implements Iter. 627 | func (pi *procIterator) Next() bool { 628 | pi.idx++ 629 | if pi.idx < pi.procs.length() { 630 | pi.Proc = pi.procs.get(pi.idx) 631 | } else { 632 | pi.Proc = nil 633 | } 634 | return pi.idx < pi.procs.length() 635 | } 636 | 637 | // Close implements Iter. 638 | func (pi *procIterator) Close() error { 639 | pi.Next() 640 | pi.procs = nil 641 | pi.Proc = nil 642 | return pi.err 643 | } 644 | -------------------------------------------------------------------------------- /proc/read_test.go: -------------------------------------------------------------------------------- 1 | package proc 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "testing" 8 | "time" 9 | 10 | "github.com/google/go-cmp/cmp" 11 | ) 12 | 13 | type ( 14 | // procIDInfos implements procs using a slice of already 15 | // populated ProcIdInfo. Used for testing. 16 | procIDInfos []IDInfo 17 | ) 18 | 19 | func (p procIDInfos) get(i int) Proc { 20 | return &p[i] 21 | } 22 | 23 | func (p procIDInfos) length() int { 24 | return len(p) 25 | } 26 | 27 | func procInfoIter(ps ...IDInfo) *procIterator { 28 | return &procIterator{procs: procIDInfos(ps), idx: -1} 29 | } 30 | 31 | func allprocs(procpath string) Iter { 32 | fs, err := NewFS(procpath, false) 33 | if err != nil { 34 | cwd, _ := os.Getwd() 35 | panic("can't read " + procpath + ", cwd=" + cwd + ", err=" + fmt.Sprintf("%v", err)) 36 | } 37 | return fs.AllProcs() 38 | } 39 | 40 | func TestReadFixture(t *testing.T) { 41 | procs := allprocs("../fixtures") 42 | var pii IDInfo 43 | 44 | count := 0 45 | for procs.Next() { 46 | count++ 47 | var err error 48 | pii, err = procinfo(procs) 49 | noerr(t, err) 50 | } 51 | err := procs.Close() 52 | noerr(t, err) 53 | if count != 1 { 54 | t.Fatalf("got %d procs, want 1", count) 55 | } 56 | 57 | wantprocid := ID{Pid: 14804, StartTimeRel: 0x4f27b} 58 | if diff := cmp.Diff(pii.ID, wantprocid); diff != "" { 59 | t.Errorf("procid differs: (-got +want)\n%s", diff) 60 | } 61 | 62 | stime, _ := time.Parse(time.RFC3339Nano, "2017-10-19T22:52:51.19Z") 63 | wantstatic := Static{ 64 | Name: "process-exporte", 65 | Cmdline: []string{"./process-exporter", "-procnames", "bash"}, 66 | Cgroups: []string{"/system.slice/docker-8dde0b0d6e919baef8d635cd9399b22639ed1e400eaec1b1cb94ff3b216cf3c3.scope"}, 67 | ParentPid: 10884, 68 | StartTime: stime, 69 | EffectiveUID: 1000, 70 | } 71 | if diff := cmp.Diff(pii.Static, wantstatic); diff != "" { 72 | t.Errorf("static differs: (-got +want)\n%s", diff) 73 | } 74 | 75 | wantmetrics := Metrics{ 76 | Counts: Counts{ 77 | CPUUserTime: 0.1, 78 | CPUSystemTime: 0.04, 79 | ReadBytes: 1814455, 80 | WriteBytes: 0, 81 | MajorPageFaults: 0x2ff, 82 | MinorPageFaults: 0x643, 83 | CtxSwitchVoluntary: 72, 84 | CtxSwitchNonvoluntary: 6, 85 | }, 86 | Memory: Memory{ 87 | ResidentBytes: 0x7b1000, 88 | VirtualBytes: 0x1061000, 89 | VmSwapBytes: 0x2800, 90 | }, 91 | Filedesc: Filedesc{ 92 | Open: 5, 93 | Limit: 0x400, 94 | }, 95 | NumThreads: 7, 96 | States: States{Sleeping: 1}, 97 | } 98 | if diff := cmp.Diff(pii.Metrics, wantmetrics); diff != "" { 99 | t.Errorf("metrics differs: (-got +want)\n%s", diff) 100 | } 101 | } 102 | 103 | func noerr(t *testing.T, err error) { 104 | if err != nil { 105 | t.Fatalf("error: %v", err) 106 | } 107 | } 108 | 109 | // Basic test of 
proc reading: verify that AllProcs returns at least two procs, one of which is us. 110 | func TestAllProcs(t *testing.T) { 111 | procs := allprocs("/proc") 112 | count := 0 113 | for procs.Next() { 114 | count++ 115 | if procs.GetPid() != os.Getpid() { 116 | continue 117 | } 118 | procid, err := procs.GetProcID() 119 | noerr(t, err) 120 | if procid.Pid != os.Getpid() { 121 | t.Errorf("got %d, want %d", procid.Pid, os.Getpid()) 122 | } 123 | static, err := procs.GetStatic() 124 | noerr(t, err) 125 | if static.ParentPid != os.Getppid() { 126 | t.Errorf("got %d, want %d", static.ParentPid, os.Getppid()) 127 | } 128 | metrics, _, err := procs.GetMetrics() 129 | noerr(t, err) 130 | if metrics.ResidentBytes == 0 { 131 | t.Errorf("got 0 bytes resident, want nonzero") 132 | } 133 | // All Go programs have multiple threads. 134 | if metrics.NumThreads < 2 { 135 | t.Errorf("got %d threads, want >1", metrics.NumThreads) 136 | } 137 | var zstates States 138 | if metrics.States == zstates { 139 | t.Errorf("got empty states") 140 | } 141 | threads, err := procs.GetThreads() 142 | if len(threads) < 2 { 143 | t.Errorf("got %d thread details, want >1", len(threads)) 144 | } 145 | } 146 | err := procs.Close() 147 | noerr(t, err) 148 | if count == 0 { 149 | t.Errorf("got %d procs, want more than zero", count) 150 | } 151 | } 152 | 153 | // Test that we can observe the absence of a child process before it spawns and after it exits, 154 | // and its presence during its lifetime. 155 | func TestAllProcsSpawn(t *testing.T) { 156 | childprocs := func() []IDInfo { 157 | found := []IDInfo{} 158 | procs := allprocs("/proc") 159 | mypid := os.Getpid() 160 | for procs.Next() { 161 | procid, err := procs.GetProcID() 162 | if err != nil { 163 | continue 164 | } 165 | static, err := procs.GetStatic() 166 | if err != nil { 167 | continue 168 | } 169 | if static.ParentPid == mypid { 170 | found = append(found, IDInfo{procid, static, Metrics{}, nil}) 171 | } 172 | } 173 | err := procs.Close() 174 | if err != nil { 175 | t.Fatalf("error closing procs iterator: %v", err) 176 | } 177 | return found 178 | } 179 | 180 | foundcat := func(procs []IDInfo) bool { 181 | for _, proc := range procs { 182 | if proc.Name == "cat" { 183 | return true 184 | } 185 | } 186 | return false 187 | } 188 | 189 | if foundcat(childprocs()) { 190 | t.Errorf("found cat before spawning it") 191 | } 192 | 193 | cmd := exec.Command("/bin/cat") 194 | wc, err := cmd.StdinPipe() 195 | noerr(t, err) 196 | err = cmd.Start() 197 | noerr(t, err) 198 | 199 | if !foundcat(childprocs()) { 200 | t.Errorf("didn't find cat after spawning it") 201 | } 202 | 203 | err = wc.Close() 204 | noerr(t, err) 205 | err = cmd.Wait() 206 | noerr(t, err) 207 | 208 | if foundcat(childprocs()) { 209 | t.Errorf("found cat after exit") 210 | } 211 | } 212 | 213 | func TestIterator(t *testing.T) { 214 | p1 := newProc(1, "p1", Metrics{}) 215 | p2 := newProc(2, "p2", Metrics{}) 216 | want := []IDInfo{p1, p2} 217 | pis := procInfoIter(want...)
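// consumeIter is a shared test helper (presumably defined with the other
// helpers in base_test.go) that drains the iterator into a []IDInfo.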
218 | got, err := consumeIter(pis) 219 | noerr(t, err) 220 | if diff := cmp.Diff(got, want); diff != "" { 221 | t.Errorf("procs differs: (-got +want)\n%s", diff) 222 | } 223 | } 224 | -------------------------------------------------------------------------------- /proc/tracker.go: -------------------------------------------------------------------------------- 1 | package proc 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os/user" 7 | "strconv" 8 | "time" 9 | 10 | seq "github.com/ncabatoff/go-seq/seq" 11 | common "github.com/ncabatoff/process-exporter" 12 | ) 13 | 14 | type ( 15 | // Tracker tracks processes and records metrics. 16 | Tracker struct { 17 | // namer determines what processes to track and names them 18 | namer common.MatchNamer 19 | // tracked holds the processes that are being monitored. Processes 20 | // may be blacklisted such that they no longer get tracked by 21 | // setting their value in the tracked map to nil. 22 | tracked map[ID]*trackedProc 23 | // procIds is a map from pid to ProcId. This is a convenience 24 | // to allow finding the Tracked entry of a parent process. 25 | procIds map[int]ID 26 | // firstUpdateAt is the time the first update was run. It lets us 27 | // count the first usage of a process started between two Update() calls. 28 | firstUpdateAt time.Time 29 | // trackChildren makes Tracker track descendants of procs the 30 | // namer wanted tracked. 31 | trackChildren bool 32 | // never ignore processes, i.e. always re-check untracked processes in case comm has changed 33 | recheck bool 34 | // limit rechecks to this much time 35 | recheckTimeLimit time.Duration 36 | username map[int]string 37 | debug bool 38 | } 39 | 40 | // Delta is an alias of Counts used to signal that its contents are not 41 | // totals, but rather the result of subtracting two totals. 42 | Delta Counts 43 | 44 | trackedThread struct { 45 | name string 46 | accum Counts 47 | latest Delta 48 | lastUpdate time.Time 49 | wchan string 50 | } 51 | 52 | // trackedProc accumulates metrics for a process, as well as 53 | // remembering an optional GroupName tag associated with it. 54 | trackedProc struct { 55 | // lastUpdate is used internally during the update cycle to find which procs have exited 56 | lastUpdate time.Time 57 | // static 58 | static Static 59 | metrics Metrics 60 | // lastaccum is the increment to the counters seen in the last update. 61 | lastaccum Delta 62 | // groupName is the tag for this proc given by the namer. 63 | groupName string 64 | threads map[ThreadID]trackedThread 65 | } 66 | 67 | // ThreadUpdate describes what's changed for a thread since the last cycle. 68 | ThreadUpdate struct { 69 | // ThreadName is the name of the thread, taken from the comm field of its stat file. 70 | ThreadName string 71 | // Latest is how much the counts increased since last cycle. 72 | Latest Delta 73 | } 74 | 75 | // Update reports on the latest stats for a process. 76 | Update struct { 77 | // GroupName is the name given by the namer to the process. 78 | GroupName string 79 | // Latest is how much the counts increased since last cycle. 80 | Latest Delta 81 | // Memory is the current memory usage. 82 | Memory 83 | // Filedesc is the current fd usage/limit. 84 | Filedesc 85 | // Start is the time the process started. 86 | Start time.Time 87 | // NumThreads is the number of threads. 88 | NumThreads uint64 89 | // States is how many processes are in which run state. 90 | States 91 | // Wchans is how many threads are in each non-zero wchan.
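// (The wchan, read from /proc/<pid>/wchan, names the kernel function a
// sleeping thread is blocked in.)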
92 | Wchans map[string]int 93 | // Threads are the thread updates for this process, if the Tracker 94 | // has trackThreads==true. 95 | Threads []ThreadUpdate 96 | } 97 | 98 | // CollectErrors describes non-fatal errors found while collecting proc 99 | // metrics. 100 | CollectErrors struct { 101 | // Read is incremented every time GetMetrics() returns an error. 102 | // This means we failed to load even the basics for the process, 103 | // and not just because it disappeared on us. 104 | Read int 105 | // Partial is incremented every time we're unable to collect 106 | // some metrics (e.g. I/O) for a tracked proc, but we're still able 107 | // to get the basic stuff like cmdline and core stats. 108 | Partial int 109 | } 110 | ) 111 | 112 | func lessUpdateGroupName(x, y Update) bool { return x.GroupName < y.GroupName } 113 | 114 | func lessThreadUpdate(x, y ThreadUpdate) bool { return seq.Compare(x, y) < 0 } 115 | 116 | func lessCounts(x, y Counts) bool { return seq.Compare(x, y) < 0 } 117 | 118 | func (tp *trackedProc) getUpdate() Update { 119 | u := Update{ 120 | GroupName: tp.groupName, 121 | Latest: tp.lastaccum, 122 | Memory: tp.metrics.Memory, 123 | Filedesc: tp.metrics.Filedesc, 124 | Start: tp.static.StartTime, 125 | NumThreads: tp.metrics.NumThreads, 126 | States: tp.metrics.States, 127 | Wchans: make(map[string]int), 128 | } 129 | if tp.metrics.Wchan != "" { 130 | u.Wchans[tp.metrics.Wchan] = 1 131 | } 132 | if len(tp.threads) > 1 { 133 | for _, tt := range tp.threads { 134 | u.Threads = append(u.Threads, ThreadUpdate{tt.name, tt.latest}) 135 | if tt.wchan != "" { 136 | u.Wchans[tt.wchan]++ 137 | } 138 | } 139 | } 140 | return u 141 | } 142 | 143 | // NewTracker creates a Tracker. 144 | func NewTracker(namer common.MatchNamer, trackChildren bool, recheck bool, recheckTimeLimit time.Duration, debug bool) *Tracker { 145 | return &Tracker{ 146 | namer: namer, 147 | tracked: make(map[ID]*trackedProc), 148 | procIds: make(map[int]ID), 149 | trackChildren: trackChildren, 150 | recheck: recheck, 151 | recheckTimeLimit: recheckTimeLimit, 152 | username: make(map[int]string), 153 | debug: debug, 154 | } 155 | } 156 | 157 | func (t *Tracker) track(groupName string, idinfo IDInfo) { 158 | tproc := trackedProc{ 159 | groupName: groupName, 160 | static: idinfo.Static, 161 | metrics: idinfo.Metrics, 162 | } 163 | if len(idinfo.Threads) > 0 { 164 | tproc.threads = make(map[ThreadID]trackedThread) 165 | for _, thr := range idinfo.Threads { 166 | tproc.threads[thr.ThreadID] = trackedThread{ 167 | thr.ThreadName, thr.Counts, Delta{}, time.Time{}, thr.Wchan} 168 | } 169 | } 170 | 171 | // If the process started while the Tracker was running, everything in its current 172 | // counters happened between the last Update() and this one, so it should all be counted.
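// For example, a proc that started and burned 2s of CPU since the previous
// scan has no earlier baseline, so its entire Counts total becomes its first Delta.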
173 | if idinfo.StartTime.After(t.firstUpdateAt) { 174 | tproc.lastaccum = Delta(tproc.metrics.Counts) 175 | } 176 | 177 | t.tracked[idinfo.ID] = &tproc 178 | } 179 | 180 | func (t *Tracker) ignore(id ID, startTime time.Time) { 181 | // only ignore ID if we didn't set recheck to true 182 | if t.recheck { 183 | if t.recheckTimeLimit == 0 { 184 | // plain -recheck with no time limit: 185 | return 186 | } 187 | if startTime.Add(t.recheckTimeLimit).After(time.Now()) { 188 | // -recheckWithTimeLimit is used and the limit is not reached yet: 189 | return 190 | } 191 | } 192 | t.tracked[id] = nil 193 | } 194 | 195 | func (tp *trackedProc) update(metrics Metrics, now time.Time, cerrs *CollectErrors, threads []Thread) { 196 | // newcounts: resource consumption since last cycle 197 | newcounts := metrics.Counts 198 | tp.lastaccum = newcounts.Sub(tp.metrics.Counts) 199 | tp.metrics = metrics 200 | tp.lastUpdate = now 201 | if len(threads) > 1 { 202 | if tp.threads == nil { 203 | tp.threads = make(map[ThreadID]trackedThread) 204 | } 205 | for _, thr := range threads { 206 | tt := trackedThread{thr.ThreadName, thr.Counts, Delta{}, now, thr.Wchan} 207 | if old, ok := tp.threads[thr.ThreadID]; ok { 208 | tt.latest, tt.accum = thr.Counts.Sub(old.accum), thr.Counts 209 | } 210 | tp.threads[thr.ThreadID] = tt 211 | } 212 | for id, tt := range tp.threads { 213 | if tt.lastUpdate != now { 214 | delete(tp.threads, id) 215 | } 216 | } 217 | } else { 218 | tp.threads = nil 219 | } 220 | } 221 | 222 | // handleProc updates the tracker if it's a known and not ignored proc. 223 | // If it's neither known nor ignored, newProc will be non-nil. 224 | // It is not an error if the process disappears while we are reading 225 | // its info out of /proc, it just means nothing will be returned and 226 | // the tracker will be unchanged. 227 | func (t *Tracker) handleProc(proc Proc, updateTime time.Time) (*IDInfo, CollectErrors) { 228 | var cerrs CollectErrors 229 | procID, err := proc.GetProcID() 230 | if err != nil { 231 | if t.debug { 232 | log.Printf("error getting proc ID for pid %+v: %v", proc.GetPid(), err) 233 | } 234 | return nil, cerrs 235 | } 236 | 237 | // Do nothing if we're ignoring this proc. 238 | last, known := t.tracked[procID] 239 | if known && last == nil { 240 | return nil, cerrs 241 | } 242 | 243 | metrics, softerrors, err := proc.GetMetrics() 244 | if err != nil { 245 | if t.debug { 246 | log.Printf("error reading metrics for %+v: %v", procID, err) 247 | } 248 | // This usually happens due to the proc having exited, i.e. 249 | // we lost the race. We don't count that as an error. 
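// (ErrProcNotExist is the sentinel that read.go's GetCounts returns when the
// underlying stat read fails with os.ErrNotExist.)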
250 | if err != ErrProcNotExist { 251 | cerrs.Read++ 252 | } 253 | return nil, cerrs 254 | } 255 | 256 | var threads []Thread 257 | threads, err = proc.GetThreads() 258 | if err != nil { 259 | if t.debug { 260 | log.Printf("can't read thread metrics for %+v: %v", procID, err) 261 | } 262 | softerrors |= 1 263 | } 264 | cerrs.Partial += softerrors 265 | 266 | if len(threads) > 0 { 267 | metrics.Counts.CtxSwitchNonvoluntary, metrics.Counts.CtxSwitchVoluntary = 0, 0 268 | for _, thread := range threads { 269 | metrics.Counts.CtxSwitchNonvoluntary += thread.Counts.CtxSwitchNonvoluntary 270 | metrics.Counts.CtxSwitchVoluntary += thread.Counts.CtxSwitchVoluntary 271 | metrics.States.Add(thread.States) 272 | } 273 | } 274 | 275 | var newProc *IDInfo 276 | if known { 277 | last.update(metrics, updateTime, &cerrs, threads) 278 | } else { 279 | static, err := proc.GetStatic() 280 | if err != nil { 281 | if t.debug { 282 | log.Printf("error reading static details for %+v: %v", procID, err) 283 | } 284 | return nil, cerrs 285 | } 286 | newProc = &IDInfo{procID, static, metrics, threads} 287 | if t.debug { 288 | log.Printf("found new proc: %s", newProc) 289 | } 290 | 291 | // Is this a new process with the same pid as one we already know? 292 | // Then delete it from the known map, otherwise the cleanup in Update() 293 | // will remove the ProcIds entry we're creating here. 294 | if oldProcID, ok := t.procIds[procID.Pid]; ok { 295 | delete(t.tracked, oldProcID) 296 | } 297 | t.procIds[procID.Pid] = procID 298 | } 299 | return newProc, cerrs 300 | } 301 | 302 | // update scans procs and updates metrics for those which are tracked. Processes 303 | // that have gone away get removed from the Tracked map. New processes are 304 | // returned, along with the count of nonfatal errors. 305 | func (t *Tracker) update(procs Iter) ([]IDInfo, CollectErrors, error) { 306 | var newProcs []IDInfo 307 | var colErrs CollectErrors 308 | var now = time.Now() 309 | 310 | for procs.Next() { 311 | newProc, cerrs := t.handleProc(procs, now) 312 | if newProc != nil { 313 | newProcs = append(newProcs, *newProc) 314 | } 315 | colErrs.Read += cerrs.Read 316 | colErrs.Partial += cerrs.Partial 317 | } 318 | 319 | err := procs.Close() 320 | if err != nil { 321 | return nil, colErrs, fmt.Errorf("Error reading procs: %v", err) 322 | } 323 | 324 | // Rather than allocating a new map each time to detect procs that have 325 | // disappeared, we bump the last update time on those that are still 326 | // present. Then as a second pass we traverse the map looking for 327 | // stale procs and removing them. 328 | for procID, pinfo := range t.tracked { 329 | if pinfo == nil { 330 | // TODO is this a bug? we're not tracking the proc so we don't see it go away so ProcIds 331 | // and Tracked are leaking? 332 | continue 333 | } 334 | if pinfo.lastUpdate != now { 335 | delete(t.tracked, procID) 336 | delete(t.procIds, procID.Pid) 337 | } 338 | } 339 | 340 | return newProcs, colErrs, nil 341 | } 342 | 343 | // checkAncestry walks the process tree recursively towards the root, 344 | // stopping at pid 1 or upon finding a parent that's already tracked 345 | // or ignored. If we find a tracked parent track this one too; if not, 346 | // ignore this one. 
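// For example (hypothetical names): with trackChildren on and a namer matching
// only "nginx", a worker whose parent is the tracked nginx master is tracked
// under the master's group, while a proc whose lineage reaches pid 1 without a
// match gets ignored.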
347 | func (t *Tracker) checkAncestry(idinfo IDInfo, newprocs map[ID]IDInfo) string { 348 | ppid := idinfo.ParentPid 349 | pProcID := t.procIds[ppid] 350 | if pProcID.Pid < 1 { 351 | if t.debug { 352 | log.Printf("ignoring unmatched proc with no matched parent: %+v", idinfo) 353 | } 354 | // Reached root of process tree without finding a tracked parent. 355 | t.ignore(idinfo.ID, idinfo.Static.StartTime) 356 | return "" 357 | } 358 | 359 | // Is the parent already known to the tracker? 360 | if ptproc, ok := t.tracked[pProcID]; ok { 361 | if ptproc != nil { 362 | if t.debug { 363 | log.Printf("matched as %q because child of %+v: %+v", 364 | ptproc.groupName, pProcID, idinfo) 365 | } 366 | // We've found a tracked parent. 367 | t.track(ptproc.groupName, idinfo) 368 | return ptproc.groupName 369 | } 370 | // We've found an untracked parent. 371 | t.ignore(idinfo.ID, idinfo.Static.StartTime) 372 | return "" 373 | } 374 | 375 | // Is the parent another new process? 376 | if pinfoid, ok := newprocs[pProcID]; ok { 377 | if name := t.checkAncestry(pinfoid, newprocs); name != "" { 378 | if t.debug { 379 | log.Printf("matched as %q because child of %+v: %+v", 380 | name, pProcID, idinfo) 381 | } 382 | // We've found a tracked parent, which implies this entire lineage should be tracked. 383 | t.track(name, idinfo) 384 | return name 385 | } 386 | } 387 | 388 | // Parent is dead, i.e. we never saw it, or there's no tracked proc in our ancestry. 389 | if t.debug { 390 | log.Printf("ignoring unmatched proc with no matched parent: %+v", idinfo) 391 | } 392 | t.ignore(idinfo.ID, idinfo.Static.StartTime) 393 | return "" 394 | } 395 | 396 | func (t *Tracker) lookupUid(uid int) string { 397 | if name, ok := t.username[uid]; ok { 398 | return name 399 | } 400 | 401 | var name string 402 | uidstr := strconv.Itoa(uid) 403 | u, err := user.LookupId(uidstr) 404 | if err != nil { 405 | name = uidstr 406 | } else { 407 | name = u.Username 408 | } 409 | t.username[uid] = name 410 | return name 411 | } 412 | 413 | // Update modifies the tracker's internal state based on what it reads from 414 | // iter. Tracks any new procs the namer wants tracked, and updates 415 | // its metrics for existing tracked procs. Returns nonfatal errors 416 | // and the status of all tracked procs, or an error if fatal. 417 | func (t *Tracker) Update(iter Iter) (CollectErrors, []Update, error) { 418 | if t.firstUpdateAt.IsZero() { 419 | t.firstUpdateAt = time.Now() 420 | } 421 | 422 | newProcs, colErrs, err := t.update(iter) 423 | if err != nil { 424 | return colErrs, nil, err 425 | } 426 | 427 | // Step 1: track any new proc that should be tracked based on its name and cmdline. 428 | untracked := make(map[ID]IDInfo) 429 | for _, idinfo := range newProcs { 430 | nacl := common.ProcAttributes{ 431 | Name: idinfo.Name, 432 | Cmdline: idinfo.Cmdline, 433 | Cgroups: idinfo.Cgroups, 434 | Username: t.lookupUid(idinfo.EffectiveUID), 435 | PID: idinfo.Pid, 436 | StartTime: idinfo.StartTime, 437 | } 438 | wanted, gname := t.namer.MatchAndName(nacl) 439 | if wanted { 440 | if t.debug { 441 | log.Printf("matched as %q: %+v", gname, idinfo) 442 | } 443 | t.track(gname, idinfo) 444 | } else { 445 | untracked[idinfo.ID] = idinfo 446 | } 447 | } 448 | 449 | // Step 2: track any untracked new proc that should be tracked because its parent is tracked. 
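// Note that the untracked map built in Step 1 doubles as checkAncestry's
// newprocs argument, so a parent that first appeared in this same scan can
// still be resolved.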
450 | if t.trackChildren { 451 | for _, idinfo := range untracked { 452 | if _, ok := t.tracked[idinfo.ID]; ok { 453 | // Already tracked or ignored in an earlier iteration 454 | continue 455 | } 456 | 457 | t.checkAncestry(idinfo, untracked) 458 | } 459 | } 460 | 461 | tp := []Update{} 462 | for _, tproc := range t.tracked { 463 | if tproc != nil { 464 | tp = append(tp, tproc.getUpdate()) 465 | } 466 | } 467 | return colErrs, tp, nil 468 | } 469 | -------------------------------------------------------------------------------- /proc/tracker_test.go: -------------------------------------------------------------------------------- 1 | package proc 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/google/go-cmp/cmp" 8 | "github.com/google/go-cmp/cmp/cmpopts" 9 | ) 10 | 11 | // Verify that the tracker finds and tracks or ignores procs based on the 12 | // namer, and that it can distinguish between two procs with the same pid 13 | // but different start time. 14 | func TestTrackerBasic(t *testing.T) { 15 | p1, p2, p3 := 1, 2, 3 16 | n1, n2, n3, n4 := "g1", "g2", "g3", "g4" 17 | t1, t2, t3 := time.Unix(1, 0).UTC(), time.Unix(2, 0).UTC(), time.Unix(3, 0).UTC() 18 | 19 | tests := []struct { 20 | procs []IDInfo 21 | want []Update 22 | }{ 23 | { 24 | []IDInfo{newProcStart(p1, n1, 1), newProcStart(p3, n3, 1)}, 25 | []Update{{GroupName: n1, Start: t1, Wchans: msi{}}}, 26 | }, 27 | { 28 | // p3 (ignored) has exited and p2 has appeared 29 | []IDInfo{newProcStart(p1, n1, 1), newProcStart(p2, n2, 2)}, 30 | []Update{{GroupName: n1, Start: t1, Wchans: msi{}}, {GroupName: n2, Start: t2, Wchans: msi{}}}, 31 | }, 32 | { 33 | // p1 has exited and a new proc with a new name has taken its pid 34 | []IDInfo{newProcStart(p1, n4, 3), newProcStart(p2, n2, 2)}, 35 | []Update{{GroupName: n4, Start: t3, Wchans: msi{}}, {GroupName: n2, Start: t2, Wchans: msi{}}}, 36 | }, 37 | } 38 | // Note that n3 should not be tracked according to our namer. 39 | tr := NewTracker(newNamer(n1, n2, n4), false, false, 0, false) 40 | 41 | opts := cmpopts.SortSlices(lessUpdateGroupName) 42 | for i, tc := range tests { 43 | _, got, err := tr.Update(procInfoIter(tc.procs...)) 44 | noerr(t, err) 45 | if diff := cmp.Diff(got, tc.want, opts); diff != "" { 46 | t.Errorf("%d: update differs: (-got +want)\n%s", i, diff) 47 | } 48 | } 49 | } 50 | 51 | // TestTrackerChildren verifies that when the tracker is asked to track 52 | // children, processes not selected by the namer are still tracked if 53 | // they're children of ones that are. 
54 | func TestTrackerChildren(t *testing.T) { 55 | p1, p2, p3 := 1, 2, 3 56 | n1, n2, n3 := "g1", "g2", "g3" 57 | // In this test everything starts at time t1 for simplicity 58 | t1 := time.Unix(0, 0).UTC() 59 | 60 | tests := []struct { 61 | procs []IDInfo 62 | want []Update 63 | }{ 64 | { 65 | []IDInfo{ 66 | newProcParent(p1, n1, 0), 67 | newProcParent(p2, n2, p1), 68 | }, 69 | []Update{{GroupName: n2, Start: t1, Wchans: msi{}}}, 70 | }, 71 | { 72 | []IDInfo{ 73 | newProcParent(p1, n1, 0), 74 | newProcParent(p2, n2, p1), 75 | newProcParent(p3, n3, p2), 76 | }, 77 | []Update{{GroupName: n2, Start: t1, Wchans: msi{}}, {GroupName: n2, Start: t1, Wchans: msi{}}}, 78 | }, 79 | } 80 | // Only n2 and children of n2s should be tracked 81 | tr := NewTracker(newNamer(n2), true, false, 0, false) 82 | 83 | for i, tc := range tests { 84 | _, got, err := tr.Update(procInfoIter(tc.procs...)) 85 | noerr(t, err) 86 | if diff := cmp.Diff(got, tc.want); diff != "" { 87 | t.Errorf("%d: update differs: (-got +want)\n%s", i, diff) 88 | } 89 | } 90 | } 91 | 92 | // TestTrackerMetrics verifies that the updates returned by the tracker 93 | // match the input we're giving it. 94 | func TestTrackerMetrics(t *testing.T) { 95 | p, n, tm := 1, "g1", time.Unix(0, 0).UTC() 96 | 97 | tests := []struct { 98 | proc IDInfo 99 | want Update 100 | }{ 101 | { 102 | piinfost(p, n, Counts{1, 2, 3, 4, 5, 6, 0, 0}, Memory{7, 8, 0, 0, 0}, 103 | Filedesc{1, 10}, 9, States{Sleeping: 1}), 104 | Update{n, Delta{}, Memory{7, 8, 0, 0, 0}, Filedesc{1, 10}, tm, 105 | 9, States{Sleeping: 1}, msi{}, nil}, 106 | }, 107 | { 108 | piinfost(p, n, Counts{2, 3, 4, 5, 6, 7, 0, 0}, Memory{1, 2, 0, 0, 0}, 109 | Filedesc{2, 20}, 1, States{Running: 1}), 110 | Update{n, Delta{1, 1, 1, 1, 1, 1, 0, 0}, Memory{1, 2, 0, 0, 0}, 111 | Filedesc{2, 20}, tm, 1, States{Running: 1}, msi{}, nil}, 112 | }, 113 | } 114 | tr := NewTracker(newNamer(n), false, false, 0, false) 115 | 116 | for i, tc := range tests { 117 | _, got, err := tr.Update(procInfoIter(tc.proc)) 118 | noerr(t, err) 119 | if diff := cmp.Diff(got, []Update{tc.want}); diff != "" { 120 | t.Errorf("%d: update differs: (-got +want)\n%s", i, diff) 121 | } 122 | } 123 | } 124 | 125 | func TestTrackerThreads(t *testing.T) { 126 | p, n, tm := 1, "g1", time.Unix(0, 0).UTC() 127 | 128 | tests := []struct { 129 | proc IDInfo 130 | want Update 131 | }{ 132 | { 133 | piinfo(p, n, Counts{}, Memory{}, Filedesc{1, 1}, 1), 134 | Update{n, Delta{}, Memory{}, Filedesc{1, 1}, tm, 1, States{}, msi{}, nil}, 135 | }, { 136 | piinfot(p, n, Counts{}, Memory{}, Filedesc{1, 1}, []Thread{ 137 | {ThreadID(ID{p, 0}), "t1", Counts{1, 2, 3, 4, 5, 6, 0, 0}, "", States{}}, 138 | {ThreadID(ID{p + 1, 0}), "t2", Counts{1, 1, 1, 1, 1, 1, 0, 0}, "", States{}}, 139 | }), 140 | Update{n, Delta{}, Memory{}, Filedesc{1, 1}, tm, 2, States{}, msi{}, 141 | []ThreadUpdate{ 142 | {"t1", Delta{}}, 143 | {"t2", Delta{}}, 144 | }, 145 | }, 146 | }, { 147 | piinfot(p, n, Counts{}, Memory{}, Filedesc{1, 1}, []Thread{ 148 | {ThreadID(ID{p, 0}), "t1", Counts{2, 3, 4, 5, 6, 7, 0, 0}, "", States{}}, 149 | {ThreadID(ID{p + 1, 0}), "t2", Counts{2, 2, 2, 2, 2, 2, 0, 0}, "", States{}}, 150 | {ThreadID(ID{p + 2, 0}), "t2", Counts{1, 1, 1, 1, 1, 1, 0, 0}, "", States{}}, 151 | }), 152 | Update{n, Delta{}, Memory{}, Filedesc{1, 1}, tm, 3, States{}, msi{}, 153 | []ThreadUpdate{ 154 | {"t1", Delta{1, 1, 1, 1, 1, 1, 0, 0}}, 155 | {"t2", Delta{1, 1, 1, 1, 1, 1, 0, 0}}, 156 | {"t2", Delta{}}, 157 | }, 158 | }, 159 | }, { 160 | piinfot(p, n, Counts{}, Memory{}, 
Filedesc{1, 1}, []Thread{ 161 | {ThreadID(ID{p, 0}), "t1", Counts{2, 3, 4, 5, 6, 7, 0, 0}, "", States{}}, 162 | {ThreadID(ID{p + 2, 0}), "t2", Counts{1, 2, 3, 4, 5, 6, 0, 0}, "", States{}}, 163 | }), 164 | Update{n, Delta{}, Memory{}, Filedesc{1, 1}, tm, 2, States{}, msi{}, 165 | []ThreadUpdate{ 166 | {"t1", Delta{}}, 167 | {"t2", Delta{0, 1, 2, 3, 4, 5, 0, 0}}, 168 | }, 169 | }, 170 | }, 171 | } 172 | tr := NewTracker(newNamer(n), false, false, 0, false) 173 | 174 | opts := cmpopts.SortSlices(lessThreadUpdate) 175 | for i, tc := range tests { 176 | _, got, err := tr.Update(procInfoIter(tc.proc)) 177 | noerr(t, err) 178 | if diff := cmp.Diff(got, []Update{tc.want}, opts); diff != "" { 179 | t.Errorf("%d: update differs: (-got +want)\n%s, %v, %v", i, diff, got[0].Threads, tc.want.Threads) 180 | } 181 | } 182 | } 183 | --------------------------------------------------------------------------------
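Taken together, a minimal consumer of this package might look like the sketch
below. This is illustrative only: it assumes some implementation of
common.MatchNamer (constructing one is not shown in these files), and error
handling is pared down.

    package main

    import (
        "log"
        "time"

        common "github.com/ncabatoff/process-exporter"
        "github.com/ncabatoff/process-exporter/proc"
    )

    // poll scans /proc once per second and logs per-group user-CPU deltas.
    func poll(namer common.MatchNamer) {
        fs, err := proc.NewFS("/proc", false)
        if err != nil {
            log.Fatal(err)
        }
        // trackChildren=true, recheck=false, no recheck time limit, debug=false.
        tr := proc.NewTracker(namer, true, false, 0, false)
        for range time.Tick(time.Second) {
            _, updates, err := tr.Update(fs.AllProcs())
            if err != nil {
                log.Print(err)
                continue
            }
            for _, u := range updates {
                log.Printf("%s: user CPU +%.2fs", u.GroupName, u.Latest.CPUUserTime)
            }
        }
    }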