├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── collector ├── asserts.go ├── background_flushing.go ├── collection_status.go ├── collection_wiredtiger.go ├── conn_pool_stats.go ├── conn_pool_stats_hosts.go ├── connections.go ├── cursors.go ├── database_status.go ├── durability.go ├── extra_info.go ├── fixtures │ ├── server_status.bson │ ├── server_status.json │ ├── top_status.bson │ └── top_status.json ├── global_lock.go ├── index_counters.go ├── locks.go ├── main_test.go ├── memory.go ├── metrics.go ├── mongodb_collector.go ├── mongodb_collector_test.go ├── network.go ├── op_counters.go ├── oplog_status.go ├── oplog_tail.go ├── profile_status.go ├── replset_conf.go ├── replset_status.go ├── server_status.go ├── server_status_test.go ├── storage_engine.go ├── tcmalloc.go ├── top_counters.go ├── top_status.go ├── top_status_test.go └── wiredtiger.go ├── glide.lock ├── glide.yaml ├── grafana_dashboards └── dashboard.json ├── groups.yml ├── mongodb_exporter.go ├── screenshots └── mongodb-dashboard-1.png ├── shared ├── connection.go ├── group_desc.go ├── group_desc_test.go ├── utils.go └── utils_test.go └── snap ├── daemon_arguments ├── snap_config_wrapper └── snapcraft.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | *.out 6 | 7 | # Folders 8 | _obj 9 | _test 10 | 11 | # Architecture specific extensions/prefixes 12 | *.[568vq] 13 | [568vq].out 14 | 15 | *.cgo1.go 16 | *.cgo2.c 17 | _cgo_defun.c 18 | _cgo_gotypes.go 19 | _cgo_export.* 20 | 21 | _testmain.go 22 | 23 | *.exe 24 | *.test 25 | *.prof 26 | *.swp 27 | .DS_Store 28 | 29 | mongodb_exporter 30 | vendor/ 31 | release/ 32 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.9.2 4 | 5 | branches: 6 | only: 7 | - master 8 | - /^v\d+(\.\d+)+$/ 9 | 10 | env: 11 | - DIR_DARWIN=mongodb_exporter-${TRAVIS_BRANCH}.darwin-amd64 RELEASE_DIR_DARWIN=release/${DIR_DARWIN} DIR_LINUX=mongodb_exporter-${TRAVIS_BRANCH}.linux-amd64 RELEASE_DIR_LINUX=release/${DIR_LINUX} 12 | 13 | before_install: 14 | - sudo apt-get install curl git make perl 15 | - curl -s https://glide.sh/get | sh 16 | 17 | script: 18 | - make release 19 | 20 | - | 21 | tarRelease() { 22 | tarDirName=${1} 23 | releaseBinary=${2} 24 | arch=${3} 25 | 26 | tarDir=release/${tarDirName} 27 | 28 | mkdir -p ${tarDir} 29 | cp release/${releaseBinary} ${tarDir}/mongodb_exporter 30 | cp LICENSE README.md ${tarDir} 31 | ( cd release; tar -cvzf mongodb_exporter-${TRAVIS_BRANCH}.${arch}.tar.gz $1/* ) 32 | } 33 | 34 | tarRelease ${DIR_DARWIN} mongodb_exporter-darwin-amd64 darwin-amd64 35 | tarRelease ${DIR_LINUX} mongodb_exporter-linux-amd64 linux-amd64 36 | 37 | 38 | deploy: 39 | provider: releases 40 | api_key: 41 | secure: 42 | file: 43 | - release/mongodb_exporter-${TRAVIS_BRANCH}.darwin-amd64.tar.gz 44 | - release/mongodb_exporter-${TRAVIS_BRANCH}.linux-amd64.tar.gz 45 | skip_cleanup: true 46 | on: 47 | tags: true 48 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:alpine as builder 2 | 3 | RUN apk --no-cache add curl git make perl 4 | RUN curl -s https://glide.sh/get | sh 5 | COPY . 
/go/src/github.com/dcu/mongodb_exporter 6 | RUN cd /go/src/github.com/dcu/mongodb_exporter && make release 7 | 8 | FROM alpine:3.4 9 | MAINTAINER David Cuadrado 10 | EXPOSE 9001 11 | 12 | RUN apk add --update ca-certificates 13 | COPY --from=builder /go/src/github.com/dcu/mongodb_exporter/release/mongodb_exporter-linux-amd64 /usr/local/bin/mongodb_exporter 14 | 15 | ENTRYPOINT [ "mongodb_exporter" ] 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 David Cuadrado 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | package = github.com/dcu/mongodb_exporter 2 | TAG := $(shell git tag | sort -r | head -n 1) 3 | 4 | test: 5 | go test github.com/dcu/mongodb_exporter/collector -cover -coverprofile=collector_coverage.out -short 6 | go tool cover -func=collector_coverage.out 7 | go test github.com/dcu/mongodb_exporter/shared -cover -coverprofile=shared_coverage.out -short 8 | go tool cover -func=shared_coverage.out 9 | @rm *.out 10 | 11 | deps: 12 | glide install 13 | 14 | build: deps 15 | go build mongodb_exporter.go 16 | 17 | release: deps 18 | mkdir -p release 19 | perl -p -i -e 's/\{\{VERSION}}/$(TAG)/g' mongodb_exporter.go 20 | GOOS=darwin GOARCH=amd64 go build -o release/mongodb_exporter-darwin-amd64 $(package) 21 | GOOS=linux GOARCH=amd64 go build -o release/mongodb_exporter-linux-amd64 $(package) 22 | perl -p -i -e 's/$(TAG)/\{\{VERSION}}/g' mongodb_exporter.go 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mongodb Exporter 2 | 3 | MongoDB exporter for prometheus.io, written in go. 
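A typical quick start once the binary is built (see the Building section below). The MONGODB_URL environment variable is the one documented in the Docker section of this README; the 9001 port matches the Dockerfile's `EXPOSE 9001` and `/metrics` is the conventional Prometheus endpoint path, so treat this as a sketch and adjust if your build or flags differ:

    MONGODB_URL=mongodb://localhost:27017 ./mongodb_exporter
    curl http://localhost:9001/metrics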
4 | 5 | ![screenshot](https://raw.githubusercontent.com/dcu/mongodb_exporter/321189c90831d5ad5a8c6fb04925a335b37f51b8/screenshots/mongodb-dashboard-1.png) 6 | 7 | [Grafana Dashboard](https://grafana.com/dashboards/2583) 8 | 9 | ## Installing 10 | 11 | Download a [release](https://github.com/dcu/mongodb_exporter/releases) 12 | 13 | ## Building 14 | 15 | Requires [glide](https://github.com/Masterminds/glide) for dependency management 16 | 17 | git clone git@github.com:dcu/mongodb_exporter.git $GOPATH/src/github.com/dcu/mongodb_exporter 18 | cd $GOPATH/src/github.com/dcu/mongodb_exporter 19 | make build 20 | ./mongodb_exporter -h 21 | 22 | ## Building Docker image 23 | 24 | Build the Docker image 25 | 26 | docker build -t mongodb_exporter . 27 | 28 | Verify Docker image runs 29 | 30 | docker run --rm mongodb_exporter -h 31 | 32 | The mongodb url can contain credentials which can be seen by other users on the system when passed in as command line flag. 33 | To pass in the mongodb url securely, you can set the MONGODB_URL environment variable instead. 34 | 35 | ## Available groups of data 36 | 37 | Name | Description 38 | ---------|------------ 39 | asserts | The asserts group reports the number of asserts on the database. While assert errors are typically uncommon, if there are non-zero values for the asserts, you should check the log file for the mongod process for more information. In many cases these errors are trivial, but are worth investigating. 40 | durability | The durability group contains data regarding the mongod's journaling-related operations and performance. mongod must be running with journaling for these data to appear in the output of "serverStatus". 41 | background_flushing | mongod periodically flushes writes to disk. In the default configuration, this happens every 60 seconds. The background_flushing group contains data regarding these operations. Consider these values if you have concerns about write performance and journaling. 42 | connections | The connections groups contains data regarding the current status of incoming connections and availability of the database server. Use these values to assess the current load and capacity requirements of the server. 43 | extra_info | The extra_info group holds data collected by the mongod instance about the underlying system. Your system may only report a subset of these fields. 44 | global_lock | The global_lock group contains information regarding the database’s current lock state, historical lock status, current operation queue, and the number of active clients. 45 | index_counters | The index_counters groupp reports information regarding the state and use of indexes in MongoDB. 46 | network | The network group contains data regarding MongoDB’s network use. 47 | op_counters | The op_counters group provides an overview of database operations by type and makes it possible to analyze the load on the database in more granular manner. These numbers will grow over time and in response to database use. Analyze these values over time to track database utilization. 48 | op_counters_repl | The op_counters_repl group, similar to the op_counters data structure, provides an overview of database replication operations by type and makes it possible to analyze the load on the replica in more granular manner. These values only appear when the current host has replication enabled. These values will differ from the opcounters values because of how MongoDB serializes operations during replication. 
These numbers will grow over time in response to database use. Analyze these values over time to track database utilization. 49 | memory | The memory group holds information regarding the target system architecture of mongod and current memory use 50 | locks | The locks group containsdata that provides a granular report on MongoDB database-level lock use 51 | metrics | The metrics group holds a number of statistics that reflect the current use and state of a running mongod instance. 52 | cursors | The cursors group contains data regarding cursor state and use. This group is disabled by default because it is deprecated in mongodb >= 2.6. 53 | top | The top group provides an overview of database operations by type for each database collections and makes it possible to analyze the load on the database in more granular manner. These numbers will grow over time and in response to database use. Analyze these values over time to track database utilization. For more information see [the official documentation.](http://docs.mongodb.com/manual/reference/command/top/index.html) 54 | 55 | For more information see [the official documentation.](http://docs.mongodb.org/manual/reference/command/serverStatus/) 56 | 57 | 58 | ## Roadmap 59 | 60 | - Collect data from http://docs.mongodb.org/manual/reference/command/replSetGetStatus/ 61 | -------------------------------------------------------------------------------- /collector/asserts.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | assertsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 9 | Namespace: Namespace, 10 | Name: "asserts_total", 11 | Help: "The asserts document reports the number of asserts on the database. While assert errors are typically uncommon, if there are non-zero values for the asserts, you should check the log file for the mongod process for more information. In many cases these errors are trivial, but are worth investigating.", 12 | }, []string{"type"}) 13 | ) 14 | 15 | // AssertsStats has the assets metrics 16 | type AssertsStats struct { 17 | Regular float64 `bson:"regular"` 18 | Warning float64 `bson:"warning"` 19 | Msg float64 `bson:"msg"` 20 | User float64 `bson:"user"` 21 | Rollovers float64 `bson:"rollovers"` 22 | } 23 | 24 | // Export exports the metrics to prometheus. 
25 | func (asserts *AssertsStats) Export(ch chan<- prometheus.Metric) { 26 | assertsTotal.WithLabelValues("regular").Set(asserts.Regular) 27 | assertsTotal.WithLabelValues("warning").Set(asserts.Warning) 28 | assertsTotal.WithLabelValues("msg").Set(asserts.Msg) 29 | assertsTotal.WithLabelValues("user").Set(asserts.User) 30 | assertsTotal.WithLabelValues("rollovers").Set(asserts.Rollovers) 31 | assertsTotal.Collect(ch) 32 | } 33 | 34 | // Describe describes the metrics for prometheus 35 | func (asserts *AssertsStats) Describe(ch chan<- *prometheus.Desc) { 36 | assertsTotal.Describe(ch) 37 | } 38 | -------------------------------------------------------------------------------- /collector/background_flushing.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/prometheus/client_golang/prometheus" 7 | ) 8 | 9 | var ( 10 | backgroundFlushingflushesTotal = prometheus.NewCounter(prometheus.CounterOpts{ 11 | Namespace: Namespace, 12 | Subsystem: "background_flushing", 13 | Name: "flushes_total", 14 | Help: "flushes is a counter that collects the number of times the database has flushed all writes to disk. This value will grow as database runs for longer periods of time", 15 | }) 16 | backgroundFlushingtotalMilliseconds = prometheus.NewCounter(prometheus.CounterOpts{ 17 | Namespace: Namespace, 18 | Subsystem: "background_flushing", 19 | Name: "total_milliseconds", 20 | Help: "The total_ms value provides the total number of milliseconds (ms) that the mongod processes have spent writing (i.e. flushing) data to disk. Because this is an absolute value, consider the value offlushes and average_ms to provide better context for this datum", 21 | }) 22 | backgroundFlushingaverageMilliseconds = prometheus.NewGauge(prometheus.GaugeOpts{ 23 | Namespace: Namespace, 24 | Subsystem: "background_flushing", 25 | Name: "average_milliseconds", 26 | Help: `The average_ms value describes the relationship between the number of flushes and the total amount of time that the database has spent writing data to disk. The larger flushes is, the more likely this value is likely to represent a "normal," time; however, abnormal data can skew this value`, 27 | }) 28 | backgroundFlushinglastMilliseconds = prometheus.NewGauge(prometheus.GaugeOpts{ 29 | Namespace: Namespace, 30 | Subsystem: "background_flushing", 31 | Name: "last_milliseconds", 32 | Help: "The value of the last_ms field is the amount of time, in milliseconds, that the last flush operation took to complete. Use this value to verify that the current performance of the server and is in line with the historical data provided by average_ms and total_ms", 33 | }) 34 | backgroundFlushinglastFinishedTime = prometheus.NewGauge(prometheus.GaugeOpts{ 35 | Namespace: Namespace, 36 | Subsystem: "background_flushing", 37 | Name: "last_finished_time", 38 | Help: "The last_finished field provides a timestamp of the last completed flush operation in the ISODateformat. 
If this value is more than a few minutes old relative to your server's current time and accounting for differences in time zone, restarting the database may result in some data loss", 39 | }) 40 | ) 41 | 42 | // FlushStats is the flush stats metrics 43 | type FlushStats struct { 44 | Flushes float64 `bson:"flushes"` 45 | TotalMs float64 `bson:"total_ms"` 46 | AverageMs float64 `bson:"average_ms"` 47 | LastMs float64 `bson:"last_ms"` 48 | LastFinished time.Time `bson:"last_finished"` 49 | } 50 | 51 | // Export exports the metrics for prometheus. 52 | func (flushStats *FlushStats) Export(ch chan<- prometheus.Metric) { 53 | backgroundFlushingflushesTotal.Set(flushStats.Flushes) 54 | backgroundFlushingtotalMilliseconds.Set(flushStats.TotalMs) 55 | backgroundFlushingaverageMilliseconds.Set(flushStats.AverageMs) 56 | backgroundFlushinglastMilliseconds.Set(flushStats.LastMs) 57 | backgroundFlushinglastFinishedTime.Set(float64(flushStats.LastFinished.Unix())) 58 | 59 | backgroundFlushingflushesTotal.Collect(ch) 60 | backgroundFlushingtotalMilliseconds.Collect(ch) 61 | backgroundFlushingaverageMilliseconds.Collect(ch) 62 | backgroundFlushinglastMilliseconds.Collect(ch) 63 | backgroundFlushinglastFinishedTime.Collect(ch) 64 | } 65 | 66 | // Describe describes the metrics for prometheus 67 | func (flushStats *FlushStats) Describe(ch chan<- *prometheus.Desc) { 68 | backgroundFlushingflushesTotal.Describe(ch) 69 | backgroundFlushingtotalMilliseconds.Describe(ch) 70 | backgroundFlushingaverageMilliseconds.Describe(ch) 71 | backgroundFlushinglastMilliseconds.Describe(ch) 72 | backgroundFlushinglastFinishedTime.Describe(ch) 73 | } 74 | -------------------------------------------------------------------------------- /collector/collection_status.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/golang/glog" 5 | "github.com/prometheus/client_golang/prometheus" 6 | mgo "gopkg.in/mgo.v2" 7 | "gopkg.in/mgo.v2/bson" 8 | ) 9 | 10 | var ( 11 | count = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 12 | Namespace: Namespace, 13 | Subsystem: "collection", 14 | Name: "total_objects", 15 | Help: "The number of objects or documents in this collection", 16 | }, []string{"ns"}) 17 | 18 | size = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 19 | Namespace: Namespace, 20 | Subsystem: "collection", 21 | Name: "size_bytes", 22 | Help: "The total size in memory of all records in a collection", 23 | }, []string{"ns"}) 24 | 25 | avgObjSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 26 | Namespace: Namespace, 27 | Subsystem: "collection", 28 | Name: "avg_objsize_bytes", 29 | Help: "The average size of an object in the collection", 30 | }, []string{"ns"}) 31 | 32 | storageSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 33 | Namespace: Namespace, 34 | Subsystem: "collection", 35 | Name: "storage_size_bytes", 36 | Help: "The total amount of storage allocated to this collection for document storage", 37 | }, []string{"ns"}) 38 | 39 | collIndexSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 40 | Namespace: Namespace, 41 | Subsystem: "collection", 42 | Name: "index_size_bytes", 43 | Help: "The total size of all indexes", 44 | }, []string{"ns"}) 45 | ) 46 | 47 | type CollectionStatus struct { 48 | Name string `bson:"ns"` 49 | Count int `bson:"count"` 50 | Size int `bson:"size"` 51 | AvgSize int `bson:"avgObjSize"` 52 | StorageSize int `bson:"storageSize"` 53 | IndexSize int `bson:"totalIndexSize"` 54 | WiredTiger *CollWiredTigerStats 
`bson:"wiredTiger"` 55 | } 56 | 57 | func (collStatus *CollectionStatus) Export(ch chan<- prometheus.Metric) { 58 | count.WithLabelValues(collStatus.Name).Set(float64(collStatus.Count)) 59 | size.WithLabelValues(collStatus.Name).Set(float64(collStatus.Size)) 60 | avgObjSize.WithLabelValues(collStatus.Name).Set(float64(collStatus.AvgSize)) 61 | storageSize.WithLabelValues(collStatus.Name).Set(float64(collStatus.StorageSize)) 62 | collIndexSize.WithLabelValues(collStatus.Name).Set(float64(collStatus.IndexSize)) 63 | 64 | if collStatus.WiredTiger != nil { 65 | collStatus.WiredTiger.Export(ch, collStatus.Name) 66 | } 67 | 68 | count.Collect(ch) 69 | size.Collect(ch) 70 | avgObjSize.Collect(ch) 71 | storageSize.Collect(ch) 72 | collIndexSize.Collect(ch) 73 | 74 | count.Reset() 75 | size.Reset() 76 | avgObjSize.Reset() 77 | storageSize.Reset() 78 | collIndexSize.Reset() 79 | } 80 | 81 | func (collStatus *CollectionStatus) Describe(ch chan<- *prometheus.Desc) { 82 | count.Describe(ch) 83 | size.Describe(ch) 84 | avgObjSize.Describe(ch) 85 | storageSize.Describe(ch) 86 | collIndexSize.Describe(ch) 87 | 88 | if collStatus.WiredTiger != nil { 89 | collStatus.WiredTiger.Describe(ch) 90 | } 91 | } 92 | 93 | func GetCollectionStatus(session *mgo.Session, db string, collection string) *CollectionStatus { 94 | var collStatus CollectionStatus 95 | err := session.DB(db).Run(bson.D{{"collStats", collection}, {"scale", 1}}, &collStatus) 96 | if err != nil { 97 | glog.Error(err) 98 | return nil 99 | } 100 | 101 | return &collStatus 102 | } 103 | 104 | func CollectCollectionStatus(session *mgo.Session, db string, ch chan<- prometheus.Metric) { 105 | collection_names, err := session.DB(db).CollectionNames() 106 | if err != nil { 107 | glog.Error("Failed to get collection names for db=" + db) 108 | return 109 | } 110 | for _, collection_name := range collection_names { 111 | collStats := GetCollectionStatus(session, db, collection_name) 112 | if collStats != nil { 113 | glog.V(1).Infof("exporting Database Metrics for db=%q, table=%q", db, collection_name) 114 | collStats.Export(ch) 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /collector/collection_wiredtiger.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | collWTBlockManagerBlocksTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 9 | Namespace: Namespace, 10 | Subsystem: "collection_wiredtiger_blockmanager", 11 | Name: "blocks_total", 12 | Help: "The total number of blocks allocated by the WiredTiger BlockManager", 13 | }, []string{"ns", "type"}) 14 | ) 15 | 16 | var ( 17 | collWTCachePages = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 18 | Namespace: Namespace, 19 | Subsystem: "collection_wiredtiger_cache", 20 | Name: "pages", 21 | Help: "The current number of pages in the WiredTiger Cache", 22 | }, []string{"ns", "type"}) 23 | collWTCachePagesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 24 | Namespace: Namespace, 25 | Subsystem: "collection_wiredtiger_cache", 26 | Name: "pages_total", 27 | Help: "The total number of pages read into/from the WiredTiger Cache", 28 | }, []string{"ns", "type"}) 29 | collWTCacheBytes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 30 | Namespace: Namespace, 31 | Subsystem: "collection_wiredtiger_cache", 32 | Name: "bytes", 33 | Help: "The current size of data in the WiredTiger Cache in bytes", 34 | 
}, []string{"ns", "type"}) 35 | collWTCacheBytesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 36 | Namespace: Namespace, 37 | Subsystem: "collection_wiredtiger_cache", 38 | Name: "bytes_total", 39 | Help: "The total number of bytes read into/from the WiredTiger Cache", 40 | }, []string{"ns", "type"}) 41 | collWTCacheEvictedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 42 | Namespace: Namespace, 43 | Subsystem: "collection_wiredtiger_cache", 44 | Name: "evicted_total", 45 | Help: "The total number of pages evicted from the WiredTiger Cache", 46 | }, []string{"ns", "type"}) 47 | ) 48 | 49 | var ( 50 | collWTTransactionsUpdateConflicts = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 51 | Namespace: Namespace, 52 | Subsystem: "collection_wiredtiger_transactions", 53 | Name: "update_conflicts", 54 | Help: "The number of conflicts updating transactions", 55 | }, []string{"ns"}) 56 | ) 57 | 58 | var ( 59 | collWTOpenCursors = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 60 | Namespace: Namespace, 61 | Subsystem: "collection_wiredtiger_session", 62 | Name: "open_cursors_total", 63 | Help: "The total number of cursors opened in WiredTiger", 64 | }, []string{"ns"}) 65 | ) 66 | 67 | // blockmanager stats 68 | type CollWTBlockManagerStats struct { 69 | BlocksFreed float64 `bson:"blocks freed"` 70 | BlocksAllocated float64 `bson:"blocks allocated"` 71 | } 72 | 73 | func (stats *CollWTBlockManagerStats) Export(ch chan<- prometheus.Metric, collection string) { 74 | collWTBlockManagerBlocksTotal.WithLabelValues(collection, "freed").Set(stats.BlocksFreed) 75 | collWTBlockManagerBlocksTotal.WithLabelValues(collection, "allocated").Set(stats.BlocksAllocated) 76 | } 77 | 78 | func (stats *CollWTBlockManagerStats) Describe(ch chan<- *prometheus.Desc) { 79 | collWTBlockManagerBlocksTotal.Describe(ch) 80 | } 81 | 82 | // cache stats 83 | type CollWTCacheStats struct { 84 | BytesTotal float64 `bson:"bytes currently in the cache"` 85 | BytesDirty float64 `bson:"tracked dirty bytes in the cache"` 86 | BytesReadInto float64 `bson:"bytes read into cache"` 87 | BytesWrittenFrom float64 `bson:"bytes written from cache"` 88 | EvictedUnmodified float64 `bson:"unmodified pages evicted"` 89 | EvictedModified float64 `bson:"modified pages evicted"` 90 | PagesReadInto float64 `bson:"pages read into cache"` 91 | PagesWrittenFrom float64 `bson:"pages written from cache"` 92 | } 93 | 94 | func (stats *CollWTCacheStats) Export(ch chan<- prometheus.Metric, collection string) { 95 | collWTCachePagesTotal.WithLabelValues(collection, "read").Set(stats.PagesReadInto) 96 | collWTCachePagesTotal.WithLabelValues(collection, "written").Set(stats.PagesWrittenFrom) 97 | collWTCacheBytesTotal.WithLabelValues(collection, "read").Set(stats.BytesReadInto) 98 | collWTCacheBytesTotal.WithLabelValues(collection, "written").Set(stats.BytesWrittenFrom) 99 | collWTCacheEvictedTotal.WithLabelValues(collection, "modified").Set(stats.EvictedModified) 100 | collWTCacheEvictedTotal.WithLabelValues(collection, "unmodified").Set(stats.EvictedUnmodified) 101 | collWTCacheBytes.WithLabelValues(collection, "total").Set(stats.BytesTotal) 102 | collWTCacheBytes.WithLabelValues(collection, "dirty").Set(stats.BytesDirty) 103 | } 104 | 105 | func (stats *CollWTCacheStats) Describe(ch chan<- *prometheus.Desc) { 106 | collWTCachePagesTotal.Describe(ch) 107 | collWTCacheEvictedTotal.Describe(ch) 108 | collWTCachePages.Describe(ch) 109 | collWTCacheBytes.Describe(ch) 110 | } 111 | 112 | // session stats 113 | type CollWTSessionStats struct { 114 | 
Cursors float64 `bson:"open cursor count"` 115 | } 116 | 117 | func (stats *CollWTSessionStats) Export(ch chan<- prometheus.Metric, collection string) { 118 | collWTOpenCursors.WithLabelValues(collection).Set(stats.Cursors) 119 | } 120 | 121 | func (stats *CollWTSessionStats) Describe(ch chan<- *prometheus.Desc) { 122 | collWTOpenCursors.Describe(ch) 123 | } 124 | 125 | // transaction stats 126 | type CollWTTransactionStats struct { 127 | UpdateConflicts float64 `bson:"update conflicts"` 128 | } 129 | 130 | func (stats *CollWTTransactionStats) Export(ch chan<- prometheus.Metric, collection string) { 131 | collWTTransactionsUpdateConflicts.WithLabelValues(collection).Set(stats.UpdateConflicts) 132 | } 133 | 134 | func (stats *CollWTTransactionStats) Describe(ch chan<- *prometheus.Desc) { 135 | collWTTransactionsUpdateConflicts.Describe(ch) 136 | } 137 | 138 | // WiredTiger stats 139 | type CollWiredTigerStats struct { 140 | BlockManager *CollWTBlockManagerStats `bson:"block-manager"` 141 | Cache *CollWTCacheStats `bson:"cache"` 142 | Session *CollWTSessionStats `bson:"session"` 143 | Transaction *CollWTTransactionStats `bson:"transaction"` 144 | } 145 | 146 | func (stats *CollWiredTigerStats) Describe(ch chan<- *prometheus.Desc) { 147 | if stats.BlockManager != nil { 148 | stats.BlockManager.Describe(ch) 149 | } 150 | 151 | if stats.Cache != nil { 152 | stats.Cache.Describe(ch) 153 | } 154 | if stats.Transaction != nil { 155 | stats.Transaction.Describe(ch) 156 | } 157 | if stats.Session != nil { 158 | stats.Session.Describe(ch) 159 | } 160 | } 161 | 162 | func (stats *CollWiredTigerStats) Export(ch chan<- prometheus.Metric, collection string) { 163 | if stats.BlockManager != nil { 164 | stats.BlockManager.Export(ch, collection) 165 | } 166 | 167 | if stats.Cache != nil { 168 | stats.Cache.Export(ch, collection) 169 | } 170 | 171 | if stats.Transaction != nil { 172 | stats.Transaction.Export(ch, collection) 173 | } 174 | 175 | if stats.Session != nil { 176 | stats.Session.Export(ch, collection) 177 | } 178 | 179 | collWTBlockManagerBlocksTotal.Collect(ch) 180 | collWTCachePagesTotal.Collect(ch) 181 | collWTCacheBytesTotal.Collect(ch) 182 | collWTCacheEvictedTotal.Collect(ch) 183 | collWTCachePages.Collect(ch) 184 | collWTCacheBytes.Collect(ch) 185 | collWTTransactionsUpdateConflicts.Collect(ch) 186 | collWTOpenCursors.Collect(ch) 187 | 188 | collWTBlockManagerBlocksTotal.Reset() 189 | collWTCachePagesTotal.Reset() 190 | collWTCacheBytesTotal.Reset() 191 | collWTCacheEvictedTotal.Reset() 192 | collWTCachePages.Reset() 193 | collWTCacheBytes.Reset() 194 | collWTTransactionsUpdateConflicts.Reset() 195 | collWTOpenCursors.Reset() 196 | } 197 | -------------------------------------------------------------------------------- /collector/conn_pool_stats.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/golang/glog" 5 | "github.com/prometheus/client_golang/prometheus" 6 | mgo "gopkg.in/mgo.v2" 7 | "gopkg.in/mgo.v2/bson" 8 | ) 9 | 10 | // server connections -- all of these! 
11 | var (
12 | 	syncClientConnections = prometheus.NewGauge(prometheus.GaugeOpts{
13 | 		Namespace: Namespace,
14 | 		Subsystem: "connpoolstats",
15 | 		Name:      "connection_sync",
16 | 		Help:      "Corresponds to the total number of client connections to mongo.",
17 | 	})
18 |
19 | 	numAScopedConnections = prometheus.NewGauge(prometheus.GaugeOpts{
20 | 		Namespace: Namespace,
21 | 		Subsystem: "connpoolstats",
22 | 		Name:      "connections_scoped_sync",
23 | 		Help:      "Corresponds to the number of active and stored outgoing scoped synchronous connections from the current instance to other members of the sharded cluster or replica set.",
24 | 	})
25 |
26 | 	totalInUse = prometheus.NewGauge(prometheus.GaugeOpts{
27 | 		Namespace: Namespace,
28 | 		Subsystem: "connpoolstats",
29 | 		Name:      "connections_in_use",
30 | 		Help:      "Corresponds to the total number of client connections to mongo currently in use.",
31 | 	})
32 |
33 | 	totalAvailable = prometheus.NewGauge(prometheus.GaugeOpts{
34 | 		Namespace: Namespace,
35 | 		Subsystem: "connpoolstats",
36 | 		Name:      "connections_available",
37 | 		Help:      "Corresponds to the total number of client connections to mongo that are currently available.",
38 | 	})
39 |
40 | 	totalCreated = prometheus.NewCounter(prometheus.CounterOpts{
41 | 		Namespace: Namespace,
42 | 		Subsystem: "connpoolstats",
43 | 		Name:      "connections_created_total",
44 | 		Help:      "Corresponds to the total number of client connections to mongo created since instance start",
45 | 	})
46 | )
47 |
48 | // ConnPoolStats keeps the data returned by the connPoolStats command.
49 | type ConnPoolStats struct {
50 | 	SyncClientConnections float64 `bson:"numClientConnections"`
51 | 	ASScopedConnections   float64 `bson:"numAScopedConnections"`
52 | 	TotalInUse            float64 `bson:"totalInUse"`
53 | 	TotalAvailable        float64 `bson:"totalAvailable"`
54 | 	TotalCreated          float64 `bson:"totalCreated"`
55 |
56 | 	Hosts map[string]*HostConnPoolStats `bson:"hosts"`
57 | 	// TODO:? not sure if *this* level of granularity is helpful
58 | 	//ReplicaSets map[string]ConnPoolReplicaSetStats `bson:"replicaSets"`
59 | }
60 |
61 | // Export exports the connection pool stats to be consumed by prometheus.
62 | func (stats *ConnPoolStats) Export(ch chan<- prometheus.Metric) {
63 | 	syncClientConnections.Set(stats.SyncClientConnections)
64 | 	syncClientConnections.Collect(ch)
65 |
66 | 	numAScopedConnections.Set(stats.ASScopedConnections)
67 | 	numAScopedConnections.Collect(ch)
68 |
69 | 	totalInUse.Set(stats.TotalInUse)
70 | 	totalInUse.Collect(ch)
71 |
72 | 	totalAvailable.Set(stats.TotalAvailable)
73 | 	totalAvailable.Collect(ch)
74 |
75 | 	totalCreated.Set(stats.TotalCreated)
76 | 	totalCreated.Collect(ch)
77 |
78 | 	for hostname, hostStat := range stats.Hosts {
79 | 		hostStat.Export(hostname, ch)
80 | 	}
81 | }
82 |
83 | // Describe describes the connection pool stats for prometheus.
84 | func (stats *ConnPoolStats) Describe(ch chan<- *prometheus.Desc) {
85 | 	syncClientConnections.Describe(ch)
86 |
87 | 	numAScopedConnections.Describe(ch)
88 |
89 | 	totalInUse.Describe(ch)
90 |
91 | 	totalAvailable.Describe(ch)
92 |
93 | 	totalCreated.Describe(ch)
94 |
95 | 	for _, hostStat := range stats.Hosts {
96 | 		hostStat.Describe(ch)
97 | 	}
98 | }
99 |
100 | // GetConnPoolStats returns the connection pool stats.
101 | func GetConnPoolStats(session *mgo.Session) *ConnPoolStats { 102 | result := &ConnPoolStats{} 103 | err := session.DB("admin").Run(bson.D{{"connPoolStats", 1}, {"recordStats", 0}}, result) 104 | if err != nil { 105 | glog.Error("Failed to get server status.") 106 | return nil 107 | } 108 | return result 109 | } 110 | -------------------------------------------------------------------------------- /collector/conn_pool_stats_hosts.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | inUse = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Subsystem: "connpoolstats", 11 | Name: "in_use", 12 | Help: "Corresponds to the total number of client connections to mongo.", 13 | // TODO: tags 14 | }, []string{"host"}) 15 | 16 | available = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 17 | Namespace: Namespace, 18 | Subsystem: "connpoolstats", 19 | // TODO 20 | Name: "available", 21 | Help: "Corresponds to the total number of client connections to mongo.", 22 | }, []string{"host"}) 23 | 24 | created = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 25 | Namespace: Namespace, 26 | Subsystem: "connpoolstats", 27 | // TODO 28 | Name: "created", 29 | Help: "Corresponds to the total number of client connections to mongo.", 30 | }, []string{"host"}) 31 | ) 32 | 33 | // ServerStatus keeps the data returned by the serverStatus() method. 34 | type HostConnPoolStats struct { 35 | InUse float64 `bson:"inUse"` 36 | Available float64 `bson:"available"` 37 | Created float64 `bson:"created"` 38 | } 39 | 40 | // Export exports the server status to be consumed by prometheus. 41 | func (stats *HostConnPoolStats) Export(hostname string, ch chan<- prometheus.Metric) { 42 | inUse.WithLabelValues(hostname).Set(float64(stats.InUse)) 43 | inUse.Collect(ch) 44 | inUse.Reset() 45 | 46 | available.WithLabelValues(hostname).Set(float64(stats.Available)) 47 | available.Collect(ch) 48 | available.Reset() 49 | 50 | created.WithLabelValues(hostname).Set(float64(stats.Created)) 51 | created.Collect(ch) 52 | created.Reset() 53 | } 54 | 55 | // Describe describes the server status for prometheus. 56 | func (stats *HostConnPoolStats) Describe(ch chan<- *prometheus.Desc) { 57 | inUse.Describe(ch) 58 | 59 | available.Describe(ch) 60 | 61 | created.Describe(ch) 62 | } 63 | -------------------------------------------------------------------------------- /collector/connections.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | connections = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Name: "connections", 11 | Help: "The connections sub document data regarding the current status of incoming connections and availability of the database server. Use these values to assess the current load and capacity requirements of the server", 12 | }, []string{"state"}) 13 | ) 14 | var ( 15 | connectionsMetricsCreatedTotal = prometheus.NewCounter(prometheus.CounterOpts{ 16 | Namespace: Namespace, 17 | Subsystem: "connections_metrics", 18 | Name: "created_total", 19 | Help: "totalCreated provides a count of all incoming connections created to the server. 
This number includes connections that have since closed", 20 | }) 21 | ) 22 | 23 | // ConnectionStats are connections metrics 24 | type ConnectionStats struct { 25 | Current float64 `bson:"current"` 26 | Available float64 `bson:"available"` 27 | TotalCreated float64 `bson:"totalCreated"` 28 | } 29 | 30 | // Export exports the data to prometheus. 31 | func (connectionStats *ConnectionStats) Export(ch chan<- prometheus.Metric) { 32 | connections.WithLabelValues("current").Set(connectionStats.Current) 33 | connections.WithLabelValues("available").Set(connectionStats.Available) 34 | connections.Collect(ch) 35 | 36 | connectionsMetricsCreatedTotal.Set(connectionStats.TotalCreated) 37 | connectionsMetricsCreatedTotal.Collect(ch) 38 | } 39 | 40 | // Describe describes the metrics for prometheus 41 | func (connectionStats *ConnectionStats) Describe(ch chan<- *prometheus.Desc) { 42 | connections.Describe(ch) 43 | connectionsMetricsCreatedTotal.Describe(ch) 44 | } 45 | -------------------------------------------------------------------------------- /collector/cursors.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | cursorsGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Name: "cursors", 11 | Help: "The cursors data structure contains data regarding cursor state and use", 12 | }, []string{"state"}) 13 | ) 14 | 15 | // Cursors are the cursor metrics 16 | type Cursors struct { 17 | TotalOpen float64 `bson:"totalOpen"` 18 | TimeOut float64 `bson:"timedOut"` 19 | TotalNoTimeout float64 `bson:"totalNoTimeout"` 20 | Pinned float64 `bson:"pinned"` 21 | } 22 | 23 | // Export exports the data to prometheus. 
24 | func (cursors *Cursors) Export(ch chan<- prometheus.Metric) { 25 | cursorsGauge.WithLabelValues("total_open").Set(cursors.TotalOpen) 26 | cursorsGauge.WithLabelValues("timed_out").Set(cursors.TimeOut) 27 | cursorsGauge.WithLabelValues("total_no_timeout").Set(cursors.TotalNoTimeout) 28 | cursorsGauge.WithLabelValues("pinned").Set(cursors.Pinned) 29 | cursorsGauge.Collect(ch) 30 | } 31 | 32 | // Describe describes the metrics for prometheus 33 | func (cursors *Cursors) Describe(ch chan<- *prometheus.Desc) { 34 | cursorsGauge.Describe(ch) 35 | } 36 | -------------------------------------------------------------------------------- /collector/database_status.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/golang/glog" 7 | "github.com/prometheus/client_golang/prometheus" 8 | mgo "gopkg.in/mgo.v2" 9 | "gopkg.in/mgo.v2/bson" 10 | ) 11 | 12 | var ( 13 | indexSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 14 | Namespace: Namespace, 15 | Subsystem: "db", 16 | Name: "index_size_bytes", 17 | Help: "The total size in bytes of all indexes created on this database", 18 | }, []string{"db", "shard"}) 19 | dataSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 20 | Namespace: Namespace, 21 | Subsystem: "db", 22 | Name: "data_size_bytes", 23 | Help: "The total size in bytes of the uncompressed data held in this database", 24 | }, []string{"db", "shard"}) 25 | collectionsTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 26 | Namespace: Namespace, 27 | Subsystem: "db", 28 | Name: "collections_total", 29 | Help: "Contains a count of the number of collections in that database", 30 | }, []string{"db", "shard"}) 31 | indexesTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 32 | Namespace: Namespace, 33 | Subsystem: "db", 34 | Name: "indexes_total", 35 | Help: "Contains a count of the total number of indexes across all collections in the database", 36 | }, []string{"db", "shard"}) 37 | objectsTotal = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 38 | Namespace: Namespace, 39 | Subsystem: "db", 40 | Name: "objects_total", 41 | Help: "Contains a count of the number of objects (i.e. 
documents) in the database across all collections", 42 | }, []string{"db", "shard"}) 43 | ) 44 | 45 | // DatabaseStatus represents stats about a database 46 | type DatabaseStatus struct { 47 | Name string `bson:"db,omitempty"` 48 | IndexSize int `bson:"indexSize,omitempty"` 49 | DataSize int `bson:"dataSize,omitempty"` 50 | Collections int `bson:"collections,omitempty"` 51 | Objects int `bson:"objects,omitempty"` 52 | Indexes int `bson:"indexes,omitempty"` 53 | Shards map[string]*RawStatus `bson:"raw,omitempty"` 54 | } 55 | 56 | // RawStatus represents stats about a database shard 57 | type RawStatus struct { 58 | Name string `bson:"db,omitempty"` 59 | IndexSize int `bson:"indexSize,omitempty"` 60 | DataSize int `bson:"dataSize,omitempty"` 61 | Collections int `bson:"collections,omitempty"` 62 | Objects int `bson:"objects,omitempty"` 63 | Indexes int `bson:"indexes,omitempty"` 64 | } 65 | 66 | // Export exports database stats to prometheus 67 | func (dbStatus *DatabaseStatus) Export(ch chan<- prometheus.Metric) { 68 | if len(dbStatus.Shards) > 0 { 69 | for shard, stats := range dbStatus.Shards { 70 | shard = strings.Split(shard, "/")[0] 71 | indexSize.WithLabelValues(stats.Name, shard).Set(float64(stats.IndexSize)) 72 | dataSize.WithLabelValues(stats.Name, shard).Set(float64(stats.DataSize)) 73 | collectionsTotal.WithLabelValues(stats.Name, shard).Set(float64(stats.Collections)) 74 | indexesTotal.WithLabelValues(stats.Name, shard).Set(float64(stats.Indexes)) 75 | objectsTotal.WithLabelValues(stats.Name, shard).Set(float64(stats.Objects)) 76 | } 77 | } else { 78 | indexSize.WithLabelValues(dbStatus.Name, "").Set(float64(dbStatus.IndexSize)) 79 | dataSize.WithLabelValues(dbStatus.Name, "").Set(float64(dbStatus.DataSize)) 80 | collectionsTotal.WithLabelValues(dbStatus.Name, "").Set(float64(dbStatus.Collections)) 81 | indexesTotal.WithLabelValues(dbStatus.Name, "").Set(float64(dbStatus.Indexes)) 82 | objectsTotal.WithLabelValues(dbStatus.Name, "").Set(float64(dbStatus.Objects)) 83 | } 84 | 85 | indexSize.Collect(ch) 86 | dataSize.Collect(ch) 87 | collectionsTotal.Collect(ch) 88 | indexesTotal.Collect(ch) 89 | objectsTotal.Collect(ch) 90 | 91 | indexSize.Reset() 92 | dataSize.Reset() 93 | collectionsTotal.Reset() 94 | indexesTotal.Reset() 95 | objectsTotal.Reset() 96 | } 97 | 98 | // Describe describes database stats for prometheus 99 | func (dbStatus *DatabaseStatus) Describe(ch chan<- *prometheus.Desc) { 100 | indexSize.Describe(ch) 101 | dataSize.Describe(ch) 102 | collectionsTotal.Describe(ch) 103 | indexesTotal.Describe(ch) 104 | objectsTotal.Describe(ch) 105 | } 106 | 107 | // GetDatabaseStatus returns stats for a given database 108 | func GetDatabaseStatus(session *mgo.Session, db string) *DatabaseStatus { 109 | var dbStatus DatabaseStatus 110 | err := session.DB(db).Run(bson.D{{"dbStats", 1}, {"scale", 1}}, &dbStatus) 111 | if err != nil { 112 | glog.Error(err) 113 | return nil 114 | } 115 | 116 | return &dbStatus 117 | } 118 | -------------------------------------------------------------------------------- /collector/durability.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | durabilityCommits = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Name: "durability_commits", 11 | Help: "Durability commits", 12 | }, []string{"state"}) 13 | ) 14 | var ( 15 | durabilityJournaledMegabytes = 
prometheus.NewGauge(prometheus.GaugeOpts{ 16 | Namespace: Namespace, 17 | Subsystem: "durability", 18 | Name: "journaled_megabytes", 19 | Help: "The journaledMB provides the amount of data in megabytes (MB) written to journal during the last journal group commit interval", 20 | }) 21 | durabilityWriteToDataFilesMegabytes = prometheus.NewGauge(prometheus.GaugeOpts{ 22 | Namespace: Namespace, 23 | Subsystem: "durability", 24 | Name: "write_to_data_files_megabytes", 25 | Help: "The writeToDataFilesMB provides the amount of data in megabytes (MB) written from journal to the data files during the last journal group commit interval", 26 | }) 27 | durabilityCompression = prometheus.NewGauge(prometheus.GaugeOpts{ 28 | Namespace: Namespace, 29 | Subsystem: "durability", 30 | Name: "compression", 31 | Help: "The compression represents the compression ratio of the data written to the journal: ( journaled_size_of_data / uncompressed_size_of_data )", 32 | }) 33 | durabilityEarlyCommits = prometheus.NewSummary(prometheus.SummaryOpts{ 34 | Namespace: Namespace, 35 | Subsystem: "durability", 36 | Name: "early_commits", 37 | Help: "The earlyCommits value reflects the number of times MongoDB requested a commit before the scheduled journal group commit interval. Use this value to ensure that your journal group commit interval is not too long for your deployment", 38 | }) 39 | ) 40 | var ( 41 | durabilityTimeMilliseconds = prometheus.NewSummaryVec(prometheus.SummaryOpts{ 42 | Namespace: Namespace, 43 | Name: "durability_time_milliseconds", 44 | Help: "Summary of times spent during the journaling process.", 45 | }, []string{"stage"}) 46 | ) 47 | 48 | // DurTiming is the information about durability returned from the server. 49 | type DurTiming struct { 50 | Dt float64 `bson:"dt"` 51 | PrepLogBuffer float64 `bson:"prepLogBuffer"` 52 | WriteToJournal float64 `bson:"writeToJournal"` 53 | WriteToDataFiles float64 `bson:"writeToDataFiles"` 54 | RemapPrivateView float64 `bson:"remapPrivateView"` 55 | } 56 | 57 | // Export exports the data for the prometheus server. 58 | func (durTiming *DurTiming) Export(ch chan<- prometheus.Metric) { 59 | durabilityTimeMilliseconds.WithLabelValues("dt").Observe(durTiming.Dt) 60 | durabilityTimeMilliseconds.WithLabelValues("prep_log_buffer").Observe(durTiming.PrepLogBuffer) 61 | durabilityTimeMilliseconds.WithLabelValues("write_to_journal").Observe(durTiming.WriteToJournal) 62 | durabilityTimeMilliseconds.WithLabelValues("write_to_data_files").Observe(durTiming.WriteToDataFiles) 63 | durabilityTimeMilliseconds.WithLabelValues("remap_private_view").Observe(durTiming.RemapPrivateView) 64 | durabilityTimeMilliseconds.Collect(ch) 65 | } 66 | 67 | // DurStats are the stats related to durability. 68 | type DurStats struct { 69 | Commits float64 `bson:"commits"` 70 | JournaledMB float64 `bson:"journaledMB"` 71 | WriteToDataFilesMB float64 `bson:"writeToDataFilesMB"` 72 | Compression float64 `bson:"compression"` 73 | CommitsInWriteLock float64 `bson:"commitsInWriteLock"` 74 | EarlyCommits float64 `bson:"earlyCommits"` 75 | TimeMs DurTiming `bson:"timeMs"` 76 | } 77 | 78 | // Export export the durability stats for the prometheus server. 
79 | func (durStats *DurStats) Export(ch chan<- prometheus.Metric) { 80 | durabilityCommits.WithLabelValues("written").Set(durStats.Commits) 81 | durabilityCommits.WithLabelValues("in_write_lock").Set(durStats.CommitsInWriteLock) 82 | 83 | durabilityJournaledMegabytes.Set(durStats.JournaledMB) 84 | durabilityWriteToDataFilesMegabytes.Set(durStats.WriteToDataFilesMB) 85 | durabilityCompression.Set(durStats.Compression) 86 | durabilityEarlyCommits.Observe(durStats.EarlyCommits) 87 | 88 | durStats.TimeMs.Export(ch) 89 | 90 | durStats.Collect(ch) 91 | } 92 | 93 | // Collect collects the metrics for prometheus 94 | func (durStats *DurStats) Collect(ch chan<- prometheus.Metric) { 95 | durabilityCommits.Collect(ch) 96 | durabilityJournaledMegabytes.Collect(ch) 97 | durabilityWriteToDataFilesMegabytes.Collect(ch) 98 | durabilityCompression.Collect(ch) 99 | durabilityEarlyCommits.Collect(ch) 100 | } 101 | 102 | // Describe describes the metrics for prometheus 103 | func (durStats *DurStats) Describe(ch chan<- *prometheus.Desc) { 104 | durabilityCommits.Describe(ch) 105 | durabilityJournaledMegabytes.Describe(ch) 106 | durabilityWriteToDataFilesMegabytes.Describe(ch) 107 | durabilityCompression.Describe(ch) 108 | durabilityEarlyCommits.Describe(ch) 109 | durabilityTimeMilliseconds.Describe(ch) 110 | } 111 | -------------------------------------------------------------------------------- /collector/extra_info.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | extraInfopageFaultsTotal = prometheus.NewGauge(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Subsystem: "extra_info", 11 | Name: "page_faults_total", 12 | Help: "The page_faults Reports the total number of page faults that require disk operations. Page faults refer to operations that require the database server to access data which isn't available in active memory. The page_faults counter may increase dramatically during moments of poor performance and may correlate with limited memory environments and larger data sets. Limited and sporadic page faults do not necessarily indicate an issue", 13 | }) 14 | extraInfoheapUsageBytes = prometheus.NewGauge(prometheus.GaugeOpts{ 15 | Namespace: Namespace, 16 | Subsystem: "extra_info", 17 | Name: "heap_usage_bytes", 18 | Help: "The heap_usage_bytes field is only available on Unix/Linux systems, and reports the total size in bytes of heap space used by the database process", 19 | }) 20 | ) 21 | 22 | // ExtraInfo has extra info metrics 23 | type ExtraInfo struct { 24 | HeapUsageBytes float64 `bson:"heap_usage_bytes"` 25 | PageFaults float64 `bson:"page_faults"` 26 | } 27 | 28 | // Export exports the metrics to prometheus. 
29 | func (extraInfo *ExtraInfo) Export(ch chan<- prometheus.Metric) { 30 | extraInfoheapUsageBytes.Set(extraInfo.HeapUsageBytes) 31 | extraInfopageFaultsTotal.Set(extraInfo.PageFaults) 32 | 33 | extraInfoheapUsageBytes.Collect(ch) 34 | extraInfopageFaultsTotal.Collect(ch) 35 | 36 | } 37 | 38 | // Describe describes the metrics for prometheus 39 | func (extraInfo *ExtraInfo) Describe(ch chan<- *prometheus.Desc) { 40 | extraInfoheapUsageBytes.Describe(ch) 41 | extraInfopageFaultsTotal.Describe(ch) 42 | } 43 | -------------------------------------------------------------------------------- /collector/fixtures/server_status.bson: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcu/mongodb_exporter/8f53431089a13bc46f4f86533451c01b60582478/collector/fixtures/server_status.bson -------------------------------------------------------------------------------- /collector/fixtures/server_status.json: -------------------------------------------------------------------------------- 1 | { 2 | "host" : "localhost", 3 | "version" : "2.6.7", 4 | "process" : "mongod", 5 | "pid" : 2593, 6 | "uptime" : 127859, 7 | "uptimeMillis" : 127859430, 8 | "uptimeEstimate" : 13850, 9 | "localTime" : "2015-02-07T01:29:27.458Z", 10 | "asserts" : { 11 | "regular" : 0, 12 | "warning" : 0, 13 | "msg" : 0, 14 | "user" : 3, 15 | "rollovers" : 0 16 | }, 17 | "backgroundFlushing" : { 18 | "flushes" : 3, 19 | "total_ms" : 23, 20 | "average_ms" : 7.666666666666667, 21 | "last_ms" : 7, 22 | "last_finished" : "2015-02-06T21:25:57.012Z" 23 | }, 24 | "connections" : { 25 | "current" : 1, 26 | "available" : 818, 27 | "totalCreated" : 4145 28 | }, 29 | "cursors" : { 30 | "note" : "deprecated, use server status metrics", 31 | "clientCursors_size" : 0, 32 | "totalOpen" : 0, 33 | "pinned" : 0, 34 | "totalNoTimeout" : 0, 35 | "timedOut" : 3 36 | }, 37 | "dur" : { 38 | "commits" : 10, 39 | "journaledMB" : 0, 40 | "writeToDataFilesMB" : 0, 41 | "compression" : 0, 42 | "commitsInWriteLock" : 0, 43 | "earlyCommits" : 0, 44 | "timeMs" : { 45 | "dt" : 3187, 46 | "prepLogBuffer" : 0, 47 | "writeToJournal" : 0, 48 | "writeToDataFiles" : 0, 49 | "remapPrivateView" : 0 50 | } 51 | }, 52 | "extra_info" : { 53 | "note" : "fields vary by platform", 54 | "page_faults" : 1724156 55 | }, 56 | "globalLock" : { 57 | "totalTime" : "127859430000", 58 | "lockTime" : 7097013, 59 | "currentQueue" : { 60 | "total" : 0, 61 | "readers" : 0, 62 | "writers" : 0 63 | }, 64 | "activeClients" : { 65 | "total" : 0, 66 | "readers" : 0, 67 | "writers" : 0 68 | } 69 | }, 70 | "indexCounters" : { 71 | "accesses" : 2094239, 72 | "hits" : 2094239, 73 | "misses" : 0, 74 | "resets" : 0, 75 | "missRatio" : 0 76 | }, 77 | "locks" : { 78 | "." 
: { 79 | "timeLockedMicros" : { 80 | "R" : 1979803, 81 | "W" : 7097013 82 | }, 83 | "timeAcquiringMicros" : { 84 | "R" : 1466095, 85 | "W" : 2005190 86 | } 87 | }, 88 | "admin" : { 89 | "timeLockedMicros" : { 90 | "r" : 1452829, 91 | "w" : 0 92 | }, 93 | "timeAcquiringMicros" : { 94 | "r" : 38353, 95 | "w" : 0 96 | } 97 | }, 98 | "local" : { 99 | "timeLockedMicros" : { 100 | "r" : 1863485, 101 | "w" : 537 102 | }, 103 | "timeAcquiringMicros" : { 104 | "r" : 293787, 105 | "w" : 2 106 | } 107 | }, 108 | "a-db" : { 109 | "timeLockedMicros" : { 110 | "r" : 986732, 111 | "w" : 525 112 | }, 113 | "timeAcquiringMicros" : { 114 | "r" : 45866, 115 | "w" : 21 116 | } 117 | } 118 | }, 119 | "network" : { 120 | "bytesIn" : 162962349, 121 | "bytesOut" : 399007475, 122 | "numRequests" : 1263839 123 | }, 124 | "opcounters" : { 125 | "insert" : 46035, 126 | "query" : 460137, 127 | "update" : 51878, 128 | "delete" : 123287, 129 | "getmore" : 20, 130 | "command" : 642339 131 | }, 132 | "opcountersRepl" : { 133 | "insert" : 0, 134 | "query" : 0, 135 | "update" : 0, 136 | "delete" : 0, 137 | "getmore" : 0, 138 | "command" : 0 139 | }, 140 | "writeBacksQueued" : false, 141 | "mem" : { 142 | "bits" : 64, 143 | "resident" : 10, 144 | "virtual" : 49653, 145 | "supported" : true, 146 | "mapped" : 23561, 147 | "mappedWithJournal" : 47122 148 | }, 149 | "metrics" : { 150 | "cursor" : { 151 | "timedOut" : 3, 152 | "open" : { 153 | "noTimeout" : 0, 154 | "pinned" : 0, 155 | "total" : 0 156 | } 157 | }, 158 | "document" : { 159 | "deleted" : 45726, 160 | "inserted" : 46035, 161 | "returned" : 426416, 162 | "updated" : 76873 163 | }, 164 | "getLastError" : { 165 | "wtime" : { 166 | "num" : 0, 167 | "totalMillis" : 0 168 | }, 169 | "wtimeouts" : 0 170 | }, 171 | "operation" : { 172 | "fastmod" : 46466, 173 | "idhack" : 4259, 174 | "scanAndOrder" : 11 175 | }, 176 | "queryExecutor" : { 177 | "scanned" : 295888337, 178 | "scannedObjects" : 290443242 179 | }, 180 | "record" : { 181 | "moves" : 5306 182 | }, 183 | "repl" : { 184 | "apply" : { 185 | "batches" : { 186 | "num" : 0, 187 | "totalMillis" : 0 188 | }, 189 | "ops" : 0 190 | }, 191 | "buffer" : { 192 | "count" : 0, 193 | "maxSizeBytes" : 268435456, 194 | "sizeBytes" : 0 195 | }, 196 | "network" : { 197 | "bytes" : 0, 198 | "getmores" : { 199 | "num" : 0, 200 | "totalMillis" : 0 201 | }, 202 | "ops" : 0, 203 | "readersCreated" : 0 204 | }, 205 | "preload" : { 206 | "docs" : { 207 | "num" : 0, 208 | "totalMillis" : 0 209 | }, 210 | "indexes" : { 211 | "num" : 0, 212 | "totalMillis" : 0 213 | } 214 | } 215 | }, 216 | "storage" : { 217 | "freelist" : { 218 | "search" : { 219 | "bucketExhausted" : 271, 220 | "requests" : 51031, 221 | "scanned" : 260603 222 | } 223 | } 224 | }, 225 | "ttl" : { 226 | "deletedDocuments" : 0, 227 | "passes" : 1130 228 | } 229 | }, 230 | "ok" : 1 231 | } 232 | -------------------------------------------------------------------------------- /collector/fixtures/top_status.bson: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcu/mongodb_exporter/8f53431089a13bc46f4f86533451c01b60582478/collector/fixtures/top_status.bson -------------------------------------------------------------------------------- /collector/fixtures/top_status.json: -------------------------------------------------------------------------------- 1 | { 2 | "totals" : { 3 | "note" : "all times in microseconds", 4 | "admin.system.roles" : { 5 | "total" : { 6 | "time" : 15, 7 | "count" : 1 8 | }, 9 | "readLock" : 
{ 10 | "time" : 15, 11 | "count" : 1 12 | }, 13 | "writeLock" : { 14 | "time" : 0, 15 | "count" : 0 16 | }, 17 | "queries" : { 18 | "time" : 15, 19 | "count" : 1 20 | }, 21 | "getmore" : { 22 | "time" : 0, 23 | "count" : 0 24 | }, 25 | "insert" : { 26 | "time" : 0, 27 | "count" : 0 28 | }, 29 | "update" : { 30 | "time" : 0, 31 | "count" : 0 32 | }, 33 | "remove" : { 34 | "time" : 0, 35 | "count" : 0 36 | }, 37 | "commands" : { 38 | "time" : 0, 39 | "count" : 0 40 | } 41 | }, 42 | "admin.system.version" : { 43 | "total" : { 44 | "time" : 4, 45 | "count" : 1 46 | }, 47 | "readLock" : { 48 | "time" : 4, 49 | "count" : 1 50 | }, 51 | "writeLock" : { 52 | "time" : 0, 53 | "count" : 0 54 | }, 55 | "queries" : { 56 | "time" : 0, 57 | "count" : 0 58 | }, 59 | "getmore" : { 60 | "time" : 0, 61 | "count" : 0 62 | }, 63 | "insert" : { 64 | "time" : 0, 65 | "count" : 0 66 | }, 67 | "update" : { 68 | "time" : 0, 69 | "count" : 0 70 | }, 71 | "remove" : { 72 | "time" : 0, 73 | "count" : 0 74 | }, 75 | "commands" : { 76 | "time" : 0, 77 | "count" : 0 78 | } 79 | }, 80 | "dummy.collection" : { 81 | "total" : { 82 | "time" : 61, 83 | "count" : 2 84 | }, 85 | "readLock" : { 86 | "time" : 61, 87 | "count" : 2 88 | }, 89 | "writeLock" : { 90 | "time" : 0, 91 | "count" : 0 92 | }, 93 | "queries" : { 94 | "time" : 61, 95 | "count" : 2 96 | }, 97 | "getmore" : { 98 | "time" : 0, 99 | "count" : 0 100 | }, 101 | "insert" : { 102 | "time" : 0, 103 | "count" : 0 104 | }, 105 | "update" : { 106 | "time" : 0, 107 | "count" : 0 108 | }, 109 | "remove" : { 110 | "time" : 0, 111 | "count" : 0 112 | }, 113 | "commands" : { 114 | "time" : 0, 115 | "count" : 0 116 | } 117 | }, 118 | "dummy.users" : { 119 | "total" : { 120 | "time" : 1095531, 121 | "count" : 17428 122 | }, 123 | "readLock" : { 124 | "time" : 267953, 125 | "count" : 17420 126 | }, 127 | "writeLock" : { 128 | "time" : 827578, 129 | "count" : 8 130 | }, 131 | "queries" : { 132 | "time" : 899, 133 | "count" : 10 134 | }, 135 | "getmore" : { 136 | "time" : 0, 137 | "count" : 0 138 | }, 139 | "insert" : { 140 | "time" : 826929, 141 | "count" : 5 142 | }, 143 | "update" : { 144 | "time" : 456, 145 | "count" : 2 146 | }, 147 | "remove" : { 148 | "time" : 193, 149 | "count" : 1 150 | }, 151 | "commands" : { 152 | "time" : 0, 153 | "count" : 0 154 | } 155 | }, 156 | "local.oplog.rs" : { 157 | "total" : { 158 | "time" : 337, 159 | "count" : 20 160 | }, 161 | "readLock" : { 162 | "time" : 337, 163 | "count" : 20 164 | }, 165 | "writeLock" : { 166 | "time" : 0, 167 | "count" : 0 168 | }, 169 | "queries" : { 170 | "time" : 0, 171 | "count" : 0 172 | }, 173 | "getmore" : { 174 | "time" : 0, 175 | "count" : 0 176 | }, 177 | "insert" : { 178 | "time" : 0, 179 | "count" : 0 180 | }, 181 | "update" : { 182 | "time" : 0, 183 | "count" : 0 184 | }, 185 | "remove" : { 186 | "time" : 0, 187 | "count" : 0 188 | }, 189 | "commands" : { 190 | "time" : 337, 191 | "count" : 20 192 | } 193 | }, 194 | "local.startup_log" : { 195 | "total" : { 196 | "time" : 55073, 197 | "count" : 17439 198 | }, 199 | "readLock" : { 200 | "time" : 55071, 201 | "count" : 17438 202 | }, 203 | "writeLock" : { 204 | "time" : 2, 205 | "count" : 1 206 | }, 207 | "queries" : { 208 | "time" : 0, 209 | "count" : 0 210 | }, 211 | "getmore" : { 212 | "time" : 0, 213 | "count" : 0 214 | }, 215 | "insert" : { 216 | "time" : 0, 217 | "count" : 0 218 | }, 219 | "update" : { 220 | "time" : 0, 221 | "count" : 0 222 | }, 223 | "remove" : { 224 | "time" : 0, 225 | "count" : 0 226 | }, 227 | "commands" : { 228 | "time" : 0, 
229 | "count" : 0 230 | } 231 | }, 232 | "local.system.replset" : { 233 | "total" : { 234 | "time" : 21, 235 | "count" : 2 236 | }, 237 | "readLock" : { 238 | "time" : 21, 239 | "count" : 2 240 | }, 241 | "writeLock" : { 242 | "time" : 0, 243 | "count" : 0 244 | }, 245 | "queries" : { 246 | "time" : 0, 247 | "count" : 0 248 | }, 249 | "getmore" : { 250 | "time" : 0, 251 | "count" : 0 252 | }, 253 | "insert" : { 254 | "time" : 0, 255 | "count" : 0 256 | }, 257 | "update" : { 258 | "time" : 0, 259 | "count" : 0 260 | }, 261 | "remove" : { 262 | "time" : 0, 263 | "count" : 0 264 | }, 265 | "commands" : { 266 | "time" : 0, 267 | "count" : 0 268 | } 269 | } 270 | }, 271 | "ok" : 1 272 | } 273 | -------------------------------------------------------------------------------- /collector/global_lock.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | globalLockRatio = prometheus.NewGauge(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Subsystem: "global_lock", 11 | Name: "ratio", 12 | Help: "The value of ratio displays the relationship between lockTime and totalTime. Low values indicate that operations have held the globalLock frequently for shorter periods of time. High values indicate that operations have held globalLock infrequently for longer periods of time", 13 | }) 14 | globalLockTotal = prometheus.NewCounter(prometheus.CounterOpts{ 15 | Namespace: Namespace, 16 | Subsystem: "global_lock", 17 | Name: "total", 18 | Help: "The value of totalTime represents the time, in microseconds, since the database last started and creation of the globalLock. This is roughly equivalent to total server uptime", 19 | }) 20 | globalLockLockTotal = prometheus.NewCounter(prometheus.CounterOpts{ 21 | Namespace: Namespace, 22 | Subsystem: "global_lock", 23 | Name: "lock_total", 24 | Help: "The value of lockTime represents the time, in microseconds, since the database last started, that the globalLock has been held", 25 | }) 26 | ) 27 | var ( 28 | globalLockCurrentQueue = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 29 | Namespace: Namespace, 30 | Name: "global_lock_current_queue", 31 | Help: "The currentQueue data structure value provides more granular information concerning the number of operations queued because of a lock", 32 | }, []string{"type"}) 33 | ) 34 | var ( 35 | globalLockClient = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 36 | Namespace: Namespace, 37 | Name: "global_lock_client", 38 | Help: "The activeClients data structure provides more granular information about the number of connected clients and the operation types (e.g. 
read or write) performed by these clients", 39 | }, []string{"type"}) 40 | ) 41 | 42 | // ClientStats metrics for client stats 43 | type ClientStats struct { 44 | Total float64 `bson:"total"` 45 | Readers float64 `bson:"readers"` 46 | Writers float64 `bson:"writers"` 47 | } 48 | 49 | // Export exports the metrics to prometheus 50 | func (clientStats *ClientStats) Export(ch chan<- prometheus.Metric) { 51 | globalLockClient.WithLabelValues("reader").Set(clientStats.Readers) 52 | globalLockClient.WithLabelValues("writer").Set(clientStats.Writers) 53 | } 54 | 55 | // QueueStats queue stats 56 | type QueueStats struct { 57 | Total float64 `bson:"total"` 58 | Readers float64 `bson:"readers"` 59 | Writers float64 `bson:"writers"` 60 | } 61 | 62 | // Export exports the metrics to prometheus 63 | func (queueStats *QueueStats) Export(ch chan<- prometheus.Metric) { 64 | globalLockCurrentQueue.WithLabelValues("reader").Set(queueStats.Readers) 65 | globalLockCurrentQueue.WithLabelValues("writer").Set(queueStats.Writers) 66 | } 67 | 68 | // GlobalLockStats global lock stats 69 | type GlobalLockStats struct { 70 | TotalTime float64 `bson:"totalTime"` 71 | LockTime float64 `bson:"lockTime"` 72 | Ratio float64 `bson:"ratio"` 73 | CurrentQueue *QueueStats `bson:"currentQueue"` 74 | ActiveClients *ClientStats `bson:"activeClients"` 75 | } 76 | 77 | // Export exports the metrics to prometheus 78 | func (globalLock *GlobalLockStats) Export(ch chan<- prometheus.Metric) { 79 | globalLockTotal.Set(globalLock.LockTime) 80 | globalLockRatio.Set(globalLock.Ratio) 81 | 82 | globalLock.CurrentQueue.Export(ch) 83 | globalLock.ActiveClients.Export(ch) 84 | 85 | globalLockTotal.Collect(ch) 86 | globalLockRatio.Collect(ch) 87 | globalLockCurrentQueue.Collect(ch) 88 | globalLockClient.Collect(ch) 89 | } 90 | 91 | // Describe describes the metrics for prometheus 92 | func (globalLock *GlobalLockStats) Describe(ch chan<- *prometheus.Desc) { 93 | globalLockTotal.Describe(ch) 94 | globalLockRatio.Describe(ch) 95 | globalLockCurrentQueue.Describe(ch) 96 | globalLockClient.Describe(ch) 97 | } 98 | -------------------------------------------------------------------------------- /collector/index_counters.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | indexCountersMissRatio = prometheus.NewGauge(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Subsystem: "index_counters", 11 | Name: "miss_ratio", 12 | Help: "The missRatio value is the ratio of hits to misses. This value is typically 0 or approaching 0", 13 | }) 14 | ) 15 | 16 | var ( 17 | indexCountersTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 18 | Namespace: Namespace, 19 | Name: "index_counters_total", 20 | Help: "Total indexes by type", 21 | }, []string{"type"}) 22 | ) 23 | 24 | //IndexCounterStats index counter stats 25 | type IndexCounterStats struct { 26 | Accesses float64 `bson:"accesses"` 27 | Hits float64 `bson:"hits"` 28 | Misses float64 `bson:"misses"` 29 | Resets float64 `bson:"resets"` 30 | MissRatio float64 `bson:"missRatio"` 31 | } 32 | 33 | // Export exports the data to prometheus. 
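// A rough sketch of the series this produces in Prometheus exposition format
// (the "mongodb" prefix assumes the default Namespace; sample values are
// illustrative only, not taken from a real server):
//
//	mongodb_index_counters_total{type="accesses"} 1500
//	mongodb_index_counters_total{type="hits"}     1498
//	mongodb_index_counters_total{type="misses"}   2
//	mongodb_index_counters_miss_ratio             0.0013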
34 | func (indexCountersStats *IndexCounterStats) Export(ch chan<- prometheus.Metric) { 35 | indexCountersTotal.WithLabelValues("accesses").Set(indexCountersStats.Accesses) 36 | indexCountersTotal.WithLabelValues("hits").Set(indexCountersStats.Hits) 37 | indexCountersTotal.WithLabelValues("misses").Set(indexCountersStats.Misses) 38 | indexCountersTotal.WithLabelValues("resets").Set(indexCountersStats.Resets) 39 | 40 | indexCountersMissRatio.Set(indexCountersStats.MissRatio) 41 | 42 | indexCountersTotal.Collect(ch) 43 | indexCountersMissRatio.Collect(ch) 44 | 45 | } 46 | 47 | // Describe describes the metrics for prometheus 48 | func (indexCountersStats *IndexCounterStats) Describe(ch chan<- *prometheus.Desc) { 49 | indexCountersTotal.Describe(ch) 50 | indexCountersMissRatio.Describe(ch) 51 | } 52 | -------------------------------------------------------------------------------- /collector/locks.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | locksTimeLockedGlobalMicrosecondsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 9 | Namespace: Namespace, 10 | Name: "locks_time_locked_global_microseconds_total", 11 | Help: "amount of time in microseconds that any database has held the global lock", 12 | }, []string{"type", "database"}) 13 | ) 14 | var ( 15 | locksTimeLockedLocalMicrosecondsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 16 | Namespace: Namespace, 17 | Name: "locks_time_locked_local_microseconds_total", 18 | Help: "amount of time in microseconds that any database has held the local lock", 19 | }, []string{"type", "database"}) 20 | ) 21 | var ( 22 | locksTimeAcquiringGlobalMicrosecondsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 23 | Namespace: Namespace, 24 | Name: "locks_time_acquiring_global_microseconds_total", 25 | Help: "amount of time in microseconds that any database has spent waiting for the global lock", 26 | }, []string{"type", "database"}) 27 | ) 28 | 29 | // LockStatsMap is a map of lock stats 30 | type LockStatsMap map[string]LockStats 31 | 32 | // ReadWriteLockTimes information about the lock 33 | type ReadWriteLockTimes struct { 34 | Read float64 `bson:"R"` 35 | Write float64 `bson:"W"` 36 | ReadLower float64 `bson:"r"` 37 | WriteLower float64 `bson:"w"` 38 | } 39 | 40 | // LockStats lock stats 41 | type LockStats struct { 42 | TimeLockedMicros ReadWriteLockTimes `bson:"timeLockedMicros"` 43 | TimeAcquiringMicros ReadWriteLockTimes `bson:"timeAcquiringMicros"` 44 | } 45 | 46 | // Export exports the data to prometheus. 47 | func (locks LockStatsMap) Export(ch chan<- prometheus.Metric) { 48 | for key, locks := range locks { 49 | if key == "." 
{ 50 | key = "dot" 51 | } 52 | 53 | locksTimeLockedGlobalMicrosecondsTotal.WithLabelValues("read", key).Set(locks.TimeLockedMicros.Read) 54 | locksTimeLockedGlobalMicrosecondsTotal.WithLabelValues("write", key).Set(locks.TimeLockedMicros.Write) 55 | 56 | locksTimeLockedLocalMicrosecondsTotal.WithLabelValues("read", key).Set(locks.TimeLockedMicros.ReadLower) 57 | locksTimeLockedLocalMicrosecondsTotal.WithLabelValues("write", key).Set(locks.TimeLockedMicros.WriteLower) 58 | 59 | locksTimeAcquiringGlobalMicrosecondsTotal.WithLabelValues("read", key).Set(locks.TimeAcquiringMicros.ReadLower) 60 | locksTimeAcquiringGlobalMicrosecondsTotal.WithLabelValues("write", key).Set(locks.TimeAcquiringMicros.WriteLower) 61 | } 62 | 63 | locksTimeLockedGlobalMicrosecondsTotal.Collect(ch) 64 | locksTimeLockedLocalMicrosecondsTotal.Collect(ch) 65 | locksTimeAcquiringGlobalMicrosecondsTotal.Collect(ch) 66 | } 67 | 68 | // Describe describes the metrics for prometheus 69 | func (locks LockStatsMap) Describe(ch chan<- *prometheus.Desc) { 70 | locksTimeLockedGlobalMicrosecondsTotal.Describe(ch) 71 | locksTimeLockedLocalMicrosecondsTotal.Describe(ch) 72 | locksTimeAcquiringGlobalMicrosecondsTotal.Describe(ch) 73 | } 74 | -------------------------------------------------------------------------------- /collector/main_test.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | ) 8 | 9 | func TestMain(m *testing.M) { 10 | os.Exit(m.Run()) 11 | } 12 | 13 | func LoadFixture(name string) []byte { 14 | data, err := ioutil.ReadFile("fixtures/" + name) 15 | if err != nil { 16 | panic(err) 17 | } 18 | 19 | return data 20 | } 21 | -------------------------------------------------------------------------------- /collector/memory.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | memory = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Name: "memory", 11 | Help: "The mem data structure holds information regarding the target system architecture of mongod and current memory use", 12 | }, []string{"type"}) 13 | ) 14 | 15 | // MemStats tracks the mem stats metrics. 16 | type MemStats struct { 17 | Bits float64 `bson:"bits"` 18 | Resident float64 `bson:"resident"` 19 | Virtual float64 `bson:"virtual"` 20 | Mapped float64 `bson:"mapped"` 21 | MappedWithJournal float64 `bson:"mappedWithJournal"` 22 | } 23 | 24 | // Export exports the data to prometheus. 
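// For reference, a serverStatus "mem" section shaped roughly like
//
//	"mem": { "bits": 64, "resident": 82, "virtual": 245, "mapped": 80, "mappedWithJournal": 160 }
//
// (field values illustrative) surfaces as mongodb_memory{type="resident"},
// mongodb_memory{type="virtual"}, mongodb_memory{type="mapped"} and
// mongodb_memory{type="mapped_with_journal"} gauges; the Bits field is
// decoded but not exported as a metric by this method.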
25 | func (memStats *MemStats) Export(ch chan<- prometheus.Metric) { 26 | memory.WithLabelValues("resident").Set(memStats.Resident) 27 | memory.WithLabelValues("virtual").Set(memStats.Virtual) 28 | memory.WithLabelValues("mapped").Set(memStats.Mapped) 29 | memory.WithLabelValues("mapped_with_journal").Set(memStats.MappedWithJournal) 30 | memory.Collect(ch) 31 | } 32 | 33 | // Describe describes the metrics for prometheus 34 | func (memStats *MemStats) Describe(ch chan<- *prometheus.Desc) { 35 | memory.Describe(ch) 36 | } 37 | -------------------------------------------------------------------------------- /collector/metrics.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | metricsCursorTimedOutTotal = prometheus.NewCounter(prometheus.CounterOpts{ 9 | Namespace: Namespace, 10 | Subsystem: "metrics_cursor", 11 | Name: "timed_out_total", 12 | Help: "timedOut provides the total number of cursors that have timed out since the server process started. If this number is large or growing at a regular rate, this may indicate an application error", 13 | }) 14 | ) 15 | var ( 16 | metricsCursorOpen = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 17 | Namespace: Namespace, 18 | Name: "metrics_cursor_open", 19 | Help: "The open is an embedded document that contains data regarding open cursors", 20 | }, []string{"state"}) 21 | ) 22 | var ( 23 | metricsDocumentTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 24 | Namespace: Namespace, 25 | Name: "metrics_document_total", 26 | Help: "The document holds a document of that reflect document access and modification patterns and data use. Compare these values to the data in the opcounters document, which track total number of operations", 27 | }, []string{"state"}) 28 | ) 29 | var ( 30 | metricsGetLastErrorWtimeNumTotal = prometheus.NewGauge(prometheus.GaugeOpts{ 31 | Namespace: Namespace, 32 | Subsystem: "metrics_get_last_error_wtime", 33 | Name: "num_total", 34 | Help: "num reports the total number of getLastError operations with a specified write concern (i.e. w) that wait for one or more members of a replica set to acknowledge the write operation (i.e. a w value greater than 1.)", 35 | }) 36 | metricsGetLastErrorWtimeTotalMilliseconds = prometheus.NewCounter(prometheus.CounterOpts{ 37 | Namespace: Namespace, 38 | Subsystem: "metrics_get_last_error_wtime", 39 | Name: "total_milliseconds", 40 | Help: "total_millis reports the total amount of time in milliseconds that the mongod has spent performing getLastError operations with write concern (i.e. w) that wait for one or more members of a replica set to acknowledge the write operation (i.e. 
a w value greater than 1.)", 41 | }) 42 | ) 43 | var ( 44 | metricsGetLastErrorWtimeoutsTotal = prometheus.NewCounter(prometheus.CounterOpts{ 45 | Namespace: Namespace, 46 | Subsystem: "metrics_get_last_error", 47 | Name: "wtimeouts_total", 48 | Help: "wtimeouts reports the number of times that write concern operations have timed out as a result of the wtimeout threshold to getLastError.", 49 | }) 50 | ) 51 | var ( 52 | metricsOperationTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 53 | Namespace: Namespace, 54 | Name: "metrics_operation_total", 55 | Help: "operation is a sub-document that holds counters for several types of update and query operations that MongoDB handles using special operation types", 56 | }, []string{"type"}) 57 | ) 58 | var ( 59 | metricsQueryExecutorTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 60 | Namespace: Namespace, 61 | Name: "metrics_query_executor_total", 62 | Help: "queryExecutor is a document that reports data from the query execution system", 63 | }, []string{"state"}) 64 | ) 65 | var ( 66 | metricsRecordMovesTotal = prometheus.NewCounter(prometheus.CounterOpts{ 67 | Namespace: Namespace, 68 | Subsystem: "metrics_record", 69 | Name: "moves_total", 70 | Help: "moves reports the total number of times documents move within the on-disk representation of the MongoDB data set. Documents move as a result of operations that increase the size of the document beyond their allocated record size", 71 | }) 72 | ) 73 | var ( 74 | metricsReplApplyBatchesNumTotal = prometheus.NewCounter(prometheus.CounterOpts{ 75 | Namespace: Namespace, 76 | Subsystem: "metrics_repl_apply_batches", 77 | Name: "num_total", 78 | Help: "num reports the total number of batches applied across all databases", 79 | }) 80 | metricsReplApplyBatchesTotalMilliseconds = prometheus.NewCounter(prometheus.CounterOpts{ 81 | Namespace: Namespace, 82 | Subsystem: "metrics_repl_apply_batches", 83 | Name: "total_milliseconds", 84 | Help: "total_millis reports the total amount of time the mongod has spent applying operations from the oplog", 85 | }) 86 | ) 87 | var ( 88 | metricsReplApplyOpsTotal = prometheus.NewCounter(prometheus.CounterOpts{ 89 | Namespace: Namespace, 90 | Subsystem: "metrics_repl_apply", 91 | Name: "ops_total", 92 | Help: "ops reports the total number of oplog operations applied", 93 | }) 94 | ) 95 | var ( 96 | metricsReplBufferCount = prometheus.NewGauge(prometheus.GaugeOpts{ 97 | Namespace: Namespace, 98 | Subsystem: "metrics_repl_buffer", 99 | Name: "count", 100 | Help: "count reports the current number of operations in the oplog buffer", 101 | }) 102 | metricsReplBufferMaxSizeBytes = prometheus.NewCounter(prometheus.CounterOpts{ 103 | Namespace: Namespace, 104 | Subsystem: "metrics_repl_buffer", 105 | Name: "max_size_bytes", 106 | Help: "maxSizeBytes reports the maximum size of the buffer. 
This value is a constant setting in the mongod, and is not configurable", 107 | }) 108 | metricsReplBufferSizeBytes = prometheus.NewGauge(prometheus.GaugeOpts{ 109 | Namespace: Namespace, 110 | Subsystem: "metrics_repl_buffer", 111 | Name: "size_bytes", 112 | Help: "sizeBytes reports the current size of the contents of the oplog buffer", 113 | }) 114 | ) 115 | var ( 116 | metricsReplNetworkGetmoresNumTotal = prometheus.NewCounter(prometheus.CounterOpts{ 117 | Namespace: Namespace, 118 | Subsystem: "metrics_repl_network_getmores", 119 | Name: "num_total", 120 | Help: "num reports the total number of getmore operations, which are operations that request an additional set of operations from the replication sync source.", 121 | }) 122 | metricsReplNetworkGetmoresTotalMilliseconds = prometheus.NewCounter(prometheus.CounterOpts{ 123 | Namespace: Namespace, 124 | Subsystem: "metrics_repl_network_getmores", 125 | Name: "total_milliseconds", 126 | Help: "total_millis reports the total amount of time required to collect data from getmore operations", 127 | }) 128 | ) 129 | var ( 130 | metricsReplNetworkBytesTotal = prometheus.NewCounter(prometheus.CounterOpts{ 131 | Namespace: Namespace, 132 | Subsystem: "metrics_repl_network", 133 | Name: "bytes_total", 134 | Help: "bytes reports the total amount of data read from the replication sync source", 135 | }) 136 | metricsReplNetworkOpsTotal = prometheus.NewCounter(prometheus.CounterOpts{ 137 | Namespace: Namespace, 138 | Subsystem: "metrics_repl_network", 139 | Name: "ops_total", 140 | Help: "ops reports the total number of operations read from the replication source.", 141 | }) 142 | metricsReplNetworkReadersCreatedTotal = prometheus.NewCounter(prometheus.CounterOpts{ 143 | Namespace: Namespace, 144 | Subsystem: "metrics_repl_network", 145 | Name: "readers_created_total", 146 | Help: "readersCreated reports the total number of oplog query processes created. MongoDB will create a new oplog query any time an error occurs in the connection, including a timeout, or a network operation. 
Furthermore, readersCreated will increment every time MongoDB selects a new source fore replication.", 147 | }) 148 | ) 149 | var ( 150 | metricsReplOplogInsertNumTotal = prometheus.NewCounter(prometheus.CounterOpts{ 151 | Namespace: Namespace, 152 | Subsystem: "metrics_repl_oplog_insert", 153 | Name: "num_total", 154 | Help: "num reports the total number of items inserted into the oplog.", 155 | }) 156 | metricsReplOplogInsertTotalMilliseconds = prometheus.NewCounter(prometheus.CounterOpts{ 157 | Namespace: Namespace, 158 | Subsystem: "metrics_repl_oplog_insert", 159 | Name: "total_milliseconds", 160 | Help: "total_millis reports the total amount of time spent for the mongod to insert data into the oplog.", 161 | }) 162 | ) 163 | var ( 164 | metricsReplOplogInsertBytesTotal = prometheus.NewCounter(prometheus.CounterOpts{ 165 | Namespace: Namespace, 166 | Subsystem: "metrics_repl_oplog", 167 | Name: "insert_bytes_total", 168 | Help: "insertBytes the total size of documents inserted into the oplog.", 169 | }) 170 | ) 171 | var ( 172 | metricsReplPreloadDocsNumTotal = prometheus.NewCounter(prometheus.CounterOpts{ 173 | Namespace: Namespace, 174 | Subsystem: "metrics_repl_preload_docs", 175 | Name: "num_total", 176 | Help: "num reports the total number of documents loaded during the pre-fetch stage of replication", 177 | }) 178 | metricsReplPreloadDocsTotalMilliseconds = prometheus.NewCounter(prometheus.CounterOpts{ 179 | Namespace: Namespace, 180 | Subsystem: "metrics_repl_preload_docs", 181 | Name: "total_milliseconds", 182 | Help: "total_millis reports the total amount of time spent loading documents as part of the pre-fetch stage of replication", 183 | }) 184 | ) 185 | var ( 186 | metricsReplPreloadIndexesNumTotal = prometheus.NewCounter(prometheus.CounterOpts{ 187 | Namespace: Namespace, 188 | Subsystem: "metrics_repl_preload_indexes", 189 | Name: "num_total", 190 | Help: "num reports the total number of index entries loaded by members before updating documents as part of the pre-fetch stage of replication", 191 | }) 192 | metricsReplPreloadIndexesTotalMilliseconds = prometheus.NewCounter(prometheus.CounterOpts{ 193 | Namespace: Namespace, 194 | Subsystem: "metrics_repl_preload_indexes", 195 | Name: "total_milliseconds", 196 | Help: "total_millis reports the total amount of time spent loading index entries as part of the pre-fetch stage of replication", 197 | }) 198 | ) 199 | var ( 200 | metricsStorageFreelistSearchTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 201 | Namespace: Namespace, 202 | Name: "metrics_storage_freelist_search_total", 203 | Help: "metrics about searching records in the database.", 204 | }, []string{"type"}) 205 | ) 206 | var ( 207 | metricsTTLDeletedDocumentsTotal = prometheus.NewCounter(prometheus.CounterOpts{ 208 | Namespace: Namespace, 209 | Subsystem: "metrics_ttl", 210 | Name: "deleted_documents_total", 211 | Help: "deletedDocuments reports the total number of documents deleted from collections with a ttl index.", 212 | }) 213 | metricsTTLPassesTotal = prometheus.NewCounter(prometheus.CounterOpts{ 214 | Namespace: Namespace, 215 | Subsystem: "metrics_ttl", 216 | Name: "passes_total", 217 | Help: "passes reports the number of times the background process removes documents from collections with a ttl index", 218 | }) 219 | ) 220 | 221 | // DocumentStats are the stats associated to a document. 
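// These fields are decoded from a sub-document that typically looks like
// (field values illustrative):
//
//	"document": { "deleted": 0, "inserted": 5, "returned": 120, "updated": 3 }
//
// and are exported as mongodb_metrics_document_total with a "state" label of
// deleted/inserted/returned/updated.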
222 | type DocumentStats struct { 223 | Deleted float64 `bson:"deleted"` 224 | Inserted float64 `bson:"inserted"` 225 | Returned float64 `bson:"returned"` 226 | Updated float64 `bson:"updated"` 227 | } 228 | 229 | // Export exposes the document stats to be consumed by the prometheus server. 230 | func (documentStats *DocumentStats) Export(ch chan<- prometheus.Metric) { 231 | metricsDocumentTotal.WithLabelValues("deleted").Set(documentStats.Deleted) 232 | metricsDocumentTotal.WithLabelValues("inserted").Set(documentStats.Inserted) 233 | metricsDocumentTotal.WithLabelValues("returned").Set(documentStats.Returned) 234 | metricsDocumentTotal.WithLabelValues("updated").Set(documentStats.Updated) 235 | } 236 | 237 | // BenchmarkStats is bechmark info about an operation. 238 | type BenchmarkStats struct { 239 | Num float64 `bson:"num"` 240 | TotalMillis float64 `bson:"totalMillis"` 241 | } 242 | 243 | // GetLastErrorStats are the last error stats. 244 | type GetLastErrorStats struct { 245 | Wtimeouts float64 `bson:"wtimeouts"` 246 | Wtime *BenchmarkStats `bson:"wtime"` 247 | } 248 | 249 | // Export exposes the get last error stats. 250 | func (getLastErrorStats *GetLastErrorStats) Export(ch chan<- prometheus.Metric) { 251 | metricsGetLastErrorWtimeNumTotal.Set(getLastErrorStats.Wtime.Num) 252 | metricsGetLastErrorWtimeTotalMilliseconds.Set(getLastErrorStats.Wtime.TotalMillis) 253 | 254 | metricsGetLastErrorWtimeoutsTotal.Set(getLastErrorStats.Wtimeouts) 255 | } 256 | 257 | // OperationStats are the stats for some kind of operations. 258 | type OperationStats struct { 259 | Fastmod float64 `bson:"fastmod"` 260 | Idhack float64 `bson:"idhack"` 261 | ScanAndOrder float64 `bson:"scanAndOrder"` 262 | } 263 | 264 | // Export exports the operation stats. 265 | func (operationStats *OperationStats) Export(ch chan<- prometheus.Metric) { 266 | metricsOperationTotal.WithLabelValues("fastmod").Set(operationStats.Fastmod) 267 | metricsOperationTotal.WithLabelValues("idhack").Set(operationStats.Idhack) 268 | metricsOperationTotal.WithLabelValues("scan_and_order").Set(operationStats.ScanAndOrder) 269 | } 270 | 271 | // QueryExecutorStats are the stats associated with a query execution. 272 | type QueryExecutorStats struct { 273 | Scanned float64 `bson:"scanned"` 274 | ScannedObjects float64 `bson:"scannedObjects"` 275 | } 276 | 277 | // Export exports the query executor stats. 278 | func (queryExecutorStats *QueryExecutorStats) Export(ch chan<- prometheus.Metric) { 279 | metricsQueryExecutorTotal.WithLabelValues("scanned").Set(queryExecutorStats.Scanned) 280 | metricsQueryExecutorTotal.WithLabelValues("scanned_objects").Set(queryExecutorStats.ScannedObjects) 281 | } 282 | 283 | // RecordStats are stats associated with a record. 284 | type RecordStats struct { 285 | Moves float64 `bson:"moves"` 286 | } 287 | 288 | // Export exposes the record stats. 289 | func (recordStats *RecordStats) Export(ch chan<- prometheus.Metric) { 290 | metricsRecordMovesTotal.Set(recordStats.Moves) 291 | } 292 | 293 | // ApplyStats are the stats associated with the apply operation. 
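// Batches reuses BenchmarkStats, so the expected source document is roughly
// (values illustrative):
//
//	"apply": { "batches": { "num": 12, "totalMillis": 34 }, "ops": 5600 }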
294 | type ApplyStats struct { 295 | Batches *BenchmarkStats `bson:"batches"` 296 | Ops float64 `bson:"ops"` 297 | } 298 | 299 | // Export exports the apply stats 300 | func (applyStats *ApplyStats) Export(ch chan<- prometheus.Metric) { 301 | metricsReplApplyOpsTotal.Set(applyStats.Ops) 302 | 303 | metricsReplApplyBatchesNumTotal.Set(applyStats.Batches.Num) 304 | metricsReplApplyBatchesTotalMilliseconds.Set(applyStats.Batches.TotalMillis) 305 | } 306 | 307 | // BufferStats are the stats associated with the buffer 308 | type BufferStats struct { 309 | Count float64 `bson:"count"` 310 | MaxSizeBytes float64 `bson:"maxSizeBytes"` 311 | SizeBytes float64 `bson:"sizeBytes"` 312 | } 313 | 314 | // Export exports the buffer stats. 315 | func (bufferStats *BufferStats) Export(ch chan<- prometheus.Metric) { 316 | metricsReplBufferCount.Set(bufferStats.Count) 317 | metricsReplBufferMaxSizeBytes.Set(bufferStats.MaxSizeBytes) 318 | metricsReplBufferSizeBytes.Set(bufferStats.SizeBytes) 319 | } 320 | 321 | // MetricsNetworkStats are the network stats. 322 | type MetricsNetworkStats struct { 323 | Bytes float64 `bson:"bytes"` 324 | Ops float64 `bson:"ops"` 325 | GetMores *BenchmarkStats `bson:"getmores"` 326 | ReadersCreated float64 `bson:"readersCreated"` 327 | } 328 | 329 | // Export exposes the network stats. 330 | func (metricsNetworkStats *MetricsNetworkStats) Export(ch chan<- prometheus.Metric) { 331 | metricsReplNetworkBytesTotal.Set(metricsNetworkStats.Bytes) 332 | metricsReplNetworkOpsTotal.Set(metricsNetworkStats.Ops) 333 | metricsReplNetworkReadersCreatedTotal.Set(metricsNetworkStats.ReadersCreated) 334 | 335 | metricsReplNetworkGetmoresNumTotal.Set(metricsNetworkStats.GetMores.Num) 336 | metricsReplNetworkGetmoresTotalMilliseconds.Set(metricsNetworkStats.GetMores.TotalMillis) 337 | } 338 | 339 | // ReplStats are the stats associated with the replication process. 340 | type ReplStats struct { 341 | Apply *ApplyStats `bson:"apply"` 342 | Buffer *BufferStats `bson:"buffer"` 343 | Network *MetricsNetworkStats `bson:"network"` 344 | PreloadStats *PreloadStats `bson:"preload"` 345 | } 346 | 347 | // Export exposes the replication stats. 348 | func (replStats *ReplStats) Export(ch chan<- prometheus.Metric) { 349 | replStats.Apply.Export(ch) 350 | replStats.Buffer.Export(ch) 351 | replStats.Network.Export(ch) 352 | replStats.PreloadStats.Export(ch) 353 | } 354 | 355 | // PreloadStats are the stats associated with preload operation. 356 | type PreloadStats struct { 357 | Docs *BenchmarkStats `bson:"docs"` 358 | Indexes *BenchmarkStats `bson:"indexes"` 359 | } 360 | 361 | // Export exposes the preload stats. 362 | func (preloadStats *PreloadStats) Export(ch chan<- prometheus.Metric) { 363 | metricsReplPreloadDocsNumTotal.Set(preloadStats.Docs.Num) 364 | metricsReplPreloadDocsTotalMilliseconds.Set(preloadStats.Docs.TotalMillis) 365 | 366 | metricsReplPreloadIndexesNumTotal.Set(preloadStats.Indexes.Num) 367 | metricsReplPreloadIndexesTotalMilliseconds.Set(preloadStats.Indexes.TotalMillis) 368 | } 369 | 370 | // StorageStats are the stats associated with the storage. 371 | type StorageStats struct { 372 | BucketExhausted float64 `bson:"freelist.search.bucketExhausted"` 373 | Requests float64 `bson:"freelist.search.requests"` 374 | Scanned float64 `bson:"freelist.search.scanned"` 375 | } 376 | 377 | // Export exports the storage stats. 
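// Sketch of the resulting series (assuming the default "mongodb" namespace;
// sample values illustrative):
//
//	mongodb_metrics_storage_freelist_search_total{type="bucket_exhausted"} 0
//	mongodb_metrics_storage_freelist_search_total{type="requests"} 42
//	mongodb_metrics_storage_freelist_search_total{type="scanned"} 42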
378 | func (storageStats *StorageStats) Export(ch chan<- prometheus.Metric) { 379 | metricsStorageFreelistSearchTotal.WithLabelValues("bucket_exhausted").Set(storageStats.BucketExhausted) 380 | metricsStorageFreelistSearchTotal.WithLabelValues("requests").Set(storageStats.Requests) 381 | metricsStorageFreelistSearchTotal.WithLabelValues("scanned").Set(storageStats.Scanned) 382 | } 383 | 384 | // TtlStats are the stats associated with the TTL deletes 385 | type TtlStats struct { 386 | Passes float64 `bson:"passes"` 387 | DeletedDocuments float64 `bson:"deletedDocuments"` 388 | } 389 | 390 | // Export exports the ttl stats. 391 | func (ttlStats *TtlStats) Export(ch chan<- prometheus.Metric) { 392 | metricsTTLPassesTotal.Set(ttlStats.Passes) 393 | metricsTTLDeletedDocumentsTotal.Set(ttlStats.DeletedDocuments) 394 | } 395 | 396 | // CursorStatsOpen are the stats for open cursors 397 | type CursorStatsOpen struct { 398 | NoTimeout float64 `bson:"noTimeout"` 399 | Pinned float64 `bson:"pinned"` 400 | Total float64 `bson:"total"` 401 | } 402 | 403 | // CursorStats are the stats for cursors 404 | type CursorStats struct { 405 | TimedOut float64 `bson:"timedOut"` 406 | Open *CursorStatsOpen `bson:"open"` 407 | } 408 | 409 | // Export exports the cursor stats. 410 | func (cursorStats *CursorStats) Export(ch chan<- prometheus.Metric) { 411 | metricsCursorTimedOutTotal.Set(cursorStats.TimedOut) 412 | metricsCursorOpen.WithLabelValues("timed_out").Set(cursorStats.Open.NoTimeout) 413 | metricsCursorOpen.WithLabelValues("pinned").Set(cursorStats.Open.Pinned) 414 | metricsCursorOpen.WithLabelValues("total").Set(cursorStats.Open.Total) 415 | } 416 | 417 | // MetricsStats are all stats associated with metrics of the system 418 | type MetricsStats struct { 419 | Document *DocumentStats `bson:"document"` 420 | GetLastError *GetLastErrorStats `bson:"getLastError"` 421 | Operation *OperationStats `bson:"operation"` 422 | QueryExecutor *QueryExecutorStats `bson:"queryExecutor"` 423 | Record *RecordStats `bson:"record"` 424 | Repl *ReplStats `bson:"repl"` 425 | Storage *StorageStats `bson:"storage"` 426 | Cursor *CursorStats `bson:"cursor"` 427 | Ttl *TtlStats `bson:"ttl"` 428 | } 429 | 430 | // Export exports the metrics stats. 
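// Each sub-document is nil-checked before exporting because serverStatus
// output can vary across MongoDB versions and storage engines, so some of
// these sections may be absent; the unconditional Collect calls below still
// emit whatever values the underlying collectors currently hold.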
431 | func (metricsStats *MetricsStats) Export(ch chan<- prometheus.Metric) { 432 | if metricsStats.Document != nil { 433 | metricsStats.Document.Export(ch) 434 | } 435 | if metricsStats.GetLastError != nil { 436 | metricsStats.GetLastError.Export(ch) 437 | } 438 | if metricsStats.Operation != nil { 439 | metricsStats.Operation.Export(ch) 440 | } 441 | if metricsStats.QueryExecutor != nil { 442 | metricsStats.QueryExecutor.Export(ch) 443 | } 444 | if metricsStats.Record != nil { 445 | metricsStats.Record.Export(ch) 446 | } 447 | if metricsStats.Repl != nil { 448 | metricsStats.Repl.Export(ch) 449 | } 450 | if metricsStats.Storage != nil { 451 | metricsStats.Storage.Export(ch) 452 | } 453 | if metricsStats.Ttl != nil { 454 | metricsStats.Ttl.Export(ch) 455 | } 456 | if metricsStats.Cursor != nil { 457 | metricsStats.Cursor.Export(ch) 458 | } 459 | 460 | metricsCursorTimedOutTotal.Collect(ch) 461 | metricsCursorOpen.Collect(ch) 462 | metricsDocumentTotal.Collect(ch) 463 | metricsGetLastErrorWtimeNumTotal.Collect(ch) 464 | metricsGetLastErrorWtimeTotalMilliseconds.Collect(ch) 465 | metricsGetLastErrorWtimeoutsTotal.Collect(ch) 466 | metricsOperationTotal.Collect(ch) 467 | metricsQueryExecutorTotal.Collect(ch) 468 | metricsRecordMovesTotal.Collect(ch) 469 | metricsReplApplyBatchesNumTotal.Collect(ch) 470 | metricsReplApplyBatchesTotalMilliseconds.Collect(ch) 471 | metricsReplApplyOpsTotal.Collect(ch) 472 | metricsReplBufferCount.Collect(ch) 473 | metricsReplBufferMaxSizeBytes.Collect(ch) 474 | metricsReplBufferSizeBytes.Collect(ch) 475 | metricsReplNetworkGetmoresNumTotal.Collect(ch) 476 | metricsReplNetworkGetmoresTotalMilliseconds.Collect(ch) 477 | metricsReplNetworkBytesTotal.Collect(ch) 478 | metricsReplNetworkOpsTotal.Collect(ch) 479 | metricsReplNetworkReadersCreatedTotal.Collect(ch) 480 | metricsReplOplogInsertNumTotal.Collect(ch) 481 | metricsReplOplogInsertTotalMilliseconds.Collect(ch) 482 | metricsReplOplogInsertBytesTotal.Collect(ch) 483 | metricsReplPreloadDocsNumTotal.Collect(ch) 484 | metricsReplPreloadDocsTotalMilliseconds.Collect(ch) 485 | metricsReplPreloadIndexesNumTotal.Collect(ch) 486 | metricsReplPreloadIndexesTotalMilliseconds.Collect(ch) 487 | metricsStorageFreelistSearchTotal.Collect(ch) 488 | metricsTTLDeletedDocumentsTotal.Collect(ch) 489 | metricsTTLPassesTotal.Collect(ch) 490 | } 491 | 492 | // Describe describes the metrics for prometheus 493 | func (metricsStats *MetricsStats) Describe(ch chan<- *prometheus.Desc) { 494 | metricsCursorTimedOutTotal.Describe(ch) 495 | metricsCursorOpen.Describe(ch) 496 | metricsDocumentTotal.Describe(ch) 497 | metricsGetLastErrorWtimeNumTotal.Describe(ch) 498 | metricsGetLastErrorWtimeTotalMilliseconds.Describe(ch) 499 | metricsGetLastErrorWtimeoutsTotal.Describe(ch) 500 | metricsOperationTotal.Describe(ch) 501 | metricsQueryExecutorTotal.Describe(ch) 502 | metricsRecordMovesTotal.Describe(ch) 503 | metricsReplApplyBatchesNumTotal.Describe(ch) 504 | metricsReplApplyBatchesTotalMilliseconds.Describe(ch) 505 | metricsReplApplyOpsTotal.Describe(ch) 506 | metricsReplBufferCount.Describe(ch) 507 | metricsReplBufferMaxSizeBytes.Describe(ch) 508 | metricsReplBufferSizeBytes.Describe(ch) 509 | metricsReplNetworkGetmoresNumTotal.Describe(ch) 510 | metricsReplNetworkGetmoresTotalMilliseconds.Describe(ch) 511 | metricsReplNetworkBytesTotal.Describe(ch) 512 | metricsReplNetworkOpsTotal.Describe(ch) 513 | metricsReplNetworkReadersCreatedTotal.Describe(ch) 514 | metricsReplOplogInsertNumTotal.Describe(ch) 515 | 
metricsReplOplogInsertTotalMilliseconds.Describe(ch) 516 | metricsReplOplogInsertBytesTotal.Describe(ch) 517 | metricsReplPreloadDocsNumTotal.Describe(ch) 518 | metricsReplPreloadDocsTotalMilliseconds.Describe(ch) 519 | metricsReplPreloadIndexesNumTotal.Describe(ch) 520 | metricsReplPreloadIndexesTotalMilliseconds.Describe(ch) 521 | metricsStorageFreelistSearchTotal.Describe(ch) 522 | metricsTTLDeletedDocumentsTotal.Describe(ch) 523 | metricsTTLPassesTotal.Describe(ch) 524 | } 525 | -------------------------------------------------------------------------------- /collector/mongodb_collector.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/dcu/mongodb_exporter/shared" 7 | "github.com/golang/glog" 8 | "github.com/prometheus/client_golang/prometheus" 9 | "gopkg.in/mgo.v2" 10 | ) 11 | 12 | var ( 13 | // Namespace is the namespace of the metrics 14 | Namespace = "mongodb" 15 | ) 16 | 17 | var ( 18 | upGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 19 | Namespace: Namespace, 20 | Name: "up", 21 | Help: "To show if we can connect to mongodb instance", 22 | }, []string{}) 23 | ) 24 | 25 | // MongodbCollectorOpts is the options of the mongodb collector. 26 | type MongodbCollectorOpts struct { 27 | URI string 28 | TLSCertificateFile string 29 | TLSPrivateKeyFile string 30 | TLSCaFile string 31 | TLSHostnameValidation bool 32 | CollectReplSet bool 33 | CollectOplog bool 34 | TailOplog bool 35 | CollectTopMetrics bool 36 | CollectDatabaseMetrics bool 37 | CollectCollectionMetrics bool 38 | CollectProfileMetrics bool 39 | CollectConnPoolStats bool 40 | UserName string 41 | AuthMechanism string 42 | SocketTimeout time.Duration 43 | } 44 | 45 | func (in MongodbCollectorOpts) toSessionOps() shared.MongoSessionOpts { 46 | return shared.MongoSessionOpts{ 47 | URI: in.URI, 48 | TLSCertificateFile: in.TLSCertificateFile, 49 | TLSPrivateKeyFile: in.TLSPrivateKeyFile, 50 | TLSCaFile: in.TLSCaFile, 51 | TLSHostnameValidation: in.TLSHostnameValidation, 52 | UserName: in.UserName, 53 | AuthMechanism: in.AuthMechanism, 54 | SocketTimeout: in.SocketTimeout, 55 | } 56 | } 57 | 58 | // MongodbCollector is in charge of collecting mongodb's metrics. 59 | type MongodbCollector struct { 60 | Opts MongodbCollectorOpts 61 | } 62 | 63 | // NewMongodbCollector returns a new instance of a MongodbCollector. 64 | func NewMongodbCollector(opts MongodbCollectorOpts) *MongodbCollector { 65 | exporter := &MongodbCollector{ 66 | Opts: opts, 67 | } 68 | 69 | return exporter 70 | } 71 | 72 | // Describe describes all mongodb's metrics. 73 | func (exporter *MongodbCollector) Describe(ch chan<- *prometheus.Desc) { 74 | (&ServerStatus{}).Describe(ch) 75 | (&ReplSetStatus{}).Describe(ch) 76 | (&ReplSetConf{}).Describe(ch) 77 | (&DatabaseStatus{}).Describe(ch) 78 | 79 | if exporter.Opts.CollectTopMetrics { 80 | (&TopStatus{}).Describe(ch) 81 | } 82 | } 83 | 84 | // Collect collects all mongodb's metrics. 
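// A minimal usage sketch (the HTTP wiring below is illustrative only and is
// not necessarily how mongodb_exporter.go sets things up; it assumes the
// standard net/http, log, prometheus and promhttp packages are imported):
//
//	c := collector.NewMongodbCollector(collector.MongodbCollectorOpts{URI: "mongodb://localhost:27017"})
//	prometheus.MustRegister(c)
//	http.Handle("/metrics", promhttp.Handler())
//	log.Fatal(http.ListenAndServe(":9001", nil))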
85 | func (exporter *MongodbCollector) Collect(ch chan<- prometheus.Metric) { 86 | mongoSess := shared.MongoSession(exporter.Opts.toSessionOps()) 87 | if mongoSess != nil { 88 | upGauge.WithLabelValues().Set(float64(1)) 89 | upGauge.Collect(ch) 90 | upGauge.Reset() 91 | defer mongoSess.Close() 92 | glog.Info("Collecting Server Status") 93 | exporter.collectServerStatus(mongoSess, ch) 94 | if exporter.Opts.CollectReplSet { 95 | glog.Info("Collecting ReplSet Status") 96 | exporter.collectReplSetStatus(mongoSess, ch) 97 | exporter.collectReplSetConf(mongoSess, ch) 98 | } 99 | if exporter.Opts.CollectOplog { 100 | glog.Info("Collecting Oplog Status") 101 | exporter.collectOplogStatus(mongoSess, ch) 102 | } 103 | 104 | if exporter.Opts.TailOplog { 105 | glog.Info("Collecting Oplog Tail Stats") 106 | exporter.collectOplogTailStats(mongoSess, ch) 107 | } 108 | 109 | if exporter.Opts.CollectTopMetrics { 110 | glog.Info("Collecting Top Metrics") 111 | exporter.collectTopStatus(mongoSess, ch) 112 | } 113 | 114 | if exporter.Opts.CollectDatabaseMetrics { 115 | glog.Info("Collecting Database Metrics") 116 | exporter.collectDatabaseStatus(mongoSess, ch) 117 | } 118 | 119 | if exporter.Opts.CollectCollectionMetrics { 120 | glog.Info("Collection Collection Metrics") 121 | exporter.collectCollectionStatus(mongoSess, ch) 122 | } 123 | 124 | if exporter.Opts.CollectProfileMetrics { 125 | glog.Info("Collection Profile Metrics") 126 | exporter.collectProfileStatus(mongoSess, ch) 127 | } 128 | 129 | if exporter.Opts.CollectConnPoolStats { 130 | glog.Info("Collecting Connection Pool Stats") 131 | exporter.collectConnPoolStats(mongoSess, ch) 132 | } 133 | } else { 134 | upGauge.WithLabelValues().Set(float64(0)) 135 | upGauge.Collect(ch) 136 | upGauge.Reset() 137 | } 138 | } 139 | 140 | func (exporter *MongodbCollector) collectServerStatus(session *mgo.Session, ch chan<- prometheus.Metric) *ServerStatus { 141 | serverStatus := GetServerStatus(session) 142 | if serverStatus != nil { 143 | glog.Info("exporting ServerStatus Metrics") 144 | serverStatus.Export(ch) 145 | } 146 | return serverStatus 147 | } 148 | 149 | func (exporter *MongodbCollector) collectReplSetStatus(session *mgo.Session, ch chan<- prometheus.Metric) *ReplSetStatus { 150 | replSetStatus := GetReplSetStatus(session) 151 | 152 | if replSetStatus != nil { 153 | glog.Info("exporting ReplSetStatus Metrics") 154 | replSetStatus.Export(ch) 155 | } 156 | 157 | return replSetStatus 158 | } 159 | 160 | func (exporter *MongodbCollector) collectReplSetConf(session *mgo.Session, ch chan<- prometheus.Metric) *ReplSetConf { 161 | replSetConf := GetReplSetConf(session) 162 | 163 | if replSetConf != nil { 164 | glog.Info("exporting ReplSetConf Metrics") 165 | replSetConf.Export(ch) 166 | } 167 | 168 | return replSetConf 169 | } 170 | 171 | func (exporter *MongodbCollector) collectOplogStatus(session *mgo.Session, ch chan<- prometheus.Metric) *OplogStatus { 172 | oplogStatus := GetOplogStatus(session) 173 | 174 | if oplogStatus != nil { 175 | glog.Info("exporting OplogStatus Metrics") 176 | oplogStatus.Export(ch) 177 | } 178 | 179 | return oplogStatus 180 | } 181 | 182 | func (exporter *MongodbCollector) collectOplogTailStats(session *mgo.Session, ch chan<- prometheus.Metric) *OplogTailStats { 183 | oplogTailStats := GetOplogTailStats(session) 184 | 185 | if oplogTailStats != nil { 186 | glog.Info("exporting oplogTailStats Metrics") 187 | oplogTailStats.Export(ch) 188 | } 189 | 190 | return oplogTailStats 191 | } 192 | 193 | func (exporter *MongodbCollector) 
collectTopStatus(session *mgo.Session, ch chan<- prometheus.Metric) *TopStatus { 194 | topStatus := GetTopStatus(session) 195 | if topStatus != nil { 196 | glog.Info("exporting Top Metrics") 197 | topStatus.Export(ch) 198 | } 199 | return topStatus 200 | } 201 | 202 | func (exporter *MongodbCollector) collectDatabaseStatus(session *mgo.Session, ch chan<- prometheus.Metric) { 203 | all, err := session.DatabaseNames() 204 | if err != nil { 205 | glog.Error("Failed to get database names") 206 | return 207 | } 208 | for _, db := range all { 209 | if db == "admin" || db == "test" { 210 | continue 211 | } 212 | dbStatus := GetDatabaseStatus(session, db) 213 | if dbStatus != nil { 214 | glog.Infof("exporting Database Metrics for db=%q", dbStatus.Name) 215 | dbStatus.Export(ch) 216 | } 217 | } 218 | } 219 | 220 | func (exporter *MongodbCollector) collectCollectionStatus(session *mgo.Session, ch chan<- prometheus.Metric) { 221 | database_names, err := session.DatabaseNames() 222 | if err != nil { 223 | glog.Error("failed to get database names") 224 | return 225 | } 226 | for _, db := range database_names { 227 | if db == "admin" || db == "test" { 228 | continue 229 | } 230 | CollectCollectionStatus(session, db, ch) 231 | } 232 | } 233 | 234 | func (exporter *MongodbCollector) collectProfileStatus(session *mgo.Session, ch chan<- prometheus.Metric) { 235 | all, err := session.DatabaseNames() 236 | if err != nil { 237 | glog.Error("failed to get database names: %s", err) 238 | return 239 | } 240 | for _, db := range all { 241 | if db == "admin" || db == "test" { 242 | continue 243 | } 244 | CollectProfileStatus(session, db, ch) 245 | } 246 | } 247 | 248 | func (exporter *MongodbCollector) collectConnPoolStats(session *mgo.Session, ch chan<- prometheus.Metric) { 249 | connPoolStats := GetConnPoolStats(session) 250 | 251 | if connPoolStats != nil { 252 | glog.Info("exporting ConnPoolStats Metrics") 253 | connPoolStats.Export(ch) 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /collector/mongodb_collector_test.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/dcu/mongodb_exporter/shared" 7 | "github.com/prometheus/client_golang/prometheus" 8 | ) 9 | 10 | func Test_CollectServerStatus(t *testing.T) { 11 | shared.ParseEnabledGroups("assers,durability,backgrond_flushing,connections,extra_info,global_lock,index_counters,network,op_counters,memory,locks,metrics,cursors") 12 | collector := NewMongodbCollector(MongodbCollectorOpts{URI: "localhost"}) 13 | go collector.Collect(nil) 14 | } 15 | 16 | func Test_DescribeCollector(t *testing.T) { 17 | collector := NewMongodbCollector(MongodbCollectorOpts{URI: "localhost"}) 18 | 19 | ch := make(chan *prometheus.Desc) 20 | go collector.Describe(ch) 21 | } 22 | 23 | func Test_CollectCollector(t *testing.T) { 24 | collector := NewMongodbCollector(MongodbCollectorOpts{URI: "localhost"}) 25 | 26 | ch := make(chan prometheus.Metric) 27 | go collector.Collect(ch) 28 | } 29 | -------------------------------------------------------------------------------- /collector/network.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | networkBytesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 9 | Namespace: Namespace, 10 | Name: "network_bytes_total", 11 | Help: "The 
network data structure contains data regarding MongoDB's network use", 12 | }, []string{"state"}) 13 | ) 14 | var ( 15 | networkMetricsNumRequestsTotal = prometheus.NewCounter(prometheus.CounterOpts{ 16 | Namespace: Namespace, 17 | Subsystem: "network_metrics", 18 | Name: "num_requests_total", 19 | Help: "The numRequests field is a counter of the total number of distinct requests that the server has received. Use this value to provide context for the bytesIn and bytesOut values to ensure that MongoDB's network utilization is consistent with expectations and application use", 20 | }) 21 | ) 22 | 23 | //NetworkStats network stats 24 | type NetworkStats struct { 25 | BytesIn float64 `bson:"bytesIn"` 26 | BytesOut float64 `bson:"bytesOut"` 27 | NumRequests float64 `bson:"numRequests"` 28 | } 29 | 30 | // Export exports the data to prometheus 31 | func (networkStats *NetworkStats) Export(ch chan<- prometheus.Metric) { 32 | networkBytesTotal.WithLabelValues("in_bytes").Set(networkStats.BytesIn) 33 | networkBytesTotal.WithLabelValues("out_bytes").Set(networkStats.BytesOut) 34 | 35 | networkMetricsNumRequestsTotal.Set(networkStats.NumRequests) 36 | 37 | networkMetricsNumRequestsTotal.Collect(ch) 38 | networkBytesTotal.Collect(ch) 39 | } 40 | 41 | // Describe describes the metrics for prometheus 42 | func (networkStats *NetworkStats) Describe(ch chan<- *prometheus.Desc) { 43 | networkMetricsNumRequestsTotal.Describe(ch) 44 | networkBytesTotal.Describe(ch) 45 | } 46 | -------------------------------------------------------------------------------- /collector/op_counters.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | opCountersTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 9 | Namespace: Namespace, 10 | Name: "op_counters_total", 11 | Help: "The opcounters data structure provides an overview of database operations by type and makes it possible to analyze the load on the database in more granular manner. These numbers will grow over time and in response to database use. Analyze these values over time to track database utilization", 12 | }, []string{"type"}) 13 | ) 14 | var ( 15 | opCountersReplTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 16 | Namespace: Namespace, 17 | Name: "op_counters_repl_total", 18 | Help: "The opcountersRepl data structure, similar to the opcounters data structure, provides an overview of database replication operations by type and makes it possible to analyze the load on the replica in more granular manner. These values only appear when the current host has replication enabled", 19 | }, []string{"type"}) 20 | ) 21 | 22 | // OpcountersStats opcounters stats 23 | type OpcountersStats struct { 24 | Insert float64 `bson:"insert"` 25 | Query float64 `bson:"query"` 26 | Update float64 `bson:"update"` 27 | Delete float64 `bson:"delete"` 28 | GetMore float64 `bson:"getmore"` 29 | Command float64 `bson:"command"` 30 | } 31 | 32 | // Export exports the data to prometheus. 
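// Sketch of the resulting series (default "mongodb" namespace assumed;
// sample values illustrative):
//
//	mongodb_op_counters_total{type="insert"}  100
//	mongodb_op_counters_total{type="query"}   2500
//	mongodb_op_counters_total{type="update"}  30
//	mongodb_op_counters_total{type="delete"}  5
//	mongodb_op_counters_total{type="getmore"} 12
//	mongodb_op_counters_total{type="command"} 400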
33 | func (opCounters *OpcountersStats) Export(ch chan<- prometheus.Metric) { 34 | opCountersTotal.WithLabelValues("insert").Set(opCounters.Insert) 35 | opCountersTotal.WithLabelValues("query").Set(opCounters.Query) 36 | opCountersTotal.WithLabelValues("update").Set(opCounters.Update) 37 | opCountersTotal.WithLabelValues("delete").Set(opCounters.Delete) 38 | opCountersTotal.WithLabelValues("getmore").Set(opCounters.GetMore) 39 | opCountersTotal.WithLabelValues("command").Set(opCounters.Command) 40 | 41 | opCountersTotal.Collect(ch) 42 | } 43 | 44 | // Describe describes the metrics for prometheus 45 | func (opCounters *OpcountersStats) Describe(ch chan<- *prometheus.Desc) { 46 | opCountersTotal.Describe(ch) 47 | } 48 | 49 | // OpcountersReplStats opcounters stats 50 | type OpcountersReplStats struct { 51 | Insert float64 `bson:"insert"` 52 | Query float64 `bson:"query"` 53 | Update float64 `bson:"update"` 54 | Delete float64 `bson:"delete"` 55 | GetMore float64 `bson:"getmore"` 56 | Command float64 `bson:"command"` 57 | } 58 | 59 | // Export exports the data to prometheus. 60 | func (opCounters *OpcountersReplStats) Export(ch chan<- prometheus.Metric) { 61 | opCountersReplTotal.WithLabelValues("insert").Set(opCounters.Insert) 62 | opCountersReplTotal.WithLabelValues("query").Set(opCounters.Query) 63 | opCountersReplTotal.WithLabelValues("update").Set(opCounters.Update) 64 | opCountersReplTotal.WithLabelValues("delete").Set(opCounters.Delete) 65 | opCountersReplTotal.WithLabelValues("getmore").Set(opCounters.GetMore) 66 | opCountersReplTotal.WithLabelValues("command").Set(opCounters.Command) 67 | 68 | opCountersReplTotal.Collect(ch) 69 | } 70 | 71 | // Describe describes the metrics for prometheus 72 | func (opCounters *OpcountersReplStats) Describe(ch chan<- *prometheus.Desc) { 73 | opCountersReplTotal.Describe(ch) 74 | } 75 | -------------------------------------------------------------------------------- /collector/oplog_status.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/golang/glog" 7 | "github.com/prometheus/client_golang/prometheus" 8 | "gopkg.in/mgo.v2" 9 | "gopkg.in/mgo.v2/bson" 10 | ) 11 | 12 | var ( 13 | oplogStatusCount = prometheus.NewGauge(prometheus.GaugeOpts{ 14 | Namespace: Namespace, 15 | Subsystem: "replset_oplog", 16 | Name: "items_total", 17 | Help: "The total number of changes in the oplog", 18 | }) 19 | oplogStatusHeadTimestamp = prometheus.NewGauge(prometheus.GaugeOpts{ 20 | Namespace: Namespace, 21 | Subsystem: "replset_oplog", 22 | Name: "head_timestamp", 23 | Help: "The timestamp of the newest change in the oplog", 24 | }) 25 | oplogStatusTailTimestamp = prometheus.NewGauge(prometheus.GaugeOpts{ 26 | Namespace: Namespace, 27 | Subsystem: "replset_oplog", 28 | Name: "tail_timestamp", 29 | Help: "The timestamp of the oldest change in the oplog", 30 | }) 31 | oplogStatusSizeBytes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 32 | Namespace: Namespace, 33 | Subsystem: "replset_oplog", 34 | Name: "size_bytes", 35 | Help: "Size of oplog in bytes", 36 | }, []string{"type"}) 37 | ) 38 | 39 | // OplogCollectionStats represents metrics about an oplog collection 40 | type OplogCollectionStats struct { 41 | Count float64 `bson:"count"` 42 | Size float64 `bson:"size"` 43 | StorageSize float64 `bson:"storageSize"` 44 | } 45 | 46 | // OplogStatus represents oplog metrics 47 | type OplogStatus struct { 48 | TailTimestamp float64 49 | HeadTimestamp float64 50 | 
CollectionStats *OplogCollectionStats 51 | } 52 | 53 | // BsonMongoTimestampToUnix converts a mongo timestamp to UNIX time 54 | // there's gotta be a better way to do this, but it works for now :/ 55 | func BsonMongoTimestampToUnix(timestamp bson.MongoTimestamp) float64 { 56 | return float64(timestamp >> 32) 57 | } 58 | 59 | // GetOplogTimestamp fetches the latest oplog timestamp 60 | func GetOplogTimestamp(session *mgo.Session, returnTail bool) (float64, error) { 61 | sortBy := "$natural" 62 | if returnTail { 63 | sortBy = "-$natural" 64 | } 65 | 66 | var ( 67 | err error 68 | tries int 69 | result struct { 70 | Timestamp bson.MongoTimestamp `bson:"ts"` 71 | } 72 | ) 73 | maxTries := 2 74 | for tries < maxTries { 75 | err = session.DB("local").C("oplog.rs").Find(nil).Sort(sortBy).Limit(1).One(&result) 76 | if err != nil { 77 | tries++ 78 | time.Sleep(500 * time.Millisecond) 79 | } else { 80 | return BsonMongoTimestampToUnix(result.Timestamp), err 81 | } 82 | } 83 | 84 | return 0, err 85 | } 86 | 87 | // GetOplogCollectionStats fetches oplog collection stats 88 | func GetOplogCollectionStats(session *mgo.Session) (*OplogCollectionStats, error) { 89 | results := &OplogCollectionStats{} 90 | err := session.DB("local").Run(bson.M{"collStats": "oplog.rs"}, &results) 91 | return results, err 92 | } 93 | 94 | // Export exports metrics to Prometheus 95 | func (status *OplogStatus) Export(ch chan<- prometheus.Metric) { 96 | oplogStatusSizeBytes.WithLabelValues("current").Set(0) 97 | oplogStatusSizeBytes.WithLabelValues("storage").Set(0) 98 | if status.CollectionStats != nil { 99 | oplogStatusCount.Set(status.CollectionStats.Count) 100 | oplogStatusSizeBytes.WithLabelValues("current").Set(status.CollectionStats.Size) 101 | oplogStatusSizeBytes.WithLabelValues("storage").Set(status.CollectionStats.StorageSize) 102 | } 103 | if status.HeadTimestamp != 0 && status.TailTimestamp != 0 { 104 | oplogStatusHeadTimestamp.Set(status.HeadTimestamp) 105 | oplogStatusTailTimestamp.Set(status.TailTimestamp) 106 | } 107 | 108 | oplogStatusCount.Collect(ch) 109 | oplogStatusHeadTimestamp.Collect(ch) 110 | oplogStatusTailTimestamp.Collect(ch) 111 | oplogStatusSizeBytes.Collect(ch) 112 | } 113 | 114 | // Describe describes metrics collected 115 | func (status *OplogStatus) Describe(ch chan<- *prometheus.Desc) { 116 | oplogStatusCount.Describe(ch) 117 | oplogStatusHeadTimestamp.Describe(ch) 118 | oplogStatusTailTimestamp.Describe(ch) 119 | oplogStatusSizeBytes.Describe(ch) 120 | } 121 | 122 | // GetOplogStatus fetches oplog collection stats 123 | func GetOplogStatus(session *mgo.Session) *OplogStatus { 124 | oplogStatus := &OplogStatus{} 125 | collectionStats, err := GetOplogCollectionStats(session) 126 | if err != nil { 127 | glog.Error("Failed to get local.oplog_rs collection stats.") 128 | return nil 129 | } 130 | 131 | headTimestamp, err := GetOplogTimestamp(session, false) 132 | tailTimestamp, err := GetOplogTimestamp(session, true) 133 | if err != nil { 134 | glog.Error("Failed to get oplog head or tail timestamps.") 135 | return nil 136 | } 137 | 138 | oplogStatus.CollectionStats = collectionStats 139 | oplogStatus.HeadTimestamp = headTimestamp 140 | oplogStatus.TailTimestamp = tailTimestamp 141 | 142 | return oplogStatus 143 | } 144 | -------------------------------------------------------------------------------- /collector/oplog_tail.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/golang/glog" 7 | 
"github.com/prometheus/client_golang/prometheus" 8 | "github.com/rwynn/gtm" 9 | "gopkg.in/mgo.v2" 10 | "gopkg.in/mgo.v2/bson" 11 | ) 12 | 13 | var ( 14 | oplogEntryCount = prometheus.NewCounterVec(prometheus.CounterOpts{ 15 | Namespace: Namespace, 16 | Subsystem: "oplogtail", 17 | Name: "entry_count", 18 | Help: "The total number of entries observed in the oplog by ns/op", 19 | }, []string{"ns", "op"}) 20 | oplogTailError = prometheus.NewCounter(prometheus.CounterOpts{ 21 | Namespace: Namespace, 22 | Subsystem: "oplogtail", 23 | Name: "tail_error", 24 | Help: "The total number of errors while tailing the oplog", 25 | }) 26 | ) 27 | 28 | var tailer *OplogTailStats 29 | 30 | type OplogTailStats struct{} 31 | 32 | func (o *OplogTailStats) Start(session *mgo.Session) { 33 | // Override the socket timeout for oplog tailing 34 | // Here we want a long-running socket, otherwise we cause lots of locks 35 | // which seriously impede oplog performance 36 | timeout := time.Second * 120 37 | session.SetSocketTimeout(timeout) 38 | // Set cursor timeout 39 | var tmp map[string]interface{} 40 | session.Run(bson.D{{"setParameter", 1}, {"cursorTimeoutMillis", timeout / time.Millisecond}}, &tmp) 41 | 42 | defer session.Close() 43 | session.SetMode(mgo.Monotonic, true) 44 | 45 | ctx := gtm.Start(session, nil) 46 | defer ctx.Stop() 47 | 48 | // ctx.OpC is a channel to read ops from 49 | // ctx.ErrC is a channel to read errors from 50 | // ctx.Stop() stops all go routines started by gtm.Start 51 | for { 52 | // loop forever receiving events 53 | select { 54 | case err := <-ctx.ErrC: 55 | oplogTailError.Add(1) 56 | glog.Errorf("Error getting entry from oplog: %v", err) 57 | case op := <-ctx.OpC: 58 | oplogEntryCount.WithLabelValues(op.Namespace, op.Operation).Add(1) 59 | } 60 | } 61 | } 62 | 63 | // Export exports metrics to Prometheus 64 | func (status *OplogTailStats) Export(ch chan<- prometheus.Metric) { 65 | oplogEntryCount.Collect(ch) 66 | oplogTailError.Collect(ch) 67 | } 68 | 69 | // Describe describes metrics collected 70 | func (status *OplogTailStats) Describe(ch chan<- *prometheus.Desc) { 71 | oplogEntryCount.Describe(ch) 72 | oplogTailError.Describe(ch) 73 | } 74 | 75 | func GetOplogTailStats(session *mgo.Session) *OplogTailStats { 76 | if tailer == nil { 77 | tailer = &OplogTailStats{} 78 | // Start a tailer with a copy of the session (to avoid messing with the other metrics in the session) 79 | go tailer.Start(session.Copy()) 80 | } 81 | 82 | return tailer 83 | } 84 | -------------------------------------------------------------------------------- /collector/profile_status.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/golang/glog" 5 | "github.com/prometheus/client_golang/prometheus" 6 | mgo "gopkg.in/mgo.v2" 7 | "gopkg.in/mgo.v2/bson" 8 | "time" 9 | ) 10 | 11 | var ( 12 | profileCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 13 | Namespace: Namespace, 14 | Subsystem: "profile", 15 | Name: "slow_query_30s_count", 16 | Help: "The number of slow queries in this database during last 30 seconds", 17 | }, []string{"database"}) 18 | ) 19 | 20 | type ProfileStatus struct { 21 | Name string `bson:"database"` 22 | Count int `bson:"count"` 23 | } 24 | 25 | func (profileStatus *ProfileStatus) Export(ch chan<- prometheus.Metric) { 26 | profileCount.WithLabelValues(profileStatus.Name).Set(float64(profileStatus.Count)) 27 | profileCount.Collect(ch) 28 | profileCount.Reset() 29 | } 30 | 31 | func (profileStatus 
*ProfileStatus) Describe(ch chan<- *prometheus.Desc) { 32 | profileCount.Describe(ch) 33 | } 34 | 35 | func CollectProfileStatus(session *mgo.Session, db string, ch chan<- prometheus.Metric) { 36 | ts := time.Now().Add(-time.Duration(time.Second * 30)) 37 | count, err := session.DB(db).C("system.profile").Find(bson.M{"ts": bson.M{"$gt": ts}}).Count() 38 | if err != nil { 39 | glog.Error(err) 40 | return 41 | } 42 | profileStatus := ProfileStatus{db, count} 43 | profileStatus.Export(ch) 44 | } 45 | -------------------------------------------------------------------------------- /collector/replset_conf.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "gopkg.in/mgo.v2" 5 | "gopkg.in/mgo.v2/bson" 6 | 7 | "github.com/golang/glog" 8 | "github.com/prometheus/client_golang/prometheus" 9 | ) 10 | 11 | var ( 12 | memberHidden = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 13 | Namespace: Namespace, 14 | Subsystem: subsystem, 15 | Name: "member_hidden", 16 | Help: "This field conveys if the member is hidden (1) or not-hidden (0).", 17 | }, []string{"id", "host"}) 18 | memberArbiter = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 19 | Namespace: Namespace, 20 | Subsystem: subsystem, 21 | Name: "member_arbiter", 22 | Help: "This field conveys if the member is an arbiter (1) or not (0).", 23 | }, []string{"id", "host"}) 24 | memberBuildIndexes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 25 | Namespace: Namespace, 26 | Subsystem: subsystem, 27 | Name: "member_build_indexes", 28 | Help: "This field conveys if the member is builds indexes (1) or not (0).", 29 | }, []string{"id", "host"}) 30 | memberPriority = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 31 | Namespace: Namespace, 32 | Subsystem: subsystem, 33 | Name: "member_priority", 34 | Help: "This field conveys the priority of a given member", 35 | }, []string{"id", "host"}) 36 | memberVotes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 37 | Namespace: Namespace, 38 | Subsystem: subsystem, 39 | Name: "member_votes", 40 | Help: "This field conveys the number of votes of a given member", 41 | }, []string{"id", "host"}) 42 | ) 43 | 44 | // Although the docs say that it returns a map with id etc. 
it *actually* returns 45 | // that wrapped in a map 46 | type OuterReplSetConf struct { 47 | Config ReplSetConf `bson:"config"` 48 | } 49 | 50 | // ReplSetConf keeps the data returned by the GetReplSetConf method 51 | type ReplSetConf struct { 52 | Id string `bson:"_id"` 53 | Version int `bson:"version"` 54 | Members []MemberConf `bson:"members"` 55 | } 56 | 57 | /* 58 | Example: 59 | "settings" : { 60 | "chainingAllowed" : true, 61 | "heartbeatIntervalMillis" : 2000, 62 | "heartbeatTimeoutSecs" : 10, 63 | "electionTimeoutMillis" : 5000, 64 | "getLastErrorModes" : { 65 | 66 | }, 67 | "getLastErrorDefaults" : { 68 | "w" : 1, 69 | "wtimeout" : 0 70 | } 71 | } 72 | */ 73 | type ReplSetConfSettings struct { 74 | } 75 | 76 | // Member represents an array element of ReplSetConf.Members 77 | type MemberConf struct { 78 | Id int32 `bson:"_id"` 79 | Host string `bson:"host"` 80 | ArbiterOnly bool `bson:"arbiterOnly"` 81 | BuildIndexes bool `bson:"buildIndexes"` 82 | Hidden bool `bson:"hidden"` 83 | Priority int32 `bson:"priority"` 84 | 85 | Tags map[string]string `bson:"tags"` 86 | SlaveDelay float64 `bson:"saveDelay"` 87 | Votes int32 `bson:"votes"` 88 | } 89 | 90 | // Export exports the replSetGetStatus stati to be consumed by prometheus 91 | func (replConf *ReplSetConf) Export(ch chan<- prometheus.Metric) { 92 | for _, member := range replConf.Members { 93 | ls := prometheus.Labels{ 94 | "id": replConf.Id, 95 | "host": member.Host, 96 | } 97 | if member.Hidden { 98 | memberHidden.With(ls).Set(1) 99 | } else { 100 | memberHidden.With(ls).Set(0) 101 | } 102 | 103 | if member.ArbiterOnly { 104 | memberArbiter.With(ls).Set(1) 105 | } else { 106 | memberArbiter.With(ls).Set(0) 107 | } 108 | 109 | if member.BuildIndexes { 110 | memberBuildIndexes.With(ls).Set(1) 111 | } else { 112 | memberBuildIndexes.With(ls).Set(0) 113 | } 114 | 115 | memberPriority.With(ls).Set(float64(member.Priority)) 116 | memberVotes.With(ls).Set(float64(member.Votes)) 117 | } 118 | // collect metrics 119 | memberHidden.Collect(ch) 120 | memberArbiter.Collect(ch) 121 | memberBuildIndexes.Collect(ch) 122 | memberPriority.Collect(ch) 123 | memberVotes.Collect(ch) 124 | } 125 | 126 | // Describe describes the replSetGetStatus metrics for prometheus 127 | func (replConf *ReplSetConf) Describe(ch chan<- *prometheus.Desc) { 128 | memberHidden.Describe(ch) 129 | memberArbiter.Describe(ch) 130 | memberBuildIndexes.Describe(ch) 131 | memberPriority.Describe(ch) 132 | memberVotes.Describe(ch) 133 | } 134 | 135 | // GetReplSetConf returns the replica status info 136 | func GetReplSetConf(session *mgo.Session) *ReplSetConf { 137 | result := &OuterReplSetConf{} 138 | err := session.DB("admin").Run(bson.D{{"replSetGetConfig", 1}}, result) 139 | if err != nil { 140 | glog.Error("Failed to get replSet config.") 141 | return nil 142 | } 143 | return &result.Config 144 | } 145 | -------------------------------------------------------------------------------- /collector/replset_status.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "time" 5 | 6 | "gopkg.in/mgo.v2" 7 | "gopkg.in/mgo.v2/bson" 8 | 9 | "github.com/golang/glog" 10 | "github.com/prometheus/client_golang/prometheus" 11 | ) 12 | 13 | var ( 14 | subsystem = "replset" 15 | myState = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 16 | Namespace: Namespace, 17 | Subsystem: subsystem, 18 | Name: "my_state", 19 | Help: "An integer between 0 and 10 that represents the replica state of the current member", 20 | }, 
[]string{"set"}) 21 | 22 | myReplicaLag = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 23 | Namespace: Namespace, 24 | Subsystem: subsystem, 25 | Name: "my_replica_lag", 26 | Help: "An integer shows the replication lag in seconds, -1 if no master found", 27 | }, []string{"set"}) 28 | 29 | masterCount = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 30 | Namespace: Namespace, 31 | Subsystem: subsystem, 32 | Name: "master_count", 33 | Help: "The number of master, any value except 1 means something wrong", 34 | }, []string{}) 35 | 36 | term = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 37 | Namespace: Namespace, 38 | Subsystem: subsystem, 39 | Name: "term", 40 | Help: "The election count for the replica set, as known to this replica set member", 41 | }, []string{"set"}) 42 | numberOfMembers = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 43 | Namespace: Namespace, 44 | Subsystem: subsystem, 45 | Name: "number_of_members", 46 | Help: "The number of replica set mebers", 47 | }, []string{"set"}) 48 | heartbeatIntervalMillis = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 49 | Namespace: Namespace, 50 | Subsystem: subsystem, 51 | Name: "heartbeat_interval_millis", 52 | Help: "The frequency in milliseconds of the heartbeats", 53 | }, []string{"set"}) 54 | memberHealth = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 55 | Namespace: Namespace, 56 | Subsystem: subsystem, 57 | Name: "member_health", 58 | Help: "This field conveys if the member is up (1) or down (0).", 59 | }, []string{"set", "name"}) 60 | memberState = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 61 | Namespace: Namespace, 62 | Subsystem: subsystem, 63 | Name: "member_state", 64 | Help: "The value of state is an integer between 0 and 10 that represents the replica state of the member.", 65 | }, []string{"set", "name"}) 66 | memberUptime = prometheus.NewCounterVec(prometheus.CounterOpts{ 67 | Namespace: Namespace, 68 | Subsystem: subsystem, 69 | Name: "member_uptime", 70 | Help: "The uptime field holds a value that reflects the number of seconds that this member has been online.", 71 | }, []string{"set", "name"}) 72 | memberOptimeDate = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 73 | Namespace: Namespace, 74 | Subsystem: subsystem, 75 | Name: "member_optime_date", 76 | Help: "The last entry from the oplog that this member applied.", 77 | }, []string{"set", "name"}) 78 | memberElectionDate = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 79 | Namespace: Namespace, 80 | Subsystem: subsystem, 81 | Name: "member_election_date", 82 | Help: "The timestamp the node was elected as replica leader", 83 | }, []string{"set", "name"}) 84 | memberLastHeartbeat = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 85 | Namespace: Namespace, 86 | Subsystem: subsystem, 87 | Name: "member_last_heartbeat", 88 | Help: "The lastHeartbeat value provides an ISODate formatted date and time of the transmission time of last heartbeat received from this member", 89 | }, []string{"set", "name"}) 90 | memberLastHeartbeatRecv = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 91 | Namespace: Namespace, 92 | Subsystem: subsystem, 93 | Name: "member_last_heartbeat_recv", 94 | Help: "The lastHeartbeatRecv value provides an ISODate formatted date and time that the last heartbeat was received from this member", 95 | }, []string{"set", "name"}) 96 | memberPingMs = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 97 | Namespace: Namespace, 98 | Subsystem: subsystem, 99 | Name: "member_ping_ms", 100 | Help: "The pingMs represents the number of milliseconds (ms) that a round-trip packet takes 
to travel between the remote member and the local instance.", 101 | }, []string{"set", "name"}) 102 | memberConfigVersion = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 103 | Namespace: Namespace, 104 | Subsystem: subsystem, 105 | Name: "member_config_version", 106 | Help: "The configVersion value is the replica set configuration version.", 107 | }, []string{"set", "name"}) 108 | memberOptime = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 109 | Namespace: Namespace, 110 | Subsystem: subsystem, 111 | Name: "member_optime", 112 | Help: "Information regarding the last operation from the operation log that this member has applied.", 113 | }, []string{"set", "name"}) 114 | ) 115 | 116 | // ReplSetStatus keeps the data returned by the GetReplSetStatus method 117 | type ReplSetStatus struct { 118 | Set string `bson:"set"` 119 | Date time.Time `bson:"date"` 120 | MyState int32 `bson:"myState"` 121 | Term *int32 `bson:"term,omitempty"` 122 | HeartbeatIntervalMillis *float64 `bson:"heartbeatIntervalMillis,omitempty"` 123 | Members []Member `bson:"members"` 124 | } 125 | 126 | // Member represents an array element of ReplSetStatus.Members 127 | type Member struct { 128 | Name string `bson:"name"` 129 | Self *bool `bson:"self,omitempty"` 130 | Health *int32 `bson:"health,omitempty"` 131 | State int32 `bson:"state"` 132 | StateStr string `bson:"stateStr"` 133 | Uptime float64 `bson:"uptime"` 134 | Optime interface{} `bson:"optime"` 135 | OptimeDate time.Time `bson:"optimeDate"` 136 | ElectionTime *time.Time `bson:"electionTime,omitempty"` 137 | ElectionDate *time.Time `bson:"electionDate,omitempty"` 138 | LastHeartbeat *time.Time `bson:"lastHeartbeat,omitempty"` 139 | LastHeartbeatRecv *time.Time `bson:"lastHeartbeatRecv,omitempty"` 140 | LastHeartbeatMessage *string `bson:"lastHeartbeatMessage,omitempty"` 141 | PingMs *float64 `bson:"pingMs,omitempty"` 142 | SyncingTo *string `bson:"syncingTo,omitempty"` 143 | ConfigVersion *int32 `bson:"configVersion,omitempty"` 144 | } 145 | 146 | // Export exports the replSetGetStatus stati to be consumed by prometheus 147 | func (replStatus *ReplSetStatus) Export(ch chan<- prometheus.Metric) { 148 | myState.Reset() 149 | myReplicaLag.Reset() 150 | term.Reset() 151 | numberOfMembers.Reset() 152 | heartbeatIntervalMillis.Reset() 153 | memberState.Reset() 154 | memberHealth.Reset() 155 | memberUptime.Reset() 156 | memberOptimeDate.Reset() 157 | memberElectionDate.Reset() 158 | memberLastHeartbeat.Reset() 159 | memberLastHeartbeatRecv.Reset() 160 | memberPingMs.Reset() 161 | memberConfigVersion.Reset() 162 | masterCount.Reset() 163 | 164 | myState.WithLabelValues(replStatus.Set).Set(float64(replStatus.MyState)) 165 | 166 | // new in version 3.2 167 | if replStatus.Term != nil { 168 | term.WithLabelValues(replStatus.Set).Set(float64(*replStatus.Term)) 169 | } 170 | numberOfMembers.WithLabelValues(replStatus.Set).Set(float64(len(replStatus.Members))) 171 | 172 | // new in version 3.2 173 | if replStatus.HeartbeatIntervalMillis != nil { 174 | heartbeatIntervalMillis.WithLabelValues(replStatus.Set).Set(*replStatus.HeartbeatIntervalMillis) 175 | } 176 | 177 | var ( 178 | primaryOpTime time.Time 179 | myOpTime time.Time 180 | ) 181 | mCount := 0 182 | for _, member := range replStatus.Members { 183 | ls := prometheus.Labels{ 184 | "set": replStatus.Set, 185 | "name": member.Name, 186 | } 187 | if member.State == 1 { 188 | primaryOpTime = member.OptimeDate 189 | } 190 | if member.Self != nil && *member.Self { 191 | myOpTime = member.OptimeDate 192 | } 193 | 
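// member.State follows the replSetGetStatus state codes (1 = PRIMARY, 2 = SECONDARY, 7 = ARBITER, up to 10),
// so the two State == 1 checks in this loop pick out the primary: its optime anchors the lag calculation and
// it is what master_count tallies. Worked example: primary optime 12:00:05, own optime 12:00:03 -> my_replica_lag = 2.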
memberState.With(ls).Set(float64(member.State)) 194 | if member.State == 1 { 195 | mCount += 1 196 | } 197 | 198 | // ReplSetStatus.Member.Health is not available on the node you're connected to 199 | if member.Health != nil { 200 | memberHealth.With(ls).Set(float64(*member.Health)) 201 | } 202 | 203 | memberUptime.With(ls).Set(member.Uptime) 204 | 205 | memberOptimeDate.With(ls).Set(float64(member.OptimeDate.Unix())) 206 | 207 | // ReplSetGetStatus.Member.ElectionTime is only available on the PRIMARY 208 | if member.ElectionDate != nil { 209 | memberElectionDate.With(ls).Set(float64((*member.ElectionDate).Unix())) 210 | } 211 | if member.LastHeartbeat != nil { 212 | memberLastHeartbeat.With(ls).Set(float64((*member.LastHeartbeat).Unix())) 213 | } 214 | if member.LastHeartbeatRecv != nil { 215 | memberLastHeartbeatRecv.With(ls).Set(float64((*member.LastHeartbeatRecv).Unix())) 216 | } 217 | if member.PingMs != nil { 218 | memberPingMs.With(ls).Set(*member.PingMs) 219 | } 220 | if member.ConfigVersion != nil { 221 | memberConfigVersion.With(ls).Set(float64(*member.ConfigVersion)) 222 | } 223 | } 224 | if !primaryOpTime.IsZero() && !myOpTime.IsZero() { 225 | myReplicaLag.WithLabelValues(replStatus.Set).Set(float64(primaryOpTime.Unix() - myOpTime.Unix())) 226 | } else { 227 | myReplicaLag.WithLabelValues(replStatus.Set).Set(-1.0) 228 | } 229 | masterCount.WithLabelValues().Set(float64(mCount)) 230 | // collect metrics 231 | myState.Collect(ch) 232 | myReplicaLag.Collect(ch) 233 | term.Collect(ch) 234 | numberOfMembers.Collect(ch) 235 | heartbeatIntervalMillis.Collect(ch) 236 | memberState.Collect(ch) 237 | masterCount.Collect(ch) 238 | memberHealth.Collect(ch) 239 | memberUptime.Collect(ch) 240 | memberOptimeDate.Collect(ch) 241 | memberElectionDate.Collect(ch) 242 | memberLastHeartbeat.Collect(ch) 243 | memberLastHeartbeatRecv.Collect(ch) 244 | memberPingMs.Collect(ch) 245 | memberConfigVersion.Collect(ch) 246 | } 247 | 248 | // Describe describes the replSetGetStatus metrics for prometheus 249 | func (replStatus *ReplSetStatus) Describe(ch chan<- *prometheus.Desc) { 250 | myState.Describe(ch) 251 | myReplicaLag.Describe(ch) 252 | term.Describe(ch) 253 | numberOfMembers.Describe(ch) 254 | heartbeatIntervalMillis.Describe(ch) 255 | memberState.Describe(ch) 256 | memberHealth.Describe(ch) 257 | memberUptime.Describe(ch) 258 | memberOptimeDate.Describe(ch) 259 | memberElectionDate.Describe(ch) 260 | memberLastHeartbeatRecv.Describe(ch) 261 | memberPingMs.Describe(ch) 262 | memberConfigVersion.Describe(ch) 263 | } 264 | 265 | // GetReplSetStatus returns the replica status info 266 | func GetReplSetStatus(session *mgo.Session) *ReplSetStatus { 267 | result := &ReplSetStatus{} 268 | err := session.DB("admin").Run(bson.D{{"replSetGetStatus", 1}}, result) 269 | if err != nil { 270 | glog.Error("Failed to get replSet status.") 271 | return nil 272 | } 273 | return result 274 | } 275 | -------------------------------------------------------------------------------- /collector/server_status.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "time" 5 | 6 | "github.com/golang/glog" 7 | "github.com/prometheus/client_golang/prometheus" 8 | "gopkg.in/mgo.v2" 9 | "gopkg.in/mgo.v2/bson" 10 | ) 11 | 12 | var ( 13 | instanceUptimeSeconds = prometheus.NewCounter(prometheus.CounterOpts{ 14 | Namespace: Namespace, 15 | Subsystem: "instance", 16 | Name: "uptime_seconds", 17 | Help: "The value of the uptime field corresponds to the number of 
seconds that the mongos or mongod process has been active.",
18 | })
19 | instanceUptimeEstimateSeconds = prometheus.NewCounter(prometheus.CounterOpts{
20 | Namespace: Namespace,
21 | Subsystem: "instance",
22 | Name: "uptime_estimate_seconds",
23 | Help: "uptimeEstimate provides the uptime as calculated from MongoDB's internal coarse-grained time keeping system.",
24 | })
25 | instanceLocalTime = prometheus.NewCounter(prometheus.CounterOpts{
26 | Namespace: Namespace,
27 | Subsystem: "instance",
28 | Name: "local_time",
29 | Help: "The localTime value is the current time, according to the server, in UTC specified in an ISODate format.",
30 | })
31 | )
32 |
33 | // ServerStatus keeps the data returned by the serverStatus() method.
34 | type ServerStatus struct {
35 | Uptime float64 `bson:"uptime"`
36 | UptimeEstimate float64 `bson:"uptimeEstimate"`
37 | LocalTime time.Time `bson:"localTime"`
38 |
39 | Asserts *AssertsStats `bson:"asserts"`
40 |
41 | Dur *DurStats `bson:"dur"`
42 |
43 | BackgroundFlushing *FlushStats `bson:"backgroundFlushing"`
44 |
45 | Connections *ConnectionStats `bson:"connections"`
46 |
47 | ExtraInfo *ExtraInfo `bson:"extra_info"`
48 |
49 | GlobalLock *GlobalLockStats `bson:"globalLock"`
50 |
51 | IndexCounter *IndexCounterStats `bson:"indexCounters"`
52 |
53 | Locks LockStatsMap `bson:"locks,omitempty"`
54 |
55 | Network *NetworkStats `bson:"network"`
56 |
57 | Opcounters *OpcountersStats `bson:"opcounters"`
58 | OpcountersRepl *OpcountersReplStats `bson:"opcountersRepl"`
59 | TCMallocStats *TCMallocStats `bson:"tcmalloc"`
60 | Mem *MemStats `bson:"mem"`
61 | Metrics *MetricsStats `bson:"metrics"`
62 |
63 | Cursors *Cursors `bson:"cursors"`
64 |
65 | StorageEngine *StorageEngineStats `bson:"storageEngine"`
66 | WiredTiger *WiredTigerStats `bson:"wiredTiger"`
67 | }
68 |
69 | // Export exports the server status to be consumed by prometheus.
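// Only the sections that were actually present in the serverStatus response are exported; every optional
// group below is nil-guarded, so (for example) wiredTiger metrics never appear on an MMAPv1 instance.
// Minimal consumption sketch, assuming a reachable mongod (illustrative only, not part of the original source):
//
//	session, err := mgo.Dial("mongodb://localhost:27017")
//	if err == nil {
//		if status := GetServerStatus(session); status != nil {
//			ch := make(chan prometheus.Metric, 1024) // in real use, register a Collector and let Prometheus drain ch
//			status.Export(ch)
//		}
//	}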
70 | func (status *ServerStatus) Export(ch chan<- prometheus.Metric) {
71 | instanceUptimeSeconds.Set(status.Uptime)
72 | instanceUptimeEstimateSeconds.Set(status.UptimeEstimate)
73 | instanceLocalTime.Set(float64(status.LocalTime.Unix()))
74 | instanceUptimeSeconds.Collect(ch)
75 | instanceUptimeEstimateSeconds.Collect(ch)
76 | instanceLocalTime.Collect(ch)
77 |
78 | if status.Asserts != nil {
79 | status.Asserts.Export(ch)
80 | }
81 | if status.Dur != nil {
82 | status.Dur.Export(ch)
83 | }
84 | if status.BackgroundFlushing != nil {
85 | status.BackgroundFlushing.Export(ch)
86 | }
87 | if status.Connections != nil {
88 | status.Connections.Export(ch)
89 | }
90 | if status.ExtraInfo != nil {
91 | status.ExtraInfo.Export(ch)
92 | }
93 | if status.GlobalLock != nil {
94 | status.GlobalLock.Export(ch)
95 | }
96 | if status.IndexCounter != nil {
97 | status.IndexCounter.Export(ch)
98 | }
99 | if status.Network != nil {
100 | status.Network.Export(ch)
101 | }
102 | if status.Opcounters != nil {
103 | status.Opcounters.Export(ch)
104 | }
105 | if status.OpcountersRepl != nil {
106 | status.OpcountersRepl.Export(ch)
107 | }
108 | if status.TCMallocStats != nil {
109 | status.TCMallocStats.Export(ch)
110 | }
111 | if status.Mem != nil {
112 | status.Mem.Export(ch)
113 | }
114 | if status.Locks != nil {
115 | status.Locks.Export(ch)
116 | }
117 | if status.Metrics != nil {
118 | status.Metrics.Export(ch)
119 | }
120 | if status.Cursors != nil {
121 | status.Cursors.Export(ch)
122 | }
123 |
124 | if status.WiredTiger != nil {
125 | status.WiredTiger.Export(ch)
126 | }
127 | // If db.serverStatus().storageEngine does not exist (3.0+ only) and status.BackgroundFlushing does (MMAPv1 only), default to mmapv1
128 | // https://docs.mongodb.com/v3.0/reference/command/serverStatus/#storageengine
129 | if status.StorageEngine == nil && status.BackgroundFlushing != nil {
130 | status.StorageEngine = &StorageEngineStats{
131 | Name: "mmapv1",
132 | }
133 | }
134 | if status.StorageEngine != nil {
135 | status.StorageEngine.Export(ch)
136 | }
137 | }
138 |
139 | // Describe describes the server status for prometheus.
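// Describe mirrors Export group for group; note the MMAPv1 fallback for StorageEngine is applied only in
// Export, so a nil StorageEngine is simply skipped here.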
140 | func (status *ServerStatus) Describe(ch chan<- *prometheus.Desc) { 141 | instanceUptimeSeconds.Describe(ch) 142 | instanceUptimeEstimateSeconds.Describe(ch) 143 | instanceLocalTime.Describe(ch) 144 | 145 | if status.Asserts != nil { 146 | status.Asserts.Describe(ch) 147 | } 148 | if status.Dur != nil { 149 | status.Dur.Describe(ch) 150 | } 151 | if status.BackgroundFlushing != nil { 152 | status.BackgroundFlushing.Describe(ch) 153 | } 154 | if status.Connections != nil { 155 | status.Connections.Describe(ch) 156 | } 157 | if status.ExtraInfo != nil { 158 | status.ExtraInfo.Describe(ch) 159 | } 160 | if status.GlobalLock != nil { 161 | status.GlobalLock.Describe(ch) 162 | } 163 | if status.IndexCounter != nil { 164 | status.IndexCounter.Describe(ch) 165 | } 166 | if status.Network != nil { 167 | status.Network.Describe(ch) 168 | } 169 | if status.Opcounters != nil { 170 | status.Opcounters.Describe(ch) 171 | } 172 | if status.OpcountersRepl != nil { 173 | status.OpcountersRepl.Describe(ch) 174 | } 175 | if status.TCMallocStats != nil { 176 | status.TCMallocStats.Describe(ch) 177 | } 178 | if status.Mem != nil { 179 | status.Mem.Describe(ch) 180 | } 181 | if status.Locks != nil { 182 | status.Locks.Describe(ch) 183 | } 184 | if status.Metrics != nil { 185 | status.Metrics.Describe(ch) 186 | } 187 | if status.Cursors != nil { 188 | status.Cursors.Describe(ch) 189 | } 190 | if status.WiredTiger != nil { 191 | status.WiredTiger.Describe(ch) 192 | } 193 | if status.StorageEngine != nil { 194 | status.StorageEngine.Describe(ch) 195 | } 196 | } 197 | 198 | // GetServerStatus returns the server status info. 199 | func GetServerStatus(session *mgo.Session) *ServerStatus { 200 | result := &ServerStatus{} 201 | err := session.DB("admin").Run(bson.D{{"serverStatus", 1}, {"recordStats", 0}}, result) 202 | if err != nil { 203 | glog.Error("Failed to get server status.") 204 | return nil 205 | } 206 | return result 207 | } 208 | -------------------------------------------------------------------------------- /collector/server_status_test.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "testing" 5 | 6 | "gopkg.in/mgo.v2/bson" 7 | ) 8 | 9 | func Test_ParserServerStatus(t *testing.T) { 10 | data := LoadFixture("server_status.bson") 11 | 12 | serverStatus := &ServerStatus{} 13 | loadServerStatusFromBson(data, serverStatus) 14 | 15 | if serverStatus.Asserts == nil { 16 | t.Error("Asserts group was not loaded") 17 | } 18 | 19 | if serverStatus.Dur == nil { 20 | t.Error("Dur group was not loaded") 21 | } 22 | 23 | if serverStatus.BackgroundFlushing == nil { 24 | t.Error("BackgroundFlushing group was not loaded") 25 | } 26 | 27 | if serverStatus.Connections == nil { 28 | t.Error("Connections group was not loaded") 29 | } 30 | 31 | if serverStatus.ExtraInfo == nil { 32 | t.Error("ExtraInfo group was not loaded") 33 | } 34 | 35 | if serverStatus.GlobalLock == nil { 36 | t.Error("GlobalLock group was not loaded") 37 | } 38 | 39 | if serverStatus.Network == nil { 40 | t.Error("Network group was not loaded") 41 | } 42 | 43 | if serverStatus.Opcounters == nil { 44 | t.Error("Opcounters group was not loaded") 45 | } 46 | 47 | if serverStatus.OpcountersRepl == nil { 48 | t.Error("OpcountersRepl group was not loaded") 49 | } 50 | 51 | if serverStatus.Mem == nil { 52 | t.Error("Mem group was not loaded") 53 | } 54 | 55 | if serverStatus.Connections == nil { 56 | t.Error("Connections group was not loaded") 57 | } 58 | 59 | if 
serverStatus.Locks == nil { 60 | t.Error("Locks group was not loaded") 61 | } 62 | 63 | if serverStatus.Metrics.Document.Deleted != 45726 { 64 | t.Error("Metrics group was not loaded correctly") 65 | } 66 | } 67 | 68 | func loadServerStatusFromBson(data []byte, status *ServerStatus) { 69 | err := bson.Unmarshal(data, status) 70 | if err != nil { 71 | panic(err) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /collector/storage_engine.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Percona LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package collector 16 | 17 | import ( 18 | "github.com/prometheus/client_golang/prometheus" 19 | ) 20 | 21 | var ( 22 | storageEngine = prometheus.NewCounterVec(prometheus.CounterOpts{ 23 | Namespace: Namespace, 24 | Name: "storage_engine", 25 | Help: "The storage engine used by the MongoDB instance", 26 | }, []string{"engine"}) 27 | ) 28 | 29 | // StorageEngineStats 30 | type StorageEngineStats struct { 31 | Name string `bson:"name"` 32 | } 33 | 34 | // Export exports the data to prometheus. 35 | func (stats *StorageEngineStats) Export(ch chan<- prometheus.Metric) { 36 | storageEngine.WithLabelValues(stats.Name).Set(1) 37 | storageEngine.Collect(ch) 38 | } 39 | 40 | // Describe describes the metrics for prometheus 41 | func (stats *StorageEngineStats) Describe(ch chan<- *prometheus.Desc) { 42 | storageEngine.Describe(ch) 43 | } 44 | -------------------------------------------------------------------------------- /collector/tcmalloc.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/prometheus/client_golang/prometheus" 5 | ) 6 | 7 | var ( 8 | tcmallocGeneral = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 9 | Namespace: Namespace, 10 | Name: "tcmalloc_generic_heap", 11 | Help: "High-level summary metricsInternal metrics from tcmalloc", 12 | }, []string{"type"}) 13 | tcmallocPageheapBytes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 14 | Namespace: Namespace, 15 | Name: "tcmalloc_pageheap_bytes", 16 | Help: "Sizes for tcpmalloc pageheaps", 17 | }, []string{"type"}) 18 | tcmallocPageheapCounts = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 19 | Namespace: Namespace, 20 | Name: "tcmalloc_pageheap_count", 21 | Help: "Sizes for tcpmalloc pageheaps", 22 | }, []string{"type"}) 23 | 24 | tcmallocCacheBytes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 25 | Namespace: Namespace, 26 | Name: "tcmalloc_cache_bytes", 27 | Help: "Sizes for tcpmalloc caches in bytes", 28 | }, []string{"cache", "type"}) 29 | 30 | tcmallocAggressiveDecommit = prometheus.NewCounter(prometheus.CounterOpts{ 31 | Namespace: Namespace, 32 | Name: "tcmalloc_aggressive_memory_decommit", 33 | Help: "Whether aggressive_memory_decommit is on", 34 | }) 35 | 36 | tcmallocFreeBytes = prometheus.NewCounter(prometheus.CounterOpts{ 37 | Namespace: 
Namespace, 38 | Name: "tcmalloc_free_bytes", 39 | Help: "Total free bytes of tcmalloc", 40 | }) 41 | ) 42 | 43 | // TCMallocStats tracks the mem stats metrics. 44 | type TCMallocStats struct { 45 | Generic GenericTCMAllocStats `bson:"generic"` 46 | Details DetailedTCMallocStats `bson:"tcmalloc"` 47 | } 48 | 49 | type GenericTCMAllocStats struct { 50 | CurrentAllocatedBytes float64 `bson:"current_allocated_bytes"` 51 | HeapSize float64 `bson:"heap_size"` 52 | } 53 | 54 | type DetailedTCMallocStats struct { 55 | PageheapFreeBytes float64 `bson:"pageheap_free_bytes"` 56 | PageheapUnmappedBytes float64 `bson:"pageheap_unmapped_bytes"` 57 | PageheapComittedBytes float64 `bson:"pageheap_committed_bytes"` 58 | PageheapScavengeCount float64 `bson:"pageheap_scavenge_count"` 59 | PageheapCommitCount float64 `bson:"pageheap_commit_count"` 60 | PageheapTotalCommitBytes float64 `bson:"pageheap_total_commit_bytes"` 61 | PageheapDecommitCount float64 `bson:"pageheap_decommit_count"` 62 | PageheapTotalDecommitBytes float64 `bson:"pageheap_total_decommit_bytes"` 63 | PageheapReserveCount float64 `bson:"pageheap_reserve_count"` 64 | PageheapTotalReserveBytes float64 `bson:"pageheap_total_reserve_bytes"` 65 | 66 | MaxTotalThreadCacheBytes float64 `bson:"max_total_thread_cache_bytes"` 67 | CurrentTotalThreadCacheBytes float64 `bson:"current_total_thread_cache_bytes"` 68 | CentralCacheFreeBytes float64 `bson:"central_cache_free_bytes"` 69 | TransferCacheFreeBytes float64 `bson:"transfer_cache_free_bytes"` 70 | ThreadCacheFreeBytes float64 `bson:"thread_cache_free_bytes"` 71 | 72 | TotalFreeBytes float64 `bson:"total_free_bytes"` 73 | AggressiveMemoryDecommit float64 `bson:"aggressive_memory_decommit"` 74 | } 75 | 76 | // Export exports the data to prometheus. 77 | func (m *TCMallocStats) Export(ch chan<- prometheus.Metric) { 78 | // Generic metrics 79 | tcmallocGeneral.WithLabelValues("allocated").Set(m.Generic.CurrentAllocatedBytes) 80 | tcmallocGeneral.WithLabelValues("total").Set(m.Generic.HeapSize) 81 | tcmallocGeneral.Collect(ch) 82 | 83 | // Pageheap 84 | tcmallocPageheapBytes.WithLabelValues("free").Set(m.Details.PageheapFreeBytes) 85 | tcmallocPageheapBytes.WithLabelValues("unmapped").Set(m.Details.PageheapUnmappedBytes) 86 | tcmallocPageheapBytes.WithLabelValues("comitted").Set(m.Details.PageheapComittedBytes) 87 | tcmallocPageheapBytes.WithLabelValues("total_commit").Set(m.Details.PageheapTotalCommitBytes) 88 | tcmallocPageheapBytes.WithLabelValues("total_decommit").Set(m.Details.PageheapTotalDecommitBytes) 89 | tcmallocPageheapBytes.WithLabelValues("total_reserve").Set(m.Details.PageheapTotalReserveBytes) 90 | tcmallocPageheapBytes.Collect(ch) 91 | 92 | tcmallocPageheapCounts.WithLabelValues("scavenge").Set(m.Details.PageheapScavengeCount) 93 | tcmallocPageheapCounts.WithLabelValues("commit").Set(m.Details.PageheapCommitCount) 94 | tcmallocPageheapCounts.WithLabelValues("decommit").Set(m.Details.PageheapDecommitCount) 95 | tcmallocPageheapCounts.WithLabelValues("reserve").Set(m.Details.PageheapReserveCount) 96 | tcmallocPageheapCounts.Collect(ch) 97 | 98 | tcmallocCacheBytes.WithLabelValues("thread_cache", "max_total").Set(m.Details.MaxTotalThreadCacheBytes) 99 | tcmallocCacheBytes.WithLabelValues("thread_cache", "current_total").Set(m.Details.CurrentTotalThreadCacheBytes) 100 | tcmallocCacheBytes.WithLabelValues("central_cache", "free").Set(m.Details.CentralCacheFreeBytes) 101 | tcmallocCacheBytes.WithLabelValues("transfer_cache", "free").Set(m.Details.TransferCacheFreeBytes) 102 | 
tcmallocCacheBytes.WithLabelValues("thread_cache", "free").Set(m.Details.ThreadCacheFreeBytes) 103 | tcmallocCacheBytes.Collect(ch) 104 | 105 | tcmallocAggressiveDecommit.Set(m.Details.AggressiveMemoryDecommit) 106 | tcmallocAggressiveDecommit.Collect(ch) 107 | 108 | tcmallocFreeBytes.Set(m.Details.TotalFreeBytes) 109 | tcmallocFreeBytes.Collect(ch) 110 | 111 | } 112 | 113 | // Describe describes the metrics for prometheus 114 | func (m *TCMallocStats) Describe(ch chan<- *prometheus.Desc) { 115 | tcmallocGeneral.Describe(ch) 116 | tcmallocPageheapBytes.Describe(ch) 117 | tcmallocPageheapCounts.Describe(ch) 118 | tcmallocCacheBytes.Describe(ch) 119 | tcmallocAggressiveDecommit.Describe(ch) 120 | tcmallocFreeBytes.Describe(ch) 121 | } 122 | -------------------------------------------------------------------------------- /collector/top_counters.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "reflect" 5 | "strings" 6 | 7 | "github.com/prometheus/client_golang/prometheus" 8 | ) 9 | 10 | var ( 11 | topTimeSecondsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 12 | Namespace: Namespace, 13 | Name: "top_time_seconds_total", 14 | Help: "The top command provides operation time, in seconds, for each database collection", 15 | }, []string{"type", "database", "collection"}) 16 | topCountTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 17 | Namespace: Namespace, 18 | Name: "top_count_total", 19 | Help: "The top command provides operation count for each database collection", 20 | }, []string{"type", "database", "collection"}) 21 | ) 22 | 23 | // TopStatsMap is a map of top stats 24 | type TopStatsMap map[string]TopStats 25 | 26 | // TopcountersStats topcounters stats 27 | type TopcounterStats struct { 28 | Time float64 `bson:"time"` 29 | Count float64 `bson:"count"` 30 | } 31 | 32 | // TopCollectionStats top collection stats 33 | type TopStats struct { 34 | Total TopcounterStats `bson:"total"` 35 | ReadLock TopcounterStats `bson:"readLock"` 36 | WriteLock TopcounterStats `bson:"writeLock"` 37 | Queries TopcounterStats `bson:"queries"` 38 | GetMore TopcounterStats `bson:"getmore"` 39 | Insert TopcounterStats `bson:"insert"` 40 | Update TopcounterStats `bson:"update"` 41 | Remove TopcounterStats `bson:"remove"` 42 | Commands TopcounterStats `bson:"commands"` 43 | } 44 | 45 | // Export exports the data to prometheus. 
46 | func (topStats TopStatsMap) Export(ch chan<- prometheus.Metric) { 47 | 48 | for collectionNamespace, topStat := range topStats { 49 | 50 | namespace := strings.Split(collectionNamespace, ".") 51 | database := namespace[0] 52 | collection := strings.Join(namespace[1:], ".") 53 | 54 | topStatTypes := reflect.TypeOf(topStat) 55 | topStatValues := reflect.ValueOf(topStat) 56 | 57 | for i := 0; i < topStatValues.NumField(); i++ { 58 | 59 | metric_type := topStatTypes.Field(i).Name 60 | 61 | op_count := topStatValues.Field(i).Field(1).Float() 62 | 63 | op_time_microsecond := topStatValues.Field(i).Field(0).Float() 64 | op_time_second := float64(op_time_microsecond / 1e6) 65 | 66 | topTimeSecondsTotal.WithLabelValues(metric_type, database, collection).Set(op_time_second) 67 | topCountTotal.WithLabelValues(metric_type, database, collection).Set(op_count) 68 | } 69 | } 70 | 71 | topTimeSecondsTotal.Collect(ch) 72 | topCountTotal.Collect(ch) 73 | } 74 | 75 | // Describe describes the metrics for prometheus 76 | func (tops TopStatsMap) Describe(ch chan<- *prometheus.Desc) { 77 | topTimeSecondsTotal.Describe(ch) 78 | topCountTotal.Describe(ch) 79 | } 80 | -------------------------------------------------------------------------------- /collector/top_status.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "github.com/golang/glog" 5 | "github.com/prometheus/client_golang/prometheus" 6 | "gopkg.in/mgo.v2" 7 | "gopkg.in/mgo.v2/bson" 8 | ) 9 | 10 | // TopStatus represents top metrics 11 | type TopStatus struct { 12 | TopStats TopStatsMap `bson:"totals,omitempty"` 13 | } 14 | 15 | // GetTopStats fetches top stats 16 | func GetTopStats(session *mgo.Session) (*TopStatus, error) { 17 | results := &TopStatus{} 18 | err := session.DB("admin").Run(bson.D{{"top", 1}}, &results) 19 | return results, err 20 | } 21 | 22 | // Export exports metrics to Prometheus 23 | func (status *TopStatus) Export(ch chan<- prometheus.Metric) { 24 | status.TopStats.Export(ch) 25 | } 26 | 27 | // Describe describes metrics collected 28 | func (status *TopStatus) Describe(ch chan<- *prometheus.Desc) { 29 | status.TopStats.Describe(ch) 30 | } 31 | 32 | // GetTopStatus fetches top stats 33 | func GetTopStatus(session *mgo.Session) *TopStatus { 34 | topStatus, err := GetTopStats(session) 35 | if err != nil { 36 | glog.Error("Failed to get top status.") 37 | return nil 38 | } 39 | 40 | return topStatus 41 | } 42 | -------------------------------------------------------------------------------- /collector/top_status_test.go: -------------------------------------------------------------------------------- 1 | package collector 2 | 3 | import ( 4 | "testing" 5 | 6 | "gopkg.in/mgo.v2/bson" 7 | ) 8 | 9 | func Test_ParserTopStatus(t *testing.T) { 10 | data := LoadFixture("top_status.bson") 11 | collections := []string{ 12 | "admin.system.roles", 13 | "admin.system.version", 14 | "dummy.collection", 15 | "dummy.users", 16 | "local.oplog.rs", 17 | "local.startup_log", 18 | "local.system.replset", 19 | } 20 | 21 | topStatus := &TopStatus{} 22 | loadTopStatusFromBson(data, topStatus) 23 | 24 | topStats := topStatus.TopStats["dummy.users"] 25 | 26 | if len(topStatus.TopStats) != len(collections) { 27 | t.Error("All database collections were not loaded") 28 | } 29 | 30 | for cid := range collections { 31 | if _, ok := topStatus.TopStats[collections[cid]]; !ok { 32 | t.Errorf("Database collection '%s' is missing", collections[cid]) 33 | } 34 | } 35 | 36 | if 
topStats.Total.Time != 1095531 { 37 | t.Error("Wrong total operation time value for dummy user collection") 38 | } 39 | if topStats.Total.Count != 17428 { 40 | t.Error("Wrong total operation count value for dummy user collection") 41 | } 42 | 43 | if topStats.ReadLock.Time != 267953 { 44 | t.Error("Wrong read lock operation time value for dummy user collection") 45 | } 46 | if topStats.ReadLock.Count != 17420 { 47 | t.Error("Wrong read lock operation count value for dummy user collection") 48 | } 49 | 50 | if topStats.WriteLock.Time != 827578 { 51 | t.Error("Wrong write lock operation time value for dummy user collection") 52 | } 53 | if topStats.WriteLock.Count != 8 { 54 | t.Error("Wrong write lock operation count value for dummy user collection") 55 | } 56 | 57 | if topStats.Queries.Time != 899 { 58 | t.Error("Wrong queries operation time value for dummy user collection") 59 | } 60 | if topStats.Queries.Count != 10 { 61 | t.Error("Wrong queries operation count value for dummy user collection") 62 | } 63 | 64 | if topStats.GetMore.Time != 0 { 65 | t.Error("Wrong get more operation time value for dummy user collection") 66 | } 67 | if topStats.GetMore.Count != 0 { 68 | t.Error("Wrong get more operation count value for dummy user collection") 69 | } 70 | 71 | if topStats.Insert.Time != 826929 { 72 | t.Error("Wrong insert operation time value for dummy user collection") 73 | } 74 | if topStats.Insert.Count != 5 { 75 | t.Error("Wrong insert operation count value for dummy user collection") 76 | } 77 | 78 | if topStats.Update.Time != 456 { 79 | t.Error("Wrong update operation time value for dummy user collection") 80 | } 81 | if topStats.Update.Count != 2 { 82 | t.Error("Wrong update operation count value for dummy user collection") 83 | } 84 | 85 | if topStats.Remove.Time != 193 { 86 | t.Error("Wrong remove operation time value for dummy user collection") 87 | } 88 | if topStats.Remove.Count != 1 { 89 | t.Error("Wrong remove operation count value for dummy user collection") 90 | } 91 | 92 | if topStats.Commands.Time != 0 { 93 | t.Error("Wrong commands operation time value for dummy user collection") 94 | } 95 | if topStats.Commands.Count != 0 { 96 | t.Error("Wrong commands operation count value for dummy user collection") 97 | } 98 | } 99 | 100 | func loadTopStatusFromBson(data []byte, status *TopStatus) { 101 | err := bson.Unmarshal(data, status) 102 | if err != nil { 103 | panic(err) 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /collector/wiredtiger.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Percona LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
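// This file maps the wiredTiger section of serverStatus (block-manager, cache, log, session, transaction and
// concurrentTransactions) onto Prometheus gauges and counters.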
14 | 15 | package collector 16 | 17 | import ( 18 | "github.com/prometheus/client_golang/prometheus" 19 | ) 20 | 21 | var ( 22 | wtBlockManagerBlocksTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 23 | Namespace: Namespace, 24 | Subsystem: "wiredtiger_blockmanager", 25 | Name: "blocks_total", 26 | Help: "The total number of blocks read by the WiredTiger BlockManager", 27 | }, []string{"type"}) 28 | wtBlockManagerBytesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 29 | Namespace: Namespace, 30 | Subsystem: "wiredtiger_blockmanager", 31 | Name: "bytes_total", 32 | Help: "The total number of bytes read by the WiredTiger BlockManager", 33 | }, []string{"type"}) 34 | ) 35 | 36 | var ( 37 | wtCachePages = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 38 | Namespace: Namespace, 39 | Subsystem: "wiredtiger_cache", 40 | Name: "pages", 41 | Help: "The current number of pages in the WiredTiger Cache", 42 | }, []string{"type"}) 43 | wtCachePagesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 44 | Namespace: Namespace, 45 | Subsystem: "wiredtiger_cache", 46 | Name: "pages_total", 47 | Help: "The total number of pages read into/from the WiredTiger Cache", 48 | }, []string{"type"}) 49 | wtCacheBytes = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 50 | Namespace: Namespace, 51 | Subsystem: "wiredtiger_cache", 52 | Name: "bytes", 53 | Help: "The current size of data in the WiredTiger Cache in bytes", 54 | }, []string{"type"}) 55 | wtCacheMaxBytes = prometheus.NewGauge(prometheus.GaugeOpts{ 56 | Namespace: Namespace, 57 | Subsystem: "wiredtiger_cache", 58 | Name: "max_bytes", 59 | Help: "The maximum size of data in the WiredTiger Cache in bytes", 60 | }) 61 | wtCacheBytesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 62 | Namespace: Namespace, 63 | Subsystem: "wiredtiger_cache", 64 | Name: "bytes_total", 65 | Help: "The total number of bytes read into/from the WiredTiger Cache", 66 | }, []string{"type"}) 67 | wtCacheEvictedTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 68 | Namespace: Namespace, 69 | Subsystem: "wiredtiger_cache", 70 | Name: "evicted_total", 71 | Help: "The total number of pages evicted from the WiredTiger Cache", 72 | }, []string{"type"}) 73 | wtCachePercentOverhead = prometheus.NewGauge(prometheus.GaugeOpts{ 74 | Namespace: Namespace, 75 | Subsystem: "wiredtiger_cache", 76 | Name: "overhead_percent", 77 | Help: "The percentage overhead of the WiredTiger Cache", 78 | }) 79 | ) 80 | 81 | var ( 82 | wtTransactionsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 83 | Namespace: Namespace, 84 | Subsystem: "wiredtiger_transactions", 85 | Name: "total", 86 | Help: "The total number of transactions WiredTiger has handled", 87 | }, []string{"type"}) 88 | wtTransactionsTotalCheckpointMs = prometheus.NewCounter(prometheus.CounterOpts{ 89 | Namespace: Namespace, 90 | Subsystem: "wiredtiger_transactions", 91 | Name: "checkpoint_milliseconds_total", 92 | Help: "The total time in milliseconds transactions have checkpointed in WiredTiger", 93 | }) 94 | wtTransactionsCheckpointMs = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 95 | Namespace: Namespace, 96 | Subsystem: "wiredtiger_transactions", 97 | Name: "checkpoint_milliseconds", 98 | Help: "The time in milliseconds transactions have checkpointed in WiredTiger", 99 | }, []string{"type"}) 100 | wtTransactionsCheckpointsRunning = prometheus.NewGauge(prometheus.GaugeOpts{ 101 | Namespace: Namespace, 102 | Subsystem: "wiredtiger_transactions", 103 | Name: "running_checkpoints", 104 | Help: "The number of 
currently running checkpoints in WiredTiger", 105 | }) 106 | ) 107 | 108 | var ( 109 | wtLogRecordsScannedTotal = prometheus.NewCounter(prometheus.CounterOpts{ 110 | Namespace: Namespace, 111 | Subsystem: "wiredtiger_log", 112 | Name: "records_scanned_total", 113 | Help: "The total number of records scanned by log scan in the WiredTiger log", 114 | }) 115 | wtLogRecordsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 116 | Namespace: Namespace, 117 | Subsystem: "wiredtiger_log", 118 | Name: "records_total", 119 | Help: "The total number of compressed/uncompressed records written to the WiredTiger log", 120 | }, []string{"type"}) 121 | wtLogBytesTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 122 | Namespace: Namespace, 123 | Subsystem: "wiredtiger_log", 124 | Name: "bytes_total", 125 | Help: "The total number of bytes written to the WiredTiger log", 126 | }, []string{"type"}) 127 | wtLogOperationsTotal = prometheus.NewCounterVec(prometheus.CounterOpts{ 128 | Namespace: Namespace, 129 | Subsystem: "wiredtiger_log", 130 | Name: "operations_total", 131 | Help: "The total number of WiredTiger log operations", 132 | }, []string{"type"}) 133 | ) 134 | 135 | var ( 136 | wtOpenCursors = prometheus.NewGauge(prometheus.GaugeOpts{ 137 | Namespace: Namespace, 138 | Subsystem: "wiredtiger_session", 139 | Name: "open_cursors_total", 140 | Help: "The total number of cursors opened in WiredTiger", 141 | }) 142 | wtOpenSessions = prometheus.NewGauge(prometheus.GaugeOpts{ 143 | Namespace: Namespace, 144 | Subsystem: "wiredtiger_session", 145 | Name: "open_sessions_total", 146 | Help: "The total number of sessions opened in WiredTiger", 147 | }) 148 | ) 149 | 150 | var ( 151 | wtConcurrentTransactionsOut = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 152 | Namespace: Namespace, 153 | Subsystem: "wiredtiger_concurrent_transactions", 154 | Name: "out_tickets", 155 | Help: "The number of tickets that are currently in use (out) in WiredTiger", 156 | }, []string{"type"}) 157 | wtConcurrentTransactionsAvailable = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 158 | Namespace: Namespace, 159 | Subsystem: "wiredtiger_concurrent_transactions", 160 | Name: "available_tickets", 161 | Help: "The number of tickets that are available in WiredTiger", 162 | }, []string{"type"}) 163 | wtConcurrentTransactionsTotalTickets = prometheus.NewGaugeVec(prometheus.GaugeOpts{ 164 | Namespace: Namespace, 165 | Subsystem: "wiredtiger_concurrent_transactions", 166 | Name: "total_tickets", 167 | Help: "The total number of tickets that are available in WiredTiger", 168 | }, []string{"type"}) 169 | ) 170 | 171 | // blockmanager stats 172 | type WTBlockManagerStats struct { 173 | MappedBytesRead float64 `bson:"mapped bytes read"` 174 | BytesRead float64 `bson:"bytes read"` 175 | BytesWritten float64 `bson:"bytes written"` 176 | MappedBlocksRead float64 `bson:"mapped blocks read"` 177 | BlocksPreLoaded float64 `bson:"blocks pre-loaded"` 178 | BlocksRead float64 `bson:"blocks read"` 179 | BlocksWritten float64 `bson:"blocks written"` 180 | } 181 | 182 | func (stats *WTBlockManagerStats) Export(ch chan<- prometheus.Metric) { 183 | wtBlockManagerBlocksTotal.WithLabelValues("read").Set(stats.BlocksRead) 184 | wtBlockManagerBlocksTotal.WithLabelValues("read_mapped").Set(stats.MappedBlocksRead) 185 | wtBlockManagerBlocksTotal.WithLabelValues("pre_loaded").Set(stats.BlocksPreLoaded) 186 | wtBlockManagerBlocksTotal.WithLabelValues("written").Set(stats.BlocksWritten) 187 | 
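// serverStatus reports these WiredTiger figures as cumulative totals, so the counters are overwritten with Set
// rather than incremented; the byte counters below use the same read/read_mapped/written labelling as the
// block counters above.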
wtBlockManagerBytesTotal.WithLabelValues("read").Set(stats.BytesRead) 188 | wtBlockManagerBytesTotal.WithLabelValues("read_mapped").Set(stats.MappedBytesRead) 189 | wtBlockManagerBytesTotal.WithLabelValues("written").Set(stats.BytesWritten) 190 | } 191 | 192 | func (stats *WTBlockManagerStats) Describe(ch chan<- *prometheus.Desc) { 193 | wtBlockManagerBlocksTotal.Describe(ch) 194 | wtBlockManagerBytesTotal.Describe(ch) 195 | } 196 | 197 | // cache stats 198 | type WTCacheStats struct { 199 | BytesTotal float64 `bson:"bytes currently in the cache"` 200 | BytesDirty float64 `bson:"tracked dirty bytes in the cache"` 201 | BytesInternalPages float64 `bson:"tracked bytes belonging to internal pages in the cache"` 202 | BytesLeafPages float64 `bson:"tracked bytes belonging to leaf pages in the cache"` 203 | MaxBytes float64 `bson:"maximum bytes configured"` 204 | BytesReadInto float64 `bson:"bytes read into cache"` 205 | BytesWrittenFrom float64 `bson:"bytes written from cache"` 206 | EvictedUnmodified float64 `bson:"unmodified pages evicted"` 207 | EvictedModified float64 `bson:"modified pages evicted"` 208 | PercentOverhead float64 `bson:"percentage overhead"` 209 | PagesTotal float64 `bson:"pages currently held in the cache"` 210 | PagesReadInto float64 `bson:"pages read into cache"` 211 | PagesWrittenFrom float64 `bson:"pages written from cache"` 212 | PagesDirty float64 `bson:"tracked dirty pages in the cache"` 213 | } 214 | 215 | func (stats *WTCacheStats) Export(ch chan<- prometheus.Metric) { 216 | wtCachePagesTotal.WithLabelValues("read").Set(stats.PagesReadInto) 217 | wtCachePagesTotal.WithLabelValues("written").Set(stats.PagesWrittenFrom) 218 | wtCacheBytesTotal.WithLabelValues("read").Set(stats.BytesReadInto) 219 | wtCacheBytesTotal.WithLabelValues("written").Set(stats.BytesWrittenFrom) 220 | wtCacheEvictedTotal.WithLabelValues("modified").Set(stats.EvictedModified) 221 | wtCacheEvictedTotal.WithLabelValues("unmodified").Set(stats.EvictedUnmodified) 222 | wtCachePages.WithLabelValues("total").Set(stats.PagesTotal) 223 | wtCachePages.WithLabelValues("dirty").Set(stats.PagesDirty) 224 | wtCacheBytes.WithLabelValues("total").Set(stats.BytesTotal) 225 | wtCacheBytes.WithLabelValues("dirty").Set(stats.BytesDirty) 226 | wtCacheBytes.WithLabelValues("internal_pages").Set(stats.BytesInternalPages) 227 | wtCacheBytes.WithLabelValues("leaf_pages").Set(stats.BytesLeafPages) 228 | wtCacheMaxBytes.Set(stats.MaxBytes) 229 | wtCachePercentOverhead.Set(stats.PercentOverhead) 230 | } 231 | 232 | func (stats *WTCacheStats) Describe(ch chan<- *prometheus.Desc) { 233 | wtCachePagesTotal.Describe(ch) 234 | wtCacheEvictedTotal.Describe(ch) 235 | wtCachePages.Describe(ch) 236 | wtCacheBytes.Describe(ch) 237 | wtCacheMaxBytes.Describe(ch) 238 | wtCachePercentOverhead.Describe(ch) 239 | } 240 | 241 | // log stats 242 | type WTLogStats struct { 243 | TotalBufferSize float64 `bson:"total log buffer size"` 244 | TotalSizeCompressed float64 `bson:"total size of compressed records"` 245 | BytesPayloadData float64 `bson:"log bytes of payload data"` 246 | BytesWritten float64 `bson:"log bytes written"` 247 | RecordsUncompressed float64 `bson:"log records not compressed"` 248 | RecordsCompressed float64 `bson:"log records compressed"` 249 | RecordsProcessedLogScan float64 `bson:"records processed by log scan"` 250 | MaxLogSize float64 `bson:"maximum log file size"` 251 | LogFlushes float64 `bson:"log flush operations"` 252 | LogReads float64 `bson:"log read operations"` 253 | LogScansDouble float64 `bson:"log scan 
records requiring two reads"` 254 | LogScans float64 `bson:"log scan operations"` 255 | LogSyncs float64 `bson:"log sync operations"` 256 | LogSyncDirs float64 `bson:"log sync_dir operations"` 257 | LogWrites float64 `bson:"log write operations"` 258 | } 259 | 260 | func (stats *WTLogStats) Export(ch chan<- prometheus.Metric) { 261 | wtLogRecordsTotal.WithLabelValues("compressed").Set(stats.RecordsCompressed) 262 | wtLogRecordsTotal.WithLabelValues("uncompressed").Set(stats.RecordsUncompressed) 263 | wtLogBytesTotal.WithLabelValues("payload").Set(stats.BytesPayloadData) 264 | wtLogBytesTotal.WithLabelValues("written").Set(stats.BytesWritten) 265 | wtLogOperationsTotal.WithLabelValues("read").Set(stats.LogReads) 266 | wtLogOperationsTotal.WithLabelValues("write").Set(stats.LogWrites) 267 | wtLogOperationsTotal.WithLabelValues("scan").Set(stats.LogScans) 268 | wtLogOperationsTotal.WithLabelValues("scan_double").Set(stats.LogScansDouble) 269 | wtLogOperationsTotal.WithLabelValues("sync").Set(stats.LogSyncs) 270 | wtLogOperationsTotal.WithLabelValues("sync_dir").Set(stats.LogSyncDirs) 271 | wtLogOperationsTotal.WithLabelValues("flush").Set(stats.LogFlushes) 272 | wtLogRecordsScannedTotal.Set(stats.RecordsProcessedLogScan) 273 | } 274 | 275 | func (stats *WTLogStats) Describe(ch chan<- *prometheus.Desc) { 276 | wtLogRecordsTotal.Describe(ch) 277 | wtLogBytesTotal.Describe(ch) 278 | wtLogOperationsTotal.Describe(ch) 279 | wtLogRecordsScannedTotal.Describe(ch) 280 | } 281 | 282 | // session stats 283 | type WTSessionStats struct { 284 | Cursors float64 `bson:"open cursor count"` 285 | Sessions float64 `bson:"open session count"` 286 | } 287 | 288 | func (stats *WTSessionStats) Export(ch chan<- prometheus.Metric) { 289 | wtOpenCursors.Set(stats.Cursors) 290 | wtOpenSessions.Set(stats.Sessions) 291 | } 292 | 293 | func (stats *WTSessionStats) Describe(ch chan<- *prometheus.Desc) { 294 | wtOpenCursors.Describe(ch) 295 | wtOpenSessions.Describe(ch) 296 | } 297 | 298 | // transaction stats 299 | type WTTransactionStats struct { 300 | Begins float64 `bson:"transaction begins"` 301 | Checkpoints float64 `bson:"transaction checkpoints"` 302 | CheckpointsRunning float64 `bson:"transaction checkpoint currently running"` 303 | CheckpointMaxMs float64 `bson:"transaction checkpoint max time (msecs)"` 304 | CheckpointMinMs float64 `bson:"transaction checkpoint min time (msecs)"` 305 | CheckpointLastMs float64 `bson:"transaction checkpoint most recent time (msecs)"` 306 | CheckpointTotalMs float64 `bson:"transaction checkpoint total time (msecs)"` 307 | Committed float64 `bson:"transactions committed"` 308 | CacheOverflowFailure float64 `bson:"transaction failures due to cache overflow"` 309 | RolledBack float64 `bson:"transactions rolled back"` 310 | } 311 | 312 | func (stats *WTTransactionStats) Export(ch chan<- prometheus.Metric) { 313 | wtTransactionsTotal.WithLabelValues("begins").Set(stats.Begins) 314 | wtTransactionsTotal.WithLabelValues("checkpoints").Set(stats.Checkpoints) 315 | wtTransactionsTotal.WithLabelValues("committed").Set(stats.Committed) 316 | wtTransactionsTotal.WithLabelValues("rolledback").Set(stats.RolledBack) 317 | wtTransactionsCheckpointMs.WithLabelValues("min").Set(stats.CheckpointMinMs) 318 | wtTransactionsCheckpointMs.WithLabelValues("max").Set(stats.CheckpointMaxMs) 319 | wtTransactionsTotalCheckpointMs.Set(stats.CheckpointTotalMs) 320 | wtTransactionsCheckpointsRunning.Set(stats.CheckpointsRunning) 321 | } 322 | 323 | func (stats *WTTransactionStats) Describe(ch chan<- 
*prometheus.Desc) { 324 | wtTransactionsTotal.Describe(ch) 325 | wtTransactionsTotalCheckpointMs.Describe(ch) 326 | wtTransactionsCheckpointMs.Describe(ch) 327 | wtTransactionsCheckpointsRunning.Describe(ch) 328 | } 329 | 330 | // concurrenttransaction stats 331 | type WTConcurrentTransactionsTypeStats struct { 332 | Out float64 `bson:"out"` 333 | Available float64 `bson:"available"` 334 | TotalTickets float64 `bson:"totalTickets"` 335 | } 336 | 337 | type WTConcurrentTransactionsStats struct { 338 | Write *WTConcurrentTransactionsTypeStats `bson:"read"` 339 | Read *WTConcurrentTransactionsTypeStats `bson:"write"` 340 | } 341 | 342 | func (stats *WTConcurrentTransactionsStats) Export(ch chan<- prometheus.Metric) { 343 | wtConcurrentTransactionsOut.WithLabelValues("read").Set(stats.Read.Out) 344 | wtConcurrentTransactionsOut.WithLabelValues("write").Set(stats.Write.Out) 345 | wtConcurrentTransactionsAvailable.WithLabelValues("read").Set(stats.Read.Available) 346 | wtConcurrentTransactionsAvailable.WithLabelValues("write").Set(stats.Write.Available) 347 | wtConcurrentTransactionsTotalTickets.WithLabelValues("read").Set(stats.Read.TotalTickets) 348 | wtConcurrentTransactionsTotalTickets.WithLabelValues("write").Set(stats.Write.TotalTickets) 349 | } 350 | 351 | func (stats *WTConcurrentTransactionsStats) Describe(ch chan<- *prometheus.Desc) { 352 | wtConcurrentTransactionsOut.Describe(ch) 353 | wtConcurrentTransactionsAvailable.Describe(ch) 354 | wtConcurrentTransactionsTotalTickets.Describe(ch) 355 | } 356 | 357 | // WiredTiger stats 358 | type WiredTigerStats struct { 359 | BlockManager *WTBlockManagerStats `bson:"block-manager"` 360 | Cache *WTCacheStats `bson:"cache"` 361 | Log *WTLogStats `bson:"log"` 362 | Session *WTSessionStats `bson:"session"` 363 | Transaction *WTTransactionStats `bson:"transaction"` 364 | ConcurrentTransactions *WTConcurrentTransactionsStats `bson:"concurrentTransactions"` 365 | } 366 | 367 | func (stats *WiredTigerStats) Describe(ch chan<- *prometheus.Desc) { 368 | if stats.BlockManager != nil { 369 | stats.BlockManager.Describe(ch) 370 | } 371 | if stats.Cache != nil { 372 | stats.Cache.Describe(ch) 373 | } 374 | if stats.Transaction != nil { 375 | stats.Transaction.Describe(ch) 376 | } 377 | if stats.Log != nil { 378 | stats.Log.Describe(ch) 379 | } 380 | if stats.Session != nil { 381 | stats.Session.Describe(ch) 382 | } 383 | if stats.ConcurrentTransactions != nil { 384 | stats.ConcurrentTransactions.Describe(ch) 385 | } 386 | } 387 | 388 | func (stats *WiredTigerStats) Export(ch chan<- prometheus.Metric) { 389 | if stats.BlockManager != nil { 390 | stats.BlockManager.Export(ch) 391 | } 392 | if stats.Cache != nil { 393 | stats.Cache.Export(ch) 394 | } 395 | if stats.Transaction != nil { 396 | stats.Transaction.Export(ch) 397 | } 398 | if stats.Log != nil { 399 | stats.Log.Export(ch) 400 | } 401 | if stats.Session != nil { 402 | stats.Session.Export(ch) 403 | } 404 | if stats.ConcurrentTransactions != nil { 405 | stats.ConcurrentTransactions.Export(ch) 406 | } 407 | 408 | wtBlockManagerBlocksTotal.Collect(ch) 409 | wtBlockManagerBytesTotal.Collect(ch) 410 | 411 | wtCachePagesTotal.Collect(ch) 412 | wtCacheBytesTotal.Collect(ch) 413 | wtCacheEvictedTotal.Collect(ch) 414 | wtCachePages.Collect(ch) 415 | wtCacheBytes.Collect(ch) 416 | wtCacheMaxBytes.Collect(ch) 417 | wtCachePercentOverhead.Collect(ch) 418 | 419 | wtTransactionsTotal.Collect(ch) 420 | wtTransactionsTotalCheckpointMs.Collect(ch) 421 | wtTransactionsCheckpointMs.Collect(ch) 422 | 
wtTransactionsCheckpointsRunning.Collect(ch) 423 | 424 | wtLogRecordsTotal.Collect(ch) 425 | wtLogBytesTotal.Collect(ch) 426 | wtLogOperationsTotal.Collect(ch) 427 | wtLogRecordsScannedTotal.Collect(ch) 428 | 429 | wtOpenCursors.Collect(ch) 430 | wtOpenSessions.Collect(ch) 431 | 432 | wtConcurrentTransactionsOut.Collect(ch) 433 | wtConcurrentTransactionsAvailable.Collect(ch) 434 | wtConcurrentTransactionsTotalTickets.Collect(ch) 435 | } 436 | -------------------------------------------------------------------------------- /glide.lock: -------------------------------------------------------------------------------- 1 | hash: 2daa265ce1037e5ac801ef074714e212b1ff69f2fea2e5c69a6c05ece9326f31 2 | updated: 2018-03-15T17:01:48.086832728-07:00 3 | imports: 4 | - name: github.com/beorn7/perks 5 | version: 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9 6 | subpackages: 7 | - quantile 8 | - name: github.com/golang/glog 9 | version: 23def4e6c14b4da8ac2ed8007337bc5eb5007998 10 | - name: github.com/golang/protobuf 11 | version: 17ce1425424ab154092bbb43af630bd647f3bb0d 12 | subpackages: 13 | - proto 14 | - name: github.com/matttproud/golang_protobuf_extensions 15 | version: c12348ce28de40eed0136aa2b644d0ee0650e56c 16 | subpackages: 17 | - pbutil 18 | - name: github.com/prometheus/client_golang 19 | version: c5b7fccd204277076155f10851dad72b76a49317 20 | subpackages: 21 | - prometheus 22 | - name: github.com/prometheus/client_model 23 | version: 6f3806018612930941127f2a7c6c453ba2c527d2 24 | subpackages: 25 | - go 26 | - name: github.com/prometheus/common 27 | version: e3fb1a1acd7605367a2b378bc2e2f893c05174b7 28 | subpackages: 29 | - expfmt 30 | - internal/bitbucket.org/ww/goautoneg 31 | - model 32 | - name: github.com/prometheus/procfs 33 | version: a6e9df898b1336106c743392c48ee0b71f5c4efa 34 | subpackages: 35 | - xfs 36 | - name: github.com/rwynn/gtm 37 | version: 495abc277593067479c3c528864191804ab04cf7 38 | repo: https://github.com/jacksontj/gtm.git 39 | - name: github.com/serialx/hashring 40 | version: 6a9381c5a83e926b9f1fd907395a581e69747e96 41 | - name: gopkg.in/mgo.v2 42 | version: 3f83fa5005286a7fe593b055f0d7771a7dce4655 43 | subpackages: 44 | - bson 45 | - internal/json 46 | - internal/sasl 47 | - internal/scram 48 | testImports: [] 49 | -------------------------------------------------------------------------------- /glide.yaml: -------------------------------------------------------------------------------- 1 | package: github.com/dcu/mongodb_exporter 2 | import: 3 | - package: github.com/golang/glog 4 | - package: github.com/prometheus/client_golang 5 | version: v0.8.0 6 | subpackages: 7 | - prometheus 8 | - package: gopkg.in/mgo.v2 9 | subpackages: 10 | - bson 11 | - package: github.com/rwynn/gtm 12 | version: 495abc277593067479c3c528864191804ab04cf7 13 | repo: https://github.com/jacksontj/gtm.git 14 | -------------------------------------------------------------------------------- /grafana_dashboards/dashboard.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "name": "DS_PROMETHEUS", 5 | "label": "prometheus", 6 | "description": "", 7 | "type": "datasource", 8 | "pluginId": "prometheus", 9 | "pluginName": "Prometheus" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "type": "grafana", 15 | "id": "grafana", 16 | "name": "Grafana", 17 | "version": "4.3.2" 18 | }, 19 | { 20 | "type": "panel", 21 | "id": "graph", 22 | "name": "Graph", 23 | "version": "" 24 | }, 25 | { 26 | "type": "datasource", 27 | "id": "prometheus", 28 | "name": 
"Prometheus", 29 | "version": "1.0.0" 30 | }, 31 | { 32 | "type": "panel", 33 | "id": "singlestat", 34 | "name": "Singlestat", 35 | "version": "" 36 | } 37 | ], 38 | "annotations": { 39 | "list": [] 40 | }, 41 | "editable": true, 42 | "gnetId": null, 43 | "graphTooltip": 1, 44 | "hideControls": false, 45 | "id": null, 46 | "links": [], 47 | "refresh": false, 48 | "rows": [ 49 | { 50 | "collapse": false, 51 | "height": 128, 52 | "panels": [ 53 | { 54 | "cacheTimeout": null, 55 | "colorBackground": false, 56 | "colorValue": true, 57 | "colors": [ 58 | "rgba(245, 54, 54, 0.9)", 59 | "rgba(237, 129, 40, 0.89)", 60 | "rgba(50, 172, 45, 0.97)" 61 | ], 62 | "datasource": "${DS_PROMETHEUS}", 63 | "decimals": null, 64 | "format": "s", 65 | "gauge": { 66 | "maxValue": 100, 67 | "minValue": 0, 68 | "show": false, 69 | "thresholdLabels": false, 70 | "thresholdMarkers": true 71 | }, 72 | "id": 10, 73 | "interval": null, 74 | "links": [], 75 | "mappingType": 1, 76 | "mappingTypes": [ 77 | { 78 | "name": "value to text", 79 | "value": 1 80 | }, 81 | { 82 | "name": "range to text", 83 | "value": 2 84 | } 85 | ], 86 | "maxDataPoints": 100, 87 | "nullPointMode": "connected", 88 | "nullText": null, 89 | "postfix": "", 90 | "postfixFontSize": "50%", 91 | "prefix": "", 92 | "prefixFontSize": "50%", 93 | "rangeMaps": [ 94 | { 95 | "from": "null", 96 | "text": "N/A", 97 | "to": "null" 98 | } 99 | ], 100 | "span": 2, 101 | "sparkline": { 102 | "fillColor": "rgba(31, 118, 189, 0.18)", 103 | "full": false, 104 | "lineColor": "rgb(31, 120, 193)", 105 | "show": false 106 | }, 107 | "tableColumn": "", 108 | "targets": [ 109 | { 110 | "expr": "mongodb_instance_uptime_seconds", 111 | "format": "time_series", 112 | "intervalFactor": 2, 113 | "legendFormat": "", 114 | "refId": "A", 115 | "step": 60 116 | } 117 | ], 118 | "thresholds": "0,360", 119 | "title": "Uptime", 120 | "type": "singlestat", 121 | "valueFontSize": "80%", 122 | "valueMaps": [ 123 | { 124 | "op": "=", 125 | "text": "N/A", 126 | "value": "null" 127 | } 128 | ], 129 | "valueName": "current" 130 | }, 131 | { 132 | "cacheTimeout": null, 133 | "colorBackground": false, 134 | "colorValue": false, 135 | "colors": [ 136 | "rgba(245, 54, 54, 0.9)", 137 | "rgba(237, 129, 40, 0.89)", 138 | "rgba(50, 172, 45, 0.97)" 139 | ], 140 | "datasource": "${DS_PROMETHEUS}", 141 | "decimals": null, 142 | "format": "none", 143 | "gauge": { 144 | "maxValue": 100, 145 | "minValue": 0, 146 | "show": false, 147 | "thresholdLabels": false, 148 | "thresholdMarkers": true 149 | }, 150 | "id": 2, 151 | "interval": null, 152 | "links": [], 153 | "mappingType": 1, 154 | "mappingTypes": [ 155 | { 156 | "name": "value to text", 157 | "value": 1 158 | }, 159 | { 160 | "name": "range to text", 161 | "value": 2 162 | } 163 | ], 164 | "maxDataPoints": 100, 165 | "nullPointMode": "connected", 166 | "nullText": null, 167 | "postfix": "", 168 | "postfixFontSize": "50%", 169 | "prefix": "", 170 | "prefixFontSize": "50%", 171 | "rangeMaps": [ 172 | { 173 | "from": "null", 174 | "text": "N/A", 175 | "to": "null" 176 | } 177 | ], 178 | "span": 2, 179 | "sparkline": { 180 | "fillColor": "rgba(31, 118, 189, 0.18)", 181 | "full": true, 182 | "lineColor": "rgb(31, 120, 193)", 183 | "show": true 184 | }, 185 | "tableColumn": "", 186 | "targets": [ 187 | { 188 | "expr": "mongodb_connections{state=\"available\"}", 189 | "format": "time_series", 190 | "intervalFactor": 2, 191 | "legendFormat": "", 192 | "metric": "mongodb_connections", 193 | "refId": "A", 194 | "step": 60 195 | } 196 | ], 197 | 
"thresholds": "", 198 | "title": "Available Connections", 199 | "type": "singlestat", 200 | "valueFontSize": "80%", 201 | "valueMaps": [ 202 | { 203 | "op": "=", 204 | "text": "N/A", 205 | "value": "null" 206 | } 207 | ], 208 | "valueName": "avg" 209 | }, 210 | { 211 | "cacheTimeout": null, 212 | "colorBackground": false, 213 | "colorValue": false, 214 | "colors": [ 215 | "rgba(245, 54, 54, 0.9)", 216 | "rgba(237, 129, 40, 0.89)", 217 | "rgba(50, 172, 45, 0.97)" 218 | ], 219 | "datasource": "${DS_PROMETHEUS}", 220 | "format": "none", 221 | "gauge": { 222 | "maxValue": 100, 223 | "minValue": 0, 224 | "show": false, 225 | "thresholdLabels": false, 226 | "thresholdMarkers": true 227 | }, 228 | "id": 1, 229 | "interval": null, 230 | "links": [], 231 | "mappingType": 1, 232 | "mappingTypes": [ 233 | { 234 | "name": "value to text", 235 | "value": 1 236 | }, 237 | { 238 | "name": "range to text", 239 | "value": 2 240 | } 241 | ], 242 | "maxDataPoints": 100, 243 | "nullPointMode": "connected", 244 | "nullText": null, 245 | "postfix": "", 246 | "postfixFontSize": "50%", 247 | "prefix": "", 248 | "prefixFontSize": "50%", 249 | "rangeMaps": [ 250 | { 251 | "from": "null", 252 | "text": "N/A", 253 | "to": "null" 254 | } 255 | ], 256 | "span": 8, 257 | "sparkline": { 258 | "fillColor": "rgba(31, 118, 189, 0.18)", 259 | "full": true, 260 | "lineColor": "rgb(31, 120, 193)", 261 | "show": true 262 | }, 263 | "tableColumn": "", 264 | "targets": [ 265 | { 266 | "expr": "mongodb_connections{state=\"current\"}", 267 | "format": "time_series", 268 | "intervalFactor": 2, 269 | "legendFormat": "", 270 | "metric": "mongodb_connections", 271 | "refId": "A", 272 | "step": 60 273 | } 274 | ], 275 | "thresholds": "", 276 | "title": "Open Connections", 277 | "type": "singlestat", 278 | "valueFontSize": "80%", 279 | "valueMaps": [ 280 | { 281 | "op": "=", 282 | "text": "N/A", 283 | "value": "null" 284 | } 285 | ], 286 | "valueName": "avg" 287 | } 288 | ], 289 | "repeat": null, 290 | "repeatIteration": null, 291 | "repeatRowId": null, 292 | "showTitle": false, 293 | "title": "Dashboard Row", 294 | "titleSize": "h6" 295 | }, 296 | { 297 | "collapse": false, 298 | "height": 228, 299 | "panels": [ 300 | { 301 | "aliasColors": {}, 302 | "bars": false, 303 | "dashLength": 10, 304 | "dashes": false, 305 | "datasource": "${DS_PROMETHEUS}", 306 | "fill": 1, 307 | "id": 7, 308 | "legend": { 309 | "avg": false, 310 | "current": false, 311 | "max": false, 312 | "min": false, 313 | "show": true, 314 | "total": false, 315 | "values": false 316 | }, 317 | "lines": true, 318 | "linewidth": 1, 319 | "links": [], 320 | "nullPointMode": "null", 321 | "percentage": false, 322 | "pointradius": 5, 323 | "points": false, 324 | "renderer": "flot", 325 | "seriesOverrides": [], 326 | "spaceLength": 10, 327 | "span": 5, 328 | "stack": false, 329 | "steppedLine": false, 330 | "targets": [ 331 | { 332 | "expr": "rate(mongodb_op_counters_total[5m])", 333 | "format": "time_series", 334 | "interval": "", 335 | "intervalFactor": 2, 336 | "legendFormat": "{{type}}", 337 | "refId": "A", 338 | "step": 10 339 | } 340 | ], 341 | "thresholds": [], 342 | "timeFrom": null, 343 | "timeShift": null, 344 | "title": "Query Operations", 345 | "tooltip": { 346 | "shared": true, 347 | "sort": 0, 348 | "value_type": "individual" 349 | }, 350 | "type": "graph", 351 | "xaxis": { 352 | "buckets": null, 353 | "mode": "time", 354 | "name": null, 355 | "show": true, 356 | "values": [] 357 | }, 358 | "yaxes": [ 359 | { 360 | "format": "ops", 361 | "label": null, 362 | 
"logBase": 1, 363 | "max": null, 364 | "min": null, 365 | "show": true 366 | }, 367 | { 368 | "format": "short", 369 | "label": null, 370 | "logBase": 1, 371 | "max": null, 372 | "min": null, 373 | "show": true 374 | } 375 | ] 376 | }, 377 | { 378 | "aliasColors": {}, 379 | "bars": false, 380 | "dashLength": 10, 381 | "dashes": false, 382 | "datasource": "${DS_PROMETHEUS}", 383 | "fill": 1, 384 | "id": 9, 385 | "legend": { 386 | "avg": false, 387 | "current": false, 388 | "max": false, 389 | "min": false, 390 | "show": true, 391 | "total": false, 392 | "values": false 393 | }, 394 | "lines": true, 395 | "linewidth": 1, 396 | "links": [], 397 | "nullPointMode": "null", 398 | "percentage": false, 399 | "pointradius": 5, 400 | "points": false, 401 | "renderer": "flot", 402 | "seriesOverrides": [], 403 | "spaceLength": 10, 404 | "span": 4, 405 | "stack": false, 406 | "steppedLine": false, 407 | "targets": [ 408 | { 409 | "expr": "rate(mongodb_metrics_document_total[5m])", 410 | "format": "time_series", 411 | "interval": "", 412 | "intervalFactor": 2, 413 | "legendFormat": "{{state}}", 414 | "refId": "A", 415 | "step": 20 416 | } 417 | ], 418 | "thresholds": [], 419 | "timeFrom": null, 420 | "timeShift": null, 421 | "title": "Document Operations", 422 | "tooltip": { 423 | "shared": true, 424 | "sort": 0, 425 | "value_type": "individual" 426 | }, 427 | "type": "graph", 428 | "xaxis": { 429 | "buckets": null, 430 | "mode": "time", 431 | "name": null, 432 | "show": true, 433 | "values": [] 434 | }, 435 | "yaxes": [ 436 | { 437 | "format": "short", 438 | "label": null, 439 | "logBase": 1, 440 | "max": null, 441 | "min": null, 442 | "show": true 443 | }, 444 | { 445 | "format": "short", 446 | "label": null, 447 | "logBase": 1, 448 | "max": null, 449 | "min": null, 450 | "show": true 451 | } 452 | ] 453 | }, 454 | { 455 | "aliasColors": {}, 456 | "bars": false, 457 | "dashLength": 10, 458 | "dashes": false, 459 | "datasource": "${DS_PROMETHEUS}", 460 | "fill": 1, 461 | "id": 8, 462 | "legend": { 463 | "avg": false, 464 | "current": false, 465 | "max": false, 466 | "min": false, 467 | "show": true, 468 | "total": false, 469 | "values": false 470 | }, 471 | "lines": true, 472 | "linewidth": 1, 473 | "links": [], 474 | "nullPointMode": "null", 475 | "percentage": false, 476 | "pointradius": 5, 477 | "points": false, 478 | "renderer": "flot", 479 | "seriesOverrides": [], 480 | "spaceLength": 10, 481 | "span": 3, 482 | "stack": false, 483 | "steppedLine": false, 484 | "targets": [ 485 | { 486 | "expr": "rate(mongodb_metrics_query_executor_total[5m])", 487 | "format": "time_series", 488 | "interval": "", 489 | "intervalFactor": 2, 490 | "legendFormat": "{{state}}", 491 | "refId": "A", 492 | "step": 20 493 | } 494 | ], 495 | "thresholds": [], 496 | "timeFrom": null, 497 | "timeShift": null, 498 | "title": "Document Query Executor", 499 | "tooltip": { 500 | "shared": true, 501 | "sort": 0, 502 | "value_type": "individual" 503 | }, 504 | "type": "graph", 505 | "xaxis": { 506 | "buckets": null, 507 | "mode": "time", 508 | "name": null, 509 | "show": true, 510 | "values": [] 511 | }, 512 | "yaxes": [ 513 | { 514 | "format": "short", 515 | "label": null, 516 | "logBase": 1, 517 | "max": null, 518 | "min": null, 519 | "show": true 520 | }, 521 | { 522 | "format": "short", 523 | "label": null, 524 | "logBase": 1, 525 | "max": null, 526 | "min": null, 527 | "show": true 528 | } 529 | ] 530 | } 531 | ], 532 | "repeat": null, 533 | "repeatIteration": null, 534 | "repeatRowId": null, 535 | "showTitle": false, 536 | 
"title": "Dashboard Row", 537 | "titleSize": "h6" 538 | }, 539 | { 540 | "collapse": false, 541 | "height": 248, 542 | "panels": [ 543 | { 544 | "aliasColors": {}, 545 | "bars": false, 546 | "dashLength": 10, 547 | "dashes": false, 548 | "datasource": "${DS_PROMETHEUS}", 549 | "fill": 1, 550 | "id": 6, 551 | "legend": { 552 | "avg": false, 553 | "current": false, 554 | "max": false, 555 | "min": false, 556 | "show": true, 557 | "total": false, 558 | "values": false 559 | }, 560 | "lines": true, 561 | "linewidth": 1, 562 | "links": [], 563 | "nullPointMode": "null", 564 | "percentage": false, 565 | "pointradius": 5, 566 | "points": false, 567 | "renderer": "flot", 568 | "seriesOverrides": [], 569 | "spaceLength": 10, 570 | "span": 4, 571 | "stack": false, 572 | "steppedLine": false, 573 | "targets": [ 574 | { 575 | "expr": "mongodb_replset_oplog_size_bytes", 576 | "format": "time_series", 577 | "interval": "", 578 | "intervalFactor": 2, 579 | "legendFormat": "{{type}}", 580 | "metric": "mongodb_locks_time_acquiring_global_microseconds_total", 581 | "refId": "A", 582 | "step": 20 583 | } 584 | ], 585 | "thresholds": [], 586 | "timeFrom": null, 587 | "timeShift": null, 588 | "title": "Oplog Size", 589 | "tooltip": { 590 | "shared": true, 591 | "sort": 0, 592 | "value_type": "individual" 593 | }, 594 | "type": "graph", 595 | "xaxis": { 596 | "buckets": null, 597 | "mode": "time", 598 | "name": null, 599 | "show": true, 600 | "values": [] 601 | }, 602 | "yaxes": [ 603 | { 604 | "format": "decbytes", 605 | "label": null, 606 | "logBase": 1, 607 | "max": null, 608 | "min": null, 609 | "show": true 610 | }, 611 | { 612 | "format": "short", 613 | "label": null, 614 | "logBase": 1, 615 | "max": null, 616 | "min": null, 617 | "show": true 618 | } 619 | ] 620 | }, 621 | { 622 | "aliasColors": {}, 623 | "bars": false, 624 | "dashLength": 10, 625 | "dashes": false, 626 | "datasource": "${DS_PROMETHEUS}", 627 | "fill": 1, 628 | "id": 4, 629 | "legend": { 630 | "avg": false, 631 | "current": false, 632 | "max": false, 633 | "min": false, 634 | "show": false, 635 | "total": false, 636 | "values": false 637 | }, 638 | "lines": true, 639 | "linewidth": 1, 640 | "links": [], 641 | "nullPointMode": "null", 642 | "percentage": false, 643 | "pointradius": 5, 644 | "points": false, 645 | "renderer": "flot", 646 | "seriesOverrides": [], 647 | "spaceLength": 10, 648 | "span": 4, 649 | "stack": false, 650 | "steppedLine": false, 651 | "targets": [ 652 | { 653 | "expr": "mongodb_memory", 654 | "format": "time_series", 655 | "interval": "", 656 | "intervalFactor": 2, 657 | "legendFormat": "{{type}}", 658 | "refId": "A", 659 | "step": 20 660 | } 661 | ], 662 | "thresholds": [], 663 | "timeFrom": null, 664 | "timeShift": null, 665 | "title": "Memory", 666 | "tooltip": { 667 | "shared": false, 668 | "sort": 0, 669 | "value_type": "individual" 670 | }, 671 | "type": "graph", 672 | "xaxis": { 673 | "buckets": null, 674 | "mode": "time", 675 | "name": null, 676 | "show": true, 677 | "values": [ 678 | "total" 679 | ] 680 | }, 681 | "yaxes": [ 682 | { 683 | "format": "decmbytes", 684 | "label": "MB", 685 | "logBase": 1, 686 | "max": null, 687 | "min": null, 688 | "show": true 689 | }, 690 | { 691 | "format": "short", 692 | "label": null, 693 | "logBase": 1, 694 | "max": null, 695 | "min": null, 696 | "show": true 697 | } 698 | ] 699 | }, 700 | { 701 | "aliasColors": {}, 702 | "bars": false, 703 | "dashLength": 10, 704 | "dashes": false, 705 | "datasource": "${DS_PROMETHEUS}", 706 | "fill": 1, 707 | "id": 5, 708 | "legend": { 
709 | "avg": false, 710 | "current": false, 711 | "max": false, 712 | "min": false, 713 | "show": true, 714 | "total": false, 715 | "values": false 716 | }, 717 | "lines": true, 718 | "linewidth": 1, 719 | "links": [], 720 | "nullPointMode": "null", 721 | "percentage": false, 722 | "pointradius": 5, 723 | "points": false, 724 | "renderer": "flot", 725 | "seriesOverrides": [], 726 | "spaceLength": 10, 727 | "span": 4, 728 | "stack": false, 729 | "steppedLine": false, 730 | "targets": [ 731 | { 732 | "expr": "rate(mongodb_network_bytes_total[5m])", 733 | "format": "time_series", 734 | "interval": "", 735 | "intervalFactor": 2, 736 | "legendFormat": "{{state}}", 737 | "metric": "mongodb_metrics_operation_total", 738 | "refId": "A", 739 | "step": 20 740 | } 741 | ], 742 | "thresholds": [], 743 | "timeFrom": null, 744 | "timeShift": null, 745 | "title": "Network I/O", 746 | "tooltip": { 747 | "shared": true, 748 | "sort": 0, 749 | "value_type": "individual" 750 | }, 751 | "type": "graph", 752 | "xaxis": { 753 | "buckets": null, 754 | "mode": "time", 755 | "name": null, 756 | "show": true, 757 | "values": [] 758 | }, 759 | "yaxes": [ 760 | { 761 | "format": "decbytes", 762 | "label": null, 763 | "logBase": 1, 764 | "max": null, 765 | "min": null, 766 | "show": true 767 | }, 768 | { 769 | "format": "short", 770 | "label": null, 771 | "logBase": 1, 772 | "max": null, 773 | "min": null, 774 | "show": true 775 | } 776 | ] 777 | } 778 | ], 779 | "repeat": null, 780 | "repeatIteration": null, 781 | "repeatRowId": null, 782 | "showTitle": false, 783 | "title": "Dashboard Row", 784 | "titleSize": "h6" 785 | } 786 | ], 787 | "schemaVersion": 14, 788 | "style": "dark", 789 | "tags": [], 790 | "templating": { 791 | "list": [] 792 | }, 793 | "time": { 794 | "from": "now-1h", 795 | "to": "now" 796 | }, 797 | "timepicker": { 798 | "refresh_intervals": [ 799 | "5s", 800 | "10s", 801 | "30s", 802 | "1m", 803 | "5m", 804 | "15m", 805 | "30m", 806 | "1h", 807 | "2h", 808 | "1d" 809 | ], 810 | "time_options": [ 811 | "5m", 812 | "15m", 813 | "1h", 814 | "6h", 815 | "12h", 816 | "24h", 817 | "2d", 818 | "7d", 819 | "30d" 820 | ] 821 | }, 822 | "timezone": "browser", 823 | "title": "MongoDB", 824 | "version": 8 825 | } 826 | -------------------------------------------------------------------------------- /mongodb_exporter.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/tls" 5 | "flag" 6 | "fmt" 7 | slog "log" 8 | "net/http" 9 | "os" 10 | "strings" 11 | 12 | "github.com/dcu/mongodb_exporter/collector" 13 | "github.com/dcu/mongodb_exporter/shared" 14 | 15 | "github.com/golang/glog" 16 | "github.com/prometheus/client_golang/prometheus" 17 | ) 18 | 19 | func mongodbDefaultURI() string { 20 | if u := os.Getenv("MONGODB_URL"); u != "" { 21 | return u 22 | } 23 | return "mongodb://localhost:27017" 24 | } 25 | 26 | var ( 27 | listenAddressFlag = flag.String("web.listen-address", ":9001", "Address on which to expose metrics and web interface.") 28 | metricsPathFlag = flag.String("web.metrics-path", "/metrics", "Path under which to expose metrics.") 29 | webTLSCert = flag.String("web.tls-cert", "", "Path to PEM file that conains the certificate (and optionally also the private key in PEM format).\n"+ 30 | " \tThis should include the whole certificate chain.\n"+ 31 | " \tIf provided: The web socket will be a HTTPS socket.\n"+ 32 | " \tIf not provided: Only HTTP.") 33 | webTLSPrivateKey = flag.String("web.tls-private-key", "", "Path to PEM file 
that conains the private key (if not contained in web.tls-cert file).") 34 | webTLSClientCa = flag.String("web.tls-client-ca", "", "Path to PEM file that conains the CAs that are trused for client connections.\n"+ 35 | " \tIf provided: Connecting clients should present a certificate signed by one of this CAs.\n"+ 36 | " \tIf not provided: Every client will be accepted.") 37 | 38 | mongodbURIFlag = flag.String("mongodb.uri", mongodbDefaultURI(), "Mongodb URI, format: [mongodb://][user:pass@]host1[:port1][,host2[:port2],...][/database][?options]") 39 | mongodbTLSCert = flag.String("mongodb.tls-cert", "", "Path to PEM file that conains the certificate (and optionally also the private key in PEM format).\n"+ 40 | " \tThis should include the whole certificate chain.\n"+ 41 | " \tIf provided: The connection will be opened via TLS to the MongoDB server.") 42 | mongodbTLSPrivateKey = flag.String("mongodb.tls-private-key", "", "Path to PEM file that conains the private key (if not contained in mongodb.tls-cert file).") 43 | mongodbTLSCa = flag.String("mongodb.tls-ca", "", "Path to PEM file that conains the CAs that are trused for server connections.\n"+ 44 | " \tIf provided: MongoDB servers connecting to should present a certificate signed by one of this CAs.\n"+ 45 | " \tIf not provided: System default CAs are used.") 46 | mongodbTLSDisableHostnameValidation = flag.Bool("mongodb.tls-disable-hostname-validation", false, "Do hostname validation for server connection.") 47 | enabledGroupsFlag = flag.String("groups.enabled", "asserts,durability,background_flushing,connections,extra_info,global_lock,index_counters,network,op_counters,op_counters_repl,memory,locks,metrics", "Comma-separated list of groups to use, for more info see: docs.mongodb.org/manual/reference/command/serverStatus/") 48 | authUserFlag = flag.String("auth.user", "", "Username for basic auth.") 49 | authPassFlag = flag.String("auth.pass", "", "Password for basic auth.") 50 | mongodbUserName = flag.String("mongodb.username", "", "Username to connect to Mongodb") 51 | mongodbAuthMechanism = flag.String("mongodb.mechanism", "", "auth mechanism to connect to Mongodb (ie: MONGODB-X509)") 52 | mongodbCollectOplog = flag.Bool("mongodb.collect.oplog", true, "collect Mongodb Oplog status") 53 | mongodbCollectOplogTail = flag.Bool("mongodb.collect.oplog_tail", false, "tail Mongodb Oplog to get stats") 54 | mongodbCollectReplSet = flag.Bool("mongodb.collect.replset", true, "collect Mongodb replica set status") 55 | mongodbCollectTopMetrics = flag.Bool("mongodb.collect.top", false, "collect Mongodb Top metrics") 56 | mongodbCollectDatabaseMetrics = flag.Bool("mongodb.collect.database", false, "collect MongoDB database metrics") 57 | mongodbCollectCollectionMetrics = flag.Bool("mongodb.collect.collection", false, "Collect MongoDB collection metrics") 58 | mongodbCollectProfileMetrics = flag.Bool("mongodb.collect.profile", false, "Collect MongoDB profile metrics") 59 | mongodbCollectConnPoolStats = flag.Bool("mongodb.collect.connpoolstats", false, "Collect MongoDB connpoolstats") 60 | mongodbSocketTimeout = flag.Duration("mongodb.socket-timeout", 0, "timeout for socket operations to mongodb") 61 | version = flag.Bool("version", false, "Print mongodb_exporter version") 62 | ) 63 | 64 | type basicAuthHandler struct { 65 | handler http.HandlerFunc 66 | user string 67 | password string 68 | } 69 | 70 | func (h *basicAuthHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { 71 | user, password, ok := r.BasicAuth() 72 | if !ok || password != 
h.password || user != h.user { 73 | w.Header().Set("WWW-Authenticate", "Basic realm=\"metrics\"") 74 | http.Error(w, "Invalid username or password", http.StatusUnauthorized) 75 | return 76 | } 77 | h.handler(w, r) 78 | return 79 | } 80 | 81 | func hasUserAndPassword() bool { 82 | return *authUserFlag != "" && *authPassFlag != "" 83 | } 84 | 85 | func prometheusHandler() http.Handler { 86 | handler := prometheus.Handler() 87 | if hasUserAndPassword() { 88 | handler = &basicAuthHandler{ 89 | handler: prometheus.Handler().ServeHTTP, 90 | user: *authUserFlag, 91 | password: *authPassFlag, 92 | } 93 | } 94 | 95 | return handler 96 | } 97 | 98 | func startWebServer() { 99 | handler := prometheusHandler() 100 | 101 | registerCollector() 102 | 103 | http.Handle(*metricsPathFlag, handler) 104 | http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { 105 | w.Write([]byte(` 106 | MongoDB Exporter 107 | 108 |

<h1>MongoDB Exporter</h1> 109 | <p><a href='` + *metricsPathFlag + `'>Metrics</a></p>
110 | 111 | `)) 112 | }) 113 | 114 | server := &http.Server{ 115 | Addr: *listenAddressFlag, 116 | ErrorLog: createHTTPServerLogWrapper(), 117 | } 118 | 119 | var err error 120 | if len(*webTLSCert) > 0 { 121 | clientValidation := "no" 122 | if len(*webTLSClientCa) > 0 && len(*webTLSCert) > 0 { 123 | certificates, err := shared.LoadCertificatesFrom(*webTLSClientCa) 124 | if err != nil { 125 | glog.Fatalf("Couldn't load client CAs from %s. Got: %s", *webTLSClientCa, err) 126 | } 127 | server.TLSConfig = &tls.Config{ 128 | ClientCAs: certificates, 129 | ClientAuth: tls.RequireAndVerifyClientCert, 130 | } 131 | clientValidation = "yes" 132 | } 133 | targetTLSPrivateKey := *webTLSPrivateKey 134 | if len(targetTLSPrivateKey) <= 0 { 135 | targetTLSPrivateKey = *webTLSCert 136 | } 137 | fmt.Printf("Listening on %s (scheme=HTTPS, secured=TLS, clientValidation=%s)\n", server.Addr, clientValidation) 138 | err = server.ListenAndServeTLS(*webTLSCert, targetTLSPrivateKey) 139 | } else { 140 | fmt.Printf("Listening on %s (scheme=HTTP, secured=no, clientValidation=no)\n", server.Addr) 141 | err = server.ListenAndServe() 142 | } 143 | 144 | if err != nil { 145 | panic(err) 146 | } 147 | } 148 | 149 | func registerCollector() { 150 | mongodbCollector := collector.NewMongodbCollector(collector.MongodbCollectorOpts{ 151 | URI: *mongodbURIFlag, 152 | TLSCertificateFile: *mongodbTLSCert, 153 | TLSPrivateKeyFile: *mongodbTLSPrivateKey, 154 | TLSCaFile: *mongodbTLSCa, 155 | TLSHostnameValidation: !(*mongodbTLSDisableHostnameValidation), 156 | CollectOplog: *mongodbCollectOplog, 157 | TailOplog: *mongodbCollectOplogTail, 158 | CollectReplSet: *mongodbCollectReplSet, 159 | CollectTopMetrics: *mongodbCollectTopMetrics, 160 | CollectDatabaseMetrics: *mongodbCollectDatabaseMetrics, 161 | CollectCollectionMetrics: *mongodbCollectCollectionMetrics, 162 | CollectProfileMetrics: *mongodbCollectProfileMetrics, 163 | CollectConnPoolStats: *mongodbCollectConnPoolStats, 164 | UserName: *mongodbUserName, 165 | AuthMechanism: *mongodbAuthMechanism, 166 | SocketTimeout: *mongodbSocketTimeout, 167 | }) 168 | prometheus.MustRegister(mongodbCollector) 169 | } 170 | 171 | type bufferedLogWriter struct { 172 | buf []byte 173 | } 174 | 175 | func (w *bufferedLogWriter) Write(p []byte) (n int, err error) { 176 | glog.Info(strings.TrimSpace(strings.Replace(string(p), "\n", " ", -1))) 177 | return len(p), nil 178 | } 179 | 180 | func createHTTPServerLogWrapper() *slog.Logger { 181 | return slog.New(&bufferedLogWriter{}, "", 0) 182 | } 183 | 184 | func main() { 185 | flag.Parse() 186 | if *version { 187 | fmt.Println("mongodb_exporter version: {{VERSION}}") 188 | return 189 | } 190 | shared.ParseEnabledGroups(*enabledGroupsFlag) 191 | 192 | startWebServer() 193 | } 194 | -------------------------------------------------------------------------------- /screenshots/mongodb-dashboard-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dcu/mongodb_exporter/8f53431089a13bc46f4f86533451c01b60582478/screenshots/mongodb-dashboard-1.png -------------------------------------------------------------------------------- /shared/connection.go: -------------------------------------------------------------------------------- 1 | package shared 2 | 3 | import ( 4 | "time" 5 | 6 | "crypto/tls" 7 | "crypto/x509" 8 | "fmt" 9 | "net" 10 | 11 | "github.com/golang/glog" 12 | "gopkg.in/mgo.v2" 13 | ) 14 | 15 | const ( 16 | dialMongodbTimeout = 10 * time.Second 17 | syncMongodbTimeout = 1 * 
time.Minute 18 | ) 19 | 20 | // MongoSessionOpts represents options for a Mongo session 21 | type MongoSessionOpts struct { 22 | URI string 23 | TLSCertificateFile string 24 | TLSPrivateKeyFile string 25 | TLSCaFile string 26 | TLSHostnameValidation bool 27 | UserName string 28 | AuthMechanism string 29 | SocketTimeout time.Duration 30 | } 31 | 32 | // MongoSession creates a Mongo session 33 | func MongoSession(opts MongoSessionOpts) *mgo.Session { 34 | dialInfo, err := mgo.ParseURL(opts.URI) 35 | if err != nil { 36 | glog.Errorf("Cannot connect to server using url %s: %s", opts.URI, err) 37 | return nil 38 | } 39 | 40 | dialInfo.Direct = true // Force direct connection 41 | dialInfo.Timeout = dialMongodbTimeout 42 | if opts.UserName != "" { 43 | dialInfo.Username = opts.UserName 44 | } 45 | 46 | err = opts.configureDialInfoIfRequired(dialInfo) 47 | if err != nil { 48 | glog.Errorf("%s", err) 49 | return nil 50 | } 51 | 52 | session, err := mgo.DialWithInfo(dialInfo) 53 | if err != nil { 54 | glog.Errorf("Cannot connect to server using url %s: %s", opts.URI, err) 55 | return nil 56 | } 57 | session.SetMode(mgo.Eventual, true) 58 | session.SetSyncTimeout(syncMongodbTimeout) 59 | session.SetSocketTimeout(opts.SocketTimeout) 60 | return session 61 | } 62 | 63 | func (opts MongoSessionOpts) configureDialInfoIfRequired(dialInfo *mgo.DialInfo) error { 64 | if opts.AuthMechanism != "" { 65 | dialInfo.Mechanism = opts.AuthMechanism 66 | } 67 | if len(opts.TLSCertificateFile) > 0 { 68 | certificates, err := LoadKeyPairFrom(opts.TLSCertificateFile, opts.TLSPrivateKeyFile) 69 | if err != nil { 70 | return fmt.Errorf("Cannot load key pair from '%s' and '%s' to connect to server '%s'. Got: %v", opts.TLSCertificateFile, opts.TLSPrivateKeyFile, opts.URI, err) 71 | } 72 | config := &tls.Config{ 73 | Certificates: []tls.Certificate{certificates}, 74 | InsecureSkipVerify: !opts.TLSHostnameValidation, 75 | } 76 | if len(opts.TLSCaFile) > 0 { 77 | ca, err := LoadCertificatesFrom(opts.TLSCaFile) 78 | if err != nil { 79 | return fmt.Errorf("Couldn't load client CAs from %s. Got: %s", opts.TLSCaFile, err) 80 | } 81 | config.RootCAs = ca 82 | } 83 | dialInfo.DialServer = func(addr *mgo.ServerAddr) (net.Conn, error) { 84 | conn, err := tls.Dial("tcp", addr.String(), config) 85 | if err != nil { 86 | glog.Infof("Could not connect to %v. Got: %v", addr, err) 87 | return nil, err 88 | } 89 | if config.InsecureSkipVerify { 90 | err = enrichWithOwnChecks(conn, config) 91 | if err != nil { 92 | glog.Infof("Could not disable hostname validation. 
Got: %v", err) 93 | } 94 | } 95 | return conn, err 96 | } 97 | } 98 | return nil 99 | } 100 | 101 | func enrichWithOwnChecks(conn *tls.Conn, tlsConfig *tls.Config) error { 102 | var err error 103 | if err = conn.Handshake(); err != nil { 104 | conn.Close() 105 | return err 106 | } 107 | 108 | opts := x509.VerifyOptions{ 109 | Roots: tlsConfig.RootCAs, 110 | CurrentTime: time.Now(), 111 | DNSName: "", 112 | Intermediates: x509.NewCertPool(), 113 | } 114 | 115 | certs := conn.ConnectionState().PeerCertificates 116 | for i, cert := range certs { 117 | if i == 0 { 118 | continue 119 | } 120 | opts.Intermediates.AddCert(cert) 121 | } 122 | 123 | _, err = certs[0].Verify(opts) 124 | if err != nil { 125 | conn.Close() 126 | return err 127 | } 128 | 129 | return nil 130 | } 131 | -------------------------------------------------------------------------------- /shared/group_desc.go: -------------------------------------------------------------------------------- 1 | package shared 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | var ( 8 | // EnabledGroups is map with the group name as field and a boolean indicating wether that group is enabled or not. 9 | EnabledGroups = make(map[string]bool) 10 | ) 11 | 12 | // ParseEnabledGroups parses the groups passed by the command line input. 13 | func ParseEnabledGroups(enabledGroupsFlag string) { 14 | for _, name := range strings.Split(enabledGroupsFlag, ",") { 15 | name = strings.TrimSpace(name) 16 | EnabledGroups[name] = true 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /shared/group_desc_test.go: -------------------------------------------------------------------------------- 1 | package shared 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func Test_ParseEnabledGroups(t *testing.T) { 8 | ParseEnabledGroups("a, b, c") 9 | if !EnabledGroups["a"] { 10 | t.Error("a was not loaded.") 11 | } 12 | if !EnabledGroups["b"] { 13 | t.Error("b was not loaded.") 14 | } 15 | if !EnabledGroups["c"] { 16 | t.Error("c was not loaded.") 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /shared/utils.go: -------------------------------------------------------------------------------- 1 | package shared 2 | 3 | import ( 4 | "crypto/tls" 5 | "crypto/x509" 6 | "io/ioutil" 7 | "regexp" 8 | "strings" 9 | ) 10 | 11 | var ( 12 | snakeRegexp = regexp.MustCompile("\\B[A-Z]+[^_$]") 13 | parameterizeRegexp = regexp.MustCompile("[^A-Za-z0-9_]+") 14 | ) 15 | 16 | // SnakeCase converts the given text to snakecase/underscore syntax. 17 | func SnakeCase(text string) string { 18 | result := snakeRegexp.ReplaceAllStringFunc(text, func(match string) string { 19 | return "_" + match 20 | }) 21 | 22 | return ParameterizeString(result) 23 | } 24 | 25 | // ParameterizeString parameterizes the given string. 
26 | func ParameterizeString(text string) string { 27 | result := parameterizeRegexp.ReplaceAllString(text, "_") 28 | return strings.ToLower(result) 29 | } 30 | 31 | // LoadCertificatesFrom returns certificates for a given pem file 32 | func LoadCertificatesFrom(pemFile string) (*x509.CertPool, error) { 33 | caCert, err := ioutil.ReadFile(pemFile) 34 | if err != nil { 35 | return nil, err 36 | } 37 | certificates := x509.NewCertPool() 38 | certificates.AppendCertsFromPEM(caCert) 39 | return certificates, nil 40 | } 41 | 42 | // LoadKeyPairFrom returns a configured TLS certificate 43 | func LoadKeyPairFrom(pemFile string, privateKeyPemFile string) (tls.Certificate, error) { 44 | targetPrivateKeyPemFile := privateKeyPemFile 45 | if len(targetPrivateKeyPemFile) <= 0 { 46 | targetPrivateKeyPemFile = pemFile 47 | } 48 | return tls.LoadX509KeyPair(pemFile, targetPrivateKeyPemFile) 49 | } 50 | -------------------------------------------------------------------------------- /shared/utils_test.go: -------------------------------------------------------------------------------- 1 | package shared 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func Test_SnakeCase(t *testing.T) { 8 | cases := []struct { 9 | in string 10 | out string 11 | }{ 12 | {in: "testing-string", out: "testing_string"}, 13 | {in: "TestingString", out: "testing_string"}, 14 | {in: "Testing_String", out: "testing__string"}, 15 | {in: "", out: ""}, 16 | } 17 | 18 | for _, test := range cases { 19 | if out := SnakeCase(test.in); out != test.out { 20 | t.Errorf("expected %s but got %s", test.out, out) 21 | } 22 | } 23 | } 24 | 25 | func Test_ParameterizeString(t *testing.T) { 26 | cases := []struct { 27 | in string 28 | out string 29 | }{ 30 | {in: "testing-string", out: "testing_string"}, 31 | {in: "TestingString", out: "testingstring"}, 32 | {in: "Testing-String", out: "testing_string"}, 33 | {in: "", out: ""}, 34 | } 35 | 36 | for _, test := range cases { 37 | if out := ParameterizeString(test.in); out != test.out { 38 | t.Errorf("expected %s but got %s", test.out, out) 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /snap/daemon_arguments: -------------------------------------------------------------------------------- 1 | # Set the command-line arguments to pass to the server. 2 | ARGS="" 3 | 4 | # The following options are supported by prometheus-mongodb-exporter : 5 | # -alsologtostderr 6 | # log to standard error as well as files 7 | # -auth.pass string 8 | # Password for basic auth. 9 | # -auth.user string 10 | # Username for basic auth. 11 | # -groups.enabled string 12 | # Comma-separated list of groups to use, for more info see: docs.mongodb.org/manual/reference/command/serverStatus/ (default "asserts,durability,background_flushing,connections,extra_info,global_lock,index_counters,network,op_counters,op_counters_repl,memory,locks,metrics") 13 | # -log_backtrace_at value 14 | # when logging hits line file:N, emit a stack trace 15 | # -log_dir string 16 | # If non-empty, write log files in this directory 17 | # -logtostderr 18 | # log to standard error instead of files 19 | # -mongodb.collect.database 20 | # collect MongoDB database metrics 21 | # -mongodb.collect.oplog 22 | # collect Mongodb Oplog status (default true) 23 | # -mongodb.collect.replset 24 | # collect Mongodb replica set status (default true) 25 | # -mongodb.tls-ca string 26 | # Path to PEM file that conains the CAs that are trused for server connections. 
27 | # If provided: MongoDB servers connecting to should present a certificate signed by one of this CAs. 28 | # If not provided: System default CAs are used. 29 | # -mongodb.tls-cert string 30 | # Path to PEM file that conains the certificate (and optionally also the private key in PEM format). 31 | # This should include the whole certificate chain. 32 | # If provided: The connection will be opened via TLS to the MongoDB server. 33 | # -mongodb.tls-disable-hostname-validation 34 | # Do hostname validation for server connection. 35 | # -mongodb.tls-private-key string 36 | # Path to PEM file that conains the private key (if not contained in mongodb.tls-cert file). 37 | # -mongodb.uri string 38 | # Mongodb URI, format: [mongodb://][user:pass@]host1[:port1][,host2[:port2],...][/database][?options] (default "mongodb://localhost:27017") 39 | # -stderrthreshold value 40 | # logs at or above this threshold go to stderr 41 | # -v value 42 | # log level for V logs 43 | # -version 44 | # Print mongodb_exporter version 45 | # -vmodule value 46 | # comma-separated list of pattern=N settings for file-filtered logging 47 | # -web.listen-address string 48 | # Address on which to expose metrics and web interface. (default ":9001") 49 | # -web.metrics-path string 50 | # Path under which to expose metrics. (default "/metrics") 51 | # -web.tls-cert string 52 | # Path to PEM file that conains the certificate (and optionally also the private key in PEM format). 53 | # This should include the whole certificate chain. 54 | # If provided: The web socket will be a HTTPS socket. 55 | # If not provided: Only HTTP. 56 | # -web.tls-client-ca string 57 | # Path to PEM file that conains the CAs that are trused for client connections. 58 | # If provided: Connecting clients should present a certificate signed by one of this CAs. 59 | # If not provided: Every client will be accepted. 60 | # -web.tls-private-key string 61 | # Path to PEM file that conains the private key (if not contained in web.tls-cert file). 62 | -------------------------------------------------------------------------------- /snap/snap_config_wrapper: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | test -e $SNAP_DATA/daemon_arguments || cp $SNAP/etc/prometheus-mongodb-exporter/daemon_arguments.example $SNAP_DATA/daemon_arguments 4 | 5 | . 
$SNAP_DATA/daemon_arguments 6 | exec $SNAP/bin/prometheus-mongodb-exporter $ARGS 7 | -------------------------------------------------------------------------------- /snap/snapcraft.yaml: -------------------------------------------------------------------------------- 1 | name: prometheus-mongodb-exporter 2 | version: 20171129 3 | summary: Prometheus mongodb Exporter 4 | description: | 5 | Exporter that exposes information gathered from mongodb for use by the Prometheus monitoring system 6 | confinement: strict 7 | grade: stable 8 | apps: 9 | mongodb-exporter: 10 | command: 'bin/prometheus-mongodb-exporter.wrapper' 11 | plugs: [network-bind, network] 12 | daemon: simple 13 | parts: 14 | mongodb-exporter: 15 | plugin: go 16 | source: https://github.com/dcu/mongodb_exporter.git 17 | go-importpath: github.com/dcu/mongodb_exporter 18 | build: | 19 | sudo add-apt-repository -y ppa:masterminds/glide && sudo apt-get update 20 | sudo apt-get install -y glide 21 | export GOPATH=$(pwd)/../go 22 | cd $GOPATH/src/github.com/dcu/mongodb_exporter 23 | make build 24 | install: | 25 | mkdir $SNAPCRAFT_PART_INSTALL/bin 26 | cp -p ../go/src/github.com/dcu/mongodb_exporter/mongodb_exporter $SNAPCRAFT_PART_INSTALL/bin/prometheus-mongodb-exporter 27 | snap-wrappers: 28 | plugin: dump 29 | source: . 30 | organize: 31 | snap_config_wrapper: bin/prometheus-mongodb-exporter.wrapper 32 | daemon_arguments: etc/prometheus-mongodb-exporter/daemon_arguments.example 33 | stage: 34 | - bin/prometheus-mongodb-exporter.wrapper 35 | - etc/prometheus-mongodb-exporter/daemon_arguments.example 36 | prime: 37 | - bin/prometheus-mongodb-exporter.wrapper 38 | - etc/prometheus-mongodb-exporter/daemon_arguments.example 39 | --------------------------------------------------------------------------------
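For reference, given the defaults documented above (`-web.listen-address ":9001"` and `-web.metrics-path "/metrics"`), a minimal Prometheus scrape configuration for this exporter could look like the sketch below. The job name and target host are illustrative assumptions and are not part of this repository; adjust them to wherever the exporter actually runs.

# prometheus.yml (sketch): scrape the exporter's default endpoint
scrape_configs:
  - job_name: 'mongodb'          # assumed job name, pick any label you like
    static_configs:
      - targets: ['localhost:9001']   # exporter's default -web.listen-address

With this in place, Prometheus scrapes http://localhost:9001/metrics, which is the same Prometheus datasource the bundled Grafana dashboard queries (e.g. `mongodb_connections`, `mongodb_op_counters_total`).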