├── resources └── riemann_plugin │ ├── collectd │ ├── .#meta.edn{:plugin │ └── meta.edn │ ├── str │ └── meta.edn │ ├── thresholds │ └── meta.edn │ └── python │ └── meta.edn ├── .gitignore ├── project.clj ├── src └── org │ └── spootnik │ └── riemann │ ├── webhook.clj │ ├── python.clj │ ├── thresholds.clj │ └── collectd.clj ├── README.md └── test └── org └── spootnik └── riemann └── thresholds_test.clj /resources/riemann_plugin/collectd/.#meta.edn{:plugin: -------------------------------------------------------------------------------- 1 | pyr@columba.spootnik.org.427 -------------------------------------------------------------------------------- /resources/riemann_plugin/str/meta.edn: -------------------------------------------------------------------------------- 1 | {:plugin "str" 2 | :title "Exposes string manipulation functions" 3 | :git-repo "https://github.com/pyr/riemann-extra" 4 | :require clojure.string} 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /lib 3 | /classes 4 | /checkouts 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | .lein-deps-sum 10 | .lein-failures 11 | .lein-plugins 12 | .lein-repl-history 13 | -------------------------------------------------------------------------------- /resources/riemann_plugin/thresholds/meta.edn: -------------------------------------------------------------------------------- 1 | {:plugin "thresholds" 2 | :title "A plugin handling thresholds on metrics" 3 | :git-repo "https://github.com/pyr/riemann-extra" 4 | :require org.spootnik.riemann.thresholds} 5 | -------------------------------------------------------------------------------- /resources/riemann_plugin/collectd/meta.edn: -------------------------------------------------------------------------------- 1 | {:plugin "collectd" 2 | :title "A plugin for massaging metrics coming from collectd" 3 | :git-repo 
"https://github.com/pyr/riemann-extra" 4 | :require org.spootnik.riemann.collectd} 5 | -------------------------------------------------------------------------------- /resources/riemann_plugin/python/meta.edn: -------------------------------------------------------------------------------- 1 | {:plugin "python" 2 | :title "A plugin for handling points coming from python functions" 3 | :git-repo "https://github.com/pyr/riemann-extra" 4 | :require org.spootnik.riemann.python} 5 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject org.spootnik/riemann-extra "0.2.4" 2 | :description "utility functions for riemann" 3 | :url "https://github.com/pyr/riemann-extra" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :profiles {:dev {:dependencies [[riemann "0.2.10"]]}} 7 | :dependencies [[org.clojure/clojure "1.6.0"]] 8 | :omit-source false) 9 | -------------------------------------------------------------------------------- /src/org/spootnik/riemann/webhook.clj: -------------------------------------------------------------------------------- 1 | (ns org.spootnik.riemann.webhook 2 | (:require [clj-http.client :as client]) 3 | (:require [cheshire.core :as json])) 4 | 5 | (defn post 6 | "POST to a webhook URL." 7 | [request url] 8 | (client/post url 9 | {:body (json/generate-string request) 10 | :socket-timeout 5000 11 | :conn-timeout 5000 12 | :content-type :json 13 | :accept :json 14 | :throw-entire-message? true})) 15 | 16 | (defn format-event 17 | "Formats an event for PD. 
event-type is one of :trigger, :acknowledge, 18 | :resolve" 19 | [event] 20 | {:description (str (:host event) " " 21 | (:service event) " is " 22 | (:state event) " (" 23 | (:metric event) ")") 24 | :details event}) 25 | 26 | (defn webhook 27 | [url] 28 | (fn [event] 29 | (post (format-event event) url))) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | riemann-extra: utility functions for riemann 2 | ============================================ 3 | 4 | riemann-extra provides you with additional stream 5 | and event handling functions for riemann. 6 | 7 | ## Building 8 | 9 | Run `lein uberjar` in the project's directory 10 | 11 | ## Configuration 12 | 13 | Starting with riemann 0.2.2, you can just provide a link to this jar in 14 | `/etc/default/riemann`, like so: 15 | 16 | ``` 17 | EXTRA_CLASSPATH=/path/to/riemann-extra.jar 18 | ``` 19 | 20 | ## Available Namespaces 21 | 22 | ### org.spootnik.riemann.thresholds 23 | 24 | * `threshold-check` 25 | 26 | A function which, given a map of thresholds, yields a function that associates 27 | a new state with each event: 28 | 29 | ```clojure 30 | (def thresholds 31 | {"cpu-user" {:warning 30 :critical 60} 32 | "cpu-system" {:warning 30 :critical 60} 33 | "cpu-idle2" {:warning 50 :critical 50 :invert true} 34 | "cpu-nice" {:warning 50 :critical 20} 35 | "cpu-idle" {:warning 50 :critical 20 :invert true} 36 | "cpu-idle3" {:warning 50 :critical 20 :invert true} 37 | "cpu-steal" {:warning 50 :critical 20}}) 38 | 39 | (require '[org.spootnik.riemann.thresholds :refer [threshold-check]]) 40 | 41 | (streams 42 | (smap (threshold-check thresholds) 43 | prn)) 44 | ``` 45 | -------------------------------------------------------------------------------- /src/org/spootnik/riemann/python.clj: -------------------------------------------------------------------------------- 1 | (ns org.spootnik.riemann.python 2 | 
"Helper functions to work with input from python-riemann-wrapper" 3 | (:require [riemann.streams :refer [with where* rate by adjust smap]] 4 | [clojure.string :as s])) 5 | 6 | (defmacro python-stream 7 | "Given a rate interval in seconds and child streams, catch events generated 8 | by a python riemann_wrapper (tagged \"python\") and forward per-second call and exception rates to the children. This is a macro: secs and children are spliced into a syntax-quoted stream template." 9 | [secs & children] 10 | `(where* (comp (partial some #{"python"}) :tags) 11 | (by [:host] 12 | (where* (comp (partial some #{"duration"}) :tags) 13 | (smap (fn [ev#] 14 | (assoc ev# 15 | :service (str (:prefix ev#) " calls/s"))) 16 | (with :metric 1 (rate ~secs ~@children)))) 17 | (where* (comp (partial some #{"exception"}) :tags) 18 | (smap (fn [ev#] 19 | (assoc ev# 20 | :service (str (:prefix ev#) " exceptions/s"))) 21 | (with :metric 1 (rate ~secs ~@children))))) 22 | (by [:host :service] 23 | (where* (comp (partial some #{"duration"}) :tags) 24 | (adjust [:service str " persec"] 25 | (with :metric 1 (rate ~secs ~@children)))) 26 | (where* (comp (partial some #{"exception"}) :tags) 27 | (adjust [:service str " persec"] 28 | (with :metric 1 (rate ~secs ~@children))))))) -------------------------------------------------------------------------------- /test/org/spootnik/riemann/thresholds_test.clj: -------------------------------------------------------------------------------- 1 | (ns org.spootnik.riemann.thresholds-test 2 | (:require [clojure.test :refer :all] 3 | [org.spootnik.riemann.thresholds :refer :all])) 4 | 5 | (def thresholds 6 | {"cpu-user" {:warning 30 :critical 60} 7 | "cpu-system" {:warning 30 :critical 60} 8 | "cpu-idle2" {:warning 50 :critical 50 :invert true} 9 | "cpu-nice" {:warning 50 :critical 20} 10 | "cpu-idle" {:warning 50 :critical 20 :invert true} 11 | "cpu-idle3" {:warning 50 :critical 20 :invert true} 12 | "cpu-steal" {:warning 50 :critical 20} 13 | "cpu-foo" {}}) 14 | 15 | (deftest forward-threshold-test 16 | (testing "forward thresholds should match" 17 | (let [testfn (threshold-check thresholds)] 18 | (is (= {:service "cpu-user" :metric 100 :state "critical" :tags #{}} 19 | (testfn {:service "cpu-user" :metric 100}))) 20 | (is (= 
{:service "cpu-user" :metric 50 :state "warning" :tags #{}} 21 | (testfn {:service "cpu-user" :metric 50}))) 22 | (is (= {:service "cpu-idle" :metric 0 :state "critical" :tags #{}} 23 | (testfn {:service "cpu-idle" :metric 0}))) 24 | (is (= {:service "cpu-idle" :metric 40 :state "warning" :tags #{}} 25 | (testfn {:service "cpu-idle" :metric 40}))) 26 | (is (= {:service "foo"} 27 | (testfn {:service "foo"}))) 28 | (is (= {:service "foo" :metric 2} 29 | (testfn {:service "foo" :metric 2}))) 30 | (is (= {:service "cpu-idle"} 31 | (testfn {:service "cpu-idle"}))) 32 | (is (= {:service "cpu-foo"} 33 | (testfn {:service "cpu-foo"})))))) 34 | -------------------------------------------------------------------------------- /src/org/spootnik/riemann/thresholds.clj: -------------------------------------------------------------------------------- 1 | (ns org.spootnik.riemann.thresholds 2 | "A common riemann use case: changing event states based 3 | on threshold lookups" 4 | (:require [clojure.set :refer [union]] 5 | [clojure.tools.logging :refer [warn error]])) 6 | 7 | (defn find-specific-threshold 8 | [{:keys [host tags]} 9 | {:keys [match-host match-tag match-default] :as threshold}] 10 | (cond 11 | match-tag (and ((set tags) match-tag) threshold) 12 | match-host (and (= match-host host) threshold) 13 | match-default threshold)) 14 | 15 | (defn match-threshold 16 | [{:keys [service]} [pattern payload]] 17 | (when (re-matches pattern service) 18 | payload)) 19 | 20 | (defn find-threshold 21 | [thresholds re-patterns event] 22 | (if-let [thresholds (or (get thresholds (:service event)) 23 | (some (partial match-threshold event) re-patterns))] 24 | (if (sequential? thresholds) 25 | (some (partial find-specific-threshold event) thresholds) 26 | thresholds))) 27 | 28 | (defn threshold-check 29 | "Given a list of standard or inverted thresholds, yield 30 | a function that will adapt an inputs state. 
31 | 32 | The output function does not process events with no metrics" 33 | [thresholds] 34 | (let [re-patterns (filter (complement (comp string? key)) thresholds)] 35 | (fn [{:keys [metric tags] :as event}] 36 | (try 37 | (if-let [{:keys [warning critical invert exact add-tags]} 38 | (if metric (find-threshold thresholds re-patterns event))] 39 | (assoc event 40 | :tags (union (set tags) (set add-tags)) 41 | :state 42 | (cond 43 | (nil? metric) "unknown" 44 | (and exact (not= (double metric) (double exact))) "critical" 45 | (and exact (= (double metric) (double exact))) "ok" 46 | (and critical ((if invert <= >) metric critical)) "critical" 47 | (and warning ((if invert <= >) metric warning)) "warning" 48 | :else "ok")) 49 | event) 50 | (catch Exception e 51 | (error e "threshold-check failed for " event)))))) 52 | -------------------------------------------------------------------------------- /src/org/spootnik/riemann/collectd.clj: -------------------------------------------------------------------------------- 1 | (ns org.spootnik.riemann.collectd 2 | "Helper functions to work with input from collectd" 3 | (:require [riemann.streams :refer [tagged sdo project* where* by smap]] [riemann.folds :as folds] 4 | [clojure.tools.logging :refer [error info]] 5 | [clojure.string :as s])) 6 | 7 | (def default-services 8 | [{:service "conntrack/conntrack" :rewrite "conntrack"} 9 | {:service "load/load/shortterm" :rewrite "load-shorterm"} 10 | {:service "load/load/midterm" :rewrite "load-midterm"} 11 | {:service "load/load/longterm" :rewrite "load-longterm"} 12 | 13 | {:service "swap/swap-used" :rewrite "swap used"} 14 | {:service "swap/swap-free" :rewrite "swap free"} 15 | {:service "swap/swap-cached" :rewrite "swap cached"} 16 | {:service "swap/swap_io-in" :rewrite "swap io in"} 17 | {:service "swap/swap_io-out" :rewrite "swap io out"} 18 | 19 | {:service "memory/memory-used" :rewrite "mem used"} 20 | {:service "memory/memory-free" :rewrite "mem free"} 21 | {:service "memory/memory-buffered" 
:rewrite "mem buffered"} 22 | {:service "memory/memory-cached" :rewrite "mem cached"} 23 | 24 | {:service #"^cpu-([0-9]+)/cpu-(.*)$" :rewrite "cpu-$1 $2"} 25 | {:service #"^aggregation-cpu-average/cpu-(.*)$" :rewrite "cpu $1"} 26 | {:service #"^df-(.*)/df_complex-(.*)$" :rewrite "df $1 $2"} 27 | {:service #"^GenericJMX-(.*)\.(.*)/(.*)$" :rewrite "$1-jmx-$2-$3"} 28 | {:service #"^interface-(.*)/if_(errors|packets|octets)/(tx|rx)$" 29 | :rewrite "nic $1 $3 $2"}]) 30 | 31 | (defn rewrite-service-with "Given a seq of rules ({:service <string or regex> :rewrite <string>}), returns a function which rewrites an event's :service using the first matching rule, or returns the event untouched when no rule matches. Events without a :service never match a regex rule." 32 | [rules] 33 | (let [matcher (fn [s1 s2] (if (string? s1) (= s1 s2) (and s2 (re-find s1 s2))))] 34 | (fn [{:keys [service] :as event}] 35 | (or 36 | (first 37 | (for [{:keys [rewrite] :as rule} rules 38 | :when (matcher (:service rule) service)] 39 | (assoc event :service 40 | (if (string? (:service rule)) 41 | rewrite 42 | (s/replace service (:service rule) rewrite))))) 43 | event)))) 44 | 45 | (defmacro df-stream "For non-expired events whose :plugin is \"df\", pairs the used and free events per host and plugin instance and forwards a \"df <instance> pct\" event (used / (used + free) * 100) to children." 46 | [& children] 47 | `(where* (fn [{plugin# :plugin state# :state}] 48 | (and (= "df" plugin#) 49 | (not= "expired" state#))) 50 | (by [:host :plugin_instance] 51 | (project* [(comp (partial = "used") :type_instance) 52 | (comp (partial = "free") :type_instance)] 53 | (smap 54 | (fn [[used# free#]] 55 | (when (and used# free#) 56 | (try 57 | (assoc used# 58 | :service (format "df %s pct" 59 | (:plugin_instance used#)) 60 | :metric (-> (:metric used#) 61 | (/ (+ (:metric used#) 62 | (:metric free#))) 63 | (* 100))) 64 | (catch Exception e# 65 | (error e# "cannot compute df pct for " used# free#) 66 | nil)))) 67 | ~@children))))) 68 | 69 | (defmacro mem-stream "For non-expired events whose :plugin is \"memory\", combines the used, cached, buffered and free events per host and forwards a \"mem pct\" event (used as a percentage of the four summed) to children." 70 | [& children] 71 | `(where* (fn [{plugin# :plugin state# :state}] 72 | (and (= "memory" plugin#) 73 | (not= "expired" state#))) 74 | (by [:host] 75 | (project* [(comp (partial = "used") :type_instance) 76 | (comp (partial = "cached") :type_instance) 77 | (comp (partial = "buffered") :type_instance) 78 | (comp (partial = "free") :type_instance)] 79 | (smap 80 | (fn [[used# cached# buf# free#]] 81 | (when (and used# 
cached# buf# free#) 82 | (try 83 | (assoc used# 84 | :service "mem pct" 85 | :metric (-> (:metric used#) 86 | (/ (+ (:metric used#) 87 | (:metric cached#) 88 | (:metric buf#) 89 | (:metric free#))) 90 | (* 100))) 91 | (catch Exception e# 92 | (error e# "cannot compute mem pct for " used# cached# buf# free#) 93 | nil)))) 94 | ~@children))))) 95 | 96 | (defmacro swap-stream "For non-expired events whose :plugin is \"swap\", combines the used, cached and free events per host and forwards a \"swap pct\" event (used as a percentage of the three summed) to children." 97 | [& children] 98 | `(where* (fn [{plugin# :plugin state# :state}] 99 | (and (= "swap" plugin#) 100 | (not= "expired" state#))) 101 | (by [:host] 102 | (project* [(comp (partial = "used") :type_instance) 103 | (comp (partial = "cached") :type_instance) 104 | (comp (partial = "free") :type_instance)] 105 | (smap 106 | (fn [[used# cached# free#]] 107 | (when (and used# cached# free#) 108 | (try 109 | (assoc used# 110 | :service "swap pct" 111 | :metric (-> (:metric used#) 112 | (/ (+ (:metric used#) 113 | (:metric cached#) 114 | (:metric free#))) 115 | (* 100))) 116 | (catch Exception e# 117 | (error e# "cannot compute swap pct for " used# cached# free#) 118 | nil)))) 119 | ~@children))))) 120 | 121 | (defmacro cpu-stream "For non-expired events whose :plugin_instance is \"cpu-average\", folds the user, system, softirq, interrupt, steal, wait and nice events per host with riemann.folds/sum and forwards the result as service \"cpu all\" to children." 122 | [& children] 123 | `(where* (fn [{plugin_instance# :plugin_instance state# :state}] 124 | (and (= "cpu-average" plugin_instance#) 125 | (not= "expired" state#))) 126 | (by [:host] 127 | (project* [(comp (partial = "user") :type_instance) 128 | (comp (partial = "system") :type_instance) 129 | (comp (partial = "softirq") :type_instance) 130 | (comp (partial = "interrupt") :type_instance) 131 | (comp (partial = "steal") :type_instance) 132 | (comp (partial = "wait") :type_instance) 133 | (comp (partial = "nice") :type_instance)] 134 | (smap (fn [events#] 135 | (when-let [summed# (riemann.folds/sum events#)] 136 | (assoc summed# :service "cpu all"))) 137 | ~@children))))) 138 | 139 | (defmacro jmx-memory-stream "For non-expired GenericJMX memory events, rewrites :service to the captured JMX instance name, then forwards \"<service> nonheap mem pct\" and \"<service> heap mem pct\" events (used / max * 100) to children." 140 | [& children] 141 | `(where* (fn [event#] 142 | (and (not= (:state event#) "expired") 143 | (re-find #"^GenericJMX-(.*)\.memory" (:service event#)))) 144 | (smap (fn [{service# :service :as event#}] 145 | 
(assoc event# :service 146 | (s/replace service# #"GenericJMX-(.*)\.memory.*$" "$1"))) 147 | (by [:host :plugin_instance] 148 | (project* [(comp (partial = "nonheapused") 149 | :type_instance) 150 | (comp (partial = "nonheapmax") 151 | :type_instance)] 152 | (smap 153 | (fn [[used# max#]] 154 | (when (and used# max#) 155 | (try 156 | (assoc used# 157 | :service (str (:service used#) " nonheap mem pct") 158 | :metric (-> (:metric used#) 159 | (/ (:metric max#)) 160 | (* 100))) 161 | (catch Exception e# 162 | (error e# "cannot compute nonheap mem pct for " used# max#) 163 | nil)))) 164 | ~@children)) 165 | (project* [(comp (partial = "heapused") :type_instance) 166 | (comp (partial = "heapmax") :type_instance)] 167 | (smap 168 | (fn [[used# max#]] 169 | (when (and used# max#) 170 | (try 171 | (assoc used# 172 | :service (str (:service used#) " heap mem pct") 173 | :metric (-> (:metric used#) 174 | (/ (:metric max#)) 175 | (* 100))) 176 | (catch Exception e# 177 | (error e# "cannot compute heap mem pct for " used# max#) 178 | nil)))) 179 | ~@children)))))) 180 | 181 | (def rewrite-service 182 | (rewrite-service-with default-services)) 183 | --------------------------------------------------------------------------------