├── .lein-classpath ├── doc ├── samplerr.gif ├── samplerr.png ├── samplerr.log ├── samplerr.html └── samplerr.svg ├── .gitignore ├── resources └── riemann_plugin │ └── samplerr │ └── meta.edn ├── tasks └── leiningen │ ├── pkg.clj │ ├── tar.clj │ └── fatrpm.clj ├── project.clj ├── examples └── riemann.config ├── CHANGELOG.md ├── TODO.md ├── README.md ├── LICENSE └── src └── riemann └── plugin └── samplerr.clj /.lein-classpath: -------------------------------------------------------------------------------- 1 | :tasks 2 | -------------------------------------------------------------------------------- /doc/samplerr.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccin2p3/samplerr/HEAD/doc/samplerr.gif -------------------------------------------------------------------------------- /doc/samplerr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccin2p3/samplerr/HEAD/doc/samplerr.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-repl-history 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | -------------------------------------------------------------------------------- /doc/samplerr.log: -------------------------------------------------------------------------------- 1 | recordmydesktop --windowid=0x2000001 -y94 --width=580 --height=426 --no-cursor --no-sound --overwrite -o samplerr.ogv 2 | ffmpeg -i samplerr.ogv -loop 0 samplerr.gif 3 | -------------------------------------------------------------------------------- /resources/riemann_plugin/samplerr/meta.edn: -------------------------------------------------------------------------------- 1 | {:plugin "samplerr" 2 | :title "A plugin for aggregating 
metrics in a round-robin fashion into elasticsearch" 3 | :git-repo "https://github.com/samplerr/samplerr" 4 | :require riemann.plugin.samplerr} 5 | -------------------------------------------------------------------------------- /tasks/leiningen/pkg.clj: -------------------------------------------------------------------------------- 1 | (ns leiningen.pkg 2 | (:use [leiningen.uberjar :only [uberjar]] 3 | [leiningen.fatrpm :only [fatrpm]] 4 | [leiningen.tar :only [tar]])) 5 | 6 | (defn pkg [project] 7 | (doto project 8 | (uberjar) 9 | (tar false) 10 | (fatrpm false))) 11 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject samplerr "0.6.8-SNAPSHOT" 2 | :description "riemann plugin to aggregate data in a round-robin fashion to elasticsearch" 3 | :url "http://github.com/samplerr/samplerr" 4 | :license {:name "EPL-1.0" 5 | :url "https://spdx.org/licenses/EPL-1.0.html"} 6 | :dependencies [[cc.qbits/spandex "0.8.2"]] 7 | :profiles {:provided 8 | {:dependencies 9 | [[cheshire "5.9.0"] 10 | [org.clojure/clojure "1.9.0"] 11 | [riemann "0.3.10"] 12 | [clj-time "0.14.2"] 13 | [org.clojure/tools.logging "1.2.1"]]}} 14 | :plugins [[lein-rpm "0.0.6" 15 | :exclusions [org.apache.maven/maven-plugin-api 16 | org.codehaus.plexus/plexus-container-default 17 | org.codehaus.plexus/plexus-utils 18 | org.clojure/clojure 19 | classworlds]] 20 | ; for lein-rpm 21 | [org.apache.maven/maven-plugin-api "2.0"] 22 | [org.codehaus.plexus/plexus-container-default 23 | "2.0.0"] 24 | [org.codehaus.plexus/plexus-utils "3.2.0"] 25 | [classworlds "1.1"] 26 | [test2junit "1.3.3"]] 27 | ) 28 | 29 | -------------------------------------------------------------------------------- /examples/riemann.config: -------------------------------------------------------------------------------- 1 | ; -*- mode: clojure; -*- 2 | ; vim: filetype=clojure 3 | 4 | (load-plugins) 5 | 
(repl-server {:host "127.0.0.1" :port 1234}) 6 | 7 | (let [host "0.0.0.0"] 8 | (tcp-server {:host host :port 8086}) 9 | (udp-server {:host host :port 8086}) 10 | (ws-server {:host host :port 8087}) 11 | ) 12 | 13 | (periodically-expire 120) 14 | 15 | (require '[riemann.plugin.samplerr :as samplerr]) 16 | (require '[clj-time.core :as t]) 17 | 18 | (let [elastic (samplerr/connect "http://localhost:9200") 19 | index-prefix ".samplerr-" 20 | alias-prefix "samplerr-" 21 | cfunc [{:func samplerr/average :name "avg"} 22 | {:func samplerr/minimum :name "min"} 23 | {:func samplerr/maximum :name "max"}] 24 | archives [{:tf "YYYY.MM.dd" :step (t/seconds 20) :ttl (t/days 2) :cfunc cfunc} 25 | {:tf "YYYY.MM" :step (t/minutes 10) :ttl (t/months 2) :cfunc cfunc} 26 | {:tf "YYYY" :step (t/hours 1) :ttl (t/years 10) :cfunc cfunc}] 27 | rotate (samplerr/periodically-rotate {:interval (t/days 1) :conn elastic :index-prefix index-prefix :alias-prefix alias-prefix :archives archives}) 28 | persist (batch 1000 10 (samplerr/persist {:index-prefix index-prefix :index-type "samplerr" :conn elastic}))] 29 | 30 | (streams 31 | (where (tagged "collectd") 32 | (by [:host :service] 33 | (samplerr/down archives persist)))) 34 | rotate) 35 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 
3 | 4 | ## [Unreleased][unreleased] 5 | ### Changed 6 | - bar 7 | 8 | ### Fixed 9 | - foo 10 | 11 | ## 0.6.4 - 2018-09-13 12 | - fix bug that made rotation still throw on ES6.x 13 | - hopefully last embarrassing attempt at fixing this bug 14 | 15 | ## 0.6.3 - 2018-09-12 16 | - fix bug that made rotation still throw on ES6.x 17 | 18 | ## 0.6.2 - 2018-09-11 19 | - fix bug that made rotation/purge throw on ES6.x 20 | 21 | ## 0.6.1 - 2018-08-23 22 | - bump spandex dep to 0.6.4 23 | - fix some typos in README (thanks boernd and brutasse) 24 | - improve error handling 25 | 26 | ## 0.4.1 - 2017-07-11 27 | - move from clojurewerkz/elastisch to qbits/spandex 28 | 29 | ## 0.3.6 - 2017-06-06 30 | - adds elasticsearch 5.x compatibility 31 | - bump dep versions to match riemann 0.2.13 32 | 33 | ## 0.3.5 - 2017-01-13 34 | 35 | ### Fixed 36 | - low frequency (lower than step) events were lost 37 | 38 | ## 0.3.3 - 2016-09-05 39 | 40 | ### Fixed 41 | - events were lost, i.e. not part of aggregation 42 | this is now fixed in average/minimum/maximum/counter 43 | 44 | ## 0.3.2 - 2016-03-31 45 | 46 | ### Fixed 47 | - purge is now fixed (was noop) 48 | 49 | ## 0.3.1 - 2016-03-22 50 | 51 | ### Changed 52 | - remove at-at dependency 53 | - change rotate/purge semantics 54 | 55 | ## 0.2.1 - 2016-03-17 56 | 57 | ### Changed 58 | - change semantics of higher-level functions 59 | - implements alias rotation 60 | 61 | ## [0.1.1] - 2016-02-05 62 | - initial release 63 | 64 | -------------------------------------------------------------------------------- /tasks/leiningen/tar.clj: -------------------------------------------------------------------------------- 1 | (ns leiningen.tar 2 | (:use [clojure.java.shell :only [sh with-sh-dir]] 3 | [clojure.java.io :only [file delete-file writer copy]] 4 | [clojure.string :only [join capitalize trim-newline split trim]] 5 | [leiningen.uberjar :only [uberjar]])) 6 | 7 | (defn delete-file-recursively 8 | "Delete file f. 
If it's a directory, recursively delete all its contents. 9 | Raise an exception if any deletion fails unless silently is true." 10 | [f & [silently]] 11 | (System/gc) ; This sometimes helps release files for deletion on windows. 12 | (let [f (file f)] 13 | (if (.isDirectory f) 14 | (doseq [child (.listFiles f)] 15 | (delete-file-recursively child silently))) 16 | (delete-file f silently))) 17 | 18 | (defn tar-dir 19 | "Tar package working directory." 20 | [project] 21 | (file (:root project) "target" "tar" (str (:name project) "-" 22 | (:version project)))) 23 | 24 | (defn cleanup 25 | [project] 26 | ; Delete working dir. 27 | (when (.exists (file (:root project) "target" "tar")) 28 | (delete-file-recursively (file (:root project) "target" "tar")))) 29 | 30 | (defn reset 31 | "Removes the tar working directory and any tarballs left in target/ by a previous build." [project] 32 | (cleanup project) 33 | (doseq [f (.listFiles (file (:root project) "target"))] (when (.endsWith (.getName f) ".tar.bz2") (delete-file f true)))) ; sh starts no shell, so the old "rm target/*.tar.bz2" glob was never expanded 34 | 35 | (defn make-tar-dir 36 | "Creates the tarball package structure in a new directory." 37 | [project] 38 | (let [dir (tar-dir project)] 39 | (.mkdirs dir) 40 | 41 | ; Jar 42 | (.mkdirs (file dir "lib")) 43 | (copy (file (:root project) "target" 44 | (str "samplerr-" (:version project) "-standalone.jar")) 45 | (file dir "lib" "samplerr.jar")) ; was "samplerr-", a truncated file name 46 | 47 | dir)) 48 | 49 | (defn write 50 | "Write string to file, plus newline" 51 | [file string] 52 | (with-open [w (writer file)] 53 | (.write w (str (trim-newline string) "\n")))) 54 | 55 | (defn md5 56 | "Computes the md5 checksum of a file. Returns a hex string." 57 | [file] 58 | (-> (->> file 59 | str 60 | (sh "md5sum") 61 | :out) 62 | (split #" ") 63 | first 64 | trim)) 65 | 66 | (defn compress 67 | "Convert given package directory to a .tar.bz2." 
68 | [project tar-dir] 69 | (let [filename (str (:name project) 70 | "-" 71 | (:version project) 72 | ".tar.bz2") 73 | tarball (str (file (:root project) 74 | "target" 75 | filename))] 76 | (with-sh-dir (.getParent tar-dir) 77 | (print (:err (sh "tar" "cvjf" tarball (.getName tar-dir))))) 78 | 79 | (write (str tarball ".md5") 80 | (str (md5 tarball) " " filename)))) 81 | 82 | (defn tar 83 | ([project] (tar project true)) 84 | ([project uberjar?] 85 | (reset project) 86 | (when uberjar? (uberjar project)) 87 | (compress project (make-tar-dir project)) 88 | (cleanup project))) 89 | -------------------------------------------------------------------------------- /tasks/leiningen/fatrpm.clj: -------------------------------------------------------------------------------- 1 | (ns leiningen.fatrpm 2 | (:refer-clojure :exclude [replace]) 3 | (:use [clojure.java.shell :only [sh]] 4 | [clojure.java.io :only [file delete-file writer copy]] 5 | [clojure.string :only [join capitalize trim-newline replace]] 6 | [leiningen.uberjar :only [uberjar]] 7 | [leiningen.tar :only [md5]]) 8 | (:import java.util.Date 9 | java.text.SimpleDateFormat 10 | (org.codehaus.mojo.rpm RPMMojo 11 | AbstractRPMMojo 12 | Mapping Source 13 | SoftlinkSource 14 | Scriptlet) 15 | (org.apache.maven.project MavenProject) 16 | (org.apache.maven.shared.filtering DefaultMavenFileFilter) 17 | (org.codehaus.plexus.logging.console ConsoleLogger))) 18 | 19 | (defn write 20 | "Write string to file, plus newline" 21 | [file string] 22 | (with-open [w (writer file)] 23 | (.write w (str (trim-newline string) "\n")))) 24 | 25 | (defn workarea 26 | [project] 27 | (file (:root project) "target" "rpm")) 28 | 29 | (defn cleanup 30 | [project] 31 | (sh "rm" "-rf" (str (workarea project)))) 32 | 33 | (defn reset 34 | "Removes the rpm work area and any RPMs left in target/ by a previous build." [project] 35 | (cleanup project) 36 | (doseq [f (.listFiles (file (:root project) "target"))] (when (.endsWith (.getName f) ".rpm") (delete-file f true)))) ; sh starts no shell, so the old "rm target/*.rpm" glob was never expanded 37 | 38 | (defn get-version 39 | [project] 40 | (let [df (SimpleDateFormat. 
".yyyyMMdd.HHmmss")] 41 | (replace (:version project) #"-SNAPSHOT" (.format df (Date.))))) 42 | 43 | (defn set-mojo! 44 | "Set a field on an AbstractRPMMojo object." 45 | [object name value] 46 | (let [field (.getDeclaredField AbstractRPMMojo name)] 47 | (.setAccessible field true) 48 | (.set field object value)) 49 | object) 50 | 51 | (defn array-list 52 | "Returns a new java.util.ArrayList holding the items of list, in order." [list] 53 | (let [result (java.util.ArrayList.)] 54 | (doseq [item list] (.add result item)) 55 | result)) 56 | 57 | (defn scriptlet 58 | "Creates a scriptlet backed by a file" 59 | [filename] 60 | (doto (Scriptlet.) 61 | (.setScriptFile (file filename)))) 62 | 63 | (defn source 64 | "Create a source with a local location and a destination." 65 | ([] (Source.)) 66 | ([location] 67 | (doto (Source.) 68 | (.setLocation (str location)))) 69 | ([location destination] 70 | (doto (Source.) 71 | (.setLocation (str location)) 72 | (.setDestination (str destination))))) 73 | 74 | (defn mapping 75 | [m] 76 | (doto (Mapping.) 77 | (.setArtifact (:artifact m)) 78 | (.setConfiguration (case (:configuration m) 79 | true "true" 80 | false "false" 81 | nil "false" 82 | (:configuration m))) 83 | (.setDependency (:dependency m)) 84 | (.setDirectory (:directory m)) 85 | (.setDirectoryIncluded (boolean (:directory-included? m))) 86 | (.setDocumentation (boolean (:documentation? m))) 87 | (.setFilemode (:filemode m)) 88 | (.setGroupname (:groupname m)) 89 | (.setRecurseDirectories (boolean (:recurse-directories? 
m))) 90 | (.setSources (:sources m)) 91 | (.setUsername (:username m)))) 92 | 93 | (defn mappings 94 | [project] 95 | (map (comp mapping 96 | (partial merge {:username "riemann" 97 | :groupname "riemann"})) 98 | 99 | [; Jar 100 | {:directory "/usr/lib/riemann/" 101 | :filemode "644" 102 | :username "root" 103 | :groupname "root" 104 | :sources [(source (str (file (:root project) 105 | "target" 106 | (str "samplerr-" 107 | (:version project) 108 | "-standalone.jar"))) 109 | "samplerr.jar")]}])) 110 | 111 | (defn blank-rpm 112 | "Create a new RPM file" 113 | [] 114 | (let [mojo (RPMMojo.) 115 | fileFilter (DefaultMavenFileFilter.)] 116 | (set-mojo! mojo "project" (MavenProject.)) 117 | (.enableLogging fileFilter (ConsoleLogger. 0 "Logger")) 118 | (set-mojo! mojo "mavenFileFilter" fileFilter))) 119 | 120 | (defn create-dependency 121 | [rs] 122 | (let [hs (java.util.LinkedHashSet.)] 123 | (doseq [r rs] (.add hs r)) 124 | hs)) 125 | 126 | (defn make-rpm 127 | "Create and execute a Mojo RPM." 128 | [project] 129 | (doto (blank-rpm) 130 | (set-mojo! "projversion" (get-version project)) 131 | (set-mojo! "name" (:name project)) 132 | (set-mojo! "summary" (:description project)) 133 | (set-mojo! "copyright" "Kyle Kingsbury & contributors") 134 | (set-mojo! "workarea" (workarea project)) 135 | (set-mojo! "mappings" (mappings project)) 136 | (set-mojo! "preinstallScriptlet" (scriptlet 137 | (file (:root project) 138 | "pkg" "deb" "preinst.sh"))) 139 | (set-mojo! 
"requires" (create-dependency ["riemann >= 0.2.10"])) 140 | (.execute))) 141 | 142 | (defn extract-rpm 143 | "Snags the RPM file out of its little mouse-hole and brings it up to target/, 144 | then generates an md5" 145 | [project] 146 | (let [dir (file (workarea project) 147 | (:name project) 148 | "RPMS" 149 | "noarch") 150 | rpms (remove #(.isDirectory %) (.listFiles dir))] 151 | (doseq [rpm rpms] 152 | (let [dest (file (:root project) "target" (.getName rpm))] 153 | ; Move 154 | (.renameTo rpm dest) 155 | 156 | ; MD5 157 | (write (str dest ".md5") 158 | (str (md5 dest) " " (.getName rpm))))))) 159 | 160 | (defn fatrpm 161 | ([project] (fatrpm project true)) 162 | ([project uberjar?] 163 | (reset project) 164 | (when uberjar? (uberjar project)) 165 | (make-rpm project) 166 | (extract-rpm project) 167 | (cleanup project))) 168 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODO 2 | 3 | ## 4 | 5 | 21:49 < aphyr> righto 6 | 21:49 < aphyr> yeah, so I'd say for stuff like sum, for instance 7 | 21:50 < aphyr> (defn sum [interval & cs] (let [count (atom 0)] (fn stream [event] (swap! 
state + (:metric event)) ...)) 8 | 21:50 < aphyr> same pattern as rate, sreduce itself, etc 9 | 21:50 < aphyr> close over an atom with your stream fn 10 | 21:51 < aphyr> https://github.com/riemann/riemann/blob/master/src/riemann/streams.clj#L220-L223 11 | 21:51 < aphyr> https://github.com/riemann/riemann/blob/master/src/riemann/streams.clj#L263-L274 12 | 21:51 < aphyr> hopefully a straightforward pattern to apply to your use case 13 | 14 | ## aggregation 15 | 16 | * (fixed-offset-time-window) is too memory intensive 17 | for min/max/avg we don't need to store all values 18 | → (samplerr/scamm 19 | [interval & children] 20 | "accumulates events during fixed interval and emits exactly five events: sum, count, average, min and max 21 | it appends "/cfunc/interval" to service where cfunc is one of min, max, avg and count" 22 | (…) 23 | 24 | ### POC for min/max/avg/sum/count 25 | 26 | * probably need to use (streams/sreduce) 27 | 28 | ```clojure 29 | (let [index (index)] 30 | (streams 31 | (default :ttl 60 32 | (where metric 33 | (by [:host :service] 34 | (sreduce (fn [acc event] (if (or (nil? acc) (expired? acc)) event (if (>= (:metric event) (:metric acc)) event acc))) 35 | (smap #(assoc % :service (str (:service %) " max")) 36 | (coalesce 60 37 | (smap #(first %) index)))) 38 | (sreduce (fn [acc event] (if (or (nil? acc) (expired? acc)) event (assoc event :metric (+ (:metric acc) (:metric event))))) 39 | (smap #(assoc % :service (str (:service %) " sum")) 40 | (coalesce 60 41 | (smap #(first %) index)))) 42 | (sreduce (fn [acc event] (if (or (nil? acc) (expired? acc)) (assoc event :metric 1) (assoc event :metric (+ 1 (:metric acc))))) {:metric 0} 43 | (smap #(assoc % :service (str (:service %) " count")) 44 | (coalesce 60 45 | (smap #(first %) index)))) 46 | (sreduce (fn [acc event] (if (or (nil? acc) (expired? 
acc)) (assoc event :sum (:metric event) :count 1) (assoc event :sum (+ (:sum acc) (:metric event)) :count (+ 1 (:count acc))))) nil 47 | (smap #(assoc % :service (str (:service %) " avg") :metric (/ (:sum %) (:count %))) 48 | (coalesce 60 49 | (smap #(first %) index)))) 50 | (sreduce (fn [acc event] (if (or (nil? acc) (expired? acc)) event (if (<= (:metric event) (:metric acc)) event acc))) 51 | (smap #(assoc % :service (str (:service %) " min")) 52 | (coalesce 60 53 | (smap #(first %) index))))))))) 54 | ``` 55 | 56 | ## aliasing 57 | 58 | * use filtered aliases in ES to avoid having duplicate entries e.g. for daily indices between 2016.04 and 2016.04.23 when search query spans multiple days 59 | * sort retention-policies 60 | * verify overlap between periods 61 | 62 | * (samplerr/periodically-shift ) 63 | 64 | * (samplerr/periodically-shift 86400 samplerr-i samplerr-a [{:es_index "YYYY.MM.DD" :keep 172800} 65 | {:es_index "YYYY.MM" :keep 5270400} 66 | {:es_index "YYYY"}]) 67 | * list indices 68 | 69 | (list-indices prefix) 70 | 71 | (require '[clojurewerkz.elastisch.rest.index :as esri]) 72 | (def elastic (samplerr/connect "http://localhost:9200")) 73 | 74 | (keys (esri/get-aliases elastic "samplerr-*")) 75 | 76 | (def 77 | 78 | ## Exceptions seen in the wild 79 | 80 | ``` 81 | { 82 | "@message": "riemann.streams$smap$stream__6995@34faf7e5 threw", 83 | "@timestamp": "2018-09-13T12:27:51.797Z", 84 | "@source_host": "ccosvms0243", 85 | "@fields": { 86 | "exception": { 87 | "stacktrace": "at clojure.lang.Numbers.ops(Numbers.java:1018)\nat clojure.lang.Numbers.gt(Numbers.java:234)\nat clojure.lang.Numbers.max(Numbers.java:4052)\nat riemann.plugin.samplerr$down_n_cf$fn__9606.invoke(samplerr.clj:246)\nat riemann.streams$smap$stream__6995.invoke(streams.clj:165)\nat riemann.streams$smap$stream__6995$fn__7010.invoke(streams.clj:167)\nat riemann.streams$smap$stream__6995.invoke(streams.clj:167)\nat 
riemann.plugin.samplerr$average$stream__9097__auto____9464$fn__9469.invoke(samplerr.clj:195)\nat riemann.plugin.samplerr$average$stream__9097__auto____9464.invoke(samplerr.clj:195)\nat riemann.streams$sreduce$stream__7191$fn__7206.invoke(streams.clj:234)\nat riemann.streams$sreduce$stream__7191.invoke(streams.clj:234)\nat riemann.plugin.samplerr$down_n_cf$stream__9097__auto____9608$fn__9613.invoke(samplerr.clj:244)\nat riemann.plugin.samplerr$down_n_cf$stream__9097__auto____9608.invoke(samplerr.clj:244)\nat riemann.streams$with$stream__8692$fn__8731.invoke(streams.clj:1343)\nat riemann.streams$with$stream__8692.invoke(streams.clj:1343)\nat riemann.streams$sdo$stream__7222$fn__7237.invoke(streams.clj:246)\nat riemann.streams$sdo$stream__7222.invoke(streams.clj:246)\nat riemann.streams$by_fn$stream__8942$fn__8947.invoke(streams.clj:1551)\nat riemann.streams$by_fn$stream__8942.invoke(streams.clj:1551)\nat riemann.config$eval9768$stream__9097__auto____9964$fn__9993.invoke(riemann.config:48)\nat riemann.config$eval9768$stream__9097__auto____9964.invoke(riemann.config:48)\nat riemann.config$eval9768$stream__9097__auto____10024$fn__10053.invoke(riemann.config:44)\nat riemann.config$eval9768$stream__9097__auto____10024.invoke(riemann.config:44)\nat riemann.config$eval9768$stream__9097__auto____10143$fn__10148.invoke(riemann.config:43)\nat riemann.config$eval9768$stream__9097__auto____10143.invoke(riemann.config:43)\nat riemann.core$stream_BANG_$fn__9746.invoke(core.clj:20)\nat riemann.core$stream_BANG_.invokeStatic(core.clj:19)\nat riemann.core$stream_BANG_.invoke(core.clj:15)\nat riemann.transport$handle.invokeStatic(transport.clj:171)\nat riemann.transport$handle.invoke(transport.clj:165)\nat riemann.transport.tcp$tcp_handler.invokeStatic(tcp.clj:109)\nat riemann.transport.tcp$tcp_handler.invoke(tcp.clj:102)\nat riemann.transport.tcp$gen_tcp_handler$fn__12899.invoke(tcp.clj:68)\nat 
riemann.transport.tcp.proxy$io.netty.channel.ChannelInboundHandlerAdapter$ff19274a.channelRead(Unknown Source)\nat io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:362)\nat io.netty.channel.AbstractChannelHandlerContext.access$600(AbstractChannelHandlerContext.java:38)\nat io.netty.channel.AbstractChannelHandlerContext$7.run(AbstractChannelHandlerContext.java:353)\nat io.netty.util.concurrent.DefaultEventExecutor.run(DefaultEventExecutor.java:66)\nat io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:886)\nat io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)\nat java.lang.Thread.run(Thread.java:748)", 88 | "exception_class": "java.lang.NullPointerException", 89 | "exception_message": null 90 | }, 91 | "file": "NO_SOURCE_FILE", 92 | "method": "invoke", 93 | "level": "WARN", 94 | "line_number": "0", 95 | "loggerName": "riemann.streams", 96 | "class": "clojure.tools.logging$eval257$fn__262", 97 | "mdc": {}, 98 | "threadName": "defaultEventExecutorGroup-2-3" 99 | } 100 | } 101 | ``` 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # samplerr 2 | 3 | ## Introduction 4 | 5 | The main goal of this project is to provide a means for long term relevant storage for your metrics. 6 | It borrows some of [rrdtool](http://rrdtool.org/)'s concepts and leverages the power of a modern storage backend: [elasticsearch](http://elastic.co/products/elasticsearch). 7 | 8 | The idea is to downsample metrics to multiple sampling rates by consolidating those using meaningful aggregation functions: multiple archive stores with different resolutions. 9 | Different resolution archives are mainly useful for two reasons: 10 | 11 | 1. Keep storage space in bounds 12 | 2. 
Keep data amount in bounds at query time 13 | 14 | Different consolidation functions (*e.g.* min, max, avg, *etc.*) are mainly useful for keeping track of what matters in the metrics you keep. 15 | 16 | samplerr keeps storage low and client queries fast by purging high-resolution data periodically and creates 17 | [elasticsearch aliases](https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-aliases.html) to point the clients to the highest available resolution. 18 | 19 | ![elasticsearch aliases](doc/samplerr.gif) 20 | 21 | ## How it works 22 | 23 | ![sampler diagram](doc/samplerr.png) 24 | 25 | In this example, samplerr ingests a metric which has a 5s interval. It then downsamples it to 3 different archives with different consolidation functions. 26 | It keeps different retention policies for each elasticsearch index. For instance, the highest resolution data (30s) is kept for two days, while the lowest resolution (3h) is kept for 1 year. 27 | The disk footprint is the same for all three data stores. 
28 | 29 | ## Features 30 | 31 | * multiple resolution archives 32 | * consolidation functions 33 | * constant round robin storage footprint per metric with respect to time 34 | * transparent query across all archives 35 | 36 | Its architecture is modular, so you can use any of the following main functions: 37 | 38 | * *Downsample* metrics using consolidation functions 39 | * *Persist* metrics to the storage backend 40 | * *Rotate* archive references 41 | * *Purge* expired archives 42 | 43 | ## Implementation 44 | 45 | The current implementation: 46 | 47 | * is a [riemann](http://riemann.io/) plugin 48 | * writes your metrics to [elasticsearch](http://elastic.co/products/elasticsearch) 49 | * aggregates data using arbitrary clojure functions 50 | * aggregates data in realtime into different round robin time-based elasticsearch indices (archives) 51 | * manages your time-based elasticsearch aliases to point to highest possible resolution data 52 | * ensures your metric stays within storage boundaries 53 | 54 | ## Installation 55 | 56 | After cloning the repo, you can build the plugin using [leiningen](https://github.com/technomancy/leiningen) 57 | 58 | ``` 59 | lein uberjar 60 | ``` 61 | 62 | This will create a plugin jar named `samplerr-x.y.z-SNAPSHOT-standalone.jar` which you can include into your *java classpath*, *e.g.*: 63 | 64 | ``` 65 | java -cp /usr/lib/riemann/riemann.jar:/usr/lib/riemann/samplerr-0.1.1-SNAPSHOT-standalone.jar riemann.bin start /etc/riemann/riemann.config 66 | ``` 67 | 68 | On debian or redhat you could also add the classpath using the `EXTRA_CLASSPATH` variable available respectively in `/etc/default/riemann` or `/etc/sysconfig/riemann`. 
69 | 70 | ## Synopsis 71 | 72 | ```clojure 73 | (load-plugins) 74 | (require '[clj-time.core :as t]) 75 | 76 | (let [elastic (samplerr/connect {:hosts ["http://localhost:9200"]}) 77 | index-prefix ".samplerr" 78 | alias-prefix "samplerr" 79 | cfunc [{:func samplerr/average :name "avg"} 80 | {:func samplerr/minimum :name "min"} 81 | {:func samplerr/maximum :name "max"}] 82 | archives [{:tf "YYYY.MM.dd" :step (t/seconds 20) :ttl (t/days 2) :cfunc cfunc} 83 | {:tf "YYYY.MM" :step (t/minutes 10) :ttl (t/months 2) :cfunc cfunc} 84 | {:tf "YYYY" :step (t/hours 1) :ttl (t/years 10) :cfunc cfunc}] 85 | rotate (samplerr/periodically-rotate {:interval (t/days 1) :conn elastic :index-prefix index-prefix :alias-prefix alias-prefix :archives archives}) 86 | persist (batch 1000 10 (samplerr/persist {:index-prefix index-prefix :index-type "samplerr" :conn elastic}))] 87 | 88 | (streams 89 | (where (tagged "collectd") 90 | (by [:host :service] 91 | (samplerr/down archives persist)))) 92 | rotate) 93 | ``` 94 | 95 | ## Usage 96 | 97 | `samplerr` provides five high-level functions, two of which are stream functions. 98 | 99 | ### Stream functions 100 | 101 | #### `(down archives & children)` 102 | 103 | This stream function splits streams by archive and consolidation functions. 104 | It conveniently passes on events to child streams, for example to send those to elasticsearch using the `persist` stream function. 105 | 106 | The sequence `archives` should contain at least one archive. Each archive describes the aggregation that shall be performed and the target archive: 107 | 108 | ```clojure 109 | (def archives [{:tf "YYYY.MM.dd" :step 20 :cfunc cfunc} 110 | {:tf "YYYY.MM" :step 600 :cfunc cfunc} 111 | {:tf "YYYY" :step 3600 :cfunc cfunc}]) 112 | ``` 113 | 114 | * `:tf` time format string to be used to target the archive. This will be used by `persist` to target the corresponding elasticsearch index. This will be parsed by `clj-time.format` and must thus be valid. 
Example: the event `{:time 1458207113000 :metric 42}` will be indexed to elasticsearch into `.samplerr-2016.03.17`, `.samplerr-2016.03` and `.samplerr-2016` concurrently with the above config. 115 | * `:step` contains the consolidation time interval to be used to accumulate events to be aggregated using `cfunc`. This is the equivalent of `rrdtool`'s step, and represents the resolution of your time series. 116 | * `:cfunc` contains the list of consolidation functions to be used. 117 | 118 | Each consolidation function is a hash map containing two keys: 119 | 120 | ```clojure 121 | (def cfunc [{:func samplerr/average :name "avg"} 122 | {:func samplerr/minimum :name "min"} 123 | {:func samplerr/maximum :name "max"}]) 124 | ``` 125 | 126 | * The value of `:func` contains the stream function to be used for consolidation. It should accept one parameter corresponding to the `:step` interval. **the interface may change in the future** 127 | * The value of `:name` will be used as an attribute to the consolidated events, and subsequently be indexed using elasticsearch. Following up on the above example: the same event stream will be indexed to 9 elasticsearch documents: one per archive and per cfunc. For instance: `{"@timestamp": "2016-03-17T10:31:53+01:00", "metric": 42, "cfunc": "avg", "_index": ".samplerr-2016.03.17"}` 128 | 129 | `samplerr` provides some commonly used cfuncs like `average`, `minimum` and `maximum` which are described in the corresponding section. 130 | 131 | #### `(persist options & children)` 132 | 133 | This stream function sends events processed by `down` to the storage backend (elasticsearch). It is configured using the hash-map `options`: 134 | 135 | ```clojure 136 | (def options {:index-prefix index-prefix :index-type index-type :conn es-conn-handle}) 137 | ``` 138 | 139 | * `:index-prefix` points to the string to be prefixed to the elasticsearch index. The event's time formatted using the archive's `:tf` will be appended to that prefix. 
140 | * `:index-type` elasticsearch document type 141 | * `:conn` connection handle to the elasticsearch REST endpoint. This can be a [`qbits.spandex/client` endpoint](https://github.com/mpenet/spandex/blob/master/src/clj/qbits/spandex.clj#L33), or our wrapped one called `connect` 142 | 143 | Events should contain the riemann attribute `:tf` which will route them to the appropriate archive. 144 | 145 | ### Other functions 146 | 147 | #### `(connect)` 148 | 149 | This is a proxy to `qbits.spandex/client` 150 | 151 | #### `(rotate {:conn es-conn-handle :index-prefix index-prefix :alias-prefix alias-prefix :archives archives})` 152 | 153 | This will manage elasticsearch aliases. 154 | Aliases will be created for each `archive` by concatenating `alias-prefix` with the `:tf` formatted date and will point to the first *unexpired* index (prefix `index-prefix`). Expiry is computed using the archive's `:ttl`. 155 | The idea behind this is that clients will query elasticsearch using the aliases. Most high-level clients (*e.g.* [grafana], [kibana]) can only point to one time-based index pattern, *e.g.* `foo-YYYY.MM.dd`. 156 | 157 | `samplerr` will transparently position aliases pointing to the highest possible resolution archive that overlaps with it and that is not expired. The algorithm is roughly the following: 158 | 159 | * for each index matching `<index-prefix>*` 160 | * is the ttl expired? 161 | * YES: move all its aliases to the next unexpired period 162 | * NO: 163 | * find archive it belongs to 164 | * parse the time of the beginning of its period using `:tf` 165 | * add an alias `<alias-prefix>-<formatted period>` 166 | 167 | The usual way to use this function is either: 168 | 169 | * periodically using `periodically-rotate` 170 | * triggered by an event in the stream. 
For instance you could trigger the rotation when the day changes 171 | 172 | #### `(periodically-rotate {:interval periodicity :conn es-conn-handle :index-prefix index-prefix :alias-prefix alias-prefix :archives archives})` 173 | 174 | This function will call `rotate` every `periodicity` time interval. The `:interval` value should be given in terms of an `org.joda.time/PeriodType` object conveniently provided by `clj-time.core` using *e.g.* `hours`, `days`, *etc.* 175 | 176 | Note that the first rotation will not take effect immediately after riemann startup. 177 | Also note that configuration reloads will work as expected. 178 | 179 | ##### Example 180 | 181 | Take the example in the [synopsis](#synopsis) section. Let's say today is 2016-02-01 at 03:14 PM and 182 | riemann started exactly 2 days ago. `samplerr/rotate` fires up and processes the elasticsearch indices: 183 | 184 | * `.samplerr-2016.02.01` is younger than two days: create alias `samplerr-2016.02.01` 185 | * `.samplerr-2016.01.31` is younger than two days: create alias `samplerr-2016.01.31` 186 | * `.samplerr-2016.01.30` is two days old: expired! move its aliases to `.samplerr-2016.01` 187 | * `.samplerr-2016.02` is younger than two months: create alias `samplerr-2016.02` 188 | * `.samplerr-2016.01` is younger than two months: create alias `samplerr-2016.01` 189 | * `.samplerr-2015.12` is two months old: expired! move its aliases to `.samplerr-2015` 190 | * … 191 | 192 | #### `(purge {:conn es-conn-handle :index-prefix index-prefix :archives archives})` 193 | 194 | This function will **DELETE** expired indices. Use with care. 195 | 196 | The usual way to use this function is either: 197 | 198 | * periodically using `periodically-purge` 199 | * triggered by an event in the stream. 
For instance you could trigger the purge when the disk space is full on the elasticsearch node 200 | 201 | #### `(periodically-purge {:interval periodicity :conn es-conn-handle :index-prefix index-prefix :archives archives})` 202 | 203 | This function will call `purge` periodically. 204 | 205 | ## Development 206 | 207 | At the time of writing the contributor of this project is Fabien Wernli. Some code for the elasticsearch integration was borrowed from [tnn1t1s](https://github.com/tnn1t1s/riemann-elastic), which itself borrowed from [kiries](https://github.com/threatgrid/kiries). 208 | 209 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 1.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE 4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF 5 | THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial code and 12 | documentation distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | 16 | i) changes to the Program, and 17 | 18 | ii) additions to the Program; 19 | 20 | where such changes and/or additions to the Program originate from and 21 | are distributed by that particular Contributor. A Contribution 22 | 'originates' from a Contributor if it was added to the Program by such 23 | Contributor itself or anyone acting on such Contributor's 24 | behalf. Contributions do not include additions to the Program which: 25 | (i) are separate modules of software distributed in conjunction with 26 | the Program under their own license agreement, and (ii) are not 27 | derivative works of the Program. 28 | 29 | "Contributor" means any person or entity that distributes the Program.
30 | 31 | "Licensed Patents" mean patent claims licensable by a Contributor 32 | which are necessarily infringed by the use or sale of its Contribution 33 | alone or when combined with the Program. 34 | 35 | "Program" means the Contributions distributed in accordance with this 36 | Agreement. 37 | 38 | "Recipient" means anyone who receives the Program under this 39 | Agreement, including all Contributors. 40 | 41 | 2. GRANT OF RIGHTS 42 | 43 | a) Subject to the terms of this Agreement, each Contributor hereby 44 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 45 | license to reproduce, prepare derivative works of, publicly display, 46 | publicly perform, distribute and sublicense the Contribution of such 47 | Contributor, if any, and such derivative works, in source code and 48 | object code form. 49 | 50 | b) Subject to the terms of this Agreement, each Contributor hereby 51 | grants Recipient a non-exclusive, worldwide, royalty-free patent 52 | license under Licensed Patents to make, use, sell, offer to sell, 53 | import and otherwise transfer the Contribution of such Contributor, if 54 | any, in source code and object code form. This patent license shall 55 | apply to the combination of the Contribution and the Program if, at 56 | the time the Contribution is added by the Contributor, such addition 57 | of the Contribution causes such combination to be covered by the 58 | Licensed Patents. The patent license shall not apply to any other 59 | combinations which include the Contribution. No hardware per se is 60 | licensed hereunder. 61 | 62 | c) Recipient understands that although each Contributor grants the 63 | licenses to its Contributions set forth herein, no assurances are 64 | provided by any Contributor that the Program does not infringe the 65 | patent or other intellectual property rights of any other entity. 
Each 66 | Contributor disclaims any liability to Recipient for claims brought by 67 | any other entity based on infringement of intellectual property rights 68 | or otherwise. As a condition to exercising the rights and licenses 69 | granted hereunder, each Recipient hereby assumes sole responsibility 70 | to secure any other intellectual property rights needed, if any. For 71 | example, if a third party patent license is required to allow 72 | Recipient to distribute the Program, it is Recipient's responsibility 73 | to acquire that license before distributing the Program. 74 | 75 | d) Each Contributor represents that to its knowledge it has sufficient 76 | copyright rights in its Contribution, if any, to grant the copyright 77 | license set forth in this Agreement. 78 | 79 | 3. REQUIREMENTS 80 | 81 | A Contributor may choose to distribute the Program in object code form 82 | under its own license agreement, provided that: 83 | 84 | a) it complies with the terms and conditions of this Agreement; and 85 | 86 | b) its license agreement: 87 | 88 | i) effectively disclaims on behalf of all Contributors all warranties 89 | and conditions, express and implied, including warranties or 90 | conditions of title and non-infringement, and implied warranties or 91 | conditions of merchantability and fitness for a particular purpose; 92 | 93 | ii) effectively excludes on behalf of all Contributors all liability 94 | for damages, including direct, indirect, special, incidental and 95 | consequential damages, such as lost profits; 96 | 97 | iii) states that any provisions which differ from this Agreement are 98 | offered by that Contributor alone and not by any other party; and 99 | 100 | iv) states that source code for the Program is available from such 101 | Contributor, and informs licensees how to obtain it in a reasonable 102 | manner on or through a medium customarily used for software exchange. 
103 | 104 | When the Program is made available in source code form: 105 | 106 | a) it must be made available under this Agreement; and 107 | 108 | b) a copy of this Agreement must be included with each copy of the Program. 109 | 110 | Contributors may not remove or alter any copyright notices contained 111 | within the Program. 112 | 113 | Each Contributor must identify itself as the originator of its 114 | Contribution, if any, in a manner that reasonably allows subsequent 115 | Recipients to identify the originator of the Contribution. 116 | 117 | 4. COMMERCIAL DISTRIBUTION 118 | 119 | Commercial distributors of software may accept certain 120 | responsibilities with respect to end users, business partners and the 121 | like. While this license is intended to facilitate the commercial use 122 | of the Program, the Contributor who includes the Program in a 123 | commercial product offering should do so in a manner which does not 124 | create potential liability for other Contributors. Therefore, if a 125 | Contributor includes the Program in a commercial product offering, 126 | such Contributor ("Commercial Contributor") hereby agrees to defend 127 | and indemnify every other Contributor ("Indemnified Contributor") 128 | against any losses, damages and costs (collectively "Losses") arising 129 | from claims, lawsuits and other legal actions brought by a third party 130 | against the Indemnified Contributor to the extent caused by the acts 131 | or omissions of such Commercial Contributor in connection with its 132 | distribution of the Program in a commercial product offering. The 133 | obligations in this section do not apply to any claims or Losses 134 | relating to any actual or alleged intellectual property 135 | infringement. 
In order to qualify, an Indemnified Contributor must: a) 136 | promptly notify the Commercial Contributor in writing of such claim, 137 | and b) allow the Commercial Contributor tocontrol, and cooperate with 138 | the Commercial Contributor in, the defense and any related settlement 139 | negotiations. The Indemnified Contributor may participate in any such 140 | claim at its own expense. 141 | 142 | For example, a Contributor might include the Program in a commercial 143 | product offering, Product X. That Contributor is then a Commercial 144 | Contributor. If that Commercial Contributor then makes performance 145 | claims, or offers warranties related to Product X, those performance 146 | claims and warranties are such Commercial Contributor's responsibility 147 | alone. Under this section, the Commercial Contributor would have to 148 | defend claims against the other Contributors related to those 149 | performance claims and warranties, and if a court requires any other 150 | Contributor to pay any damages as a result, the Commercial Contributor 151 | must pay those damages. 152 | 153 | 5. NO WARRANTY 154 | 155 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS 156 | PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 157 | KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY 158 | WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY 159 | OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely 160 | responsible for determining the appropriateness of using and 161 | distributing the Program and assumes all risks associated with its 162 | exercise of rights under this Agreement , including but not limited to 163 | the risks and costs of program errors, compliance with applicable 164 | laws, damage to or loss of data, programs or equipment, and 165 | unavailability or interruption of operations. 166 | 167 | 6. 
DISCLAIMER OF LIABILITY 168 | 169 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR 170 | ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, 171 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING 172 | WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF 173 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 174 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR 175 | DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED 176 | HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 177 | 178 | 7. GENERAL 179 | 180 | If any provision of this Agreement is invalid or unenforceable under 181 | applicable law, it shall not affect the validity or enforceability of 182 | the remainder of the terms of this Agreement, and without further 183 | action by the parties hereto, such provision shall be reformed to the 184 | minimum extent necessary to make such provision valid and enforceable. 185 | 186 | If Recipient institutes patent litigation against any entity 187 | (including a cross-claim or counterclaim in a lawsuit) alleging that 188 | the Program itself (excluding combinations of the Program with other 189 | software or hardware) infringes such Recipient's patent(s), then such 190 | Recipient's rights granted under Section 2(b) shall terminate as of 191 | the date such litigation is filed. 192 | 193 | All Recipient's rights under this Agreement shall terminate if it 194 | fails to comply with any of the material terms or conditions of this 195 | Agreement and does not cure such failure in a reasonable period of 196 | time after becoming aware of such noncompliance. If all Recipient's 197 | rights under this Agreement terminate, Recipient agrees to cease use 198 | and distribution of the Program as soon as reasonably 199 | practicable. 
However, Recipient's obligations under this Agreement and 200 | any licenses granted by Recipient relating to the Program shall 201 | continue and survive. 202 | 203 | Everyone is permitted to copy and distribute copies of this Agreement, 204 | but in order to avoid inconsistency the Agreement is copyrighted and 205 | may only be modified in the following manner. The Agreement Steward 206 | reserves the right to publish new versions (including revisions) of 207 | this Agreement from time to time. No one other than the Agreement 208 | Steward has the right to modify this Agreement. The Eclipse Foundation 209 | is the initial Agreement Steward. The Eclipse Foundation may assign 210 | the responsibility to serve as the Agreement Steward to a suitable 211 | separate entity. Each new version of the Agreement will be given a 212 | distinguishing version number. The Program (including Contributions) 213 | may always be distributed subject to the version of the Agreement 214 | under which it was received. In addition, after a new version of the 215 | Agreement is published, Contributor may elect to distribute the 216 | Program (including its Contributions) under the new version. Except as 217 | expressly stated in Sections 2(a) and 2(b) above, Recipient receives 218 | no rights or licenses to the intellectual property of any Contributor 219 | under this Agreement, whether expressly, by implication, estoppel or 220 | otherwise. All rights in the Program not expressly granted under this 221 | Agreement are reserved. 222 | 223 | This Agreement is governed by the laws of the State of Washington and 224 | the intellectual property laws of the United States of America. No 225 | party to this Agreement will bring a legal action under this Agreement 226 | more than one year after the cause of action arose. Each party waives 227 | its rights to a jury trial in any resulting litigation. 
228 | -------------------------------------------------------------------------------- /src/riemann/plugin/samplerr.clj: --------------------------------------------------------------------------------
; Copyright (c) Centre de Calcul de l'IN2P3 du CNRS
; Contributor(s) : Fabien Wernli (2016)

(ns riemann.plugin.samplerr
  "A riemann plugin to downsample data in a RRDTool fashion into elasticsearch"
  (:use [clojure.tools.logging :only (info error debug warn)]
        [riemann.common :only [member?]]
        [riemann.time :only [unix-time]])
  (:require [cheshire.core :as json]
            [clj-time.format]
            [clj-time.core]
            [clj-time.coerce]
            [clojure.edn :as edn]
            [clojure.java.io :as io]
            [clojure.string]
            [qbits.spandex :as es]
            [riemann.config]
            [riemann.core :as core]
            [riemann.service :as service]
            [riemann.streams :as streams]))

(defn ^{:private true} keys-to-map
  "Turns a flat map whose keys contain dots into a nested map with string
  keys, e.g. {:a.b 1} -> {\"a\" {\"b\" 1}}. Nested map values are converted
  recursively."
  [key-map]
  (reduce-kv (fn [key-map k v]
               (assoc-in key-map
                         (clojure.string/split (name k) #"\.")
                         (if (map? v)
                           (keys-to-map v)
                           v)))
             {} key-map))


; ISO8601 formatter (no milliseconds) pinned to UTC
(def ^{:private true} format-iso8601
  (clj-time.format/with-zone (clj-time.format/formatters :date-time-no-ms)
                             clj-time.core/utc))

(defn ^{:private true} iso8601
  "Formats event-s (seconds since the epoch, integer) as an ISO8601 string."
  [event-s]
  (clj-time.format/unparse format-iso8601
                           (clj-time.coerce/from-long (* 1000 event-s))))

(defn ^{:private true} safe-iso8601
  "Like iso8601, but coerces event-s to a long first and falls back to the
  current time (with a warning) when event-s cannot be converted, e.g. nil."
  [event-s]
  ; the coercion lives inside the try so that a nil/non-numeric time is
  ; handled by the fallback instead of throwing in the caller
  (try (iso8601 (long event-s))
       (catch Exception e
         (warn "Unable to parse iso8601 input: " event-s)
         (clj-time.format/unparse format-iso8601 (clj-time.core/now)))))

(defn ^{:private true} stashify-timestamp
  "Ensures event carries an \"@timestamp\" key (derived from :time when
  missing) and removes the riemann-specific :time and :ttl keys."
  [event]
  (-> (if-not (get event "@timestamp")
        (let [time (:time event)]
          (when (nil? time) (info event))
          (assoc event "@timestamp" (safe-iso8601 time)))
        event)
      (dissoc :time)
      (dissoc :ttl)))

(defn ^{:private true} edn-safe-read
  "Reads v as EDN; returns v unchanged (with a warning) when reading fails."
  [v]
  (try
    (edn/read-string v)
    (catch Exception e
      (warn "Unable to read supposed EDN form with value: " v)
      v)))

(defn ^{:private true} message-event
  "Prepares event for indexing: drops falsy values, keeps \"_id\" as is,
  strips the leading underscore from other \"_\"-prefixed keys and reads their
  values as EDN, then nests dotted keys with keys-to-map."
  [event]
  (keys-to-map
    (into {}
          (for [[k v] event
                :when v]
            (cond
              (= (name k) "_id") [k v]
              (.startsWith (name k) "_")
              [(.substring (name k) 1) (edn-safe-read v)]
              :else
              [k v])))))

(defn ^{:private true} elastic-event
  "Converts a riemann event into an elasticsearch document. When message is
  truthy the event additionally goes through message-event."
  [event message]
  (let [e (-> event
              stashify-timestamp)]
    (if message
      (message-event e)
      e)))

(defn ^{:private true} riemann-to-elasticsearch
  "Accepts a single event or a (nested) collection of events, removes expired
  ones and converts the rest with elastic-event."
  [events message]
  (->> [events]
       flatten
       (remove streams/expired?)
       (map #(elastic-event % message))))

(defn connect
  "Connect to ElasticSearch"
  [& argv]
  (apply es/client argv))


(defn- make-index-timestamper
  "Returns a function formatting a date with the time format found in the
  event under \"tf\" (or :tf when the event was not converted by
  message-event)."
  [event]
  ; NOTE(review): `eval` is applied to a value carried by the event. It is
  ; kept for backward compatibility, but events coming from untrusted senders
  ; could abuse it - consider requiring a plain string here.
  (let [tf (or (get event "tf") (:tf event))
        formatter (clj-time.format/formatter (eval tf))]
    (fn [date]
      (clj-time.format/unparse formatter date))))

(defn persist
  "bulk index to ES

  Options:
    :conn          elasticsearch connection handle (see connect)
    :index-prefix  prefix of the target index name (default \".samplerr\")
    :message       when truthy (default) events go through message-event

  Returns a stream function. Events are grouped by target index, whose name is
  index-prefix followed by the event's \"@timestamp\" formatted with the
  event's time format. children are accepted but not used."
  [{:keys [conn index-prefix message]
    :or {index-prefix ".samplerr"
         message true}} & children]
  (fn [events]
    (let [esets (group-by (fn [e] (let [index-namer (make-index-timestamper e)]
                                    (str index-prefix (index-namer
                                                        (clj-time.format/parse format-iso8601
                                                                               (get e "@timestamp"))))))
                          (riemann-to-elasticsearch events message))]
      (doseq [[es_index raw] esets]
        (let [bulk-create-items
              (mapcat (fn [doc]
                        (if-let [id (get doc "_id")]
                          ; _id is a metadata field: it belongs in the action
                          ; line only, elasticsearch rejects it in the source
                          [{:index {:_index es_index :_id id}} (dissoc doc "_id")]
                          [{:index {:_index es_index}} doc]))
                      raw)]
          (when (seq bulk-create-items)
            (try
              (let [response (es/request conn {:url "_bulk" :body (es/chunks->body bulk-create-items) :content-type "application/x-ndjson" :method :post})
                    res (:body response)
                    items (map :index (:items res))
                    by_status (frequencies (map :status items))
                    total (count items)
                    failed (filter :error items)]
                (debug "elasticized" total " (total) " by_status " docs to " es_index "in " (:took res) "ms")
                (when (seq failed) (info "Failed: " failed)))
              (catch Exception e
                (error "Unable to bulk index:" e)))))))))

(defn ^{:private true} resource-as-json
  "Parses the classpath resource resource-name as JSON."
  [resource-name]
  (json/parse-string (slurp (io/resource resource-name))))


(defn ^{:private true} file-as-json
  "Parses the file file-name as JSON; logs and rethrows on failure."
  [file-name]
  (try
    (json/parse-string (slurp file-name))
    (catch Exception e
      (error "Exception while reading JSON file: " file-name)
      (throw e))))


;;;;;;
;;;;;;
;;;;;;

(defn- new-interval?
  "Returns true when event is at least (:step event) seconds younger than the
  accumulator acc. Returns nil when acc has no :time."
  [acc event]
  (when-let [acc-time (:time acc)]
    (let [event-time (:time event)
          age (- event-time acc-time)
          step (:step event)]
      (>= age step))))

; cfuncs
;
; All consolidation functions accumulate events with sreduce. When an event
; opens a new interval (see new-interval?) the finished accumulator is attached
; to it under :parent; only events carrying :parent pass the `where`, and the
; :parent value is what is sent to children. The interval argument is not read
; by the functions themselves: the step comes from the event's :step.

(defn sum
  "Sums the :metric of all events of an interval and forwards the result to
  children when the interval is over."
  [interval & children]
  (streams/sreduce
    (fn [acc event]
      (cond
        (nil? acc) event
        (new-interval? acc event) (assoc event :parent (dissoc acc :parent))
        :else (assoc event :time (:time acc) :metric (+ (:metric event) (:metric acc)))))
    nil
    (streams/where (contains? event :parent)
      (streams/smap (comp (fn [e] (dissoc e :parent))
                          :parent)
        (apply streams/sdo children)))))


(defn counter
  "Counts the events of an interval and forwards the count as :metric to
  children when the interval is over."
  [interval & children]
  (streams/sreduce
    (fn [acc event]
      (cond
        (nil? acc) (assoc event :metric 1)
        (new-interval? acc event) (assoc event :metric 1 :parent (dissoc acc :parent))
        :else (assoc event :time (:time acc) :metric (+ 1 (:metric acc)))))
    nil
    (streams/where (contains? event :parent)
      (streams/smap (comp (fn [event] (dissoc event :parent)) :parent)
        (apply streams/sdo children)))))

(defn average
  "Averages the :metric of all events of an interval and forwards the result
  to children when the interval is over. The forwarded event also carries the
  :sum and :count used to compute the average."
  [interval & children]
  (streams/sreduce
    (fn [acc event]
      (cond
        (nil? acc) (assoc event :sum (:metric event) :count 1)
        (new-interval? acc event) (assoc event :sum (:metric event) :count 1 :parent (dissoc acc :parent))
        :else (assoc event :time (:time acc) :count (+ 1 (:count acc)) :sum (+ (:metric event) (:sum acc)))))
    nil
    (streams/where (contains? event :parent)
      (streams/smap (comp (fn [e] (assoc e :metric (/ (:sum e) (:count e))))
                          (fn [e] (dissoc e :parent))
                          :parent)
        (apply streams/sdo children)))))

(defn extremum
  "Keeps the event of an interval whose :metric wins the comparison efunc
  (e.g. >= or <=) and forwards it to children when the interval is over. The
  forwarded event carries the time of the winning event. Unlike the other
  cfuncs, children is a collection, not a rest argument."
  [efunc interval children]
  (streams/sreduce
    (fn [acc event]
      (if (new-interval? acc event)
        (assoc event :parent (dissoc acc :parent) :orig-time (:time event))
        (if (efunc (:metric event) (:metric acc))
          ; keep the interval start in :time, remember the real time aside
          (assoc event :time (:time acc) :orig-time (:time event))
          acc)))
    (streams/where (contains? event :parent)
      (streams/smap (comp (fn [e] (dissoc e :orig-time))
                          (fn [e] (assoc e :time ((some-fn :orig-time :time) e)))
                          (fn [e] (dissoc e :parent))
                          :parent)
        (apply streams/sdo children)))))

(defn maximum
  "Forwards the event with the highest :metric of each interval to children."
  [interval & children]
  (extremum >= interval children))

(defn minimum
  "Forwards the event with the lowest :metric of each interval to children."
  [interval & children]
  (extremum <= interval children))

(defn- to-seconds*
  "takes a clj-time duration and converts it to seconds"
  [dateobj]
  (clj-time.core/in-seconds dateobj))
(def to-seconds (memoize to-seconds*))

(defn- to-millis*
  "takes a clj-time duration and converts it to milliseconds"
  [dateobj]
  (clj-time.core/in-millis dateobj))
(def to-millis (memoize to-millis*))

(defn down-n-cf
  "takes map of archive parameters and sends time-aggregated data to children

  Events are tagged with :step (in seconds), :cfunc (the cfunc name) and :tf,
  events without metric are dropped, and aggregated events get a :ttl of at
  least three steps and a :service suffixed with \"/<cfunc name>/<seconds>\"."
  [{:keys [cfunc step tf] :as args :or {cfunc {:name "avg" :func average}}} & children]
  (let [cfunc_n (:name cfunc)
        cfunc_f (:func cfunc)
        seconds (to-seconds step)]
    (streams/with {:step seconds :cfunc cfunc_n :tf tf}
      (streams/where metric
        (cfunc_f seconds
          (streams/smap #(assoc % :ttl (max (* seconds 3) (or (:ttl %) 0)) :service (str (:service %) "/" cfunc_n "/" seconds))
            (apply streams/sdo children)))))))

(defn down-n
  "takes map of archive parameters and maps to down-n-cf for all cfuncs"
  [{:keys [cfunc] :as args} & children]
  (apply streams/sdo (map #(apply down-n-cf (assoc args :cfunc %) children) cfunc)))

(defn down
  "takes vector of archives and generates (count vector) down-n streams"
  [archives & children]
  (apply streams/sdo (map #(apply down-n % children) archives)))

;;;
;;; foreign commodity functions
;;; that need license checking

;source http://stackoverflow.com/questions/8641305/find-index-of-an-element-matching-a-predicate-in-clojure
(defn- indices [pred coll]
  (keep-indexed #(when (pred %2) %1) coll))

;;;
;;; alias shifting code
;;;

(defn list-indices
  "lists all indices from an elasticsearch cluster having given prefix"
  [elastic prefix]
  (map name (keys (:indices (:body (es/request elastic {:url [(str prefix "*") :_stats :store] :method :get}))))))

(defn- index-exists?
  "returns true if index exists, false if elasticsearch answers 404 and nil
  (after logging a warning) on any other failure"
  [elastic index]
  (try
    ; a HEAD on a missing index may come back as a plain 404 response instead
    ; of an exception, so the status has to be checked explicitly
    (= 200 (:status (es/request elastic {:url [index] :method :head})))
    (catch Exception e
      ; ex-data is nil for anything but ex-info exceptions
      (if (= 404 (:status (ex-data e)))
        false
        (warn e "index exists caught")))))

(defn matches-timeformat?
  "returns a truthy value (the parsed date) if datestr matches timeformat,
  false otherwise"
  [datestr timeformat]
  (try (clj-time.format/parse (clj-time.format/formatter timeformat) datestr) (catch IllegalArgumentException ex false)))

; TODO: should return an additional key which is the parsed clj-time object so we don't need to parse time again
(defn get-retention-policy
  "returns the first retention policy that matches datestr or nil
  example: (get-retention-policy \"2016\" [{:tf \"YYYY.MM.DD\" :ttl 86400} {:tf \"YYYY\" :ttl 315567360}])
  will return {:tf \"YYYY\" :ttl 315567360}"
  [datestr retention-policies]
  (first (filter #(matches-timeformat? datestr (:tf %)) retention-policies)))

(defn get-retention-policy-index
  "returns the position of the first retention policy that matches datestr or nil
  example: (get-retention-policy-index \"2016\" [{:tf \"YYYY.MM.DD\" :ttl 86400} {:tf \"YYYY\" :ttl 315567360}])
  will return 1"
  [datestr retention-policies]
  (first (indices #(matches-timeformat? datestr (:tf %)) retention-policies)))

(defn- parse-datestr
  "parses datestr using retention-policy"
  [datestr retention-policy]
  (let [tf (:tf retention-policy)]
    (clj-time.format/parse (clj-time.format/formatter tf) datestr)))

(defn parse-retention-policy-date
  "parses datestr using index n of retention-policies; logs a warning and
  returns nil when n is nil"
  [datestr retention-policies n]
  (if n
    (let [retention-policy (nth retention-policies n)]
      (parse-datestr datestr retention-policy))
    (warn (str datestr " does not match any retention policy"))))

(defn- format-datestr
  "formats dateobj using retention-policy"
  [dateobj retention-policy]
  (let [tf (:tf retention-policy)]
    (clj-time.format/unparse (clj-time.format/formatter tf) dateobj)))

(defn format-retention-policy-date
  "formats dateobj using index n of retention-polices"
  [dateobj retention-policies n]
  (let [retention-policy (nth retention-policies n)]
    (format-datestr dateobj retention-policy)))

(defn is-expired?
  "returns true if datestr matches an expired retention policy, i.e. the date
  it denotes is older than now minus the policy's :ttl. returns nil when no
  retention policy matches datestr"
  [datestr retention-policies]
  (when-let [retention-policy (get-retention-policy datestr retention-policies)]
    (let [parsed-time (parse-datestr datestr retention-policy)
          expiration (clj-time.core/minus (clj-time.core/now) (:ttl retention-policy))]
      (clj-time.core/before? parsed-time expiration))))

(defn get-aliases
  "returns aliases of index, or nil if it has none"
  [elastic index]
  (keys ((comp :aliases (keyword index)) (:body (es/request elastic {:url [index :_alias] :method :get})))))

(defn move-aliases
  "moves aliases from src-index to dst-index"
  [elastic src-index dst-index]
  (info "transfer aliases from" src-index "to" dst-index)
  (let [src-aliases (get-aliases elastic src-index)
        src-actions (map #(hash-map :remove (hash-map :index src-index :alias %)) src-aliases)
        dst-actions (map #(hash-map :add (hash-map :index dst-index :alias %)) src-aliases)]
    (es/request elastic {:url "/_aliases" :method :post :body {:actions (vec (concat src-actions dst-actions))}})))

(defn add-alias
  "adds alias to index"
  [elastic index es-alias]
  (info "add alias" es-alias "->" index)
  (es/request elastic {:url "/_aliases" :method :post :body {:actions [{:add {:index index :alias es-alias}}]}}))

(defn remove-aliases
  "removes all aliases from index"
  [elastic index]
  (info "remove all aliases from" index)
  (let [aliases (get-aliases elastic index)
        actions (map #(hash-map :remove (hash-map :index index :alias %)) aliases)
        actions {:actions actions}]
    (es/request elastic {:url "/_aliases" :method :post :body actions})))

(defn fresh-index-targets
  "returns collection of unexpired datestrings for dateobj"
  [dateobj retention-policies]
  (for [policy retention-policies
        :let [now (clj-time.core/now)
              tf (:tf policy)
              ttl (:ttl policy)]
        :when (clj-time.core/before? (clj-time.core/minus now ttl) dateobj)]
    (clj-time.format/unparse (clj-time.format/formatter tf) dateobj)))

(defn- index->datestr
  "strips index-prefix from the beginning of index. The prefix is matched
  literally, so characters such as \".\" have no regex meaning."
  [index index-prefix]
  (clojure.string/replace index
                          (re-pattern (str "^" (java.util.regex.Pattern/quote (str index-prefix))))
                          ""))

(defn shift-alias
  "matches index with its corresponding retention policy. if expired, shifts its existing aliases to next retention policy. else adds alias if necessary

  For an expired index:
    - when an unexpired target exists, its aliases are moved there (or the
      alias is added to the target if the index had none); throws an Exception
      when the target index is missing
    - when no unexpired target is left, its aliases are removed
  Indices matching no retention policy are left alone (a warning is logged)."
  [elastic index index-prefix alias-prefix retention-policies]
  (let [datestr (index->datestr index index-prefix)
        retention-policy-index (get-retention-policy-index datestr retention-policies)
        ; logs the warning when no policy matches
        dateobj (parse-retention-policy-date datestr retention-policies retention-policy-index)]
    (when retention-policy-index
      (if (is-expired? datestr retention-policies)
        (let [next-date (first (fresh-index-targets dateobj retention-policies))
              next-index (str index-prefix next-date)]
          (if next-date
            (if (index-exists? elastic next-index)
              (if (get-aliases elastic index)
                (move-aliases elastic index next-index)
                (add-alias elastic next-index (str alias-prefix datestr)))
              (throw (Exception. (str "can't move aliases from " index " to missing " next-index))))
            (when (get-aliases elastic index)
              (remove-aliases elastic index))))
        (add-alias elastic index (str alias-prefix datestr))))))

(defn delete-index
  "deletes index"
  [elastic index]
  (info "delete index" index)
  (es/request elastic {:url [index] :method :delete}))

(defn purge-index
  "deletes index if it matches a timeformat in retention-policies and is expired"
  [elastic index index-prefix retention-policies]
  (let [datestr (index->datestr index index-prefix)]
    (if (get-retention-policy-index datestr retention-policies)
      (when (is-expired? datestr retention-policies)
        (delete-index elastic index))
      (warn (str datestr " does not match any retention policy")))))

(defn purge
  "deletes all indices matching index-prefix and that are expired according to retention-policies"
  [{:keys [conn index-prefix archives]}]
  (debug "purging")
  ; list-indices appends the wildcard itself; doseq copes with an empty list
  (doseq [index (list-indices conn index-prefix)]
    (purge-index conn index index-prefix archives)))

(defn purge-service
  "returns a service which schedules a task to purge indices

  Options: :interval (clj-time period, default 3 days), :conn, :index-prefix,
  :archives and :enabled? (default true). Each run sleeps for the interval
  first, then purges; exceptions are logged, not propagated."
  [{:keys [interval conn index-prefix archives enabled?]
    :or {interval (clj-time.core/days 3)
         enabled? true}}]
  (let [interval (to-millis interval)]
    (service/thread-service
      ::samplerr-purge [interval conn index-prefix archives enabled?]
      (fn pur [core]
        (Thread/sleep interval)
        (try
          (if enabled?
            (purge {:conn conn :index-prefix index-prefix :archives archives}))
          (catch Exception e
            (warn e "purge service caught")))))))

(defn periodically-purge
  "adds an index purge service to core"
  [& opts]
  (info "registering purge service with" (apply :interval opts) "interval")
  (let [service (apply purge-service opts)]
    (swap! riemann.config/next-core core/conj-service service :force)))

(defn rotate
  "maps shift-alias to all indices from elastic connection matching index-prefix"
  [{:keys [conn index-prefix alias-prefix archives]}]
  (debug "rotating")
  ; list-indices appends the wildcard itself; doseq copes with an empty list
  (doseq [index (list-indices conn index-prefix)]
    (shift-alias conn index index-prefix alias-prefix archives)))

(defn rotation-service
  "returns a service which schedules a task to rotate aliases

  Options: :interval (clj-time period, default 5 minutes), :conn,
  :alias-prefix, :index-prefix, :archives and :enabled? (default true). Each
  run sleeps for the interval first, then rotates; exceptions are logged, not
  propagated."
  [{:keys [interval conn alias-prefix index-prefix archives enabled?]
    :or {interval (clj-time.core/minutes 5)
         enabled? true}}]
  (let [interval (to-millis interval)]
    (service/thread-service
      ::samplerr-rotation [interval conn alias-prefix index-prefix archives enabled?]
      (fn rot [core]
        (Thread/sleep interval)
        (try
          (if enabled?
            (rotate {:conn conn :index-prefix index-prefix :alias-prefix alias-prefix :archives archives}))
          (catch Exception e
            (warn e "rotation service caught")))))))

(defn periodically-rotate
  "adds an alias rotation service to core"
  [& opts]
  (info "registering rotation service with" (apply :interval opts) "interval")
  (let [service (apply rotation-service opts)]
    (swap! riemann.config/next-core core/conj-service service :force)))

-------------------------------------------------------------------------------- /doc/samplerr.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | tmux 6 | 9 | 10 | 11 |
12 | 13 | 15 | 16 | -------------------------------------------------------------------------------- /doc/samplerr.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 20 | 22 | 30 | 36 | 37 | 45 | 51 | 52 | 60 | 66 | 67 | 75 | 81 | 82 | 90 | 96 | 97 | 105 | 111 | 112 | 120 | 126 | 127 | 135 | 141 | 142 | 150 | 156 | 157 | 165 | 171 | 172 | 173 | 191 | 196 | 197 | 199 | 200 | 202 | image/svg+xml 203 | 205 | 206 | 207 | 208 | 209 | 214 | 217 | collect.......5s interval 238 | 239 | samplerr 250 | 726 | 732 | 738 | 744 | 750 | 756 | 762 | 30s resolution2d retention1TB disk 783 | 1124 | 1554 | 15m resolution1M retention1TB disk 1575 | 3h resolution1y retention1TB disk 1596 | ES indexYYYY.MM.dd 1612 | ES indexYYYY.MM 1628 | ES indexYYYY 1644 | 1645 | 1646 | --------------------------------------------------------------------------------