├── .lein-classpath ├── .travis.yml ├── README.markdown ├── .gitignore ├── tasks └── leiningen │ ├── pkg.clj │ ├── tar.clj │ └── fatdeb.clj ├── riemann.config ├── src ├── leiningen │ └── build_parser.clj └── riemann │ ├── repl.clj │ ├── bin.clj │ ├── pagerduty.clj │ ├── folds.clj │ ├── time │ └── controlled.clj │ ├── service.clj │ ├── pubsub.clj │ ├── index.clj │ ├── Query.g │ ├── deps.clj │ ├── email.clj │ ├── transport │ ├── graphite.clj │ ├── websockets.clj │ ├── udp.clj │ └── tcp.clj │ ├── transport.clj │ ├── logging.clj │ ├── core.clj │ ├── query.clj │ ├── graphite.clj │ ├── librato.clj │ ├── pool.clj │ ├── common.clj │ ├── config.clj │ └── time.clj ├── pkg ├── riemann.config ├── deb │ └── riemann └── tar │ └── riemann ├── test └── riemann │ └── test │ ├── common.clj │ ├── pubsub.clj │ ├── email.clj │ ├── graphite.clj │ ├── time │ └── controlled.clj │ ├── transport.clj │ ├── client.clj │ ├── service.clj │ ├── index.clj │ ├── config.clj │ ├── time.clj │ ├── librato.clj │ ├── bench.clj │ ├── query.clj │ ├── pool.clj │ ├── deps.clj │ ├── core.clj │ └── streams.clj ├── project.clj └── LICENSE /.lein-classpath: -------------------------------------------------------------------------------- 1 | :tasks 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: clojure 2 | lein: lein2 3 | jdk: 4 | - openjdk7 5 | - oraclejdk7 6 | - openjdk6 7 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | [All About Riemann](http://aphyr.github.com/riemann/) 2 | === 3 | 4 | [![Build Status](https://travis-ci.org/aphyr/riemann.png)](https://travis-ci.org/aphyr/riemann) 5 | 6 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | .cake 2 | pom.xml 3 | *.jar 4 | *.tar 5 | *.tar.bz2 6 | *.war 7 | *.deb 8 | *~ 9 | .*.swp 10 | *.log 11 | lib 12 | classes 13 | build 14 | .lein-deps-sum 15 | .lein-failures 16 | protosrc/ 17 | reimann-*.zip 18 | site/** 19 | bench/** 20 | target/** 21 | -------------------------------------------------------------------------------- /tasks/leiningen/pkg.clj: -------------------------------------------------------------------------------- 1 | (ns leiningen.pkg 2 | (:use [leiningen.uberjar :only [uberjar]] 3 | [leiningen.fatdeb :only [fatdeb]] 4 | [leiningen.tar :only [tar]])) 5 | 6 | (defn pkg [project] 7 | (doto project 8 | (uberjar) 9 | (tar false) 10 | (fatdeb false))) 11 | -------------------------------------------------------------------------------- /riemann.config: -------------------------------------------------------------------------------- 1 | ; vim: filetype=clojure 2 | 3 | (logging/init :file "riemann.log") 4 | 5 | (tcp-server) 6 | (udp-server) 7 | (ws-server) 8 | (repl-server) 9 | 10 | (periodically-expire 10) 11 | 12 | (let [index (default :ttl 300 (update-index (index)))] 13 | (streams 14 | prn 15 | index 16 | )) 17 | -------------------------------------------------------------------------------- /src/leiningen/build_parser.clj: -------------------------------------------------------------------------------- 1 | ;(ns leiningen.build-parser 2 | ; (:use [clojure.contrib.shell-out] 3 | ; [clojure.java.io :only [copy file]] 4 | ; )) 5 | ; 6 | ;(defn build-parser [project] 7 | ; (prn (sh "java" "-cp" "lib/*" "org.antlr.Tool" "src/riemann/Query.g" 8 | ; :dir (:root project))) 9 | ;~ (.delete (file (:root project) "Query.tokens"))) 10 | -------------------------------------------------------------------------------- /pkg/riemann.config: -------------------------------------------------------------------------------- 1 | ; vim: filetype=clojure 2 | 3 | (logging/init 
:file "riemann.log") 4 | 5 | ; Listen on the local interface over TCP (5555), UDP (5555), and websockets 6 | ; (5556) 7 | (let [host "127.0.0.1"] 8 | (tcp-server :host host) 9 | (udp-server :host host) 10 | (ws-server :host host)) 11 | 12 | ; Expire old events from the index every 5 seconds. 13 | (periodically-expire 5) 14 | 15 | ; Keep events in the index for 5 minutes by default. 16 | (let [index (default :ttl 300 (update-index (index)))] 17 | 18 | ; Inbound events will be passed to these streams: 19 | (streams 20 | 21 | ; Index all events immediately. 22 | index 23 | 24 | ; Calculate an overall rate of events. 25 | (with {:metric 1 :host nil :state "ok" :service "events/sec"} 26 | (rate 5 index)) 27 | 28 | ; Log expired events. 29 | (expired 30 | (fn [event] (info "expired" event))) 31 | )) 32 | -------------------------------------------------------------------------------- /pkg/deb/riemann: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | JAR="/usr/lib/riemann/riemann.jar" 4 | CONFIG="/etc/riemann/riemann.config" 5 | AGGRESSIVE_OPTS="-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:+AggressiveOpts -XX:+UseFastAccessorMethods -XX:+UseCompressedOops" 6 | 7 | usage() 8 | { 9 | cat << EOF 10 | usage: $0 [-a] [java options ...] [config-file] 11 | 12 | Runs Riemann with the given configuration file. 13 | 14 | OPTIONS: 15 | -h Show this message 16 | -a Adds some default aggressive, nonportable JVM optimization flags. 17 | 18 | Any unrecognized options (e.g. -XX:+UseParNewGC) will be passed on to java. 
19 | EOF 20 | } 21 | 22 | OPTS= 23 | for arg in "$@"; do 24 | case $arg in 25 | "-a") 26 | OPTS="$AGGRESSIVE_OPTS $OPTS" 27 | ;; 28 | "-h") 29 | usage 30 | exit 0 31 | ;; 32 | -*) 33 | OPTS="$OPTS $arg" 34 | ;; 35 | *) 36 | CONFIG="$arg" 37 | ;; 38 | esac 39 | done 40 | 41 | exec java $OPTS -jar "$JAR" "$CONFIG" 42 | -------------------------------------------------------------------------------- /pkg/tar/riemann: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | top="$(dirname "$0")/.." 3 | 4 | JAR="$top/lib/riemann.jar" 5 | CONFIG="$top/etc/riemann.config" 6 | AGGRESSIVE_OPTS="-XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:+AggressiveOpts -XX:+UseFastAccessorMethods -XX:+UseCompressedOops" 7 | 8 | usage() 9 | { 10 | cat << EOF 11 | usage: $0 [-a] [java options ...] [config-file] 12 | 13 | Runs Riemann with the given configuration file. 14 | 15 | OPTIONS: 16 | -h Show this message 17 | -a Adds some default aggressive, nonportable JVM optimization flags. 18 | 19 | Any unrecognized options (e.g. -XX:+UseParNewGC) will be passed on to java. 20 | EOF 21 | } 22 | 23 | OPTS= 24 | for arg in "$@"; do 25 | case $arg in 26 | "-a") 27 | OPTS="$AGGRESSIVE_OPTS $OPTS" 28 | ;; 29 | "-h") 30 | usage 31 | exit 0 32 | ;; 33 | -*) 34 | OPTS="$OPTS $arg" 35 | ;; 36 | *) 37 | CONFIG="$arg" 38 | ;; 39 | esac 40 | done 41 | 42 | exec java $OPTS -jar "$JAR" "$CONFIG" 43 | -------------------------------------------------------------------------------- /src/riemann/repl.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.repl 2 | "The riemann REPL server is a bit of a special case. Since it controls almost 3 | every aspect of Riemann--and can shut those aspects down--it needs to live 4 | above them. 
While you usually *start* a repl server from the config file, it 5 | is not bound to the usual config lifecycle and won't be shut down or 6 | interrupted during config reload." 7 | (:use clojure.tools.logging) 8 | (:require [clojure.tools.nrepl.server :as nrepl])) 9 | 10 | (def server nil) 11 | 12 | (defn stop-server! 13 | "Stops the REPL server." 14 | [] 15 | (when-let [s server] 16 | (nrepl/stop-server s)) 17 | (def server nil)) 18 | 19 | (defn start-server! 20 | "Starts a new repl server. Stops the old server first, if any. Options: 21 | 22 | :host (default \"127.0.0.1\") 23 | :port (default 5557)" 24 | [opts] 25 | (stop-server!) 26 | (let [opts (merge {:port 5557 :host "127.0.0.1"} 27 | (apply hash-map opts))] 28 | (def server (nrepl/start-server 29 | :port (:port opts) 30 | :bind (:host opts))) 31 | (info "REPL server" opts "online"))) 32 | 33 | (defn start-server 34 | "Starts a new REPL server, when one isn't already running." 35 | [opts] 36 | (when-not server (start-server! opts))) 37 | -------------------------------------------------------------------------------- /test/riemann/test/common.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.common 2 | (:use riemann.common) 3 | (:use clojure.test)) 4 | 5 | (deftest subset-test 6 | (are [a b] (subset? a b) 7 | [] [] 8 | [] [1 2 3] 9 | [1] [1] 10 | [1] [2 1 3] 11 | [1 2] [1 2 3] 12 | [1 2] [2 3 1]) 13 | 14 | (are [a b] (not (subset? a b)) 15 | [1] [] 16 | [1 2] [1] 17 | [1] [2] 18 | [1] [2 3] 19 | [1 2] [1 3] 20 | [1 2] [3 1])) 21 | 22 | (deftest overlap-test 23 | (are [a b] (overlap? a b) 24 | [1 2] [1] 25 | [1] [1] 26 | [1 2] [2 3] 27 | [3 2] [1 3] 28 | [1 3] [3 1]) 29 | 30 | (are [a b] (not (overlap? a b)) 31 | [] [] 32 | [1] [] 33 | [1] [2] 34 | [3] [1 2] 35 | [1 2] [3 4])) 36 | 37 | (deftest disjoint-test 38 | (are [a b] (disjoint? a b) 39 | [] [] 40 | [1] [] 41 | [1] [2] 42 | [3] [1 2] 43 | [1 2] [3 4]) 44 | 45 | (are [a b] (not (disjoint? 
a b)) 46 | [1 2] [1] 47 | [1] [1] 48 | [1 2] [2 3] 49 | [3 2] [1 3] 50 | [1 3] [3 1])) 51 | -------------------------------------------------------------------------------- /test/riemann/test/pubsub.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.pubsub 2 | (:use riemann.pubsub) 3 | (:use clojure.test)) 4 | 5 | (defn pusher [out] 6 | "push events onto x" 7 | (fn [x] (dosync (alter out conj x)))) 8 | 9 | (deftest one-to-one 10 | (let [r (pubsub-registry) 11 | out (ref []) 12 | id (subscribe r :foo (pusher out))] 13 | 14 | (publish r :foo 1) 15 | (publish r :foo 2) 16 | (is (= (deref out) [1 2])))) 17 | 18 | (deftest one-to-many 19 | (let [r (pubsub-registry) 20 | out1 (ref []) 21 | out2 (ref []) 22 | id1 (subscribe r :foo (pusher out1)) 23 | id2 (subscribe r :foo (pusher out2))] 24 | 25 | (publish r :foo 1) 26 | (publish r :foo 2) 27 | (is (= (deref out1) (deref out2) [1 2])))) 28 | 29 | (deftest unsub 30 | (let [r (pubsub-registry) 31 | out1 (ref []) 32 | out2 (ref []) 33 | foo1 (subscribe r :foo (pusher out1)) 34 | foo2 (subscribe r :foo (pusher out2))] 35 | 36 | (publish r :foo 1) 37 | 38 | ; Unsub with channel 39 | (unsubscribe r :foo foo1) 40 | (publish r :foo 2) 41 | 42 | ; Unsub without channel 43 | (unsubscribe r foo2) 44 | (publish r :foo 3) 45 | 46 | (is (= (deref out1) [1])) 47 | (is (= (deref out2) [1 2])))) 48 | -------------------------------------------------------------------------------- /test/riemann/test/email.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.email 2 | (:use [riemann.time :only [unix-time]] 3 | riemann.email 4 | clojure.test)) 5 | 6 | (deftest subject 7 | (let [s #'riemann.email/subject] 8 | (are [events subject] (= (s events) subject) 9 | [] "" 10 | 11 | [{}] "" 12 | 13 | [{:host "foo"}] "foo" 14 | 15 | [{:host "foo"} {:host "bar"}] "foo and bar" 16 | 17 | [{:host "foo"} {:host "bar"} {:host "baz"}] 18 | "foo, bar, 
baz" 19 | 20 | [{:host "foo"} {:host "baz"} {:host "bar"} {:host "baz"}] 21 | "foo, baz, bar" 22 | 23 | [{:host 1} {:host 2} {:host 3} {:host 4} {:host 5}] 24 | "5 hosts" 25 | 26 | [{:host "foo" :state "ok"}] "foo ok" 27 | 28 | [{:host "foo" :state "ok"} {:host "bar" :state "ok"}] 29 | "foo and bar ok" 30 | 31 | [{:host "foo" :state "error"} {:host "bar" :state "ok"}] 32 | "foo and bar error and ok" 33 | ))) 34 | 35 | (deftest ^:email ^:integration email-test 36 | (let [email (mailer {}) 37 | stream (email "aphyr@aphyr.com")] 38 | (stream {:host "localhost" 39 | :service "email test" 40 | :state "ok" 41 | :description "all clear, uh, situation normal" 42 | :metric 3.14159 43 | :time (unix-time)}))) 44 | -------------------------------------------------------------------------------- /src/riemann/bin.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.bin 2 | "Main function." 3 | (:require riemann.config 4 | riemann.logging 5 | riemann.time) 6 | (:use clojure.tools.logging) 7 | (:gen-class)) 8 | 9 | (def config-file 10 | "The configuration file loaded by the bin tool" 11 | (atom nil)) 12 | 13 | (defn reload! 14 | "Reloads the given configuration file by clearing the task scheduler, shutting 15 | down the current core, and loading a new one." 16 | [] 17 | (try 18 | (riemann.config/validate-config @config-file) 19 | (riemann.time/reset-tasks!) 20 | (riemann.config/clear!) 21 | (riemann.config/include @config-file) 22 | (riemann.config/apply!) 23 | :reloaded 24 | (catch Exception e 25 | (error e "Couldn't reload:") 26 | e))) 27 | 28 | (defn handle-signals 29 | "Sets up POSIX signal handlers." 30 | [] 31 | (sun.misc.Signal/handle 32 | (sun.misc.Signal. "HUP") 33 | (proxy [sun.misc.SignalHandler] [] 34 | (handle [sig] 35 | (info "Caught SIGHUP, reloading") 36 | (reload!))))) 37 | (defn pid 38 | "Process identifier, such as it is on the JVM. 
:-/" 39 | [] 40 | (-> (java.lang.management.ManagementFactory/getRuntimeMXBean) (.getName))) 41 | 42 | (defn -main 43 | "Start Riemann. Loads a configuration file from the first of its args." 44 | [& argv] 45 | (riemann.logging/init) 46 | (try 47 | (info "PID" (pid)) 48 | (reset! config-file (or (first argv) "riemann.config")) 49 | (handle-signals) 50 | (riemann.time/start!) 51 | (riemann.config/include @config-file) 52 | (riemann.config/apply!) 53 | nil 54 | (catch Exception e 55 | (error e "Couldn't start")))) 56 | -------------------------------------------------------------------------------- /test/riemann/test/graphite.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.graphite 2 | (:use riemann.graphite 3 | [riemann.time :only [unix-time]] 4 | clojure.test) 5 | (:require [riemann.logging :as logging])) 6 | 7 | (logging/init) 8 | 9 | (deftest percentiles 10 | (is (= (graphite-path-percentiles 11 | {:service "foo bar"}) 12 | "foo.bar")) 13 | (is (= (graphite-path-percentiles 14 | {:service "foo bar 1"}) 15 | "foo.bar.1")) 16 | (is (= (graphite-path-percentiles 17 | {:service "foo bar 99"}) 18 | "foo.bar.99")) 19 | (is (= (graphite-path-percentiles 20 | {:service "foo bar 0.99"}) 21 | "foo.bar.99")) 22 | ) 23 | 24 | (deftest ^:graphite ^:integration graphite-test 25 | (let [g (graphite {:block-start true})] 26 | (g {:host "riemann.local" 27 | :service "graphite test" 28 | :state "ok" 29 | :description "all clear, uh, situation normal" 30 | :metric -2 31 | :time (unix-time)})) 32 | 33 | (let [g (graphite {:block-start true})] 34 | (g {:service "graphite test" 35 | :state "ok" 36 | :description "all clear, uh, situation normal" 37 | :metric 3.14159 38 | :time (unix-time)})) 39 | 40 | (let [g (graphite {:block-start true})] 41 | (g {:host "no-service.riemann.local" 42 | :state "ok" 43 | :description "all clear, uh, situation normal" 44 | :metric 4 45 | :time (unix-time)}))) 46 | 
-------------------------------------------------------------------------------- /src/riemann/pagerduty.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.pagerduty 2 | "Forwards events to Pagerduty" 3 | (:require [clj-http.client :as client]) 4 | (:require [cheshire.core :as json])) 5 | 6 | (def ^:private event-url 7 | "https://events.pagerduty.com/generic/2010-04-15/create_event.json") 8 | 9 | (defn- post 10 | "POST to the PagerDuty events API." 11 | [request] 12 | (client/post event-url 13 | {:body (json/generate-string request) 14 | :socket-timeout 5000 15 | :conn-timeout 5000 16 | :content-type :json 17 | :accept :json 18 | :throw-entire-message? true})) 19 | 20 | (defn- format-event 21 | "Formats an event for PD. event-type is one of :trigger, :acknowledge, 22 | :resolve" 23 | [service-key event-type event] 24 | {:service_key service-key 25 | :event_type event-type 26 | :incident_key (str (:host event) " " (:service event)) 27 | :description (str (:host event) " " 28 | (:service event) " is " 29 | (:state event) " (" 30 | (:metric event) ")") 31 | :details event}) 32 | 33 | (defn pagerduty 34 | "Creates a pagerduty adapter. Takes your PD service key, and returns a map of 35 | functions which trigger, acknowledge, and resolve events. Event service will 36 | be used as the incident key. The PD description will be the service, state, 37 | and metric. The full event will be attached as the details. 
38 | 39 | (let [pd (pagerduty \"my-service-key\")] 40 | (changed-state 41 | (where (state \"ok\") (:resolve pd)) 42 | (where (state \"critical\") (:trigger pd))))" 43 | [service-key] 44 | {:trigger (fn [e] (post (format-event service-key :trigger e))) 45 | :acknowledge (fn [e] (post (format-event service-key :acknowledge e))) 46 | :resolve (fn [e] (post (format-event service-key :resolve e)))}) 47 | -------------------------------------------------------------------------------- /test/riemann/test/time/controlled.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.time.controlled 2 | (:use riemann.time.controlled 3 | riemann.time 4 | [riemann.common :exclude [unix-time linear-time]] 5 | clojure.math.numeric-tower 6 | clojure.test) 7 | (:require [riemann.logging :as logging])) 8 | 9 | (use-fixtures :once control-time!) 10 | (use-fixtures :each reset-time!) 11 | 12 | (deftest clock-test 13 | (is (= (unix-time-controlled) 0)) 14 | (advance! -1) 15 | (is (= (unix-time-controlled) 0)) 16 | (advance! 4.5) 17 | (is (= (unix-time-controlled) 4.5)) 18 | (reset-time!) 19 | (is (= (unix-time-controlled) 0))) 20 | 21 | (deftest once-test 22 | (let [x (atom 0) 23 | once1 (once! 1 #(swap! x inc)) 24 | once2 (once! 2 #(swap! x inc)) 25 | once3 (once! 3 #(swap! x inc))] 26 | 27 | (advance! 0.5) 28 | (is (= @x 0)) 29 | 30 | (advance! 2) 31 | (is (= @x 2)) 32 | 33 | (cancel once3) 34 | (advance! 3) 35 | (is (= @x 2)))) 36 | 37 | (deftest every-test 38 | (let [x (atom 0) 39 | bump #(swap! x inc) 40 | task (every! 1 2 bump)] 41 | 42 | (is (= @x 0)) 43 | 44 | (advance! 1) 45 | (is (= @x 0)) 46 | 47 | (advance! 2) 48 | (is (= @x 1)) 49 | 50 | (advance! 3) 51 | (is (= @x 2)) 52 | 53 | (advance! 4) 54 | (is (= @x 3)) 55 | 56 | ; Double-down 57 | (defer task -3) 58 | (is (= @x 3)) 59 | (advance! 5) 60 | (is (= @x 8)) 61 | 62 | ; Into the future! 63 | (defer task 4) 64 | (advance! 8) 65 | (is (= @x 8)) 66 | (advance! 
9) 67 | (is (= @x 9)) 68 | (advance! 10) 69 | (is (= @x 10)))) 70 | -------------------------------------------------------------------------------- /test/riemann/test/transport.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.transport 2 | (:use riemann.common 3 | riemann.core 4 | riemann.transport.tcp 5 | riemann.transport.udp 6 | riemann.logging 7 | clojure.test 8 | lamina.core 9 | aleph.tcp 10 | aleph.udp 11 | gloss.core) 12 | (:import (org.jboss.netty.buffer ChannelBuffers))) 13 | 14 | (riemann.logging/init) 15 | 16 | (deftest udp 17 | (riemann.logging/suppress ["riemann.transport" "riemann.core"] 18 | (let [server (udp-server) 19 | core (transition! (core) {:services [server]}) 20 | client (wait-for-result (udp-socket {})) 21 | msg (ChannelBuffers/wrappedBuffer 22 | (encode {:ok true}))] 23 | 24 | (try 25 | (enqueue client {:host "localhost" 26 | :port 5555 27 | :message msg}) 28 | (Thread/sleep 100) 29 | (finally 30 | (close client) 31 | (stop! core)))))) 32 | 33 | (deftest ignores-garbage 34 | (riemann.logging/suppress ["riemann.transport" "riemann.core"] 35 | (let [server (tcp-server) 36 | core (transition! (core) {:services [server]}) 37 | client (wait-for-result 38 | (aleph.tcp/tcp-client 39 | {:host "localhost" 40 | :port 5555 41 | :frame (finite-block :int32)}))] 42 | 43 | (try 44 | (enqueue client 45 | (java.nio.ByteBuffer/wrap 46 | (byte-array (map byte [0 1 2])))) 47 | (is nil? (wait-for-message client)) 48 | (is (closed? client)) 49 | (finally 50 | (close client) 51 | (stop! core)))))) 52 | -------------------------------------------------------------------------------- /src/riemann/folds.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.folds 2 | "Functions for combining states." 
3 | (:use [riemann.common])) 4 | 5 | (defn sorted-sample-extract 6 | "Returns the events in seqable s, sorted and taken at each point p of points, 7 | where p ranges from 0 (smallest metric) to 1 (largest metric). 0.5 is the 8 | median event, 0.95 is the 95th' percentile event, and so forth." 9 | [s points] 10 | (if (empty? s) 11 | '() 12 | (let [sorted (sort-by :metric s) 13 | n (count sorted) 14 | extract (fn [point] 15 | (let [idx (min (dec n) (int (Math/floor (* n point))))] 16 | (nth sorted idx)))] 17 | (map extract points)))) 18 | 19 | (defn sorted-sample 20 | "Sample a sequence of events at points, return states with service remapped 21 | to service + point. For instance, (sorted-sample events [0 1]) returns a 22 | 2-element seq of the smallest event and the biggest event, by metric. The 23 | first has a service which ends in \" 0\" and the second one ends in \" 1\". 24 | Useful for extracting histograms and percentiles." 25 | [s points] 26 | (map (fn [point, event] 27 | (assoc event :service 28 | (str (:service event) " " point))) 29 | points 30 | (sorted-sample-extract s points))) 31 | 32 | (defn sum 33 | "Adds events together. Sums metric, merges into last of events." 34 | [events] 35 | (assoc (last events) 36 | :metric 37 | (reduce + (map :metric events)))) 38 | 39 | (defn mean 40 | "Averages events together. Mean metric, merged into last of events." 41 | [events] 42 | (assoc (last events) 43 | :metric 44 | (/ (reduce + (map :metric events)) (count events)))) 45 | 46 | (defn median 47 | "Returns the median event from events, by metric." 48 | [events] 49 | (first (sorted-sample-extract events [0.5]))) 50 | 51 | (defn minimum 52 | "Returns the minimum event, by metric." 53 | [events] 54 | (apply min-key :metric events)) 55 | 56 | (defn maximum 57 | "Returns the maximum event, by metric." 
58 | [events] 59 | (apply max-key :metric events)) 60 | -------------------------------------------------------------------------------- /src/riemann/time/controlled.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.time.controlled 2 | "Provides controllable periodic and deferred execution. Calling (advance! 3 | delta-in-seconds) moves the clock forward, triggering events that would have 4 | occurred, in sequence." 5 | (:use riemann.time 6 | clojure.math.numeric-tower)) 7 | 8 | (def clock 9 | "Reference to the current time, in seconds." 10 | (atom nil)) 11 | 12 | (defn reset-clock! 13 | [] 14 | (reset! clock 0)) 15 | 16 | (defn reset-time! 17 | "Resets the clock and task queue. If a function is given, calls f after 18 | resetting the time and task list." 19 | ([f] (reset-time!) (f)) 20 | ([] 21 | (reset-clock!) 22 | (reset-tasks!))) 23 | 24 | (defn set-time! 25 | "Sets the current time, without triggering callbacks." 26 | [t] 27 | (reset! clock t)) 28 | 29 | (defn unix-time-controlled 30 | [] 31 | @clock) 32 | 33 | (defn linear-time-controlled 34 | [] 35 | @clock) 36 | 37 | (defn advance! 38 | "Advances the clock to t seconds, triggering side effects." 39 | [t] 40 | (when (< @clock t) 41 | (loop [] 42 | (when-let [task (poll-task!)] 43 | (if (<= (:t task) t) 44 | (do 45 | ; Consume task 46 | (swap! clock max (:t task)) 47 | (run task) 48 | (when-let [task' (succ task)] 49 | (schedule-sneaky! task')) 50 | (recur)) 51 | ; Return task 52 | (schedule-sneaky! task)))) 53 | (swap! clock max t))) 54 | 55 | (defn control-time! 56 | "Switches riemann.time functions to time.controlled counterparts, invokes f, 57 | then restores them. Definitely not threadsafe. Not safe by any standard, 58 | come to think of it. Only for testing purposes." 59 | [f] 60 | (let [unix-time riemann.time/unix-time 61 | linear-time riemann.time/linear-time] 62 | ; Please forgive me. 
63 | (intern 'riemann.time 'unix-time unix-time-controlled) 64 | (intern 'riemann.time 'linear-time linear-time-controlled) 65 | (try (f) ; restore the real clock functions even if f throws 66 | (finally (intern 'riemann.time 'unix-time unix-time) 67 | (intern 'riemann.time 'linear-time linear-time))))) 68 | -------------------------------------------------------------------------------- /src/riemann/service.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.service 2 | "Lifecycle protocol for stateful services bound to a core.") 3 | 4 | (defprotocol Service 5 | "Services are components of a core with a managed lifecycle. They're used for 6 | stateful things like connection pools, network servers, and background 7 | threads." 8 | (reload! [service core] 9 | "Informs the service of a change in core.") 10 | (start! [service] 11 | "Starts a service. Must be idempotent.") 12 | (stop! [service] 13 | "Stops a service. Must be idempotent.") 14 | (equiv? [service1 service2] 15 | "Used to identify which services can remain running through a core 16 | transition, like reloading. If the old service is equivalent to the 17 | new service, the old service may be preserved and used by the new 18 | core. Otherwise, the old service may be shut down and replaced by 19 | the new.")) 20 | 21 | (defrecord ThreadService [name equiv-key f core running thread] 22 | Service 23 | (reload! [this new-core] 24 | (reset! core new-core)) 25 | 26 | (equiv? [this other] 27 | (and 28 | (instance? ThreadService other) 29 | (= name (:name other)) 30 | (= equiv-key (:equiv-key other)))) 31 | 32 | (start! [this] 33 | (locking this 34 | (when-not @running 35 | (reset! running true) 36 | (reset! thread (Thread. (fn thread-service-runner [] 37 | (while @running 38 | (f @core))))) 39 | (.start @thread)))) 40 | 41 | (stop! [this] 42 | (locking this 43 | (when @running 44 | (reset! 
running false) 45 | ; Wait for exit 46 | (while (.isAlive @thread) 47 | (Thread/sleep 5)))))) 48 | 49 | (defn thread-service 50 | "Returns a ThreadService which will call (f core) repeatedly when started. 51 | Will only stop between calls to f. Start and stop are blocking operations. 52 | Equivalent to other ThreadServices with the same name and equivalence key-- 53 | if not provided, defaults nil." 54 | ([name f] 55 | (thread-service name nil f)) 56 | ([name equiv-key f] 57 | (ThreadService. name equiv-key f (atom nil) (atom false) (atom nil)))) 58 | -------------------------------------------------------------------------------- /src/riemann/pubsub.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.pubsub 2 | "Provides publish-subscribe handling of events. Publishers push events onto a 3 | channel, which has n subscribers. Each subscriber subscribes to a channel 4 | with an optional predicate function. Events which match the predicate are 5 | sent to the subscriber.") 6 | 7 | ; Registry: 8 | ; channel1: 9 | ; id1: fun1 10 | ; id2: fun2 11 | ; id3: fun3 12 | ; channel2: 13 | ; id4: fun1 14 | 15 | (defn pubsub-registry 16 | "Returns a new pubsub registry, which tracks which subscribers are 17 | listening to which channels." 18 | [] 19 | (atom {:channels {} 20 | :last-sub-id 0})) 21 | 22 | (defn publish 23 | "Publish an event to the given channel in a registry." 24 | [registry channel event] 25 | (let [channels (:channels @registry)] 26 | (doseq [[id f] (channels channel)] 27 | (f event)))) 28 | 29 | (defn subscribe 30 | "Subscribe to the given channel in a registry with f, which is called with 31 | each event that arrives on that channel. Returns an ID for the subscription." 32 | [registry channel f] 33 | (let [sub-id (-> @registry :last-sub-id inc) 34 | inner-subscribe (fn [registry channel f] 35 | (-> registry 36 | (assoc-in [:channels channel sub-id] f) 37 | (assoc :last-sub-id sub-id)))] 38 | (swap! 
registry inner-subscribe channel f) 39 | sub-id)) 40 | 41 | (defn dissoc-in 42 | "Dissociates an entry from a nested associative structure returning a new 43 | nested structure. keys is a sequence of keys. Any empty maps that result 44 | will not be present in the new structure." 45 | [m [k & ks :as keys]] 46 | (if ks 47 | (if-let [nextmap (get m k)] 48 | (let [newmap (dissoc-in nextmap ks)] 49 | (if (seq newmap) 50 | (assoc m k newmap) 51 | (dissoc m k))) 52 | m) 53 | (dissoc m k))) 54 | 55 | (defn unsubscribe 56 | "Unsubscribe from the given registry by id. If you provide a channel to 57 | unsubscribe from, O(1). If you provide only the id, O(channels). The registry's :channels entry always remains a map, even after the last subscriber is removed." 58 | ([registry channel id] 59 | (swap! registry update-in [:channels] dissoc-in [channel id])) 60 | 61 | ([registry id] 62 | (swap! registry update-in [:channels] 63 | #(into {} (for [[chan subs] %] [chan (dissoc subs id)]))))) -------------------------------------------------------------------------------- /test/riemann/test/client.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.client 2 | (:use riemann.common 3 | riemann.core 4 | riemann.transport.tcp 5 | riemann.client 6 | riemann.index 7 | [riemann.logging :only [suppress]] 8 | clojure.test)) 9 | 10 | (riemann.logging/init) 11 | 12 | (deftest reconnect 13 | (suppress ["riemann.transport.tcp" "riemann.core"] 14 | (let [server (tcp-server) 15 | core (transition! (core) {:services [server]}) 16 | client (tcp-client)] 17 | (try 18 | ; Initial connection works 19 | (is (send-event client {:service "test"})) 20 | 21 | ; Kill server; should fail. 22 | (stop! core) 23 | (.setMinimumReconnectInterval client 0) 24 | (is (thrown? java.net.SocketException 25 | (send-event client {:service "test"}))) 26 | 27 | ; Restart server; should work 28 | (start! core) 29 | (try 30 | (send-event client {:service "test"}) 31 | (finally 32 | (stop! core))) 33 | 34 | (finally 35 | (close-client client) 36 | (stop! 
core)))))) 37 | 38 | ; Check that server error messages are correctly thrown. 39 | (deftest server-errors 40 | (suppress ["riemann.transport.tcp" "riemann.core"] 41 | (let [index (index) 42 | server (tcp-server core) 43 | core (transition! (core) {:services [server] 44 | :index index}) 45 | client (tcp-client)] 46 | 47 | (try 48 | (is (thrown? com.aphyr.riemann.client.ServerError 49 | (query client "invalid!"))) 50 | 51 | (let [e (try (query client "invalid!") 52 | (catch com.aphyr.riemann.client.ServerError e e))] 53 | (is (= "parse error: invalid term \"invalid\"" (.getMessage e)))) 54 | 55 | 56 | (finally 57 | (close-client client) 58 | (stop! core)))))) 59 | -------------------------------------------------------------------------------- /src/riemann/index.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.index 2 | "Maintains a stateful index of events by [host, service] key. Can be queried 3 | to return the most recent indexed events matching some expression. Can expire 4 | events which have exceeded their TTL. Presently the only implementation of 5 | the index protocol is backed by a nonblockinghashmap, but I plan to add an 6 | HSQLDB backend as well." 7 | (:require [riemann.query :as query]) 8 | (:use [riemann.time :only [unix-time]]) 9 | (:import (org.cliffc.high_scale_lib NonBlockingHashMap))) 10 | 11 | (defprotocol Index 12 | (clear [this] 13 | "Resets the index") 14 | (delete [this event] 15 | "Deletes any event with this host & service from index") 16 | (delete-exactly [this event] 17 | "Deletes event from index") 18 | (expire [this] 19 | "Return a seq of expired states from this index, removing each.") 20 | (search [this query-ast] 21 | "Returns a seq of events from the index matching this query AST") 22 | (update [this event] 23 | "Updates index with event")) 24 | 25 | ; The index accepts states and maintains a table of the most recent state for 26 | ; each unique [host, service]. 
It can be searched for states matching a query. 27 | 28 | (def default-ttl 60) 29 | 30 | (defn nbhm-index 31 | "Create a new NonBlockingHashMap-backed index. Events are keyed by [host service]. update stores an event unless its :state is \"expired\"; expire returns the events whose age exceeds their :ttl (default-ttl when :ttl is missing or nil), deleting each one from the map as the lazy result is realized." 32 | [] 33 | (let [hm (NonBlockingHashMap.)] 34 | (reify Index 35 | (clear [this] 36 | (.clear hm)) 37 | 38 | (delete [this event] 39 | (.remove hm [(:host event) (:service event)])) 40 | 41 | (delete-exactly [this event] 42 | (.remove hm [(:host event) (:service event)] event)) 43 | 44 | (expire [this] 45 | (filter 46 | (fn [{:keys [ttl time] :as state}] 47 | (let [age (- (unix-time) time)] ; assumes every indexed event carries a numeric :time 48 | (when (> age (or ttl default-ttl)) ; :ttl may be absent or nil; fall back to default-ttl 49 | (delete this state) 50 | true))) 51 | (.values hm))) 52 | 53 | (search [this query-ast] 54 | ; O(n), sadly. 55 | (let [matching (query/fun query-ast)] 56 | (filter matching (.values hm)))) 57 | 58 | (update [this event] 59 | (when-not (= "expired" (:state event)) 60 | (.put hm [(:host event) (:service event)] event) 61 | event)) 62 | 63 | clojure.lang.Seqable 64 | (seq [this] 65 | (seq (.values hm)))))) 66 | 67 | (defn index 68 | "Create a new index (currently: an nbhm index)" 69 | [] 70 | (nbhm-index)) 71 | -------------------------------------------------------------------------------- /src/riemann/Query.g: -------------------------------------------------------------------------------- 1 | grammar Query; 2 | 3 | options { 4 | output=AST; 5 | ASTLabelType=CommonTree; 6 | } 7 | 8 | tokens { 9 | AND = 'and'; 10 | OR = 'or'; 11 | NOT = 'not'; 12 | APPROXIMATELY = '=~'; 13 | NOT_EQUAL = '!='; 14 | EQUAL = '='; 15 | LESSER = '<'; 16 | LESSER_EQUAL = '<='; 17 | GREATER = '>'; 18 | GREATER_EQUAL = '>='; 19 | TAGGED = 'tagged'; 20 | } 21 | 22 | @header {package riemann;} 23 | @lexer::header {package riemann;} 24 | 25 | expr : (or EOF) -> or; 26 | 27 | or : and (WS* OR^ WS* and)*; 28 | 29 | and : (not | primary) (WS* AND^ WS* (not | primary))*; 30 | 31 | not : NOT^ WS* (not | primary); 32 | 33 | 34 | primary : ( 35 | ('(' or ')') -> ^(or) 36 | | simple -> simple 37 | ); 38 | 39 | 
fragment 40 | simple : ( t | f | nil 41 | | tagged 42 | | approximately 43 | | lesser 44 | | lesser_equal 45 | | greater 46 | | greater_equal 47 | | not_equal 48 | | equal 49 | ); 50 | 51 | approximately 52 | : field WS* APPROXIMATELY^ WS* value; 53 | lesser : field WS* LESSER^ WS* value; 54 | lesser_equal 55 | : field WS* LESSER_EQUAL^ WS* value; 56 | greater : field WS* GREATER^ WS* value; 57 | greater_equal 58 | : field WS* GREATER_EQUAL^ WS* value; 59 | not_equal 60 | : field WS* NOT_EQUAL^ WS* value; 61 | equal : field WS* EQUAL^ WS* value; 62 | 63 | tagged : TAGGED^ WS* String; 64 | 65 | value : (String | t | f | nil | INT | FLOAT); 66 | 67 | t : 'true'; 68 | f : 'false'; 69 | nil : 'null' | 'nil'; 70 | 71 | field : ('host' 72 | | 'service' 73 | | 'state' 74 | | 'description' 75 | | 'metric_f' 76 | | 'metric' 77 | | 'ttl' 78 | | 'time' 79 | ); 80 | 81 | ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'0'..'9'|'_')* 82 | ; 83 | 84 | INT : '-'? '0'..'9'+ 85 | ; 86 | 87 | FLOAT 88 | : '-'? ('0'..'9')+ ('.' ('0'..'9')*)? EXPONENT? 89 | ; 90 | 91 | WS : ( ' ' 92 | | '\t' 93 | | '\r' 94 | | '\n' 95 | ) {$channel=HIDDEN;} 96 | ; 97 | 98 | fragment 99 | EXPONENT : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; 100 | 101 | String : 102 | // '"' (EscapeSequence | FreeChar)* '"' 103 | // Still don't understand why this doesn't work 104 | '"' ( EscapeSequence | ~('\u0000'..'\u001f' | '\\' | '\"' ) )* '"' 105 | ; 106 | 107 | fragment EscapeSequence 108 | : '\\' (UnicodeEscape |'b'|'t'|'n'|'f'|'r'|'\"'|'\\') 109 | ; 110 | 111 | fragment UnicodeEscape 112 | : 'u' HexDigit HexDigit HexDigit HexDigit 113 | ; 114 | 115 | fragment HexDigit 116 | : '0'..'9' | 'A'..'F' | 'a'..'f' 117 | ; -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject riemann "0.1.6-SNAPSHOT" 2 | :description 3 | "A network event stream processor. 
Intended for analytics, metrics, and alerting; and to glue various monitoring systems together." 4 | :url "http://github.com/aphyr/riemann" 5 | ; :warn-on-reflection true 6 | ; :jvm-opts ["-server" "-d64" "-Xms1024m" "-Xmx1024m" "-XX:+UseParNewGC" "-XX:+UseConcMarkSweepGC" "-XX:+CMSParallelRemarkEnabled" "-XX:+AggressiveOpts" "-XX:+UseFastAccessorMethods" "-verbose:gc" "-XX:+PrintGCDetails"] 7 | :jvm-opts ["-server" "-Xms1024m" "-Xmx1024m" "-XX:+UseParNewGC" "-XX:+UseConcMarkSweepGC" "-XX:+CMSParallelRemarkEnabled" "-XX:+AggressiveOpts" "-XX:+UseFastAccessorMethods"] 8 | :repositories { 9 | "boundary-site" "http://maven.boundary.com/artifactory/repo" 10 | } 11 | :maintainer {:email "aphyr@aphyr.com"} 12 | :dependencies [ 13 | [org.clojure/algo.generic "0.1.0"] 14 | [org.clojure/clojure "1.4.0"] 15 | [org.clojure/math.numeric-tower "0.0.1"] 16 | [org.clojure/tools.logging "0.2.3"] 17 | [org.clojure/tools.nrepl "0.2.0-RC1"] 18 | [clojure-complete "0.2.2"] 19 | [log4j/log4j "1.2.16" :exclusions [javax.mail/mail 20 | javax.jms/jms 21 | com.sun.jdmk/jmxtools 22 | com.sun.jmx/jmxri]] 23 | [aleph "0.2.1-beta2"] 24 | [clj-http "0.4.1"] 25 | [cheshire "5.0.0"] 26 | [clj-librato "0.0.2"] 27 | [clj-time "0.4.3"] 28 | [clj-wallhack "1.0"] 29 | [com.boundary/high-scale-lib "1.0.3"] 30 | [com.draines/postal "1.8.0"] 31 | [incanter/incanter-charts "1.3.0"] 32 | [io.netty/netty "3.3.0.Final"] 33 | [log4j/apache-log4j-extras "1.0"] 34 | [org.antlr/antlr "3.2"] 35 | [org.slf4j/slf4j-log4j12 "1.6.4"] 36 | [riemann-clojure-client "0.0.6"] 37 | [slingshot "0.10.2"] 38 | ] 39 | :plugins [[codox "0.6.1"]] 40 | :test-selectors {:default (fn [x] (not (or (:integration x) 41 | (:time x) 42 | (:bench x)))) 43 | :integration :integration 44 | :email :email 45 | :graphite :graphite 46 | :librato :librato 47 | :time :time 48 | :bench :bench 49 | :focus :focus 50 | :all (fn [_] true)} 51 | :javac-options ["-target" "1.6" "-source" "1.6"] 52 | :java-source-paths ["src/riemann/"] 53 | 
:java-source-path "src/riemann/" 54 | :aot [riemann.bin] 55 | :main riemann.bin 56 | :codox {:output-dir "site/api"} 57 | ) 58 | -------------------------------------------------------------------------------- /src/riemann/deps.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.deps 2 | "Riemann's dependency resolution system expresses stateful relationships 3 | between events. Dependencies are expressed as Rules; a Rule is a statement 4 | about the relationship between a particular event and the current state of 5 | the index. 6 | 7 | Maps are rules which specify that their keys and values should be present in 8 | some event in the index. {} will match any non-empty index. {:service \"a\" 9 | :state \"ok\"} will match an index which has {:service \"a\" :state \"ok\" 10 | :metric 123}, and so on. 11 | 12 | (all & rules) matches only if all rules match. 13 | 14 | (any & rules) matches if any of the rules match. 15 | 16 | (localhost & rules) states that all child rules must have the same host as 17 | the event of interest. 18 | 19 | (depends a & bs) means that if a matches the current event (and only the 20 | current event, not the full index), b must match the current event and index. 21 | " 22 | (:use riemann.index) 23 | (:require [riemann.streams :as streams])) 24 | 25 | (defprotocol Rule 26 | (match [this context event])) 27 | 28 | (extend-protocol Rule 29 | clojure.lang.IPersistentMap 30 | (match [this index _] 31 | (some (fn [e] (= this (select-keys e (keys this)))) 32 | index) 33 | )) 34 | 35 | (defrecord All [rules] 36 | Rule 37 | (match [this index event] 38 | ; (prn "Matching all" rules) 39 | ; (prn "index are" index) 40 | ; (prn "event is" event) 41 | (every? #(match % index event) rules))) 42 | 43 | (defn all [& rules] 44 | (All. rules)) 45 | 46 | (defrecord Any [rules] 47 | Rule 48 | (match [this index event] 49 | (some #(match % index event) rules))) 50 | 51 | (defn any [& rules] 52 | (Any. 
rules)) 53 | 54 | (defrecord Localhost [rule] 55 | Rule 56 | (match [this index event] 57 | (match rule 58 | (filter (fn [e] (= (:host event) (:host e))) index) 59 | event))) 60 | 61 | (defn localhost [& rules] 62 | (Localhost. (apply all rules))) 63 | 64 | (defrecord Depends [a b] 65 | Rule 66 | (match [this index event] 67 | (if (match a [event] event) 68 | (match b index event) 69 | true))) 70 | 71 | (defn depends [a & bs] 72 | (Depends. a (All. bs))) 73 | 74 | (defn deps-tag [index rule & children] 75 | "Returns a stream which accepts events, checks whether they satisfy the given 76 | rule, and associates those which have their dependencies satisfied with 77 | {:deps-satisfied true}, and false for those which are satisfied." 78 | (fn [event] 79 | (streams/call-rescue 80 | (assoc event :deps-satisfied? (match rule index event)) 81 | children))) 82 | -------------------------------------------------------------------------------- /test/riemann/test/service.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.service 2 | (:import (java.util.concurrent TimeUnit 3 | LinkedBlockingQueue)) 4 | (:use riemann.service 5 | clojure.test)) 6 | 7 | (deftest thread-service-equiv-test 8 | (is (equiv? (thread-service :foo #()) 9 | (thread-service :foo #()))) 10 | (is (equiv? (thread-service :test 1 #()) 11 | (thread-service :test 1 #()))) 12 | 13 | (is (not (equiv? (thread-service :foo #()) 14 | (thread-service :bar #())))) 15 | (is (not (equiv? (thread-service :foo 1 #()) 16 | (thread-service :bar 1 #())))) 17 | (is (not (equiv? (thread-service :foo 1 #()) 18 | (thread-service :foo 2 #()))))) 19 | 20 | ; THIS IS A MUTABLE STATE OF AFFAIRS 21 | ; WHICH IS TO SAY, IT IS FUCKING DREADFUL 22 | (deftest thread-service-test 23 | (let [in (LinkedBlockingQueue.) 24 | out (LinkedBlockingQueue.) 
25 | s (thread-service 26 | :test 27 | (fn [core] 28 | (.put out [(.take in) core]))) 29 | send (fn [msg] 30 | (.put in msg) 31 | (.take out))] 32 | 33 | ; Shouldn't do anything before started. 34 | (.put in :before-start) 35 | (Thread/sleep 50) 36 | (is (= :before-start (.peek in))) 37 | (is (nil? (.peek out))) 38 | 39 | ; Should run when started 40 | (start! s) 41 | (is (= [:before-start nil] (.take out))) 42 | 43 | ; Should respond to subsequent messages 44 | (is (= [:a nil] (send :a))) 45 | 46 | ; Should reload core 47 | (reload! s :core) 48 | ; We may or may not have a waiting iteration with nil core 49 | (is (= :reload-1 (first (send :reload-1)))) 50 | (is (= [:reload-2 :core] (send :reload-2))) 51 | 52 | ; Start! is idempotent 53 | ; Not a very good test--should probably check the threads used. :/ 54 | (start! s) 55 | (is (= [:start-2 :core] (send :start-2))) 56 | 57 | ; Should shut down cleanly 58 | (let [f (future 59 | (Thread/sleep 50) 60 | (.put in :stop))] 61 | (stop! s) 62 | @f) 63 | (is #{nil [:stop :core]} (.poll out)) 64 | 65 | ; Is stopped 66 | (.put in :stop-2) 67 | (Thread/sleep 50) 68 | (is #{:stop :stop-2} (.poll in)) 69 | 70 | ; Stop is idempotent 71 | (stop! s) 72 | 73 | ; Can restart 74 | (.clear in) 75 | (.clear out) 76 | (reload! s :core-2) 77 | (start! s) 78 | (is (= [:restarted :core-2] (send :restarted))))) 79 | -------------------------------------------------------------------------------- /src/riemann/email.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.email 2 | "Send email about events. Create a mailer with (mailer opts), then create 3 | streams which send email with (your-mailer \"shodan@tau.ceti.five\"). Or 4 | simply call email-event directly." 5 | (:use riemann.common) 6 | (:use postal.core) 7 | (:use [clojure.string :only [join]])) 8 | 9 | (defn- human-uniq 10 | "Returns a human-readable string describing things, e.g. 
11 | 12 | importer 13 | api1, api2, api4 14 | 23 services" 15 | [things, type] 16 | (let [things (distinct things)] 17 | (case (count things) 18 | 0 nil 19 | 1 (first things) 20 | 2 (str (first things) " and " (nth things 1)) 21 | 3 (join ", " things) 22 | 4 (join ", " things) 23 | (str (count things) " " type)))) 24 | 25 | (defn- subject 26 | "Constructs a subject line for a set of events." 27 | [events] 28 | (join " " (keep identity 29 | [(human-uniq (map :host events) "hosts") 30 | (human-uniq (map :service events) "services") 31 | (human-uniq (map :state events) "states")]))) 32 | 33 | (defn- body 34 | "Constructs an email body for a set of events." 35 | [events] 36 | (join "\n\n\n" 37 | (map 38 | (fn [event] 39 | (str 40 | "At " (time-at (:time event)) "\n" 41 | (:host event) " " 42 | (:service event) " " 43 | (:state event) " (" 44 | (:metric event) ")\n" 45 | "Tags: [" (join ", " (:tags event)) "]" 46 | "\n\n" 47 | (:description event))) 48 | events))) 49 | 50 | (defn email-event 51 | "Send event(s) with the given configuration (:host, :port, :user, :to, etc)" 52 | [opts events] 53 | (let [events (flatten [events])] 54 | (send-message 55 | (merge {:subject (subject events) 56 | :body (body events)} 57 | opts)))) 58 | 59 | (defn mailer 60 | "Returns a mailer which creates email streams, which take events. The mailer 61 | is invoked with an address or a sequence of addresses; it returns a function 62 | that takes events and sends email about that event to those addresses. 63 | Example: 64 | 65 | (def email (mailer {:from \"riemann@trioptimum.org\" 66 | :host \"mail.relay\" 67 | :user \"foo\" 68 | :pass \"bar\"})) 69 | 70 | (changed :state 71 | (email \"xerxes@trioptimum.org\" \"shodan@trioptimum.org\")) 72 | 73 | This makes it easy to configure your email settings once and re-use them 74 | for different recipients. Of course, you can set :to in the mailer options 75 | as well, and use (email) without args. Options are passed to Postal." 
76 | [opts] 77 | 78 | (let [opts (merge {:from "riemann"} 79 | opts)] 80 | 81 | (fn [& recipients] 82 | (fn [event] 83 | (let [opts (if (empty? recipients) 84 | opts 85 | (merge opts {:to recipients}))] 86 | (email-event opts event)))))) 87 | -------------------------------------------------------------------------------- /tasks/leiningen/tar.clj: -------------------------------------------------------------------------------- 1 | (ns leiningen.tar 2 | (:use [clojure.java.shell :only [sh with-sh-dir]] 3 | [clojure.java.io :only [file delete-file writer copy]] 4 | [clojure.string :only [join capitalize trim-newline]] 5 | [leiningen.uberjar :only [uberjar]])) 6 | 7 | (defn delete-file-recursively 8 | "Delete file f. If it's a directory, recursively delete all its contents. 9 | Raise an exception if any deletion fails unless silently is true." 10 | [f & [silently]] 11 | (System/gc) ; This sometimes helps release files for deletion on windows. 12 | (let [f (file f)] 13 | (if (.isDirectory f) 14 | (doseq [child (.listFiles f)] 15 | (delete-file-recursively child silently))) 16 | (delete-file f silently))) 17 | 18 | (defn tar-dir 19 | "Tar package working directory." 20 | [project] 21 | (file (:root project) "target" "tar" (str (:name project) "-" 22 | (:version project)))) 23 | 24 | (defn cleanup 25 | [project] 26 | ; Delete working dir. 27 | (when (.exists (file (:root project) "target" "tar")) 28 | (delete-file-recursively (file (:root project) "target" "tar")))) 29 | 30 | (defn reset 31 | [project] 32 | (cleanup project) 33 | (sh "rm" (str (:root project) "/target/*.tar.bz2"))) 34 | 35 | (defn make-tar-dir 36 | "Creates the tarball package structure in a new directory." 
37 | [project] 38 | (let [dir (tar-dir project)] 39 | (.mkdirs dir) 40 | 41 | ; Jar 42 | (.mkdirs (file dir "lib")) 43 | (copy (file (:root project) "target" 44 | (str "riemann-" (:version project) "-standalone.jar")) 45 | (file dir "lib" "riemann.jar")) 46 | 47 | ; Binary 48 | (.mkdirs (file dir "bin")) 49 | (copy (file (:root project) "pkg" "tar" "riemann") 50 | (file dir "bin" "riemann")) 51 | (.setExecutable (file dir "bin" "riemann") true false) 52 | 53 | ; Config 54 | (.mkdirs (file dir "etc")) 55 | (copy (file (:root project) "pkg" "riemann.config") 56 | (file dir "etc" "riemann.config")) 57 | 58 | dir)) 59 | 60 | (defn write 61 | "Write string to file, plus newline" 62 | [file string] 63 | (with-open [w (writer file)] 64 | (.write w (str (trim-newline string) "\n")))) 65 | 66 | (defn compress 67 | "Convert given package directory to a .tar.bz2." 68 | [project tar-dir] 69 | (let [tarball (str (file (:root project) 70 | "target" 71 | (str (:name project) 72 | "-" 73 | (:version project) 74 | ".tar.bz2")))] 75 | (with-sh-dir (.getParent tar-dir) 76 | (print (:err (sh "tar" "cvjf" tarball (.getName tar-dir))))) 77 | (write (str tarball ".md5") 78 | (:out (sh "md5sum" (str tarball)))))) 79 | 80 | (defn tar 81 | ([project] (tar project true)) 82 | ([project uberjar?] 83 | (reset project) 84 | (when uberjar? 
(uberjar project)) 85 | (compress project (make-tar-dir project)) 86 | (cleanup project))) 87 | -------------------------------------------------------------------------------- /src/riemann/transport/graphite.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.transport.graphite 2 | (:import [org.jboss.netty.util CharsetUtil] 3 | [org.jboss.netty.channel Channels] 4 | [org.jboss.netty.handler.codec.oneone OneToOneDecoder] 5 | [org.jboss.netty.handler.codec.string StringDecoder StringEncoder] 6 | [org.jboss.netty.handler.codec.frame 7 | DelimiterBasedFrameDecoder 8 | Delimiters]) 9 | (:use [riemann.transport.tcp :only [tcp-server]] 10 | [clojure.string :only [split]])) 11 | 12 | (defn decode-graphite-line 13 | "Decode a line coming from graphite. 14 | Graphite uses a simple scheme where each metric is given as a CRLF delimited 15 | line, space split with three items: 16 | 17 | * The metric name 18 | * The metric value (optionally NaN) 19 | * The timestamp 20 | 21 | By default, decode-graphite-line will yield a simple metric with just 22 | a service metric and timestamp, a parser-fn can be given to it, which 23 | will yield a map to merge onto the result. This can be used when 24 | graphite metrics have known patterns that you wish to extract more 25 | information (host, refined service name, tags) from" 26 | [line parser-fn] 27 | (when-let [[service metric timestamp] (split line #" ")] 28 | (when (not= metric "nan") ;; discard nan values 29 | {:ok true 30 | :states [] 31 | :events [(let [res {:service service 32 | :metric (Float. metric) 33 | :time (Long. timestamp)}] 34 | (if parser-fn (merge res (parser-fn res)) res))]}))) 35 | 36 | (defn graphite-frame-decoder 37 | "A closure which yields a graphite frame-decoder. 
Taking an argument 38 | which will be given to decode-graphite-line (hence the closure)" 39 | [parser-fn] 40 | (fn [] 41 | (proxy [OneToOneDecoder] [] 42 | (decode [context channel message] 43 | (decode-graphite-line message parser-fn))))) 44 | 45 | (defn graphite-server 46 | "Start a graphite-server, some bits could be factored with tcp-server. 47 | Only the default option map and the bootstrap change." 48 | ([] (graphite-server {})) 49 | ([opts] 50 | (let [pipeline-factory #(doto (Channels/pipeline) 51 | (.addLast "framer" 52 | (DelimiterBasedFrameDecoder. 53 | 1024 ;; Will the magic ever stop ? 54 | (Delimiters/lineDelimiter))) 55 | (.addLast "string-decoder" 56 | (StringDecoder. CharsetUtil/UTF_8)) 57 | (.addLast "string-encoder" 58 | (StringEncoder. CharsetUtil/UTF_8)) 59 | (.addLast "graphite-decoder" 60 | ((graphite-frame-decoder 61 | (:parser-fn opts)))))] 62 | (tcp-server (merge {:host "127.0.0.1" 63 | :port 2003 64 | :pipeline-factory pipeline-factory} 65 | opts))))) 66 | -------------------------------------------------------------------------------- /test/riemann/test/index.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.index 2 | (:use riemann.index 3 | riemann.query 4 | [riemann.time :only [unix-time]] 5 | clojure.test)) 6 | 7 | (deftest nbhm-update 8 | (let [i (nbhm-index)] 9 | (update i {:host 1}) 10 | (update i {:host 2}) 11 | (update i {:host 1 :service 3 :state :ok}) 12 | (update i {:host 1 :service 3 :description "new"}) 13 | 14 | (is (= (set i) 15 | #{{:host 1} 16 | {:host 2} 17 | {:host 1 :service 3 :description "new"}})))) 18 | 19 | (deftest nhbm-delete 20 | (let [i (nbhm-index)] 21 | (update i {:host 1}) 22 | (update i {:host 2}) 23 | (delete i {:host 1 :service 1}) 24 | (delete i {:host 2 :state :ok}) 25 | (is (= (set i) 26 | #{{:host 1}})))) 27 | 28 | (deftest nhbm-search 29 | (let [i (nbhm-index)] 30 | (update i {:host 1}) 31 | (update i {:host 2 :service "meow"}) 32 | (update i 
{:host 3 :service "mrrrow"}) 33 | (is (= (set (search i (ast "host >= 2 and not service =~ \"%r%\""))) 34 | #{{:host 2 :service "meow"}})))) 35 | 36 | (deftest nhbm-expire 37 | (let [i (nbhm-index)] 38 | (update i {:host 1 :ttl 0 :time (unix-time)}) 39 | (update i {:host 2 :ttl 10 :time (unix-time)}) 40 | (update i {:host 3 :ttl 20 :time (- (unix-time) 21)}) 41 | 42 | (let [expired (expire i)] 43 | (is (= (set (map (fn [e] (:host e)) 44 | expired)) 45 | #{1 3}))) 46 | 47 | (is (= (map (fn [e] (:host e)) i) 48 | [2])))) 49 | 50 | (defn random-event 51 | [& {:as event}] 52 | (merge {:host (rand-int 100) 53 | :service (rand-int 100) 54 | :ttl (rand-int 500) 55 | :time (- (unix-time) (rand-int 30))} 56 | event)) 57 | 58 | (deftest ^:bench indexing-nbhm-time 59 | (let [_ (println "building events, this might take some time") 60 | not-much (doall (repeatedly 100 random-event)) 61 | a-few (doall (repeatedly 100000 random-event)) 62 | a-lot (doall (repeatedly 1000000 random-event)) 63 | i (nbhm-index)] 64 | (println "updating and expiring the same 100 events 10000 times:") 65 | (time (dotimes [iter 10000] 66 | (do (doseq [event not-much] 67 | (update i event))))) 68 | (println "expiring") 69 | (time (dotimes [iter 10000] (doall (expire i)))) 70 | (clear i) 71 | 72 | (println "updating and expiring the same 100000 events 100 times:") 73 | (time (dotimes [iter 100] 74 | (do (doseq [event a-few] 75 | (update i event))))) 76 | (println "expiring") 77 | (time (dotimes [iter 100] (doall (expire i)))) 78 | (clear i) 79 | 80 | (println "updating and expiring the same 10000000 events 10 times:") 81 | (time (dotimes [iter 10] 82 | (do (doseq [event a-lot] 83 | (update i event))))) 84 | (println "expiring") 85 | (time (dotimes [iter 10] (doall (expire i)))))) 86 | -------------------------------------------------------------------------------- /test/riemann/test/config.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.config 2 
| (:use riemann.config 3 | clojure.test 4 | [riemann.index :only [Index]]) 5 | (:require [riemann.core :as core] 6 | [riemann.pubsub :as pubsub] 7 | [riemann.logging :as logging] 8 | [riemann.streams :as streams])) 9 | 10 | (defn reset-core! [f] 11 | (logging/suppress "riemann.core" 12 | (clear!) 13 | (core/stop! @core) 14 | (reset! core (core/core)) 15 | (f) 16 | (clear!) 17 | (reset! core (core/core)) 18 | (core/stop! @core))) 19 | 20 | (use-fixtures :each reset-core!) 21 | 22 | (deftest blank-test 23 | (is (empty? (:streams @core))) 24 | (is (empty? (:streams @next-core))) 25 | (is (empty? (:services @core))) 26 | (is (empty? (:services @core)))) 27 | 28 | (deftest apply-test 29 | (is (not= @core @next-core)) 30 | (let [old-next-core @next-core] 31 | (apply!) 32 | (is (= old-next-core @core)) 33 | (is (not= @core @next-core)))) 34 | 35 | (deftest tcp-server-test 36 | (tcp-server :host "a") 37 | (is (= "a" (:host (first (:services @next-core))))) 38 | (is (empty? (:services @core)))) 39 | 40 | (deftest udp-server-test 41 | (udp-server :host "b") 42 | (is (= "b" (:host (first (:services @next-core))))) 43 | (is (empty? (:services @core)))) 44 | 45 | (deftest ws-server-test 46 | (ws-server :port 1234) 47 | (is (= 1234 (:port (first (:services @next-core))))) 48 | (is (empty? (:services @core)))) 49 | 50 | (deftest graphite-server-test 51 | (graphite-server :port 1) 52 | (is (= 1 (:port (first (:services @next-core))))) 53 | (is (empty? (:services @core)))) 54 | 55 | (deftest streams-test 56 | (streams :a) 57 | (streams :b) 58 | (is (= [:a :b] (:streams @next-core))) 59 | (is (empty? (:streams @core)))) 60 | 61 | (deftest index-test 62 | (let [i (index)] 63 | (is (satisfies? Index i)) 64 | (is (= i (:index @next-core))) 65 | (is (nil? (:index @core))))) 66 | 67 | (deftest update-index-test 68 | (let [i (index) 69 | up (update-index i)] 70 | (apply!) 
71 | (up {:service 1 :state "ok"}) 72 | (is (= (seq i) [{:service 1 :state "ok"}])))) 73 | 74 | (deftest subscribe-in-stream-test 75 | (let [received (promise)] 76 | (streams 77 | (streams/where (service "test-in") 78 | (publish :test)) 79 | (subscribe :test (partial deliver received))) 80 | (apply!) 81 | 82 | ; Send through streams 83 | ((first (:streams @core)) {:service "test-in"}) 84 | (is (= {:service "test-in"} @received)))) 85 | 86 | (deftest subscribe-outside-stream-test 87 | (let [received (promise)] 88 | (subscribe :test (partial deliver received)) 89 | (apply!) 90 | 91 | ; Send outside streams 92 | (pubsub/publish (:pubsub @core) :test "hi") 93 | (is (= "hi" @received)))) 94 | -------------------------------------------------------------------------------- /src/riemann/transport.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.transport 2 | "Functions used in several transports. Some netty parts transpire 3 | here since netty is the preferred method of providing transports" 4 | (:use [slingshot.slingshot :only [try+]] 5 | [riemann.common :only [decode-msg]] 6 | [riemann.index :only [search]] 7 | clojure.tools.logging) 8 | (:require [riemann.query :as query]) 9 | (:import 10 | (com.aphyr.riemann Proto$Msg) 11 | (org.jboss.netty.channel ChannelPipelineFactory ChannelPipeline) 12 | (org.jboss.netty.buffer ChannelBufferInputStream) 13 | (org.jboss.netty.handler.codec.oneone OneToOneDecoder) 14 | (org.jboss.netty.handler.codec.protobuf ProtobufDecoder 15 | ProtobufEncoder) 16 | (org.jboss.netty.handler.execution ExecutionHandler 17 | OrderedMemoryAwareThreadPoolExecutor))) 18 | 19 | (defprotocol Transport 20 | "A riemann transport is a way of emitting and receiving events 21 | over the wire." 22 | (setup [this opts] 23 | "Setup step for transports. 
In order to handle server life-cycle 24 | correctly, can be called several times.") 25 | (capabilities [this] 26 | "Return a collection of keywords representing what the transport 27 | can handle, possible values are: :queries and :events") 28 | (start [this] 29 | "Start listening for events and ") 30 | (stop [this] 31 | "Gracefully stop the server")) 32 | 33 | (defn channel-pipeline-factory 34 | "Return a factory for ChannelPipelines given a wire protocol-specific 35 | pipeline factory and a network protocol-specific handler." 36 | [pipeline-factory handler] 37 | (reify ChannelPipelineFactory 38 | (getPipeline [this] 39 | (doto ^ChannelPipeline (pipeline-factory) 40 | (.addLast "executor" (ExecutionHandler. 41 | (OrderedMemoryAwareThreadPoolExecutor. 42 | 16 1048576 1048576))) ; Maaagic values! 43 | (.addLast "handler" handler))))) 44 | 45 | (defn protobuf-decoder 46 | "Decodes protobufs to Msg objects" 47 | [] 48 | (ProtobufDecoder. (Proto$Msg/getDefaultInstance))) 49 | 50 | (defn protobuf-encoder 51 | "Encodes protobufs to Msg objects" 52 | [] 53 | (ProtobufEncoder.)) 54 | 55 | (defn msg-decoder 56 | "Netty decoder for Msg protobuf objects -> maps" 57 | [] 58 | (proxy [OneToOneDecoder] [] 59 | (decode [context channel message] 60 | (decode-msg message)))) 61 | 62 | (defn handle 63 | "Handles a msg with the given core." 
64 | [core msg] 65 | (try+ 66 | ;; Send each event/state to each stream 67 | (doseq [event (concat (:events msg) (:states msg)) 68 | stream (:streams core)] 69 | (stream event)) 70 | 71 | (if (:query msg) 72 | ;; Handle query 73 | (let [ast (query/ast (:string (:query msg)))] 74 | (if-let [i (:index core)] 75 | {:ok true :events (search i ast)} 76 | {:ok false :error "no index"})) 77 | 78 | {:ok true}) 79 | 80 | ;; Some kind of error happened 81 | (catch [:type :riemann.query/parse-error] {:keys [message]} 82 | {:ok false :error (str "parse error: " message)}) 83 | (catch Exception ^Exception e 84 | {:ok false :error (.getMessage e)}))) 85 | -------------------------------------------------------------------------------- /src/riemann/logging.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.logging 2 | "Configures log4j to log to a file. It's a trap!" 3 | ; With thanks to arohner 4 | (:import (org.apache.log4j 5 | Logger 6 | BasicConfigurator 7 | EnhancedPatternLayout 8 | Level 9 | ConsoleAppender 10 | FileAppender 11 | SimpleLayout) 12 | (org.apache.log4j.spi RootLogger)) 13 | (:import (org.apache.log4j.rolling TimeBasedRollingPolicy 14 | RollingFileAppender)) 15 | (:import org.apache.commons.logging.LogFactory) 16 | (:require wall.hack)) 17 | 18 | (defn set-level 19 | "Set the level for the given logger, by string name. Use: 20 | (set-level \"riemann.client\", Level/DEBUG)" 21 | ([level] 22 | (. (Logger/getRootLogger) (setLevel level))) 23 | ([logger level] 24 | (. (Logger/getLogger logger) (setLevel level)))) 25 | 26 | (defmacro suppress 27 | "Turns off logging for the evaluation of body." 
28 | [loggers & body] 29 | (let [[logger & more] (flatten [loggers])] 30 | (if logger 31 | `(let [old-level# (.getLevel (Logger/getLogger ~logger))] 32 | (try 33 | (set-level ~logger Level/FATAL) 34 | (suppress ~more ~@body) 35 | (finally 36 | (set-level ~logger old-level#)))) 37 | `(do ~@body)))) 38 | 39 | (def riemann-layout 40 | "A nice format for log lines." 41 | (EnhancedPatternLayout. "%p [%d] %t - %c - %m%n%throwable%n")) 42 | 43 | (defn init 44 | "Initialize log4j. You will probably call this from the config file. Options: 45 | 46 | :file The file to log to. If omitted, logs to console only." 47 | [& { :keys [file] }] 48 | ; Reset loggers 49 | (doto (Logger/getRootLogger) 50 | (.removeAllAppenders) 51 | (.addAppender (ConsoleAppender. riemann-layout))) 52 | 53 | (when file 54 | (let [rolling-policy (doto (TimeBasedRollingPolicy.) 55 | (.setActiveFileName file) 56 | (.setFileNamePattern 57 | (str file ".%d{yyyy-MM-dd}.gz")) 58 | (.activateOptions)) 59 | log-appender (doto (RollingFileAppender.) 60 | (.setRollingPolicy rolling-policy) 61 | (.setLayout riemann-layout) 62 | (.activateOptions))] 63 | (.addAppender (Logger/getRootLogger) log-appender))) 64 | 65 | ; Set levels. 66 | (. (Logger/getRootLogger) (setLevel Level/INFO)) 67 | 68 | (set-level "riemann.client" Level/DEBUG) 69 | (set-level "riemann.server" Level/DEBUG) 70 | (set-level "riemann.streams" Level/DEBUG) 71 | (set-level "riemann.graphite" Level/DEBUG)) 72 | 73 | ; Not sure where he intended this to go.... 74 | (defn- add-file-appender [loggername filename] 75 | (.addAppender (Logger/getLogger loggername) 76 | (doto (FileAppender.) 77 | (.setLayout riemann-layout)))) 78 | 79 | (defn nice-syntax-error 80 | "Rewrites clojure.lang.LispReader$ReaderException to have error messages that 81 | might actually help someone." 82 | ([e] (nice-syntax-error e "(no file)")) 83 | ([e file] 84 | ; Lord help me. 
85 | (let [line (wall.hack/field (class e) :line e) 86 | msg (.getMessage (or (.getCause e) e))] 87 | (RuntimeException. (str "Syntax error (" file ":" line ") " msg))))) 88 | -------------------------------------------------------------------------------- /test/riemann/test/time.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.time 2 | (:use riemann.time 3 | [riemann.common :exclude [unix-time linear-time]] 4 | clojure.math.numeric-tower 5 | clojure.test 6 | clojure.tools.logging) 7 | (:require [riemann.logging :as logging])) 8 | 9 | (riemann.logging/init) 10 | 11 | (defn reset-time! 12 | [f] 13 | (stop!) 14 | (reset-tasks!) 15 | (start!) 16 | (f) 17 | (stop!) 18 | (reset-tasks!)) 19 | (use-fixtures :each reset-time!) 20 | 21 | (deftest ^:time clock-test 22 | (is (approx-equal (/ (System/currentTimeMillis) 1000) 23 | (unix-time)))) 24 | 25 | (deftest ^:time once-test 26 | "Run a function once, to verify that the threadpool works at all." 27 | (let [t0 (unix-time) 28 | results (atom [])] 29 | (after! 0.1 #(swap! results conj (- (unix-time) t0))) 30 | (Thread/sleep 300) 31 | (is (<= 0.085 (first @results) 0.115)))) 32 | 33 | ; LMAO if this test becomes hilariously unstable and/or exhibits genuine 34 | ; heisenbugs for any unit of time smaller than 250ms. 35 | (deftest ^:time defer-cancel-test 36 | (let [x1 (atom 0) 37 | x2 (atom 0) 38 | t1 (every! 1 (fn [] (swap! x1 inc))) 39 | t2 (every! 1 1 #(swap! x2 inc))] 40 | (Thread/sleep 500) 41 | (is (= 1 @x1)) 42 | (is (= 0 @x2)) 43 | 44 | (Thread/sleep 1000) 45 | (is (= 2 @x1)) 46 | (is (= 1 @x2)) 47 | 48 | ; Defer 49 | (defer t1 1.5) 50 | (Thread/sleep 1000) 51 | (is (= 2 @x1)) 52 | (is (= 2 @x2)) 53 | 54 | (Thread/sleep 1000) 55 | (is (= 3 @x1)) 56 | (is (= 3 @x2)) 57 | 58 | ; Cancel 59 | (cancel t2) 60 | (Thread/sleep 1000) 61 | (is (= 4 @x1)) 62 | (is (= 3 @x2)))) 63 | 64 | (deftest ^:time exception-recovery-test 65 | (let [x (atom 0)] 66 | (every! 
0.1 (fn [] (swap! x inc) (/ 1 0)))
    (Thread/sleep 150)
    (is (= 2 @x))))

; Applies f to every value of the map kv, returning a new map.
(defn mapvals
  [f kv]
  (into {} (map (fn [[k v]] [k (f v)]) kv)))

; Successive overlapping pairs of coll: (a b) (b c) (c d) ...
(defn pairs
  [coll]
  (partition 2 1 coll))

; Differences between successive elements of coll (later minus earlier).
(defn differences
  [coll]
  (map (fn [[x y]] (- y x)) (pairs coll)))

(deftest ^:time periodic-test
  "Run one function periodically."
  (let [results (atom [])]
    ; For a wide variety of intervals, start periodic jobs to record
    ; the time.
    (doseq [interval (range 1/10 5 1/10)]
      (every! interval #(swap! results conj [interval (unix-time)])))

    (Thread/sleep 20000)
    (stop!)

    ; Group recorded times by interval, then look at the gaps between runs.
    (let [groups (mapvals (fn [vs] (map second vs))
                          (group-by first @results))
          differences (mapvals differences groups)]
      (doseq [[interval deltas] differences]
        ; First delta will be slightly smaller because the scheduler
        ; computed an absolute time in the *past*
        (is (<= -0.025 (- (first deltas) interval) 0))

        (let [deltas (drop 1 deltas)]
          ; Remaining deltas should be accurate to within 50ms.
          (is (every? (fn [delta]
                        (< -0.05 (- delta interval) 0.05)) deltas))
          ; and moreover, there should be no cumulative drift: the mean
          ; delta must be within 5ms of the interval.
          (is (< -0.005
                 (- (/ (reduce + deltas) (count deltas)) interval)
                 0.005)))))))

-------------------------------------------------------------------------------- /src/riemann/core.clj: --------------------------------------------------------------------------------
(ns riemann.core
  "Binds together an index, servers, and streams."
  (:use [riemann.time :only [unix-time]]
        clojure.tools.logging)
  (:require riemann.streams
            [riemann.service :as service]
            [riemann.index :as index]
            [riemann.pubsub :as ps]))

; A core: its streams, services, index and pubsub registry.
(defrecord Core
  [streams services index pubsub])

(defn reaper
  "Returns a service which expires states from its core's index every interval
  (default 10) seconds.
Expired events are streamed to the core's streams. The
  streamed states have only the host and service copied, current time, and
  state expired. Expired events from the index are also published to the
  \"index\" pubsub channel."
  [interval]
  ; interval is given in seconds; Thread/sleep wants milliseconds.
  (let [interval (* 1000 (or interval 10))]
    (service/thread-service
      :reaper interval
      (fn worker [core]
        (Thread/sleep interval)
        (let [i (:index core)
              streams (:streams core)]
          ; A core without an index has nothing to expire.
          (when i
            (doseq [state (index/expire i)]
              (let [e {:host (:host state)
                       :service (:service state)
                       :state "expired"
                       :time (unix-time)}]
                (when-let [registry (:pubsub core)]
                  (ps/publish registry "index" e))
                (doseq [stream streams]
                  (stream e))))))))))

(defn core
  "Create a new core."
  []
  (Core. [] [] nil (ps/pubsub-registry)))

(defn transition!
  "A core transition \"merges\" one core into another. Cores are immutable,
  but the stateful resources associated with them aren't. When you call
  (transition! old-core new-core), we:

  1. Stop old core services without an equivalent in the new core.

  2. Merge the new core's services with equivalents from the old core.

  3. Reload all services with the merged core.

  4. Start all services in the merged core.

  Finally, we return the merged core. old-core and new-core can be discarded."
  [old-core new-core]
  ; For each new service, reuse the equivalent old service if there is one.
  (let [merged-services (map (fn [svc]
                               (or (first (filter #(service/equiv? % svc)
                                                  (:services old-core)))
                                   svc))
                             (:services new-core))
        merged (assoc new-core :services merged-services)]

    ; Stop old services
    (dorun (pmap service/stop!
                 (remove (set merged-services) (:services old-core))))


    ; Reload merged services
    (dorun (pmap #(service/reload! % merged) merged-services))

    ; Start merged services
    (dorun (pmap service/start!
merged-services))
    merged))

(defn start!
  "Start the given core. Reloads and starts all services."
  [core]
  (dorun (pmap #(service/reload! % core) (:services core)))
  (dorun (pmap service/start! (:services core)))
  (info "Hyperspace core online"))

(defn stop!
  "Stops the given core and all services."
  [core]
  (info "Core stopping")
  (dorun (pmap service/stop! (:services core)))
  (info "Hyperspace core shut down"))

(defn update-index
  "Updates this core's index with an event. Also publishes to the index pubsub
  channel."
  [core event]
  ; Only publish when index/update returns a truthy value.
  (when (index/update (:index core) event)
    (when-let [registry (:pubsub core)]
      (ps/publish registry "index" event))))

(defn delete-from-index
  "Deletes an event from this core's index (calls index/delete). Unlike
  update-index, nothing is published to pubsub."
  [core event]
  (index/delete (:index core) event))

-------------------------------------------------------------------------------- /src/riemann/query.clj: --------------------------------------------------------------------------------
(ns riemann.query
  "The query parser. Parses strings into ASTs, and converts ASTs to functions
  which match events."
  (:use riemann.common)
  (:use [slingshot.slingshot :only [throw+ try+]])
  (:import (org.antlr.runtime ANTLRStringStream
                              CommonTokenStream)
           (riemann QueryLexer QueryParser)))

; With many thanks to Brian Carper
; http://briancarper.net/blog/554/antlr-via-clojure

(defn parse-string
  "Parse string into ANTLR tree nodes"
  [s]
  (try
    (let [lexer (QueryLexer. (ANTLRStringStream. s))
          tokens (CommonTokenStream. lexer)
          parser (QueryParser.
tokens)]
      (.getTree (.expr parser)))
    (catch Throwable e
      ; Report the underlying cause's message when there is one. Not every
      ; Throwable has a cause, and calling .getMessage on nil would replace
      ; the parse error with a NullPointerException.
      (throw+ {:type ::parse-error
               :message (.getMessage (or (.getCause e) e))}))))

(defn- make-regex
  "Convert a string like \"foo%\" into /^foo.*$/"
  [string]
  ; Split into % wildcards and literal runs; literals are regex-quoted.
  (let [tokens (re-seq #"%|[^%]+" string)
        pairs (map (fn [token]
                     (case token
                       "%" ".*"
                       (java.util.regex.Pattern/quote token)))
                   tokens)]
    (re-pattern (str "^" (apply str pairs) "$"))))

(defn node-ast
  "The AST for a given parse node. Operators become Clojure forms over the
  event field symbols (host, service, ...); comparison and regex operators
  are guarded with `when` so a nil field yields nil instead of throwing.
  Parenthesis tokens map to :useless and are dropped from child lists. Any
  other token must read as a number or string, otherwise a ::parse-error is
  thrown."
  [node]
  (let [n (.getText node)
        kids (remove (fn [x] (= x :useless))
                     (map node-ast (.getChildren node)))]
    (case n
      "or" (apply list 'or kids)
      "and" (apply list 'and kids)
      "not" (apply list 'not kids)
      "=" (apply list '= kids)
      ">" (list 'when (first kids) (apply list '> kids))
      ">=" (list 'when (first kids) (apply list '>= kids))
      "<" (list 'when (first kids) (apply list '< kids))
      "<=" (list 'when (first kids) (apply list '<= kids))
      "=~" (list 'when (first kids) (list 're-find (make-regex (last kids))
                                          (first kids)))
      "!=" (list 'not (apply list '= kids))
      "tagged" (list 'when 'tags (list 'member? (first kids) 'tags))
      "(" :useless
      ")" :useless
      "nil" nil
      "null" nil
      "true" true
      "false" false
      "host" 'host
      "service" 'service
      "state" 'state
      "description" 'description
      "metric_f" 'metric_f
      "metric" 'metric
      "time" 'time
      "ttl" 'ttl
      ; Literal terms. Query text may come from remote clients, so never let
      ; the reader evaluate #=(...) forms while parsing a literal.
      (when n (let [term (binding [*read-eval* false]
                           (read-string n))]
                (if (or (number? term)
                        (string? term))
                  term
                  (throw+ {:type ::parse-error
                           :message (str "invalid term \"" n "\"")})))))))

(defn ast
  "The expression AST for a given string"
  [string]
  (node-ast (parse-string string)))

(defn fun
  "Transforms an AST into a fn [event] which returns true if the query matches
  that event.
Example:

  (def q (fun (ast \"metric > 2\")))
  (q {:metric 1}) => false
  (q {:metric 3}) => true"
  [ast]
  ; Build (fn [event] (let [...event fields...] ast)) and compile it with
  ; eval, so the symbols produced by node-ast resolve to the event's fields.
  ; NOTE(review): eval of a form derived from query text -- the AST builder
  ; is what restricts which forms can appear here.
  (eval
    (list 'fn ['event]
          (list 'let '[host (:host event)
                       service (:service event)
                       state (:state event)
                       description (:description event)
                       metric_f (:metric_f event)
                       metric (:metric event)
                       time (:time event)
                       tags (:tags event)
                       ttl (:ttl event)
                       member? riemann.common/member?]
                ast))))

-------------------------------------------------------------------------------- /src/riemann/graphite.clj: --------------------------------------------------------------------------------
(ns riemann.graphite
  "Forwards events to Graphite."
  (:refer-clojure :exclude [replace])
  (:import
    (java.net Socket)
    (java.io Writer
             OutputStreamWriter)
    (org.jboss.netty.channel Channels)
    (org.jboss.netty.handler.codec.frame DelimiterBasedFrameDecoder
                                         Delimiters)
    (org.jboss.netty.handler.codec.oneone OneToOneDecoder)
    (org.jboss.netty.handler.codec.string StringDecoder StringEncoder)
    (org.jboss.netty.util CharsetUtil))
  (:use [clojure.string :only [split join replace]]
        clojure.tools.logging
        riemann.pool
        riemann.common))


(defn graphite-path-basic
  "Constructs a path for an event. Takes the hostname fqdn, reversed,
  followed by the service, with spaces converted to dots."
  [event]
  (let [service (:service event)
        host (:host event)
        ; A missing service or host contributes no path segments.
        split-service (if service (split service #" ") [])
        split-host (if host (split host #"\.") [])]
    (join "." (concat (reverse split-host) split-service))))

(defn graphite-path-percentiles
  "Like graphite-path-basic, but also converts trailing decimals like 0.95
  to 95."
[event]
  (graphite-path-basic
    (if-let [service (:service event)]
      (assoc event :service
             ; Rewrite a trailing decimal in the service name as
             ; (int (* 100 x)); the fractional part of the product is
             ; truncated by int.
             (replace service
                      #"(\d+\.\d+)$"
                      (fn [[_ x]] (str (int (* 100 (read-string x)))))))
      event)))

(defn graphite
  "Returns a function which accepts an event and sends it to Graphite.
  Silently drops events when graphite is down. Attempts to reconnect
  automatically every five seconds. Use:

  (graphite {:host \"graphite.local\" :port 2003})

  Options:

  :path       A function which, given an event, returns the string describing
              the path of that event in graphite. graphite-path-percentiles by
              default.

  :pool-size  The number of connections to keep open.

  :reconnect-interval   How many seconds to wait between attempts to connect.
                        Default 5.

  :claim-timeout        How many seconds to wait for a graphite connection from
                        the pool. Default 0.1.

  :block-start          Wait for the pool's initial connections to open
                        before returning."
  [opts]
  ; NOTE(review): no default for :claim-timeout is merged in here; whatever
  ; the caller passes (possibly nil) goes straight to with-pool -- confirm
  ; the documented 0.1 default.
  (let [opts (merge {:host "127.0.0.1"
                     :port 2003
                     :path graphite-path-percentiles} opts)
        ; Each pooled "thingy" is a [socket writer] pair.
        pool (fixed-pool
               (fn open []
                 (info "Connecting to " (select-keys opts [:host :port]))
                 (let [sock (Socket. (:host opts) (:port opts))
                       out (OutputStreamWriter.
(.getOutputStream sock))]
                   (info "Connected")
                   [sock out]))
               (fn close [[sock out]]
                 (info "Closing connection to "
                       (select-keys opts [:host :port]))
                 (.close out)
                 (.close sock))
               {:size (:pool-size opts)
                :block-start (:block-start opts)
                :regenerate-interval (:reconnect-interval opts)})
        path (:path opts)]

    (fn [event]
      ; Events without a metric are not sent.
      (when (:metric event)
        (with-pool [[sock out] pool (:claim-timeout opts)]
          ; One line per event: "<path> <metric> <time>\n".
          (let [string (str (join " " [(path event)
                                       (float (:metric event))
                                       (int (:time event))])
                            "\n")]
            (.write ^OutputStreamWriter out string)
            (.flush ^OutputStreamWriter out)))))))


-------------------------------------------------------------------------------- /src/riemann/librato.clj: --------------------------------------------------------------------------------
(ns riemann.librato
  "Forwards events to Librato Metrics."
  (:require [clojure.string :as string])
  (:use [clj-librato.metrics :only [collate annotate]]
        clojure.math.numeric-tower))

(defn safe-name
  "Converts a string into a safe name for Librato's metrics and streams.
  Converts spaces to periods, preserves only A-Za-z0-9.:-_, and cuts to 255
  characters. Returns nil for nil input."
  [s]
  (when s
    (let [cleaned (-> s
                      (string/replace " " ".")
                      (string/replace #"[^-.:_\w]" ""))]
      ; Truncate using the length of the *cleaned* string. Stripping
      ; characters makes it shorter than s, so bounding by (count s) asked
      ; subs for an end index past the end of the string and threw.
      (subs cleaned 0 (min 255 (count cleaned))))))

(defn event->gauge
  "Converts an event to a gauge."
  [event]
  {:name (safe-name (:service event))
   :source (safe-name (:host event))
   :value (:metric event)
   :measure-time (round (:time event))})

; Counters use exactly the same representation as gauges.
(def event->counter event->gauge)

(defn event->annotation
  "Converts an event to an annotation."
[event]
  ; Build the annotation map, then drop every entry whose value is nil or
  ; false (e.g. a missing host, description or end-time).
  (into {}
        (filter second
                {:name (safe-name (:service event))
                 :title (string/join
                          " " [(:service event) (:state event)])
                 :source (safe-name (:host event))
                 :description (:description event)
                 :start-time (round (:time event))
                 :end-time (when (:end-time event) (round (:end-time event)))}
                )))

(defn librato-metrics
  "Creates a librato metrics adapter. Takes your username and API key, and
  returns a map of streams:

  :gauge
  :counter
  :annotation
  :start-annotation
  :end-annotation

  Gauge and counter submit events as measurements. Annotation creates an
  annotation from the given event; it will have only a start time unless
  :end-time is given. :start-annotation will *start* an annotation; the
  annotation ID for that host and service will be remembered. :end-annotation
  will submit an end-time for the most recent annotation submitted with
  :start-annotation.

  Example:

  (def librato (librato-metrics \"aphyr@aphyr.com\" \"abcd01234...\"))

  (tagged \"latency\" (librato :gauge))

  (where (service \"www\")
    (changed-state
      (where (state \"ok\")
        (:start-annotation librato)
        (else
          (:end-annotation librato)))))"
  [user api-key]
  ; annotation-ids maps [host service] -> id of the currently open annotation.
  (let [annotation-ids (atom {})]
    ; collate takes the gauges and the counters as two separate collections.
    {:gauge (fn [event]
              (let [gauge (event->gauge event)]
                (collate user api-key [gauge] [])
                gauge))

     :counter (fn [event]
                (let [counter (event->counter event)]
                  (collate user api-key [] [counter])
                  counter))

     ; The annotation's :name selects the annotation stream; the remaining
     ; keys are sent as the annotation body.
     :annotation (fn [event]
                   (let [a (event->annotation event)]
                     (annotate user api-key (:name a)
                               (dissoc a :name))))

     :start-annotation (fn [event]
                         (let [a (event->annotation event)
                               res (annotate user api-key (:name a)
                                             (dissoc a :name))]
                           (swap!
annotation-ids assoc
                                  [(:host event) (:service event)] (:id res))
                           res))

     ; Ends the annotation previously started for this host and service, if
     ; any; does nothing (returns nil) when none is remembered.
     :end-annotation (fn [event]
                       (let [id ((deref annotation-ids)
                                 [(:host event) (:service event)])
                             a (event->annotation event)]
                         (when id
                           (let [res (annotate
                                       user api-key (:name a) id
                                       {:end-time (round (:time event))})]
                             ; Forget the id once the end time is submitted.
                             (swap! annotation-ids dissoc
                                    [(:host event) (:service event)])
                             res))))}))

-------------------------------------------------------------------------------- /tasks/leiningen/fatdeb.clj: --------------------------------------------------------------------------------
(ns leiningen.fatdeb
  (:use [clojure.java.shell :only [sh]]
        [clojure.java.io :only [file delete-file writer copy]]
        [clojure.string :only [join capitalize trim-newline]]
        [leiningen.uberjar :only [uberjar]]))

(defn delete-file-recursively
  "Delete file f. If it's a directory, recursively delete all its contents.
  Raise an exception if any deletion fails unless silently is true."
  [f & [silently]]
  (System/gc) ; This sometimes helps release files for deletion on windows.
  (let [f (file f)]
    ; Children first, then the (now empty) directory or plain file itself.
    (if (.isDirectory f)
      (doseq [child (.listFiles f)]
        (delete-file-recursively child silently)))
    (delete-file f silently)))

(defn deb-dir
  "Debian package working directory."
  [project]
  (file (:root project) "target/deb/riemann"))

; Removes the package working directory, if it exists.
(defn cleanup
  [project]
  ; Delete working dir.
(when (.exists (deb-dir project))
    (delete-file-recursively (deb-dir project))))

(defn reset
  "Cleans the working directory and removes any previously built .deb files
  from the project's target directory."
  [project]
  (cleanup project)
  ; sh runs the program directly, without a shell, so a \"*.deb\" glob is
  ; never expanded and rm would look for a file literally named *.deb.
  ; Enumerate the packages ourselves instead. listFiles returns nil when
  ; target/ does not exist, which doseq treats as empty.
  (doseq [f (.listFiles (file (:root project) "target"))
          :when (.endsWith (.getName f) ".deb")]
    (delete-file f true)))

(defn control
  "Control file"
  [project]
  ; One \"Key: value\" line per entry, with the key name capitalized.
  (join "\n"
        (map (fn [[k v]] (str (capitalize (name k)) ": " v))
             {:package (:name project)
              :version (:version project)
              :section "base"
              :priority "optional"
              :architecture "all"
              :depends "bash"
              :maintainer (:email (:maintainer project))
              :description (:description project)})))

(defn write
  "Write string to file, plus newline"
  [file string]
  ; Trailing newlines are trimmed first so the file ends with exactly one.
  (with-open [w (writer file)]
    (.write w (str (trim-newline string) "\n"))))

(defn make-deb-dir
  "Creates the debian package structure in a new directory."
  [project]
  (let [dir (deb-dir project)]
    (.mkdirs dir)

    ; Meta
    (.mkdirs (file dir "DEBIAN"))
    (write (file dir "DEBIAN" "control") (control project))
    (write (file dir "DEBIAN" "conffiles")
           (join "\n" ["/etc/riemann/riemann.config"]))

    ; Postinst
    ; Fakeroot plays poorly with lein; have to change permissions after
    ; the fact.
; :(
    (write (file dir "DEBIAN" "postinst")
           "#!/bin/sh
chown -R root:root /usr/lib/riemann
chown root:root /usr/bin/riemann
chown -R root:root /etc/riemann")
    (.setExecutable (file dir "DEBIAN" "postinst") true false)

    ; Jar
    (.mkdirs (file dir "usr" "lib" "riemann"))
    (copy (file (:root project) "target"
                (str "riemann-" (:version project) "-standalone.jar"))
          (file dir "usr" "lib" "riemann" "riemann.jar"))

    ; Binary
    (.mkdirs (file dir "usr" "bin"))
    (copy (file (:root project) "pkg" "deb" "riemann")
          (file dir "usr" "bin" "riemann"))
    (.setExecutable (file dir "usr" "bin" "riemann") true false)

    ; Config
    (.mkdirs (file dir "etc" "riemann"))
    (copy (file (:root project) "pkg" "riemann.config")
          (file dir "etc" "riemann" "riemann.config"))

    ; Return the populated package directory.
    dir))

(defn dpkg
  "Convert given package directory to a .deb."
  [project deb-dir]
  ; Build into target/ and echo anything dpkg wrote to stderr.
  (print (:err (sh "dpkg" "--build"
                   (str deb-dir)
                   (str (file (:root project) "target")))))
  ; Write an md5sum file next to the expected <name>_<version>_all.deb.
  (let [deb-file (file (:root project) "target" (str (:name project) "_"
                                                     (:version project) "_"
                                                     "all" ".deb"))]
    (write (str deb-file ".md5")
           (:out (sh "md5sum" (str deb-file))))))


; Lein task entry point: clear old output, optionally build the uberjar
; (default true), assemble the package directory, run dpkg, then clean up.
(defn fatdeb
  ([project] (fatdeb project true))
  ([project uberjar?]
   (reset project)
   (when uberjar? (uberjar project))
   (dpkg project (make-deb-dir project))
   (cleanup project)))

-------------------------------------------------------------------------------- /src/riemann/pool.clj: --------------------------------------------------------------------------------
(ns riemann.pool
  "A generic thread-safe resource pool."
  (:use clojure.tools.logging)
  (:import [java.util.concurrent ArrayBlockingQueue TimeUnit]))

; THIS IS A MUTABLE STATE OF AFFAIRS. WHICH IS TO SAY, IT IS FUCKING TERRIBLE.

(defprotocol Pool
  (grow [pool]
    "Adds an element to the pool.")
  (claim [pool] [pool timeout]
    "Take a thingy from the pool. Timeout in seconds; if unspecified, 0.
    Returns nil if no thingy available.")
  (release [pool thingy]
    "Returns a thingy to the pool.")
  (invalidate [pool thingy]
    "Tell the pool a thingy is no longer valid."))

; A Pool backed by a blocking queue. open creates a thingy, close disposes of
; one, and regenerate-interval is the retry delay (seconds) for a failed open.
(defrecord FixedQueuePool [queue open close regenerate-interval]
  Pool
  (grow [this]
    ; Keep calling (open) until it yields a thingy. An open that throws is
    ; treated like one that returned nil; the error itself is discarded.
    (loop []
      (if-let [thingy (try (open) (catch Throwable t nil))]
        (.put queue thingy)
        (do
          (Thread/sleep (* 1000 regenerate-interval))
          (recur)))))

  (claim [this]
    (claim this nil))

  (claim [this timeout]
    (try
      ; timeout is in seconds; poll is given milliseconds. An interrupted
      ; wait is reported as "nothing available" (nil).
      (.poll queue (* 1000 (or timeout 0)) TimeUnit/MILLISECONDS)
      (catch java.lang.InterruptedException e
        nil)))

  (release [this thingy]
    ; nil means nothing was claimed, so there is nothing to put back.
    (when thingy
      (.put queue thingy)))

  (invalidate [this thingy]
    (when thingy
      ; Best-effort close; a failure here is only logged.
      (try (close thingy)
        (catch Throwable t
          (warn "Closing" thingy "threw" t)))
      ; Open a replacement in the background.
      (future (grow this)))))

(defn fixed-pool
  "A fixed pool of thingys. (open) is called to generate a thingy. (close
  thingy) is called when a thingy is invalidated. When thingys are invalidated,
  the pool will immediately try to open a new one; if open throws or returns
  nil, the pool will sleep for regenerate-interval seconds before retrying
  (open).

  :regenerate-interval    How long to wait between retrying (open).
  :size                   Number of thingys in the pool.
  :block-start            Should (fixed-pool) wait until the pool is full
                          before returning?

  Note that fixed-pool is correct only if every successful (claim) is followed
  by exactly one of either (invalidate) or (release). If calls are unbalanced;
  e.g. resources are not released, doubly released, or released *and*
  invalidated, starvation or unbounded blocking could occur.
(with-pool)
  provides this guarantee."
  ([open]
   (fixed-pool open {}))
  ([open opts]
   (fixed-pool open identity opts))
  ([open close opts]
   (let [; Default size: twice the number of available processors.
         size (or (:size opts) (* 2 (.availableProcessors
                                      (Runtime/getRuntime))))
         regenerate-interval (or (:regenerate-interval opts) 5)
         ; Block by default, but only when the option is absent (nil).
         ; (or x true) is true for every x, so an explicit false could
         ; never switch blocking off.
         block-start (if (nil? (:block-start opts))
                       true
                       (:block-start opts))
         pool (FixedQueuePool.
                (ArrayBlockingQueue. size true)
                open
                close
                regenerate-interval)
         ; doall: launch every opener future right away. map is lazy, so
         ; without it the futures are only created when something walks the
         ; seq, which never happens for a non-blocking start and would
         ; leave the pool permanently empty.
         openers (doall (map (fn open-pool [_] (future (grow pool)))
                             (range size)))]
     ; Optionally wait until every initial thingy has been opened.
     (when block-start
       (doseq [worker openers] @worker))
     pool)))

(defmacro with-pool
  "Evaluates body in a try expression with a symbol 'thingy claimed from the
  given pool, with specified claim timeout. Releases thingy at the end of the
  body, or if an exception is thrown, invalidates them and rethrows. Example:

  ; With client, taken from connection-pool, waiting 5 seconds to claim, send
  ; client a message.
  (with-pool [client connection-pool 5]
    (send client a-message))"
  [[thingy pool timeout] & body]
  ; Destructuring bind could change nil to a, say, vector, and cause
  ; unbalanced claim/release.
  ; So the raw claimed value is kept in thingy# for release/invalidate, and
  ; the caller's (possibly destructuring) binding is made separately.
  `(let [thingy# (claim ~pool ~timeout)
         ~thingy thingy#]
     (try
       (let [res# (do ~@body)]
         (release ~pool thingy#)
         res#)
       (catch Throwable t#
         (invalidate ~pool thingy#)
         (throw t#)))))

-------------------------------------------------------------------------------- /src/riemann/transport/websockets.clj: --------------------------------------------------------------------------------
(ns riemann.transport.websockets
  "Accepts messages from external sources. Associated with a core. Sends
  incoming events to the core's streams, queries the core's index for states."
(:require [riemann.query :as query]
            [riemann.index :as index]
            [riemann.pubsub :as p])
  (:use [riemann.common :only [event-to-json]]
        [riemann.service :only [Service]]
        [aleph.http :only [start-http-server]]
        [lamina.core :only [receive-all close enqueue]]
        [clojure.tools.logging :only [info warn]]
        [clj-http.util :only [url-decode]]
        [clojure.string :only [split]]))

(defn http-query-map
  "Converts a URL query string into a map of url-decoded keys to url-decoded
  values. A nil or empty query string yields an empty map, and a parameter
  with no \"=\" maps to the empty string. When a key is repeated the last
  occurrence wins."
  [string]
  ; Previously the flattened key/value list went straight to hash-map, which
  ; throws on an odd number of items (\"a&b=1\", or the empty string), and a
  ; missing query string threw inside split.
  (into {}
        (for [kv (split (or string "") #"&")
              :when (seq kv)]
          (let [[k v] (split kv #"=" 2)]
            [(url-decode k) (url-decode (or v ""))]))))

; Subscribes the websocket channel ch to a pubsub topic taken from the
; request URI, forwarding (as JSON) only events matching the "query"
; parameter. The subscription is removed when the channel yields a falsey
; message.
(defn ws-pubsub-handler [core ch hs]
  (let [topic (url-decode (last (split (:uri hs) #"/" 3)))
        params (http-query-map (:query-string hs))
        query (params "query")
        pred (query/fun (query/ast query))
        sub (p/subscribe (:pubsub core) topic
                         (fn [event]
                           (when (pred event)
                             (enqueue ch (event-to-json event)))))]
    (info "New websocket subscription to" topic ":" query)
    (receive-all ch (fn [msg]
                      (when-not msg
                        ; Shut down channel
                        (info "Closing websocket "
                              (:remote-addr hs) topic query)
                        (close ch)
                        (p/unsubscribe (:pubsub core) sub))))))

(defn ws-index-handler
  "Queries the index for events and streams them to the client. If subscribe is
  true, also initiates a pubsub subscription to the index topic with that
  query."
[core ch hs]
  (let [params (http-query-map (:query-string hs))
        query (params "query")
        ast (query/ast query)]
    ; Send the current matches first (when the core has an index at all).
    (when-let [i (:index core)]
      (doseq [event (index/search i ast)]
        (enqueue ch (event-to-json event))))
    ; Then either keep streaming via pubsub, or finish.
    (if (= (params "subscribe") "true")
      (ws-pubsub-handler core ch (assoc hs :uri "/pubsub/index"))
      (close ch))))

; Returns the aleph websocket handler. core is an atom holding the current
; core; it is dereferenced per connection so reloads take effect.
(defn ws-handler [core]
  (fn [ch handshake]
    (info "Websocket connection from" (:remote-addr handshake)
          (:uri handshake)
          (:query-string handshake))
    ; condp has no :else keyword: the default is a single trailing
    ; expression. Written as `:else (do ...)` it was a regular clause, so an
    ; unknown URI evaluated (re-matches :else uri) and threw instead of
    ; closing the channel.
    (condp re-matches (:uri handshake)
      #"^/index/?$" (ws-index-handler @core ch handshake)
      #"^/pubsub/[^/]+/?$" (ws-pubsub-handler @core ch handshake)
      (do
        (info "Unknown URI " (:uri handshake) ", closing")
        (close ch)))))

; host/port: listen address. core: atom holding the current core.
; server: atom holding the running server's shutdown fn, or nil when stopped.
(defrecord WebsocketServer [host port core server]
  Service
  (equiv? [this other]
    (and (instance? WebsocketServer other)
         (= host (:host other))
         (= port (:port other))))

  (reload! [this new-core]
    (reset! core new-core))

  (start! [this]
    (locking this
      ; Idempotent: only start when not already running.
      (when-not @server
        (reset! server (start-http-server (ws-handler core)
                                          {:host host
                                           :port port
                                           :websocket true}))
        (info "Websockets server" host port "online"))))

  (stop! [this]
    (locking this
      (when @server
        ; start-http-server's return value is invoked to shut the server
        ; down.
        (@server)
        ; Forget the stopped server; otherwise start! sees a truthy value
        ; and refuses to start again, and a second stop! re-invokes it.
        (reset! server nil)
        (info "Websockets server" host port "shut down")))))

(defn ws-server
  "Creates a new websocket server service. The server starts listening when
  the service is started.

  Options:
  :host   The address to listen on (default 127.0.0.1)
  :port   The port to listen on (default 5556)"
  ([] (ws-server {}))
  ([opts]
   (WebsocketServer.
(get opts :host "127.0.0.1")
     (get opts :port 5556)
     ; core and server atoms, both empty until reload!/start!.
     (atom nil)
     (atom nil))))

-------------------------------------------------------------------------------- /src/riemann/common.clj: --------------------------------------------------------------------------------
(ns riemann.common
  "Utility functions. Time/date, some flow control constructs, protocol buffer
  definitions and codecs, some vector set ops, etc."
  (:import [java.util Date]
           [com.aphyr.riemann Proto$Query Proto$Event Proto$Msg])
  (:require gloss.io
            clj-time.core
            clj-time.format
            clj-time.coerce
            clojure.set
            [cheshire.core :as json]
            [clojure.java.io :as io])
  (:use [clojure.string :only [split]]
        [riemann.time :only [unix-time]]
        riemann.codec
        gloss.core
        clojure.math.numeric-tower))

(defprotocol Match
  (match [pred object]
    "Does predicate describe object?"))

; Times
(defn time-at
  "Returns the Date of a unix epoch time."
  [unix-time]
  ; Seconds -> milliseconds, truncated to a long.
  (java.util.Date. (long (* 1000 unix-time))))

(defn unix-to-iso8601
  "Transforms unix time to iso8601 string"
  [unix]
  (clj-time.format/unparse (clj-time.format/formatters :date-time)
                           (clj-time.coerce/from-long (long (* 1000 unix)))))

(defn post-load-event
  "After events are loaded, we assign default times if none exist."
  [e]
  (if (:time e) e (assoc e :time (unix-time))))

(defn decode-msg
  "Decode a protobuf to a message. Decodes the protocol buffer
  representation of Msg and applies post-load-event to all events."
  [msg]
  (let [msg (decode-pb-msg msg)]
    ; Both :states and :events get default times.
    (-> msg
      (assoc :states (map post-load-event (:states msg)))
      (assoc :events (map post-load-event (:events msg))))))

(defn decode-inputstream
  "Decode an InputStream to a message. Decodes the protobuf representation of
  Msg and applies post-load-event to all events."
[s]
  ; Same as decode-msg, but parses the Proto$Msg from the stream first.
  (let [msg (decode-pb-msg (Proto$Msg/parseFrom s))]
    (-> msg
      (assoc :states (map post-load-event (:states msg)))
      (assoc :events (map post-load-event (:events msg))))))

(defn ^"[B" encode
  "Builds and dumps a protobuf message as bytes from a hash."
  [msg]
  (.toByteArray (encode-pb-msg msg)))

(defn event-to-json
  "Convert an event to a JSON string."
  [event]
  ; The :time field is rendered as an ISO8601 string.
  (json/generate-string
    (assoc event :time (unix-to-iso8601 (:time event)))))

(defn event
  "Create a new event from a map."
  [opts]
  ; Time defaults to now and is always rounded to a whole number (long).
  (let [t (long (round (or (opts :time)
                           (unix-time))))]
    (map->Event (merge opts {:time t}))))

(defn approx-equal
  "Returns true if x and y are roughly equal, such that x/y is within tol of
  unity."
  ([x,y]
   (approx-equal x y 0.01))
  ([x, y, tol]
   (if (= x y) true
     ; If x/y raises an ArithmeticException, compare using y/x instead.
     (let [f (try (/ x y) (catch java.lang.ArithmeticException e (/ y x)))]
       (< (- 1 tol) f (inc tol))))))

(defn re-matches?
  "Does the given regex match string? Nil if string is nil."
  [re string]
  ; Uses re-find, so a match anywhere in the string counts.
  (when string
    (re-find re string)))

; Matching
(extend-protocol Match
  ; Regexes are matched against strings.
  java.util.regex.Pattern
  (match [re string]
    ; nil or non-string objects simply don't match.
    (try (re-find re string)
      (catch NullPointerException _ false)
      (catch ClassCastException _ false)))

  ; Functions are called with the given object.
  java.util.concurrent.Callable
  (match [f obj]
    (f obj))

  ; Falls back to object equality
  java.lang.Object
  (match [pred object]
    (= pred object)))

; Vector set operations
(defn member?
  "Is r present in seqable s?"
  [r s]
  (some (fn [e] (= r e)) s))

(defn subset?
  "Are all elements of required present in seqable s?"
  [required s]
  (clojure.set/subset? (set required) (set s)))

(defn overlap?
"Do a and b (any seqables) have any elements in common?"
  [a b]
  ; Pairwise comparison: for each element of b, scan a for an equal one.
  (some (fn [e]
          (some (fn [r] (= e r)) a)) b))

(defn disjoint?
  "Do a and b (any seqables) have no elements in common?"
  [a b]
  (not-any? (fn [e]
              (some (fn [r] (= e r)) a))
            b))

-------------------------------------------------------------------------------- /test/riemann/test/librato.clj: --------------------------------------------------------------------------------
(ns riemann.test.librato
  (:use riemann.librato
        [riemann.time :only [unix-time]]
        clj-librato.metrics
        clojure.math.numeric-tower
        clojure.test)
  (:require [riemann.logging :as logging]))

; Credentials for the integration tests come from the environment.
(def user (System/getenv "LIBRATO_METRICS_USER"))
(def api-key (System/getenv "LIBRATO_METRICS_API_KEY"))

(when-not user
  (println "export LIBRATO_METRICS_USER=\"...\" to run these tests."))
(when-not api-key
  (println "export LIBRATO_METRICS_API_KEY=\"...\" to run these tests."))

(logging/init)

(defn get-metric
  "Get a metric for a gauge or counter."
[gauge]
  ; Fetch one measurement at the gauge's measure-time, then pick the first
  ; measurement recorded under the gauge's source ("unassigned" when the
  ; gauge has none).
  (-> (metric user api-key (:name gauge)
              {:end-time (:measure-time gauge)
               :count 1
               :resolution 1})
    :measurements
    (get (or (:source gauge) "unassigned"))
    (first)))

; Integration test: submits to the live Librato API using the credentials
; above and reads the results back.
(deftest ^:librato ^:integration librato-metrics-test
  (let [l (librato-metrics user api-key)]
    (testing "gauge with source"
      (let [e {:host "a" :service "b" :metric (rand)
               :time (unix-time)}
            r ((:gauge l) e)
            m (get-metric (event->gauge e))]
        (is m)
        (is (= (:metric e) (:value m)))
        (is (= (round (:time e)) (:measure-time m)))))

    (testing "gauge without source"
      (let [e {:service "sourceless" :metric (rand)
               :time (unix-time)}
            r ((:gauge l) e)
            m (get-metric (event->gauge e))]
        (is m)
        (is (= (:metric e) (:value m)))
        (is (= (round (:time e)) (:measure-time m)))))

    (testing "annotation"
      (let [e {:service "ann test"
               :state "down"
               :host "testing1.tx"
               :description (str "test " (rand))
               :time (unix-time)}
            r ((:annotation l) e)
            ; The stream name is the safe-name of the service.
            a (annotation user api-key "ann.test" (:id r))]
        (is a)
        (is (= "ann test down" (:title a)))
        (is (= "testing1.tx" (:source a)))
        (is (= (round (:time e)) (:start-time a)))
        (is (nil? (:end-time a)))
        (is (= (:description e) (:description a)))))

    (testing "annotation without source"
      (let [e {:service "ann"
               :state "down"
               :description (str "test " (rand))
               :time (unix-time)}
            r ((:annotation l) e)
            a (annotation user api-key "ann" (:id r))]
        (is a)
        (is (= "ann down" (:title a)))
        (is (= "unassigned" (:source a)))
        (is (= (round (:time e)) (:start-time a)))
        (is (nil?
(:end-time a)))
77 |         (is (= (:description e) (:description a)))))
78 |
79 |     (testing "annotation start"
80 |       (let [e {:service "ann"
81 |                :host "flaky"
82 |                :state "outage"
83 |                :description "something bad happened"
84 |                :time (round (unix-time))}
85 |             r1 ((:start-annotation l) e)
86 |             r2 ((:end-annotation l)
87 |                 {:service "ann"
88 |                  :host "flaky"
89 |                  :state "ok"
90 |                  :description "all fine"
91 |                  :time (+ 5 (:time e))})
92 |             a (annotation user api-key "ann" (:id r1))]
93 |         (is a)
94 |         (is (= "ann outage" (:title a))) ; actual value must sit inside (= ...); outside it is only the failure message
95 |         (is (= "flaky" (:source a)))     ; same fix: compare against the annotation's source
96 |         (is (= "something bad happened" (:description a)))
97 |         (is (= (:time e) (:start-time a)))
98 |         (is (= (+ 5 (:time e)) (:end-time a)))))))
99 |
--------------------------------------------------------------------------------
/test/riemann/test/bench.clj:
--------------------------------------------------------------------------------
1 | (ns riemann.test.bench
2 |   (:use riemann.core
3 |         riemann.common
4 |         riemann.logging
5 |         riemann.transport.udp
6 |         riemann.transport.tcp
7 |         clojure.test
8 |         clojure.java.shell
9 |         [clojure.string :only [trim-newline]]
10 |         [incanter core charts]
11 |         [riemann.client :only [tcp-client udp-client close-client send-event]])
12 |   (:require riemann.streams))
13 |
14 | (defn git-version
15 |   "Returns a human-readable version name for this commit."
16 |   []
17 |   (if (re-matches #"^\n*$" (:out (sh "git" "status" "-s")))
18 |     ; Unchanged commit.
19 |     (str
20 |       (trim-newline (:out (sh "git" "show" "-s" "--format=%ci" "HEAD")))
21 |       " "
22 |       (trim-newline (:out (sh "git" "rev-parse" "HEAD" :out-enc "UTF-8")))) ; sh's output-encoding option is :out-enc; :out is not an option
23 |
24 |     ; Changed commit.
25 |     "HEAD"))
26 | (def git-version-memo (memoize git-version))
27 |
28 | (defn now
29 |   "Current high-res time, in ms"
30 |   []
31 |   (/ (. System nanoTime) 1000000.0))
32 |
33 | (defmacro time*
34 |   "Evaluates expr and returns the time it took in ms"
35 |   [expr]
36 |   `(let [start# (. System (nanoTime))
37 |          ret# ~expr]
38 |      (/ (- (.
System (nanoTime)) start#) 1000000.0))) 39 | 40 | (defn record 41 | "Returns [times, latencies] of calling f" 42 | ([f opts] 43 | (let [n (or (:n opts) 100) 44 | t0 (now)] 45 | (loop [i n 46 | times [] 47 | latencies []] 48 | (if (zero? i) 49 | [times latencies] 50 | (recur (dec i) 51 | (conj times (- (now) t0)) 52 | (conj latencies (time* (f))))))))) 53 | 54 | (defn throughput 55 | "Returns [times, throughputs] of tape" 56 | ([tape opts] 57 | (let [[times latencies] tape 58 | samples (min (dec (count times)) (max 1 (or (:samples opts) 1000))) 59 | sample-size (/ (dec (count times)) samples) 60 | selected-times (take-nth sample-size times) 61 | throughputs (map (fn [[t1 t2]] 62 | (/ sample-size (- t2 t1))) 63 | (partition 2 1 selected-times))] 64 | [(drop-last selected-times) throughputs]))) 65 | 66 | (defn latencies 67 | [tape opts] 68 | tape) 69 | 70 | (defn save-graph [graph opts] 71 | (let [file (str "bench/" (git-version-memo) "/" (:title opts) ".png")] 72 | (sh "mkdir" "-p" (str "bench/" (git-version-memo))) 73 | (save graph file :width 1024) 74 | (println "Wrote" file))) 75 | 76 | (defn latency-graph 77 | "Graphs latencies, with options." 78 | ([tape] (latency-graph tape {})) 79 | ([tape opts] 80 | (let [title (str (:title opts) " latency") 81 | [times latencies] (latencies tape opts)] 82 | (doto (scatter-plot (map #(/ % 1000) times) 83 | latencies 84 | :title title 85 | :x-label "Time (s)" 86 | :y-label "Latency (ms)") 87 | (set-stroke :width 1) ; huh 88 | (save-graph {:title title}))))) 89 | 90 | (defn throughput-graph 91 | "Graphs throughput of tape, with options." 
92 |   ([tape] (throughput-graph tape {}))
93 |   ([tape opts]
94 |    (let [title (str (:title opts) " throughput")
95 |          [times throughput] (throughput tape opts)]
96 |      (doto (scatter-plot (map #(/ % 1000) times)
97 |                          (map (partial * 1000) throughput)
98 |                          :title title
99 |                          :x-label "Time (s)"
100 |                          :y-label "Reqs/s")
101 |        (save-graph {:title title})))))
102 |
103 | (defn multigraph [f opts]
104 |   (let [tape (record f opts)]
105 |     (latency-graph tape opts)
106 |     (throughput-graph tape opts)))
107 |
108 | (defn core-package ; builds a core with a TCP and a UDP server; opts may carry :streams
109 |   ([] (core-package {})) ; opts is a map; the old [{}] vector only worked because (:streams [..]) is nil
110 |   ([opts]
111 |    (let [servers [(tcp-server) (udp-server)]
112 |          streams (or (:streams opts) [])
113 |          core (suppress "riemann.core"
114 |                         (transition! (core) {:streams streams
115 |                                              :services servers}))]
116 |      {:core core
117 |       :servers servers
118 |       :streams streams})))
119 |
120 | (deftest ^:bench drop-tcp-events
121 |   (let [{:keys [core]} (core-package)
122 |         client (tcp-client)]
123 |     (try
124 |       (multigraph
125 |         #(send-event client {:service "test" :metric 0.1})
126 |         {:title "drop tcp events"
127 |          :n 100000})
128 |       (finally
129 |         (stop! core)))))
130 |
131 | (deftest ^:bench drop-udp-events
132 |   (let [{:keys [core]} (core-package)
133 |         client (udp-client)]
134 |     (try
135 |       (multigraph
136 |         #(send-event client {:service "test" :metric 0.1} false)
137 |         {:title "drop udp events"
138 |          :n 100000})
139 |       (finally
140 |         (stop! core)))))
141 |
--------------------------------------------------------------------------------
/src/riemann/config.clj:
--------------------------------------------------------------------------------
1 | (ns riemann.config
2 |   "Riemann config files are eval'd in the context of this namespace. Includes
3 |   streams, client, email, logging, and graphite; the common functions used in
4 |   config. Provides a default core and functions ((tcp|udp)-server, streams,
5 |   index) which modify that core."
6 | (:require [riemann.core :as core] 7 | [riemann.transport.tcp :as tcp] 8 | [riemann.transport.udp :as udp] 9 | [riemann.transport.websockets :as websockets] 10 | [riemann.transport.graphite :as graphite] 11 | [riemann.repl] 12 | [riemann.index] 13 | [riemann.logging :as logging] 14 | [riemann.folds :as folds] 15 | [riemann.pubsub :as pubsub] 16 | [riemann.graphite :as graphite-client] 17 | [clojure.tools.nrepl.server :as repl]) 18 | (:use clojure.tools.logging 19 | riemann.client 20 | riemann.email 21 | [riemann.pagerduty :only [pagerduty]] 22 | [riemann.librato :only [librato-metrics]] 23 | [riemann.streams :exclude [update-index delete-from-index]]) 24 | (:gen-class)) 25 | 26 | (def core "The currently running core." 27 | (atom (core/core))) 28 | (def next-core "The core which will replace the current core." 29 | (atom (core/core))) 30 | 31 | (def graphite #'graphite-client/graphite) 32 | 33 | (defn repl-server 34 | "Starts a new REPL server with opts." 35 | [& opts] 36 | (riemann.repl/start-server (apply hash-map opts))) 37 | 38 | (defn add-service! 39 | "Adds a service to the next core." 40 | [service] 41 | (locking core 42 | (swap! next-core assoc :services 43 | (conj (:services @next-core) service)))) 44 | 45 | (defn tcp-server 46 | "Add a new TCP server with opts to the default core." 47 | [& opts] 48 | (add-service! (tcp/tcp-server (apply hash-map opts)))) 49 | 50 | (defn graphite-server 51 | "Add a new Graphite TCP server with opts to the default core." 52 | [& opts] 53 | (add-service! (graphite/graphite-server (apply hash-map opts)))) 54 | 55 | (defn udp-server 56 | "Add a new UDP server with opts to the default core." 57 | [& opts] 58 | (add-service! (udp/udp-server (apply hash-map opts)))) 59 | 60 | (defn ws-server 61 | "Add a new websockets server with opts to the default core." 62 | [& opts] 63 | (add-service! (websockets/ws-server (apply hash-map opts)))) 64 | 65 | (defn streams 66 | "Add any number of streams to the default core." 
67 | [& things] 68 | (locking core 69 | (swap! next-core assoc :streams 70 | (concat (:streams @next-core) things)))) 71 | 72 | (defn index 73 | "Set the index used by this core." 74 | [& opts] 75 | (let [index (apply riemann.index/index opts)] 76 | (locking core 77 | (swap! next-core assoc :index index)) 78 | index)) 79 | 80 | (defn update-index 81 | "Updates the given index with all events received. Also publishes to the 82 | index pubsub channel." 83 | [index] 84 | (fn [event] (core/update-index @core event))) 85 | 86 | (defn delete-from-index 87 | "Deletes any events that pass through from the index" 88 | [index] 89 | (fn [event] (core/delete-from-index @core event))) 90 | 91 | (defn periodically-expire 92 | "Sets up a reaper for this core. See core API docs." 93 | ([] 94 | (periodically-expire 10)) 95 | ([interval] 96 | (add-service! (core/reaper interval)))) 97 | 98 | (defn publish 99 | "Returns a stream which publishes events to the given channel. Uses this 100 | core's pubsub registry." 101 | [channel] 102 | (fn [event] 103 | (pubsub/publish (:pubsub @core) channel event))) 104 | 105 | (defn subscribe 106 | "Subscribes to the given channel with f, which will receive events. Uses this 107 | core's pubsub registry." 108 | [channel f] 109 | (pubsub/subscribe (:pubsub @next-core) channel f)) 110 | 111 | (defn clear! 112 | "Resets the next core." 113 | [] 114 | (locking core 115 | (reset! next-core (core/core)))) 116 | 117 | (defn apply! 118 | "Applies pending changes to the core. Transitions the current core to the 119 | next one, and resets the next core." 120 | [] 121 | (locking core 122 | (swap! core core/transition! @next-core) 123 | (clear!))) 124 | 125 | (defn start! 126 | "Start the current core." 127 | [] 128 | (core/start! @core)) 129 | 130 | (defn stop! 131 | "Stop the current core." 132 | [] 133 | (core/stop! @core)) 134 | 135 | (defn read-strings 136 | "Returns a sequence of forms read from string." 
137 | ([string] 138 | (read-strings [] 139 | (-> string (java.io.StringReader.) 140 | (clojure.lang.LineNumberingPushbackReader.)))) 141 | ([forms reader] 142 | (let [form (clojure.lang.LispReader/read reader false ::EOF false)] 143 | (if (= ::EOF form) 144 | forms 145 | (recur (conj forms form) reader))))) 146 | 147 | (defn validate-config 148 | "Check that a config file has valid syntax." 149 | [file] 150 | (try 151 | (read-strings (slurp file)) 152 | (catch clojure.lang.LispReader$ReaderException e 153 | (throw (logging/nice-syntax-error e file))))) 154 | 155 | (defn include 156 | "Include another config file. 157 | 158 | (include \"foo.clj\")" 159 | [file] 160 | (binding [*ns* (find-ns 'riemann.config)] 161 | (validate-config file) 162 | (load-string (slurp file)))) 163 | -------------------------------------------------------------------------------- /src/riemann/transport/udp.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.transport.udp 2 | "Accepts messages from external sources. Associated with a core. Sends 3 | incoming events to the core's streams, queries the core's index for states." 4 | (:import [java.net InetSocketAddress] 5 | [java.util.concurrent Executors] 6 | [org.jboss.netty.bootstrap ConnectionlessBootstrap] 7 | [org.jboss.netty.channel ChannelStateEvent 8 | Channels 9 | ExceptionEvent 10 | FixedReceiveBufferSizePredictorFactory 11 | MessageEvent 12 | SimpleChannelUpstreamHandler] 13 | [org.jboss.netty.channel.group ChannelGroup DefaultChannelGroup] 14 | [org.jboss.netty.channel.socket.nio NioDatagramChannelFactory]) 15 | (:use [clojure.tools.logging :only [warn info]] 16 | [clojure.string :only [split]] 17 | [riemann.service :only [Service]] 18 | [riemann.transport :only [handle 19 | protobuf-decoder 20 | protobuf-encoder 21 | msg-decoder 22 | channel-pipeline-factory]])) 23 | 24 | (defn udp-handler 25 | "Returns a UDP handler for the given atom to a core." 
26 | [core ^ChannelGroup channel-group] 27 | (proxy [SimpleChannelUpstreamHandler] [] 28 | (channelOpen [context ^ChannelStateEvent state-event] 29 | (.add channel-group (.getChannel state-event))) 30 | 31 | (messageReceived [context ^MessageEvent message-event] 32 | (handle @core (.getMessage message-event))) 33 | 34 | (exceptionCaught [context ^ExceptionEvent exception-event] 35 | (warn (.getCause exception-event) "UDP handler caught")))) 36 | 37 | (defrecord UDPServer [host port max-size pipeline-factory core killer] 38 | ; core is an atom to a core 39 | ; killer is an atom to a function that shuts down the server 40 | 41 | Service 42 | ; TODO compare pipeline-factory! 43 | (equiv? [this other] 44 | (and (instance? UDPServer other) 45 | (= host (:host other)) 46 | (= port (:port other)))) 47 | 48 | (reload! [this new-core] 49 | (reset! core new-core)) 50 | 51 | (start! [this] 52 | (locking this 53 | (when-not @killer 54 | (let [bootstrap (ConnectionlessBootstrap. 55 | (NioDatagramChannelFactory. 56 | (Executors/newCachedThreadPool))) 57 | all-channels (DefaultChannelGroup. 58 | (str "udp-server " host port max-size)) 59 | cpf (channel-pipeline-factory 60 | pipeline-factory (udp-handler core all-channels))] 61 | 62 | ; Configure bootstrap 63 | (doto bootstrap 64 | (.setPipelineFactory cpf) 65 | (.setOption "broadcast" "false") 66 | (.setOption "receiveBufferSizePredictorFactory" 67 | (FixedReceiveBufferSizePredictorFactory. max-size))) 68 | 69 | ; Start bootstrap 70 | (let [server-channel (.bind bootstrap 71 | (InetSocketAddress. host port))] 72 | (.add all-channels server-channel)) 73 | (info "UDP server" host port max-size "online") 74 | 75 | ; fn to close server 76 | (reset! killer 77 | (fn [] 78 | (-> all-channels .close .awaitUninterruptibly) 79 | (.releaseExternalResources bootstrap) 80 | (info "UDP server" host port max-size "shut down") 81 | )))))) 82 | 83 | (stop! [this] 84 | (locking this 85 | (when @killer 86 | (@killer) 87 | (reset! 
killer nil))))) 88 | 89 | 90 | (defn udp-server 91 | "Starts a new UDP server. Doesn't start until (service/start!). 92 | 93 | IMPORTANT: The UDP server has a maximum datagram size--by default, 16384 94 | bytes. If your client does not agree on the maximum datagram size (and send 95 | big messages over TCP instead), it can send large messages which will be 96 | dropped with protobuf parse errors in the log. 97 | 98 | Options: 99 | :host The address to listen on (default 127.0.0.1). 100 | :port The port to listen on (default 5555). 101 | :max-size The maximum datagram size (default 16384 bytes). 102 | :pipeline-factory" 103 | ([] (udp-server {})) 104 | ([opts] 105 | (let [pipeline-factory #(doto (Channels/pipeline) 106 | (.addLast "protobuf-encoder" 107 | (protobuf-encoder)) 108 | (.addLast "protobuf-decoder" 109 | (protobuf-decoder)) 110 | (.addLast "msg-decoder" 111 | (msg-decoder)))] 112 | 113 | (UDPServer. 114 | (get opts :host "127.0.0.1") 115 | (get opts :port 5555) 116 | (get opts :max-size 16384) 117 | (get opts :pipeline-factory pipeline-factory) 118 | (atom nil) 119 | (atom nil))))) 120 | -------------------------------------------------------------------------------- /src/riemann/time.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.time 2 | "Clocks and scheduled tasks. Provides functions for getting the current time 3 | and running functions (Tasks) at specific times and periods. Includes a 4 | threadpool for task execution, controlled by (start!) and (stop!)." 
5 | (:import [java.util.concurrent ConcurrentSkipListSet] 6 | [java.util.concurrent.locks LockSupport]) 7 | (:use [clojure.math.numeric-tower :only [ceil]] 8 | [clojure.stacktrace :only [print-stack-trace]] 9 | [clojure.tools.logging :only [warn]])) 10 | 11 | (defprotocol Task 12 | (succ [task] 13 | "The successive task to this one.") 14 | (run [task] 15 | "Executes this task.") 16 | (cancel [task] 17 | "Cancel this task.")) 18 | 19 | (defprotocol Deferrable 20 | (defer [this new-time] 21 | "Schedule a task for a new time.")) 22 | 23 | (defn unix-time-real 24 | "The current unix epoch time in seconds, taken from 25 | System/currentTimeMillis." 26 | [] 27 | (/ (System/currentTimeMillis) 1000)) 28 | 29 | (defn linear-time-real 30 | "A current time on a linear scale with no fixed epoch; counts in seconds. 31 | Unlike unix-time, which can pause, skip, or flow backwards, advances 32 | consistently at (close) to wall clock time." 33 | [] 34 | (/ (System/nanoTime) 1000000000)) 35 | 36 | (def unix-time unix-time-real) 37 | (def linear-time linear-time-real) 38 | 39 | (defrecord Once [id f t cancelled] 40 | Task 41 | (succ [this] nil) 42 | (run [this] (when-not @cancelled (f))) 43 | (cancel [this] 44 | (reset! cancelled true))) 45 | 46 | (defrecord Every [id f t interval deferred-t cancelled] 47 | Task 48 | (succ [this] 49 | (when-not @cancelled 50 | (let [next-time (or @deferred-t (+ t interval))] 51 | (reset! deferred-t nil) 52 | (assoc this :t next-time)))) 53 | 54 | (run [this] 55 | (when-not (or @deferred-t @cancelled) (f))) 56 | 57 | (cancel [this] 58 | (reset! cancelled true)) 59 | 60 | Deferrable 61 | (defer [this delay] 62 | (reset! deferred-t (+ (unix-time) delay)))) 63 | 64 | (def max-task-id 65 | (atom 0)) 66 | 67 | (def tasks 68 | "Scheduled operations." 69 | (ConcurrentSkipListSet. 70 | (fn [a b] (compare [(:t a) (:id a)] 71 | [(:t b) (:id b)])))) 72 | 73 | ; Processor-dependent threadpool sizing will happen. It's on the list. 
;-) 74 | (def thread-count 4) 75 | (def park-interval 0.1) 76 | (def threadpool (atom [])) 77 | (def running (atom false)) 78 | 79 | (defn task-id 80 | "Return a new task ID." 81 | [] 82 | (swap! max-task-id inc)) 83 | 84 | ; Look at all these bang! methods! Mutability is SO EXCITING! 85 | 86 | (defn reset-tasks! 87 | "Resets the task queue to empty, without triggering side effects." 88 | [] 89 | (.clear tasks)) 90 | 91 | (defn poll-task! 92 | "Removes the next task from the queue." 93 | [] 94 | (.pollFirst tasks)) 95 | 96 | (defn schedule-sneaky! 97 | "Schedules a task. Does *not* awaken any threads." 98 | [task] 99 | (.add tasks task) 100 | task) 101 | 102 | (defn schedule! 103 | "Schedule a task. May awaken a thread from the threadpool to investigate." 104 | [task] 105 | (schedule-sneaky! task) 106 | (when @running 107 | (LockSupport/unpark (rand-nth @threadpool))) 108 | task) 109 | 110 | (defn once! 111 | "Calls f at t seconds." 112 | [t f] 113 | (schedule! (Once. (task-id) f t (atom false)))) 114 | 115 | (defn after! 116 | "Calls f after delay seconds" 117 | [delay f] 118 | (once! (+ (unix-time) delay) f)) 119 | 120 | (defn every! 121 | "Calls f every interval seconds, after delay." 122 | ([interval f] 123 | (every! interval 0 f)) 124 | ([interval delay f] 125 | (schedule! (Every. (task-id) 126 | f 127 | (+ (unix-time) delay) 128 | interval 129 | (atom nil) 130 | (atom false))))) 131 | 132 | (defn run-tasks! 133 | "While running, takes tasks from the queue and executes them when ready. Will 134 | park the current thread when no tasks are available." 135 | [i] 136 | (while @running 137 | (try 138 | (if-let [task (poll-task!)] 139 | ; We've acquired a task. 140 | (if (<= (:t task) (unix-time-real)) 141 | (do 142 | ; Run task 143 | (try 144 | (run task) 145 | (catch Throwable t 146 | (warn "running task threw" 147 | (with-out-str (clojure.stacktrace/print-stack-trace t))))) 148 | (when-let [task' (succ task)] 149 | ; Schedule the next task. 
150 |               (schedule-sneaky! task')))
151 |           (do
152 |             ; Return task.
153 |             (schedule-sneaky! task)
154 |             ; Park until that task comes up next.
155 |             (LockSupport/parkUntil (ceil (* 1000 (:t task))))))
156 |         (do
157 |           ; No task available; park for a bit and try again.
158 |           (LockSupport/parkNanos (ceil (* 1000000000 park-interval)))))
159 |       (catch Throwable t
160 |         (warn "caught"
161 |               (with-out-str (clojure.stacktrace/print-stack-trace t)))))))
162 |
163 | (defn stop!
164 |   "Stops the task threadpool. Waits for threads to exit."
165 |   []
166 |   (locking threadpool
167 |     (reset! running false)
168 |     (while (some #(.isAlive %) @threadpool)
169 |       ; Allow at most 1/10th park-interval to pass after all threads exit.
170 |       (Thread/sleep (* park-interval 100)))
171 |     (reset! threadpool [])))
172 |
173 | (defn start!
174 |   "Starts the threadpool to execute tasks on the queue automatically."
175 |   []
176 |   (locking threadpool
177 |     (stop!)
178 |     (reset! running true)
179 |     (reset! threadpool
180 |             (vec (map (fn [i] ; vec realizes the lazy seq, so every thread is created and started here
181 |                         (doto (Thread. (bound-fn [] (run-tasks! i))
182 |                                        (str "riemann task " i))
183 |                           (.start)))
184 |                       (range thread-count))))))
185 |
--------------------------------------------------------------------------------
/test/riemann/test/query.clj:
--------------------------------------------------------------------------------
1 | (ns riemann.test.query
2 |   (:use riemann.query
3 |         [riemann.time :only [linear-time]]
4 |         clojure.test))
5 |
6 | (deftest ast-test
7 |   (are [s expr] (= (ast s) expr)
8 |        ; Fields
9 |        "state = true" '(= state true)
10 |        "host = true" '(= host true)
11 |        "service = true" '(= service true)
12 |        "description = true" '(= description true)
13 |        "metric_f = true" '(= metric_f true)
14 |        "metric = true" '(= metric true)
15 |        "time = true" '(= time true)
16 |        "ttl = 64" '(= ttl 64)
17 |
18 |        ; Literals
19 |        "true" true
20 |        "false" false
21 |        "nil" nil
22 |        "null" nil
23 |
24 |        ; Integers
25 |        "state = 0" '(= state 0)
26 |        "state = 1" '(= state 1)
27 |        "state = -1" '(= state -1)
28 |
29 |        ; Floats
30 |        "state = 1." '(= state 1.)
31 |        "state = 0.0" '(= state 0.0)
32 |        "state = 1.5" '(= state 1.5)
33 |        "state = -1.5" '(= state -1.5)
34 |        "state = 1e5" '(= state 1e5)
35 |        "state = 1E5" '(= state 1e5)
36 |        "state = -1.2e-5" '(= state -1.2e-5)
37 |
38 |        ; Strings
39 |        "state = \"\"" '(= state "")
40 |        "state = \"foo\"" '(= state "foo")
41 |        "state = \"\\b\\t\\n\\f\\r\"" '(= state "\b\t\n\f\r")
42 |        "state = \" \\\" \\\\ \"" '(= state " \" \\ ")
43 |        "state = \"辻斬\"" '(= state "辻斬")
44 |
45 |        ; Simple predicates
46 |        "state = 2" '(= state 2)
47 |        "state > 2" '(when state (> state 2))
48 |        "state < 2" '(when state (< state 2))
49 |        "state >= 2" '(when state (>= state 2))
50 |        "state <= 2" '(when state (<= state 2))
51 |        "state != 2" '(not (= state 2))
52 |        ; Regexen aren't comparable
53 |        ; "state =~ \"%foo%\"" '(re-find #".*foo.*" state)
54 |
55 |        ; Tags
56 |        "tagged \"cat\"" '(when tags (member? "cat" tags))
57 |
58 |        ; Boolean operators
59 |        "not host = 1" '(not (= host 1))
60 |        "host = 1 and state = 2" '(and (= host 1) (= state 2))
61 |        "host = 1 or state = 2" '(or (= host 1) (= state 2))
62 |
63 |        ; Grouping
64 |        "(host = 1)" '(= host 1)
65 |        "((host = 1))" '(= host 1)
66 |
67 |        ; Precedence
68 |        "not host = 1 and host = 2"
69 |        '(and (not (= host 1)) (= host 2))
70 |
71 |        "not host = 1 or host = 2 and host = 3"
72 |        '(or (not (= host 1))
73 |             (and (= host 2) (= host 3)))
74 |
75 |        "not ((host = 1 or host = 2) and host = 3)"
76 |        '(not (and (or (= host 1)
77 |                       (= host 2))
78 |                   (= host 3)))
79 |        ))
80 |
81 | (defn f
82 |   "Given a query string s, ensure that it matches all good states and no evil
83 |   ones."
84 |   [s good evil] ; docstring now precedes the arg vector, so it is attached as metadata rather than evaluated and dropped
85 |   (let [fun (fun (ast s))]
86 |     (doseq [state good] (is (fun state)))
87 |     (doseq [state evil]
88 |       (is (not (fun state))))))
89 |
90 | (deftest truthy
91 |   (f "true"
92 |      [{:state "foo"} {}]
93 |      [])
94 |
95 |   (f "false"
96 |      []
97 |      [{:state "foo"} {}])
98 |
99 |   (f "null"
100 |      []
101 |      [{:state "foo"} {}]))
102 |
103 | (deftest equal
104 |   (f "state = \"foo\""
105 |      [{:state "foo"}]
106 |      [{:state "bar"} {}])
107 |   )
108 |
109 | (deftest not-equal
110 |   (f "state != 1"
111 |      [{:state 0.5} {}]
112 |      [{:state 1}]))
113 |
114 | (deftest wildcard
115 |   (f "host =~ \"%s.\""
116 |      [{:host "s."} {:host "foos."}]
117 |      [{:host "a."} {:host "s.murf"} {}]))
118 |
119 | (deftest inequality
120 |   (f "metric > 1e10"
121 |      [{:metric 1e11}]
122 |      [{:metric 1e10} {}])
123 |   (f "metric >= -1"
124 |      [{:metric 0} {:metric -1}]
125 |      [{:metric -2} {}])
126 |   (f "metric < 1.2e2"
127 |      [{:metric 1.5e1}]
128 |      [{:metric 1.2e2} {}])
129 |   (f "metric <= 1"
130 |      [{:metric 1} {:metric -20}]
131 |      [{:metric 2} {}]))
132 |
133 | (deftest tagged
134 |   (f "tagged \"cat\""
135 |      [{:tags #{"cat" "dog"}} {:tags #{"cat"}}]
136 |      [{:tags #{"dog"}} {}]))
137 |
138 | (deftest null
139 |   (f "time = null and description != nil"
140 |      [{:time nil :description true}
{:description "hey"}] 141 | [{:time 2 :description true} {:description nil} {}])) 142 | 143 | (deftest bool 144 | (f "not ((host = 1 or host = 2) and service = 3)" 145 | [{:host 1} {:service 3} {}] 146 | [{:host 2 :service 3}])) 147 | 148 | (deftest fast 149 | (let [fun (fun (ast 150 | "host =~ \"api %\" and state = \"ok\" and metric > 0")) 151 | events (cycle [{:host "api 1" :state "ok" :metric 1.2} 152 | {:host "other" :state "ok" :metric 1.2} 153 | {:host "api 2" :state "no" :metric 1.2} 154 | {:host "api 3" :state "ok" :metric 0.5} 155 | {}]) 156 | t1 (linear-time)] 157 | (doseq [e (take 1000 events)] 158 | (fun e)) 159 | (is (< (- (linear-time) t1) 0.05)))) 160 | -------------------------------------------------------------------------------- /test/riemann/test/pool.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.pool 2 | (:use riemann.pool 3 | clojure.test)) 4 | 5 | (deftest claim-release-test 6 | (let [x (atom 0) 7 | pool (fixed-pool #(swap! x inc) {:size 2 :block-start true}) 8 | ; Claim both elements 9 | a (claim pool) 10 | b (claim pool) 11 | ; Pool is empty; should be nil. 12 | c (claim pool) 13 | a' (release pool a) 14 | ; Should re-acquire a 15 | d (claim pool) 16 | ; Empty 17 | e (claim pool) 18 | b' (release pool b) 19 | ; Re-acquire b 20 | f (claim pool)] 21 | (is (= #{1 2} #{a b})) 22 | (is (nil? c)) 23 | (is (= a d)) 24 | (is (nil? e)) 25 | (is (= b f)) 26 | ; Shouldn't have (open)'d more than twice. 27 | (is (= 2 @x)))) 28 | 29 | (deftest claim-invalidate-test 30 | (let [x (atom 0) 31 | pool (fixed-pool #(swap! x inc) {:size 2 :block-start true}) 32 | a (claim pool) 33 | a' (invalidate pool a) 34 | b (claim pool) 35 | 36 | b' (invalidate pool b) 37 | c (claim pool 1) 38 | d (claim pool 1) 39 | e (claim pool 1) 40 | c' (invalidate pool c) 41 | d' (invalidate pool d) 42 | ; Invalidate nil should be a noop 43 | e' (invalidate pool e)] 44 | ; Wait for futures. 
45 |     (is a')
46 |     (is b')
47 |     (is c')
48 |     (is d')
49 |     (is (nil? e'))
50 |
51 |     (dorun (map deref [a' b' c' d']))
52 |
53 |     (is (contains? #{1 2} a))   ; was (is #{1 2} a): asserted the set literal, never checked a
54 |     (is (contains? #{1 2 3} b)) ; same fix: check that b is one of the opened values
55 |     (is (= #{1 2 3 4} #{a b c d}))
56 |     (is (nil? e))
57 |     ; Should have opened twice to start and 4 times after invalidations.
58 |     (is (= 6 @x))))
59 |
60 | (deftest with-pool-test
61 |   (let [x (atom 0)
62 |         pool (fixed-pool #(swap! x inc) {:size 1 :block-start true})]
63 |
64 |     ; Regular claim
65 |     (let [a (with-pool [a pool] a)]
66 |       (is (= 1 a))
67 |       (is (= 1 @x)))
68 |
69 |     ; With-pool should have released.
70 |     (let [a (claim pool)]
71 |       (is (= 1 a))
72 |       (release pool a))
73 |
74 |     ; Throwing errors
75 |     (is (thrown? RuntimeException
76 |                  (with-pool [b pool]
77 |                    (is (= 1 b))
78 |                    (throw (RuntimeException. "whoops")))))
79 |
80 |     ; Pool should have regenerated.
81 |     (Thread/sleep 250)
82 |     (is (= 2 @x))))
83 |
84 | (deftest ^:time unreliable-test
85 |   (let [x (atom 0)
86 |         size 5
87 |         got-client (atom 0)
88 |         no-client (atom 0)
89 |         opens (atom 0)
90 |         open-attempts (atom 0)
91 |         open-failures (atom 0)
92 |         invalidations (atom 0)
93 |         closes (atom 0)
94 |         pool (fixed-pool
95 |                (fn []
96 |                  (swap! open-attempts inc)
97 |                  (if (< 0.5 (rand))
98 |                    (swap! opens inc)
99 |                    (do
100 |                      (swap! open-failures inc)
101 |                      (throw (RuntimeException.)))))
102 |                (fn [_] (swap! closes inc))
103 |                {:size size
104 |                 :regenerate-interval 0.1})]
105 |
106 |     (let [workers
107 |           (map (fn [_]
108 |                  (future
109 |                    (dotimes [i 100]
110 |                      (try
111 |                        (with-pool [x pool 0.1]
112 |                          (if x
113 |                            (do
114 |                              (when (< (rand) 0.1)
115 |                                (swap! invalidations inc)
116 |                                (throw (RuntimeException.)))
117 |                              (swap! got-client inc))
118 |                            (swap! no-client inc)))
119 |                        (catch RuntimeException t)))))
120 |                (range 10))]
121 |       (doseq [w workers] @w))
122 |
123 |     ; Some of the time, multiple retries were needed.
124 |     (is (< @invalidations (+ size @opens)))
125 |
126 |     ; The pool should have made progress.
127 | (is (< 0 @got-client)) 128 | 129 | ; Most of the runs had a client, but not all. 130 | (is (< 0 @no-client (/ @got-client 5))) 131 | 132 | ; Every invalidated client was closed. 133 | (is (= @closes @invalidations)) 134 | 135 | ; The number of open clients did not exceed size. 136 | (is (<= 0 (- @opens @closes) size)) 137 | 138 | ; Invalidations occurred. 139 | (is (< 0 @invalidations)) 140 | 141 | ; Failed opens occurred. 142 | (is (< 0 @open-failures)) 143 | 144 | ; Every invalidation and every failure opening led to another open 145 | ; (except for up to size futures in progress). 146 | (is (<= 0 147 | (- @open-attempts @invalidations @open-failures) 148 | size)) 149 | 150 | ; Far fewer clients were opened than used. 151 | (is (< @opens (/ @got-client 5))) 152 | 153 | (prn @got-client "runs had a client") 154 | (prn @no-client "runs had no client") 155 | (prn @invalidations "clients were invalidated") 156 | (prn @opens "opened clients") 157 | (prn @closes "closed clients") 158 | (prn @open-failures "failures opening clients") 159 | (prn @open-attempts "attempts to open a client"))) 160 | -------------------------------------------------------------------------------- /src/riemann/transport/tcp.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.transport.tcp 2 | "Accepts messages from external sources. Associated with a core. Sends 3 | incoming events to the core's streams, queries the core's index for states." 
4 | (:import [java.net InetSocketAddress] 5 | [java.util.concurrent Executors] 6 | [java.nio.channels ClosedChannelException] 7 | [org.jboss.netty.bootstrap ServerBootstrap] 8 | [org.jboss.netty.buffer ChannelBuffers] 9 | [org.jboss.netty.channel ChannelHandler 10 | ChannelHandlerContext 11 | ChannelPipeline 12 | ChannelPipelineFactory 13 | ChannelStateEvent 14 | Channels 15 | ExceptionEvent 16 | MessageEvent 17 | SimpleChannelHandler] 18 | [org.jboss.netty.channel.group ChannelGroup DefaultChannelGroup] 19 | [org.jboss.netty.channel.socket.nio NioServerSocketChannelFactory] 20 | [org.jboss.netty.handler.codec.frame LengthFieldBasedFrameDecoder 21 | LengthFieldPrepender] 22 | [org.jboss.netty.handler.execution 23 | OrderedMemoryAwareThreadPoolExecutor]) 24 | (:use [riemann.transport :only [handle 25 | protobuf-decoder 26 | protobuf-encoder 27 | msg-decoder 28 | channel-pipeline-factory]] 29 | [riemann.codec :only [encode-pb-msg]] 30 | [riemann.service :only [Service]] 31 | [clojure.tools.logging :only [info warn]] 32 | [riemann.transport :only [handle]])) 33 | 34 | (defn int32-frame-decoder 35 | [] 36 | ; Offset 0, 4 byte header, skip those 4 bytes. 37 | (LengthFieldBasedFrameDecoder. Integer/MAX_VALUE, 0, 4, 0, 4)) 38 | 39 | (defn int32-frame-encoder 40 | [] 41 | (LengthFieldPrepender. 
4)) 42 | 43 | (defn tcp-handler 44 | "Returns a TCP handler around the given atom pointing to a core" 45 | [core ^ChannelGroup channel-group] 46 | (proxy [SimpleChannelHandler] [] 47 | (channelOpen [context ^ChannelStateEvent state-event] 48 | (.add channel-group (.getChannel state-event))) 49 | 50 | (messageReceived [^ChannelHandlerContext context 51 | ^MessageEvent message-event] 52 | (let [channel (.getChannel message-event) 53 | msg (.getMessage message-event)] 54 | (try 55 | (.write channel (encode-pb-msg (handle @core msg))) 56 | (catch java.nio.channels.ClosedChannelException e 57 | (warn "channel closed")) 58 | (catch com.google.protobuf.InvalidProtocolBufferException e 59 | (warn "invalid message, closing") 60 | (.close channel))))) 61 | 62 | (exceptionCaught [context ^ExceptionEvent exception-event] 63 | (let [cause (.getCause exception-event)] 64 | (when-not (= ClosedChannelException (class cause)) 65 | (warn (.getCause exception-event) "TCP handler caught") 66 | (.close (.getChannel exception-event))))))) 67 | 68 | (defrecord TCPServer [host port pipeline-factory core killer] 69 | ; core is a reference to a core 70 | ; killer is a reference to a function which shuts down the server. 71 | 72 | Service 73 | ; TODO compare pipeline-factory! 74 | (equiv? [this other] 75 | (and (instance? TCPServer other) 76 | (= host (:host other)) 77 | (= port (:port other)))) 78 | 79 | (reload! [this new-core] 80 | (reset! core new-core)) 81 | 82 | (start! [this] 83 | (locking this 84 | (when-not @killer 85 | (let [bootstrap (ServerBootstrap. 86 | (NioServerSocketChannelFactory. 87 | (Executors/newCachedThreadPool) 88 | (Executors/newCachedThreadPool))) 89 | all-channels (DefaultChannelGroup. 
90 | (str "tcp-server " host ":" port)) 91 | cpf (channel-pipeline-factory 92 | pipeline-factory 93 | (tcp-handler core all-channels))] 94 | 95 | ; Configure bootstrap 96 | (doto bootstrap 97 | (.setPipelineFactory cpf) 98 | (.setOption "readWriteFair" true) 99 | (.setOption "tcpNoDelay" true) 100 | (.setOption "reuseAddress" true) 101 | (.setOption "child.tcpNoDelay" true) 102 | (.setOption "child.reuseAddress" true) 103 | (.setOption "child.keepAlive" true)) 104 | 105 | ; Start bootstrap 106 | (let [server-channel (.bind bootstrap 107 | (InetSocketAddress. host port))] 108 | (.add all-channels server-channel)) 109 | (info "TCP server" host port "online") 110 | 111 | ; fn to close server 112 | (reset! killer 113 | (fn [] 114 | (-> all-channels .close .awaitUninterruptibly) 115 | (.releaseExternalResources bootstrap) 116 | (info "TCP server" host port "shut down"))))))) 117 | 118 | (stop! [this] 119 | (locking this 120 | (when @killer 121 | (@killer) 122 | (reset! killer nil))))) 123 | 124 | (defn tcp-server 125 | "Create a new TCP server. Doesn't start until (service/start!). Options: 126 | :host The host to listen on (default 127.0.0.1). 127 | :port The port to listen on. (default 5555) 128 | :pipeline-factory" 129 | ([] 130 | (tcp-server {})) 131 | ([opts] 132 | (let [pipeline-factory #(doto (Channels/pipeline) 133 | (.addLast "int32-frame-decoder" 134 | (int32-frame-decoder)) 135 | (.addLast "int32-frame-encoder" 136 | (int32-frame-encoder)) 137 | (.addLast "protobuf-decoder" 138 | (protobuf-decoder)) 139 | (.addLast "protobuf-encoder" 140 | (protobuf-encoder)) 141 | (.addLast "msg-decoder" 142 | (msg-decoder)))] 143 | (TCPServer. 
144 | (get opts :host "127.0.0.1") 145 | (get opts :port 5555) 146 | (get opts :pipeline-factory pipeline-factory) 147 | (atom nil) 148 | (atom nil))))) 149 | -------------------------------------------------------------------------------- /test/riemann/test/deps.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.deps 2 | (:use riemann.deps 3 | riemann.index 4 | clojure.test)) 5 | 6 | (defn context [events] 7 | (let [i (nbhm-index)] 8 | (doseq [e events] 9 | (update i e)) 10 | i)) 11 | 12 | (deftest hash-match 13 | ; No states 14 | (is (not (match {:service "foo"} 15 | (context []) 16 | nil))) 17 | ; Single state 18 | (is (match {:state "ok"} 19 | (context [{:state "ok"}]) 20 | nil)) 21 | ; Wrong state 22 | (is (not (match {:state "ok"} 23 | (context [{:state "critical"}]) 24 | nil)))) 25 | 26 | (deftest localhost-match 27 | (let [r (localhost {:service "memcache" :state "ok"})] 28 | (is (match r 29 | (context [{:host 1 :service "memcache" :state "ok"}]) 30 | {:host 1})) 31 | (is (not (match r 32 | (context [{:host 1 :service "memcache" :state "ok"}]) 33 | {:host 2}))) 34 | (is (not (match r 35 | (context [{:host 1 :service "memcache" :state "false"}]) 36 | {:host 1}))) 37 | )) 38 | 39 | (deftest depends-match 40 | ; Different service is always true 41 | (is (match (depends {:service "x"} {:service "y"}) 42 | (context []) 43 | {:service "z"})) 44 | 45 | ; Single dep 46 | (let [r (depends {:service "x"} {:service "y" :state "ok"})] 47 | (is (match r (context [{:service "y" :state "ok"}]) 48 | {:service "x"})) 49 | (is (not (match r (context [{:service "y" :state "no"}]) 50 | {:service "x"}))) 51 | (is (not (match r (context []) 52 | {:service "x"}))) 53 | )) 54 | 55 | (deftest all-match 56 | (let [r (all {:service "x"} {:service "y"})] 57 | (is (match r (context [{:service "x"} {:service "y"}]) nil)) 58 | (is (not (match r (context []) nil))) 59 | (is (not (match r (context [{:service "x"}]) nil))))) 60 | 61 | 
(deftest any-match 62 | (let [r (any {:service "x"} {:service "y"})] 63 | (is (match r (context [{:service "x"} {:service "y"}]) nil)) 64 | (is (not (match r (context []) nil))) 65 | (is (match r (context [{:service "x"}]) nil)))) 66 | 67 | (deftest real-match 68 | (let [r (all (depends {:service "lbapp"} 69 | (any {:service "riak 1" :state "ok"} 70 | {:service "riak 2" :state "ok"})) 71 | (depends {:service "api"} 72 | (all 73 | (localhost 74 | (any 75 | {:service "memcached" :state "ok"} 76 | {:service "redis" :state "ok"}) 77 | (any 78 | {:service "cpu" :state "ok"} 79 | {:service "cpu" :state "warning"})) 80 | {:host "db" :service "postgres" :state "ok"}))) 81 | c (context [{:service "riak 1" :state "ok"} 82 | {:service "riak 2" :state "warning"} 83 | {:service "memcached" :host 1 :state "ok"} 84 | {:service "memcached" :host 2 :state "critical"} 85 | {:service "memcached" :host 3 :state "ok"} 86 | {:service "memcached" :host 4 :state "critical"} 87 | {:service "redis" :host 1 :state "ok"} 88 | {:service "redis" :host 2 :state "ok"} 89 | {:service "redis" :host 3 :state "critical"} 90 | {:service "redis" :host 4 :state "critical"} 91 | {:service "cpu" :host 1 :state "ok"} 92 | {:service "cpu" :host 2 :state "warning"} 93 | {:service "cpu" :host 3 :state "warning"} 94 | {:service "cpu" :host 4 :state "ok"} 95 | {:host "db" :service "postgres" :state "ok"}])] 96 | 97 | (is (match r c {:service "lbapp"})) 98 | (is (match r c {:service "api" :host 1})) 99 | (is (match r c {:service "api" :host 2})) 100 | (is (match r c {:service "api" :host 3})) 101 | (is (not (match r c {:service "api" :host 4}))) 102 | (is (not (match r c {:service "api"}))) 103 | (is (not (match r c {:service "api" :host :invisible}))))) 104 | 105 | (deftest tag-test 106 | (let [rule (depends {:service "x"} {:service "y"}) 107 | index (nbhm-index) 108 | out (ref []) 109 | append-out (fn [e] (dosync (alter out conj e))) 110 | get-out (fn [] (dosync (let [x (set (deref out))] 111 | (ref-set out 
[]) 112 | x))) 113 | s (deps-tag index rule append-out)] 114 | 115 | (is (= #{} (get-out))) 116 | 117 | ; Pass through unrelated events. 118 | (s {}) 119 | (s {:service "other"}) 120 | (is (= #{{:deps-satisfied? true} 121 | {:deps-satisfied? true :service "other"}} 122 | (get-out))) 123 | )) 124 | 125 | ; Someday. 126 | (comment 127 | (deftest suppress-test 128 | (let [rule (depends {:service "x"} {:service "y"}) 129 | index (nbhm-index) 130 | out (ref []) 131 | append-out (fn [e] (dosync (alter out conj e))) 132 | get-out (fn [] (dosync (let [x (set (deref out))] 133 | (ref-set out []) 134 | x))) 135 | s (suppress-dependent-failures {:index index 136 | :interval 0.1 137 | :rule rule} 138 | append-out)] 139 | 140 | (is (= #{} (get-out))) 141 | 142 | ; Should pass through unrelated events. 143 | (s {}) 144 | (s {:service "other"}) 145 | (Thread/sleep 100) 146 | (is (= #{{} {:service "other"}} (get-out))) 147 | 148 | ; Should hold on to unsatisfied events until dependencies are met. 149 | (s {:service "x"}) 150 | (Thread/sleep 100) 151 | (is (= #{} (get-out))) 152 | 153 | (update index {:service "y"}) 154 | (Thread/sleep 150) 155 | (is (= #{{:service "x"}} (get-out))) 156 | 157 | ; Should allow unsatisfied events to be updated. 
158 | (delete index {:service "y"}) 159 | (s {:service "x" :state 1}) 160 | (Thread/sleep 100) 161 | (s {:service "x" :state 2}) 162 | (Thread/sleep 100) 163 | (is (= #{} (get-out))) 164 | ; After the dependency returns, only the most recent event for x is expected. 165 | (update index {:service "y"}) 166 | (Thread/sleep 150) 167 | (is (= #{{:service "x" :state 2}} (get-out))) 168 | ))) 169 | -------------------------------------------------------------------------------- /test/riemann/test/core.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.core 2 | (:require riemann.transport.tcp 3 | riemann.streams 4 | [riemann.logging :as logging]) 5 | (:use riemann.client 6 | riemann.common 7 | riemann.index 8 | riemann.time.controlled 9 | riemann.core 10 | clojure.test 11 | [clojure.algo.generic.functor :only [fmap]] 12 | [riemann.service :only [Service]] 13 | [riemann.time :only [unix-time]])) 14 | 15 | (logging/init) 16 | (use-fixtures :each reset-time!) 17 | (use-fixtures :once control-time!) 18 | 19 | (defmacro tim 20 | "Evaluates expr and returns the time it took in seconds" 21 | [expr] 22 | `(let [start# (. System (nanoTime)) 23 | ret# ~expr] 24 | (/ (- (. System (nanoTime)) start#) 1000000000.0))) 25 | 26 | (deftest blank-test 27 | (let [c (core)] 28 | (is (= [] (:streams c))) 29 | (is (= [] (:services c))) 30 | (is (= nil (:index c))) 31 | (is (:pubsub c)))) 32 | 33 | (defrecord TestService [id running core] 34 | Service 35 | (start! [_] (reset! running true)) 36 | (stop! [_] (reset! running false)) 37 | (reload! [_ c] (reset! core c)) 38 | (equiv? [a b] (= (:id a) (:id b)))) 39 | 40 | (deftest start-transition-stop 41 | (logging/suppress 42 | "riemann.core" 43 | (let [old-running (atom nil) 44 | old-core (atom nil) 45 | same-1-running (atom nil) 46 | same-1-core (atom nil) 47 | same-2-running (atom nil) 48 | same-2-core (atom nil) 49 | new-running (atom nil) 50 | new-core (atom nil) 51 | old-service (TestService. :old old-running old-core) 52 | same-service-1 (TestService. 
:same same-1-running same-1-core) 53 | same-service-2 (TestService. :same same-2-running same-2-core) 54 | new-service (TestService. :new new-running new-core) 55 | old {:services [old-service same-service-1]} 56 | new {:services [new-service same-service-2]}] 57 | 58 | (start! old) 59 | (is (= [old-service same-service-1] (:services old))) 60 | (is @old-running) 61 | (is @same-1-running) 62 | (is (not @same-2-running)) 63 | (is (not @new-running)) 64 | (is (= old @old-core)) 65 | (is (= old @same-1-core)) 66 | (is (= nil @same-2-core)) 67 | (is (= nil @new-core)) 68 | 69 | ; Should preserve the same-1 service from the old core 70 | (let [final (transition! old new)] 71 | (is (not= new final)) 72 | (is (= [new-service same-service-1] (:services final))) 73 | (is (not @old-running)) 74 | (is @same-1-running) 75 | (is (not @same-2-running)) 76 | (is @new-running) 77 | (is (= old @old-core)) 78 | (is (= final @same-1-core)) 79 | (is (= nil @same-2-core)) 80 | (is (= final @new-core)) 81 | 82 | (stop! final) 83 | (is (= [new-service same-service-1] (:services final))) 84 | (is (not @old-running)) 85 | (is (not @same-1-running)) 86 | (is (not @same-2-running)) 87 | (is (not @new-running)) 88 | (is (= old @old-core)) 89 | (is (= final @same-1-core)) 90 | (is (= nil @same-2-core)) 91 | (is (= final @new-core)))))) 92 | 93 | (deftest serialization 94 | (let [out (ref []) 95 | server (riemann.transport.tcp/tcp-server) 96 | stream (riemann.streams/append out) 97 | core (logging/suppress ["riemann.transport.tcp" 98 | "riemann.core"] 99 | (transition! 
(core) 100 | {:services [server] 101 | :streams [stream]})) 102 | client (riemann.client/tcp-client) 103 | events [{:host "shiiiiire!"} 104 | {:service "baaaaaginnnns!"} 105 | {:state "middling"} 106 | {:description "quite dire, really"} 107 | {:tags ["oh" "sam"]} 108 | {:metric -1000.0} 109 | {:metric Double/MAX_VALUE} 110 | {:metric Long/MIN_VALUE} 111 | {:time 1234} 112 | {:ttl 12.0}]] 113 | 114 | (try 115 | ; Send events 116 | (doseq [e events] (send-event client e)) 117 | 118 | (doseq [[in out] (map (fn [a b] [a b]) events (deref out))] 119 | (is (every? (fn [k] (= (k in) (k out))) (keys in)))) 120 | 121 | (finally 122 | (close-client client) 123 | (logging/suppress ["riemann.core" "riemann.transport.tcp"] 124 | (stop! core)))))) 125 | 126 | (deftest query-test 127 | (let [index (index) 128 | server (riemann.transport.tcp/tcp-server) 129 | core (logging/suppress ["riemann.core" 130 | "riemann.transport.tcp"] 131 | (transition! (core) 132 | {:services [server] 133 | :index index})) 134 | client (riemann.client/tcp-client)] 135 | 136 | (try 137 | ; Send events 138 | (update-index core {:metric 1 :time 1}) 139 | (update-index core {:metric 2 :time 3}) 140 | (update-index core {:host "kitten" 141 | :tags ["whiskers" "paws"] :time 2}) 142 | (update-index core {:service "miao" :host "cat" :time 3}) 143 | 144 | (let [r (set (query client "metric = 2 or service = \"miao\" or tagged \"whiskers\""))] 145 | (is (= r 146 | #{(event {:metric 2, :time 3}) 147 | (event {:host "kitten" :tags ["whiskers" "paws"] :time 2}) 148 | (event {:host "cat", :service "miao", :time 3})} r))) 149 | 150 | (finally 151 | (close-client client) 152 | (logging/suppress ["riemann.core" "riemann.transport.tcp"] 153 | (stop! core)))))) 154 | 155 | (deftest expires 156 | (let [index (index) 157 | res (atom nil) 158 | expired-stream (riemann.streams/expired 159 | (fn [e] (reset! 
res e))) 160 | reaper (reaper 0.001) 161 | core (logging/suppress 162 | ["riemann.core" "riemann.transport.tcp"] 163 | (transition! (core) {:services [reaper] 164 | :streams [expired-stream] 165 | :index index}))] 166 | 167 | ; Insert events 168 | (update-index core {:service 1 :ttl 0.01 :time (unix-time)}) 169 | (update-index core {:service 2 :ttl 1 :time (unix-time)}) 170 | 171 | (advance! 0.011) 172 | 173 | ; Wait for reaper to eat them 174 | (Thread/sleep 100) 175 | 176 | ; Kill reaper 177 | (logging/suppress "riemann.core" 178 | (stop! core)) 179 | 180 | ; Check that index does not contain these states 181 | (is (= [2] (map (fn [e] (:service e)) index))) 182 | 183 | ; Check that expired-stream received them. 184 | (is (= @res 185 | {:service 1 186 | :host nil 187 | :time 0.011 188 | :state "expired"})))) 189 | 190 | (deftest percentiles 191 | (let [out (ref []) 192 | server (riemann.transport.tcp/tcp-server) 193 | stream (riemann.streams/percentiles 1 [0 0.5 0.95 0.99 1] 194 | (riemann.streams/append out)) 195 | core (logging/suppress 196 | ["riemann.core" "riemann.transport.tcp"] 197 | (transition! (core) {:services [server] 198 | :streams [stream]})) 199 | client (riemann.client/tcp-client)] 200 | (try 201 | ; Send some events over the network 202 | (doseq [n (shuffle (take 101 (iterate inc 0)))] 203 | (send-event client {:metric n :service "per"})) 204 | (close-client client) 205 | 206 | ; Wait for percentiles 207 | (advance! 1) 208 | 209 | ; Get states 210 | (let [events (deref out) 211 | states (fmap first (group-by :service events))] 212 | 213 | (is (= (:metric (states "per 0.5")) 50)) 214 | (is (= (:metric (states "per 0.95")) 95)) 215 | (is (= (:metric (states "per 0.99")) 99)) 216 | (is (= (:metric (states "per 1")) 100))) 217 | 218 | (finally 219 | (close-client client) 220 | (logging/suppress ["riemann.transport.tcp" "riemann.core"] 221 | (stop! 
core)))))) 222 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 1.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE 4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF 5 | THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial code and 12 | documentation distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | 16 | i) changes to the Program, and 17 | 18 | ii) additions to the Program; 19 | 20 | where such changes and/or additions to the Program originate from and 21 | are distributed by that particular Contributor. A Contribution 22 | 'originates' from a Contributor if it was added to the Program by such 23 | Contributor itself or anyone acting on such Contributor's 24 | behalf. Contributions do not include additions to the Program which: 25 | (i) are separate modules of software distributed in conjunction with 26 | the Program under their own license agreement, and (ii) are not 27 | derivative works of the Program. 28 | 29 | "Contributor" means any person or entity that distributes the Program. 30 | 31 | "Licensed Patents" mean patent claims licensable by a Contributor 32 | which are necessarily infringed by the use or sale of its Contribution 33 | alone or when combined with the Program. 34 | 35 | "Program" means the Contributions distributed in accordance with this 36 | Agreement. 37 | 38 | "Recipient" means anyone who receives the Program under this 39 | Agreement, including all Contributors. 40 | 41 | 2. 
GRANT OF RIGHTS 42 | 43 | a) Subject to the terms of this Agreement, each Contributor hereby 44 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 45 | license to reproduce, prepare derivative works of, publicly display, 46 | publicly perform, distribute and sublicense the Contribution of such 47 | Contributor, if any, and such derivative works, in source code and 48 | object code form. 49 | 50 | b) Subject to the terms of this Agreement, each Contributor hereby 51 | grants Recipient a non-exclusive, worldwide, royalty-free patent 52 | license under Licensed Patents to make, use, sell, offer to sell, 53 | import and otherwise transfer the Contribution of such Contributor, if 54 | any, in source code and object code form. This patent license shall 55 | apply to the combination of the Contribution and the Program if, at 56 | the time the Contribution is added by the Contributor, such addition 57 | of the Contribution causes such combination to be covered by the 58 | Licensed Patents. The patent license shall not apply to any other 59 | combinations which include the Contribution. No hardware per se is 60 | licensed hereunder. 61 | 62 | c) Recipient understands that although each Contributor grants the 63 | licenses to its Contributions set forth herein, no assurances are 64 | provided by any Contributor that the Program does not infringe the 65 | patent or other intellectual property rights of any other entity. Each 66 | Contributor disclaims any liability to Recipient for claims brought by 67 | any other entity based on infringement of intellectual property rights 68 | or otherwise. As a condition to exercising the rights and licenses 69 | granted hereunder, each Recipient hereby assumes sole responsibility 70 | to secure any other intellectual property rights needed, if any. 
For 71 | example, if a third party patent license is required to allow 72 | Recipient to distribute the Program, it is Recipient's responsibility 73 | to acquire that license before distributing the Program. 74 | 75 | d) Each Contributor represents that to its knowledge it has sufficient 76 | copyright rights in its Contribution, if any, to grant the copyright 77 | license set forth in this Agreement. 78 | 79 | 3. REQUIREMENTS 80 | 81 | A Contributor may choose to distribute the Program in object code form 82 | under its own license agreement, provided that: 83 | 84 | a) it complies with the terms and conditions of this Agreement; and 85 | 86 | b) its license agreement: 87 | 88 | i) effectively disclaims on behalf of all Contributors all warranties 89 | and conditions, express and implied, including warranties or 90 | conditions of title and non-infringement, and implied warranties or 91 | conditions of merchantability and fitness for a particular purpose; 92 | 93 | ii) effectively excludes on behalf of all Contributors all liability 94 | for damages, including direct, indirect, special, incidental and 95 | consequential damages, such as lost profits; 96 | 97 | iii) states that any provisions which differ from this Agreement are 98 | offered by that Contributor alone and not by any other party; and 99 | 100 | iv) states that source code for the Program is available from such 101 | Contributor, and informs licensees how to obtain it in a reasonable 102 | manner on or through a medium customarily used for software exchange. 103 | 104 | When the Program is made available in source code form: 105 | 106 | a) it must be made available under this Agreement; and 107 | 108 | b) a copy of this Agreement must be included with each copy of the Program. 109 | 110 | Contributors may not remove or alter any copyright notices contained 111 | within the Program. 
112 | 113 | Each Contributor must identify itself as the originator of its 114 | Contribution, if any, in a manner that reasonably allows subsequent 115 | Recipients to identify the originator of the Contribution. 116 | 117 | 4. COMMERCIAL DISTRIBUTION 118 | 119 | Commercial distributors of software may accept certain 120 | responsibilities with respect to end users, business partners and the 121 | like. While this license is intended to facilitate the commercial use 122 | of the Program, the Contributor who includes the Program in a 123 | commercial product offering should do so in a manner which does not 124 | create potential liability for other Contributors. Therefore, if a 125 | Contributor includes the Program in a commercial product offering, 126 | such Contributor ("Commercial Contributor") hereby agrees to defend 127 | and indemnify every other Contributor ("Indemnified Contributor") 128 | against any losses, damages and costs (collectively "Losses") arising 129 | from claims, lawsuits and other legal actions brought by a third party 130 | against the Indemnified Contributor to the extent caused by the acts 131 | or omissions of such Commercial Contributor in connection with its 132 | distribution of the Program in a commercial product offering. The 133 | obligations in this section do not apply to any claims or Losses 134 | relating to any actual or alleged intellectual property 135 | infringement. In order to qualify, an Indemnified Contributor must: a) 136 | promptly notify the Commercial Contributor in writing of such claim, 137 | and b) allow the Commercial Contributor tocontrol, and cooperate with 138 | the Commercial Contributor in, the defense and any related settlement 139 | negotiations. The Indemnified Contributor may participate in any such 140 | claim at its own expense. 141 | 142 | For example, a Contributor might include the Program in a commercial 143 | product offering, Product X. That Contributor is then a Commercial 144 | Contributor. 
If that Commercial Contributor then makes performance 145 | claims, or offers warranties related to Product X, those performance 146 | claims and warranties are such Commercial Contributor's responsibility 147 | alone. Under this section, the Commercial Contributor would have to 148 | defend claims against the other Contributors related to those 149 | performance claims and warranties, and if a court requires any other 150 | Contributor to pay any damages as a result, the Commercial Contributor 151 | must pay those damages. 152 | 153 | 5. NO WARRANTY 154 | 155 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS 156 | PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 157 | KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY 158 | WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY 159 | OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely 160 | responsible for determining the appropriateness of using and 161 | distributing the Program and assumes all risks associated with its 162 | exercise of rights under this Agreement , including but not limited to 163 | the risks and costs of program errors, compliance with applicable 164 | laws, damage to or loss of data, programs or equipment, and 165 | unavailability or interruption of operations. 166 | 167 | 6. DISCLAIMER OF LIABILITY 168 | 169 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR 170 | ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, 171 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING 172 | WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF 173 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 174 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR 175 | DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED 176 | HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 177 | 178 | 7. 
GENERAL 179 | 180 | If any provision of this Agreement is invalid or unenforceable under 181 | applicable law, it shall not affect the validity or enforceability of 182 | the remainder of the terms of this Agreement, and without further 183 | action by the parties hereto, such provision shall be reformed to the 184 | minimum extent necessary to make such provision valid and enforceable. 185 | 186 | If Recipient institutes patent litigation against any entity 187 | (including a cross-claim or counterclaim in a lawsuit) alleging that 188 | the Program itself (excluding combinations of the Program with other 189 | software or hardware) infringes such Recipient's patent(s), then such 190 | Recipient's rights granted under Section 2(b) shall terminate as of 191 | the date such litigation is filed. 192 | 193 | All Recipient's rights under this Agreement shall terminate if it 194 | fails to comply with any of the material terms or conditions of this 195 | Agreement and does not cure such failure in a reasonable period of 196 | time after becoming aware of such noncompliance. If all Recipient's 197 | rights under this Agreement terminate, Recipient agrees to cease use 198 | and distribution of the Program as soon as reasonably 199 | practicable. However, Recipient's obligations under this Agreement and 200 | any licenses granted by Recipient relating to the Program shall 201 | continue and survive. 202 | 203 | Everyone is permitted to copy and distribute copies of this Agreement, 204 | but in order to avoid inconsistency the Agreement is copyrighted and 205 | may only be modified in the following manner. The Agreement Steward 206 | reserves the right to publish new versions (including revisions) of 207 | this Agreement from time to time. No one other than the Agreement 208 | Steward has the right to modify this Agreement. The Eclipse Foundation 209 | is the initial Agreement Steward. 
The Eclipse Foundation may assign 210 | the responsibility to serve as the Agreement Steward to a suitable 211 | separate entity. Each new version of the Agreement will be given a 212 | distinguishing version number. The Program (including Contributions) 213 | may always be distributed subject to the version of the Agreement 214 | under which it was received. In addition, after a new version of the 215 | Agreement is published, Contributor may elect to distribute the 216 | Program (including its Contributions) under the new version. Except as 217 | expressly stated in Sections 2(a) and 2(b) above, Recipient receives 218 | no rights or licenses to the intellectual property of any Contributor 219 | under this Agreement, whether expressly, by implication, estoppel or 220 | otherwise. All rights in the Program not expressly granted under this 221 | Agreement are reserved. 222 | 223 | This Agreement is governed by the laws of the State of Washington and 224 | the intellectual property laws of the United States of America. No 225 | party to this Agreement will bring a legal action under this Agreement 226 | more than one year after the cause of action arose. Each party waives 227 | its rights to a jury trial in any resulting litigation. 228 | -------------------------------------------------------------------------------- /test/riemann/test/streams.clj: -------------------------------------------------------------------------------- 1 | (ns riemann.test.streams 2 | (:use riemann.streams 3 | [riemann.common :exclude [match]] 4 | riemann.time.controlled 5 | riemann.time 6 | clojure.test) 7 | (:require [riemann.index :as index] 8 | [riemann.folds :as folds] 9 | incanter.stats)) 10 | 11 | (use-fixtures :once control-time!) 12 | (use-fixtures :each reset-time!) 13 | 14 | (defmacro run-stream 15 | "Applies inputs to stream, and returns outputs." 
16 | [stream inputs] 17 | `(let [out# (ref []) 18 | stream# (~@stream (append out#))] 19 | (doseq [e# ~inputs] (stream# e#)) 20 | (deref out#))) 21 | 22 | (defmacro run-stream-intervals 23 | "Applies a seq of alternating events and intervals (in seconds) between them to stream, returning outputs." 24 | [stream inputs-and-intervals] 25 | `(let [out# (ref []) 26 | stream# (~@stream (append out#)) 27 | start-time# (ref (unix-time)) 28 | next-time# (ref (deref start-time#))] 29 | (doseq [[e# interval#] (partition-all 2 ~inputs-and-intervals)] 30 | (stream# e#) 31 | (when interval# 32 | (dosync (ref-set next-time# (+ (deref next-time#) interval#))) 33 | (advance! (deref next-time#)))) 34 | (let [result# (deref out#)] 35 | ; close stream 36 | (stream# {:state "expired"}) 37 | result#))) 38 | 39 | (defmacro test-stream 40 | "Verifies that the given stream, taking inputs, forwards outputs to children." 41 | [stream inputs outputs] 42 | `(is (= (run-stream ~stream ~inputs) ~outputs))) 43 | 44 | (defmacro test-stream-intervals 45 | "Verifies that run-stream-intervals, taking inputs/intervals, forwards outputs to children." 
46 | [stream inputs-and-intervals outputs] 47 | `(is (= (run-stream-intervals ~stream ~inputs-and-intervals) ~outputs))) 48 | 49 | (defn evs 50 | "Generate events based on the given event, with given metrics" 51 | [base-event & metrics] 52 | (vec (map #(assoc base-event :metric %) 53 | metrics))) 54 | 55 | (defn em 56 | "Generate events with given metrics" 57 | [& metrics] 58 | (vec (map (fn [m] {:metric m}) metrics))) 59 | 60 | (deftest combine-test 61 | (let [r (ref nil) 62 | sum (combine folds/sum (register r)) 63 | min (combine folds/minimum (register r)) 64 | max (combine folds/maximum (register r)) 65 | mean (combine folds/mean (register r)) 66 | median (combine folds/median (register r)) 67 | events [{:metric 1} 68 | {:metric 0} 69 | {:metric -2}]] 70 | (sum events) 71 | (is (= (deref r) {:metric -1})) 72 | (min events) 73 | (is (= (deref r) {:metric -2})) 74 | (max events) 75 | (is (= (deref r) {:metric 1})) 76 | (mean events) 77 | (is (= (deref r) {:metric -1/3})) 78 | (median events) 79 | (is (= (deref r) {:metric 0})))) 80 | 81 | (deftest smap-test 82 | (test-stream (smap inc) [6 3 -1] [7 4 0])) 83 | 84 | (deftest stream-test 85 | (let [vals1 (atom []) 86 | vals2 (atom []) 87 | add1 #(swap! vals1 conj %) 88 | add2 #(swap! 
vals2 conj %)] 89 | (run-stream (stream add1 add2) [1 2 3]) 90 | (is (= @vals1 [1 2 3])) 91 | (is (= @vals2 [1 2 3])))) 92 | 93 | (deftest sreduce-test 94 | (testing "explicit value" 95 | (test-stream (sreduce + 1) [1 2 3] [2 4 7])) 96 | 97 | (testing "implicit value" 98 | (test-stream (sreduce +) [1 2 3 4] [3 6 10]))) 99 | 100 | (deftest counter-test 101 | (let [r (ref []) 102 | s (counter (append r)) 103 | events [{:metric 2} 104 | {} 105 | {:metric 1} 106 | {:metric 5} 107 | {:tags ["reset"] :metric -1} 108 | {:metric 2}]] 109 | (doseq [e events] (s e)) 110 | 111 | (is (= (deref r) 112 | [{:metric 2} 113 | {:metric 3} 114 | {:metric 8} 115 | {:tags ["reset"] :metric -1} 116 | {:metric 1}])))) 117 | 118 | (deftest match-test 119 | ; Regular strings. 120 | (test-stream (match :service "foo") 121 | [{} 122 | {:service "bar"} 123 | {:service "foo"}] 124 | [{:service "foo"}]) 125 | 126 | ; Sets 127 | (test-stream (match :metric #{0 2}) 128 | [{} 129 | {:metric 1} 130 | {:metric 2}] 131 | [{:metric 2}]) 132 | 133 | ; Regexen 134 | (test-stream (match :state #"^mi") 135 | [{} 136 | {:state "migas"} 137 | {:state "other breakfast foods"}] 138 | [{:state "migas"}]) 139 | 140 | ; Functions 141 | (test-stream (match identity 2) 142 | [1 2 3] 143 | [2])) 144 | 145 | (deftest tagged-all-test 146 | (test-stream (tagged-all ["kitten" "cat"]) 147 | [{:tags ["kitten" "cat"]} 148 | {:tags ["kitten", "cat", "meow"]} 149 | {:tags ["dog" "cat"]} 150 | {:tags ["cat"]} 151 | {:tags []} 152 | {}] 153 | [{:tags ["kitten" "cat"]} 154 | {:tags ["kitten", "cat", "meow"]}]) 155 | 156 | (test-stream (tagged-all "meow") 157 | [{:tags ["meow" "bark"]} 158 | {:tags ["meow"]} 159 | {:tags ["bark"]} 160 | {}] 161 | [{:tags ["meow" "bark"]} 162 | {:tags ["meow"]}])) 163 | 164 | (deftest tagged-any-test 165 | (test-stream (tagged-any ["kitten" "cat"]) 166 | [{:tags ["kitten" "cat"]} 167 | {:tags ["cat", "dog"]} 168 | {:tags ["kitten"]} 169 | {:tags ["dog"]} 170 | {:tags []} 171 | {}] 172 | [{:tags 
["kitten" "cat"]} 173 | {:tags ["cat", "dog"]} 174 | {:tags ["kitten"]}]) 175 | ; Single tag passed as a bare string rather than a collection. 176 | (test-stream (tagged-any "meow") 177 | [{:tags ["meow" "bark"]} 178 | {:tags ["meow"]} 179 | {:tags ["bark"]} 180 | {}] 181 | [{:tags ["meow" "bark"]} 182 | {:tags ["meow"]}])) 183 | 184 | (deftest split*-test 185 | (test-stream (split* identity) 186 | [true false nil 2] 187 | [true 2]) 188 | 189 | ;; dispatch with default value 190 | (let [sup (fn [threshold] (fn [{:keys [metric]}] (> metric threshold))) 191 | res (atom []) 192 | events [{:metric 15} {:metric 8} {:metric 2}] 193 | expect [{:metric 15 :state :crit} 194 | {:metric 8 :state :warn} 195 | {:metric 2 :state :ok}]] 196 | (doseq [e events] 197 | ((split* (sup 10) (with :state :crit (partial swap! res conj)) 198 | (sup 5) (with :state :warn (partial swap! res conj)) 199 | (with :state :ok (partial swap! res conj))) 200 | e)) 201 | (is (= expect @res))) 202 | 203 | ;; dispatch with no default value 204 | (let [sup (fn [threshold] (fn [{:keys [metric]}] (> metric threshold))) 205 | res (atom []) 206 | events [{:metric 15} {:metric 8} {:metric 2}] 207 | expect [{:metric 15 :state :crit} 208 | {:metric 8 :state :warn}]] 209 | (doseq [e events] 210 | ((split* (sup 10) (with :state :crit (partial swap! res conj)) 211 | (sup 5) (with :state :warn (partial swap! res conj))) 212 | e)) 213 | (is (= expect @res)))) 214 | 215 | (deftest split-test 216 | ;; same test as above, using implicit rewrites 217 | (let [sup (fn [threshold] (fn [{:keys [metric]}] (> metric threshold))) 218 | res (atom []) 219 | events [{:metric 15} {:metric 8} {:metric 2}] 220 | expect [{:metric 15 :state :crit} 221 | {:metric 8 :state :warn} 222 | {:metric 2 :state :ok}]] 223 | (doseq [e events] 224 | ((split (> metric 10) (with :state :crit (partial swap! res conj)) 225 | (> metric 5) (with :state :warn (partial swap! res conj)) 226 | (with :state :ok (partial swap! 
res conj))) 227 | e)) 228 | (is (= expect @res)))) 229 | 230 | (deftest splitp-test 231 | ;; same test as above, using splitp 232 | (let [sup (fn [threshold] (fn [{:keys [metric]}] (> metric threshold))) 233 | res (atom []) 234 | events [{:metric 15} {:metric 8} {:metric 2}] 235 | expect [{:metric 15 :state :crit} 236 | {:metric 8 :state :warn} 237 | {:metric 2 :state :ok}]] 238 | (doseq [e events] 239 | ((splitp <= metric 240 | 10 (with :state :crit (partial swap! res conj)) 241 | 5 (with :state :warn (partial swap! res conj)) 242 | (with :state :ok (partial swap! res conj))) 243 | e)) 244 | (is (= expect @res)))) 245 | 246 | (deftest where*-test 247 | (test-stream (where* identity) 248 | [true false nil 2] 249 | [true 2]) 250 | 251 | (test-stream (where* expired?) 252 | [{:time -1 :ttl 0.5} 253 | {:time 0 :ttl 1}] 254 | [{:time -1 :ttl 0.5}]) 255 | 256 | ; Complex closure with else clause 257 | (let [good (atom []) 258 | bad (atom []) 259 | s (where* (fn [event] 260 | (or (= "good" (:service event)) 261 | (< 2 (:metric event)))) 262 | (partial swap! good conj) 263 | (else (partial swap! 
bad conj))) 264 | events [{:service "good" :metric 0} 265 | {:service "bad" :metric 0} 266 | {:metric 1} 267 | {:service "bad" :metric 1} 268 | {:service "bad" :metric 3}]] 269 | 270 | ; Run stream 271 | (doseq [e events] (s e)) 272 | 273 | (is (= @good 274 | [{:service "good" :metric 0} 275 | {:service "bad" :metric 3}])) 276 | (is (= @bad 277 | [{:service "bad" :metric 0} 278 | {:metric 1} 279 | {:service "bad" :metric 1}])))) 280 | 281 | (deftest where-field 282 | (let [r (ref []) 283 | s (where (or (state "ok" "good") 284 | (= "weird" state)) 285 | (fn [e] (dosync (alter r conj e)))) 286 | events [{:state "ok"} 287 | {:state "good"} 288 | {:state "weird"} 289 | {:state "error"}] 290 | expect [{:state "ok"} 291 | {:state "good"} 292 | {:state "weird"}]] 293 | (doseq [e events] (s e)) 294 | (is (= expect (deref r))))) 295 | 296 | (deftest where-regex 297 | (test-stream (where (service #"^foo")) 298 | [{} 299 | {:service "foo"} 300 | {:service "food"}] 301 | [{:service "foo"} 302 | {:service "food"}])) 303 | 304 | (deftest where-variable 305 | ; Verify that the macro allows variables to be used in predicates. 306 | (let [regex #"cat"] 307 | (test-stream (where (service regex)) 308 | [{:service "kitten"} 309 | {:service "cats"}] 310 | [{:service "cats"}]))) 311 | 312 | (deftest where-tagged 313 | (let [r (ref []) 314 | s (where (tagged "foo") (append r)) 315 | events [{} 316 | {:tags []} 317 | {:tags ["blah"]} 318 | {:tags ["foo"]} 319 | {:tags ["foo" "bar"]}]] 320 | (doseq [e events] (s e)) 321 | (is (= (deref r) 322 | [{:tags ["foo"]} {:tags ["foo" "bar"]}])))) 323 | 324 | (deftest where-else 325 | ; Where should take an else clause. 326 | (let [a (atom []) 327 | b (atom [])] 328 | (run-stream 329 | (where (service #"a") 330 | #(swap! a conj (:service %)) 331 | (else #(swap! 
b conj (:service %)))) 332 | [{:service "cat"} 333 | {:service "dog"} 334 | {:service nil} 335 | {:service "badger"}]) 336 | (is (= @a ["cat" "badger"])) 337 | (is (= @b ["dog" nil])))) 338 | 339 | (deftest where-child-evaluated-once 340 | ; Where should evaluate its children exactly once. 341 | (let [x (atom 0) 342 | s (where true (do (swap! x inc) identity))] 343 | (is (= @x 1)) 344 | (s {:service "test"}) 345 | (is (= @x 1)) 346 | (s {:service "test"}) 347 | (is (= @x 1)))) 348 | 349 | (deftest default-kv 350 | (let [r (ref nil) 351 | s (default :service "foo" (register r))] 352 | (s {:service nil}) 353 | (is (= {:service "foo"} (deref r))) 354 | 355 | (s {:service "foo"}) 356 | (is (= {:service "foo"} (deref r))) 357 | 358 | (s {:service "bar" :test "baz"}) 359 | (is (= {:service "bar" :test "baz"} (deref r))))) 360 | 361 | (deftest default-map 362 | (let [r (ref nil) 363 | s (default {:service "foo" :state nil} (register r))] 364 | (s (event {:service nil})) 365 | (is (= "foo" (:service (deref r)))) 366 | (is (= nil (:state (deref r)))) 367 | 368 | (s (event {:service "foo"})) 369 | (is (= "foo" (:service (deref r)))) 370 | (is (= nil (:state (deref r)))) 371 | 372 | (s (event {:service "bar" :host "baz" :state "evil"})) 373 | (is (= "bar" (:service (deref r)))) 374 | (is (= "baz" (:host (deref r)))) 375 | (is (= "evil" (:state (deref r)))))) 376 | 377 | (deftest with-kv 378 | (let [r (ref nil) 379 | s (with :service "foo" (fn [e] (dosync (ref-set r e))))] 380 | (s {:service nil}) 381 | (is (= {:service "foo"} (deref r))) 382 | 383 | (s {:service "foo"}) 384 | (is (= {:service "foo"} (deref r))) 385 | 386 | (s {:service "bar" :test "baz"}) 387 | (is (= {:service "foo" :test "baz"} (deref r))))) 388 | 389 | (deftest with-map 390 | (let [r (ref nil) 391 | s (with {:service "foo" :state nil} (fn [e] (dosync (ref-set r e))))] 392 | (s (event {:service nil})) 393 | (is (= "foo" (:service (deref r)))) 394 | (is (= nil (:state (deref r)))) 395 | 396 | (s (event 
{:service "foo"})) 397 | (is (= "foo" (:service (deref r)))) 398 | (is (= nil (:state (deref r)))) 399 | 400 | (s (event {:service "bar" :test "baz" :state "evil"})) 401 | (is (= "foo" (:service (deref r)))) 402 | (is (= nil (:state (deref r)))))) 403 | 404 | (deftest by-single 405 | ; Each test stream keeps track of the first host it sees, and confirms 406 | ; that each subsequent event matches that host. 407 | (let [i (ref 0) 408 | s (by :host 409 | (let [host (ref nil)] 410 | (fn [event] 411 | (dosync 412 | (alter i inc) 413 | (when (nil? (deref host)) 414 | (ref-set host (event :host))) 415 | (is (= (deref host) (event :host))))))) 416 | events (map (fn [h] {:host h}) [:a :a :b :a :c :b])] 417 | (doseq [event events] 418 | (s event)) 419 | (is (= (count events) (deref i))))) 420 | 421 | (deftest by-multiple 422 | ; Each test stream keeps track of the first host/service it sees, and 423 | ; confirms that each subsequent event matches that host. 424 | (let [i (ref 0) 425 | s (by [:host :service] 426 | (let [host (ref nil) 427 | service (ref nil)] 428 | (fn [event] 429 | (dosync 430 | (alter i inc) 431 | 432 | (when (nil? (deref host)) 433 | (ref-set host (event :host))) 434 | (when (nil? (deref service)) 435 | (ref-set service (event :service))) 436 | 437 | (is (= (deref host) (event :host))) 438 | (is (= (deref service) (event :service))))))) 439 | 440 | events (map (fn [h] {:host (first h) 441 | :service (last h)}) 442 | [[1 :a] 443 | [1 :b] 444 | [1 :a] 445 | [2 :a] 446 | [2 :a] 447 | [1 :a] 448 | [2 :a] 449 | [1 :b]])] 450 | (doseq [event events] 451 | (s event)) 452 | (is (= (count events) (deref i))))) 453 | 454 | (deftest by-evaluates-children-once-per-branch 455 | (let [i (atom 0) 456 | s (by :metric (do (swap! 
i inc) identity))] 457 | (is (= @i 0)) 458 | (s {:metric 1}) 459 | (is (= @i 1)) 460 | (s {:metric 2}) 461 | (is (= @i 2)) 462 | (s {:metric 1}) 463 | (is (= @i 2)))) 464 | 465 | (deftest fill-in-test 466 | (test-stream-intervals 467 | (fill-in 0.01 {:metric 0}) 468 | [] 469 | []) 470 | 471 | ; Quick succession 472 | (is (= (map :metric (run-stream-intervals 473 | (fill-in 0.01 {:metric 0}) 474 | (interpose nil (em 1 2 3)))) 475 | [1 2 3])) 476 | 477 | ; With a gap and expiry 478 | (is (= (map :metric (run-stream-intervals 479 | (fill-in 0.05 {:metric 0}) 480 | [{:metric 1} 0.06 481 | {:metric 2} nil 482 | {:metric 3} 0.08 483 | {:metric 4 :state "expired"} 0.06 484 | {:metric 5}])) 485 | [1 0 2 3 0 4 5])) 486 | ) 487 | 488 | (deftest fill-in-last-test 489 | (test-stream-intervals 490 | (fill-in-last 0.01 {:metric 0}) 491 | [] 492 | []) 493 | 494 | ; Quick succession 495 | (let [output (run-stream-intervals 496 | (fill-in-last 0.01 {:metric 0}) 497 | (interpose nil (evs {:host "foo" :service "bar"} 498 | 1 2 3)))] 499 | (is (= (map :metric output) 500 | [1 2 3])) 501 | (is (= (map :host output) 502 | ["foo" "foo" "foo"])) 503 | (is (= (map :service output) 504 | ["bar" "bar" "bar"]))) 505 | 506 | ; With a gap and expiry 507 | (let [output (run-stream-intervals 508 | (fill-in-last 0.05 {:metric 0}) 509 | [{:host "a" :metric 1} 0.06 510 | {:host "b" :metric 2} nil 511 | {:host "c" :metric 3} 0.08 512 | {:host "d" :metric 4 :state "expired"} 0.06 513 | {:host "e" :metric 5}])] 514 | (is (= (map :metric output) 515 | [1 0 2 3 0 4 5])) 516 | (is (= (map :host output) 517 | ["a" "a" "b" "c" "c" "d" "e"])))) 518 | 519 | (deftest interpolate-constant-test 520 | (test-stream-intervals 521 | (interpolate-constant 0.01) 522 | [] 523 | []) 524 | 525 | ; Should forward a single state 526 | (is (= (map :metric (run-stream-intervals 527 | (interpolate-constant 0.1) 528 | [{:metric 1} 0.05])) 529 | [1])) 530 | 531 | ; Should forward first state and ignore immediate successors 532 
| (is (= (map :metric (run-stream-intervals 533 | (interpolate-constant 0.1) 534 | [{:metric 1} 0.05 535 | {:metric 2} nil 536 | {:metric 3}])) 537 | [1])) 538 | 539 | ; Should fill in missing states regularly 540 | (is (= (map :metric (run-stream-intervals 541 | (interpolate-constant 0.1) 542 | (interpose 0.22 (em 1 2 3 4)))) 543 | [1 1 1 2 2 3 3])) 544 | 545 | ; Should forward final "expired" state. 546 | (is (= (map :metric (run-stream-intervals 547 | (interpolate-constant 0.1) 548 | (interpose 0.22 549 | [{:metric 1} 550 | {:metric 2} 551 | {:metric 3} 552 | {:metric 4 :state "expired"}]))) 553 | [1 1 1 2 2 3 3 4])) 554 | 555 | ; Should not fill during expired times. 556 | (is (= (map :metric (run-stream-intervals 557 | (interpolate-constant 0.05) 558 | [{:metric 1 :state "expired"} 559 | 0.12 560 | {:metric 2} 561 | 0.12 562 | {:metric 3 :state "expired"} 563 | 0.12])) 564 | [1 2 2 2 3])) 565 | ) 566 | 567 | (deftest ddt-immediate-test 568 | ; Empty -> empty 569 | (test-stream (ddt) [] []) 570 | ; Ignore stream without metrics 571 | (test-stream (ddt) [{} {} {} {}] []) 572 | ; Do nothing the first time 573 | (test-stream (ddt) [{:metric 1 :time 0}] []) 574 | ; Differentiate 575 | (test-stream (ddt) 576 | [{:metric 0 :time 0} 577 | {:metric 0 :time 1} 578 | {:metric 2 :time 2} 579 | {:metric -4 :time 4}] 580 | [{:metric 0 :time 1} 581 | {:metric 2 :time 2} 582 | {:metric -3 :time 4}])) 583 | 584 | (deftest ddt-interval-test 585 | ; Quick burst without crossing interval 586 | (is (= (map :metric (run-stream-intervals 587 | (ddt 0.1) 588 | [{:metric 1} nil {:metric 2} nil {:metric 3}])) 589 | [])) 590 | 591 | ; 1 event per interval 592 | (let [t0 (unix-time)] 593 | (is (= (map :metric (run-stream-intervals 594 | (ddt 0.1) 595 | [{:metric -1 :time t0} 0.1 596 | {:metric 0 :time (+ 1/10 t0)} 0.1 597 | {:metric -5 :time (+ 2/10 t0)} 0.1])) 598 | [10 -50]))) 599 | 600 | (reset-time!) 
601 | 602 | ; n events per interval 603 | (let [t0 (unix-time)] 604 | (is (= (map :metric (run-stream-intervals 605 | (ddt 0.1) 606 | [{:metric -1 :time t0} 0.01 ; counts 607 | {:metric 100 :time (+ 1/20 t0)} 0.05 608 | {:metric 1 :time (+ 2/20 t0)} 0.05 609 | {:metric nil :time (+ 3/20 t0)} 0.05 610 | {:metric -3 :time (+ 4/20 t0)} 0.05])) 611 | [20 -40]))) 612 | ) 613 | 614 | (deftest rate-slow-even 615 | (let [output (ref []) 616 | interval 1 617 | intervals 5 618 | gen-rate 10 619 | total (* gen-rate intervals) 620 | gen-period (/ interval gen-rate) 621 | r (rate interval 622 | (fn [event] (dosync (alter output conj event))))] 623 | 624 | ; Generate events 625 | (dotimes [_ intervals] 626 | (dotimes [_ gen-rate] 627 | (advance! (+ (unix-time) gen-period)) 628 | (r {:metric 1 :time (unix-time)}))) 629 | 630 | ; Give all futures time to complete 631 | (advance! (+ (unix-time) gen-period)) 632 | 633 | ; Verify output states 634 | (let [o (deref output)] 635 | 636 | ; All events recorded 637 | (is (approx-equal total (reduce + (map :metric o)))) 638 | 639 | ; Middle events should have the correct rate. 640 | (is (every? (fn [measured-rate] 641 | (approx-equal gen-rate measured-rate)) 642 | (map :metric (drop 1 (drop-last o))))) 643 | 644 | ; First and last events should be complementary 645 | (let [first-last (+ (:metric (first o)) 646 | (:metric (last o)))] 647 | (is (or (approx-equal (* 2 gen-rate) first-last) 648 | (approx-equal gen-rate first-last)))) 649 | 650 | ))) 651 | 652 | (deftest rate-threaded 653 | (let [output (atom nil) 654 | interval 5/2 655 | total 10000 656 | threads 4 657 | r (rate interval 658 | (fn [event] (dosync (reset! output event)))) 659 | 660 | ; Generate events 661 | workers (map (fn [t] (future 662 | (dotimes [i (/ total threads)] 663 | (r {:metric 1 :time (unix-time)})))) 664 | (range threads))] 665 | 666 | ; Wait for workers 667 | (dorun (map deref workers)) 668 | (advance! 
interval) 669 | 670 | ; All events recorded 671 | (is (= (/ total interval) (:metric @output))))) 672 | 673 | (deftest fold-interval-test 674 | (test-stream-intervals 675 | (riemann.streams/fold-interval 1 :metric incanter.stats/sd) 676 | [{:metric 2} 0.1 677 | {:metric 4} 0.2 678 | {:metric 2} 0.3 679 | {:metric 4} 1.0 680 | {:metric 100} 0.1 681 | {:metric 100} 1.0] 682 | (em 1.1547005383792515 0.0))) 683 | 684 | (deftest fold-interval-metric-test 685 | (test-stream-intervals 686 | (riemann.streams/fold-interval-metric 1 incanter.stats/sd) 687 | [{:metric 2} 0.1 688 | {:metric 4} 0.2 689 | {:metric 2} 0.3 690 | {:metric 4} 1.0 691 | {:metric 100} 0.1 692 | {:metric 100} 1.0] 693 | (em 1.1547005383792515 0.0))) 694 | 695 | (deftest changed-test 696 | (let [output (ref []) 697 | r (changed :state 698 | (fn [event] (dosync (alter output conj event)))) 699 | r2 (changed :state {:init :ok} 700 | (append output)) 701 | states [:ok :bad :bad :ok :ok :ok :evil :bad]] 702 | 703 | ; Apply states 704 | (doseq [state states] 705 | (r {:state state})) 706 | 707 | ; Check output 708 | (is (= [:ok :bad :ok :evil :bad] 709 | (vec (map (fn [s] (:state s)) (deref output))))) 710 | 711 | ; Test with init 712 | (dosync (ref-set output [])) 713 | (doseq [state states] 714 | (r2 {:state state})) 715 | 716 | (is (= [:bad :ok :evil :bad] 717 | (vec (map (fn [s] (:state s)) (deref output))))))) 718 | 719 | (deftest changed-state-test 720 | ; Each test stream keeps track of the first host/service it sees, and 721 | ; confirms that each subsequent event matches that host, and that 722 | ; each event is different from the previous state. 723 | (let [i (ref 0) 724 | s (changed-state 725 | (let [host (ref nil) 726 | service (ref nil) 727 | state (ref nil)] 728 | (fn [event] 729 | (dosync 730 | (alter i inc) 731 | 732 | (is (not= (deref state) (:state event))) 733 | (ref-set state (:state event)) 734 | 735 | (when (nil? (deref host)) 736 | (ref-set host (event :host))) 737 | (when (nil? 
(deref service)) 738 | (ref-set service (event :service))) 739 | 740 | (is (= (deref host) (event :host))) 741 | (is (= (deref service) (event :service))))))) 742 | 743 | events [{:host 1 :service 1 :state 1} 744 | {:host 2 :service 1 :state 1} 745 | {:host 1 :service 1 :state 1} 746 | {:host 1 :service 1 :state 2} 747 | {:host 2 :service 1 :state 2} 748 | {:host 2 :service 2 :state 1} 749 | {:host 2 :service 1 :state 1}]] 750 | 751 | (doseq [event events] 752 | (s event)) 753 | (is (= 6 (deref i))))) 754 | (deftest within-test 755 | (let [output (ref []) 756 | r (within [1 2] 757 | (fn [e] (dosync (alter output conj e)))) 758 | metrics [0.5 1 1.5 2 2.5] 759 | expect [1 1.5 2]] 760 | 761 | (doseq [m metrics] (r {:metric m})) 762 | (is (= expect (vec (map (fn [s] (:metric s)) (deref output))))))) 763 | 764 | (deftest without-test 765 | (let [output (ref []) 766 | r (without [1 2] 767 | (fn [e] (dosync (alter output conj e)))) 768 | metrics [0.5 1 1.5 2 2.5] 769 | expect [0.5 2.5]] 770 | 771 | (doseq [m metrics] (r {:metric m})) 772 | (is (= expect (vec (map (fn [s] (:metric s)) (deref output))))))) 773 | 774 | (deftest over-test 775 | (let [output (ref []) 776 | r (over 1.5 777 | (fn [e] (dosync (alter output conj e)))) 778 | metrics [0.5 1 1.5 2 2.5] 779 | expect [2 2.5]] 780 | 781 | (doseq [m metrics] (r {:metric m})) 782 | (is (= expect (vec (map (fn [s] (:metric s)) (deref output))))))) 783 | 784 | (deftest under-test 785 | (let [output (ref []) 786 | r (under 1.5 787 | (fn [e] (dosync (alter output conj e)))) 788 | metrics [0.5 1 1.5 2 2.5] 789 | expect [0.5 1]] 790 | 791 | (doseq [m metrics] (r {:metric m})) 792 | (is (= expect (vec (map (fn [s] (:metric s)) (deref output))))))) 793 | 794 | (deftest update-test 795 | (let [i (index/index) 796 | s (update-index i) 797 | states [{:host 1 :state "ok"} 798 | {:host 2 :state "ok"} 799 | {:host 1 :state "bad"}]] 800 | (doseq [state states] (s state)) 801 | (is (= (set i) 802 | #{{:host 1 :state "bad"} 803 | {:host 2 
:state "ok"}})))) 804 | 805 | (deftest delete-from-index-test 806 | (let [i (index/index) 807 | s (update-index i) 808 | d (delete-from-index i) 809 | states [{:host 1 :state "ok"} 810 | {:host 2 :state "ok"} 811 | {:host 1 :state "bad"}]] 812 | (doseq [state states] (s state)) 813 | (doseq [state states] (d state)) 814 | (is (= (vec (seq i)) [])))) 815 | 816 | (deftest ewma-timeless-test 817 | (test-stream (ewma-timeless 0) 818 | (em 1 10 20 -100 4) 819 | (em 0 0 0 0 0)) 820 | (test-stream (ewma-timeless 1) 821 | (em 5 13 1 -10 3) 822 | (em 5 13 1 -10 3)) 823 | (test-stream (ewma-timeless 1/2) 824 | (em 1 1 1 1 1 ) 825 | (em 1/2 3/4 7/8 15/16 31/32))) 826 | 827 | (deftest throttle-test 828 | (let [out (ref []) 829 | quantum 0.1 830 | stream (throttle 5 quantum (append out)) 831 | t1 (unix-time)] 832 | 833 | (doseq [state (take 100000 (repeat {:state "foo"}))] 834 | (stream state)) 835 | 836 | (let [dt (- (unix-time) t1) 837 | slices (inc (quot dt quantum)) 838 | maxcount (* slices 5) 839 | count (count (deref out))] 840 | 841 | ; Depending on whether we fell exactly on the interval boundary... 842 | ; ugh I hate testing this shit 843 | (is (approx-equal count maxcount 0.01)) 844 | (is (zero? (mod count 5)))))) 845 | 846 | (deftest rollup-test 847 | (let [out (ref []) 848 | quantum 0.1 849 | stream (rollup 2 quantum (append out)) 850 | t1 (unix-time)] 851 | 852 | (stream 1) 853 | (is (= (deref out) [[1]])) 854 | (stream 2) 855 | (is (= (deref out) [[1] [2]])) 856 | (stream 3) 857 | (is (= (deref out) [[1] [2]])) 858 | 859 | (advance! (* 1 quantum)) 860 | (is (= (deref out) [[1] [2] [3]])) 861 | 862 | (stream 4) 863 | (is (= (deref out) [[1] [2] [3] [4]])) 864 | (stream 5) 865 | (stream 6) 866 | (stream 7) 867 | (is (= (deref out) [[1] [2] [3] [4]])) 868 | 869 | (advance! 
(* 2 quantum)) 870 | (is (= (deref out) [[1] [2] [3] [4] [5 6 7]])))) 871 | 872 | (deftest coalesce-test 873 | (let [out (ref []) 874 | s (coalesce (register out)) 875 | a {:service 1 :state "ok" :time (unix-time)} 876 | b {:service 2 :state "ok" :time (unix-time)} 877 | c {:service 1 :state "bad" :time (unix-time)} 878 | d {:service 1 :state "ok" :time (unix-time) :ttl 0.01} 879 | e {:service 3 :state "ok" :time (unix-time)}] 880 | 881 | (s a) 882 | (is (= (set (deref out)) #{a})) 883 | 884 | (s b) 885 | (is (= (set (deref out)) #{a b})) 886 | 887 | (s c) 888 | (is (= (set (deref out)) #{b c})) 889 | 890 | (s d) 891 | (is (= (set (deref out)) #{b d})) 892 | 893 | ; Wait for ttl expiry of d 894 | (advance! 0.02) 895 | 896 | (s e) 897 | (is (= (set (deref out)) #{b e})))) 898 | 899 | (deftest adjust-test 900 | (let [out (ref nil) 901 | s (adjust [:state str " 2"] (register out))] 902 | 903 | (s {}) 904 | (is (= (deref out) {:state " 2"})) 905 | 906 | (s {:state "hey" :service "bar"}) 907 | (is (= (deref out) {:state "hey 2" :service "bar"}))) 908 | 909 | (let [out (ref nil) 910 | s (adjust #(assoc % :metric (count (:tags %))) 911 | (register out))] 912 | 913 | (s {:service "a" :tags []}) 914 | (is (= (deref out) {:service "a" :tags [] :metric 0})) 915 | 916 | (s {:service "a" :tags ["foo" "bar"]}) 917 | (is (= (deref out) {:service "a" :tags ["foo" "bar"] :metric 2})))) 918 | 919 | (deftest moving-event-window-test 920 | ; Zero-width windows. 921 | (test-stream (moving-event-window 0) [] []) 922 | (test-stream (moving-event-window 0) [1 2] [[] []]) 923 | 924 | ; n-width windows 925 | (test-stream (moving-event-window 2) [1 2 3] [[1] [1 2] [2 3]])) 926 | 927 | (deftest fixed-event-window-test 928 | ; Zero-width windows. 
929 | (test-stream (fixed-event-window 0) [] []) 930 | (test-stream (fixed-event-window 0) [1 2] []) 931 | 932 | ; n-width windows 933 | (test-stream (fixed-event-window 2) [1] []) 934 | (test-stream (fixed-event-window 2) [1 2] [[1 2]]) 935 | (test-stream (fixed-event-window 2) [1 2 3 4 5] [[1 2] [3 4]])) 936 | 937 | (deftest moving-time-window-test 938 | ; Zero-second windows. 939 | (test-stream (moving-time-window 0) [] []) 940 | (test-stream (moving-time-window 0) [{:time 1} {:time 2}] []) 941 | 942 | ; n-width windows 943 | (test-stream (moving-time-window 2) [] []) 944 | (test-stream (moving-time-window 2) [{:time 1}] [[{:time 1}]]) 945 | (test-stream (moving-time-window 2) 946 | [{:time 1} {:time 2} {:time 3} {:time 4}] 947 | [[{:time 1}] 948 | [{:time 1} {:time 2}] 949 | [{:time 2} {:time 3}] 950 | [{:time 3} {:time 4}]]) 951 | 952 | ; With out-of-order events 953 | (test-stream (moving-time-window 2) 954 | [{:time 5} 955 | {:time 1} 956 | {:time 2} 957 | {:time 6} 958 | {:time 3} 959 | {:time 8} 960 | {:time 4} 961 | {:time 8} 962 | {:time 5} 963 | {:time 9}] 964 | [[{:time 5}] 965 | [{:time 5} {:time 6}] 966 | [{:time 8}] 967 | [{:time 8} {:time 8}] 968 | [{:time 8} {:time 8} {:time 9}]])) 969 | 970 | (deftest fixed-time-window-test 971 | ; Zero-time windows. 972 | (is (thrown? 
IllegalArgumentException (fixed-time-window 0))) 973 | 974 | ; n-width windows 975 | (test-stream (fixed-time-window 2) [] []) 976 | (test-stream (fixed-time-window 2) [{:time 1}] []) 977 | (test-stream (fixed-time-window 2) 978 | [{:time 1} {:time 2} {:time 3} {:time 4} {:time 5}] 979 | [[{:time 1} {:time 2}] 980 | [{:time 3} {:time 4}]]) 981 | 982 | ; With a gap 983 | (test-stream (fixed-time-window 2) [{:time 1} {:time 7}] 984 | [[{:time 1}] [] []]) 985 | 986 | ; With out-of-order events 987 | (test-stream (fixed-time-window 2) 988 | [{:time 5} 989 | {:time 1} 990 | {:time 2} 991 | {:time 6} 992 | {:time 3} 993 | {:time 8} 994 | {:time 4} 995 | {:time 8} 996 | {:time 5} 997 | {:time 9} 998 | {:time 11}] 999 | [[{:time 5} {:time 6}] 1000 | [{:time 8} {:time 8}] 1001 | [{:time 9}]])) 1002 | --------------------------------------------------------------------------------