├── .gitignore ├── LICENSE ├── project.clj ├── resources └── config.edn ├── README.md └── src └── braid_onyx ├── core.clj └── jobs └── datomic.clj /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | /data 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | onyx.log* 12 | hs_err_pid* 13 | .#* 14 | .DS_Store 15 | *.jfr 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Distributed Masonry LLC 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
(defproject braid-onyx "0.1.0-SNAPSHOT"
  :description "Onyx jobs for Braid: indexes message content from the Datomic log into Elasticsearch"
  :url ""
  ;; Matches the LICENSE file shipped at the repository root.
  :license {:name "MIT License"
            :url "https://opensource.org/licenses/MIT"}
  :dependencies [[aero "1.0.0-beta2"]
                 [org.clojure/clojure "1.8.0"]
                 [org.clojure/tools.cli "0.3.5"]
                 [org.onyxplatform/onyx "0.9.10-beta1" :exclusions [prismatic/schema commons-codec commons-logging
                                                                    org.clojure/tools.reader]]
                 [org.onyxplatform/lib-onyx "0.9.7.1" :exclusions [commons-codec]]
                 [com.datomic/datomic-pro "0.9.5201" :exclusions [joda-time commons-codec
                                                                  org.apache.httpcomponents/httpcore
                                                                  org.apache.httpcomponents/httpclient]]
                 [org.onyxplatform/onyx-datomic "0.9.10.0-beta1" :exclusions [org.slf4j/slf4j-api commons-codec]]
                 [org.onyxplatform/onyx-elasticsearch "0.9.10.0-beta1" :exclusions [org.slf4j/slf4j-api]]
                 [org.postgresql/postgresql "9.3-1103-jdbc4"]]
  :source-paths ["src"]

  :main braid-onyx.core

  ;; datomic-pro is resolved from the private my.datomic.com repository;
  ;; credentials are read from the GPG-encrypted Leiningen credentials file.
  :repositories {"my.datomic.com" {:url "https://my.datomic.com/repo"
                                   :creds :gpg}}

  ;; The dev-only :dependencies vector must live INSIDE the :dev profile map.
  ;; It previously sat beside :dev as a key of :profiles, which made Leiningen
  ;; treat it as a (never activated) profile named :dependencies, so the
  ;; development dependencies were silently missing from the classpath.
  :profiles {:dev {:jvm-opts ["-XX:-OmitStackTraceInFastThrow"]
                   :global-vars {*assert* true}
                   :dependencies [[org.clojure/tools.namespace "0.2.11"]
                                  [lein-project-version "0.1.0"]]}
             :uberjar {:aot [lib-onyx.media-driver
                             braid-onyx.core]
                       :uberjar-name "peer.jar"
                       :global-vars {*assert* false}}})

--------------------------------------------------------------------------------
/resources/config.edn:
--------------------------------------------------------------------------------
{:env-config
 {:onyx/tenancy-id #profile {:default "1"
                             :docker #env ONYX_ID}
  :onyx.bookkeeper/server? true
  :onyx.bookkeeper/local-quorum? true
  :onyx.bookkeeper/delete-server-data?
true 7 | :onyx.bookkeeper/local-quorum-ports [3196 3197 3198] 8 | :onyx.bookkeeper/port 3196 9 | :zookeeper/address #profile {:default "127.0.0.1:2188" 10 | :docker "zookeeper:2181"} 11 | :zookeeper/server? #profile {:default true 12 | :docker false} 13 | :zookeeper.server/port 2188 14 | :onyx.log/config #profile {:default nil 15 | :docker {:level :info}}} 16 | 17 | :peer-config 18 | {:onyx/tenancy-id #profile {:default "1" 19 | :docker #env ONYX_ID} 20 | :zookeeper/address #profile {:default "127.0.0.1:2188" 21 | :docker "zookeeper:2181"} 22 | :onyx.peer/job-scheduler :onyx.job-scheduler/balanced 23 | :onyx.peer/zookeeper-timeout 60000 24 | :onyx.messaging/allow-short-circuit? #profile {:default false 25 | :docker true} 26 | :onyx.messaging/impl :aeron 27 | :onyx.messaging/bind-addr #or [#env BIND_ADDR "localhost"] 28 | :onyx.messaging/peer-port 40200 29 | :onyx.messaging.aeron/embedded-driver? #profile {:default true 30 | :docker false} 31 | :onyx.log/config #profile {:default nil 32 | :docker {:level :info}}} 33 | 34 | :datomic-config 35 | {:datomic/db-uri #or [#env DATOMIC_URI 36 | "datomic:dev://localhost:4334/chat-dev"] 37 | :elasticsearch/host #or [#env ELASTICSEARCH_HOST 38 | "localhost"] 39 | :elasticsearch/port #or [#env ELASTICSEARCH_PORT 40 | 9200]}} 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # braid-onyx 2 | 3 | A work-in-progress Onyx application to do useful things for Braid. 4 | Currently, its purpose is to monitor the Datomic log and send message content to Elasticsearch to facilitate better search for Braid. 5 | 6 | This is still a work-in-progress; instructions on use will be added when things are actually working. 
7 | 8 | # Setting Up 9 | 10 | ## Elasticsearch 11 | 12 | The following instructions assume Ubuntu 16.04 13 | 14 | - [install java](https://www.digitalocean.com/community/tutorials/how-to-install-java-on-ubuntu-with-apt-get) 15 | 16 | - install elasticsearch 2.x 17 | 18 | ``` 19 | wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - 20 | echo "deb https://packages.elastic.co/elasticsearch/2.x/debian stable main" | sudo tee -a /etc/apt/sources.list.d/elasticsearch-2.x.list 21 | sudo apt-get update 22 | sudo apt-get install elasticsearch 23 | ``` 24 | 25 | - optionally, edit `/etc/elasticsearch/elasticsearch.yml` (e.g. in prod, probably should change `cluster.name`) 26 | 27 | - enable elasticsearch at boot and start it running 28 | 29 | ``` 30 | sudo systemctl enable --now elasticsearch.service 31 | ``` 32 | 33 | ## Running Onyx 34 | 35 | 36 | lein run start-peers 10 37 | lein run submit-job datomic-job 38 | 39 | # Prod Deployments 40 | 41 | ## Elasticsearch in Prod 42 | 43 | If elasticsearch is running on a different host, you'll need to set up the 44 | firewall to allow that. 45 | 46 | 1. Make elasticsearch bind to the network interface by setting `http.host: _eth0_` in elasticsearch.yml 47 | 2. Allow connections from the braid app server: `sudo ufw allow from $BRAID_IP to any port 9200` 48 | 49 | ## Connecting to prod datomic 50 | 51 | If Datomic is running on another machine, you'll need to make sure the 52 | interface is available. If Datomic is using Postgresql as the backing store, 53 | you'll need to do the following: 54 | 55 | 1. Edit `/etc/postgresql/9.5/main/postgresql.conf` and change `listen_addresses` from `localhost` to `*` 56 | 2. Allow the datomic user to connect over TCP: Edit `/etc/postgresql/9.5/main/pg_hba.conf` and add the following line: ` 57 | host datomic all 159.203.33.218/32 md5` 58 | 3. Allow connections from the onyx server to postgres: `sudo ufw allow from $ONYX_IP to any port 5432` 59 | 4. 
Edit the datomic config file to set `host` to `0.0.0.0` and `alt-host` to the public IP of the braid server
5. Allow connections from the onyx server to datomic: `sudo ufw allow from $ONYX_IP to any port 4334`

--------------------------------------------------------------------------------
/src/braid_onyx/core.clj:
--------------------------------------------------------------------------------
(ns braid-onyx.core
  "Command-line launcher: starts Onyx peers or submits a registered Onyx job."
  (:gen-class)
  (:require
    [aero.core :refer [read-config]]
    ;; Loaded for its side effect: it installs the \"datomic-job\" method on
    ;; onyx.job/register-job.
    [braid-onyx.jobs.datomic]
    ;; clojure.edn and clojure.string are used below and must be required
    ;; explicitly rather than relying on another namespace having loaded them.
    [clojure.edn :as edn]
    [clojure.java.io :as io]
    [clojure.string :as string]
    [clojure.tools.cli :refer [parse-opts]]
    [lib-onyx.peer :as peer]
    [onyx.api]
    [onyx.job]
    [onyx.test-helper]))

(defn file-exists?
  "Looks `file` up on the classpath first and then on the file system
  (relative to the working directory).

  Surrounding whitespace is trimmed before BOTH lookups. Returns the resource
  URL or the java.io.File that was found, or nil when neither exists."
  [file]
  (let [path (string/trim file)]
    (or (io/resource path)
        (let [^java.io.File on-disk (io/as-file path)]
          (when (.exists on-disk)
            on-disk)))))

(defn cli-options
  "Option specs handed to clojure.tools.cli/parse-opts."
  []
  [["-c" "--config FILE" "Aero/EDN config file"
    :default (io/resource "config.edn")
    :default-desc "resources/config.edn"
    ;; file-exists? yields nil for a missing file, which the `identity`
    ;; validator below turns into the first error message.
    :parse-fn file-exists?
    :validate [identity "File does not exist relative to the workdir or on the classpath"
               read-config "Not a valid Aero or EDN file"]]

   ["-p" "--profile PROFILE" "Aero profile"
    ;; Read as EDN so that `-p :docker` yields the keyword :docker.
    :parse-fn (fn [profile] (edn/read-string (string/trim profile)))]

   ["-h" "--help"]])

(defn usage
  "Builds the help text; `options-summary` is the summary string produced by
  parse-opts."
  [options-summary]
  (->> ["Onyx Peer and Job Launcher"
        ""
        "Usage: [options] action [arg]"
        ""
        "Options:"
        options-summary
        ""
        "Actions:"
        "  start-peers [npeers]    Start Onyx peers."
        "  submit-job  [job-name]  Submit a registered job to an Onyx cluster."
        ""]
       (string/join \newline)))

(defn error-msg
  "Formats a collection of error strings into a single message."
  [errors]
  (str "The following errors occurred while parsing your command:\n\n"
       (string/join \newline errors)))

(defn exit
  "Prints `msg` and terminates the JVM with exit code `status`."
  [status msg]
  (println msg)
  (System/exit status))

(defn assert-job-exists
  "Exits with status 1, listing the registered job names, when no
  onyx.job/register-job method is installed for `job-name`."
  [job-name]
  (let [jobs (methods onyx.job/register-job)]
    (when-not (contains? jobs job-name)
      (exit 1 (error-msg (into [(str "There is no job registered under the name " job-name "\n")
                                "Available jobs: "] (keys jobs)))))))

(defn- submit-job!
  "Registers and submits the job named by `argument`, then blocks until the job
  completes. `config` is the full Aero config map."
  [argument {:keys [peer-config] :as config}]
  (let [job-name (if (keyword? argument) argument (str argument))]
    (assert-job-exists job-name)
    (let [job-id (:job-id
                   (onyx.api/submit-job peer-config
                                        (onyx.job/register-job job-name config)))]
      (println "Successfully submitted job: " job-id)
      (println "Blocking on job completion...")
      (onyx.test-helper/feedback-exception! peer-config job-id)
      (exit 0 "Job Completed"))))

(defn -main
  "Entry point. Expects `[options] action arg` where action is one of:

    start-peers npeers   - start `npeers` Onyx peers
    submit-job job-name  - submit a registered job and block until it is done

  The action and its argument are taken from the POSITIONAL arguments left
  over after option parsing, so options may precede them
  (e.g. `-p :docker start-peers 10`)."
  [& args]
  (let [{:keys [options arguments errors summary]} (parse-opts args (cli-options))
        [action raw-argument] arguments]
    ;; Every failure path is its own cond branch so nothing below runs after a
    ;; failed validation, even if `exit` is redefined (e.g. in a REPL or test).
    (cond
      (:help options)
      (exit 0 (usage summary))

      errors
      (exit 1 (error-msg errors))

      (not= (count arguments) 2)
      (exit 1 (usage summary))

      :else
      (let [argument (edn/read-string raw-argument)
            load-config #(read-config (:config options) {:profile (:profile options)})]
        (case action
          "start-peers"
          (if (and (integer? argument) (pos? argument))
            (let [{:keys [env-config peer-config]} (load-config)]
              (peer/start-peer argument peer-config env-config))
            (exit 1 (error-msg [(str "npeers must be a positive integer, got: " raw-argument)])))

          "submit-job"
          (submit-job! argument (load-config))

          ;; Unknown action: report it instead of letting `case` throw.
          (exit 1 (error-msg [(str "Unknown action: " action) "" (usage summary)])))))))

--------------------------------------------------------------------------------
/src/braid_onyx/jobs/datomic.clj:
--------------------------------------------------------------------------------
(ns braid-onyx.jobs.datomic
  "Onyx job that reads the Datomic transaction log and writes message content
  to Elasticsearch."
  (:require
    [datomic.api :as d]
    [onyx.job :refer [add-task register-job]]
    [onyx.plugin.datomic]
    [onyx.plugin.elasticsearch]
    [onyx.tasks.datomic :as datomic-task]))

;; Workflow

;; Linear pipeline: log entry -> one segment per datom -> Elasticsearch
;; document -> write.
(def workflow
  [[:read-log :split-txns]
   [:split-txns :process-for-es]
   [:process-for-es :write-messages]])

;; Catalog entries

(defn build-catalog
  "Catalog entries for every task except :read-log (which datomic-job adds via
  add-task).

  `config` supplies :elasticsearch/host and :elasticsearch/port. `batch-size`
  and `batch-timeout` default to 5 and 50 in the one-argument arity."
  ([config] (build-catalog config 5 50))
  ([config batch-size batch-timeout]
   [{:onyx/name :write-messages
     :onyx/plugin :onyx.plugin.elasticsearch/write-messages
     :onyx/type :output
     :onyx/medium :elasticsearch
     ;; Keyword lookup (rather than calling `config` as a function) so that a
     ;; nil config produces nil values instead of a NullPointerException.
     :elasticsearch/host (:elasticsearch/host config)
     :elasticsearch/port (:elasticsearch/port config)
     ;:elasticsearch/cluster-name "my-cluster-name"
     :elasticsearch/client-type :http
     ;:elasticsearch/http-ops {:basic-auth ["user" "pass"]}
     :elasticsearch/index "braid-messages"
     :elasticsearch/mapping "messages-mapping"
     ;:elasticsearch/doc-id "my-id"
     :elasticsearch/write-type :insert
     :onyx/batch-size batch-size
     :onyx/doc "Writes documents to elasticsearch"}

    {:onyx/name :split-txns
     :onyx/fn ::split-txns
     :onyx/type :function
     :onyx/batch-size batch-size
     :onyx/batch-timeout batch-timeout}

    {:onyx/name :process-for-es
     :onyx/fn ::process-for-es
     :onyx/type :function
     :onyx/batch-size batch-size
     :onyx/batch-timeout batch-timeout}]))

(defn process-for-es
  "Turns a `{:txn [eid attr v t insert?]}` segment into an Elasticsearch
  segment whose document is `{:content v}` and whose doc id is the entity id
  rendered as a string."
  [{[eid attr v t insert?] :txn :as segment}]
  {:elasticsearch/message {:content v}
   :elasticsearch/doc-id (str eid)})

(defn split-txns
  "Fans one log segment out into one `{:txn datom}` segment per element of its
  :data collection."
  [{:keys [id data t] :as segment}]
  (map (fn [d] {:txn d}) data))

;; Lifecycles

(defn build-lifecycles
  "Lifecycle entries: attaches the Elasticsearch writer calls to
  :write-messages."
  []
  [{:lifecycle/task :write-messages
    :lifecycle/calls :onyx.plugin.elasticsearch/write-messages-calls}])

;; flow conditions

;; Resolves an attribute ident to its entity id. Memoized per [db-uri attr] so
;; the lookup against Datomic happens once per distinct pair rather than once
;; per segment.
(def attribute-id
  (memoize (fn [db-uri attr]
             (-> (d/pull (d/db (d/connect db-uri)) [:db/id] [:db/ident attr])
                 :db/id))))

(defn message?
  "Flow-condition predicate: true when the incoming segment's txn has a truthy
  fifth element (`insert?`) and its attribute id equals the id of
  :message/content in the database at `db-uri`."
  [event {[eid attr v t insert?] :txn :as old-segment} new-segment all-new-segments db-uri]
  (and insert? (= (attribute-id db-uri :message/content) attr)))

(defn build-flow-conditions
  "Flow conditions: segments leaving :process-for-es reach :write-messages only
  when `message?` passes. `db-uri` is stored under ::db-uri and named as the
  predicate's extra parameter."
  [db-uri]
  [{:flow/from :process-for-es
    :flow/to [:write-messages]
    ::db-uri db-uri
    :flow/predicate [::message? ::db-uri]}])

;; the job, proper

(defn datomic-job
  "Assembles the complete job map.

  `batch-settings` provides :onyx/batch-size and :onyx/batch-timeout and is
  merged into the :read-log task options; `datomic-config` provides
  :datomic/db-uri plus the Elasticsearch host and port read by build-catalog."
  [{:keys [onyx/batch-size onyx/batch-timeout] :as batch-settings}
   {db-uri :datomic/db-uri :as datomic-config}]
  (let [job {:workflow workflow
             :catalog (build-catalog datomic-config batch-size batch-timeout)
             :lifecycles (build-lifecycles)
             :windows []
             :triggers []
             :flow-conditions (build-flow-conditions db-uri)
             :task-scheduler :onyx.task-scheduler/balanced}]
    (-> job
        (add-task (datomic-task/read-log :read-log
                                         (merge {:datomic/uri db-uri
                                                 :checkpoint/key "checkpoint"
                                                 :checkpoint/force-reset? false
                                                 :onyx/max-peers 1}
                                                batch-settings))))))

;; NOTE(review): this prints the whole :datomic-config map, including the
;; Datomic URI. If a storage URI ever embeds credentials they would end up in
;; stdout/logs -- confirm this is acceptable.
(defmethod register-job "datomic-job"
  [job-name {:keys [datomic-config] :as config}]
  (println "Starting datomic job for db" datomic-config)
  (let [batch-settings {:onyx/batch-size 1 :onyx/batch-timeout 1000}]
    (datomic-job batch-settings datomic-config)))

--------------------------------------------------------------------------------