├── .env ├── Procfile ├── .gitignore ├── demo ├── .gitignore ├── Procfile ├── dev │ └── user.clj ├── project.clj ├── README.md └── src │ └── demo │ └── core.clj ├── etc ├── config.edn ├── config.edn.example ├── logback.xml └── logback.xml.example ├── src └── uswitch │ └── blueshift │ ├── util.clj │ ├── system.clj │ ├── main.clj │ ├── telemetry.clj │ ├── redshift.clj │ └── s3.clj ├── dev └── user.clj ├── project.clj ├── README.md └── LICENSE /.env: -------------------------------------------------------------------------------- 1 | S3_DATABASE_EXPORT_BUCKET= 2 | S3_DATABASE_EXPORT_ID= 3 | S3_DATABASE_EXPORT_KEY= 4 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | watcher: lein run -- --s3id $S3_DATABASE_EXPORT_ID --s3key $S3_DATABASE_EXPORT_KEY --s3bucket $S3_DATABASE_EXPORT_BUCKET 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | -------------------------------------------------------------------------------- /demo/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | jdbc-url.txt 11 | -------------------------------------------------------------------------------- /demo/Procfile: -------------------------------------------------------------------------------- 1 | blueshift: sh -c 'cd .. 
&& exec lein run -- --config ./etc/config.edn' 2 | demo: lein run -- --config ../etc/config.edn --jdbc-url ./jdbc-url.txt 3 | -------------------------------------------------------------------------------- /etc/config.edn: -------------------------------------------------------------------------------- 1 | {:s3 {:credentials {:access-key "" 2 | :secret-key ""} 3 | :bucket "blueshift-data" 4 | :key-pattern ".*" 5 | :poll-interval {:seconds 30}} 6 | } 7 | -------------------------------------------------------------------------------- /etc/config.edn.example: -------------------------------------------------------------------------------- 1 | {:s3 {:credentials {:access-key "" 2 | :secret-key ""} 3 | :bucket "blueshift-data" 4 | :key-pattern ".*" 5 | :poll-interval {:seconds 30}} 6 | :telemetry {:reporters [uswitch.blueshift.telemetry/log-metrics-reporter]}} 7 | -------------------------------------------------------------------------------- /src/uswitch/blueshift/util.clj: -------------------------------------------------------------------------------- 1 | (ns uswitch.blueshift.util 2 | (:require [clojure.core.async :refer (close!)])) 3 | 4 | (defn clear-keys 5 | "dissoc for components records. assoc's nil for the specified keys" 6 | [m & ks] 7 | (apply assoc m (interleave ks (repeat (count ks) nil)))) 8 | 9 | (defn close-channels [state & ks] 10 | (doseq [k ks] 11 | (when-let [ch (get state k)] 12 | (close! 
ch))) 13 | (apply clear-keys state ks)) 14 | -------------------------------------------------------------------------------- /etc/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | [%d] %-5level %logger{36} - %msg%n%ex{full} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /etc/logback.xml.example: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | [%d] %-5level %logger{36} - %msg%n%ex{full} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/uswitch/blueshift/system.clj: -------------------------------------------------------------------------------- 1 | (ns uswitch.blueshift.system 2 | (:require [uswitch.blueshift.s3 :refer (s3-system)] 3 | [uswitch.blueshift.telemetry :refer (telemetry-system)] 4 | [com.stuartsierra.component :refer (system-map using Lifecycle start)] 5 | [clojure.core.async :refer (chan close!)]) 6 | (:import [clojure.core.async.impl.channels ManyToManyChannel])) 7 | 8 | (defn build-system [config] 9 | (system-map :s3-system (s3-system config) 10 | :telemetry-system (telemetry-system config))) 11 | -------------------------------------------------------------------------------- /dev/user.clj: -------------------------------------------------------------------------------- 1 | (ns user 2 | (:require [uswitch.blueshift.system :refer (build-system)] 3 | [clojure.tools.namespace.repl :refer (refresh)] 4 | [com.stuartsierra.component :as component])) 5 | 6 | (def system nil) 7 | 8 | (defn init [] 9 | (alter-var-root #'system 10 | (constantly (build-system (read-string (slurp "./etc/config.edn")))))) 11 | 12 | (defn start [] 13 | (alter-var-root #'system component/start)) 14 | 15 | (defn stop [] 16 | (alter-var-root #'system (fn [s] (when s (component/stop s))))) 17 | 18 | (defn go [] 19 | (init) 20 | 
(start)) 21 | 22 | (defn reset [] 23 | (stop) 24 | (refresh :after 'user/go)) 25 | -------------------------------------------------------------------------------- /demo/dev/user.clj: -------------------------------------------------------------------------------- 1 | (ns user 2 | (:require [demo.core :refer (build-system main)] 3 | [clojure.tools.namespace.repl :refer (refresh)] 4 | [com.stuartsierra.component :as component])) 5 | 6 | (def system nil) 7 | 8 | (defn jdbc-url [] 9 | (or (get (System/getenv) "JDBC_URL") 10 | (clojure.string/trim (slurp "jdbc-url.txt")))) 11 | 12 | (defn init [] 13 | (alter-var-root #'system 14 | (constantly (main (read-string (slurp "../etc/config.edn")) 15 | (jdbc-url))))) 16 | 17 | (defn start [] 18 | (alter-var-root #'system component/start)) 19 | 20 | (defn stop [] 21 | (alter-var-root #'system (fn [s] (when s (component/stop s))))) 22 | 23 | (defn go [] 24 | (init) 25 | (start)) 26 | 27 | (defn reset [] 28 | (stop) 29 | (refresh :after 'user/go)) 30 | -------------------------------------------------------------------------------- /demo/project.clj: -------------------------------------------------------------------------------- 1 | (defproject demo "0.1.0-SNAPSHOT" 2 | :dependencies [[org.clojure/clojure "1.6.0"] 3 | [org.clojure/tools.logging "0.2.6"] 4 | [com.stuartsierra/component "0.2.1"] 5 | [org.clojure/core.async "0.1.303.0-886421-alpha"] 6 | [org.clojure/tools.cli "0.3.1"] 7 | [clj-aws-s3 "0.3.9" :exclusions [commons-logging commons-codec joda-time]] 8 | [joda-time "2.6"] 9 | [commons-codec "1.3"] 10 | [org.slf4j/jcl-over-slf4j "1.7.7"] 11 | [cheshire "5.3.1"] 12 | [postgresql "8.0-318.jdbc3"] 13 | [org.slf4j/slf4j-simple "1.7.7"] 14 | [org.clojure/tools.namespace "0.2.3"]] 15 | :jvm-opts ["-Dorg.slf4j.simpleLogger.defaultLogLevel=debug" 16 | "-Dorg.slf4j.simpleLogger.log.org.apache.http=info" 17 | "-Dorg.slf4j.simpleLogger.log.com.amazonaws=info" 18 | "-Dorg.slf4j.simpleLogger.log.com.codahale=debug"] 19 | :profiles 
{:dev {:source-paths ["./dev"]}} 20 | :main demo.core) 21 | -------------------------------------------------------------------------------- /demo/README.md: -------------------------------------------------------------------------------- 1 | # Blueshift demo 2 | 3 | This demo program will periodically upload new `tsv`-files to a S3 4 | bucket for Blueshift to upload. The program will ensure that a 5 | `manifest.edn` file is present and that a `demo` table is present in 6 | Redshift. 7 | 8 | The `demo` table contains three columns: 9 | 10 | - `uuid` A unique identifer for the row 11 | - `key` The name of the `tsv`-file corresponsing to the row 12 | - `timestamp` A timestamp for when the `tsv`-file was created. 13 | 14 | There's only one line in each `tsv`-file even though Blueshift support 15 | multiple lines when loading files into tables. 16 | 17 | The demo will periodically monitor the S3 bucket to listen for file 18 | changes, and it will monitor the `demo` table in Redshift to look for 19 | new timestamps. 20 | 21 | ## Usage 22 | 23 | The demo is configured using the same configuration file as 24 | Blueshift. The demo also needs to know the JDBC-URL for Redshift. The 25 | URL should be stored in a `txt`-file. 26 | 27 | There is a `Procfile` that assumes `../etc/config.edn` exists, and that 28 | there is a file named `./jdbc-url.txt` with content on the form 29 | `jdbc:postgresql://...`. 
The Procfile will start the demo and a 30 | Blueshift process when run using: 31 | 32 | foreman start 33 | -------------------------------------------------------------------------------- /src/uswitch/blueshift/main.clj: -------------------------------------------------------------------------------- 1 | (ns uswitch.blueshift.main 2 | (:require [clojure.tools.logging :refer (info)] 3 | [clojure.tools.cli :refer (parse-opts)] 4 | [uswitch.blueshift.system :refer (build-system)] 5 | [com.stuartsierra.component :refer (start stop)]) 6 | (:gen-class)) 7 | 8 | (def cli-options 9 | [["-c" "--config CONFIG" "Path to EDN configuration file" 10 | :default "./etc/config.edn" 11 | :validate [string?]] 12 | ["-i" "--s3id ID" "S3 ID" 13 | :default nil] 14 | ["-k" "--s3key KEY" "S3 KEY" 15 | :default nil] 16 | ["-b" "--s3bucket BUCKET" "S3 BUCKET" 17 | :default nil] 18 | ["-h" "--help"]]) 19 | 20 | (defn wait! [] 21 | (let [s (java.util.concurrent.Semaphore. 0)] 22 | (.acquire s))) 23 | 24 | (defn -main [& args] 25 | (let [{:keys [options summary]} (parse-opts args cli-options)] 26 | (when (:help options) 27 | (println summary) 28 | (System/exit 0)) 29 | (let [{:keys [config s3id s3key s3bucket]} options] 30 | (info "Starting Blueshift with configuration" config) 31 | (def config-options (read-string (slurp config))) 32 | (def merged (merge-with merge 33 | config-options 34 | {:s3 {:credentials {:access-key s3id :secret-key s3key} :bucket s3bucket}})) 35 | (let [system (build-system merged)] 36 | (start system) 37 | (wait!))))) 38 | -------------------------------------------------------------------------------- /src/uswitch/blueshift/telemetry.clj: -------------------------------------------------------------------------------- 1 | (ns uswitch.blueshift.telemetry 2 | (:require [com.stuartsierra.component :refer (system-map Lifecycle start stop)] 3 | [metrics.core :refer (default-registry)] 4 | [clojure.tools.logging :refer (info *logger-factory* debug)] 5 | 
[clojure.tools.logging.impl :refer (get-logger)] 6 | [clojure.string :refer (split)] 7 | [uswitch.blueshift.util :refer (clear-keys)]) 8 | (:import [java.util.concurrent TimeUnit] 9 | [com.codahale.metrics Slf4jReporter])) 10 | 11 | (defrecord LogMetricsReporter [registry] 12 | Lifecycle 13 | (start [this] 14 | (let [reporter (.build (doto (Slf4jReporter/forRegistry registry) 15 | (.outputTo (get-logger *logger-factory* *ns*)) 16 | (.convertRatesTo TimeUnit/SECONDS) 17 | (.convertDurationsTo TimeUnit/MILLISECONDS)))] 18 | (info "Starting Slf4j metrics reporter") 19 | (.start reporter 1 TimeUnit/MINUTES) 20 | (assoc this :reporter reporter))) 21 | (stop [this] 22 | (when-let [reporter (:reporter this)] 23 | (info "Stopping Slf4j metrics reporter") 24 | (.stop reporter)) 25 | (clear-keys this :reporter))) 26 | 27 | (defn log-metrics-reporter [config registry] 28 | (map->LogMetricsReporter {:registry registry})) 29 | 30 | (defn- load-reporter [config sym] 31 | (info "Loading reporter" sym "for registry" default-registry) 32 | (require (symbol (namespace sym))) 33 | (let [sysfn (var-get (find-var sym))] 34 | (sysfn config default-registry))) 35 | 36 | (defn telemetry-system [config] 37 | (let [configured-reporters (-> config :telemetry :reporters)] 38 | (reduce (fn [system reporter] 39 | (assoc system (keyword (str reporter)) (load-reporter config reporter))) 40 | (system-map) 41 | configured-reporters))) 42 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject blueshift "0.1.0-SNAPSHOT" 2 | :description "Automate importing S3 data into Amazon Redshift" 3 | :url "https://github.com/uswitch/blueshift" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.6.0"] 7 | [org.clojure/tools.logging "0.2.6"] 8 | [com.stuartsierra/component "0.2.1"] 9 | 
[org.clojure/core.async "0.1.303.0-886421-alpha"] 10 | [org.clojure/tools.cli "0.3.1"] 11 | [clj-aws-s3 "0.3.9" :exclusions [commons-logging commons-codec joda-time]] 12 | [joda-time "2.6"] 13 | [commons-codec "1.3"] 14 | [org.slf4j/jcl-over-slf4j "1.7.7"] 15 | [cheshire "5.3.1"] 16 | [postgresql "8.0-318.jdbc3"] 17 | [prismatic/schema "0.2.2"] 18 | [metrics-clojure "2.0.2"] 19 | [com.codahale.metrics/metrics-jvm "3.0.2"] 20 | [org.slf4j/slf4j-simple "1.7.7"]] 21 | :profiles {:dev {:dependencies [[org.slf4j/slf4j-simple "1.7.7"] 22 | [org.clojure/tools.namespace "0.2.3"]] 23 | :source-paths ["./dev"] 24 | :jvm-opts ["-Dorg.slf4j.simpleLogger.defaultLogLevel=debug" 25 | "-Dorg.slf4j.simpleLogger.log.org.apache.http=info" 26 | "-Dorg.slf4j.simpleLogger.log.com.amazonaws=info" 27 | "-Dorg.slf4j.simpleLogger.log.com.codahale=debug"] 28 | :resource-paths ["/Users/paul/Work/uswitch/blueshift-riemann-metrics/target/blueshift-riemann-metrics-0.1.0-SNAPSHOT-standalone.jar"]} 29 | :uberjar {:aot [uswitch.blueshift.main] 30 | :dependencies [[ch.qos.logback/logback-classic "1.1.2"]]}} 31 | :main uswitch.blueshift.main) 32 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blueshift 2 | 3 | Service to watch Amazon S3 and automate the load into Amazon Redshift. 4 | 5 | ![Gravitational Blueshift](http://upload.wikimedia.org/wikipedia/commons/5/5c/Gravitional_well.jpg) ([Image used under CC Attribution Share-Alike License](http://en.wikipedia.org/wiki/File:Gravitional_well.jpg)). 6 | 7 | ## Rationale 8 | 9 | [Amazon Redshift](https://aws.amazon.com/redshift/) is a "a fast, fully managed, petabyte-scale data warehouse service" but importing data into it can be a bit tricky: e.g. 
if you want upsert behaviour you have to [implement it yourself with temporary tables](http://docs.aws.amazon.com/redshift/latest/dg/t_updating-inserting-using-staging-tables-.html), and we've had problems [importing across machines into the same tables](https://forums.aws.amazon.com/message.jspa?messageID=443795). Redshift also performs best when bulk importing lots of large files from S3. 10 | 11 | [Blueshift](https://github.com/uswitch/blueshift) is a little service(tm) that makes it easy to automate the loading of data from Amazon S3 and into Amazon Redshift. It will periodically check for data files within a designated bucket and, when new files are found, import them. It provides upsert behaviour by default. 12 | 13 | Importing to Redshift now requires just the ability to write files to S3. 14 | 15 | ## Using 16 | 17 | ### Configuring 18 | 19 | Blueshift requires minimal configuration. It will only monitor a single S3 bucket currently, so the configuration file (ordinarily stored in `./etc/config.edn`) looks like this: 20 | 21 | ```clojure 22 | {:s3 {:credentials {:access-key "" 23 | :secret-key ""} 24 | :bucket "blueshift-data" 25 | :key-pattern ".*" 26 | :poll-interval {:seconds 30}} 27 | :telemetry {:reporters [uswitch.blueshift.telemetry/log-metrics-reporter]}} 28 | ``` 29 | 30 | The S3 credentials are shared by Blueshift for watching for new files and for [Redshift's `COPY` command](http://docs.aws.amazon.com/redshift/latest/dg/t_loading-tables-from-s3.html). The `:key-pattern` option is used to filter for specific keys (so you can have a single bucket with data from different environments, systems etc.). 31 | 32 | ### Building & Running 33 | 34 | The application is written in [Clojure](http://clojure.org), to build the project you'll need to use [Leiningen](https://github.com/technomancy/leiningen). 
35 | 36 | If you want to run the application on your computer you can run it directly with Leiningen (providing the path to your configuration file) 37 | 38 | $ lein run -- --config ./etc/config.edn 39 | 40 | Alternatively, you can build an Uberjar that you can run: 41 | 42 | $ lein uberjar 43 | $ java -Dlogback.configurationFile=./etc/logback.xml -jar target/blueshift-0.1.0-standalone.jar --config ./etc/config.edn 44 | 45 | The uberjar includes [Logback](http://logback.qos.ch/) for logging. `./etc/logback.xml.example` provides a simple starter configuration file with a console appender. 46 | 47 | ## Using 48 | 49 | Once the service is running you can create any number of directories in the S3 bucket. These will be periodically checked for files and, if found, an import triggered. If you wish the contents of the directory to be imported it's necessary for it to contain a file called `manifest.edn` which is used by Blueshift to know which Redshift cluster to import to and how to interpret the data files. 50 | 51 | Your S3 structure could look like this: 52 | 53 | bucket 54 | ├── directory-a 55 | │   └── foo 56 | │   └── manifest.edn 57 | │   └── 0001.tsv 58 | │   └── 0002.tsv 59 | └── directory-b 60 | └── manifest.edn 61 | 62 | and the `manifest.edn` could look like this: 63 | 64 | {:table "testing" 65 | :pk-columns ["foo"] 66 | :columns ["foo" "bar"] 67 | :jdbc-url "jdbc:postgresql://foo.eu-west-1.redshift.amazonaws.com:5439/db?tcpKeepAlive=true&user=user&password=pwd" 68 | :options ["DELIMITER '\\t'" "IGNOREHEADER 1" "ESCAPE" "TRIMBLANKS"] 69 | :data-pattern ".*tsv$"} 70 | 71 | When a manifest and data files are found an import is triggered. Once the import has been successfully committed Blueshift will **delete** any data files that were imported; the manifest remains ready for new data files to be imported. 72 | 73 | It's important that `:columns` lists all the columns (and only the columns) included within the data file and that they are in the same order. 
`:pk-columns` must contain a uniquely identifying primary key to ensure the correct upsert behaviour. `:options` can be used to override the Redshift copy options used during the load. 74 | 75 | Blueshift creates a temporary Amazon Redshift Copy manifest that lists all the data files found as mandatory for importing, this also makes it very efficient when loading lots of files into a highly distributed cluster. 76 | 77 | ## Metrics 78 | Blueshift tracks a few metrics using [https://github.com/sjl/metrics-clojure](https://github.com/sjl/metrics-clojure). Currently these are logged to the Slf4j logger. 79 | 80 | Starting the app will (eventually) show something like this: 81 | 82 | [metrics-logger-reporter-thread-1] INFO user - type=COUNTER, name=uswitch.blueshift.s3.directories-watched.directories, count=0 83 | [metrics-logger-reporter-thread-1] INFO user - type=METER, name=uswitch.blueshift.redshift.redshift-imports.commits, count=0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second 84 | [metrics-logger-reporter-thread-1] INFO user - type=METER, name=uswitch.blueshift.redshift.redshift-imports.imports, count=0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second 85 | [metrics-logger-reporter-thread-1] INFO user - type=METER, name=uswitch.blueshift.redshift.redshift-imports.rollbacks, count=0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second 86 | 87 | ### Riemann Metrics 88 | Reporting metrics to [Riemann](http://riemann.io/) can be achieved using the [https://github.com/uswitch/blueshift-riemann-metrics](https://github.com/uswitch/blueshift-riemann-metrics) project. 
To enable support you'll need to build the project: 89 | 90 | $ cd blueshift-riemann-metrics 91 | $ lein uberjar 92 | 93 | And then change the `./etc/config.edn` to reference the riemann reporter: 94 | 95 | :telemetry {:reporters [uswitch.blueshift.telemetry/log-metrics-reporter 96 | uswitch.blueshift.telemetry.riemann/riemann-metrics-reporter]} 97 | 98 | Then, when you've built and run Blueshift, be sure to add the jar to the classpath (the following assumes you're in the blueshift working directory): 99 | 100 | $ cp blueshift-riemann-metrics/target/blueshift-riemann-metrics-0.1.0-standalone.jar ./target 101 | $ java -cp "target/*" uswitch.blueshift.main --config ./etc/config.edn 102 | 103 | Obviously for a production deployment you'd probably want to automate this with your continuous integration server of choice :) 104 | 105 | ## TODO 106 | 107 | * Add exception handling when cleaning uploaded files from S3 108 | * Change `KeyWatcher` to identify when directories are deleted, can exit the watcher process and remove from the list of watched directories. If it's added again later can then just create a new process. 109 | * Add safety check when processing data files- ensure that the header line of the TSV file matches the contents of `manifest.edn` 110 | 111 | ## Authors 112 | 113 | * [Paul Ingles](https://github.com/pingles) ([@pingles](http://twitter.com/pingles)) 114 | * [Thomas Kristensen](https://github.com/tgk) ([@tgkristensen](http://twitter.com/tgkristensen)) 115 | 116 | ## License 117 | 118 | Copyright © 2014 [uSwitch.com](http://www.uswitch.com) Limited. 119 | 120 | Distributed under the Eclipse Public License either version 1.0 or (at 121 | your option) any later version. 
122 | 123 | -------------------------------------------------------------------------------- /demo/src/demo/core.clj: -------------------------------------------------------------------------------- 1 | (ns demo.core 2 | (:require [clojure.tools.logging :refer (info error warn debug errorf)] 3 | [com.stuartsierra.component :refer (system-map using Lifecycle start)] 4 | [aws.sdk.s3 :refer (list-objects get-object delete-object put-object)] 5 | [clojure.core.async :refer (chan close! thread alts!! timeout)] 6 | [clojure.tools.cli :refer (parse-opts)]) 7 | (:import [java.util UUID] 8 | [java.sql DriverManager SQLException])) 9 | 10 | ;; pgsql driver isn't loaded automatically from classpath 11 | 12 | (Class/forName "org.postgresql.Driver") 13 | 14 | ;; SQL utils 15 | 16 | (defn- get-connection 17 | [jdbc-url] 18 | (doto (DriverManager/getConnection jdbc-url) 19 | (.setAutoCommit false))) 20 | 21 | ;; Lifecycle utils 22 | 23 | (defn clear-keys 24 | "dissoc for components records. assoc's nil for the specified keys" 25 | [m & ks] 26 | (apply assoc m (interleave ks (repeat (count ks) nil)))) 27 | 28 | (defn close-channels [state & ks] 29 | (doseq [k ks] 30 | (when-let [ch (get state k)] 31 | (close! 
ch))) 32 | (apply clear-keys state ks)) 33 | 34 | ;; Checking S3 directory structure 35 | 36 | (defn- key-pattern->prefix 37 | "Converts a key-pattern to a prefix by replacing any .* by /" 38 | [key-pattern] 39 | (.replaceAll key-pattern "\\.\\*" "/")) 40 | 41 | (defn- all-s3-keys 42 | [cred bucket key-pattern] 43 | (->> 44 | (list-objects cred bucket 45 | {:prefix (key-pattern->prefix key-pattern) :delimiter "/"}) 46 | :objects 47 | (map :key))) 48 | 49 | (defrecord S3ChangePoller [config] 50 | Lifecycle 51 | (start [this] 52 | (let [control-ch (chan)] 53 | (thread 54 | (loop [s3-keys #{}] 55 | (info "Checking S3 buckets") 56 | (let [new-s3-keys (set 57 | (all-s3-keys 58 | (-> config :s3 :credentials) 59 | (-> config :s3 :bucket) 60 | (-> config :s3 :key-pattern key-pattern->prefix 61 | (str "folder/"))))] 62 | (when (not= s3-keys new-s3-keys) 63 | (info "S3 bucket content changed:") 64 | (info (sort new-s3-keys))) 65 | (let [[_ port] (alts!! [(timeout 15000) control-ch])] 66 | (if (= port control-ch) 67 | (info "Halting S3 change poller loop") 68 | (recur new-s3-keys)))))) 69 | (assoc this :control-ch control-ch))) 70 | (stop [this] 71 | (close-channels this :control-ch))) 72 | 73 | ;; Publishing new TSV files 74 | 75 | (defn- publish-new-tsv! 76 | [cred bucket prefix] 77 | (let [timestamp (System/currentTimeMillis) 78 | key (str prefix "folder/" timestamp ".tsv") 79 | uuid (.toString (UUID/randomUUID))] 80 | (put-object cred bucket 81 | key 82 | (str uuid \tab key \tab timestamp)))) 83 | 84 | (defrecord TSVPublisher [config] 85 | Lifecycle 86 | (start [this] 87 | (let [control-ch (chan)] 88 | (thread 89 | (loop [] 90 | (info "Publishing new TSV file") 91 | (publish-new-tsv! 92 | (-> config :s3 :credentials) 93 | (-> config :s3 :bucket) 94 | (-> config :s3 :key-pattern key-pattern->prefix)) 95 | (let [[_ port] (alts!! 
[(timeout 20000) control-ch])] 96 | (if (= port control-ch) 97 | (info "Halting TSV publisher") 98 | (recur))))) 99 | (assoc this :control-ch control-ch))) 100 | (stop [this] 101 | (close-channels this :control-ch))) 102 | 103 | ;; Poll for largest timestamp 104 | 105 | (defn- latest-timestamp 106 | [jdbc-url] 107 | (let [c (get-connection jdbc-url) 108 | s (.createStatement c) 109 | rs (.executeQuery s "SELECT MAX(timestamp) AS mt FROM demo")] 110 | (when (.next rs) 111 | (.getTimestamp rs "mt")))) 112 | 113 | (defrecord LatestTimestampPoller [jdbc-url] 114 | Lifecycle 115 | (start [this] 116 | (let [control-ch (chan)] 117 | (thread 118 | (loop [timestamp -1] 119 | (info "Polling Redshift for latest timestamp") 120 | (let [new-timestamp (latest-timestamp jdbc-url)] 121 | (when (not= new-timestamp timestamp) 122 | (info "Reshift timestamp updated:" new-timestamp)) 123 | (let [[_ port] (alts!! [(timeout 15000) control-ch])] 124 | (if (= port control-ch) 125 | (info "Halting Redshift timestamp poller loop") 126 | (recur new-timestamp)))))) 127 | (assoc this :control-ch control-ch))) 128 | (stop [this] 129 | (close-channels this :control-ch))) 130 | 131 | ;; System 132 | 133 | (defn build-system [config jdbc-url] 134 | (system-map 135 | :tsv-publisher (TSVPublisher. config) 136 | :s3-change-poller (S3ChangePoller. config) 137 | :latest-timestamp-poller (LatestTimestampPoller. jdbc-url))) 138 | 139 | ;; Ensuring manifest file is present 140 | 141 | (defn- manifest-as-string 142 | [jdbc-url] 143 | (-> {:table "demo" 144 | :pk-columns ["uuid"] 145 | :columns ["uuid" "key" "timestamp"] 146 | :options ["DELIMITER '\t'" 147 | "COMPUPDATE OFF" 148 | "STATUPDATE ON" 149 | "ESCAPE" 150 | "TRIMBLANKS" 151 | "EMPTYASNULL" 152 | "BLANKSASNULL" 153 | "FILLRECORD" 154 | "TRUNCATECOLUMNS" 155 | "ROUNDEC" 156 | "MAXERROR 1000" 157 | "TIMEFORMAT 'epochmillisecs'"] 158 | :data-pattern ".*tsv$"} 159 | (assoc :jdbc-url jdbc-url) 160 | str)) 161 | 162 | (defn- ensure-manifest! 
163 | [config jdbc-url] 164 | (put-object (-> config :s3 :credentials) 165 | (-> config :s3 :bucket) 166 | (str (-> config :s3 :key-pattern key-pattern->prefix) "folder/manifest.edn") 167 | (manifest-as-string jdbc-url))) 168 | 169 | ;; Ensure demo table is present 170 | 171 | (defn- drop-demo-table! 172 | [jdbc-url] 173 | (let [c (get-connection jdbc-url) 174 | s (.createStatement c)] 175 | (.executeUpdate s "DROP TABLE IF EXISTS demo") 176 | (.commit c))) 177 | 178 | (defn- create-demo-table! 179 | [jdbc-url] 180 | (let [c (get-connection jdbc-url) 181 | s (.createStatement c)] 182 | (.executeUpdate s " 183 | CREATE TABLE demo ( 184 | uuid varchar(40) NOT NULL encode lzo, 185 | key varchar(128) NOT NULL encode lzo, 186 | timestamp datetime NOT NULL encode delta32k, 187 | 188 | PRIMARY KEY (uuid) 189 | )") 190 | (.commit c))) 191 | 192 | ;; Main entry-point 193 | 194 | (defn main 195 | [config jdbc-url] 196 | (info "Ensuring manifest.edn present in S3 bucket") 197 | (ensure-manifest! config jdbc-url) 198 | (info "Dropping demo table if exists") 199 | (drop-demo-table! jdbc-url) 200 | (info "Creating demo table") 201 | (create-demo-table! jdbc-url) 202 | (build-system config jdbc-url)) 203 | 204 | ;; ... and from the command line 205 | 206 | (def cli-options 207 | [["-c" "--config CONFIG" "Path to EDN configuration file" 208 | :default "../etc/config.edn" 209 | :validate [string?]] 210 | ["-j" "--jdbc-url JDBC-URL" "Path to jdbc-url txt file" 211 | :default "./jdbc-url.txt" 212 | :validate [string?]] 213 | ["-h" "--help"]]) 214 | 215 | (defn wait! [] 216 | (let [s (java.util.concurrent.Semaphore. 
0)] 217 | (.acquire s))) 218 | 219 | (defn -main [& args] 220 | (let [{:keys [options summary]} (parse-opts args cli-options)] 221 | (when (:help options) 222 | (println summary) 223 | (System/exit 0)) 224 | (let [{:keys [config jdbc-url]} options] 225 | (info "Starting demo") 226 | (let [system (main (read-string (slurp config)) 227 | (clojure.string/trim (slurp jdbc-url)))] 228 | (start system) 229 | (wait!))))) 230 | -------------------------------------------------------------------------------- /src/uswitch/blueshift/redshift.clj: -------------------------------------------------------------------------------- 1 | (ns uswitch.blueshift.redshift 2 | (:require [aws.sdk.s3 :refer (put-object delete-object)] 3 | [cheshire.core :refer (generate-string)] 4 | [clojure.tools.logging :refer (info error debug)] 5 | [clojure.string :as s] 6 | [com.stuartsierra.component :refer (system-map Lifecycle using)] 7 | [clojure.core.async :refer (chan !! close! thread)] 8 | [uswitch.blueshift.util :refer (close-channels)] 9 | [metrics.meters :refer (mark! meter)] 10 | [metrics.counters :refer (inc! dec! counter)] 11 | [metrics.timers :refer (timer time!)]) 12 | (:import [java.util UUID] 13 | [java.sql DriverManager SQLException])) 14 | 15 | 16 | (defn manifest [bucket files] 17 | {:entries (for [f files] {:url (str "s3://" bucket "/" f) 18 | :mandatory true})}) 19 | 20 | (defn put-manifest 21 | "Uploads the manifest to S3 as JSON, returns the URL to the uploaded object. 22 | Manifest should be generated with uswitch.blueshift.redshift/manifest." 
23 | [credentials bucket manifest] 24 | (let [file-name (str (UUID/randomUUID) ".manifest") 25 | s3-url (str "s3://" bucket "/" file-name)] 26 | (put-object credentials bucket file-name (generate-string manifest)) 27 | {:key file-name 28 | :url s3-url})) 29 | 30 | (def redshift-imports (meter [(str *ns*) "redshift-imports" "imports"])) 31 | (def redshift-import-rollbacks (meter [(str *ns*) "redshift-imports" "rollbacks"])) 32 | (def redshift-import-commits (meter [(str *ns*) "redshift-imports" "commits"])) 33 | 34 | ;; pgsql driver isn't loaded automatically from classpath 35 | (Class/forName "org.postgresql.Driver") 36 | 37 | (defn connection [jdbc-url] 38 | (doto (DriverManager/getConnection jdbc-url) 39 | (.setAutoCommit false))) 40 | 41 | (def ^{:dynamic true} *current-connection* nil) 42 | 43 | (defn prepare-statement 44 | [sql] 45 | (.prepareStatement *current-connection* sql)) 46 | 47 | (defmacro with-connection [jdbc-url & body] 48 | `(binding [*current-connection* (connection ~jdbc-url)] 49 | (try ~@body 50 | (debug "COMMIT") 51 | (.commit *current-connection*) 52 | (mark! redshift-import-commits) 53 | (catch SQLException e# 54 | (error e# "ROLLBACK") 55 | (mark! 
redshift-import-rollbacks) 56 | (.rollback *current-connection*) 57 | (throw e#)) 58 | (finally 59 | (when-not (.isClosed *current-connection*) 60 | (.close *current-connection*)))))) 61 | 62 | 63 | (defn create-staging-table-stmt [target-table staging-table] 64 | (prepare-statement (format "CREATE TEMPORARY TABLE %s (LIKE %s INCLUDING DEFAULTS)" 65 | staging-table 66 | target-table))) 67 | 68 | (defn copy-from-s3-stmt [table manifest-url {:keys [access-key secret-key] :as creds} {:keys [columns options] :as table-manifest}] 69 | (prepare-statement (format "COPY %s (%s) FROM '%s' CREDENTIALS 'aws_access_key_id=%s;aws_secret_access_key=%s' %s manifest" 70 | table 71 | (s/join "," columns) 72 | manifest-url 73 | access-key 74 | secret-key 75 | (s/join " " options)))) 76 | 77 | (defn truncate-table-stmt [target-table] 78 | (prepare-statement (format "truncate table %s" target-table))) 79 | 80 | 81 | (defn delete-in-query [target-table staging-table key] 82 | (format "DELETE FROM %s WHERE %s IN (SELECT %s FROM %s)" target-table key key staging-table)) 83 | 84 | (defn delete-join-query 85 | [target-table staging-table keys] 86 | (let [where (s/join " AND " (for [pk keys] (str target-table "." pk "=" staging-table "." pk)))] 87 | (format "DELETE FROM %s USING %s WHERE %s" target-table staging-table where))) 88 | 89 | (defn delete-target-query 90 | "Attempts to optimise delete strategy based on keys arity. With single primary keys 91 | its significantly faster to delete." 92 | [target-table staging-table keys] 93 | (cond (= 1 (count keys)) (delete-in-query target-table staging-table (first keys)) 94 | :default (delete-join-query target-table staging-table keys))) 95 | 96 | (defn delete-target-stmt 97 | "Deletes rows, with the same primary key value(s), from target-table that will be 98 | overwritten by values in staging-table." 
99 | [target-table staging-table keys] 100 | (prepare-statement (delete-target-query target-table staging-table keys))) 101 | 102 | (defn staging-select-statement [{:keys [staging-select] :as table-manifest} staging-table] 103 | (cond 104 | (string? staging-select) (s/replace staging-select #"\{\{table\}\}" staging-table) 105 | (= :distinct staging-select) (format "SELECT DISTINCT * FROM %s" staging-table) 106 | :default (format "SELECT * FROM %s" staging-table))) 107 | 108 | (defn insert-from-staging-stmt [target-table staging-table table-manifest] 109 | (let [select-statement (staging-select-statement table-manifest staging-table)] 110 | (prepare-statement (format "INSERT INTO %s %s" target-table select-statement)))) 111 | 112 | (defn append-from-staging-stmt [target-table staging-table keys] 113 | (let [join-columns (s/join " AND " (map #(str "s." % " = t." %) keys)) 114 | where-clauses (s/join " AND " (map #(str "t." % " IS NULL") keys))] 115 | (prepare-statement (format "INSERT INTO %s SELECT s.* FROM %s s LEFT JOIN %s t ON %s WHERE %s" 116 | target-table staging-table target-table join-columns where-clauses)))) 117 | 118 | (defn drop-table-stmt [table] 119 | (prepare-statement (format "DROP TABLE %s" table))) 120 | 121 | (defn- aws-censor 122 | [s] 123 | (-> s 124 | (clojure.string/replace #"aws_access_key_id=[^;]*" "aws_access_key_id=***") 125 | (clojure.string/replace #"aws_secret_access_key=[^;]*" "aws_secret_access_key=***"))) 126 | 127 | (defn execute [& statements] 128 | (doseq [statement statements] 129 | (debug (aws-censor (.toString statement))) 130 | (try (.execute statement) 131 | (catch SQLException e 132 | (error "Error executing statement:" (.toString statement)) 133 | (throw e))))) 134 | 135 | (defn merge-table [credentials redshift-manifest-url {:keys [table jdbc-url pk-columns strategy] :as table-manifest}] 136 | (let [staging-table (str table "_staging")] 137 | (mark! 
redshift-imports) 138 | (with-connection jdbc-url 139 | (execute (create-staging-table-stmt table staging-table) 140 | (copy-from-s3-stmt staging-table redshift-manifest-url credentials table-manifest) 141 | (delete-target-stmt table staging-table pk-columns) 142 | (insert-from-staging-stmt table staging-table table-manifest) 143 | (drop-table-stmt staging-table))))) 144 | 145 | (defn replace-table [credentials redshift-manifest-url {:keys [table jdbc-url pk-columns strategy] :as table-manifest}] 146 | (mark! redshift-imports) 147 | (with-connection jdbc-url 148 | (execute (truncate-table-stmt table) 149 | (copy-from-s3-stmt table redshift-manifest-url credentials table-manifest)))) 150 | 151 | (defn append-table [credentials redshift-manifest-url {:keys [table jdbc-url pk-columns strategy] :as table-manifest}] 152 | (let [staging-table (str table "_staging")] 153 | (mark! redshift-imports) 154 | (with-connection jdbc-url 155 | (execute (create-staging-table-stmt table staging-table) 156 | (copy-from-s3-stmt staging-table redshift-manifest-url credentials table-manifest) 157 | (append-from-staging-stmt table staging-table pk-columns) 158 | (drop-table-stmt staging-table))))) 159 | 160 | (defn load-table [credentials redshift-manifest-url {strategy :strategy :as table-manifest}] 161 | (case (keyword strategy) 162 | :merge (merge-table credentials redshift-manifest-url table-manifest) 163 | :replace (replace-table credentials redshift-manifest-url table-manifest) 164 | :append (append-table credentials redshift-manifest-url table-manifest))) 165 | -------------------------------------------------------------------------------- /src/uswitch/blueshift/s3.clj: -------------------------------------------------------------------------------- 1 | (ns uswitch.blueshift.s3 2 | (:require [com.stuartsierra.component :refer (Lifecycle system-map using start stop)] 3 | [clojure.tools.logging :refer (info error warn debug errorf)] 4 | [aws.sdk.s3 :refer (list-objects get-object 
delete-object)] 5 | [clojure.set :refer (difference)] 6 | [clojure.core.async :refer (go-loop thread put! chan >!! ! (read-edn content) 78 | (map->Manifest) 79 | (assoc-if-nil :strategy "merge") 80 | (update-in [:data-pattern] re-pattern)))))) 81 | 82 | (defn- step-scan 83 | [credentials bucket directory] 84 | (try 85 | (let [fs (files credentials bucket directory)] 86 | (if-let [manifest (manifest credentials bucket fs)] 87 | (do 88 | (validate manifest) 89 | (let [data-files (filter (fn [{:keys [key]}] 90 | (re-matches (:data-pattern manifest) key)) 91 | fs)] 92 | (if (seq data-files) 93 | (do 94 | (info "Watcher triggering import" (:table manifest)) 95 |
;; FIX: previously logged `load` (clojure.core's load fn), printing a meaningless
;; function object; log the matched table manifest instead.
(debug "Triggering load:" manifest) 96 | {:state :load, :table-manifest manifest, :files (map :key data-files)}) 97 | {:state :scan, :pause? true}))) 98 | {:state :scan, :pause? true})) 99 | (catch clojure.lang.ExceptionInfo e 100 | (error e "Error with manifest file") 101 | {:state :scan, :pause? true}) 102 | (catch ConnectionPoolTimeoutException e 103 | (warn e "Connection timed out. Will re-try.") 104 | {:state :scan, :pause? true}) 105 | (catch Exception e 106 | (error e "Failed reading content of" (str bucket "/" directory)) 107 | {:state :scan, :pause? true}))) 108 | 109 | (def importing-files (counter [(str *ns*) "importing-files" "files"])) 110 | (def import-timer (timer [(str *ns*) "importing-files" "time"])) 111 | 112 | (defn- step-load 113 | [credentials bucket table-manifest files] 114 | (let [redshift-manifest (redshift/manifest bucket files) 115 | {:keys [key url]} (redshift/put-manifest credentials bucket redshift-manifest)] 116 | 117 | (info "Importing" (count files) "data files to table" (:table table-manifest) "from manifest" url) 118 | (debug "Importing Redshift Manifest" redshift-manifest) 119 | (inc! importing-files (count files)) 120 | (try (time! 
import-timer 121 | (redshift/load-table credentials url table-manifest)) 122 | (info "Successfully imported" (count files) "files") 123 | (delete-object credentials bucket key) 124 | (dec! importing-files (count files)) 125 | {:state :delete 126 | :files files} 127 | (catch java.sql.SQLException e 128 | (error e "Error loading into" (:table table-manifest)) 129 | (error (:table table-manifest) "Redshift manifest content:" redshift-manifest) 130 | (delete-object credentials bucket key) 131 | (dec! importing-files (count files)) 132 | {:state :scan 133 | :pause? true})))) 134 | 135 | (defn- step-delete 136 | [credentials bucket files] 137 | (do 138 | (doseq [key files] 139 | (info "Deleting" (str "s3://" bucket "/" key)) 140 | (try 141 | (delete-object credentials bucket key) 142 | (catch Exception e 143 |
;; FIX: the caught exception was dropped; pass it to `warn` so the
;; failure cause (auth error, 404, timeout, ...) is visible in the log.
(warn e "Couldn't delete" key " - ignoring")))) 144 | {:state :scan, :pause? true})) 145 | 146 | (defn- progress 147 | [{:keys [state] :as world} 148 | {:keys [credentials bucket directory] :as configuration}] 149 | (case state 150 | :scan (step-scan credentials bucket directory ) 151 | :load (step-load credentials bucket (:table-manifest world) (:files world)) 152 | :delete (step-delete credentials bucket (:files world)))) 153 | 154 | (defrecord KeyWatcher [credentials bucket directory poll-interval-seconds] 155 | Lifecycle 156 | (start [this] 157 | (info "Starting KeyWatcher for" (str bucket "/" directory) "polling every" poll-interval-seconds "seconds") 158 | (let [control-ch (chan) 159 | configuration {:credentials credentials :bucket bucket :directory directory}] 160 | (thread 161 | (loop [timer (timeout (* poll-interval-seconds 1000)) 162 | world {:state :scan}] 163 | (let [next-world (progress world configuration)] 164 | (if (:pause? next-world) 165 | (let [[_ c] (alts!! 
[control-ch timer])] 166 | (when (not= c control-ch) 167 | (recur (timeout (* poll-interval-seconds 1000)) next-world))) 168 | (recur timer next-world))))) 169 | (assoc this :watcher-control-ch control-ch))) 170 | (stop [this] 171 | (info "Stopping KeyWatcher for" (str bucket "/" directory)) 172 | (close-channels this :watcher-control-ch))) 173 | 174 | 175 | (defn spawn-key-watcher! [credentials bucket directory poll-interval-seconds] 176 | (start (KeyWatcher. credentials bucket directory poll-interval-seconds))) 177 | 178 | (def directories-watched (counter [(str *ns*) "directories-watched" "directories"])) 179 | 180 | (defrecord KeyWatcherSpawner [bucket-watcher poll-interval-seconds] 181 | Lifecycle 182 | (start [this] 183 | (info "Starting KeyWatcherSpawner") 184 | (let [{:keys [new-directories-ch bucket credentials]} bucket-watcher 185 | watchers (atom nil)] 186 | (go-loop [dirs (KeyWatcherSpawner {:poll-interval-seconds (-> config :s3 :poll-interval :seconds)})) 204 | 205 | (defn matching-directories [credentials bucket key-pattern] 206 | (try (->> (leaf-directories credentials bucket) 207 | (filter #(re-matches key-pattern %)) 208 | (set)) 209 | (catch Exception e 210 | (errorf e "Error checking for matching object keys in \"%s\"" bucket) 211 | #{}))) 212 | 213 | (defrecord BucketWatcher [credentials bucket key-pattern poll-interval-seconds] 214 | Lifecycle 215 | (start [this] 216 | (info "Starting BucketWatcher. Polling" bucket "every" poll-interval-seconds "seconds for keys matching" key-pattern) 217 | (let [new-directories-ch (chan) 218 | control-ch (chan)] 219 | (thread 220 | (loop [dirs nil] 221 | (let [available-dirs (matching-directories credentials bucket key-pattern) 222 | new-dirs (difference available-dirs dirs)] 223 | (when (seq new-dirs) 224 | (info "New directories:" new-dirs "spawning" (count new-dirs) "watchers") 225 | (>!! new-directories-ch new-dirs)) 226 | (let [[v c] (alts!! 
[(timeout (* 1000 poll-interval-seconds)) control-ch])] 227 | (when-not (= c control-ch) 228 | (recur available-dirs)))))) 229 | (assoc this :control-ch control-ch :new-directories-ch new-directories-ch))) 230 | (stop [this] 231 | (info "Stopping BucketWatcher") 232 | (close-channels this :control-ch :new-directories-ch))) 233 | 234 | (defn bucket-watcher 235 | "Creates a process watching for objects in S3 buckets." 236 | [config] 237 | (map->BucketWatcher {:credentials (-> config :s3 :credentials) 238 | :bucket (-> config :s3 :bucket) 239 | :poll-interval-seconds (-> config :s3 :poll-interval :seconds) 240 |
;; FIX: (re-pattern nil) throws NullPointerException, so the old
;; (or (re-pattern ...) #".*") form made the #".*" default unreachable
;; when :key-pattern was absent from config; default the string first.
:key-pattern (re-pattern (or (-> config :s3 :key-pattern) 241 | ".*"))})) 242 | 243 | (defrecord PrintSink [prefix chan-k component] 244 | Lifecycle 245 | (start [this] 246 | (let [ch (get component chan-k)] 247 | (go-loop [msg (PrintSink {:prefix prefix :chan-k chan-k})) 258 | 259 | (defn s3-system [config] 260 | (system-map :bucket-watcher (bucket-watcher config) 261 | :key-watcher-spawner (using (key-watcher-spawner config) 262 | [:bucket-watcher]))) 263 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. 
A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. 
The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. 
REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. 
While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. 
Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. 
GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. 
The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of Washington and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | --------------------------------------------------------------------------------