├── .env
├── Procfile
├── .gitignore
├── demo
├── .gitignore
├── Procfile
├── dev
│ └── user.clj
├── project.clj
├── README.md
└── src
│ └── demo
│ └── core.clj
├── etc
├── config.edn
├── config.edn.example
├── logback.xml
└── logback.xml.example
├── src
└── uswitch
│ └── blueshift
│ ├── util.clj
│ ├── system.clj
│ ├── main.clj
│ ├── telemetry.clj
│ ├── redshift.clj
│ └── s3.clj
├── dev
└── user.clj
├── project.clj
├── README.md
└── LICENSE
/.env:
--------------------------------------------------------------------------------
1 | S3_DATABASE_EXPORT_BUCKET=
2 | S3_DATABASE_EXPORT_ID=
3 | S3_DATABASE_EXPORT_KEY=
4 |
--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | watcher: lein run -- --s3id $S3_DATABASE_EXPORT_ID --s3key $S3_DATABASE_EXPORT_KEY --s3bucket $S3_DATABASE_EXPORT_BUCKET
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | pom.xml
5 | pom.xml.asc
6 | *.jar
7 | *.class
8 | /.lein-*
9 | /.nrepl-port
10 |
--------------------------------------------------------------------------------
/demo/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | /classes
3 | /checkouts
4 | pom.xml
5 | pom.xml.asc
6 | *.jar
7 | *.class
8 | /.lein-*
9 | /.nrepl-port
10 | jdbc-url.txt
11 |
--------------------------------------------------------------------------------
/demo/Procfile:
--------------------------------------------------------------------------------
1 | blueshift: sh -c 'cd .. && exec lein run -- --config ./etc/config.edn'
2 | demo: lein run -- --config ../etc/config.edn --jdbc-url ./jdbc-url.txt
3 |
--------------------------------------------------------------------------------
/etc/config.edn:
--------------------------------------------------------------------------------
1 | {:s3 {:credentials {:access-key ""
2 | :secret-key ""}
3 | :bucket "blueshift-data"
4 | :key-pattern ".*"
5 | :poll-interval {:seconds 30}}
6 | }
7 |
--------------------------------------------------------------------------------
/etc/config.edn.example:
--------------------------------------------------------------------------------
1 | {:s3 {:credentials {:access-key ""
2 | :secret-key ""}
3 | :bucket "blueshift-data"
4 | :key-pattern ".*"
5 | :poll-interval {:seconds 30}}
6 | :telemetry {:reporters [uswitch.blueshift.telemetry/log-metrics-reporter]}}
7 |
--------------------------------------------------------------------------------
/src/uswitch/blueshift/util.clj:
--------------------------------------------------------------------------------
1 | (ns uswitch.blueshift.util
2 | (:require [clojure.core.async :refer (close!)]))
3 |
(defn clear-keys
  "'dissoc' for component records: records cannot drop their declared fields,
  so instead assoc nil under each of the specified keys. Returns m unchanged
  when no keys are given."
  [m & ks]
  ;; reduce handles the empty-ks case; (apply assoc m '()) would throw an
  ;; ArityException because assoc needs at least one key/value pair.
  (reduce (fn [acc k] (assoc acc k nil)) m ks))
8 |
(defn close-channels
  "Closes every core.async channel stored under ks in state (missing keys are
  skipped), then nils those keys out via clear-keys so the component record
  can be restarted cleanly."
  [state & ks]
  (doseq [ch (keep (partial get state) ks)]
    (close! ch))
  (apply clear-keys state ks))
--------------------------------------------------------------------------------
/etc/logback.xml:
--------------------------------------------------------------------------------
1 | <!-- NOTE(review): the XML tags of this file were stripped during extraction;
2 |      reconstructed as a standard logback console-appender configuration
3 |      around the surviving pattern line — confirm against the original. -->
4 | <configuration>
5 |   <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
6 |     <encoder>
7 |       <pattern>[%d] %-5level %logger{36} - %msg%n%ex{full}</pattern>
8 |     </encoder>
9 |   </appender>
10 |   <root level="INFO">
11 |     <appender-ref ref="STDOUT" />
12 |   </root>
13 | </configuration>
--------------------------------------------------------------------------------
/etc/logback.xml.example:
--------------------------------------------------------------------------------
1 | <!-- NOTE(review): the XML tags of this file were stripped during extraction;
2 |      reconstructed as a standard logback console-appender configuration
3 |      around the surviving pattern line — confirm against the original. -->
4 | <configuration>
5 |   <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
6 |     <encoder>
7 |       <pattern>[%d] %-5level %logger{36} - %msg%n%ex{full}</pattern>
8 |     </encoder>
9 |   </appender>
10 |   <root level="INFO">
11 |     <appender-ref ref="STDOUT" />
12 |   </root>
13 | </configuration>
--------------------------------------------------------------------------------
/src/uswitch/blueshift/system.clj:
--------------------------------------------------------------------------------
1 | (ns uswitch.blueshift.system
2 | (:require [uswitch.blueshift.s3 :refer (s3-system)]
3 | [uswitch.blueshift.telemetry :refer (telemetry-system)]
4 | [com.stuartsierra.component :refer (system-map using Lifecycle start)]
5 | [clojure.core.async :refer (chan close!)])
6 | (:import [clojure.core.async.impl.channels ManyToManyChannel]))
7 |
(defn build-system
  "Builds the top-level component system from the parsed EDN config: the S3
  watching sub-system plus the telemetry (metrics reporting) sub-system.
  Start/stop the result with com.stuartsierra.component."
  [config]
  (system-map :s3-system (s3-system config)
              :telemetry-system (telemetry-system config)))
11 |
--------------------------------------------------------------------------------
/dev/user.clj:
--------------------------------------------------------------------------------
1 | (ns user
2 | (:require [uswitch.blueshift.system :refer (build-system)]
3 | [clojure.tools.namespace.repl :refer (refresh)]
4 | [com.stuartsierra.component :as component]))
5 |
;; Holds the currently running component system; nil until init/go is called.
(def system nil)

(defn init
  "Constructs a fresh (unstarted) system from ./etc/config.edn and stores it
  in #'system."
  []
  (alter-var-root #'system
                  (constantly (build-system (read-string (slurp "./etc/config.edn"))))))

(defn start
  "Starts the current system in place."
  []
  (alter-var-root #'system component/start))

(defn stop
  "Stops the current system, if there is one; leaves the stopped system in
  #'system."
  []
  (alter-var-root #'system (fn [s] (when s (component/stop s)))))

(defn go
  "Initializes and starts a fresh system."
  []
  (init)
  (start))

(defn reset
  "Stops the system, reloads any changed namespaces, then re-runs `user/go`."
  []
  (stop)
  (refresh :after 'user/go))
25 |
--------------------------------------------------------------------------------
/demo/dev/user.clj:
--------------------------------------------------------------------------------
1 | (ns user
2 | (:require [demo.core :refer (build-system main)]
3 | [clojure.tools.namespace.repl :refer (refresh)]
4 | [com.stuartsierra.component :as component]))
5 |
;; Holds the currently running demo system; nil until init/go is called.
(def system nil)

(defn jdbc-url
  "Returns the Redshift JDBC URL: the JDBC_URL environment variable if set,
  otherwise the trimmed contents of ./jdbc-url.txt.
  NOTE(review): uses fully-qualified clojure.string/trim without requiring
  clojure.string in the ns form; works only because it is loaded transitively
  by another namespace — confirm."
  []
  (or (get (System/getenv) "JDBC_URL")
      (clojure.string/trim (slurp "jdbc-url.txt"))))

(defn init
  "Prepares the demo (manifest upload, table recreation via `main`) and stores
  the resulting unstarted system in #'system."
  []
  (alter-var-root #'system
                  (constantly (main (read-string (slurp "../etc/config.edn"))
                                    (jdbc-url)))))

(defn start
  "Starts the current system in place."
  []
  (alter-var-root #'system component/start))

(defn stop
  "Stops the current system, if there is one."
  []
  (alter-var-root #'system (fn [s] (when s (component/stop s)))))

(defn go
  "Initializes and starts a fresh demo system."
  []
  (init)
  (start))

(defn reset
  "Stops the system, reloads any changed namespaces, then re-runs `user/go`."
  []
  (stop)
  (refresh :after 'user/go))
30 |
--------------------------------------------------------------------------------
/demo/project.clj:
--------------------------------------------------------------------------------
(defproject demo "0.1.0-SNAPSHOT"
  :dependencies [[org.clojure/clojure "1.6.0"]
                 [org.clojure/tools.logging "0.2.6"]
                 [com.stuartsierra/component "0.2.1"]
                 [org.clojure/core.async "0.1.303.0-886421-alpha"]
                 [org.clojure/tools.cli "0.3.1"]
                 ;; transitive versions excluded here are pinned explicitly below
                 [clj-aws-s3 "0.3.9" :exclusions [commons-logging commons-codec joda-time]]
                 [joda-time "2.6"]
                 [commons-codec "1.3"]
                 [org.slf4j/jcl-over-slf4j "1.7.7"]
                 [cheshire "5.3.1"]
                 [postgresql "8.0-318.jdbc3"]
                 [org.slf4j/slf4j-simple "1.7.7"]
                 [org.clojure/tools.namespace "0.2.3"]]
  ;; slf4j-simple log levels: debug by default, quieter for the chatty
  ;; HTTP/AWS client namespaces.
  :jvm-opts ["-Dorg.slf4j.simpleLogger.defaultLogLevel=debug"
             "-Dorg.slf4j.simpleLogger.log.org.apache.http=info"
             "-Dorg.slf4j.simpleLogger.log.com.amazonaws=info"
             "-Dorg.slf4j.simpleLogger.log.com.codahale=debug"]
  ;; ./dev holds the REPL workflow namespace (dev/user.clj)
  :profiles {:dev {:source-paths ["./dev"]}}
  :main demo.core)
21 |
--------------------------------------------------------------------------------
/demo/README.md:
--------------------------------------------------------------------------------
1 | # Blueshift demo
2 |
3 | This demo program will periodically upload new `tsv`-files to a S3
4 | bucket for Blueshift to upload. The program will ensure that a
5 | `manifest.edn` file is present and that a `demo` table is present in
6 | Redshift.
7 |
8 | The `demo` table contains three columns:
9 |
10 | - `uuid` A unique identifier for the row
11 | - `key` The name of the `tsv`-file corresponding to the row
12 | - `timestamp` A timestamp for when the `tsv`-file was created.
13 |
14 | There's only one line in each `tsv`-file even though Blueshift supports
15 | multiple lines when loading files into tables.
16 |
17 | The demo will periodically monitor the S3 bucket to listen for file
18 | changes, and it will monitor the `demo` table in Redshift to look for
19 | new timestamps.
20 |
21 | ## Usage
22 |
23 | The demo is configured using the same configuration file as
24 | Blueshift. The demo also needs to know the JDBC-URL for Redshift. The
25 | URL should be stored in a `txt`-file.
26 |
27 | There is a `Procfile` that assumes `../etc/config.edn` exists, and that
28 | there is a file named `./jdbc-url.txt` with content of the form
29 | `jdbc:postgresql://...`. The Procfile will start the demo and a
30 | Blueshift process when run using:
31 |
32 | foreman start
33 |
--------------------------------------------------------------------------------
/src/uswitch/blueshift/main.clj:
--------------------------------------------------------------------------------
1 | (ns uswitch.blueshift.main
2 | (:require [clojure.tools.logging :refer (info)]
3 | [clojure.tools.cli :refer (parse-opts)]
4 | [uswitch.blueshift.system :refer (build-system)]
5 | [com.stuartsierra.component :refer (start stop)])
6 | (:gen-class))
7 |
(def cli-options
  "tools.cli option specs for -main. The S3 options default to nil when not
  supplied on the command line."
  [["-c" "--config CONFIG" "Path to EDN configuration file"
    :default "./etc/config.edn"
    :validate [string?]]
   ["-i" "--s3id ID" "S3 ID"
    :default nil]
   ["-k" "--s3key KEY" "S3 KEY"
    :default nil]
   ["-b" "--s3bucket BUCKET" "S3 BUCKET"
    :default nil]
   ["-h" "--help"]])
19 |
(defn wait!
  "Blocks the calling thread forever by acquiring a permit from an empty
  semaphore that is never released."
  []
  (.acquire (java.util.concurrent.Semaphore. 0)))
23 |
(defn- deep-merge
  "Recursively merges nested maps; non-map values on the right win."
  [a b]
  (if (and (map? a) (map? b))
    (merge-with deep-merge a b)
    b))

(defn -main
  "Entry point: parses CLI options, reads the EDN config file, overlays any
  S3 options actually supplied on the command line, then builds and starts
  the system and blocks forever."
  [& args]
  (let [{:keys [options summary]} (parse-opts args cli-options)]
    (when (:help options)
      (println summary)
      (System/exit 0))
    (let [{:keys [config s3id s3key s3bucket]} options
          config-options (read-string (slurp config))
          ;; Only include overrides that were actually supplied. Previously
          ;; the nil CLI defaults were merged in unconditionally, clobbering
          ;; the credentials/bucket read from the config file; the values were
          ;; also bound with top-level `def`s inside -main, leaking globals.
          overrides (cond-> {}
                      s3id     (assoc-in [:s3 :credentials :access-key] s3id)
                      s3key    (assoc-in [:s3 :credentials :secret-key] s3key)
                      s3bucket (assoc-in [:s3 :bucket] s3bucket))
          merged (deep-merge config-options overrides)]
      (info "Starting Blueshift with configuration" config)
      (let [system (build-system merged)]
        (start system)
        (wait!)))))
38 |
--------------------------------------------------------------------------------
/src/uswitch/blueshift/telemetry.clj:
--------------------------------------------------------------------------------
1 | (ns uswitch.blueshift.telemetry
2 | (:require [com.stuartsierra.component :refer (system-map Lifecycle start stop)]
3 | [metrics.core :refer (default-registry)]
4 | [clojure.tools.logging :refer (info *logger-factory* debug)]
5 | [clojure.tools.logging.impl :refer (get-logger)]
6 | [clojure.string :refer (split)]
7 | [uswitch.blueshift.util :refer (clear-keys)])
8 | (:import [java.util.concurrent TimeUnit]
9 | [com.codahale.metrics Slf4jReporter]))
10 |
;; Component that periodically reports every metric in `registry` to the
;; SLF4J logger for this namespace.
(defrecord LogMetricsReporter [registry]
  Lifecycle
  (start [this]
    ;; The Slf4jReporter builder is configured by mutation (doto), then built.
    (let [reporter (.build (doto (Slf4jReporter/forRegistry registry)
                             (.outputTo (get-logger *logger-factory* *ns*))
                             (.convertRatesTo TimeUnit/SECONDS)
                             (.convertDurationsTo TimeUnit/MILLISECONDS)))]
      (info "Starting Slf4j metrics reporter")
      ;; report once every minute
      (.start reporter 1 TimeUnit/MINUTES)
      (assoc this :reporter reporter)))
  (stop [this]
    (when-let [reporter (:reporter this)]
      (info "Stopping Slf4j metrics reporter")
      (.stop reporter))
    ;; assoc nil rather than dissoc: records keep their declared fields
    (clear-keys this :reporter)))
26 |
(defn log-metrics-reporter
  "Reporter constructor referenced from config under [:telemetry :reporters].
  `config` is accepted for interface compatibility but unused here."
  [config registry]
  (->LogMetricsReporter registry))
29 |
(defn- load-reporter
  "Resolves the fully-qualified symbol `sym` to a reporter-constructor fn
  (requiring its namespace first) and invokes it with the config and the
  default metrics registry."
  [config sym]
  (info "Loading reporter" sym "for registry" default-registry)
  (require (symbol (namespace sym)))
  (let [sysfn (var-get (find-var sym))]
    (sysfn config default-registry)))
35 |
(defn telemetry-system
  "Builds a component system containing one component per reporter symbol
  listed under [:telemetry :reporters] in the config; the component's key is
  the keywordized symbol name."
  [config]
  (->> (get-in config [:telemetry :reporters])
       (map (fn [sym] [(keyword (str sym)) (load-reporter config sym)]))
       (reduce (fn [sys [k component]] (assoc sys k component))
               (system-map))))
42 |
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
(defproject blueshift "0.1.0-SNAPSHOT"
  :description "Automate importing S3 data into Amazon Redshift"
  :url "https://github.com/uswitch/blueshift"
  :license {:name "Eclipse Public License"
            :url "http://www.eclipse.org/legal/epl-v10.html"}
  :dependencies [[org.clojure/clojure "1.6.0"]
                 [org.clojure/tools.logging "0.2.6"]
                 [com.stuartsierra/component "0.2.1"]
                 [org.clojure/core.async "0.1.303.0-886421-alpha"]
                 [org.clojure/tools.cli "0.3.1"]
                 ;; transitive versions excluded here are pinned explicitly below
                 [clj-aws-s3 "0.3.9" :exclusions [commons-logging commons-codec joda-time]]
                 [joda-time "2.6"]
                 [commons-codec "1.3"]
                 [org.slf4j/jcl-over-slf4j "1.7.7"]
                 [cheshire "5.3.1"]
                 [postgresql "8.0-318.jdbc3"]
                 [prismatic/schema "0.2.2"]
                 [metrics-clojure "2.0.2"]
                 [com.codahale.metrics/metrics-jvm "3.0.2"]
                 [org.slf4j/slf4j-simple "1.7.7"]]
  :profiles {:dev {:dependencies [[org.slf4j/slf4j-simple "1.7.7"]
                                  [org.clojure/tools.namespace "0.2.3"]]
                   :source-paths ["./dev"]
                   ;; slf4j-simple log levels: debug by default, quieter for
                   ;; the chatty HTTP/AWS client namespaces.
                   :jvm-opts ["-Dorg.slf4j.simpleLogger.defaultLogLevel=debug"
                              "-Dorg.slf4j.simpleLogger.log.org.apache.http=info"
                              "-Dorg.slf4j.simpleLogger.log.com.amazonaws=info"
                              "-Dorg.slf4j.simpleLogger.log.com.codahale=debug"]
                   ;; NOTE(review): removed a machine-specific absolute
                   ;; :resource-paths entry ("/Users/paul/Work/...-standalone.jar")
                   ;; that broke every other checkout. To use the Riemann
                   ;; reporter locally, put the blueshift-riemann-metrics jar on
                   ;; the classpath as described in the README.
                   }
             :uberjar {:aot [uswitch.blueshift.main]
                       :dependencies [[ch.qos.logback/logback-classic "1.1.2"]]}}
  :main uswitch.blueshift.main)
32 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Blueshift
2 |
3 | Service to watch Amazon S3 and automate the load into Amazon Redshift.
4 |
5 |  ([Image used under CC Attribution Share-Alike License](http://en.wikipedia.org/wiki/File:Gravitional_well.jpg)).
6 |
7 | ## Rationale
8 |
9 | [Amazon Redshift](https://aws.amazon.com/redshift/) is "a fast, fully managed, petabyte-scale data warehouse service" but importing data into it can be a bit tricky: e.g. if you want upsert behaviour you have to [implement it yourself with temporary tables](http://docs.aws.amazon.com/redshift/latest/dg/t_updating-inserting-using-staging-tables-.html), and we've had problems [importing across machines into the same tables](https://forums.aws.amazon.com/message.jspa?messageID=443795). Redshift also performs best when bulk importing lots of large files from S3.
10 |
11 | [Blueshift](https://github.com/uswitch/blueshift) is a little service(tm) that makes it easy to automate the loading of data from Amazon S3 and into Amazon Redshift. It will periodically check for data files within a designated bucket and, when new files are found, import them. It provides upsert behaviour by default.
12 |
13 | Importing to Redshift now requires just the ability to write files to S3.
14 |
15 | ## Using
16 |
17 | ### Configuring
18 |
19 | Blueshift requires minimal configuration. It will only monitor a single S3 bucket currently, so the configuration file (ordinarily stored in `./etc/config.edn`) looks like this:
20 |
21 | ```clojure
22 | {:s3 {:credentials {:access-key ""
23 | :secret-key ""}
24 | :bucket "blueshift-data"
25 | :key-pattern ".*"
26 | :poll-interval {:seconds 30}}
27 | :telemetry {:reporters [uswitch.blueshift.telemetry/log-metrics-reporter]}}
28 | ```
29 |
30 | The S3 credentials are shared by Blueshift for watching for new files and for [Redshift's `COPY` command](http://docs.aws.amazon.com/redshift/latest/dg/t_loading-tables-from-s3.html). The `:key-pattern` option is used to filter for specific keys (so you can have a single bucket with data from different environments, systems etc.).
31 |
32 | ### Building & Running
33 |
34 | The application is written in [Clojure](http://clojure.org), to build the project you'll need to use [Leiningen](https://github.com/technomancy/leiningen).
35 |
36 | If you want to run the application on your computer you can run it directly with Leiningen (providing the path to your configuration file)
37 |
38 | $ lein run -- --config ./etc/config.edn
39 |
40 | Alternatively, you can build an Uberjar that you can run:
41 |
42 | $ lein uberjar
43 | $ java -Dlogback.configurationFile=./etc/logback.xml -jar target/blueshift-0.1.0-standalone.jar --config ./etc/config.edn
44 |
45 | The uberjar includes [Logback](http://logback.qos.ch/) for logging. `./etc/logback.xml.example` provides a simple starter configuration file with a console appender.
46 |
47 | ## Using
48 |
49 | Once the service is running you can create any number of directories in the S3 bucket. These will be periodically checked for files and, if found, an import triggered. If you wish the contents of the directory to be imported it's necessary for it to contain a file called `manifest.edn` which is used by Blueshift to know which Redshift cluster to import to and how to interpret the data files.
50 |
51 | Your S3 structure could look like this:
52 |
53 | bucket
54 | ├── directory-a
55 | │ └── foo
56 | │ └── manifest.edn
57 | │ └── 0001.tsv
58 | │ └── 0002.tsv
59 | └── directory-b
60 | └── manifest.edn
61 |
62 | and the `manifest.edn` could look like this:
63 |
64 | {:table "testing"
65 | :pk-columns ["foo"]
66 | :columns ["foo" "bar"]
67 | :jdbc-url "jdbc:postgresql://foo.eu-west-1.redshift.amazonaws.com:5439/db?tcpKeepAlive=true&user=user&password=pwd"
68 | :options ["DELIMITER '\\t'" "IGNOREHEADER 1" "ESCAPE" "TRIMBLANKS"]
69 | :data-pattern ".*tsv$"}
70 |
71 | When a manifest and data files are found an import is triggered. Once the import has been successfully committed Blueshift will **delete** any data files that were imported; the manifest remains ready for new data files to be imported.
72 |
73 | It's important that `:columns` lists all the columns (and only the columns) included within the data file and that they are in the same order. `:pk-columns` must contain a uniquely identifying primary key to ensure the correct upsert behaviour. `:options` can be used to override the Redshift copy options used during the load.
74 |
75 | Blueshift creates a temporary Amazon Redshift Copy manifest that lists all the data files found as mandatory for importing, this also makes it very efficient when loading lots of files into a highly distributed cluster.
76 |
77 | ## Metrics
78 | Blueshift tracks a few metrics using [https://github.com/sjl/metrics-clojure](https://github.com/sjl/metrics-clojure). Currently these are logged to the Slf4j logger.
79 |
80 | Starting the app will (eventually) show something like this:
81 |
82 | [metrics-logger-reporter-thread-1] INFO user - type=COUNTER, name=uswitch.blueshift.s3.directories-watched.directories, count=0
83 | [metrics-logger-reporter-thread-1] INFO user - type=METER, name=uswitch.blueshift.redshift.redshift-imports.commits, count=0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second
84 | [metrics-logger-reporter-thread-1] INFO user - type=METER, name=uswitch.blueshift.redshift.redshift-imports.imports, count=0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second
85 | [metrics-logger-reporter-thread-1] INFO user - type=METER, name=uswitch.blueshift.redshift.redshift-imports.rollbacks, count=0, mean_rate=0.0, m1=0.0, m5=0.0, m15=0.0, rate_unit=events/second
86 |
87 | ### Riemann Metrics
88 | Reporting metrics to [Riemann](http://riemann.io/) can be achieved using the [https://github.com/uswitch/blueshift-riemann-metrics](https://github.com/uswitch/blueshift-riemann-metrics) project. To enable support you'll need to build the project:
89 |
90 | $ cd blueshift-riemann-metrics
91 | $ lein uberjar
92 |
93 | And then change the `./etc/config.edn` to reference the riemann reporter:
94 |
95 | :telemetry {:reporters [uswitch.blueshift.telemetry/log-metrics-reporter
96 | uswitch.blueshift.telemetry.riemann/riemann-metrics-reporter]}
97 |
98 | Then, when you've built and run Blueshift, be sure to add the jar to the classpath (the following assumes you're in the blueshift working directory):
99 |
100 | $ cp blueshift-riemann-metrics/target/blueshift-riemann-metrics-0.1.0-standalone.jar ./target
101 | $ java -cp "target/*" uswitch.blueshift.main --config ./etc/config.edn
102 |
103 | Obviously for a production deployment you'd probably want to automate this with your continuous integration server of choice :)
104 |
105 | ## TODO
106 |
107 | * Add exception handling when cleaning uploaded files from S3
108 | * Change `KeyWatcher` to identify when directories are deleted, can exit the watcher process and remove from the list of watched directories. If it's added again later can then just create a new process.
109 | * Add safety check when processing data files- ensure that the header line of the TSV file matches the contents of `manifest.edn`
110 |
111 | ## Authors
112 |
113 | * [Paul Ingles](https://github.com/pingles) ([@pingles](http://twitter.com/pingles))
114 | * [Thomas Kristensen](https://github.com/tgk) ([@tgkristensen](http://twitter.com/tgkristensen))
115 |
116 | ## License
117 |
118 | Copyright © 2014 [uSwitch.com](http://www.uswitch.com) Limited.
119 |
120 | Distributed under the Eclipse Public License either version 1.0 or (at
121 | your option) any later version.
122 |
123 |
--------------------------------------------------------------------------------
/demo/src/demo/core.clj:
--------------------------------------------------------------------------------
1 | (ns demo.core
2 | (:require [clojure.tools.logging :refer (info error warn debug errorf)]
3 | [com.stuartsierra.component :refer (system-map using Lifecycle start)]
4 | [aws.sdk.s3 :refer (list-objects get-object delete-object put-object)]
5 | [clojure.core.async :refer (chan close! thread alts!! timeout)]
6 | [clojure.tools.cli :refer (parse-opts)])
7 | (:import [java.util UUID]
8 | [java.sql DriverManager SQLException]))
9 |
;; The PostgreSQL JDBC driver isn't registered automatically from the
;; classpath, so load the class explicitly at namespace load time.
(Class/forName "org.postgresql.Driver")

;; SQL utils

(defn- get-connection
  "Opens a JDBC connection to jdbc-url with auto-commit disabled, so work is
  only persisted on an explicit commit."
  [jdbc-url]
  (let [conn (DriverManager/getConnection jdbc-url)]
    (.setAutoCommit conn false)
    conn))
20 |
21 | ;; Lifecycle utils
22 |
(defn clear-keys
  "'dissoc' for component records: records cannot drop their declared fields,
  so instead assoc nil under each of the specified keys. Returns m unchanged
  when no keys are given."
  [m & ks]
  ;; reduce handles the empty-ks case; (apply assoc m '()) would throw an
  ;; ArityException because assoc needs at least one key/value pair.
  (reduce (fn [acc k] (assoc acc k nil)) m ks))
27 |
(defn close-channels
  "Closes every core.async channel stored under ks in state (missing keys are
  skipped), then nils those keys out via clear-keys."
  [state & ks]
  (doseq [ch (keep (partial get state) ks)]
    (close! ch))
  (apply clear-keys state ks))
33 |
34 | ;; Checking S3 directory structure
35 |
(defn- key-pattern->prefix
  "Converts a key-pattern (a regex string such as \"data.*\") into an S3 key
  prefix by replacing every literal \".*\" with \"/\"."
  [pattern]
  (.replaceAll ^String pattern "\\.\\*" "/"))
40 |
(defn- all-s3-keys
  "Lists the object keys directly under the prefix derived from key-pattern
  (delimiter \"/\" keeps the listing non-recursive). Talks to S3, so needs
  valid credentials and network access."
  [cred bucket key-pattern]
  (->>
   (list-objects cred bucket
                 {:prefix (key-pattern->prefix key-pattern) :delimiter "/"})
   :objects
   (map :key)))
48 |
;; Polls S3 every 15s and logs the bucket's key listing whenever it changes.
(defrecord S3ChangePoller [config]
  Lifecycle
  (start [this]
    (let [control-ch (chan)]
      (thread
        (loop [s3-keys #{}]
          (info "Checking S3 buckets")
          (let [new-s3-keys (set
                             (all-s3-keys
                              (-> config :s3 :credentials)
                              (-> config :s3 :bucket)
                              ;; watch the demo's "folder/" sub-directory.
                              ;; NOTE(review): all-s3-keys applies
                              ;; key-pattern->prefix to this string again;
                              ;; harmless since no ".*" remains — confirm.
                              (-> config :s3 :key-pattern key-pattern->prefix
                                  (str "folder/"))))]
            (when (not= s3-keys new-s3-keys)
              (info "S3 bucket content changed:")
              (info (sort new-s3-keys)))
            ;; wait 15s, or exit early when control-ch is closed by stop
            (let [[_ port] (alts!! [(timeout 15000) control-ch])]
              (if (= port control-ch)
                (info "Halting S3 change poller loop")
                (recur new-s3-keys))))))
      (assoc this :control-ch control-ch)))
  (stop [this]
    (close-channels this :control-ch)))
72 |
73 | ;; Publishing new TSV files
74 |
(defn- publish-new-tsv!
  "Uploads a single-row TSV object to S3 at <prefix>folder/<timestamp>.tsv.
  The row contains: a random uuid, the object key, the creation timestamp
  in millis."
  [cred bucket prefix]
  (let [timestamp (System/currentTimeMillis)
        key (str prefix "folder/" timestamp ".tsv") ; shadows clojure.core/key
        uuid (.toString (UUID/randomUUID))]
    (put-object cred bucket
                key
                (str uuid \tab key \tab timestamp))))
83 |
;; Publishes a fresh single-row TSV file to S3 every 20s until stopped.
(defrecord TSVPublisher [config]
  Lifecycle
  (start [this]
    (let [control-ch (chan)]
      (thread
        (loop []
          (info "Publishing new TSV file")
          (publish-new-tsv!
           (-> config :s3 :credentials)
           (-> config :s3 :bucket)
           (-> config :s3 :key-pattern key-pattern->prefix))
          ;; wait 20s, or exit early when control-ch is closed by stop
          (let [[_ port] (alts!! [(timeout 20000) control-ch])]
            (if (= port control-ch)
              (info "Halting TSV publisher")
              (recur)))))
      (assoc this :control-ch control-ch)))
  (stop [this]
    (close-channels this :control-ch)))
102 |
103 | ;; Poll for largest timestamp
104 |
(defn- latest-timestamp
  "Queries Redshift for the greatest `timestamp` in the demo table; returns
  nil when the result set is empty. Closes the connection, statement and
  result set when done — the previous version leaked a connection on every
  15s poll."
  [jdbc-url]
  (with-open [c (get-connection jdbc-url)
              s (.createStatement c)
              rs (.executeQuery s "SELECT MAX(timestamp) AS mt FROM demo")]
    (when (.next rs)
      (.getTimestamp rs "mt"))))
112 |
;; Polls Redshift every 15s for the latest demo-table timestamp and logs
;; whenever it changes.
(defrecord LatestTimestampPoller [jdbc-url]
  Lifecycle
  (start [this]
    (let [control-ch (chan)]
      (thread
        (loop [timestamp -1]
          (info "Polling Redshift for latest timestamp")
          (let [new-timestamp (latest-timestamp jdbc-url)]
            (when (not= new-timestamp timestamp)
              ;; fixed log-message typo: was "Reshift"
              (info "Redshift timestamp updated:" new-timestamp))
            ;; wait 15s, or exit early when control-ch is closed by stop
            (let [[_ port] (alts!! [(timeout 15000) control-ch])]
              (if (= port control-ch)
                (info "Halting Redshift timestamp poller loop")
                (recur new-timestamp))))))
      (assoc this :control-ch control-ch)))
  (stop [this]
    (close-channels this :control-ch)))
130 |
131 | ;; System
132 |
(defn build-system
  "Builds the demo component system: a TSV publisher, an S3 change poller and
  a Redshift latest-timestamp poller. Start/stop with
  com.stuartsierra.component."
  [config jdbc-url]
  (system-map
   :tsv-publisher (TSVPublisher. config)
   :s3-change-poller (S3ChangePoller. config)
   :latest-timestamp-poller (LatestTimestampPoller. jdbc-url)))
138 |
139 | ;; Ensuring manifest file is present
140 |
(defn- manifest-as-string
  "Renders the Blueshift manifest.edn contents for the demo table as a string.
  NOTE(review): relies on (str map) producing readable EDN and on small-map
  (array-map) insertion ordering; :jdbc-url is assoc'ed so it appears last —
  confirm before reordering keys."
  [jdbc-url]
  (-> {:table "demo"
       :pk-columns ["uuid"]
       :columns ["uuid" "key" "timestamp"]
       :options ["DELIMITER '\t'"
                 "COMPUPDATE OFF"
                 "STATUPDATE ON"
                 "ESCAPE"
                 "TRIMBLANKS"
                 "EMPTYASNULL"
                 "BLANKSASNULL"
                 "FILLRECORD"
                 "TRUNCATECOLUMNS"
                 "ROUNDEC"
                 "MAXERROR 1000"
                 "TIMEFORMAT 'epochmillisecs'"]
       :data-pattern ".*tsv$"}
      (assoc :jdbc-url jdbc-url)
      str))
161 |
(defn- ensure-manifest!
  "Writes (or overwrites) folder/manifest.edn in the demo S3 prefix so that
  Blueshift knows how to import the demo TSV files into Redshift."
  [config jdbc-url]
  (put-object (-> config :s3 :credentials)
              (-> config :s3 :bucket)
              (str (-> config :s3 :key-pattern key-pattern->prefix) "folder/manifest.edn")
              (manifest-as-string jdbc-url)))
168 |
169 | ;; Ensure demo table is present
170 |
(defn- drop-demo-table!
  "Drops the demo table if it exists and commits. Closes the connection when
  done — the previous version leaked it."
  [jdbc-url]
  (with-open [c (get-connection jdbc-url)]
    (let [s (.createStatement c)]
      (.executeUpdate s "DROP TABLE IF EXISTS demo")
      (.commit c))))
177 |
(defn- create-demo-table!
  "Creates the demo table (uuid primary key, key, timestamp) and commits.
  Closes the connection when done — the previous version leaked it."
  [jdbc-url]
  (with-open [c (get-connection jdbc-url)]
    (let [s (.createStatement c)]
      (.executeUpdate s "
CREATE TABLE demo (
  uuid varchar(40) NOT NULL encode lzo,
  key varchar(128) NOT NULL encode lzo,
  timestamp datetime NOT NULL encode delta32k,

  PRIMARY KEY (uuid)
)")
      (.commit c))))
191 |
192 | ;; Main entry-point
193 |
(defn main
  "Prepares the demo environment — uploads manifest.edn to S3, then drops and
  recreates the demo table in Redshift — and returns the (unstarted) demo
  component system."
  [config jdbc-url]
  (info "Ensuring manifest.edn present in S3 bucket")
  (ensure-manifest! config jdbc-url)
  (info "Dropping demo table if exists")
  (drop-demo-table! jdbc-url)
  (info "Creating demo table")
  (create-demo-table! jdbc-url)
  (build-system config jdbc-url))
203 |
204 | ;; ... and from the command line
205 |
(def cli-options
  "tools.cli option specs for the demo's -main entry point."
  [["-c" "--config CONFIG" "Path to EDN configuration file"
    :default "../etc/config.edn"
    :validate [string?]]
   ["-j" "--jdbc-url JDBC-URL" "Path to jdbc-url txt file"
    :default "./jdbc-url.txt"
    :validate [string?]]
   ["-h" "--help"]])
214 |
(defn wait!
  "Blocks the calling thread forever by acquiring a permit from an empty
  semaphore that is never released."
  []
  (.acquire (java.util.concurrent.Semaphore. 0)))
218 |
(defn -main
  "Command-line entry point: parses options (printing help and exiting if
  requested), reads the config file and the JDBC URL file, then starts the
  demo system and blocks forever."
  [& args]
  (let [{:keys [options summary]} (parse-opts args cli-options)]
    (when (:help options)
      (println summary)
      (System/exit 0))
    (let [{:keys [config jdbc-url]} options]
      (info "Starting demo")
      ;; NOTE(review): clojure.string is used fully-qualified without being
      ;; required in the ns form; it is loaded transitively — confirm.
      (let [system (main (read-string (slurp config))
                         (clojure.string/trim (slurp jdbc-url)))]
        (start system)
        (wait!)))))
230 |
--------------------------------------------------------------------------------
/src/uswitch/blueshift/redshift.clj:
--------------------------------------------------------------------------------
1 | (ns uswitch.blueshift.redshift
2 | (:require [aws.sdk.s3 :refer (put-object delete-object)]
3 | [cheshire.core :refer (generate-string)]
4 | [clojure.tools.logging :refer (info error debug)]
5 | [clojure.string :as s]
6 | [com.stuartsierra.component :refer (system-map Lifecycle using)]
7 | [clojure.core.async :refer (chan !! close! thread)]
8 | [uswitch.blueshift.util :refer (close-channels)]
9 | [metrics.meters :refer (mark! meter)]
10 | [metrics.counters :refer (inc! dec! counter)]
11 | [metrics.timers :refer (timer time!)])
12 | (:import [java.util UUID]
13 | [java.sql DriverManager SQLException]))
14 |
15 |
(defn manifest
  "Builds a Redshift COPY manifest data structure: each file in `files`
  becomes a mandatory s3:// URL entry for the given bucket."
  [bucket files]
  (let [entry (fn [f] {:url (str "s3://" bucket "/" f)
                       :mandatory true})]
    {:entries (map entry files)}))
19 |
(defn put-manifest
  "Uploads the manifest to S3 as JSON under a random \"<uuid>.manifest\" key.
  Returns {:key file-name :url s3-url} for the uploaded object.
  Manifest should be generated with uswitch.blueshift.redshift/manifest."
  [credentials bucket manifest]
  (let [file-name (str (UUID/randomUUID) ".manifest")
        s3-url (str "s3://" bucket "/" file-name)]
    (put-object credentials bucket file-name (generate-string manifest))
    {:key file-name
     :url s3-url}))
29 |
;; Meters tracking import attempts and their commit/rollback outcomes; they
;; appear in reports as uswitch.blueshift.redshift.redshift-imports.*
(def redshift-imports (meter [(str *ns*) "redshift-imports" "imports"]))
(def redshift-import-rollbacks (meter [(str *ns*) "redshift-imports" "rollbacks"]))
(def redshift-import-commits (meter [(str *ns*) "redshift-imports" "commits"]))

;; pgsql driver isn't loaded automatically from classpath
(Class/forName "org.postgresql.Driver")
36 |
(defn connection
  "Opens a JDBC connection to jdbc-url with auto-commit disabled, so that the
  import's statements commit or roll back as a single unit."
  [jdbc-url]
  (let [conn (DriverManager/getConnection jdbc-url)]
    (.setAutoCommit conn false)
    conn))
40 |
;; Bound per-import by with-connection; the statement helpers below read it.
(def ^{:dynamic true} *current-connection* nil)

(defn prepare-statement
  "Prepares sql against the connection currently bound to
  *current-connection*. Must be called within with-connection."
  [sql]
  (.prepareStatement *current-connection* sql))
46 |
(defmacro with-connection
  "Opens a connection to jdbc-url, binds it to *current-connection*, runs
  body, then commits and marks the commits meter. On SQLException: rolls
  back, marks the rollbacks meter, and rethrows. The connection is always
  closed in the finally clause.
  NOTE(review): a non-SQLException escaping body skips both commit and
  rollback before the close; this relies on the driver's close-without-commit
  semantics — confirm."
  [jdbc-url & body]
  `(binding [*current-connection* (connection ~jdbc-url)]
     (try ~@body
          (debug "COMMIT")
          (.commit *current-connection*)
          (mark! redshift-import-commits)
          (catch SQLException e#
            (error e# "ROLLBACK")
            (mark! redshift-import-rollbacks)
            (.rollback *current-connection*)
            (throw e#))
          (finally
            (when-not (.isClosed *current-connection*)
              (.close *current-connection*))))))
61 |
62 |
(defn create-staging-table-stmt
  "Prepares a statement that creates staging-table as a TEMPORARY table with
  target-table's shape (including column defaults)."
  [target-table staging-table]
  (let [sql (format "CREATE TEMPORARY TABLE %s (LIKE %s INCLUDING DEFAULTS)"
                    staging-table
                    target-table)]
    (prepare-statement sql)))
67 |
(defn copy-from-s3-stmt
  "Prepares a Redshift COPY statement loading `table`'s `columns` from the
  manifest at manifest-url, with any extra COPY `options` from the table
  manifest appended.
  NOTE(review): the AWS credentials are interpolated into the SQL text
  (required by Redshift's COPY syntax) — avoid logging the statement."
  [table manifest-url {:keys [access-key secret-key] :as creds} {:keys [columns options] :as table-manifest}]
  (prepare-statement (format "COPY %s (%s) FROM '%s' CREDENTIALS 'aws_access_key_id=%s;aws_secret_access_key=%s' %s manifest"
                             table
                             (s/join "," columns)
                             manifest-url
                             access-key
                             secret-key
                             (s/join " " options))))
76 |
(defn truncate-table-stmt
  "Prepares a TRUNCATE TABLE statement for target-table."
  [target-table]
  (let [sql (format "truncate table %s" target-table)]
    (prepare-statement sql)))
79 |
80 |
(defn delete-in-query
  "SQL deleting from target-table every row whose `key` value also appears in
  staging-table — the fast path for a single primary key."
  [target-table staging-table key]
  (str "DELETE FROM " target-table
       " WHERE " key " IN (SELECT " key " FROM " staging-table ")"))
83 |
(defn delete-join-query
  "SQL deleting from target-table every row matched (on all of `keys`) by a
  row in staging-table, using DELETE ... USING."
  [target-table staging-table keys]
  (let [match (fn [pk] (str target-table "." pk "=" staging-table "." pk))
        where (s/join " AND " (map match keys))]
    (format "DELETE FROM %s USING %s WHERE %s" target-table staging-table where)))
88 |
(defn delete-target-query
  "Attempts to optimise delete strategy based on keys arity. With single primary keys
  its significantly faster to delete."
  [target-table staging-table keys]
  ;; `:else` replaces the non-idiomatic `:default` terminal clause (any truthy
  ;; value works in `cond`, but `:else` is the convention).
  (cond (= 1 (count keys)) (delete-in-query target-table staging-table (first keys))
        :else (delete-join-query target-table staging-table keys)))
95 |
(defn delete-target-stmt
  "Deletes rows, with the same primary key value(s), from target-table that will be
  overwritten by values in staging-table."
  [target-table staging-table keys]
  (-> (delete-target-query target-table staging-table keys)
      (prepare-statement)))
101 |
(defn staging-select-statement
  "SELECT used to read rows out of `staging-table` for the final insert.
  A string `:staging-select` from the table manifest is used verbatim with
  `{{table}}` substituted; `:distinct` de-duplicates staged rows; anything
  else selects everything."
  [{:keys [staging-select]} staging-table]
  (cond
    (string? staging-select) (s/replace staging-select #"\{\{table\}\}" staging-table)
    (= :distinct staging-select) (format "SELECT DISTINCT * FROM %s" staging-table)
    ;; `:else` replaces the non-idiomatic `:default` terminal clause.
    :else (format "SELECT * FROM %s" staging-table)))
107 |
(defn insert-from-staging-stmt
  "Statement inserting every (selected) staging row into `target-table`."
  [target-table staging-table table-manifest]
  (prepare-statement
   (format "INSERT INTO %s %s"
           target-table
           (staging-select-statement table-manifest staging-table))))
111 |
(defn append-from-staging-stmt
  "Statement inserting only those staging rows whose key columns have no match
  in `target-table` (anti-join via LEFT JOIN ... IS NULL)."
  [target-table staging-table keys]
  (let [on-clause (s/join " AND " (for [k keys] (str "s." k " = t." k)))
        null-clause (s/join " AND " (for [k keys] (str "t." k " IS NULL")))
        sql (format "INSERT INTO %s SELECT s.* FROM %s s LEFT JOIN %s t ON %s WHERE %s"
                    target-table staging-table target-table on-clause null-clause)]
    (prepare-statement sql)))
117 |
(defn drop-table-stmt
  "Statement dropping `table` (used to discard the staging table)."
  [table]
  (-> (format "DROP TABLE %s" table)
      (prepare-statement)))
120 |
(defn- aws-censor
  "Masks the AWS access key id and secret access key embedded in COPY
  CREDENTIALS strings so statements can be logged without leaking secrets.
  Uses the namespace's `s` alias for clojure.string for consistency; the
  parameter is renamed from `s` to `sql` to avoid reading as a shadow of the
  alias. NOTE(review): `[^;]*` censors everything up to the next `;` (or end
  of string), which may also swallow text after the secret — acceptable for
  log output."
  [sql]
  (-> sql
      (s/replace #"aws_access_key_id=[^;]*" "aws_access_key_id=***")
      (s/replace #"aws_secret_access_key=[^;]*" "aws_secret_access_key=***")))
126 |
(defn execute
  "Executes each prepared statement in order, first logging a
  credential-censored copy at debug level. On SQLException, logs which
  statement failed and rethrows."
  [& statements]
  (doseq [stmt statements]
    (debug (aws-censor (str stmt)))
    (try
      (.execute stmt)
      (catch SQLException e
        (error "Error executing statement:" (str stmt))
        (throw e)))))
134 |
(defn merge-table
  "Upsert strategy: COPY the manifest's data files into a temporary staging
  table, delete target rows sharing primary keys with the staged rows, insert
  all staged rows, then drop the staging table — one transaction via
  `with-connection`."
  [credentials redshift-manifest-url {:keys [table jdbc-url pk-columns strategy] :as table-manifest}]
  (let [staging-table (str table "_staging")]
    (mark! redshift-imports)
    (with-connection jdbc-url
      (execute (create-staging-table-stmt table staging-table)
               (copy-from-s3-stmt staging-table redshift-manifest-url credentials table-manifest)
               (delete-target-stmt table staging-table pk-columns)
               (insert-from-staging-stmt table staging-table table-manifest)
               (drop-table-stmt staging-table)))))
144 |
(defn replace-table
  "Replace strategy: truncate the target table, then COPY the manifest's data
  files straight into it — one transaction via `with-connection`."
  [credentials redshift-manifest-url {:keys [table jdbc-url pk-columns strategy] :as table-manifest}]
  (mark! redshift-imports)
  (with-connection jdbc-url
    (execute (truncate-table-stmt table)
             (copy-from-s3-stmt table redshift-manifest-url credentials table-manifest))))
150 |
(defn append-table
  "Append strategy: COPY the manifest's data files into a temporary staging
  table, insert only staged rows whose primary keys are absent from the
  target, then drop the staging table — one transaction via `with-connection`.
  Existing target rows are never modified."
  [credentials redshift-manifest-url {:keys [table jdbc-url pk-columns strategy] :as table-manifest}]
  (let [staging-table (str table "_staging")]
    (mark! redshift-imports)
    (with-connection jdbc-url
      (execute (create-staging-table-stmt table staging-table)
               (copy-from-s3-stmt staging-table redshift-manifest-url credentials table-manifest)
               (append-from-staging-stmt table staging-table pk-columns)
               (drop-table-stmt staging-table)))))
159 |
(defn load-table
  "Dispatches to the configured load strategy (:merge, :replace or :append).
  `strategy` may be a keyword or a string (as read from the EDN manifest)."
  [credentials redshift-manifest-url {strategy :strategy :as table-manifest}]
  (case (keyword strategy)
    :merge (merge-table credentials redshift-manifest-url table-manifest)
    :replace (replace-table credentials redshift-manifest-url table-manifest)
    :append (append-table credentials redshift-manifest-url table-manifest)
    ;; Fail with a descriptive message instead of `case`'s generic
    ;; "No matching clause" IllegalArgumentException.
    (throw (IllegalArgumentException.
            (str "Unknown load strategy: " (pr-str strategy))))))
165 |
--------------------------------------------------------------------------------
/src/uswitch/blueshift/s3.clj:
--------------------------------------------------------------------------------
1 | (ns uswitch.blueshift.s3
2 | (:require [com.stuartsierra.component :refer (Lifecycle system-map using start stop)]
3 | [clojure.tools.logging :refer (info error warn debug errorf)]
4 | [aws.sdk.s3 :refer (list-objects get-object delete-object)]
5 | [clojure.set :refer (difference)]
6 | [clojure.core.async :refer (go-loop thread put! chan >!! ! (read-edn content)
78 | (map->Manifest)
79 | (assoc-if-nil :strategy "merge")
80 | (update-in [:data-pattern] re-pattern))))))
81 |
(defn- step-scan
  "One :scan step of the key-watcher state machine: lists `directory` in
  `bucket`, reads and validates its manifest, and moves to :load when data
  files matching the manifest's :data-pattern exist. All other outcomes —
  no manifest, no data files, or any failure (logged) — stay in :scan with
  :pause? set so the caller waits before re-scanning."
  [credentials bucket directory]
  (try
    (let [fs (files credentials bucket directory)]
      (if-let [manifest (manifest credentials bucket fs)]
        (do
          (validate manifest)
          (let [data-files (filter (fn [{:keys [key]}]
                                     (re-matches (:data-pattern manifest) key))
                                   fs)]
            (if (seq data-files)
              (do
                (info "Watcher triggering import" (:table manifest))
                ;; Bug fix: previously logged `load` — clojure.core/load, a
                ;; function object — instead of anything useful.
                (debug "Triggering load with manifest" manifest)
                {:state :load, :table-manifest manifest, :files (map :key data-files)})
              {:state :scan, :pause? true})))
        {:state :scan, :pause? true}))
    (catch clojure.lang.ExceptionInfo e
      (error e "Error with manifest file")
      {:state :scan, :pause? true})
    (catch ConnectionPoolTimeoutException e
      (warn e "Connection timed out. Will re-try.")
      {:state :scan, :pause? true})
    (catch Exception e
      (error e "Failed reading content of" (str bucket "/" directory))
      {:state :scan, :pause? true})))
108 |
;; Counter of data files currently mid-import and a timer around each
;; Redshift load (metrics-clojure); maintained by `step-load`.
(def importing-files (counter [(str *ns*) "importing-files" "files"]))
(def import-timer (timer [(str *ns*) "importing-files" "time"]))
111 |
(defn- step-load
  "One :load step: writes a Redshift manifest listing `files` to S3, COPYs it
  into the target table, and moves to :delete on success. On SQLException the
  machine returns to a paused :scan without deleting the data files. The S3
  manifest object is removed in both outcomes.
  NOTE(review): throwables other than SQLException propagate out of this step,
  leaving `importing-files` incremented and the manifest object in place."
  [credentials bucket table-manifest files]
  (let [redshift-manifest (redshift/manifest bucket files)
        {:keys [key url]} (redshift/put-manifest credentials bucket redshift-manifest)]

    (info "Importing" (count files) "data files to table" (:table table-manifest) "from manifest" url)
    (debug "Importing Redshift Manifest" redshift-manifest)
    (inc! importing-files (count files))
    (try (time! import-timer
                (redshift/load-table credentials url table-manifest))
         (info "Successfully imported" (count files) "files")
         ;; Success: remove the S3 manifest, then hand the data files to the
         ;; :delete step.
         (delete-object credentials bucket key)
         (dec! importing-files (count files))
         {:state :delete
          :files files}
         (catch java.sql.SQLException e
           (error e "Error loading into" (:table table-manifest))
           (error (:table table-manifest) "Redshift manifest content:" redshift-manifest)
           ;; Failure: remove the manifest but keep the data files so the next
           ;; scan can retry the import.
           (delete-object credentials bucket key)
           (dec! importing-files (count files))
           {:state :scan
            :pause? true}))))
134 |
(defn- step-delete
  "One :delete step: best-effort removal of the imported data files from S3;
  individual failures are logged and ignored. Always returns to a paused
  :scan state."
  [credentials bucket files]
  ;; The wrapping `do` was redundant — a defn body is an implicit do.
  (doseq [key files]
    (info "Deleting" (str "s3://" bucket "/" key))
    (try
      (delete-object credentials bucket key)
      (catch Exception e
        ;; Pass the exception to the logger; it was previously dropped.
        (warn e "Couldn't delete" key " - ignoring"))))
  {:state :scan, :pause? true})
145 |
(defn- progress
  "Advances the watcher state machine one step, dispatching on the current
  :state (:scan, :load or :delete) and returning the next world map."
  [{:keys [state table-manifest files]}
   {:keys [credentials bucket directory]}]
  (case state
    :scan (step-scan credentials bucket directory)
    :load (step-load credentials bucket table-manifest files)
    :delete (step-delete credentials bucket files)))
153 |
;; Polls a single S3 "directory" for importable data, driving the
;; scan -> load -> delete state machine (`progress`) on a dedicated thread.
;; Closing :watcher-control-ch (done by stop) terminates the loop.
(defrecord KeyWatcher [credentials bucket directory poll-interval-seconds]
  Lifecycle
  (start [this]
    (info "Starting KeyWatcher for" (str bucket "/" directory) "polling every" poll-interval-seconds "seconds")
    (let [control-ch (chan)
          configuration {:credentials credentials :bucket bucket :directory directory}]
      (thread
        (loop [timer (timeout (* poll-interval-seconds 1000))
               world {:state :scan}]
          (let [next-world (progress world configuration)]
            (if (:pause? next-world)
              ;; Pausing: wait on either the poll timer or the control channel;
              ;; a take from control-ch (closed by stop) ends the loop.
              (let [[_ c] (alts!! [control-ch timer])]
                (when (not= c control-ch)
                  (recur (timeout (* poll-interval-seconds 1000)) next-world)))
              ;; Not pausing: continue immediately, reusing the current timer.
              (recur timer next-world)))))
      (assoc this :watcher-control-ch control-ch)))
  (stop [this]
    (info "Stopping KeyWatcher for" (str bucket "/" directory))
    (close-channels this :watcher-control-ch)))
173 |
174 |
(defn spawn-key-watcher!
  "Constructs and starts a KeyWatcher for `directory`, returning the started
  component (which carries its control channel)."
  [credentials bucket directory poll-interval-seconds]
  (start (->KeyWatcher credentials bucket directory poll-interval-seconds)))
177 |
;; Counter of S3 directories currently being watched (metrics-clojure).
(def directories-watched (counter [(str *ns*) "directories-watched" "directories"]))
179 |
180 | (defrecord KeyWatcherSpawner [bucket-watcher poll-interval-seconds]
181 | Lifecycle
182 | (start [this]
183 | (info "Starting KeyWatcherSpawner")
184 | (let [{:keys [new-directories-ch bucket credentials]} bucket-watcher
185 | watchers (atom nil)]
186 | (go-loop [dirs (KeyWatcherSpawner {:poll-interval-seconds (-> config :s3 :poll-interval :seconds)}))
204 |
(defn matching-directories
  "Set of leaf directories in `bucket` whose keys fully match `key-pattern`.
  Returns an empty set (after logging) if listing the bucket fails."
  [credentials bucket key-pattern]
  (try
    (set (filter (partial re-matches key-pattern)
                 (leaf-directories credentials bucket)))
    (catch Exception e
      (errorf e "Error checking for matching object keys in \"%s\"" bucket)
      #{})))
212 |
;; Polls `bucket` for leaf directories matching `key-pattern`, pushing each
;; newly-seen batch of directories onto :new-directories-ch. Runs on its own
;; thread; closing :control-ch (done by stop) terminates the loop.
(defrecord BucketWatcher [credentials bucket key-pattern poll-interval-seconds]
  Lifecycle
  (start [this]
    (info "Starting BucketWatcher. Polling" bucket "every" poll-interval-seconds "seconds for keys matching" key-pattern)
    (let [new-directories-ch (chan)
          control-ch (chan)]
      (thread
        (loop [dirs nil]
          (let [available-dirs (matching-directories credentials bucket key-pattern)
                ;; Only directories not seen on a previous iteration are
                ;; published; removals are ignored.
                new-dirs (difference available-dirs dirs)]
            (when (seq new-dirs)
              (info "New directories:" new-dirs "spawning" (count new-dirs) "watchers")
              (>!! new-directories-ch new-dirs))
            ;; Wait for the poll interval or a control message; a take from
            ;; control-ch (closed by stop) ends the loop.
            (let [[v c] (alts!! [(timeout (* 1000 poll-interval-seconds)) control-ch])]
              (when-not (= c control-ch)
                (recur available-dirs))))))
      (assoc this :control-ch control-ch :new-directories-ch new-directories-ch)))
  (stop [this]
    (info "Stopping BucketWatcher")
    (close-channels this :control-ch :new-directories-ch)))
233 |
(defn bucket-watcher
  "Creates a process watching for objects in S3 buckets.
  Defaults :key-pattern to matching everything when the config omits it."
  [config]
  (map->BucketWatcher {:credentials (-> config :s3 :credentials)
                       :bucket (-> config :s3 :bucket)
                       :poll-interval-seconds (-> config :s3 :poll-interval :seconds)
                       ;; Bug fix: the `or` previously wrapped (re-pattern ...),
                       ;; so a missing :key-pattern threw NullPointerException
                       ;; and the #".*" default was unreachable (re-pattern on a
                       ;; non-nil string is always truthy).
                       :key-pattern (re-pattern (or (-> config :s3 :key-pattern)
                                                    ".*"))}))
242 |
243 | (defrecord PrintSink [prefix chan-k component]
244 | Lifecycle
245 | (start [this]
246 | (let [ch (get component chan-k)]
247 | (go-loop [msg (PrintSink {:prefix prefix :chan-k chan-k}))
258 |
(defn s3-system
  "Builds the S3-watching component system: a BucketWatcher plus a
  KeyWatcherSpawner that depends on it."
  [config]
  (let [watcher (bucket-watcher config)
        spawner (using (key-watcher-spawner config) [:bucket-watcher])]
    (system-map :bucket-watcher watcher
                :key-watcher-spawner spawner)))
263 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
4 |
5 | 1. DEFINITIONS
6 |
7 | "Contribution" means:
8 |
9 | a) in the case of the initial Contributor, the initial code and
10 | documentation distributed under this Agreement, and
11 |
12 | b) in the case of each subsequent Contributor:
13 |
14 | i) changes to the Program, and
15 |
16 | ii) additions to the Program;
17 |
18 | where such changes and/or additions to the Program originate from and are
19 | distributed by that particular Contributor. A Contribution 'originates' from
20 | a Contributor if it was added to the Program by such Contributor itself or
21 | anyone acting on such Contributor's behalf. Contributions do not include
22 | additions to the Program which: (i) are separate modules of software
23 | distributed in conjunction with the Program under their own license
24 | agreement, and (ii) are not derivative works of the Program.
25 |
26 | "Contributor" means any person or entity that distributes the Program.
27 |
28 | "Licensed Patents" mean patent claims licensable by a Contributor which are
29 | necessarily infringed by the use or sale of its Contribution alone or when
30 | combined with the Program.
31 |
32 | "Program" means the Contributions distributed in accordance with this
33 | Agreement.
34 |
35 | "Recipient" means anyone who receives the Program under this Agreement,
36 | including all Contributors.
37 |
38 | 2. GRANT OF RIGHTS
39 |
40 | a) Subject to the terms of this Agreement, each Contributor hereby grants
41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to
42 | reproduce, prepare derivative works of, publicly display, publicly perform,
43 | distribute and sublicense the Contribution of such Contributor, if any, and
44 | such derivative works, in source code and object code form.
45 |
46 | b) Subject to the terms of this Agreement, each Contributor hereby grants
47 | Recipient a non-exclusive, worldwide, royalty-free patent license under
48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise
49 | transfer the Contribution of such Contributor, if any, in source code and
50 | object code form. This patent license shall apply to the combination of the
51 | Contribution and the Program if, at the time the Contribution is added by the
52 | Contributor, such addition of the Contribution causes such combination to be
53 | covered by the Licensed Patents. The patent license shall not apply to any
54 | other combinations which include the Contribution. No hardware per se is
55 | licensed hereunder.
56 |
57 | c) Recipient understands that although each Contributor grants the licenses
58 | to its Contributions set forth herein, no assurances are provided by any
59 | Contributor that the Program does not infringe the patent or other
60 | intellectual property rights of any other entity. Each Contributor disclaims
61 | any liability to Recipient for claims brought by any other entity based on
62 | infringement of intellectual property rights or otherwise. As a condition to
63 | exercising the rights and licenses granted hereunder, each Recipient hereby
64 | assumes sole responsibility to secure any other intellectual property rights
65 | needed, if any. For example, if a third party patent license is required to
66 | allow Recipient to distribute the Program, it is Recipient's responsibility
67 | to acquire that license before distributing the Program.
68 |
69 | d) Each Contributor represents that to its knowledge it has sufficient
70 | copyright rights in its Contribution, if any, to grant the copyright license
71 | set forth in this Agreement.
72 |
73 | 3. REQUIREMENTS
74 |
75 | A Contributor may choose to distribute the Program in object code form under
76 | its own license agreement, provided that:
77 |
78 | a) it complies with the terms and conditions of this Agreement; and
79 |
80 | b) its license agreement:
81 |
82 | i) effectively disclaims on behalf of all Contributors all warranties and
83 | conditions, express and implied, including warranties or conditions of title
84 | and non-infringement, and implied warranties or conditions of merchantability
85 | and fitness for a particular purpose;
86 |
87 | ii) effectively excludes on behalf of all Contributors all liability for
88 | damages, including direct, indirect, special, incidental and consequential
89 | damages, such as lost profits;
90 |
91 | iii) states that any provisions which differ from this Agreement are offered
92 | by that Contributor alone and not by any other party; and
93 |
94 | iv) states that source code for the Program is available from such
95 | Contributor, and informs licensees how to obtain it in a reasonable manner on
96 | or through a medium customarily used for software exchange.
97 |
98 | When the Program is made available in source code form:
99 |
100 | a) it must be made available under this Agreement; and
101 |
102 | b) a copy of this Agreement must be included with each copy of the Program.
103 |
104 | Contributors may not remove or alter any copyright notices contained within
105 | the Program.
106 |
107 | Each Contributor must identify itself as the originator of its Contribution,
108 | if any, in a manner that reasonably allows subsequent Recipients to identify
109 | the originator of the Contribution.
110 |
111 | 4. COMMERCIAL DISTRIBUTION
112 |
113 | Commercial distributors of software may accept certain responsibilities with
114 | respect to end users, business partners and the like. While this license is
115 | intended to facilitate the commercial use of the Program, the Contributor who
116 | includes the Program in a commercial product offering should do so in a
117 | manner which does not create potential liability for other Contributors.
118 | Therefore, if a Contributor includes the Program in a commercial product
119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend
120 | and indemnify every other Contributor ("Indemnified Contributor") against any
121 | losses, damages and costs (collectively "Losses") arising from claims,
122 | lawsuits and other legal actions brought by a third party against the
123 | Indemnified Contributor to the extent caused by the acts or omissions of such
124 | Commercial Contributor in connection with its distribution of the Program in
125 | a commercial product offering. The obligations in this section do not apply
126 | to any claims or Losses relating to any actual or alleged intellectual
127 | property infringement. In order to qualify, an Indemnified Contributor must:
128 | a) promptly notify the Commercial Contributor in writing of such claim, and
129 | b) allow the Commercial Contributor to control, and cooperate with the
130 | Commercial Contributor in, the defense and any related settlement
131 | negotiations. The Indemnified Contributor may participate in any such claim
132 | at its own expense.
133 |
134 | For example, a Contributor might include the Program in a commercial product
135 | offering, Product X. That Contributor is then a Commercial Contributor. If
136 | that Commercial Contributor then makes performance claims, or offers
137 | warranties related to Product X, those performance claims and warranties are
138 | such Commercial Contributor's responsibility alone. Under this section, the
139 | Commercial Contributor would have to defend claims against the other
140 | Contributors related to those performance claims and warranties, and if a
141 | court requires any other Contributor to pay any damages as a result, the
142 | Commercial Contributor must pay those damages.
143 |
144 | 5. NO WARRANTY
145 |
146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON
147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER
148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR
149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A
150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the
151 | appropriateness of using and distributing the Program and assumes all risks
152 | associated with its exercise of rights under this Agreement , including but
153 | not limited to the risks and costs of program errors, compliance with
154 | applicable laws, damage to or loss of data, programs or equipment, and
155 | unavailability or interruption of operations.
156 |
157 | 6. DISCLAIMER OF LIABILITY
158 |
159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION
162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE
165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY
166 | OF SUCH DAMAGES.
167 |
168 | 7. GENERAL
169 |
170 | If any provision of this Agreement is invalid or unenforceable under
171 | applicable law, it shall not affect the validity or enforceability of the
172 | remainder of the terms of this Agreement, and without further action by the
173 | parties hereto, such provision shall be reformed to the minimum extent
174 | necessary to make such provision valid and enforceable.
175 |
176 | If Recipient institutes patent litigation against any entity (including a
177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself
178 | (excluding combinations of the Program with other software or hardware)
179 | infringes such Recipient's patent(s), then such Recipient's rights granted
180 | under Section 2(b) shall terminate as of the date such litigation is filed.
181 |
182 | All Recipient's rights under this Agreement shall terminate if it fails to
183 | comply with any of the material terms or conditions of this Agreement and
184 | does not cure such failure in a reasonable period of time after becoming
185 | aware of such noncompliance. If all Recipient's rights under this Agreement
186 | terminate, Recipient agrees to cease use and distribution of the Program as
187 | soon as reasonably practicable. However, Recipient's obligations under this
188 | Agreement and any licenses granted by Recipient relating to the Program shall
189 | continue and survive.
190 |
191 | Everyone is permitted to copy and distribute copies of this Agreement, but in
192 | order to avoid inconsistency the Agreement is copyrighted and may only be
193 | modified in the following manner. The Agreement Steward reserves the right to
194 | publish new versions (including revisions) of this Agreement from time to
195 | time. No one other than the Agreement Steward has the right to modify this
196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The
197 | Eclipse Foundation may assign the responsibility to serve as the Agreement
198 | Steward to a suitable separate entity. Each new version of the Agreement will
199 | be given a distinguishing version number. The Program (including
200 | Contributions) may always be distributed subject to the version of the
201 | Agreement under which it was received. In addition, after a new version of
202 | the Agreement is published, Contributor may elect to distribute the Program
203 | (including its Contributions) under the new version. Except as expressly
204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or
205 | licenses to the intellectual property of any Contributor under this
206 | Agreement, whether expressly, by implication, estoppel or otherwise. All
207 | rights in the Program not expressly granted under this Agreement are
208 | reserved.
209 |
210 | This Agreement is governed by the laws of the State of Washington and the
211 | intellectual property laws of the United States of America. No party to this
212 | Agreement will bring a legal action under this Agreement more than one year
213 | after the cause of action arose. Each party waives its rights to a jury trial
214 | in any resulting litigation.
215 |
--------------------------------------------------------------------------------