├── .lein-classpath
├── test
├── .gitignore
├── data
│ ├── acl2.xml
│ ├── cors1.xml
│ ├── acl3.xml
│ ├── acl1.xml
│ ├── acl5.xml
│ ├── form-upload1.txt
│ ├── form-upload2.txt
│ └── acl4.xml
└── io
│ └── pithos
│ ├── reporter_test.clj
│ ├── cors_test.clj
│ ├── acl_test.clj
│ ├── util_test.clj
│ ├── sig_test.clj
│ └── meta_test.clj
├── vagrant
├── .gitignore
├── manifests
│ └── store.pp
├── common.yaml
├── modules
│ ├── cassandra
│ │ ├── templates
│ │ │ ├── cassandra.env.erb
│ │ │ └── cassandra.yaml.erb
│ │ └── manifests
│ │ │ └── init.pp
│ ├── base
│ │ └── manifests
│ │ │ └── init.pp
│ └── pithos
│ │ └── manifests
│ │ └── init.pp
├── hiera.yaml
└── Vagrantfile
├── doc
├── .gitignore
├── s3cmd.cfg
├── pithos.yaml
├── source
│ ├── index.rst
│ ├── quickstart.rst
│ ├── developer.rst
│ ├── concepts.rst
│ └── clients.rst
└── Makefile
├── .dockerignore
├── pkg
├── deb
│ ├── postrm.sh
│ ├── prerm.sh
│ ├── pithos.default
│ ├── pithos
│ ├── preinst.sh
│ ├── postinst.sh
│ └── init.sh
├── rpm
│ ├── prerm.sh
│ ├── pithos-default
│ ├── pithos
│ ├── postinst.sh
│ └── init.sh
└── tar
│ └── pithos
├── .travis.yml
├── docker
├── pithos
│ ├── pithos.yaml.toml
│ ├── docker-entrypoint.sh
│ ├── pithos.yaml.tmpl
│ └── Dockerfile
└── nginx
│ ├── Dockerfile
│ ├── entrypoint.sh
│ └── nginx.conf
├── .gitignore
├── src
└── io
│ ├── pithos
│ ├── reporter.clj
│ ├── keystore.clj
│ ├── system.clj
│ ├── schema.clj
│ ├── api.clj
│ ├── response.clj
│ ├── store.clj
│ ├── acl.clj
│ ├── sig.clj
│ ├── util.clj
│ ├── cors.clj
│ ├── bucket.clj
│ ├── config.clj
│ ├── blob.clj
│ ├── desc.clj
│ ├── perms.clj
│ ├── sig4.clj
│ ├── stream.clj
│ └── request.clj
│ └── pithos.clj
├── resources
└── logback.xml
├── LICENSE
├── docker-compose.yml
├── project.clj
├── README.md
└── tasks
└── leiningen
├── tar.clj
├── fatdeb.clj
└── fatrpm.clj
/.lein-classpath:
--------------------------------------------------------------------------------
1 | :tasks
2 |
--------------------------------------------------------------------------------
/test/.gitignore:
--------------------------------------------------------------------------------
1 | shell/
2 |
--------------------------------------------------------------------------------
/test/data/acl2.xml:
--------------------------------------------------------------------------------
1 | foo fa fi
2 |
--------------------------------------------------------------------------------
/vagrant/.gitignore:
--------------------------------------------------------------------------------
1 | .vagrant
2 |
--------------------------------------------------------------------------------
/doc/.gitignore:
--------------------------------------------------------------------------------
1 | *.png
2 | *.html
3 |
--------------------------------------------------------------------------------
/vagrant/manifests/store.pp:
--------------------------------------------------------------------------------
1 | include cassandra
2 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git/
2 | .m2/
3 | pithos-quickstart/
4 | target/
5 |
--------------------------------------------------------------------------------
/pkg/deb/postrm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | if [ "$1" = "purge" ] ; then
4 | update-rc.d pithos remove >/dev/null
5 | fi
6 |
--------------------------------------------------------------------------------
/pkg/rpm/prerm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | if [ -x "/etc/init.d/pithos" ]; then
4 | service pithos stop || exit $?
5 | fi
6 |
--------------------------------------------------------------------------------
/vagrant/common.yaml:
--------------------------------------------------------------------------------
1 | motd: 'puppet managed host with hiera support'
2 | cassandra_heap_size: "8G"
3 | cassandra_heap_new: "800m"
4 |
--------------------------------------------------------------------------------
/pkg/deb/prerm.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | if [ -x "/etc/init.d/pithos" ]; then
4 | invoke-rc.d pithos stop || exit $?
5 | fi
6 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: clojure
3 | lein: lein
4 | jdk:
5 | - oraclejdk8
6 | branches:
7 | except:
8 | - gh-pages
9 |
--------------------------------------------------------------------------------
/vagrant/modules/cassandra/templates/cassandra.env.erb:
--------------------------------------------------------------------------------
1 | MAX_HEAP_SIZE="<%= @cassandra_heap_size %>"
2 | HEAP_NEWSIZE="<%= @cassandra_heap_new %>"
3 |
--------------------------------------------------------------------------------
/vagrant/hiera.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | :backends:
3 | - yaml
4 | :hierarchy:
5 | - '%{hostname}'
6 | - common
7 | :yaml:
8 | :datadir: '/vagrant/configstore'
--------------------------------------------------------------------------------
/pkg/deb/pithos.default:
--------------------------------------------------------------------------------
1 | # Optionally add classes to the classpath for additional functionality
2 | # EXTRA_CLASSPATH=
3 |
4 | # Optional JAVA_OPTS
5 | # EXTRA_JAVA_OPTS=
6 |
--------------------------------------------------------------------------------
/pkg/rpm/pithos-default:
--------------------------------------------------------------------------------
1 | # Optionally add classes to the classpath for additional functionality
2 | # EXTRA_CLASSPATH=
3 |
4 | # Optional JAVA_OPTS
5 | # EXTRA_JAVA_OPTS=
6 |
--------------------------------------------------------------------------------
/docker/pithos/pithos.yaml.toml:
--------------------------------------------------------------------------------
1 | [template]
2 | src = "pithos.yaml.tmpl"
3 | dest = "/etc/pithos/pithos.yaml"
4 | keys = [
5 | "/pithos/service/uri",
6 | "/pithos/cassandra/host",
7 | ]
8 |
--------------------------------------------------------------------------------
/vagrant/modules/base/manifests/init.pp:
--------------------------------------------------------------------------------
1 | class base {
2 |
3 | $motd_content = hiera('motd', "no motd set")
4 |
5 | file { '/etc/motd':
6 | content => "${motd_content}\n"
7 | }
8 |
9 | }
10 |
--------------------------------------------------------------------------------
/pkg/deb/pithos:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ -f /etc/default/pithos ]; then
4 | . /etc/default/pithos
5 | fi
6 |
7 | JAR="$EXTRA_CLASSPATH:/usr/lib/pithos/pithos.jar"
8 | CONFIG="/etc/pithos/pithos.yaml"
9 |
10 | exec java $EXTRA_JAVA_OPTS $OPTS -cp "$JAR" io.pithos -f "$CONFIG" "$@"
11 |
--------------------------------------------------------------------------------
/pkg/rpm/pithos:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ -f /etc/default/pithos ]; then
4 | . /etc/default/pithos
5 | fi
6 |
7 | JAR="$EXTRA_CLASSPATH:/usr/lib/pithos/pithos.jar"
8 | CONFIG="/etc/pithos/pithos.yaml"
9 |
10 | exec java $EXTRA_JAVA_OPTS $OPTS -cp "$JAR" io.pithos -f "$CONFIG" "$@"
11 |
--------------------------------------------------------------------------------
/pkg/tar/pithos:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ -f /etc/default/pithos ]; then
4 | . /etc/default/pithos
5 | fi
6 |
7 | JAR="$EXTRA_CLASSPATH:/usr/lib/pithos/pithos.jar"
8 | CONFIG="/etc/pithos/pithos.yaml"
9 |
10 | exec java $EXTRA_JAVA_OPTS $OPTS -cp "$JAR" io.pithos -f "$CONFIG" "$@"
11 |
--------------------------------------------------------------------------------
/docker/nginx/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM nginx:latest
2 | RUN apt-get update && apt-get install -y openssl
3 | RUN mkdir -p /etc/nginx/external
4 | ADD docker/nginx/nginx.conf /etc/nginx/nginx.conf
5 | ADD docker/nginx/entrypoint.sh /docker-entrypoint
6 | ENTRYPOINT ["/docker-entrypoint"]
7 | CMD ["nginx"]
8 |
--------------------------------------------------------------------------------
/test/data/cors1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | http://*.example.com
4 | GET
5 | *
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .nrepl-port
2 | hs_err_pid*
3 | /doc/build
4 | /site
5 | /target
6 | /lib
7 | /docs
8 | /classes
9 | /checkouts
10 | pom.xml
11 | pom.xml.asc
12 | *.jar
13 | *.class
14 | .lein-deps-sum
15 | .lein-failures
16 | .lein-plugins
17 | .lein-repl-history
18 | .m2
19 | .sass-cache
20 | _site
21 | img
22 | node_modules
23 |
--------------------------------------------------------------------------------
/doc/s3cmd.cfg:
--------------------------------------------------------------------------------
1 | [default]
2 | host_base = s3.example.com
3 | host_bucket = %(bucket)s.s3.example.com
4 | access_key = AKIAIOSFODNN7EXAMPLE
5 | secret_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
6 | signature_v2 = True
7 | # Remove those lines when not running locally:
8 | use_https = False
9 | proxy_host = localhost
10 | proxy_port = 8080
11 |
--------------------------------------------------------------------------------
/pkg/deb/preinst.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Create pithos user and group
3 | set -e
4 |
5 | USERNAME="pithos"
6 | GROUPNAME="pithos"
7 | getent group "$GROUPNAME" >/dev/null || groupadd -r "$GROUPNAME"
8 | getent passwd "$USERNAME" >/dev/null || \
9 | useradd -r -g "$GROUPNAME" -d /usr/lib/pithos -s /bin/false \
10 | -c "Pithos object store" "$USERNAME"
11 | exit 0
12 |
--------------------------------------------------------------------------------
/pkg/rpm/postinst.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Fakeroot and lein don't get along, so we set ownership after the fact.
3 | set -e
4 |
5 | chown -R root:root /usr/lib/pithos
6 | chown root:root /usr/bin/pithos
7 | chown pithos:pithos /var/log/pithos
8 | chown pithos:pithos /etc/pithos/pithos.yaml
9 | chown root:root /etc/init.d/pithos
10 |
11 | if [ -x "/etc/init.d/pithos" ]; then
12 | service pithos start || exit $?
13 | fi
14 |
--------------------------------------------------------------------------------
/pkg/deb/postinst.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Fakeroot and lein don't get along, so we set ownership after the fact.
3 | set -e
4 |
5 | chown -R root:root /usr/lib/pithos
6 | chown root:root /usr/bin/pithos
7 | chown pithos:pithos /var/log/pithos
8 | chown pithos:pithos /etc/pithos/pithos.yaml
9 | chown root:root /etc/init.d/pithos
10 |
11 | if [ -x "/etc/init.d/pithos" ]; then
12 | invoke-rc.d pithos start || exit $?
13 | fi
14 |
--------------------------------------------------------------------------------
/src/io/pithos/reporter.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.reporter
2 | (:require [clojure.tools.logging :refer [log]]))
3 |
4 | (defprotocol Reporter
5 | (report! [this event]))
6 |
7 | (defn logging-reporter
8 | [{:keys [level]}]
9 | (reify Reporter
10 | (report! [_ event]
11 | (log (keyword level) (pr-str event)))))
12 |
13 | (defn report-all!
14 | [reporters event]
15 | (doseq [reporter reporters]
16 | (report! reporter event)))
17 |
--------------------------------------------------------------------------------
/docker/pithos/docker-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 |
3 | set -eux
4 |
5 | export PITHOS_CASSANDRA_HOST=${PITHOS_CASSANDRA_HOST:-cassandra}
6 | export PITHOS_SERVICE_URI=${PITHOS_SERVICE_URI:-s3.example.com}
7 |
8 | confd -onetime -backend env
9 |
10 | # wait for cassandra being ready
11 | until nc -z -w 2 $PITHOS_CASSANDRA_HOST 9042; do sleep 1; done
12 |
13 | java -jar /pithos-standalone.jar -a install-schema || true
14 |
15 | exec java -jar /pithos-standalone.jar -a api-run
16 |
--------------------------------------------------------------------------------
/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
5 |
6 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/test/data/acl3.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | foo
5 | bar
6 |
7 |
8 |
9 | FULL_CONTROL
10 |
11 | foo
12 | bar
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/src/io/pithos/keystore.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.keystore
2 | "A keystore is a simple protocol which yields a map
3 | of tenant details for a key id.
4 |
5 | The basic implementation wants keys from the configuration
6 | file, you'll likely want to use a custom implementation that
7 | interacts with your user-base here.
8 | ")
9 |
10 | (defn map-keystore "Wrap a map, translating looked-up keys to keywords."
11 |   [{:keys [keys]}]
12 | (reify
13 | clojure.lang.ILookup
14 | (valAt [this id]
15 | (get keys (keyword id)))))
16 |
--------------------------------------------------------------------------------
/vagrant/modules/pithos/manifests/init.pp:
--------------------------------------------------------------------------------
1 | class pithos {
2 |
3 | $keyid = 'F758CE318D77295D'
4 |
5 | exec { 'cassandra-recv-keys':
6 | command => "gpg --keyserver pgp.mit.edu --recv-keys ${keyid} && gpg --export --armor ${keyid} | apt-key add - && apt-get update",
7 | user => 'root',
8 | group => 'root',
9 | path => "/bin:/usr/bin:/sbin:/usr/sbin",
10 | unless =>"apt-key list | grep ${keyid}",
11 | }
12 |
13 | package { 'cassandra':
14 | ensure => latest,
15 | require => Exec['cassandra-recv-keys']
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/test/data/acl1.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | foo
4 | bar
5 |
6 |
7 |
8 |
9 | foo
10 | bar
11 |
12 | FULL_CONTROL
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2014 exoscale(tm)
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
--------------------------------------------------------------------------------
/test/data/acl5.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | foo
4 | bar
5 |
6 |
7 |
8 |
9 | http://acs.amazonaws.com/groups/global/AllUsers
10 | anonymous
11 |
12 | READ
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/test/io/pithos/reporter_test.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.reporter-test
2 | (:require [clojure.test :refer :all]
3 | [io.pithos.reporter :refer [report! report-all! Reporter]]))
4 |
5 | (defn atom-reporter
6 | []
7 | (let [contents (atom nil)]
8 | [contents
9 | (reify Reporter
10 | (report! [_ e]
11 | (swap! contents conj e)))]))
12 |
13 | (deftest reporter-test
14 |
15 | (let [[contents r] (atom-reporter)]
16 | (report! r :foo)
17 | (report! r :bar)
18 | (report! r :baz)
19 |
20 | (testing "simple inserts"
21 | (is (= [:baz :bar :foo] @contents)))))
22 |
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: '3.3'
2 |
3 | volumes:
4 | cassandra: {}
5 | nginx_tls: {}
6 |
7 | services:
8 | cassandra:
9 | image: cassandra:2.1
10 | volumes:
11 | - cassandra:/var/lib/cassandra
12 |
13 | pithos:
14 | build:
15 | context: .
16 | dockerfile: docker/pithos/Dockerfile
17 | depends_on:
18 | - cassandra
19 |
20 | nginx-proxy:
21 | build:
22 | context: .
23 | dockerfile: docker/nginx/Dockerfile
24 | volumes:
25 | - nginx_tls:/etc/nginx/external/
26 | depends_on:
27 | - pithos
28 | ports:
29 | - "0.0.0.0:80:80"
30 | - "0.0.0.0:443:443"
31 |
--------------------------------------------------------------------------------
/docker/nginx/entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ -z ${DH_SIZE+x} ]
4 | then
5 | >&2 echo ">> no \$DH_SIZE specified using default"
6 | DH_SIZE="2048"
7 | fi
8 |
9 | DH="/etc/nginx/external/dh.pem"
10 |
11 | if [ ! -e "$DH" ]
12 | then
13 | echo ">> generating $DH with size: $DH_SIZE"
14 | openssl dhparam -out "$DH" $DH_SIZE
15 | fi
16 |
17 | if [ ! -e "/etc/nginx/external/cert.pem" ] || [ ! -e "/etc/nginx/external/key.pem" ]
18 | then
19 | echo ">> generating self signed cert"
20 |   openssl req -x509 -newkey rsa:4096 \
21 | -subj "/C=XX/ST=XXXX/L=XXXX/O=XXXX/CN=localhost" \
22 | -keyout "/etc/nginx/external/key.pem" \
23 | -out "/etc/nginx/external/cert.pem" \
24 | -days 3650 -nodes -sha256
25 | fi
26 |
27 | echo "$@"
28 | exec "$@"
29 |
--------------------------------------------------------------------------------
/src/io/pithos/system.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.system)
2 |
3 | (defprotocol SystemDescriptor
4 | (regions [this])
5 | (bucketstore [this])
6 | (keystore [this])
7 | (reporters [this])
8 | (service [this])
9 | (service-uri [this]))
10 |
11 | (defn system-descriptor
12 | [config]
13 | (reify
14 | SystemDescriptor
15 | (regions [this] (:regions config))
16 | (bucketstore [this] (:bucketstore config))
17 | (keystore [this] (:keystore config))
18 | (reporters [this] (:reporters config))
19 | (service [this] (:service config))
20 | (service-uri [this] (get-in config [:options :service-uri]))
21 | clojure.lang.ILookup
22 | (valAt [this k] (get config k))
23 | (valAt [this k default] (or (get config k) default))))
24 |
--------------------------------------------------------------------------------
/test/io/pithos/cors_test.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.cors-test
2 | (:require [io.pithos.cors :refer :all]
3 | [clojure.test :refer :all]
4 | [clojure.java.io :as io]))
5 |
6 | (deftest xml-slurp-test
7 | (let [repr {:cors1 [{:origins ["http://*.example.com"]
8 | :methods [:get]
9 | :headers ["*"]
10 | :exposed []
11 | :max-age nil}]}]
12 |
13 | (doseq [[src int-repr] repr
14 | :let [path (format "%s.xml" (name src))
15 | ext-repr (slurp (io/resource path))]]
16 | (testing (str "valid xml input for " (name src))
17 | (is (= (xml->cors ext-repr) int-repr))))
18 |
19 | (doseq [[src int-repr] repr
20 | :let [path (format "%s.xml" (name src))
21 | ext-repr (slurp (io/resource path))]]
22 | (testing (str "valid xml output for " (name src))
23 | (is (= (as-xml int-repr true) ext-repr))))))
24 |
--------------------------------------------------------------------------------
/docker/pithos/pithos.yaml.tmpl:
--------------------------------------------------------------------------------
1 | service:
2 | host: "0.0.0.0"
3 | port: 8080
4 |
5 | logging:
6 | level: info
7 | console: true
8 |
9 | options:
10 | service-uri: {{ getv "/pithos/service/uri" "s3.example.com" }}
11 | default-region: myregion
12 |
13 | keystore:
14 | keys:
15 | AKIAIOSFODNN7EXAMPLE:
16 | master: true
17 | tenant: test@example.com
18 | secret: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
19 | AKIDEXAMPLE:
20 | master: true
21 | tenant: test@example.com
22 | secret: 'wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY'
23 |
24 | bucketstore:
25 | default-region: myregion
26 | cluster: {{ getv "/pithos/cassandra/host" "cassandra" }}
27 | keyspace: storage
28 |
29 | regions:
30 | myregion:
31 | metastore:
32 | cluster: {{ getv "/pithos/cassandra/host" "cassandra" }}
33 | keyspace: storage
34 |
35 | storage-classes:
36 | standard:
37 | cluster: {{ getv "/pithos/cassandra/host" "cassandra" }}
38 | keyspace: storage
39 | max-chunk: "128k"
40 | max-block-chunk: 1024
41 |
--------------------------------------------------------------------------------
/docker/pithos/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM clojure:lein-2.6.1 AS builder
2 |
3 | WORKDIR /pithos
4 |
5 | RUN curl -L https://github.com/kelseyhightower/confd/releases/download/v0.12.0/confd-0.12.0-linux-amd64 -o /confd && chmod +x /confd
6 |
7 | COPY project.clj /pithos/project.clj
8 | RUN cd /pithos && lein deps
9 |
10 | COPY resources /pithos/resources
11 | COPY src /pithos/src
12 | RUN cd /pithos && lein uberjar && mv target/pithos-*-standalone.jar /pithos-standalone.jar
13 |
14 |
15 | FROM openjdk:jre-alpine
16 |
17 | RUN apk --no-cache add netcat-openbsd
18 |
19 | RUN addgroup -S pithos && adduser -S -g pithos pithos
20 | RUN mkdir /etc/pithos && chown pithos: /etc/pithos && chmod 0700 /etc/pithos
21 | USER pithos
22 |
23 | COPY --from=builder /confd /usr/local/bin/confd
24 | COPY --from=builder /pithos-standalone.jar /pithos-standalone.jar
25 |
26 | COPY docker/pithos/docker-entrypoint.sh /docker-entrypoint.sh
27 | COPY docker/pithos/pithos.yaml.tmpl /etc/confd/templates/pithos.yaml.tmpl
28 | COPY docker/pithos/pithos.yaml.toml /etc/confd/conf.d/pithos.yaml.toml
29 |
30 | CMD ["/docker-entrypoint.sh"]
31 |
--------------------------------------------------------------------------------
/test/data/form-upload1.txt:
--------------------------------------------------------------------------------
1 | --9431149156168
2 | Content-Disposition: form-data; name="key"
3 |
4 | qux
5 | --9431149156168
6 | Content-Disposition: form-data; name="acl"
7 |
8 | private
9 | --9431149156168
10 | Content-Disposition: form-data; name="success_action_status"
11 |
12 | 201
13 | --9431149156168
14 | Content-Disposition: form-data; name="Content-Type"
15 |
16 | text/plain
17 | --9431149156168
18 | Content-Disposition: form-data; name="AWSAccessKeyId"
19 |
20 | AKIAIOSFODNN7EXAMPLE
21 | --9431149156168
22 | Content-Disposition: form-data; name="Policy"
23 |
24 | eyJleHBpcmF0aW9uIjogIjIwMjUtMTItMDFUMTI6MDA6MDAuMDAwWiIsCiAiY29uZGl0aW9ucyI6IFt7ImJ1Y2tldCI6ICJiYXRtYW4ifSwKICAgICAgICAgICAgICAgIHsiYWNsIjogInByaXZhdGUifSwKICAgICAgICAgICAgICAgIHsic3VjY2Vzc19hY3Rpb25fc3RhdHVzIjogIjIwMSJ9XX0K
25 | --9431149156168
26 | Content-Disposition: form-data; name="Signature"
27 |
28 | c3iMCe5m4lNmRHt+cmPAyOK0lf4=
29 | --9431149156168
30 | Content-Disposition: form-data; name="file"; filename="MyFilename.jpg"
31 | Content-Type: text/plain
32 |
33 | not much to say.
34 | --9431149156168--
35 |
--------------------------------------------------------------------------------
/test/data/form-upload2.txt:
--------------------------------------------------------------------------------
1 | --9431149156168
2 | Content-Disposition: form-data; name="key"
3 |
4 | qux
5 | --9431149156168
6 | Content-Disposition: form-data; name="acl"
7 |
8 | private
9 | --9431149156168
10 | Content-Disposition: form-data; name="success_action_status"
11 |
12 | 204
13 | --9431149156168
14 | Content-Disposition: form-data; name="Content-Type"
15 |
16 | text/plain
17 | --9431149156168
18 | Content-Disposition: form-data; name="AWSAccessKeyId"
19 |
20 | AKIAIOSFODNN7EXAMPLE
21 | --9431149156168
22 | Content-Disposition: form-data; name="Policy"
23 |
24 | eyJleHBpcmF0aW9uIjogIjIwMjUtMTItMDFUMTI6MDA6MDAuMDAwWiIsCiAiY29uZGl0aW9ucyI6IFt7ImJ1Y2tldCI6ICJiYXRtYW4ifSwKICAgICAgICAgICAgICAgIHsiYWNsIjogInByaXZhdGUifSwKICAgICAgICAgICAgICAgIHsic3VjY2Vzc19hY3Rpb25fc3RhdHVzIjogIjIwMSJ9XX0K
25 | --9431149156168
26 | Content-Disposition: form-data; name="Signature"
27 |
28 | c3iMCe5m4lNmRHt+cmPAyOK0lf4=
29 | --9431149156168
30 | Content-Disposition: form-data; name="file"; filename="MyFilename.jpg"
31 | Content-Type: text/plain
32 |
33 | not much to say.
34 | --9431149156168--
35 |
--------------------------------------------------------------------------------
/doc/pithos.yaml:
--------------------------------------------------------------------------------
1 | service:
2 | host: "0.0.0.0"
3 | port: 8080
4 | logging:
5 | level: info
6 | console: true
7 | overrides:
8 | io.pithos: debug
9 | options:
10 | service-uri: s3.example.com
11 | default-region: myregion
12 | keystore:
13 | keys:
14 | AKIAIOSFODNN7EXAMPLE:
15 | master: true
16 | tenant: test@example.com
17 | secret: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
18 | AKIDEXAMPLE:
19 | master: true
20 | tenant: test@example.com
21 | secret: 'wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY'
22 |
23 |
24 | bucketstore:
25 | default-region: myregion
26 | cluster: "localhost"
27 | keyspace: storage
28 | regions:
29 | myregion:
30 | metastore:
31 | cluster: "localhost"
32 | keyspace: storage
33 | storage-classes:
34 | standard:
35 | cluster: "localhost"
36 | keyspace: storage
37 | max-chunk: "128k"
38 | max-block-chunk: 1024
39 | cassandra:
40 | saved_caches_directory: "target/db/saved_caches"
41 | data_file_directories:
42 | - "target/db/data"
43 | commitlog_directory: "target/db/commitlog"
44 |
--------------------------------------------------------------------------------
/src/io/pithos/schema.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.schema
2 | "Namespace holding a single action which installs the schema"
3 | (:require [clojure.tools.logging :refer [info error]]
4 | [io.pithos.system :as system]
5 | [io.pithos.store :as store]))
6 |
7 | (defn converge-schema
8 | "Loops through all storage layers and calls converge! on them"
9 | ([system exit?]
10 | (info "converging all schemas...")
11 | (try
12 | (info "converging bucketstore schema")
13 | (store/converge! (system/bucketstore system))
14 |
15 | (doseq [region (system/regions system)
16 | :let [[region {:keys [metastore storage-classes]}] region]]
17 | (info "converging metastore for region " region)
18 | (store/converge! metastore)
19 |
20 | (doseq [[storage-class blobstore] storage-classes]
21 | (info "converging blobstore for region and storage-class "
22 | region storage-class)
23 |         (store/converge! blobstore)))
24 |     (when exit? (System/exit 0))
25 | (catch Exception e
26 | (error e "cannot create schema")
27 | (when exit? (System/exit 1)))))
28 | ([system]
29 | (converge-schema system true)))
30 |
--------------------------------------------------------------------------------
/test/data/acl4.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | foo
4 | bar
5 |
6 |
7 |
8 |
9 | foo
10 | bar
11 |
12 | FULL_CONTROL
13 |
14 |
15 |
16 | bar
17 | bar
18 |
19 | FULL_CONTROL
20 |
21 |
22 |
23 | foo
24 | baz
25 |
26 | READ_ACP
27 |
28 |
29 |
30 | baz
31 | baz
32 |
33 | READ_ACP
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/vagrant/modules/cassandra/manifests/init.pp:
--------------------------------------------------------------------------------
1 | class cassandra {
2 |
3 | $keyid = '4BD736A82B5C1B00'
4 | $shortkey = '2048R/2B5C1B00'
5 |
6 | file { '/etc/apt/sources.list.d/cassandra.list':
7 | content => 'deb http://www.apache.org/dist/cassandra/debian 20x main'
8 | }
9 |
10 | exec { 'cassandra-recv-keys':
11 | command => "gpg --keyserver pgp.mit.edu --recv-keys ${keyid} && gpg --export --armor ${keyid} | apt-key add - && apt-get update",
12 | user => 'root',
13 | group => 'root',
14 | path => "/bin:/usr/bin:/sbin:/usr/sbin",
15 | unless =>"apt-key list | grep ${shortkey}",
16 | require => File['/etc/apt/sources.list.d/cassandra.list']
17 | }
18 |
19 |   $cassandra_heap_size = hiera('cassandra_heap_size', '8G')
20 |   $cassandra_heap_new = hiera('cassandra_heap_new', '800m')
21 |
22 | package { 'cassandra':
23 | ensure => present,
24 | require => Exec['cassandra-recv-keys']
25 | }
26 |
27 | file { '/etc/cassandra/cassandra.yaml':
28 | content => template('cassandra/cassandra.yaml.erb'),
29 | require => Package['cassandra'],
30 | notify => Service['cassandra']
31 | }
32 |
33 | file { '/etc/default/cassandra':
34 | content => template('cassandra/cassandra.env.erb'),
35 | require => Package['cassandra'],
36 | notify => Service['cassandra']
37 | }
38 |
39 | service {'cassandra':
40 | ensure => running
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/io/pithos/api.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.api
2 |   "Our main HTTP facade. Serving functionality is provided by Jetty
3 |    (via qbits.jet), which is preferred over more traditional HTTP servers because
4 | it avoids creating one thread per (potentially) long streaming
5 | request or response. Moreover, certain specific operations
6 | just cannot be handled by the traditional synchronous handlers
7 | like ring, such as the 100: Continue response expected for uploads.
8 | "
9 | (:require [qbits.jet.server :refer [run-jetty]]
10 | [clojure.tools.logging :refer [info]]
11 | [io.pithos.system :refer [service]]
12 | [io.pithos.operations :refer [dispatch]]
13 | [io.pithos.request :refer [safe-prepare]]))
14 |
15 | (defn executor
16 | "Given a system map, yield a handler function for incoming
17 | request maps"
18 | [system]
19 | (fn [request]
20 | (-> (safe-prepare request system)
21 | (dispatch system))))
22 |
23 | (defn run
24 |   "Run an asynchronous API handler through Jetty thanks to qbits.jet.
25 |    The request handler is an anonymous function which stores the channel
26 |    inside the request to mimic the operations of http-kit then runs
27 | several wrappers defined in `io.pithos.api.request` before letting
28 | `io.pithos.operations` dispatch based on the type of request"
29 | [system]
30 | (let [handler (executor system)]
31 | (run-jetty (merge (service system) {:input-buffer-size 65536
32 | :parser-compliance :legacy
33 | :ring-handler handler})))
34 | (info "server up and running"))
35 |
--------------------------------------------------------------------------------
/test/io/pithos/acl_test.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.acl-test
2 | (:require [clojure.test :refer :all]
3 | [clojure.pprint :refer [pprint]]
4 | [io.pithos.acl :refer [as-xml xml->acl]]
5 | [clojure.java.io :as io]))
6 |
(deftest xml-to-acl-test
  ;; Internal ACL representation: permission keyword -> seq of grantees,
  ;; each grantee keyed by :ID (canonical user) or :URI (group), plus a
  ;; :DisplayName. The matching XML fixtures (acl1.xml, ...) live in
  ;; test/data, made available via the :dev resource path.
  (let [repr {:acl1 {:FULL_CONTROL [{:ID "foo" :DisplayName "bar"}]}
              :acl4 {:FULL_CONTROL [{:ID "foo" :DisplayName "bar"}
                                    {:URI "bar" :DisplayName "bar"}]
                     :READ_ACP [{:ID "foo" :DisplayName "baz"}
                                {:URI "baz" :DisplayName "baz"}]}
              :acl5 {:READ [{:URI "anonymous"
                             :DisplayName "anonymous"}]}}]

    ;; round-trip, direction 1: parse each XML fixture and compare to
    ;; the expected internal representation
    (doseq [[src int-repr] repr
            :let [path (format "%s.xml" (name src))
                  ext-repr (slurp (io/resource path))]]
      (testing (str "valid xml input for " (name src))
        (is (= (xml->acl ext-repr) int-repr))))

    ;; round-trip, direction 2: serialize the internal representation
    ;; and compare to the fixture byte-for-byte
    (doseq [[src int-repr] repr
            :let [path (format "%s.xml" (name src))
                  ext-repr (slurp (io/resource path))]]
      (testing (str "valid xml output for " (name src))
        (is (= (as-xml int-repr true) ext-repr))))

    ;; malformed input surfaces as ex-info with a descriptive message
    (testing "invalid xml"
      (is (thrown-with-msg?
           clojure.lang.ExceptionInfo
           #"Invalid XML in ACL Body"
           (xml->acl (slurp (io/resource "acl2.xml")))))

      (is (thrown-with-msg?
           clojure.lang.ExceptionInfo
           #"XML Root Node should be AccessControlPolicy"
           (xml->acl (slurp (io/resource "acl3.xml"))))))))
38 |
--------------------------------------------------------------------------------
/src/io/pithos/response.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.response
2 | "Provides ring like facilities for working with
3 | HTTP responses"
4 | (:require [clojure.tools.logging :refer [debug error]]))
5 |
(defn response
  "Build a minimal ring response map: status 200, empty headers and,
   when supplied, a body."
  ([]
   {:status 200 :headers {}})
  ([body]
   (assoc (response) :body body)))
12 |
(defn redirect
  "Yield a 302 temporary-redirect response pointing at location,
   with an empty body."
  [location]
  {:status 302 :headers {"location" location} :body ""})
16 |
(defn header
  "Set a header on a response map, coercing the value to a string;
   keywords contribute their name."
  [resp header val]
  (let [rendered (if (keyword? val) (name val) (str val))]
    (update resp :headers assoc header rendered)))
22 |
(defn content-type
  "Add a Content-Type header to a response map"
  [resp type]
  (header resp "Content-Type" type))
27 |
(defn status
  "Set response status code"
  ;; note: the status parameter deliberately shadows this very
  ;; function inside the body
  [resp status]
  (assoc resp :status status))
32 |
(defn xml-response
  "Yields a HTTP response, assuming body is XML data"
  [body]
  ;; use the content-type helper defined above instead of
  ;; re-spelling the header, keeping the two *-response fns uniform
  (-> (response body)
      (content-type "application/xml")))
39 |
(defn html-response
  "Yields a HTTP response, assuming body is HTML data"
  [body]
  ;; use the content-type helper defined above instead of
  ;; re-spelling the header, keeping the two *-response fns uniform
  (-> (response body)
      (content-type "text/html")))
45 |
(defn request-id
  "Provision S3 specific headers: identify the server and echo the
   per-request identifier in the x-amz-* headers S3 clients expect."
  [resp {:keys [reqid]}]
  (-> resp
      (header "Server" "Pithos")
      (header "x-amz-id-2" (str reqid))
      (header "x-amz-request-id" (str reqid))))
53 |
(defn exception-status
  "When a handler raised an exception, pick the HTTP status code
   carried in its data, falling back to a 500 internal error."
  [resp details]
  (assoc resp :status (:status-code details 500)))
61 |
--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
1 | The Pithos Guide
2 | ================
3 |
4 | .. image:: _static/pithos.svg
   :alt: pithos logo
6 | :align: right
7 |
8 | *pithos* is a daemon which provides an S3-compatible frontend for storing files
9 | in a `Cassandra`_ cluster.
10 |
11 | *pithos* provides the ability to build complex object storage topologies spanning
12 | multiple regions and focuses on the following:
13 |
14 | Scalability
15 | By relying on Apache Cassandra, pithos splits your files (objects) in small chunks
16 | which are replicated across a cluster of machines. This allows pithos to provide
17 | the following guarantees:
18 |
19 | - Fast writes
20 | - High Availability
21 | - Partition tolerance
22 |
23 | Compatibility
   While there is no widespread official standard for object storage, the S3 protocol
25 | has become a de-facto standard and has thus been chosen as pithos' protocol.
26 | This means you can start using your favorite S3 tools to work with pithos, such as:
27 |
28 | - `s3cmd`_
29 | - `boto`_
30 |
31 | Simplicity
   Pithos was built with ease of use and operability in mind. It should be easy to get started, require as few moving parts as possible and still be relatively easy to extend for larger installations. Pithos is distributed as a single executable JAR-file and relies on a YAML configuration file; most of the JVM specifics are hidden from the administrator.
33 |
34 | *pithos* is sponsored by exoscale_
35 |
36 | .. _Cassandra: http://cassandra.apache.org/
37 | .. _s3cmd: http://s3tools.org/
38 | .. _boto: https://github.com/boto/boto
39 | .. _exoscale: https://exoscale.ch
40 |
41 | .. toctree::
42 | :maxdepth: 2
43 |
44 | quickstart
45 | concepts
46 | administrator
47 | api
48 | developer
49 | clients
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/docker/nginx/nginx.conf:
--------------------------------------------------------------------------------
1 | user nginx;
2 | worker_processes 1;
3 |
4 | error_log /dev/stdout warn;
5 |
6 | daemon off;
7 |
8 | pid /var/run/nginx.pid;
9 |
10 | events {
11 | worker_connections 1024;
12 | }
13 |
14 | http {
15 | include /etc/nginx/mime.types;
16 | default_type application/octet-stream;
17 |
18 | log_format main '$remote_addr - $remote_user [$time_local] "$request" '
19 | '$status $body_bytes_sent "$http_referer" '
20 | '"$http_user_agent" "$http_x_forwarded_for"';
21 |
22 | access_log /dev/stdout main;
23 |
24 | keepalive_timeout 65;
25 |
26 | # Don't leak metadata about this server
27 | server_tokens off;
28 |
29 | # Enforce some security hardening HTTP headers
30 | add_header X-Content-Type-Options nosniff;
31 |
32 | # Decent set of ciphers...
33 | ssl_dhparam /etc/nginx/external/dh.pem;
34 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # disable poodle
35 | ssl_prefer_server_ciphers on;
36 | ssl_ciphers ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS;
37 |
38 | client_max_body_size 5G;
39 |
40 | upstream app {
41 | server pithos:8080;
42 | }
43 |
44 | server {
45 | listen 80 default_server;
46 |
47 | charset utf-8;
48 |
49 | location / {
50 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
51 | proxy_set_header Host $http_host;
52 | proxy_redirect off;
53 | proxy_pass http://app;
54 | }
55 | }
56 |
57 | server {
58 | listen 443 default_server;
59 |
60 | ssl on;
61 | ssl_certificate external/cert.pem;
62 | ssl_certificate_key external/key.pem;
63 |
64 | charset utf-8;
65 |
66 | location / {
67 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
68 | proxy_set_header Host $http_host;
69 | proxy_redirect off;
70 | proxy_pass http://app;
71 | }
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
(defproject io.pithos/pithos "0.7.6-SNAPSHOT"
  :description "cassandra-backed object storage"
  :maintainer {:email "Pierre-Yves Ritschard "}
  :url "http://pithos.io"
  :license {:name "Apache License, Version 2.0"
            :url "http://www.apache.org/licenses/LICENSE-2.0"}
  ;; AOT-compile everything so the uberjar is runnable with java -jar
  :aot :all
  :main io.pithos
  :jvm-opts ["-Xmx2g"]
  ;; test fixtures (acl*.xml, form-upload*.txt) are loaded through
  ;; io/resource in the test suite
  :profiles {:dev {:resource-paths ["test/data"]}}
  :dependencies [[org.clojure/clojure "1.9.0-alpha14"]
                 [org.clojure/data.codec "0.1.0"]
                 [org.clojure/data.xml "0.0.8"]
                 [org.clojure/data.zip "0.1.1"]
                 [org.clojure/tools.cli "0.3.5"]
                 [org.clojure/tools.logging "0.3.1"]
                 [org.clojure/core.async "0.2.374"]
                 [spootnik/unilog "0.7.17"]
                 [spootnik/constance "0.5.3"]
                 [spootnik/raven "0.1.1"]
                 [spootnik/uncaught "0.5.3"]
                 [clj-yaml "0.4.0"]
                 [clout "2.1.2"]
                 [cheshire "5.6.3"]
                 [clj-time "0.9.0"]
                 [ring/ring-core "1.3.2"
                  :exclusions [org.clojure/tools.reader]]
                 [ring/ring-codec "1.0.0"]
                 [com.eaio.uuid/uuid "3.2"]
                 ;; alia pulls its own uuid; exclude the conflicting one
                 [cc.qbits/alia-all "3.3.0"
                  :exclusions [com.eaio.uuid/uuid]]
                 [cc.qbits/hayt "3.0.1"]
                 [cc.qbits/jet "0.7.9"
                  :exclusions [org.clojure/tools.reader]]
                 [net.jpountz.lz4/lz4 "1.3.0"]
                 [org.xerial.snappy/snappy-java "1.1.2.4"]])
37 |
--------------------------------------------------------------------------------
/pkg/rpm/init.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | ### BEGIN INIT INFO
3 | # Provides: pithos
4 | # Required-Start: $remote_fs $syslog
5 | # Required-Stop: $remote_fs $syslog
6 | # Default-Start: 2 3 4 5
7 | # Default-Stop: 0 1 6
8 | # Short-Description: Pithos object store
9 | # Description: Pithos, a cassandra-backed object store
10 | ### END INIT INFO
11 |
12 | # Source function library.
13 | . /etc/rc.d/init.d/functions
14 |
15 | # Pull in sysconfig settings
16 | [ -f /etc/sysconfig/pithos ] && . /etc/sysconfig/pithos
17 |
18 | # PATH should only include /usr/* if it runs after the mountnfs.sh script
19 | PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
20 | DESC="Pithos"
21 | NAME=pithos
22 | DAEMON=/usr/bin/pithos
23 | DAEMON_ARGS="-f /etc/pithos/pithos.yaml"
24 | DAEMON_USER=pithos
25 | PID_FILE=/var/run/$NAME.pid
26 | SCRIPT_NAME=/etc/init.d/$NAME
27 | LOCK_FILE=/var/lock/subsys/$NAME
28 |
# Start the daemon under the service account, recording pid and lock.
start()
{
    echo -n $"Starting ${NAME}: "
    # Raise the open-file limit only when NFILES is set in
    # /etc/sysconfig/pithos; a bare "ulimit -n" with no argument
    # would merely print the current limit instead of setting it.
    [ -n "$NFILES" ] && ulimit -n "$NFILES"
    daemonize -u $DAEMON_USER -p $PID_FILE -l $LOCK_FILE $DAEMON $DAEMON_ARGS
    RETVAL=$?
    echo
    [ $RETVAL -eq 0 ] && touch $LOCK_FILE
    return $RETVAL
}
39 |
# Stop the daemon, giving it 10 seconds before killproc escalates.
stop()
{
    echo -n $"Stopping ${NAME}: "
    killproc -p ${PID_FILE} -d 10 $DAEMON
    RETVAL=$?
    echo
    # only clear the lock and pid files after a clean stop
    [ $RETVAL = 0 ] && rm -f ${LOCK_FILE} ${PID_FILE}
    return $RETVAL
}
49 |
# Ask the running daemon to reload: "-1" makes killproc send
# signal 1 (SIGHUP) instead of terminating the process.
do_reload() {
    echo -n $"Reloading ${NAME}: "
    killproc -p ${PID_FILE} $DAEMON -1
    RETVAL=$?
    echo
    return $RETVAL
}
57 |
# Dispatch on the init verb.
case "$1" in
  start)
    start
    ;;
  stop)
    stop
    ;;
  status)
    status -p ${PID_FILE} $DAEMON
    RETVAL=$?
    ;;
  reload|force-reload)
    # fixed: this previously invoked "reload", which is undefined in
    # this script — the function above is named do_reload
    do_reload
    ;;
  restart)
    stop
    start
    ;;
  *)
    N=/etc/init.d/${NAME}
    echo "Usage: $N {start|stop|status|restart|reload|force-reload}" >&2
    RETVAL=2
    ;;
esac

exit $RETVAL
84 |
--------------------------------------------------------------------------------
/test/io/pithos/util_test.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.util-test
2 | (:require [clojure.test :refer :all]
3 | [io.pithos.util :refer [inc-prefix to-bytes parse-uuid
4 | string->pattern]]))
5 |
(deftest inc-prefix-test
  ;; inc-prefix bumps the last character of a prefix ("foo" -> "fop");
  ;; presumably used to build exclusive upper bounds for range scans —
  ;; see io.pithos.util for the implementation.
  (testing "nil prefix"
    (is (= nil (inc-prefix nil))))
  (testing "empty prefix"
    (is (= nil (inc-prefix ""))))

  (testing "simple prefix"
    (is (= "fop" (inc-prefix "foo")))))
14 |
(deftest byte-factor-test
  ;; to-bytes parses human-readable sizes; the cases below show the
  ;; unit suffix is case-insensitive and may carry a trailing "b"
  ;; ("2k", "2G", "2tb", "2Pb")
  (testing "from bytes"
    (is (= 512 (to-bytes "512"))))

  (testing "from kilobytes"
    (is (= 2048 (to-bytes "2k"))))

  (testing "from megabytes"
    (is (= 2097152 (to-bytes "2m"))))

  (testing "from gigabytes"
    (is (= 2147483648 (to-bytes "2G"))))

  (testing "from terabytes"
    (is (= 2199023255552 (to-bytes "2tb"))))

  (testing "from petabytes"
    (is (= 2251799813685248 (to-bytes "2Pb")))))
33 |
(deftest uuid-test
  (testing "from uuid"
    ;; NOTE(review): "05ac767e-170f-a639-1ce7-39078945ee4480" is not a
    ;; canonical UUID — the last group holds 14 hex digits instead of
    ;; 12 — so both the #uuid reader literal and parse-uuid rely on
    ;; java.util.UUID/fromString leniency here; confirm intended.
    (is (= #uuid "05ac767e-170f-a639-1ce7-39078945ee4480"
           (parse-uuid "05ac767e-170f-a639-1ce7-39078945ee4480")))))
38 |
(deftest string-to-pattern-test
  ;; string->pattern escapes regex metacharacters so an arbitrary
  ;; string can be embedded in a pattern and match itself literally
  (testing "no special characters"
    (is (= "17hj018" (string->pattern "17hj018"))))
  (testing "single character"
    (is (= "/" (string->pattern "/"))))
  (testing "with dots and stars"
    (is (= "\\.\\*89\\+2\\?" (string->pattern ".*89+2?"))))
  (testing "with grouping"
    (is (= "\\(89\\)" (string->pattern "(89)"))))
  (testing "with anchors"
    (is (= "\\^test\\$" (string->pattern "^test$"))))
  (testing "with classes"
    (is (= "\\\\d\\\\s\\\\S" (string->pattern "\\d\\s\\S"))))
  (testing "with sets and repetitions"
    (is (= "\\[a\\-z\\]\\{1,2\\}" (string->pattern "[a-z]{1,2}"))))
  (testing "with alternatives"
    (is (= "abc\\|cde" (string->pattern "abc|cde"))))
  (testing "with escapes"
    (is (= "abc\\\\" (string->pattern "abc\\"))))
  (testing "with back references"
    (is (= "\\(42\\)\\\\1\\\\k\\"
           (string->pattern "(42)\\1\\k")))))
61 |
--------------------------------------------------------------------------------
/src/io/pithos/store.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.store
2 | "Generic cassandra cluster connection services."
3 | (:import com.datastax.driver.core.exceptions.InvalidQueryException)
4 | (:require [qbits.alia :as alia]
5 | [qbits.hayt :refer [use-keyspace create-keyspace with]]
6 | [clojure.tools.logging :refer [debug]]))
7 |
(defprotocol Convergeable
  ;; converge! brings backing state in line with expectations —
  ;; presumably schema creation, cf. io.pithos.schema; confirm
  (converge! [this]))
10 |
(defprotocol Crudable
  ;; basic create/read/update/delete operations; each accepts a one,
  ;; two or three part compound key
  (fetch [this k] [this k1 k2] [this k1 k2 k3])
  (update! [this k v] [this k1 k2 v] [this k1 k2 k3 v])
  (delete! [this k] [this k1 k2] [this k1 k2 k3])
  (create! [this k v] [this k1 k2 v] [this k1 k2 k3 v]))
16 |
(defn cassandra-store
  "Connect to a cassandra cluster, and use a specific keyspace.
   When the keyspace is not found, try creating it with the supplied
   replication hints (SimpleStrategy with a replication factor of 1
   by default). Yields a connected alia session."
  [{:keys [cassandra-options cluster keyspace hints repfactor username password]}]
  (debug "building cassandra store for: " cluster keyspace hints)
  (let [hints (or hints
                  {:replication {:class "SimpleStrategy"
                                 :replication_factor (or repfactor 1)}})
        ;; accept a single contact point or a collection of them
        cluster (if (sequential? cluster) cluster [cluster])
        session (-> (assoc cassandra-options :contact-points cluster)
                    ;; credentials are only attached when both halves
                    ;; are configured
                    (cond-> (and username password)
                      (assoc :credentials {:user username
                                           :password password}))
                    (alia/cluster)
                    (alia/connect))]
    ;; alia wraps driver errors in ex-info; a missing keyspace surfaces
    ;; as an InvalidQueryException in the :exception slot of ex-data
    (try (alia/execute session (use-keyspace keyspace))
         session
         (catch clojure.lang.ExceptionInfo e
           (let [{:keys [exception]} (ex-data e)]
             (if (and (= (class exception) InvalidQueryException)
                      (re-find #"^[kK]eyspace.*does not exist$"
                               (.getMessage exception)))
               ;; create the keyspace with the replication hints, then
               ;; retry the USE statement once
               (do (alia/execute session
                                 (create-keyspace keyspace (with hints)))
                   (alia/execute session (use-keyspace keyspace))
                   session)
               (throw e)))))))
44 |
--------------------------------------------------------------------------------
/src/io/pithos.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos
2 | "
3 | pithos: object storage daemon
4 | =============================
5 |
6 | Pithos is an object storage daemon with
7 | pluggable implementation of storage
8 | engines. See [pithos.io](http://pithos.io) for details.
9 |
10 | The `io.pithos` namespace is only responsible for parsing
11 | command line arguments, loading configuration and starting
12 | up the appropriate action."
13 | (:gen-class)
14 | (:require [io.pithos.schema :as schema]
15 | [io.pithos.config :as config]
16 | [io.pithos.api :as api]
17 | [io.pithos.system :as system]
18 | [spootnik.uncaught :refer [uncaught]]
19 | [clojure.tools.logging :refer [error]]
20 | [clojure.tools.cli :refer [cli]]))
21 |
(defn get-action
  "Figure out what the expected action is from the command-line."
  [action]
  (case action
    "api-run"        api/run
    "install-schema" schema/converge-schema
    (do (println "unknown action: " action)
        (System/exit 1))))
30 |
(defn get-cli
  "Parse command line arguments and ensure we return a proper structure.

   Yields the [options remaining-args banner] vector produced by
   clojure.tools.cli's legacy `cli` fn, with the :action option
   resolved to its implementing function. Exits the process when
   parsing fails."
  [args]
  (try
    (-> (cli args
             ["-h" "--help" "Show Help"
              :default false :flag true]
             ["-f" "--path" "Configuration file path"
              :default nil]
             ["-q" "--quiet" "Never output to stdout"
              :default false :flag true]
             ["-a" "--action" "Specify an action (api-run, install-schema)"
              :default "api-run"])
        ;; the parsed option map lives in slot 0 of cli's return vector
        (update-in [0 :action] get-action))
    (catch Exception e
      (println "Could not parse arguments: " (.getMessage e))
      (System/exit 1))))
48 |
(defn setup-uncaught
  "Install a default uncaught-exception handler: report to sentry
   when a sentry reporting function is configured, and always log.
   Returns the system untouched so it can sit in a threading chain."
  [{:keys [sentry] :as system}]
  (uncaught e
    ;; (fn? sentry) already implies sentry is non-nil, so the previous
    ;; (and sentry (fn? sentry)) double-check was redundant
    (when (fn? sentry)
      (sentry e))
    (error e "uncaught exception"))
  system)
56 |
(defn -main
  "Main startup path, parse command line arguments, then dispatch to
   appropriate action.

   Only two actions are available:

   - `api-run`: run the S3 api handler
   - `install-schema`: converge cassandra schema"
  [& args]
  (let [[{:keys [path help action quiet]} args banner] (get-cli args)]

    (when help
      (println banner)
      (System/exit 0))

    ;; load configuration from path, install the uncaught-exception
    ;; handler, build the system descriptor and hand it to the action
    (-> path
        (config/init quiet)
        (setup-uncaught)
        (system/system-descriptor)
        action))
  ;; explicit nil so the action's return value never leaks to the caller
  nil)
78 |
--------------------------------------------------------------------------------
/vagrant/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 |
4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
5 | VAGRANTFILE_API_VERSION = "2"
6 | PITHOS_CLUSTER_SIZE = (ENV['PITHOS_CLUSTER_SIZE'] && ENV['PITHOS_CLUSTER_SIZE'].to_i) || 9
7 |
8 | #prod
9 | EXOSCALE_API_KEY = ENV['EXOSCALE_API_KEY']
10 | EXOSCALE_API_SECRET = ENV['EXOSCALE_API_SECRET']
11 | EXOSCALE_INSTANCE_TYPE = ENV['EXOSCALE_INSTANCE_TYPE'] || '350dc5ea-fe6d-42ba-b6c0-efb8b75617ad'
12 | EXOSCALE_TEMPLATE = ENV['EXOSCALE_TEMPLATE'] || '5e705f28-e561-44c7-aba7-67963daf6c9f'
13 | EXOSCALE_ZONE = ENV['EXOSCALE_ZONE'] || '1128bd56-b4d9-4ac6-a7b9-c715b187ce11'
14 | EXOSCALE_HOST = ENV['EXOSCALE_HOST'] || 'api.exoscale.ch'
15 |
16 | EXOSCALE_KEYPAIR = ENV['EXOSCALE_KEYPAIR'] || 'default'
17 |
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

  config.vm.box = "exoscale"
  config.hostmanager.manage_host = true

  # The url from where the 'config.vm.box' box will be fetched if it
  # doesn't already exist on the user's system.


  config.vm.provider :cloudstack do |cloudstack, override|
    cloudstack.host = EXOSCALE_HOST
    cloudstack.path = "/compute"
    cloudstack.port = "443"
    cloudstack.scheme = "https"
    cloudstack.network_id = "00304a04-c7ea-4e77-a786-18bc64347bf7"

    cloudstack.template_id = EXOSCALE_TEMPLATE
    cloudstack.zone_id = EXOSCALE_ZONE


    cloudstack.network_type = "Basic"
    cloudstack.api_key = EXOSCALE_API_KEY
    cloudstack.secret_key = EXOSCALE_API_SECRET
    cloudstack.service_offering_id = EXOSCALE_INSTANCE_TYPE

    cloudstack.keypair = EXOSCALE_KEYPAIR
  end

  config.vm.provision :shell, inline: "apt-get update"
  config.vm.provision :shell, inline: "apt-get -qy install puppet"
  config.vm.provision :hostmanager

  # Define one "storeN" node per cluster member.
  PITHOS_CLUSTER_SIZE.times do |i|
    config.vm.define "store#{i}" do |node|
      node.ssh.username = "root"
      node.ssh.private_key_path = "#{ENV['HOME']}/.ssh/id_rsa"
      node.vm.provision :shell, inline: "hostname store#{i}"
      node.vm.hostname = "store#{i}"
      node.vm.provision :puppet do |puppet|
        puppet.working_directory = "/vagrant"
        puppet.manifests_path = "manifests"
        puppet.module_path = "modules"
        puppet.hiera_config_path = "hiera.yaml"
        puppet.manifest_file = "store.pp"
        puppet.facter = {
          # renamed the block variable: the previous |i| shadowed the
          # outer loop index, which triggers Ruby's shadowing warning
          "cassandra_topology" => PITHOS_CLUSTER_SIZE.times.map { |idx| "store#{idx}" }.join(",")
        }
      end
    end
  end
end
69 |
--------------------------------------------------------------------------------
/vagrant/modules/cassandra/templates/cassandra.yaml.erb:
--------------------------------------------------------------------------------
1 | cluster_name: 'storage-ring'
2 | num_tokens: 8
3 | hinted_handoff_enabled: true
4 | max_hint_window_in_ms: 10800000 # 3 hours
5 | hinted_handoff_throttle_in_kb: 4096
6 | max_hints_delivery_threads: 2
7 | batchlog_replay_throttle_in_kb: 4096
8 | authenticator: AllowAllAuthenticator
9 | authorizer: AllowAllAuthorizer
10 | permissions_validity_in_ms: 2000
11 | partitioner: org.apache.cassandra.dht.Murmur3Partitioner
12 | data_file_directories:
13 | - /var/lib/cassandra/data
14 | commitlog_directory: /var/lib/cassandra/commitlog
15 | disk_failure_policy: stop
16 | commit_failure_policy: stop
17 | key_cache_size_in_mb:
18 | key_cache_save_period: 14400
19 | row_cache_size_in_mb: 0
20 | row_cache_save_period: 0
21 | saved_caches_directory: /var/lib/cassandra/saved_caches
22 | commitlog_sync: periodic
23 | commitlog_sync_period_in_ms: 10000
24 | commitlog_segment_size_in_mb: 32
25 | seed_provider:
26 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider
27 | parameters:
28 | - seeds: "<%= @cassandra_topology %>"
29 | concurrent_reads: 32
30 | concurrent_writes: 32
31 | memtable_flush_queue_size: 4
32 | trickle_fsync: false
33 | trickle_fsync_interval_in_kb: 10240
34 | storage_port: 7000
35 | ssl_storage_port: 7001
36 | listen_address: <%= @hostname %>
37 | start_native_transport: true
38 | native_transport_port: 9042
39 | start_rpc: true
40 | rpc_address: <%= @hostname %>
41 | rpc_port: 9160
42 | rpc_keepalive: true
43 | rpc_server_type: sync
44 | thrift_framed_transport_size_in_mb: 15
45 | incremental_backups: false
46 | snapshot_before_compaction: false
47 | auto_snapshot: true
48 | tombstone_warn_threshold: 1000
49 | tombstone_failure_threshold: 100000
50 | column_index_size_in_kb: 64
51 | in_memory_compaction_limit_in_mb: 64
52 | multithreaded_compaction: false
53 | compaction_throughput_mb_per_sec: 16
54 | compaction_preheat_key_cache: true
55 | read_request_timeout_in_ms: 5000
56 | range_request_timeout_in_ms: 10000
57 | write_request_timeout_in_ms: 2000
58 | cas_contention_timeout_in_ms: 1000
59 | truncate_request_timeout_in_ms: 60000
60 | request_timeout_in_ms: 10000
61 | cross_node_timeout: false
62 | endpoint_snitch: SimpleSnitch
63 | dynamic_snitch_update_interval_in_ms: 100
64 | dynamic_snitch_reset_interval_in_ms: 600000
65 | dynamic_snitch_badness_threshold: 0.1
66 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler
67 | server_encryption_options:
68 | internode_encryption: none
69 | keystore: conf/.keystore
70 | keystore_password: cassandra
71 | truststore: conf/.truststore
72 | truststore_password: cassandra
73 | client_encryption_options:
74 | enabled: false
75 | keystore: conf/.keystore
76 | keystore_password: cassandra
77 | internode_compression: all
78 | inter_dc_tcp_nodelay: false
79 | preheat_kernel_page_cache: false
80 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | :warning: Project not under active development :warning:
2 | -------------------------------------------------------------------
3 |
We're working on open-sourcing another iteration of pithos which offers much better protocol support and better performance.
5 |
6 | No release date known yet
7 |
8 |
9 |
10 | pithos: cassandra object storage
11 | --------------------------------
12 |
13 | Pithos is an S3-compatible object store leveraging cassandra
14 | to distribute contents horizontally.
15 |
16 | Documentation site lives at http://pithos.io
17 |
18 | [](https://travis-ci.org/exoscale/pithos)
19 |
20 |
21 | # Quickstart
22 |
23 | You can use [docker-compose](https://docs.docker.com/compose/) to easily
24 | run the current branch in a Docker container for testing purposes. The
25 | Clojure and Cassandra Docker images will use around 1GB of disk space.
26 |
27 | docker-compose up
28 |
29 | Create a new bucket:
30 |
31 | s3cmd --config doc/s3cmd.cfg mb s3://my-bucket
32 | Bucket 's3://my-bucket/' created
33 |
34 | s3cmd --config doc/s3cmd.cfg ls s3://
35 | 2016-05-27 09:04 s3://my-bucket
36 |
To build and run Pithos manually, continue reading.
38 |
39 | # Prerequisites
40 |
41 | In order to build and run Pithos you will need the following components:
42 |
43 | * Working Java Runtime Environment version 7 or higher
44 | * A [Cassandra](http://cassandra.apache.org/) cluster in version 2 or higher
45 | * [Leiningen](https://github.com/technomancy/leiningen) Clojure package builder
46 |
47 | # Build
48 |
49 | To build pithos run:
50 |
51 | lein uberjar
52 |
53 | you will get a standalone Java jar file in the `target/` directory
54 |
55 | # Run
56 |
57 | To run Pithos manually start it with
58 |
59 | java -jar target/pithos-0.7.5-standalone.jar
60 |
Pithos expects to find a valid configuration file under `/etc/pithos/pithos.yaml`. You can specify a distinct config file using the `-f` switch.
62 |
63 | The following startup switches are available:
64 |
65 | Switches Default Desc
66 | -------- ------- ----
67 | -h, --no-help, --help false Show Help
68 | -f, --path Configuration file path
69 | -q, --no-quiet, --quiet false Never output to stdout
70 | -a, --action api-run Specify an action (api-run, install-schema)
71 |
72 | ## Bootstrapping the environment
73 |
74 | Pithos includes a schema definition file in order to bootstrap your Cassandra cluster.
75 | To install the schema, run:
76 |
77 | java -jar target/pithos-0.7.5-standalone.jar -a install-schema
78 |
79 |
80 | ## Test using the s3cmd command line client
81 |
82 | Have a look at the minimal configuration file provided in
83 | `doc/s3cmd.cfg`. If not running locally, remove the last lines, as
84 | explained in the configuration file.
85 |
86 | Create a bucket:
87 |
    s3cmd -c doc/s3cmd.cfg mb s3://my-bucket
89 |
90 | List your buckets:
91 |
    s3cmd -c doc/s3cmd.cfg ls
93 |
--------------------------------------------------------------------------------
/tasks/leiningen/tar.clj:
--------------------------------------------------------------------------------
1 | (ns leiningen.tar
2 | "Create a tarball suitable for rpm packaging, stolen from riemann."
3 | (:use [clojure.java.shell :only [sh with-sh-dir]]
4 | [clojure.java.io :only [file delete-file writer copy]]
5 | [clojure.string :only [join capitalize trim-newline split trim]]
6 | [leiningen.uberjar :only [uberjar]]))
7 |
(defn delete-file-recursively
  "Delete file f. If it's a directory, recursively delete all its contents.
   Raise an exception if any deletion fails unless silently is true."
  [f & [silently]]
  (System/gc) ; This sometimes helps release files for deletion on windows.
  (let [f (file f)]
    ;; depth-first: children are removed before their parent directory;
    ;; `when` replaces the previous single-branch `if`
    (when (.isDirectory f)
      (doseq [child (.listFiles f)]
        (delete-file-recursively child silently)))
    (delete-file f silently)))
18 |
(defn tar-dir
  "Tar package working directory: target/tar/<name>-<version>."
  [project]
  (let [dirname (format "%s-%s" (:name project) (:version project))]
    (file (:root project) "target" "tar" dirname)))
24 |
(defn cleanup
  "Remove the tar working directory (target/tar) when present."
  [project]
  ; Delete working dir.
  (when (.exists (file (:root project) "target" "tar"))
    (delete-file-recursively (file (:root project) "target" "tar"))))
30 |
(defn reset
  "Remove the working directory and any previously built tarballs."
  [project]
  (cleanup project)
  ;; glob patterns are expanded by a shell, not by rm itself; the old
  ;; (sh "rm" ".../*.tar.bz2") passed the asterisk literally and never
  ;; matched anything, so route the removal through sh -c
  (sh "sh" "-c" (str "rm -f " (:root project) "/target/*.tar.bz2")))
35 |
(defn make-tar-dir
  "Creates the tarball package structure (lib/, bin/, etc/) in a new
   directory and yields that directory."
  [project]
  (let [dir (tar-dir project)]
    (.mkdirs dir)

    ; Jar
    (.mkdirs (file dir "lib"))
    (copy (file (:root project) "target"
                (str "pithos-" (:version project) "-standalone.jar"))
          (file dir "lib" "pithos.jar"))

    ; Binary
    (.mkdirs (file dir "bin"))
    (copy (file (:root project) "pkg" "tar" "pithos")
          (file dir "bin" "pithos"))
    ;; fixed typo: this used to chmod "pithosn", leaving the shipped
    ;; bin/pithos launcher without its executable bit
    (.setExecutable (file dir "bin" "pithos") true false)

    ; Config
    (.mkdirs (file dir "etc"))
    (copy (file (:root project) "pkg" "tar" "pithos.config")
          (file dir "etc" "pithos.config"))

    dir))
60 |
(defn write
  "Write string to file, plus newline"
  [file string]
  (spit file (str (trim-newline string) "\n")))
66 |
(defn md5
  "Computes the md5 checksum of a file. Returns a hex string.
   Shells out to the md5sum binary, so this only works where GNU
   coreutils (or a compatible md5sum) is on the PATH."
  [file]
  (-> (->> file
           str
           (sh "md5sum")
           :out)
      ;; md5sum prints "<hex>  <filename>"; keep the hex part only
      (split #" ")
      first
      trim))
77 |
(defn compress
  "Convert given package directory to a .tar.bz2.
   Produces target/<name>-<version>.tar.bz2 along with a matching
   .md5 checksum file."
  [project tar-dir]
  (let [filename (str (:name project)
                      "-"
                      (:version project)
                      ".tar.bz2")
        tarball (str (file (:root project)
                           "target"
                           filename))]
    ;; run tar from the parent directory so archive entries are
    ;; relative to the package directory's name
    (with-sh-dir (.getParent tar-dir)
      (print (:err (sh "tar" "cvjf" tarball (.getName tar-dir)))))

    (write (str tarball ".md5")
           (str (md5 tarball) " " filename))))
93 |
(defn tar
  "Leiningen entry point: wipe previous artifacts, build the uberjar
   (unless uberjar? is false), assemble and compress the package tree,
   then clean the working directory."
  ([project] (tar project true))
  ([project uberjar?]
     (reset project)
     (when uberjar? (uberjar project))
     (compress project (make-tar-dir project))
     (cleanup project)))
101 |
--------------------------------------------------------------------------------
/test/io/pithos/sig_test.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.sig-test
2 | (:require [clojure.test :refer :all]
3 | [clojure.string :refer [join]]
4 | [io.pithos.sig :refer [string-to-sign]]))
5 |
(deftest string-to-sign-test
  ;; string-to-sign builds the canonical string for AWS signature v2:
  ;; method, content-md5, content-type, a date slot (empty when
  ;; x-amz-date is present, or the expires value for query-string
  ;; auth), the sorted canonicalized x-amz-* headers, and finally the
  ;; resource path. Non x-amz-* custom headers must be ignored.
  (testing "signature with only Date header"
    (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT"}
                            :request-method "GET"
                            :sign-uri "/bucket/batman"
                            :params {}})
           (join "\n" ["GET"
                       ""
                       ""
                       "Thu, 17 Nov 2005 18:49:58 GMT"
                       "/bucket/batman"]))))

  (testing "signature with custom x-amz-* headers"
    (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT"
                                      "x-amz-meta-magic" "magic string"
                                      "x-amz-magic" "batman"}
                            :request-method "GET"
                            :sign-uri "/bucket/batman"
                            :params {}})
           (join "\n" ["GET"
                       ""
                       ""
                       "Thu, 17 Nov 2005 18:49:58 GMT"
                       "x-amz-magic:batman"
                       "x-amz-meta-magic:magic string"
                       "/bucket/batman"]))))

  (testing "signature with non x-amz-headers"
    (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT"
                                      "x-noamz-meta-magic" "magic string"
                                      "x-noamz-magic" "batman"}
                            :request-method "GET"
                            :sign-uri "/bucket/batman"
                            :params {}})
           (join "\n" ["GET"
                       ""
                       ""
                       "Thu, 17 Nov 2005 18:49:58 GMT"
                       "/bucket/batman"]))))

  (testing "signature with both Content-{Md5,Type} headers"
    (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT"
                                      "content-md5" "c8fdb181845a4ca6b8fec737b3581d76"
                                      "content-type" "text/html"}
                            :request-method "GET"
                            :sign-uri "/bucket/batman"
                            :params {}})
           (join "\n" ["GET"
                       "c8fdb181845a4ca6b8fec737b3581d76"
                       "text/html"
                       "Thu, 17 Nov 2005 18:49:58 GMT"
                       "/bucket/batman"]))))

  (testing "signature for GET and x-amz-date header"
    ;; x-amz-date takes precedence: the date slot is left empty and the
    ;; header itself appears among the canonicalized x-amz-* headers
    (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT"
                                      "x-amz-date" "Thu, 17 Nov 2005 18:49:20 GMT"}
                            :request-method "GET"
                            :sign-uri "/bucket/batman"
                            :params {}})
           (join "\n" ["GET"
                       ""
                       ""
                       ""
                       "x-amz-date:Thu, 17 Nov 2005 18:49:20 GMT"
                       "/bucket/batman"]))))

  (testing "signature with query string"
    ;; with an :expires param the expiry timestamp replaces the date
    (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT"}
                            :request-method "GET"
                            :sign-uri "/bucket/batman"
                            :params {:expires "1141889120"}})
           (join "\n" ["GET"
                       ""
                       ""
                       "1141889120"
                       "/bucket/batman"])))))
82 |
--------------------------------------------------------------------------------
/test/io/pithos/meta_test.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.meta-test
2 | (:require [clojure.test :refer :all]
3 | [io.pithos.util :refer [inc-prefix]]
4 | [io.pithos.meta :refer [filter-keys filter-prefixes
5 | get-prefixes]]))
6 |
7 |
(deftest prefixes-and-contents-test

  ;; Each case is a flat run of 9 values matching the destructuring in
  ;; the doseq below: nickname, objects, prefix, delimiter, expected
  ;; prefixes, expected keys, max-keys, marker, truncated?.
  ;; NOTE(review): max-keys, marker and truncated? are carried in the
  ;; data but never asserted on here — presumably exercised elsewhere.
  (let [in-and-outs ["simple list"
                     [{:object "foo/bar.txt"}
                      {:object "foo/baz.txt"}]
                     ""
                     "/"
                     #{"foo/"}
                     []
                     10
                     nil
                     false

                     "object shows up"
                     [{:object "foo.txt"}]
                     "foo.txt"
                     "/"
                     #{}
                     [{:object "foo.txt"}]
                     10
                     nil
                     false


                     "no delimiter"
                     [{:object "foo/bar.txt"}
                      {:object "foo/baz.txt"}]
                     ""
                     nil
                     #{}
                     [{:object "foo/bar.txt"}
                      {:object "foo/baz.txt"}]
                     10
                     nil
                     false

                     "simple list with prefix"
                     [{:object "foo/bar.txt"}
                      {:object "foo/baz.txt"}
                      {:object "batman/foo.txt"}]
                     "foo/"
                     "/"
                     #{}
                     [{:object "foo/bar.txt"}
                      {:object "foo/baz.txt"}]
                     10
                     nil
                     false

                     "with prefix but no delimiter"
                     [{:object "foo-bar.txt"}
                      {:object "foo-baz.txt"}
                      {:object "batman-foo.txt"}]
                     "foo-"
                     nil
                     #{}
                     [{:object "foo-bar.txt"}
                      {:object "foo-baz.txt"}]
                     10
                     nil
                     false
                     ]]
    (doseq [[nickname objects prefix delimiter
             prefixes keys max-keys marker truncated?]
            (partition 9 in-and-outs)]
      (testing (str "valid output for " nickname)

        ;; keys are the remaining objects once rolled-up prefixes have
        ;; been removed from the filtered listing
        (let [found-prefixes (filter-prefixes objects prefix delimiter)]
          (is (= prefixes found-prefixes))
          (is (= keys (remove found-prefixes
                              (filter-keys objects prefix delimiter)))))))))
79 |
80 |
81 | (defn make-fetcher
82 | "Provide a simulation of cassandra's wide row storage for testing
83 | Alternate store implementations will need to provide the same properties"
84 | [input]
85 | (fn [prefix marker limit init?]
86 | (let [>pred #(or (= (:object %) (or marker prefix))
87 | (not (.startsWith (or (:object %) "")
88 | (or marker prefix ""))))
89 | > input
92 | (sort-by :object)
93 | (drop-while >pred)
94 | (take-while
13 |
14 | # Do NOT "set -e"
15 |
16 | # PATH should only include /usr/* if it runs after the mountnfs.sh script
17 | PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin
18 | DESC="Pithos"
19 | NAME=pithos
20 | DAEMON=/usr/bin/pithos
21 | DAEMON_ARGS="-f /etc/pithos/pithos.yaml"
22 | DAEMON_USER=pithos
23 | PIDFILE=/var/run/$NAME.pid
24 | SCRIPTNAME=/etc/init.d/$NAME
25 |
26 | # Exit if the package is not installed
27 | [ -x "$DAEMON" ] || exit 0
28 |
29 | # Read configuration variable file if it is present
30 | [ -r /etc/default/$NAME ] && . /etc/default/$NAME
31 |
32 | # Load the VERBOSE setting and other rcS variables
33 | . /lib/init/vars.sh
34 |
35 | # Define LSB log_* functions.
36 | # Depend on lsb-base (>= 3.2-14) to ensure that this file is present
37 | # and status_of_proc is working.
38 | . /lib/lsb/init-functions
39 |
# Function that starts the daemon/service
do_start()
{
    # Return
    #   0 if daemon has been started
    #   1 if daemon was already running
    #   2 if daemon could not be started
    pid=$( pidofproc -p $PIDFILE "$NAME")
    if [ -n "$pid" ] ; then
        log_daemon_msg "Pithos is already running (PID `cat ${PIDFILE}`)"
        return 1
    fi
    # NOTE(review): with --background --make-pidfile the pidfile records
    # the PID of the process start-stop-daemon forks; confirm that
    # /usr/bin/pithos exec's the JVM so the recorded PID stays valid.
    start-stop-daemon --start --quiet --chuid $DAEMON_USER --chdir / --make-pidfile --background --pidfile $PIDFILE --exec $DAEMON -- \
        $DAEMON_ARGS \
        || return 2
    # Add code here, if necessary, that waits for the process to be ready
    # to handle requests from services started subsequently which depend
    # on this one. As a last resort, sleep for some time.
}
59 |
# Function that stops the daemon/service
do_stop()
{
    # Return
    #   0 if daemon has been stopped
    #   1 if daemon was already stopped
    #   2 if daemon could not be stopped
    #   other if a failure occurred
    # --retry=TERM/30/KILL/5: SIGTERM, wait up to 30s, then SIGKILL
    # and wait up to 5 more seconds.
    start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE
    RETVAL="$?"
    [ "$RETVAL" = 2 ] && return 2
    # Wait for children to finish too if this is a daemon that forks
    # and if the daemon is only ever run from this initscript.
    # If the above conditions are not satisfied then add some other code
    # that waits for the process to drop all resources that could be
    # needed by services started subsequently. A last resort is to
    # sleep for some time.
    start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON
    [ "$?" = 2 ] && return 2
    # Many daemons don't delete their pidfiles when they exit.
    rm -f $PIDFILE
    return "$RETVAL"
}
83 |
# Function that sends a SIGHUP to the daemon/service
do_reload() {
    # Ask the running daemon (located through its pidfile) to re-read
    # its configuration by delivering SIGHUP. The function's exit
    # status is that of start-stop-daemon, since it is the last
    # command executed.
    start-stop-daemon --stop --quiet --signal HUP --pidfile $PIDFILE
}
94 |
# Dispatch on the init action passed as the first argument.
case "$1" in
    start)
        [ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME"
        do_start
        case "$?" in
            # 0: started, 1: already running -- both count as success
            0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;;
            2) [ "$VERBOSE" != no ] && log_end_msg 1 ;;
        esac
        ;;
    stop)
        [ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME"
        do_stop
        case "$?" in
            0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;;
            2) [ "$VERBOSE" != no ] && log_end_msg 1 ;;
        esac
        ;;
    status)
        status_of_proc "$DAEMON" "$NAME" && exit 0 || exit $?
        ;;
    reload|force-reload)
        log_daemon_msg "Reloading $DESC" "$NAME"
        do_reload
        log_end_msg $?
        ;;
    restart)
        # Only attempt a start when the stop succeeded (or was a no-op)
        log_daemon_msg "Restarting $DESC" "$NAME"
        do_stop
        case "$?" in
            0|1)
                do_start
                case "$?" in
                    0) log_end_msg 0 ;;
                    1) log_end_msg 1 ;; # Old process is still running
                    *) log_end_msg 1 ;; # Failed to start
                esac
                ;;
            *)
                # Failed to stop
                log_end_msg 1
                ;;
        esac
        ;;
    *)
        echo "Usage: $SCRIPTNAME {start|stop|status|restart|reload|force-reload}" >&2
        exit 3
        ;;
esac

# Exit successfully by default (":" is a no-op with status 0)
:
145 |
--------------------------------------------------------------------------------
/src/io/pithos/acl.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.acl
2 | "The purpose of this namespace is to coerce to and from
3 | the internal representation of ACLs.
4 |
5 | The current representation is:
6 |
7 | {:FULL_CONTROL
8 | [{:ID \"AUTH_KEY\" :DisplayName \"Some Name\"}
9 | {:URI \"http://groups/group-uri\"}]
10 | ...}
11 | "
12 | (:require [clojure.data.xml :refer [parse-str emit-str indent-str]]
13 | [clojure.zip :refer [xml-zip node root]]
14 | [clojure.data.zip :refer [children]]
15 | [clojure.data.zip.xml :refer [xml-> xml1-> text]]
16 | [io.pithos.xml :refer [seq->xml]]))
17 |
18 | ;; ### XML ACL parsing functions
19 | ;;
20 | ;; We're doing a very sloppy type of schema validation
21 | ;; this should likely move to a stricter XSD validation
22 | ;; phase.
23 |
(def valid-permission?
  "Set of permission keywords that may legally appear in an ACL grant."
  #{:FULL_CONTROL :READ :WRITE :READ_ACP :WRITE_ACP})
27 |
(def valid-grantee-tag?
  "Set of XML tags recognized inside a Grantee node."
  #{:ID :DisplayName :URI :EmailAddress})
31 |
(def known-uris
  "Maps well-known S3 group URIs to their internal group names."
  {"http://acs.amazonaws.com/groups/global/AllUsers" "anonymous"})
35 |
(def known-groups
  "Inverse of `known-uris`: maps internal group names back to their
   well-known S3 URIs."
  ;; (into {} xform coll) is the idiomatic map inversion, replacing
  ;; (reduce merge {} (map ...))
  (into {} (map (juxt val key)) known-uris))
39 |
(defn node->grantee-spec
  "Turn one grantee child node into a single-entry map (ID, DisplayName
   or URI). Known group URIs are rewritten to their internal group name
   and EmailAddress grantees become ID entries. Yields nil for unknown
   tags or missing text content."
  [n]
  (let [{:keys [tag content]} (node n)
        text (first content)]
    (when (and (valid-grantee-tag? tag) (string? text))
      (case tag
        :URI          {tag (or (known-uris text) text)}
        :EmailAddress {:ID text}
        {tag text}))))
50 |
(defn node->grantee
  "Merge every grantee specifier of a :Grantee node into a single map."
  [n]
  (apply merge {} (xml-> n children node->grantee-spec)))
55 |
(defn node->grant
  "Each grant in an input body should contain at least an ID and DisplayName or
   a URI. Yields a single-entry map of permission keyword to vector of
   grantees; a missing permission text becomes :invalid, which is later
   rejected by xml->acl's valid-permission? check."
  [node]
  (hash-map
   (xml1-> node :Permission text (fnil keyword "invalid"))
   (vec (xml-> node :Grantee node->grantee))))
63 |
64 |
(defn safe-xml-zip
  "Ingest an XML representation, safely, throwing explicit
   and detailed errors. Returns an xml zipper whose root node
   must be :AccessControlPolicy."
  [src]
  (try
    (let [tree (xml-zip (parse-str src))
          {:keys [tag]} (root tree)]
      (when-not (= :AccessControlPolicy tag)
        (throw (ex-info "XML Root Node should be AccessControlPolicy"
                        {:type :invalid-xml-root-node
                         :expected :AccessControlPolicy
                         :got tag})))
      tree)
    (catch clojure.lang.ExceptionInfo e
      (throw e))
    (catch Exception e
      ;; keep the parser error as the exception cause instead of
      ;; silently discarding it; carry a status code for the API layer
      (throw (ex-info "Invalid XML in ACL Body"
                      {:type :invalid-acl-xml
                       :status-code 400}
                      e)))))
84 |
(defn xml->acl
  "Given an XML source, try to parse it and return a valid internal
   ACL representation: a map of permission keyword to vector of
   grantees. Grants carrying the same permission are concatenated.
   Throws a 400 ex-info when any permission is unknown."
  [src]
  (let [xml-tree (safe-xml-zip src)
        policies (xml-> xml-tree
                        :AccessControlList
                        :Grant
                        node->grant)
        ;; merge the single-entry grant maps, concatenating grantee
        ;; vectors when two grants name the same permission
        policy (apply merge-with (comp vec concat) policies)]
    (when-not (every? valid-permission? (keys policy))
      (throw (ex-info "Invalid XML Acl Body" {:type :invalid-acl-xml
                                              :status-code 400})))
    policy))
98 |
(defn grant->permission
  "Generate Grant XML tags from one [permission grantees] map entry.
   URI grantees render as Group grantees, anything else renders as a
   CanonicalUser grantee."
  [[permission grantees]]
  (let [xmlns-xsi "http://www.w3.org/2001/XMLSchema-instance"
        group     (fn [{:keys [DisplayName URI]}]
                    [:Grantee {:xmlns:xsi xmlns-xsi :xsi:type "Group"}
                     [:URI (or (known-groups URI) URI)]
                     [:DisplayName (or DisplayName URI)]])
        canonical (fn [{:keys [ID DisplayName]}]
                    [:Grantee {:xmlns:xsi xmlns-xsi :xsi:type "CanonicalUser"}
                     [:ID ID]
                     [:DisplayName (or DisplayName ID)]])]
    (for [{:keys [URI] :as grantee} grantees]
      [:Grant
       (if URI (group grantee) (canonical grantee))
       [:Permission (name permission)]])))
113 |
(defn as-xml
  "Given an internal representation of an ACL, output a valid
   XML representation.
   Optionally supply a boolean to indicate whether to indent the output"
  ([grants indent?]
     (let [xmlns "http://s3.amazonaws.com/doc/2006-03-01/"
           format (if indent? indent-str emit-str)]
       (format
        (seq->xml
         [:AccessControlPolicy {:xmlns xmlns}
          ;; NOTE(review): Owner is hard-coded to placeholder values
          ;; here -- confirm whether any caller relies on them
          [:Owner
           [:ID "foo"]
           [:DisplayName "bar"]]
          (apply vector :AccessControlList
                 (mapcat grant->permission grants))]))))
  ([grants]
     (as-xml grants false)))
131 |
--------------------------------------------------------------------------------
/src/io/pithos/sig.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.sig
2 | "Compute request signatures as described in
3 | http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html"
4 | (:require [clojure.string :as s]
5 | [clojure.tools.logging :refer [info debug]]
6 | [clojure.data.codec.base64 :as base64]
7 | [clj-time.core :refer [after? now]]
8 | [clj-time.coerce :refer [to-date-time]]
9 | [constance.comp :refer [===]]
10 | [io.pithos.util :refer [date->rfc822]])
11 | (:import javax.crypto.Mac javax.crypto.spec.SecretKeySpec))
12 |
(defn canonicalized
  "Collect the x-amz-* headers as \"name:value\" lines in lexicographic
   order (values trimmed), then append the canonical URI. Lines are
   joined with newlines, as required by the S3 signing scheme."
  [headers uri]
  (let [amz?     (fn [[k _]] (.startsWith ^String (name k) "x-amz"))
        amz-line (fn [[k v]] (str (name k) ":" (s/trim v)))
        lines    (->> headers
                      (filter amz?)
                      (sort-by (comp name key))
                      (map amz-line))]
    (s/join "\n" (concat lines [uri]))))
23 |
(defn string-to-sign
  "Yield the string to sign for an incoming request.

   The date slot uses, in order of precedence, the :expires query
   parameter, then the date header -- but only when no x-amz-date
   header is present, since x-amz-date travels in the canonicalized
   header section instead."
  [{:keys [headers request-method sign-uri params] :as request}]
  (let [content-md5  (get headers "content-md5")
        content-type (get headers "content-type")
        date         (or (get params :expires)
                         ;; when-not replaces the one-armed if-not:
                         ;; the date slot stays blank with x-amz-date
                         (when-not (get headers "x-amz-date")
                           (get headers "date")))]
    (s/join
     "\n"
     [(-> request-method name s/upper-case)
      (or content-md5 "")
      (or content-type "")
      (or date "")
      (canonicalized headers sign-uri)])))
39 |
(defn sign-string
  "HMAC-SHA1 sign `src` with `secret-key` and base64-encode the result.
   Charsets are pinned to UTF-8 so signatures no longer depend on the
   JVM's platform default encoding; encoding uses the JDK's standard
   base64 encoder (same alphabet and padding as the previous
   clojure.data.codec encoder, no line wrapping)."
  [src secret-key]
  (let [key (SecretKeySpec. (.getBytes ^String secret-key "UTF-8") "HmacSHA1")
        mac (doto (Mac/getInstance "HmacSHA1") (.init key))]
    (.encodeToString (java.util.Base64/getEncoder)
                     (.doFinal mac (.getBytes ^String src "UTF-8")))))
46 |
(defn sign-request
  "Sign the request, signatures are basic HmacSHA1s, encoded in base64"
  [request secret-key]
  (-> request
      string-to-sign
      (sign-string secret-key)))
51 |
(defn auth
  "Extract access key and signature from the request, either from the
   Authorization header (\"AWS <access-key>:<signature>\") or from the
   awsaccesskeyid/signature query-string parameters. Yields nil when
   the request carries no credentials at all; a present but malformed
   Authorization header yields a map with nil entries, as before."
  [request]
  (if-let [auth-str (get-in request [:headers "authorization"])]
    (let [[_ access-key sig] (re-matches #"^[Aa][Ww][Ss] (.*):(.*)$" auth-str)]
      {:sig sig :access-key access-key})
    (let [access-key (get-in request [:params :awsaccesskeyid])
          sig        (get-in request [:params :signature])]
      ;; when replaces (if ... x nil)
      (when (and access-key sig)
        {:sig sig :access-key access-key}))))
64 |
(defn check-sig
  "Verify a pre-computed signature `sig` against the signature of the
   payload `str` under the secret of `key` in the keystore. Throws a
   403 ex-info on mismatch, otherwise yields the authorization with
   \"authenticated-users\" added to its groups.
   NOTE(review): parameters `str` and `key` shadow the clojure.core
   functions of the same names within this body."
  [request keystore key str sig]
  (let [{:keys [secret] :as authorization} (get keystore key)
        ;; signing can fail (e.g. unknown key -> nil secret); keep the
        ;; exception around so it can be logged below
        signed (try (sign-string str secret)
                    (catch Exception e
                      {:failed true :exception e}))]
    ;; === is a constant-time comparison, mitigating timing attacks
    (when-not (and (not (nil? sig))
                   (string? signed)
                   (=== sig signed))
      (info "will throw because of failed signature!")
      (when (:exception signed)
        (debug (:exception signed) "got exception during signing"))
      (throw (ex-info "invalid policy signature"
                      {:type :signature-does-not-match
                       :status-code 403
                       :request request
                       :expected signed
                       :to-sign str})))
    (update-in authorization [:memberof] concat ["authenticated-users"])))
84 |
(def anonymous
  "Identity handed to unauthenticated requests"
  {:tenant :anonymous :memberof ["anonymous"]})
86 |
(defn validate
  "Validate an incoming request (e.g: make sure the signature is correct),
   when applicable (requests may be unauthenticated, in which case the
   anonymous identity is returned).

   Looks up the access key in the keystore, recomputes the request's
   signature and compares it with the supplied one (constant-time via
   ===), then enforces the optional :expires parameter used by
   pre-signed URLs. On success the tenant's groups gain
   \"authenticated-users\" and \"anonymous\"."
  [keystore request]
  (if-let [data (auth request)]
    (let [{:keys [sig access-key]} data
          {:keys [secret] :as authorization} (get keystore access-key)
          ;; signing can fail (e.g. unknown key -> nil secret); keep
          ;; the exception around so it can be logged below
          signed (try (sign-request request secret)
                      (catch Exception e
                        {:failed true :exception e}))]
      (when-not (and (not (nil? sig))
                     (string? signed)
                     (=== sig signed))
        (info "will throw because of failed signature!")
        (when (:exception signed)
          (debug (:exception signed) "got exception during signing"))
        (debug "string-to-sign: " (string-to-sign request))
        (throw (ex-info "invalid request signature"
                        {:type :signature-does-not-match
                         :status-code 403
                         :request request
                         :expected signed
                         :to-sign (string-to-sign request)})))
      ;; expiry of pre-signed URLs is only checked once the signature
      ;; has been verified; :expires is epoch seconds
      (when-let [expires (get-in request [:params :expires])]
        (let [expires (to-date-time (* 1000 (Integer/parseInt expires)))]
          (when (after? (now) expires)
            (throw (ex-info "expired request"
                            {:type :expired-request
                             :status-code 403
                             :request request
                             :expires (date->rfc822 expires)})))))
      (update-in authorization [:memberof] concat ["authenticated-users"
                                                   "anonymous"]))
    anonymous))
121 |
--------------------------------------------------------------------------------
/src/io/pithos/util.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.util
2 | "A few utility functions, used in several places"
3 | (:import [java.io PipedInputStream PipedOutputStream]
4 | [java.lang Math]
5 | [org.joda.time DateTimeZone])
6 | (:require [clojure.string :as s]
7 | [clojure.string :refer [lower-case]]
8 | [clj-time.core :refer [now]]
9 | [clj-time.format :refer [formatters parse unparse formatter]]))
10 |
(defn uri-decode
  "URL-decode `s` using UTF-8; nil-safe."
  [s]
  (some-> s (java.net.URLDecoder/decode "UTF-8")))
15 |
(defn md5-init
  "Yield a freshly reset MD5 MessageDigest instance"
  []
  (let [digest (java.security.MessageDigest/getInstance "MD5")]
    (.reset digest)
    digest))
20 |
(defn md5-update
  "Mix bytes of byte-array `ba` into the digest, starting at offset
   `from` for `to` bytes (the arguments mirror MessageDigest.update's
   offset/length). Synchronized on the digest, which is not
   thread-safe; returns the digest for chaining."
  [hash ba from to]
  (locking hash
    (.update ^java.security.MessageDigest hash ba from to)
    hash))
27 |
(defn md5-sum
  "Yield the digest of `hash` as a 32-character, zero-padded,
   lower-case hex string."
  [hash]
  ;; %032x zero-pads the hex rendering, replacing the manual
  ;; pad-computation dance of the previous version
  (format "%032x" (java.math.BigInteger. 1 (.digest hash))))
34 |
(defn inc-prefix
  "Given an object path, yield the next semantic one: the same string
   with its final character bumped to the following code point.
   Nil for nil or empty input."
  [p]
  (when (seq p)
    (let [last-idx (dec (count p))]
      (str (subs p 0 last-idx)
           (char (inc (int (nth p last-idx))))))))
42 |
(def byte-factors
  "1024 factor of corresponding storage unit"
  {"k" 1 "m" 2 "g" 3 "t" 4 "p" 5})

(def byte-pattern
  "Regular expression pattern for data size: digits plus an optional
   case-insensitive unit suffix (k/m/g/t/p, optionally followed by b)"
  #"([0-9]+)(([kKmMgGtTpP])[bB]?)?")

(defn to-bytes
  "Parse an input string into a byte amount; the input may carry one
   of the unit suffixes matched by byte-pattern. Nil input yields nil;
   an unparseable amount throws an ex-info whose message is labelled
   with `param` when given."
  [input & [param]]
  (when input
    (let [match (re-find byte-pattern (str input))]
      (when-not match
        (throw (ex-info (format "invalid byte amount [%s]: %s"
                                (or param "") input) {})))
      (let [[_ amount _ factor] match
            scale (if factor
                    (Math/pow 1024 (get byte-factors (lower-case factor)))
                    1)]
        (long (* (Long/parseLong amount) scale))))))
64 |
65 |
(defn piped-input-stream
  "yield a vector of two interconnected streams:
   [PipedInputStream PipedOutputStream]"
  []
  (let [os (PipedOutputStream.)]
    [(PipedInputStream. os) os]))
72 |
(defn parse-uuid
  "Parse the string representation of a uuid.
   NOTE(review): shadows clojure.core/parse-uuid on Clojure >= 1.11;
   unlike the core version, this throws on malformed input rather
   than returning nil."
  [s]
  (java.util.UUID/fromString s))
77 |
(def gmt
  "The GMT timezone, only fetched once"
  (DateTimeZone/forID "GMT"))

(def rfc822-format
  ;; no zone token in the pattern: the " GMT" suffix is appended
  ;; manually by date->rfc822 below
  (formatter "EEE, dd MMM yyyy HH:mm:ss" gmt))

(defn date->rfc822
  "Render a date as an RFC822 string with an explicit GMT suffix"
  [d]
  (str (unparse rfc822-format d) " GMT"))
88 |
(defn iso8601->date
  "Parse an iso8601 timestamp string into a date"
  [isodate]
  (parse (:date-time-parser formatters) isodate))

(defn iso8601->rfc822
  "RFC822 representation based on an iso8601 timestamp"
  [isodate]
  (->> (parse (:date-time-parser formatters) isodate)
       (date->rfc822)))

(defn iso8601
  "iso8601 timestamp representation"
  [date]
  (unparse (:date-time formatters) date))

(defn iso8601-timestamp
  "String representation of the current timestamp in UTC"
  []
  (iso8601 (now)))
108 |
(def ^:private regex-char-esc-smap
  "Characters to be escaped in a regular pattern (including inside a set).
   See https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html"
  (let [esc-chars "[]{}()<>*+^$?|\\.&-!#"]
    (zipmap esc-chars
            (map #(str "\\" %) esc-chars))))

(defn string->pattern
  "Escape a string so it can be embedded verbatim in a regular pattern"
  [string]
  ;; clojure.core/replace over a string yields a sequence of
  ;; characters/strings; glue them back together
  (apply str (replace regex-char-esc-smap string)))
122 |
(defn interpol
  "Interpolate ${key} placeholders in `s` from the `args` map, whose
   keys are keywords. Unknown keys expand to the empty string."
  [s args]
  (letfn [(placeholder->key [m]
            ;; strip the leading ${ and the trailing }
            (keyword (subs m 2 (dec (count m)))))]
    (s/replace s #"\$\{[^}]*\}"
               (fn [m] (get args (placeholder->key m) "")))))
127 |
128 |
(defmacro cond-with
  "Takes a symbol and a set of test/expr pairs. It evaluates
   each test one at a time. If a test returns logical true,
   cond-with evaluates the corresponding expr, binding the
   symbol to the test's return. The return value of the expr
   is returned and no more tests are evaluated. If no test/expr
   pairs are present, nil is returned. An odd number of clauses
   will throw an exception (at macro-expansion time)."
  [sym & clauses]
  (when clauses
    (list 'if-let [`~sym (first clauses)]
          (if (next clauses)
            (second clauses)
            ;; this throw executes while the macro expands, not at
            ;; runtime: odd clause counts fail compilation
            (throw (IllegalArgumentException.
                    "cond-with requires an even number of forms.")))
          ;; recursively expand the remaining pairs, threading the
          ;; binding symbol through
          (cons `cond-with (conj (next (next clauses)) `~sym)))))
145 |
(defmacro cond-let
  "Takes a set of test/expr pairs. Tests may be
   expressions or binding vectors. If a test returns logical true,
   cond-let evaluates the corresponding expr; if a binding vector was
   provided, the expr will be evaluated within that context. The
   return value of the expr is returned and no more tests are
   evaluated. If no test/expr pairs are present, nil is returned.
   An odd number of clauses will throw an exception (at
   macro-expansion time)."
  [& clauses]
  (when clauses
    ;; a vector test becomes if-let (introducing bindings), any other
    ;; test a plain if
    (list (if (vector? (first clauses)) 'if-let 'if)
          (first clauses)
          (if (next clauses)
            (second clauses)
            ;; expansion-time failure for odd clause counts
            (throw (IllegalArgumentException.
                    "cond-let requires an even number of forms.")))
          (cons `cond-let (next (next clauses))))))
163 |
--------------------------------------------------------------------------------
/tasks/leiningen/fatdeb.clj:
--------------------------------------------------------------------------------
1 | (ns leiningen.fatdeb
2 | "Build a .deb package from leiningen, stolen from riemann."
3 | (:refer-clojure :exclude [replace])
4 | (:require [clojure.java.shell :refer [sh]]
5 | [clojure.java.io :refer [file delete-file writer copy]]
6 | [clojure.string :refer [join capitalize trim-newline replace]]
7 | [leiningen.uberjar :refer [uberjar]])
8 | (:import java.text.SimpleDateFormat
9 | java.util.Date))
10 |
(defn md5
  "MD5 digest of `input`, as a 32-character zero-padded lower-case
   hex string."
  [input]
  (let [digest (-> (doto (java.security.MessageDigest/getInstance "MD5")
                     (.reset)
                     (.update (.getBytes input)))
                   (.digest))]
    ;; %032x zero-pads the result: BigInteger.toString drops leading
    ;; zeros, which previously yielded checksums shorter than 32 chars
    (format "%032x" (java.math.BigInteger. 1 digest))))
18 |
(defn delete-file-recursively
  "Delete file f. If it's a directory, recursively delete all its contents.
   Raise an exception if any deletion fails unless silently is true."
  [f & [silently]]
  (System/gc) ; This sometimes helps release files for deletion on windows.
  (let [f (file f)]
    ;; when replaces the one-armed if
    (when (.isDirectory f)
      (doseq [child (.listFiles f)]
        (delete-file-recursively child silently)))
    (delete-file f silently)))
29 |
(defn deb-dir
  "Debian package working directory, under the project's target dir."
  [project]
  (file (:root project) "target" "deb" "pithos"))
34 |
(defn cleanup
  "Remove the debian working directory, when present."
  [project]
  (let [dir (deb-dir project)]
    (when (.exists dir)
      (delete-file-recursively dir))))
40 |
(defn reset
  "Remove the working directory and any previously built .deb packages
   from target/. Deletion is done from the JVM: the previous
   (sh \"rm\" \".../*.deb\") handed a literal glob to rm, which is
   never expanded without a shell and therefore deleted nothing."
  [project]
  (cleanup project)
  (doseq [f (.listFiles (file (:root project) "target"))
          :when (.endsWith (.getName f) ".deb")]
    ;; silent delete: a vanished file is not an error here
    (delete-file f true)))
45 |
;; captured once at load time so every artifact of one build run
;; shares the same timestamp
(def build-date (Date.))

(defn get-version
  "Package version: the project version, with SNAPSHOT replaced by a
   yyyyMMdd-HHmmss build timestamp."
  [project]
  (let [df (SimpleDateFormat. "yyyyMMdd-HHmmss")]
    (replace (:version project) #"SNAPSHOT" (.format df build-date))))
52 |
(defn control
  "Render the debian control file for the pithos-s3 package as
   \"Field: value\" lines."
  [project]
  (let [fields {:package (str (:name project) "-s3")
                :version (get-version project)
                :section "base"
                :priority "optional"
                :architecture "all"
                :depends (join ", " ["bash" "java7-runtime-headless | openjdk-7-jre-headless"])
                :maintainer (:email (:maintainer project))
                :description (:description project)}]
    (join "\n"
          (for [[k v] fields]
            (str (capitalize (name k)) ": " v)))))
66 |
(defn write
  "Write string to file, plus newline"
  [file string]
  ;; spit handles opening and closing the writer
  (spit file (str (trim-newline string) "\n")))
72 |
(defn make-deb-dir
  "Creates the debian package structure in a new directory:
   DEBIAN metadata (control, conffiles, maintainer scripts), the
   uberjar, the launcher, log dir, config, defaults and init script.
   Returns the directory so it can be handed to dpkg."
  [project]
  (let [dir (deb-dir project)]
    (.mkdirs dir)

    ;; Meta
    (.mkdirs (file dir "DEBIAN"))

    (write (file dir "DEBIAN" "control") (control project))
    ;; conffiles marks files dpkg must preserve on upgrade
    (write (file dir "DEBIAN" "conffiles")
           (join "\n" ["/etc/pithos/pithos.yaml"
                       "/etc/init.d/pithos"
                       "/etc/default/pithos"]))

    ;; Preinst
    (copy (file (:root project) "pkg" "deb" "preinst.sh")
          (file dir "DEBIAN" "preinst"))
    (.setExecutable (file dir "DEBIAN" "preinst") true false)

    ;; Postinst
    (copy (file (:root project) "pkg" "deb" "postinst.sh")
          (file dir "DEBIAN" "postinst"))
    (.setExecutable (file dir "DEBIAN" "postinst") true false)

    ;; Prerm
    (copy (file (:root project) "pkg" "deb" "prerm.sh")
          (file dir "DEBIAN" "prerm"))
    (.setExecutable (file dir "DEBIAN" "prerm") true false)

    ;; Postrm
    (copy (file (:root project) "pkg" "deb" "postrm.sh")
          (file dir "DEBIAN" "postrm"))
    (.setExecutable (file dir "DEBIAN" "postrm") true false)

    ;; Jar
    (.mkdirs (file dir "usr" "lib" "pithos"))
    (copy (file (:root project) "target"
                (str "pithos-" (:version project) "-standalone.jar"))
          (file dir "usr" "lib" "pithos" "pithos.jar"))


    ;; Binary
    (.mkdirs (file dir "usr" "bin"))
    (copy (file (:root project) "pkg" "deb" "pithos")
          (file dir "usr" "bin" "pithos"))
    (.setExecutable (file dir "usr" "bin" "pithos") true false)

    ; Log dir
    (.mkdirs (file dir "var" "log" "pithos"))

    ; Config
    (.mkdirs (file dir "etc" "pithos"))
    (copy (file (:root project) "doc" "pithos.yaml")
          (file dir "etc" "pithos" "pithos.yaml"))

    ; defaults file
    (.mkdirs (file dir "etc" "default"))
    (copy (file (:root project) "pkg" "deb" "pithos.default")
          (file dir "etc" "default" "pithos"))

    ; Init script
    (.mkdirs (file dir "etc" "init.d"))
    (copy (file (:root project) "pkg" "deb" "init.sh")
          (file dir "etc" "init.d" "pithos"))
    (.setExecutable (file dir "etc" "init.d" "pithos") true false)

    dir))
141 |
(defn dpkg
  "Convert given package directory to a .deb, echoing dpkg's stderr,
   then write a companion .md5 checksum file next to the package."
  [project deb-dir]
  (print (:err (sh "dpkg" "--build"
                   (str deb-dir)
                   (str (file (:root project) "target")))))
  (let [deb-file-name (str (:name project) "-s3_"
                           (get-version project) "_"
                           "all" ".deb")
        deb-file (file (:root project) "target" deb-file-name)]
    (write (str deb-file ".md5")
           (str (md5 (slurp deb-file)) " " deb-file-name))))
154 |
(defn fatdeb
  "Leiningen entry point: clean previous artifacts, build the uberjar,
   assemble the package tree, produce the .deb, then clean up."
  ([project]
     (reset project)
     (uberjar project)
     (dpkg project (make-deb-dir project))
     (cleanup project)
     (flush)))
162 |
--------------------------------------------------------------------------------
/src/io/pithos/cors.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.cors
2 | (:refer-clojure :exclude [replace])
3 | (:require [clojure.data.xml :refer [parse-str emit-str indent-str]]
4 | [clojure.string :refer [upper-case lower-case replace join split]]
5 | [clojure.tools.logging :refer [debug]]
6 | [clojure.zip :refer [xml-zip node root]]
7 | [clojure.data.zip :refer [children]]
8 | [clojure.data.zip.xml :refer [xml-> xml1-> text]]
9 | [io.pithos.util :refer [string->pattern]]
10 | [io.pithos.xml :refer [seq->xml]]))
11 |
(defn node->rule
  "Coerce a :CORSRule XML node into the internal rule representation:
   a map of :origins, :methods (lower-case keywords), :headers,
   :exposed and :max-age (string or nil)."
  [node]
  {:origins (vec (xml-> node :AllowedOrigin text))
   ;; mapv replaces (vec (map ...))
   :methods (mapv (comp keyword lower-case)
                  (xml-> node :AllowedMethod text))
   :headers (vec (xml-> node :AllowedHeader text))
   :exposed (vec (xml-> node :ExposeHeader text))
   :max-age (xml1-> node :MaxAgeSeconds text)})
21 |
(defn safe-xml-zip
  "Ingest an XML representation, safely, throwing explicit
   and detailed errors. Returns an xml zipper whose root node
   must be :CORSConfiguration."
  [src]
  (try
    (let [tree (xml-zip (parse-str src))
          {:keys [tag]} (root tree)]
      (when-not (= :CORSConfiguration tag)
        (throw (ex-info "XML Root Node should be CORSConfiguration"
                        {:type :invalid-xml-root-node
                         :expected :CORSConfiguration
                         :got tag})))
      tree)
    (catch clojure.lang.ExceptionInfo e
      (throw e))
    (catch Exception e
      ;; keep the parser error as the exception cause instead of
      ;; silently discarding it; carry a status code for the API layer
      (throw (ex-info "Invalid XML in CORS Body"
                      {:type :invalid-cors-xml
                       :status-code 400}
                      e)))))
41 |
(defn xml->cors
  "Parse an XML CORS configuration into a vector of rule maps (see
   node->rule); malformed input raises explicit ex-infos through
   safe-xml-zip."
  [src]
  (let [xml-tree (safe-xml-zip src)
        rules (xml-> xml-tree
                     :CORSRule
                     node->rule)]
    (vec rules)))
49 |
(defn as-xml
  "Render the internal vector-of-rules CORS representation as XML.
   Optionally supply a boolean to indicate whether to indent the
   output."
  ([rules indent?]
     (let [format (if indent? indent-str emit-str)
           xml-ns "http://s3.amazonaws.com/doc/2006-03-01/"]
       (format
        (seq->xml
         (apply vector
                :CORSConfiguration {:xmlns xml-ns}
                (for [{:keys [origins methods headers exposed max-age]} rules]
                  (apply vector :CORSRule
                         (concat
                          (mapv (partial vector :AllowedOrigin) origins)
                          (mapv (partial vector :AllowedMethod)
                                (map (comp upper-case name) methods))
                          (mapv (partial vector :AllowedHeader) headers)
                          (mapv (partial vector :ExposeHeader) exposed)
                          ;; MaxAgeSeconds is optional
                          (if max-age
                            [[:MaxAgeSeconds max-age]]
                            [])))))))))
  ([rules]
     (as-xml rules false)))
71 |
(defn origin-matches?
  "Does request origin `src` match rule origin `dst`? `dst` may carry
   `*` wildcards, each matching any run of characters."
  [src dst]
  (let [escaped  (string->pattern dst)
        wildcard (replace escaped "\\*" "(.*)")]
    (re-find (re-pattern (str "^" wildcard "$")) src)))
77 |
(defn origins-match?
  "Truthy when the rule allows both the request origin and the method.
   NOTE(review): req-headers is accepted but not consulted here."
  [origin method req-headers {:keys [origins methods headers]}]
  (let [origin-ok? (some (partial origin-matches? origin) origins)]
    (and origin-ok? ((set methods) method))))
82 |
(defn merge-rules
  "Merge two values from matching rules: collections union into a set;
   scalars keep whichever sorts first under `compare`."
  [left right]
  (cond
    (sequential? left)          (set (concat left right))
    (neg? (compare left right)) left
    :else                       right))
88 |
(defn make-pattern
  "Our limited pattern builder: a star turns the text before it into a
   prefix match; otherwise match exactly. Input is lower-cased so
   matching is case-insensitive."
  [s]
  (let [s (lower-case s)]
    (if-not (.contains s "*")
      {:exact-match s}
      {:pattern (replace s #"\*.*$" "")})))
96 |
(defn pattern-matches?
  "Does `header` satisfy a pattern produced by make-pattern? Prefix
   match for :pattern entries, equality for :exact-match entries;
   the header is lower-cased first."
  [header {:keys [pattern exact-match]}]
  (let [h (lower-case header)]
    (cond
      (some? pattern) (.startsWith h pattern)
      :else           (= h exact-match))))
103 |
(defn match-headers
  "Out of the comma-separated request header list, keep those allowed
   by the rule's header patterns; yield them re-joined with \", \".
   Nil when no headers were requested."
  [req-headers headers]
  (when req-headers
    (let [patterns (map make-pattern headers)
          allowed? (fn [h] (some (partial pattern-matches? h) patterns))
          wanted   (split req-headers #"[ \t]*,[ \t]*")]
      (join ", " (filter allowed? wanted)))))
113 |
(defn rule->headers
  "Build the Access-Control-* response headers for a matched rule;
   Max-Age and Allow-Headers only appear when applicable."
  [origin method req-headers {:keys [methods exposed headers max-age]}]
  (let [allowed-headers (match-headers req-headers headers)
        base            {"Access-Control-Allow-Origin" origin
                         "Access-Control-Allow-Methods" (-> method name upper-case)
                         "Access-Control-Expose-Headers" (join ", " exposed)}]
    (cond-> base
      max-age         (assoc "Access-Control-Max-Age" (str max-age))
      allowed-headers (assoc "Access-Control-Allow-Headers" allowed-headers))))
124 |
(defn matches?
  "Match a request (headers + method) against the bucket's CORS rules.
   For :options preflights the effective method comes from the
   Access-Control-Request-Method header, and its absence is a 400.
   Yields the Access-Control-* response headers built from every
   matching rule merged together, or {} when nothing matches."
  [cors headers method]
  ;; the default origin is a sentinel that cannot match any rule,
  ;; so a missing Origin header matches nothing
  (let [origin (get headers "origin" "_____________________________")
        method (if (= method :options)
                 (some-> (get headers "access-control-request-method")
                         lower-case
                         keyword)
                 method)
        req-headers (get headers "access-control-request-headers")]
    (when-not method
      (throw (ex-info "Invalid Argument" {:type :invalid-argument
                                          :status-code 400
                                          :arg "Access-Control-Request-Method"
                                          :val ""})))
    (if-let [matching-rules (seq (filter (partial origins-match?
                                                  origin method req-headers)
                                         cors))]
      (rule->headers
       origin method req-headers
       ;; collapse all matching rules into one (see merge-rules)
       (reduce (partial merge-with merge-rules) {} matching-rules))
      {})))
146 |
--------------------------------------------------------------------------------
/src/io/pithos/bucket.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.bucket
2 | "The bucketstore stores ring-global information on bucket
3 | ownership. It contains a single column-family and an
4 | accompanying index."
5 | (:refer-clojure :exclude [update])
6 | (:require [qbits.alia :as a]
7 | [qbits.hayt :refer [select where set-columns
8 | create-table create-index
9 | column-definitions index-name
10 | delete update limit]]
11 | [io.pithos.util :refer [iso8601-timestamp]]
12 | [io.pithos.system :as system]
13 | [io.pithos.store :as store]))
14 |
(defprotocol Bucketstore
  "The bucketstore contains the schema migration function,
   two bucket lookup functions and CRUD signatures"
  (by-tenant [this tenant] "All buckets owned by `tenant`.")
  (by-name [this bucket] "Look a single bucket up by name."))

(defprotocol BucketDescriptor
  "Read-only view of a bucket's properties."
  (region [this] "The region this bucket lives in.")
  (versioned? [this] "Whether object versioning is enabled."))

(defprotocol RegionDescriptor
  "Read-only view of a region's configuration."
  (metastore [this] "The metastore serving this region."))
27 |
28 | ;; ring-global metastore
29 |
(def bucket-table
  "Bucket properties: ownership (tenant), placement (region),
   creation date and the serialized acl/cors/website/policy
   documents, keyed by bucket name."
  (create-table
   :bucket
   (column-definitions {:bucket :text
                        :created :text
                        :tenant :text
                        :region :text
                        :acl :text
                        :cors :text
                        :website :text
                        :policy :text
                        :versioned :boolean
                        :primary-key :bucket})))
44 |
(def bucket_tenant-index
  "Secondary index on the tenant column, backing `by-tenant` lookups."
  (create-index
   :bucket
   :tenant
   (index-name :bucket_tenant)))
51 |
(defn bucket-by-tenant-q
  "Cassandra query: every bucket row owned by `owner`
   (served by the bucket_tenant secondary index)."
  [owner]
  (select :bucket
          (where [[= :tenant owner]])))
56 |
(defn fetch-bucket-q
  "Cassandra query: the single row describing `bucket-name`."
  [bucket-name]
  (select :bucket
          (where [[= :bucket bucket-name]])
          (limit 1)))
61 |
(defn update-bucket-q
  "Cassandra upsert: set `cols` on the row for `bucket-name`.
   Used both for creation and updates (upsert semantics)."
  [bucket-name cols]
  (update :bucket
          (set-columns cols)
          (where [[= :bucket bucket-name]])))
68 |
(defn delete-bucket-q
  "Cassandra query removing the row for `bucket-name`."
  [bucket-name]
  (delete :bucket
          (where [[= :bucket bucket-name]])))
73 |
(defn cassandra-bucket-store
  "Given a cluster configuration, reify an instance of Bucketstore.
   The cassandra bucket store suffers from a design flaw since last
   write-wins might yield a success response for a bucket which will
   later be claimed.

   This can be fixed with the following strategies:

   - writing a bucket store that targets an SQL DB instead of cassandra
   - using lightweight transactions
   - wrap ownership around a zookeeper lock

   If you care deeply about bucket ownership, I'd suggest looking into
   the above options"
  [{:keys [default-region read-consistency write-consistency] :as config}]
  ;; Strip the consistency keys before handing the remaining options to
  ;; the generic cassandra session builder; both consistency levels
  ;; default to :quorum.
  (let [copts (dissoc config :read-consistency :write-consistency)
        session (store/cassandra-store copts)
        rdcty (or (some-> read-consistency keyword) :quorum)
        wrcty (or (some-> write-consistency keyword) :quorum)
        read! (fn [query] (a/execute session query {:consistency rdcty}))
        write! (fn [query] (a/execute session query {:consistency wrcty}))]
    (reify
      store/Convergeable
      ;; Schema migration: create the bucket table and its tenant index.
      (converge! [this]
        (write! bucket-table)
        (write! bucket_tenant-index))
      store/Crudable
      ;; Creation: an existing bucket owned by another tenant is a 409,
      ;; an existing bucket owned by the same tenant is a no-op, and an
      ;; unknown bucket is written with defaults merged under `columns`
      ;; (tenant always wins). Subject to the last-write-wins caveat in
      ;; the docstring above.
      (create! [this tenant bucket columns]
        (if-let [[details] (seq (read! (fetch-bucket-q bucket)))]
          (when (not= tenant (:tenant details))
            (throw (ex-info
                    "bucket already exists"
                    {:type :bucket-already-exists
                     :bucket bucket
                     :status-code 409})))
          (write!
           (update-bucket-q bucket
                            (merge {:region default-region
                                    :created (iso8601-timestamp)}
                                   columns
                                   {:tenant tenant})))))
      (update! [this bucket columns]
        (write! (update-bucket-q bucket columns)))
      ;; Deleting an unknown bucket is reported as a 404.
      (delete! [this bucket]
        (if-let [info (seq (read! (fetch-bucket-q bucket)))]
          (write! (delete-bucket-q bucket))
          (throw (ex-info "bucket not found"
                          {:type :no-such-bucket
                           :status-code 404
                           :bucket bucket}))))
      Bucketstore
      (by-tenant [this tenant]
        (read! (bucket-by-tenant-q tenant)))
      (by-name [this bucket]
        (first
         (read! (fetch-bucket-q bucket)))))))
130 |
(defn get-region
  "Fetch the regionstore for `region` from the system's region map;
   an unknown region is a 500 error."
  [system region]
  (if-let [region-store (get (system/regions system) region)]
    region-store
    (throw (ex-info (str "could not find region: " region)
                    {:status-code 500}))))
137 |
(defn bucket-descriptor
  "Build a descriptor for `bucket`: look the bucket up in the
   bucketstore, resolve its region's metastore, and reify
   BucketDescriptor + RegionDescriptor. ILookup is also implemented so
   callers can read any raw bucket column via keyword access
   (e.g. (:acl descriptor)). Unknown buckets raise a 404."
  [system bucket]
  (let [bucketstore (system/bucketstore system)
        details (by-name bucketstore bucket)]
    (if details
      (let [{:keys [versioned region bucket]} details
            {:keys [metastore]} (get-region system region)]
        (reify
          BucketDescriptor
          (versioned? [this] versioned)
          (region [this] region)
          RegionDescriptor
          (metastore [this] metastore)
          ;; Delegate lookups to the raw row fetched from the store.
          clojure.lang.ILookup
          (valAt [this k]
            (get details k))
          (valAt [this k def]
            (get details k def))))
      (throw (ex-info "bucket not found"
                      {:type :no-such-bucket
                       :status-code 404
                       :bucket bucket})))))
160 |
--------------------------------------------------------------------------------
/doc/source/quickstart.rst:
--------------------------------------------------------------------------------
1 | Quickstart Guide
2 | ================
3 |
4 | Getting up and running with pithos involves two things which
5 | we'll cover in this quick walk-through:
6 |
7 | - Installing and running Apache Cassandra
8 | - Installing and running pithos
9 |
Alternatively, there is a version of pithos which embeds Apache Cassandra.
11 |
12 | Obtaining pithos
13 | ----------------
14 |
15 | Pithos is released in both source and binary. Binary distributions come in
16 | two flavors: standard and standalone with embedded cassandra.
17 |
18 | Binary releases
19 | ~~~~~~~~~~~~~~~
20 |
Binary releases are the simplest way to get started and are hosted on GitHub:
22 | https://github.com/exoscale/pithos/releases.
23 |
24 | Each release contains:
25 |
26 | - A source code archive
27 | - A standard build (*pithos-VERSION-standalone.jar*)
28 | - A quickstart build which embeds cassandra (*pithos-quickstart-VERSION-standalone.jar*)
29 |
30 |
31 | Requirements
32 | ------------
33 |
34 | Runtime requirements
35 | ~~~~~~~~~~~~~~~~~~~~
36 |
37 | Runtime requirements for pithos are kept to a minimum
38 |
39 | - Java 7 Runtime (Sun JDK recommended)
40 | - Apache Cassandra 2.1 (for standard distribution)
41 |
42 | Build requirements
43 | ~~~~~~~~~~~~~~~~~~
44 |
45 | If you wish to build pithos you will additionally need the
46 | `leiningen`_ build tool to produce working artifacts.
47 |
48 | .. _leiningen: https://leiningen.org
49 |
50 | Minimal configuration
51 | ---------------------
52 |
53 | Pithos is configured with a single configuration file, formatted in YAML_.
54 |
55 |
56 | .. _YAML: http://yaml.org
57 |
58 | .. sourcecode:: yaml
59 |
60 | #
61 | ## pithos main configuration
62 | ## =========================
63 | #
64 | # This file contains the following sections
65 | # - service
66 | # - logging
67 | # - options
68 | # - keystore
69 | # - bucketstore
70 | # - regions
71 |
72 |
73 | ## service configuration
74 | ## ---------------------
75 | #
    # indicates the address and port the HTTP service listens on
77 | service:
78 | host: '127.0.0.1'
79 | port: 8080
80 |
81 |
82 | ## logging configuration
83 | ## ---------------------
84 | logging:
85 | level: info
86 | console: true
87 | files:
88 | - "/tmp/pithos.log"
89 | # overrides:
90 | # io.exo.pithos: debug
91 |
92 |
93 | ## global options
94 | ## --------------
95 | options:
96 | service-uri: 's3.example.com'
97 | reporting: true
98 | server-side-encryption: true
99 | multipart-upload: true
100 | masterkey-provisioning: true
101 | masterkey-access: true
102 | default-region: 'CH-GV1'
103 |
104 |
105 | ## keystore configuration
106 | ## ----------------------
107 | #
108 | # Keystores associate an access key with
109 | # an organization and secret key.
110 | #
111 | # They may offer provisioning capacities with the
112 | # masterkey. The default provider relies on keys
113 | # being defined inline.
114 | keystore:
115 | keys:
116 | AKIAIOSFODNN7EXAMPLE:
        # The master key allows provisioning operations
118 | # when the masterkey-provisioning feature is
119 | # set to true and will allow access to all
120 | # buckets when masterkey-access is set to true
121 | master: true
122 | tenant: 'pyr@spootnik.org'
123 | secret: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
124 | BKIAIOSFODNN7EXAMPLE:
125 | tenant: 'exoscale'
126 | secret: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
127 |
128 |
129 | ## bucketstore configuration
130 | ## -------------------------
131 | #
132 | # The bucketstore is ring global and contains information
133 | # on bucket location and global parameters.
134 | #
135 | # Its primary aim is to hold bucket location and ownership
136 | # information.
137 | #
138 | # The default provider relies on cassandra.
139 | bucketstore:
140 | default-region: 'CH-GV1'
141 | cluster: 'localhost'
142 | keyspace: 'storage'
143 |
144 |
145 | ## regions
146 | ## -------
147 | #
148 | # Regions are composed of a metastore and an arbitrary number
149 | # of named storage classes which depend on a blobstore.
150 | #
151 | # The metastore holds metadata for the full region, as well as
152 | # object storage-class placement information.
153 | #
154 | # The default implementation of both metastore and blobstore
155 | # rely on cassandra.
156 | #
157 | regions:
158 | CH-GV1:
159 | metastore:
160 | cluster: 'localhost'
161 | keyspace: 'storage'
162 | storage-classes:
163 | standard:
164 | cluster: 'localhost'
165 | keyspace: 'storage'
166 | max-chunk: '128k'
167 | max-block-chunks: 1024
168 |
169 |
170 | Running pithos
171 | --------------
172 |
173 | Command line arguments
174 | ~~~~~~~~~~~~~~~~~~~~~~
175 |
176 | Pithos accepts the following arguments::
177 |
178 | Switches Default Desc
179 | -------- ------- ----
180 | -h, --no-help, --help false Show Help
181 | -f, --path Configuration file path
182 | -q, --no-quiet, --quiet false Never output to stdout
183 | -a, --action api-run Specify an action (api-run, install-schema)
184 |
185 | The only non-standard option is the `-a` option which allows either starting
186 | the service normally or converging a cassandra schema.
187 |
188 | Running the standalone version
189 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
190 | The standalone version can just be run against a configuration file::
191 |
192 | java -jar pithos-quickstart-VERSION-standalone.jar -f pithos.yaml
193 |
194 | Running against an existing cluster
195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
196 |
197 | The first time you run a standard pithos distribution, you will need
198 | to converge the necessary cassandra schema::
199 |
200 | java -jar pithos-VERSION-standalone.jar -f pithos.yaml -a install-schema
201 |
202 | This will create the necessary keyspaces and column families in cassandra.
203 | Once finished, pithos can be started normally::
204 |
205 | java -jar pithos-VERSION-standalone.jar -f pithos.yaml
206 |
207 |
208 |
--------------------------------------------------------------------------------
/src/io/pithos/config.clj:
--------------------------------------------------------------------------------
(ns io.pithos.config
  "This namespace parses YAML data into clojure forms which
   are then augmented with a mechanism initially described here:

   http://bit.ly/1xRgOLb

   Default implementation for protocols are provided but can be overridden
   with the `use` keyword.
  "
  (:require [clj-yaml.core :refer [parse-string]]
            [clojure.tools.logging :refer [error info debug]]
            [io.pithos.util :refer [to-bytes]]
            [unilog.config :refer [start-logging!]]
            [raven.client :refer [capture!]]
            [net.http.client :refer [build-client]]))
16 |
17 |
;; Bootstrap logging with console-only defaults at load time, so any
;; messages emitted before the configuration file is parsed are
;; visible; `init` reconfigures logging once the real config is read.
(start-logging!
 {:pattern "%p [%d] %t - %c - %m%n"
  :external false
  :console true
  :files []
  :level "info"
  :overrides {}})
25 |
(def default-logging
  "Logging can be bypassed if a logback configuration is provided
   to the underlying JVM"
  ;; NOTE(review): the :use key points at org.spootnik.logconfig while
  ;; this namespace requires unilog.config/start-logging! and `init`
  ;; calls start-logging! directly (the :use key is not consumed here)
  ;; — looks like a leftover; confirm before relying on it.
  {:use "org.spootnik.logconfig/start-logging!"
   :pattern "%p [%d] %t - %c - %m%n"
   :external false
   :console true
   :files []
   :level "info"
   :overrides {:io.pithos "debug"}})
36 |
;; Default configurations for each pluggable component. Each map can be
;; partially overridden from the YAML configuration; the `use` key names
;; the constructor function resolved by `get-instance`.

(def default-keystore
  "keystore defaults to MapKeyStore"
  {:use "io.pithos.keystore/map-keystore"})

(def default-bucketstore
  "bucketstore defaults to cassandra"
  {:use "io.pithos.bucket/cassandra-bucket-store"})

(def default-metastore
  "metastore defaults to cassandra"
  {:use "io.pithos.meta/cassandra-meta-store"})

(def default-blobstore
  "blobstore defaults to cassandra, a max chunk of 512k
   and no more than 2048 chunks per block"
  {:use "io.pithos.blob/cassandra-blob-store"
   :max-chunk "512k"
   :max-block-chunks 2048})

(def default-reporter
  "reporters default to logging"
  {:use "io.pithos.reporter/logging-reporter"})

(def default-service
  "The http service is exposed on localhost port 8080 by default"
  {:host "127.0.0.1"
   :port 8080})

(def default-options
  "Some default global options."
  {:service-uri "s3.amazonaws.com"
   :reporting true
   :server-side-encryption true
   :multipart-upload true
   :masterkey-provisioning true
   :masterkey-access true})
73 |
(defn find-ns-var
  "Resolve a fully-qualified symbol string to its var, requiring its
   namespace first. Returns nil when the namespace cannot be loaded or
   the var does not exist."
  [s]
  (try
    (let [fqsym (symbol s)]
      (require (symbol (namespace fqsym)))
      (find-var fqsym))
    (catch Exception _
      nil)))
84 |
(defn instantiate
  "Resolve `class` (a fully-qualified symbol naming a one-argument
   constructor function) and call it with `config`."
  [class config]
  (let [ctor (find-ns-var class)]
    (when-not ctor
      (throw (ex-info (str "no such namespace: " class) {})))
    (ctor config)))
92 |
(defn get-instance
  "Create an instance for `target` by handing `config` to the
   constructor named by its `use` key."
  [{:keys [use] :as config} target]
  (debug "building " target " with " use)
  (instantiate (symbol (name use)) config))
99 |
(defn load-path
  "Locate and parse the YAML configuration. Resolution order: the
   explicit `path` argument, the pithos.configuration JVM property,
   the PITHOS_CONFIGURATION environment variable, and finally
   /etc/pithos/pithos.yaml."
  [path]
  (-> (or path
          (System/getProperty "pithos.configuration")
          (System/getenv "PITHOS_CONFIGURATION")
          "/etc/pithos/pithos.yaml")
      slurp
      parse-string))
110 |
(defn get-storage-classes
  "Create instances of blobstores for all storage classes (in a region).
   Returns a map of storage-class name to blobstore instance."
  [storage-classes]
  (->> (for [[storage-class blobstore] storage-classes
             ;; Merge defaults, then normalize :max-chunk (a size string
             ;; such as \"512k\") into a byte count. The trailing
             ;; :max-chunk argument is passed through to to-bytes —
             ;; presumably for error reporting; verify against
             ;; io.pithos.util/to-bytes.
             :let [blobstore (-> (merge default-blobstore blobstore)
                                 (update-in [:max-chunk] to-bytes :max-chunk))]]
         [storage-class (get-instance blobstore :blobstore)])
       (reduce merge {})))
119 |
(defn get-region-stores
  "Create instances for each region's metastore then create storage
   classes. Returns a map of region name (string) to a map holding
   :metastore and :storage-classes."
  [regions]
  (->> (for [[region {:keys [metastore storage-classes]}] regions
             :let [metastore (merge default-metastore metastore)]]
         [(name region)
          {:metastore (get-instance metastore :metastore)
           :storage-classes (get-storage-classes storage-classes)}])
       (reduce merge {})))
129 |
(defn get-reporters
  "Instantiate every configured reporter, merging in defaults.
   The sequence is realized eagerly (doall): `for` alone is lazy, so a
   misconfigured reporter would otherwise only fail on first use,
   escaping the error handling in `init`."
  [reporters]
  (doall
   (for [reporter reporters
         :let [reporter (merge default-reporter reporter)]]
     (get-instance reporter :reporter))))
135 |
(defn get-sentry
  "Build an event-capture function from a sentry configuration.
   With a config, events are sent to the configured DSN through an
   HTTP client; without one, a no-op that logs at debug is returned."
  [sentry]
  (if sentry
    (let [client (build-client (:http sentry))]
      (fn [ev]
        (capture! client (:dsn sentry) ev)))
    (fn [& _]
      (debug "no sentry configuration, no capture done."))))
144 |
(defn parse-cors
  "Normalize configured CORS rules: each rule's :methods strings are
   converted to lower-case keywords. Always returns a vector."
  [rules]
  (letfn [(->method [m]
            (keyword (.toLowerCase (name m))))
          (sanitize [rule]
            (assoc rule :methods (map ->method (:methods rule))))]
    (mapv sanitize rules)))
151 |
(defn init
  "Parse YAML file, merge in defaults and then create instances
   where applicable. Any failure logs the error and exits the JVM
   with status 1."
  [path quiet?]
  (try
    (when-not quiet?
      (println "starting with configuration: " path))
    (let [opts (load-path path)]
      (info "setting up logging according to config")
      ;; Reconfigure logging with the file's settings (replaces the
      ;; bootstrap configuration applied at namespace load time).
      (start-logging! (merge default-logging (:logging opts)))
      ;; Each section: merge defaults, then replace the config map with
      ;; a constructed instance where a pluggable component is involved.
      (-> opts
          (update-in [:service] (partial merge default-service))
          (update-in [:options] (partial merge default-options))
          (update-in [:options :default-cors] parse-cors)
          (update-in [:keystore] (partial merge default-keystore))
          (update-in [:keystore] get-instance :keystore)
          (update-in [:bucketstore] (partial merge default-bucketstore))
          (update-in [:bucketstore] get-instance :bucketstore)
          (update-in [:reporters] get-reporters)
          (update-in [:sentry] get-sentry)
          (update-in [:regions] get-region-stores)))
    (catch Exception e
      (when-not quiet?
        (println "invalid or incomplete configuration: " (str e)))
      (error e "invalid or incomplete configuration")
      (System/exit 1))))
178 |
--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
21 |
22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
23 |
# The first echo line had its <target> placeholders stripped (HTML
# escaping artifact); restored to the upstream Sphinx Makefile wording.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
48 |
# Builder targets: each invokes sphinx-build with a specific builder
# and writes its output under $(BUILDDIR).
clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pithos.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pithos.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/pithos"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pithos"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
--------------------------------------------------------------------------------
/tasks/leiningen/fatrpm.clj:
--------------------------------------------------------------------------------
1 | (ns leiningen.fatrpm
2 | "Build a .rpm package from leiningen, stolen from riemann."
3 | (:refer-clojure :exclude [replace])
4 | (:use [clojure.java.shell :only [sh]]
5 | [clojure.java.io :only [file delete-file writer copy]]
6 | [clojure.string :only [join capitalize trim-newline replace]]
7 | [leiningen.uberjar :only [uberjar]]
8 | [leiningen.tar :only [md5]])
9 | (:import java.util.Date
10 | java.text.SimpleDateFormat
11 | (org.codehaus.mojo.rpm RPMMojo
12 | AbstractRPMMojo
13 | Mapping Source
14 | SoftlinkSource
15 | Scriptlet)
16 | (org.apache.maven.project MavenProject)
17 | (org.apache.maven.shared.filtering DefaultMavenFileFilter)
18 | (org.codehaus.plexus.logging.console ConsoleLogger)))
19 |
(defn write
  "Write `content` to `dest`, normalizing the tail to exactly one
   trailing newline."
  [dest content]
  (with-open [out (writer dest)]
    (.write out (str (trim-newline content) "\n"))))
25 |
(defn workarea
  "Directory under the project root where rpm assembly happens."
  [project]
  (-> project :root (file "target" "rpm")))
29 |
(defn cleanup
  "Recursively remove the rpm workarea."
  [project]
  (sh "rm" "-rf" (str (workarea project))))
33 |
(defn reset
  "Remove the rpm workarea and any previously-built rpm artifacts
   from target/."
  [project]
  (cleanup project)
  ;; `sh` execs the program directly without a shell, so a literal
  ;; "target/*.rpm" glob is never expanded and the old artifacts were
  ;; silently left behind; run through sh -c so the glob works
  ;; (-f keeps the no-match case quiet).
  (sh "sh" "-c" (str "rm -f " (:root project) "/target/*.rpm")))
38 |
(defn get-version
  "Project version string; a -SNAPSHOT suffix is replaced with a
   .yyyyMMdd.HHmmss timestamp so every snapshot build gets a unique,
   monotonically increasing rpm version."
  [project]
  (let [stamp (.format (SimpleDateFormat. ".yyyyMMdd.HHmmss") (Date.))]
    (replace (:version project) #"-SNAPSHOT" stamp)))
43 |
(defn set-mojo!
  "Set a (possibly private) field declared on AbstractRPMMojo via
   reflection. Returns `object` so calls can be threaded with doto."
  [object name value]
  (let [field (.getDeclaredField AbstractRPMMojo name)]
    (.setAccessible field true)
    (.set field object value))
  object)
51 |
(defn array-list
  "Copy a Clojure collection into a java.util.ArrayList.
   The original shadowed its `list` parameter with the freshly created
   ArrayList, so doseq iterated over the empty new list and the
   function always returned an empty ArrayList; the parameter and the
   accumulator now have distinct names."
  [coll]
  (let [alist (java.util.ArrayList.)]
    (doseq [item coll]
      (.add alist item))
    alist))
57 |
(defn scriptlet
  "Creates a scriptlet backed by a file: returns a Scriptlet whose
   script file is `filename`."
  [filename]
  (doto (Scriptlet.)
    (.setScriptFile (file filename))))
63 |
(defn source
  "Create a source with a local location and a destination.
   Arities: no-arg for an empty Source, one-arg to set only the
   location, two-arg to set location and destination (both coerced
   to strings)."
  ([] (Source.))
  ([location]
   (doto (Source.)
     (.setLocation (str location))))
  ([location destination]
   (doto (Source.)
     (.setLocation (str location))
     (.setDestination (str destination)))))
74 |
(defn mapping
  "Translate a map of mapping options into a Mojo Mapping object.
   :configuration is coerced to the strings \"true\"/\"false\" (nil
   counts as false); other values pass through as-is. The *? keys are
   coerced to booleans."
  [m]
  (doto (Mapping.)
    (.setArtifact (:artifact m))
    (.setConfiguration (case (:configuration m)
                         true "true"
                         false "false"
                         nil "false"
                         (:configuration m)))
    (.setDependency (:dependency m))
    (.setDirectory (:directory m))
    (.setDirectoryIncluded (boolean (:directory-included? m)))
    (.setDocumentation (boolean (:documentation? m)))
    (.setFilemode (:filemode m))
    (.setGroupname (:groupname m))
    (.setRecurseDirectories (boolean (:recurse-directories? m)))
    (.setSources (:sources m))
    (.setUsername (:username m))))
93 |
(defn mappings
  "File mappings packaged in the rpm: uberjar, launcher script, log
   directory, configuration, sysconfig defaults and init script.
   Ownership defaults to pithos:pithos unless a mapping overrides it
   (the init script is root:root)."
  [project]
  (map (comp mapping
             (partial merge {:username "pithos"
                             :groupname "pithos"}))

       [; Jar
        {:directory "/usr/lib/pithos/"
         :filemode "644"
         :sources [(source (str (file (:root project)
                                      "target"
                                      (str "pithos-"
                                           (:version project)
                                           "-standalone.jar")))
                           "pithos.jar")]}

        ; Binary
        {:directory "/usr/bin"
         :filemode "755"
         :sources [(source (file (:root project) "pkg" "rpm" "pithos")
                           "pithos")]}

        ; Log dir
        {:directory "/var/log/pithos"
         :filemode "755"
         :directory-included? true}

        ; Config dir
        {:directory "/etc/pithos"
         :filemode "755"
         :directory-included? true
         :sources [(source (file (:root project) "doc" "pithos.yaml")
                           "pithos.yaml")]}

        ; Default file
        {:directory "/etc/sysconfig"
         :filemode "644"
         :configuration true
         :sources [(source (file (:root project) "pkg" "rpm" "pithos-default")
                           "pithos-default")]}

        ; Init script
        {:directory "/etc/init.d"
         :filemode "755"
         :username "root"
         :groupname "root"
         :sources [(source (file (:root project) "pkg" "rpm" "init.sh")
                           "pithos")]}]))
142 |
(defn blank-rpm
  "Create a new RPM mojo wired to an empty MavenProject and a console
   logger. Returns the mojo (set-mojo! returns its object)."
  []
  (let [mojo (RPMMojo.)
        fileFilter (DefaultMavenFileFilter.)]
    (set-mojo! mojo "project" (MavenProject.))
    (.enableLogging fileFilter (ConsoleLogger. 0 "Logger"))
    (set-mojo! mojo "mavenFileFilter" fileFilter)))
151 |
(defn create-dependency
  "Build an insertion-ordered, de-duplicated set of requirement
   strings, in the shape the RPM mojo expects."
  [rs]
  (let [deps (java.util.LinkedHashSet.)]
    (doseq [requirement rs]
      (.add deps requirement))
    deps))
157 |
(defn make-rpm
  "Create and execute a Mojo RPM: configure version, metadata, file
   mappings, install/remove scriptlets and dependencies, then run it."
  [project]
  ;; NOTE(review): the pre-install and post-remove scriptlets come from
  ;; pkg/deb/ while the others come from pkg/rpm/ — confirm this mix of
  ;; packaging directories is intentional.
  (doto (blank-rpm)
    (set-mojo! "projversion" (get-version project))
    (set-mojo! "name" (:name project))
    (set-mojo! "summary" (:description project))
    (set-mojo! "copyright" "exoscale")
    (set-mojo! "workarea" (workarea project))
    (set-mojo! "mappings" (mappings project))
    (set-mojo! "preinstallScriptlet" (scriptlet
                                      (file (:root project)
                                            "pkg" "deb" "preinst.sh")))
    (set-mojo! "postinstallScriptlet" (scriptlet
                                       (file (:root project)
                                             "pkg" "rpm" "postinst.sh")))
    (set-mojo! "preremoveScriptlet" (scriptlet
                                     (file (:root project)
                                           "pkg" "rpm" "prerm.sh")))
    (set-mojo! "postremoveScriptlet" (scriptlet
                                      (file (:root project)
                                            "pkg" "deb" "postrm.sh")))
    (set-mojo! "requires" (create-dependency ["daemonize >= 1.7.3"]))
    (.execute)))
182 |
(defn extract-rpm
  "Snags the RPM file out of its little mouse-hole and brings it up to
   target/, then generates an md5. Only the noarch RPMS directory of
   the workarea is scanned."
  [project]
  (let [dir (file (workarea project)
                  (:name project)
                  "RPMS"
                  "noarch")
        rpms (remove #(.isDirectory %) (.listFiles dir))]
    (doseq [rpm rpms]
      (let [dest (file (:root project) "target" (.getName rpm))]
        ; Move
        (.renameTo rpm dest)

        ; MD5 checksum file written next to the moved rpm
        (write (str dest ".md5")
               (str (md5 dest) "  " (.getName rpm)))))))
200 |
(defn fatrpm
  "Leiningen entry point: clean old artifacts, optionally build the
   uberjar, package the rpm, move it to target/ and clean up the
   workarea."
  ([project] (fatrpm project true))
  ([project uberjar?]
   (reset project)
   (when uberjar? (uberjar project))
   (make-rpm project)
   (extract-rpm project)
   (cleanup project)))
209 |
--------------------------------------------------------------------------------
/src/io/pithos/blob.clj:
--------------------------------------------------------------------------------
(ns io.pithos.blob
  "Blobstore interaction. This is one of the four storage protocols.
   Storage protocols are split even though they mostly target cassandra
   because it allows:

   - Easy implementation of the protocol targeting different DBs
   - Splitting data in different keyspace with different replication props

   Implementations may be swapped in the configuration file, as described
   in the documentation for the `io.pithos.config' namespace.

   The Blobstore is the storage layer concerned with actually storing data.
   Its operations are purely commutative and never deal with filenames, since
   that responsibility lies within the _Metastore_ (see `io.pithos.meta`).

   The storage layout is rather simple:

   - Data is stored in inodes
   - An inode has a list of blocks
   - Blocks contain a list of chunks

   The maximum size of chunks in blocks and the payload size in chunks
   are configurable. This approach allows storage of large files spread
   across many rows.

   To ensure metadata operations are decoupled from storage, the protocol
   relies on callbacks in a few places.

  "
  (:import java.util.UUID
           java.nio.ByteBuffer)
  (:require [clojure.java.io :as io]
            [io.pithos.store :as store]
            [io.pithos.desc :as d]
            [qbits.alia.uuid :as uuid]
            [qbits.alia :as a]
            [qbits.hayt :refer [select where columns order-by
                                insert values limit delete count*
                                create-table column-definitions]]
            [io.pithos.util :refer [md5-update md5-sum md5-init]]
            [clojure.tools.logging :refer [debug info error]]))
42 |
43 | ;;
44 | ;; A word on storage protocols
45 | ;; ---------------------------
46 | ;;
47 | ;; All storage protocols expose functions to produce side-effects
48 | ;; and a `converge!` function whose role is to apply the schema
49 |
(def absolute-chunk-limit
  "Upper bound on the number of chunks fetched from a single block.
   The configured chunks-per-block maximum can be exceeded when small
   chunks are uploaded, so reads use this large safety limit instead."
  524288)
54 |
55 |
(defprotocol Blobstore
  "The blobstore protocol, provides methods to read and write data
  to inodes, as well as a schema migration function.
  "
  (converge! [this]
    "Create the blobstore schema (tables) when needed.")
  (delete! [this inode version]
    "Remove all blocks and block references of an inode version.")
  (blocks [this od]
    "Retrieve the ordered block list for an object descriptor.")
  (max-chunk [this]
    "Maximum chunk size configured for this blobstore.")
  (chunks [this od block offset]
    "Retrieve the chunks of a block starting at `offset`.")
  (start-block! [this od block]
    "Record the start of a new block for an object descriptor.")
  (chunk! [this od block offset chunk]
    "Store one chunk at `offset` within `block`.")
  (boundary? [this block offset]
    "True when `offset` has reached the end of `block`."))
68 |
69 | ;; CQL Schema
;; CQL Schema
(def inode_blocks-table
  "List of blocks found in an inode, keyed by inode and version.
   All blocks of a given [inode version] pair share a partition,
   clustered by block offset."
  (create-table
   :inode_blocks
   (column-definitions {:inode :uuid
                        :version :timeuuid
                        :block :bigint
                        :primary-key [[:inode :version] :block]})))
78 |
(def block-table
  "A block is keyed by inode, version and first offset in the block.

   Blocks contain a list of offset, chunksize and payload (a byte-buffer)
   which contain the actual data being stored. chunksize is set in the
   configuration."
  (create-table
   :block
   (column-definitions {:inode :uuid
                        :version :timeuuid
                        :block :bigint
                        :offset :bigint
                        :chunksize :int
                        :payload :blob
                        :primary-key [[:inode :version :block] :offset]})))
94 |
95 |
96 | ;;
97 | ;; start declaring CQL queries
98 |
(defn get-block-q
  "Build the CQL query listing the block offsets of an inode version,
   ordered by block."
  [ino ver ord]
  (select :inode_blocks
          (columns :block)
          (where [[= :inode ino]
                  [= :version ver]])
          (order-by [:block ord])))
107 |
(defn set-block-q
  "Build the CQL insert registering a block offset for an inode version."
  [ino ver blk]
  (insert :inode_blocks
          (values {:inode ino
                   :version ver
                   :block blk})))
113 |
(defn get-chunk-q
  "Build the CQL query fetching up to `max-chunks` chunks of a block,
   starting at `offset`."
  ;; `max-chunks` was previously named `max`, shadowing `clojure.core/max`.
  [inode version block offset max-chunks]
  (select :block
          (where [[= :inode inode]
                  [= :version version]
                  [= :block block]
                  [>= :offset offset]])
          (limit max-chunks)))
123 |
(defn set-chunk-q
  "Build the CQL insert storing one chunk payload at a given offset."
  [ino ver blk off size chunk]
  (insert :block
          (values {:inode ino
                   :version ver
                   :block blk
                   :offset off
                   :chunksize size
                   :payload chunk})))
134 |
(defn delete-blockref-q
  "Build the CQL delete removing every block reference of an inode version."
  [ino ver]
  (delete :inode_blocks
          (where [[= :inode ino]
                  [= :version ver]])))
140 |
(defn delete-block-q
  "Build the CQL delete removing a single block of an inode version."
  [ino ver blk]
  (delete :block
          (where [[= :inode ino]
                  [= :version ver]
                  [= :block blk]])))
147 |
(defn cassandra-blob-store
  "cassandra-blob-store, given a maximum chunk size and maximum
   number of chunks per block and cluster configuration details,
   will create a cassandra session and reify a Blobstore instance."
  [{:keys [max-chunk max-block-chunks read-consistency write-consistency]
    :as config}]
  (let [copts   (dissoc config :read-consistency :write-consistency)
        session (store/cassandra-store copts)
        rdcty   (or (some-> read-consistency keyword) :quorum)
        wrcty   (or (some-> write-consistency keyword) :quorum)
        read!   (fn [query] (a/execute session query {:consistency rdcty}))
        write!  (fn [query] (a/execute session query {:consistency wrcty}))
        ;; a block holds at most max-block-chunks chunks of max-chunk bytes
        bs      (* max-chunk max-block-chunks)]
    (debug "got max-chunk " max-chunk "and max-block-chunks " max-block-chunks)
    (reify
      store/Convergeable
      (converge! [this]
        (write! inode_blocks-table)
        (write! block-table))
      store/Crudable
      (delete! [this od version]
        ;; Delete every block of the inode version, then drop the block
        ;; references once. The blockref deletion previously sat inside
        ;; the loop and was re-issued for every block.
        (let [ino (if (= (class od) java.util.UUID) od (d/inode od))]
          (doseq [{block :block} (read! (get-block-q ino version :asc))]
            (write! (delete-block-q ino version block)))
          (write! (delete-blockref-q ino version))))
      Blobstore
      (blocks [this od]
        (let [ino (d/inode od)
              ver (d/version od)]
          (read! (get-block-q ino ver :asc))))

      (max-chunk [this]
        max-chunk)

      (chunks [this od block offset]
        (let [ino (d/inode od)
              ver (d/version od)]
          (seq (read! (get-chunk-q ino ver block offset
                                   absolute-chunk-limit)))))

      (boundary? [this block offset]
        ;; an offset at or beyond block start + block size ends the block
        (>= offset (+ block bs)))

      (start-block! [this od block]
        (write! (set-block-q (d/inode od) (d/version od) block)))

      (chunk! [this od block offset chunk]
        ;; store one chunk and report how many bytes were written
        (let [size (- (.limit chunk) (.position chunk))]
          (write! (set-chunk-q (d/inode od) (d/version od)
                               block offset size chunk))
          size)))))
201 |
--------------------------------------------------------------------------------
/doc/source/developer.rst:
--------------------------------------------------------------------------------
1 | Developer Guide
2 | ===============
3 |
4 | *pithos* is an open source project, available on github_:
5 | https://github.com/exoscale/pithos.
6 |
7 | *pithos* is developed in clojure_, a functional lisp which
8 | runs on the **JVM**.
9 |
10 | Building Pithos from source
11 | ---------------------------
12 |
13 | *pithos* is built with leiningen_, to build the
14 | standard version of *pithos* just run::
15 |
16 | lein test
17 | lein compile :all
18 | lein uberjar
19 |
20 | If you wish to quickly test versions as you develop,
21 | you can run pithos directly from leiningen. You
22 | should place your test configuration file in
23 | the ``site/`` subdirectory::
24 |
25 | lein run -- -f site/pithos.yaml
26 |
27 | Contributing to pithos
28 | ----------------------
29 |
30 | Contributions to *pithos* are heavily encouraged.
31 | The best way to contribute is to work on a separate
32 | git branch, branching off of the master branch::
33 |
34 | git pull origin/master
35 | git checkout -B feature/new-feature
36 |
37 | Once work is ready, use the github pull-request
38 | mechanism for a code review to happen.
39 |
40 | .. _Custom Stores:
41 |
42 | Creating alternative store implementations
43 | ------------------------------------------
44 |
While pithos primarily targets Apache Cassandra,
nothing prevents alternative implementations from being
created for all parts of the service.
48 |
49 | *pithos*, through a simple dependency injection
50 | mechanism allows custom implementations of stores
51 | to be plugged.
52 |
53 | In clojure parlance, the only requirement an implementation
54 | must fulfill is to realize the correct protocol.
55 |
56 | Here is a summary of all current protocols:
57 |
58 | Convergeable
59 | ~~~~~~~~~~~~
60 |
61 | The convergeable protocol is used to create the
62 | initial schema for databases that need it. It
63 | consists of a single method:
64 |
65 | .. sourcecode:: clojure
66 |
67 | (defprotocol Convergeable
68 | (converge! [this]))
69 |
70 | This method is called on blobstores, metastores and bucketstores
71 | during the ``install-schema`` phase.
72 |
73 | Crudable
74 | ~~~~~~~~
75 |
76 | The metastore, blobstore and bucketstores share a few functions
77 | which are gathered in this protocol:
78 |
79 | ``fetch``
80 | Retrieve metadata from buckets or objects (unused in blobstores)
81 |
82 | ``update!``
83 | Updates an object's or bucket's metadata (unused in blobstores)
84 |
85 | ``create!``
86 | Insert a bucket (unused in metastores and blobstores)
87 |
88 | ``delete!``
89 | Delete an object, bucket or blob
90 |
91 | .. sourcecode:: clojure
92 |
93 | (defprotocol Crudable
94 | (fetch [this k] [this k1 k2] [this k1 k2 k3])
95 | (update! [this k v] [this k1 k2 v] [this k1 k2 k3 v])
96 | (delete! [this k] [this k1 k2] [this k1 k2 k3])
97 | (create! [this k v] [this k1 k2 v] [this k1 k2 k3 v]))
98 |
99 |
100 | clojure.lang.ILookup
101 | ~~~~~~~~~~~~~~~~~~~~
102 | While not a *pithos* protocol per-se, this protocol
103 | is used by keystores to behave like standard clojure
104 | maps. The method used within ``ILookup`` is ``valAt``,
105 | the expected output is a map containing the keys:
106 |
107 | - ``tenant``: the tenant this key belongs to
108 | - ``secret``: the associated secret key
109 | - ``memberof``: (*optional*) groups this tenant belongs to
110 |
111 | Bucketstore
112 | ~~~~~~~~~~~
113 |
114 | The bucketstore exposes methods to handle buckets:
115 |
116 | ``by-tenant``
117 | Retrieves a list of bucket by tenant
118 |
119 | ``by-name``
120 | Retrieves a bucket by name
121 |
122 | .. sourcecode:: clojure
123 |
124 | (defprotocol Bucketstore
125 | "The bucketstore contains the schema migration function,
126 | two bucket lookup functions and CRUD signatures"
127 | (by-tenant [this tenant])
128 | (by-name [this bucket]))
129 |
130 |
131 | Metastore
132 | ~~~~~~~~~
133 |
134 | The metastore exposes methods to handle bucket metadata:
135 |
136 | ``prefixes``
137 | Lists objects
138 |
139 | ``abort-multipart-upload!``
140 | Aborts a multipart upload
141 |
142 | ``update-part!``
143 | Updates a multipart upload's part metadata
144 |
145 | ``initiate-upload!``
146 | Stores metadata for a multipart upload
147 |
148 | ``get-upload-details``
149 | Retrieves metadata on an ongoing upload
150 |
151 | ``list-uploads``
152 | Lists all uploads for a bucket
153 |
154 | ``list-object-uploads``
155 | Lists all uploads for an object
156 |
157 | ``list-upload-parts``
158 | Lists registered upload parts for an upload.
159 |
160 | .. sourcecode:: clojure
161 |
162 | (defprotocol Metastore
163 | "All necessary functions to manipulate bucket metadata"
164 | (prefixes [this bucket params])
165 | (abort-multipart-upload! [this bucket object upload])
166 | (update-part! [this bucket object upload partno columns])
167 | (initiate-upload! [this bucket object upload metadata])
168 | (get-upload-details [this bucket object upload])
169 | (list-uploads [this bucket prefix])
170 | (list-object-uploads [this bucket object])
171 | (list-upload-parts [this bucket object upload]))
172 |
173 |
174 | Blobstore
175 | ~~~~~~~~~
176 |
The blobstore exposes methods to store and retrieve data:
178 |
179 | ``blocks``
180 | Retrieves blocks from an object descriptor
181 |
182 | ``max-chunk``
183 | Maximum chunk-size for this blobstore
184 |
185 | ``chunks``
186 | Retrieve chunks from a starting offset
187 |
188 | ``start-block!``
189 | Mark the start of a block
190 |
191 | ``chunk!``
192 | Store a chunk
193 |
194 | ``boundary?``
195 | Check if a block boundary has been reached
196 |
197 | .. sourcecode:: clojure
198 |
199 | (defprotocol Blobstore
200 | "The blobstore protocol, provides methods to read and write data
201 | to inodes, as well as a schema migration function.
202 | "
203 | (blocks [this od])
204 | (max-chunk [this])
205 | (chunks [this od block offset])
      (start-block! [this od block])
207 | (chunk! [this od block offset chunk])
208 | (boundary? [this block offset]))
209 |
210 | Reporter
211 | ~~~~~~~~
212 |
213 | The reporter protocol exposes a single method used to register
214 | an event.
215 |
216 | ``report!``
217 | This method hands off an event to the current reporter.
218 |
219 |
220 | .. sourcecode:: clojure
221 |
222 | (defprotocol Reporter
223 | (report! [this event]))
224 |
225 |
226 | An alternative keystore
227 | ~~~~~~~~~~~~~~~~~~~~~~~
228 |
229 | The simplest example would be to create an alternative keystore.
230 | Let's pretend a simple, non-authenticated API is used to provide
231 | credential results.
232 |
233 | .. sourcecode:: clojure
234 |
235 | (ns com.example.http-keystore
236 | (:require [qbits.jet.client.http :as http]
237 | [clojure.core.async :refer [ meta
110 | (merge {:inode inode
111 | :version version
112 | :atime ts})
113 | (merge @cols)
114 | (dissoc :bucket :object))]
115 | (when-not (and (:inode meta)
116 | (:version meta)
117 | (:size meta)
118 | (:checksum meta))
119 | (error "trying to write incomplete metadata"
120 | (pr-str meta))
121 | (throw (ex-info "bad metadata" {:type :incomplete-metadata
122 | :status-code 500
123 | :meta (pr-str meta)})))
124 | (store/update! metastore bucket object meta)
125 | (swap! cols assoc :atime ts)))
126 | clojure.lang.ILookup
127 | (valAt [this k]
128 | (get (merge meta {:tenant tenant :inode inode :version version} @cols)
129 | k))
130 | (valAt [this k def]
131 | (get (merge meta {:tenant tenant :inode inode :version version} @cols)
132 | k
133 | def)))))
134 |
135 |
(defn part-descriptor
  "Build a descriptor for one part of a multipart upload.

   Resolves the bucket's region and metastore, verifies the upload
   exists (throws a 404 otherwise) and reifies a descriptor whose
   `save!` records the part's metadata via `meta/update-part!`."
  [system bucket object upload-id partnumber]
  (let [bucketstore (system/bucketstore system)
        regions (system/regions system)
        {:keys [region]} (bucket/by-name bucketstore bucket)
        {:keys [metastore
                storage-classes]} (bucket/get-region system region)
        ;; existing object metadata, if any (no error when absent)
        meta (store/fetch metastore bucket object false)
        inode (uuid/random)
        version (uuid/time-based)
        ;; XXX: should support several storage classes
        blobstore (get storage-classes :standard)
        ;; pending column updates, accumulated through `col!`
        cols (atom {})
        part (Long/parseLong partnumber)]
    ;; validation only: throw when the upload does not exist
    (or
     (meta/get-upload-details metastore bucket object upload-id)
     (throw (ex-info "no such upload" {:type :no-such-upload
                                       :status-code 404
                                       :key object
                                       :upload upload-id})))
    (reify
      bucket/BucketDescriptor
      (region [this] (get regions region))
      bucket/RegionDescriptor
      (metastore [this] metastore)
      BlobDescriptor
      ;; values pending in `cols` take precedence over stored metadata
      (size [this] (or (:size @cols) (:size meta)))
      (checksum [this] (or (:checksum @cols) (:checksum meta)))
      (inode [this] (or (:inode @cols) inode))
      (version [this] (or (:version @cols) version))
      (blobstore [this] blobstore)
      ObjectDescriptor
      (col! [this field val]
        ;; known columns are stored directly, anything else goes into
        ;; the :metadata map keyed by field name
        (if (#{:size :checksum :inode :version} field)
          (swap! cols assoc (keyword field) val)
          (swap! cols assoc-in [:metadata (name field)] val)))
      (save! [this]
        (let [meta (-> {:inode inode :version version}
                       (merge @cols)
                       (merge {:modified (util/iso8601-timestamp)}))]
          (meta/update-part! metastore bucket object upload-id part meta))))))
177 |
--------------------------------------------------------------------------------
/src/io/pithos/perms.clj:
--------------------------------------------------------------------------------
(ns io.pithos.perms
  (:require [clojure.edn :as edn]
            [clojure.string :refer [split]]
            [clojure.tools.logging :refer [debug]]
            [io.pithos.acl :as acl]
            [io.pithos.bucket :as bucket]
            [io.pithos.desc :as desc]
            [io.pithos.system :as system]))
8 |
(defn granted-for?
  "Do current permission allow for operation on this particular perm ?
   Walks the ACL entries stored under `needs`, yielding true when one
   grants `for` by URI or by ID, nil otherwise."
  [acl for needs]
  (loop [entries (get acl needs)]
    (when-let [{:keys [URI ID]} (first entries)]
      (or (= URI for)
          (= ID for)
          (recur (rest entries))))))
15 |
(defn granted?
  "Do current permissions allow for operation ?
   True-ish when any permission in `needs` is granted to `for`."
  [acl needs for]
  (some (fn [need] (granted-for? acl for need)) needs))
20 |
(defn bucket-satisfies?
  "Ensure sufficient rights for bucket access.

   Access is granted when the requester owns the bucket, when the ACL
   grants the permission (or FULL_CONTROL) directly, or when one of the
   requester's groups holds such a grant.

   The ACL is stored as a `pr-str` string; it is read back with
   `clojure.edn/read-string`, which — unlike `clojure.core/read-string`
   — cannot execute code through reader macros such as `#=`."
  [{:keys [tenant acl]} {:keys [for groups needs]}]
  (let [needs [:FULL_CONTROL needs]
        acl   (some-> acl edn/read-string)]
    (or (= tenant for)
        (granted? acl needs for)
        (some identity (map (partial granted? acl needs) groups)))))
29 |
(defn object-satisfies?
  "Ensure sufficient rights for object access.

   Same rules as `bucket-satisfies?`: owner, direct grant (or
   FULL_CONTROL), or group grant. The ACL string is parsed with
   `clojure.edn/read-string` to avoid reader-macro execution."
  [{tenant :tenant} {acl :acl} {:keys [for groups needs]}]
  (let [needs [:FULL_CONTROL needs]
        acl   (some-> acl edn/read-string)]
    (or (= tenant for)
        (granted? acl needs for)
        (some identity (map (partial granted? acl needs) groups)))))
38 |
(defn authorize
  "Check permission to service operation, each operation has a list
   of needed permissions, any failure results in an exception being raised
   which prevents any further action from being taken.

   Returns true when every permission in `perms` is satisfied."
  [{:keys [authorization bucket object]} perms system]
  (let [{:keys [tenant memberof]} authorization
        memberof?   (set memberof)
        bucketstore (system/bucketstore system)]
    (doseq [[perm arg] (map (comp flatten vector) perms)]
      (case perm
        ;; the caller must not be anonymous
        :authenticated (when (= tenant :anonymous)
                         (debug "unauthenticated request to private resource")
                         (throw (ex-info "access denied"
                                         {:status-code 403
                                          :type :access-denied})))
        ;; the caller must belong to the given group
        :memberof (when-not (memberof? arg)
                    (debug "not a member of: " arg "groups:" (pr-str memberof?))
                    (throw (ex-info "access denied"
                                    {:status-code 403
                                     :type :access-denied})))
        ;; the bucket must exist and its ACL must satisfy `arg`
        :bucket (let [bd (bucket/by-name bucketstore bucket)]
                  (when-not bd
                    (throw (ex-info "bucket not found"
                                    {:type :no-such-bucket
                                     :status-code 404
                                     :bucket bucket})))
                  (when-not (bucket-satisfies? bd {:for tenant
                                                   :groups memberof?
                                                   :needs arg})
                    (debug "unsatisfied ACL for bucket. candidate:" (pr-str tenant)
                           "groups:" (pr-str memberof?)
                           "needs:" arg
                           "acl:" (:acl bd)
                           "bucket-owner:" (:tenant bd))
                    (throw (ex-info "access denied"
                                    {:status-code 403
                                     :type :access-denied}))))
        ;; the object's ACL must satisfy `arg`
        :object (when-not (object-satisfies?
                           (bucket/by-name bucketstore bucket)
                           (desc/object-descriptor system bucket object)
                           {:for tenant
                            :groups memberof?
                            :needs arg})
                  (debug "unsatisfied ACL for object. candidate:" (pr-str tenant)
                         "groups:" (pr-str memberof?)
                         "needs:" arg)
                  (throw (ex-info "access denied"
                                  {:status-code 403
                                   :type :access-denied})))))
    true))
85 |
(defn ->grantee
  "Translate one `x-amz-grant-*` clause (e.g. `id=\"foo\"` or
   `uri=http://...`) into an ACL grantee map; throws a 400 error
   for malformed clauses."
  ;; the parameter was previously named `str`, shadowing `clojure.core/str`
  [s]
  (debug "translating: " s)
  (let [[_ type dest] (or (re-find #"(emailAddress|id|uri)=\"(.*)\"" s)
                          (re-find #"(emailAddress|id|uri)=(.*)" s)
                          (throw (ex-info "Invalid Argument"
                                          {:type :invalid-argument
                                           :status-code 400
                                           :arg "x-amz-acl-*"
                                           :val s})))]
    (cond (#{"id" "emailAddress"} type) {:ID dest :DisplayName dest}
          :else {:URI (or (acl/known-uris dest)
                          dest)})))
99 |
(defn has-header-acl?
  "Yield the first ACL-related header value found in `headers`, if any."
  [headers]
  (some (partial get headers)
        ["x-amz-acl"
         "x-amz-grant-read"
         "x-amz-grant-read-acp"
         "x-amz-grant-write"
         "x-amz-grant-write-acp"
         "x-amz-grant-full-control"]))
108 |
(defn header-acl
  "Build the serialized (pr-str) ACL for a request from its headers.

   Three sources are considered, in order of precedence:

   - a canned ACL in the `x-amz-acl` header
   - explicit grants in the `x-amz-grant-*` headers (comma-separated)
   - neither, in which case the default owner ACL is used

   `init` always grants FULL_CONTROL to the bucket owner, plus the
   requesting tenant when it differs from the owner."
  [owner tenant headers]
  (let [init (if (= owner tenant)
               {:FULL_CONTROL [{:ID owner :DisplayName owner}]}
               {:FULL_CONTROL [{:ID owner :DisplayName owner}
                               {:ID tenant :DisplayName tenant}]})
        ;; canned ACL name, if provided
        canned-acl (get headers "x-amz-acl")
        ;; explicit grant headers carry comma-separated grantee clauses
        acl-read (some-> (get headers "x-amz-grant-read")
                         (split #","))
        acl-write (some-> (get headers "x-amz-grant-write")
                          (split #","))
        acl-read-acp (some-> (get headers "x-amz-grant-read-acp")
                             (split #","))
        acl-write-acp (some-> (get headers "x-amz-grant-write-acp")
                              (split #","))
        acl-full-ctl (some-> (get headers "x-amz-grant-full-control")
                             (split #","))
        explicit-acl {:READ (mapv ->grantee acl-read)
                      :READ_ACP (mapv ->grantee acl-read-acp)
                      :WRITE (mapv ->grantee acl-write)
                      :WRITE_ACP (mapv ->grantee acl-write-acp)
                      :FULL_CONTROL (mapv ->grantee acl-full-ctl)}]
    (pr-str
     (cond

       canned-acl
       (case canned-acl
         "public-read-write"
         (merge init {:READ [{:URI "anonymous"}]
                      :WRITE [{:URI "anonymous"}]})

         "public-read"
         (merge init {:READ [{:URI "anonymous"}]})

         "authenticated-read"
         (merge init {:READ [{:URI "authenticated"}]})

         ;; log delivery is not implemented; fall back to the owner ACL
         "log-delivery-write" init

         "bucket-owner-read"
         (merge init {:READ [{:DisplayName owner
                              :ID owner}]})

         "bucket-owner-full-control"
         init

         ;; NOTE(review): "private" adds the tenant to FULL_CONTROL,
         ;; which looks broader than a private canned ACL implies —
         ;; confirm this is intended.
         "private"
         (-> init
             (update-in [:FULL_CONTROL] conj {:ID tenant :DisplayName tenant})
             (update-in [:FULL_CONTROL] vec))

         nil
         init

         (throw (ex-info "Invalid Argument"
                         {:arg "x-amz-acl"
                          :val canned-acl
                          :status-code 400
                          :type :invalid-argument})))

       ;; any explicit grant header present: use the explicit ACL,
       ;; always granting FULL_CONTROL to the tenant as well
       (some seq [acl-read acl-write
                  acl-read-acp acl-write-acp
                  acl-full-ctl])
       (-> explicit-acl
           (update-in [:FULL_CONTROL] conj {:ID tenant
                                            :DisplayName tenant})
           (update-in [:FULL_CONTROL] vec))


       :else
       init))))
180 |
--------------------------------------------------------------------------------
/src/io/pithos/sig4.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.sig4
2 | (:require [clojure.string :as str]
3 | [clojure.tools.logging :refer [info debug]]
4 | [clj-time.core :as time]
5 | [clj-time.format :as format]
6 | [ring.util.codec :as codec])
7 | (:import [javax.crypto Mac]
8 | [javax.crypto.spec SecretKeySpec]
9 | [java.security MessageDigest]))
10 |
11 |
(defn parse-authorization
  "Parse an AWS SIG4 authorization header, e.g.:

   AWS4-HMAC-SHA256 Credential=KEY/20170805/us-east-1/s3/aws4_request,SignedHeaders=host;x-amz-date,Signature=HEX

   Returns a map with :access-key, :date, :region, :service,
   :signed-headers (a vector of header names) and :signature.
   An unparseable or missing header yields a map with nil entries
   instead of throwing."
  ;; NOTE: the previous version used Python-style triple-quoted strings,
  ;; which Clojure reads as stray string literals rather than a docstring,
  ;; and NPE'd on headers the regex did not match.
  [request]
  (let [header (or (get-in request [:headers "authorization"]) "")
        fields (rest (re-find #"AWS4-HMAC-SHA256 Credential=(\w+)\/(\d{8})\/([\w\d-]+)\/([\w\d]+)\/aws4_request,[ ]*SignedHeaders=([\w-;]+),[ ]*Signature=(\w+)"
                              header))
        authorization (zipmap [:access-key :date :region :service
                               :signed-headers :signature]
                              fields)]
    (update authorization :signed-headers
            #(some-> % (str/split #";")))))
27 |
(defn sha256
  "Compute the SHA-256 digest of a byte array, returning the raw bytes."
  [input]
  (-> (MessageDigest/getInstance "SHA-256")
      (.digest input)))
31 |
(defn secretKeyInst
  "Build a SecretKeySpec for `key` (a byte array) using the algorithm
   of the supplied Mac instance, so key and mac always agree."
  [key mac]
  (SecretKeySpec. key (.getAlgorithm mac)))
34 |
(defn hmac-sha256
  "Returns the raw SHA-256 HMAC of `string` under `key` (a byte array).

   The string is encoded as UTF-8, as the AWS signature specification
   mandates, instead of the platform default charset. The docstring was
   previously placed after the argument vector, where Clojure discards
   it as a stray literal."
  [key string]
  (let [mac (Mac/getInstance "HMACSHA256")
        secret-key (secretKeyInst key mac)]
    (.init mac secret-key)
    (.update mac (.getBytes string "UTF-8"))
    (.doFinal mac)))
43 |
(defn hex
  "Format a seqable of bytes as a lowercase hex string.
   (Previously carried a Python-style triple-quoted pseudo-docstring,
   which Clojure reads as stray string literals.)"
  [input]
  (apply str (map #(format "%02x" %) input)))
47 |
(defn bytes
  "Format a string as a byte array.

   NOTE: shadows `clojure.core/bytes`; the name is kept because other
   functions in this namespace call it.
   NOTE(review): uses the platform default charset — the AWS SIG4 spec
   assumes UTF-8, so confirm behavior on non-UTF-8 platforms."
  [input]
  (.getBytes input))
51 |
(defn request-time
  "Parse the `x-amz-date` or `date` header (basic-date-time-no-ms
   format) and return a time object. Returns nil when neither header
   is present; signature validation will then fail downstream."
  [request]
  (let [headers (get request :headers)]
    (cond
      (contains? headers "x-amz-date")
      (format/parse (format/formatters :basic-date-time-no-ms)
                    (get headers "x-amz-date"))

      (contains? headers "date")
      (format/parse (format/formatters :basic-date-time-no-ms)
                    (get headers "date")))))
61 |
(defn signing-key
  "Generate a signing key for a v4 signature by chaining HMACs over
   the request date, region, service and the literal \"aws4_request\"."
  [secret-key request-time authorization]
  ;; NOTE: the secret key is deliberately NOT logged here; the previous
  ;; version wrote it to the debug log.
  (-> (str "AWS4" secret-key)
      (bytes)
      (hmac-sha256 (format/unparse (format/formatters :basic-date) request-time))
      (hmac-sha256 (get authorization :region))
      (hmac-sha256 (get authorization :service))
      (hmac-sha256 "aws4_request")))
72 |
(defn canonical-verb
  "Upper-cased HTTP method name for the canonical request."
  [request]
  (-> request :request-method name str/upper-case))
75 |
(defn- double-escape [^String x]
  ;; Escape backslashes first, then dollar signs, so the result is safe
  ;; to use as a replacement string in `str/replace`.
  (-> x
      (.replace "\\" "\\\\")
      (.replace "$" "\\$")))
78 |
(defn- percent-encode [^String unencoded]
  ;; Percent-encode every UTF-8 byte of the input.
  (str/join (map (fn [b] (format "%%%02X" b))
                 (.getBytes unencoded "UTF-8"))))
83 |
(defn uri-escape
  "Escape a URI path: every run of characters outside the unreserved
   set (alphanumerics, `_~.-` and `/`) is percent-encoded."
  [unencoded]
  (str/replace unencoded
               #"[^A-Za-z0-9_~.\-/]+"
               (fn [match] (double-escape (percent-encode match)))))
89 |
(defn query-escape
  "Escape a query-string component: like `uri-escape` but `/` is
   percent-encoded as well."
  [unencoded]
  (str/replace unencoded
               #"[^A-Za-z0-9_~.\-]+"
               (fn [match] (double-escape (percent-encode match)))))
95 |
(defn canonical-uri
  "Escaped original request URI for the canonical request."
  [request]
  (-> request :orig-uri uri-escape))
98 |
(defn canonical-query-string
  "Build the canonical query string: decoded parameters are re-escaped,
   sorted by name and joined with `&`. Parameters without a value keep
   an empty value."
  [request]
  (let [query-string (get request :query-string)
        decoded      (and (seq query-string) (codec/form-decode query-string))
        params       (cond (map? decoded)    decoded
                           (string? decoded) {decoded nil}
                           :else             {})
        render       (fn [[k v]]
                       [(query-escape (name k))
                        (query-escape (str/trim (if (nil? v) "" v)))])]
    (->> params
         (map render)
         (sort-by first)
         (map (partial str/join "="))
         (str/join "&"))))
113 |
(defn canonical-headers
  "Render each signed header as `name:trimmed-value`, sorted by header
   name, one per line."
  [request include-headers]
  (->> (select-keys (get request :headers) include-headers)
       (map (fn [[k v]] [(name k) (str/trim v)]))
       (sort-by first)
       (map (fn [[k v]] (str k ":" v)))
       (str/join "\n")))
121 |
(defn signed-headers
  "Render the signed header list: names sorted lexicographically and
   joined with `;`, as the AWS SIG4 canonical request requires.

   Uses a full string sort. The previous `sort-by first` compared only
   the FIRST CHARACTER of each name, so headers sharing an initial
   (e.g. the whole `x-amz-*` family) stayed in input order and the
   computed signature could disagree with the client's."
  [include-headers]
  (str/join ";" (sort include-headers)))
124 |
(defn hash-payload
  "Hash of the request body. The client supplies it in the
   `x-amz-content-sha256` header; `sha256-input-stream` wraps :body so
   it errors out if the streamed content does not match this hash.
   (Previously carried a Python-style triple-quoted pseudo-docstring.)"
  [request]
  (get-in request [:headers "x-amz-content-sha256"]))
130 |
(defn canonical-request
  "Assemble the canonical request: verb, URI, query string, headers, a
   blank line, the signed header list and the payload hash, newline
   separated."
  [request include-headers]
  (let [parts [(canonical-verb request)
               (canonical-uri request)
               (canonical-query-string request)
               (canonical-headers request include-headers)
               ""
               (signed-headers include-headers)
               (hash-payload request)]]
    (str/join "\n" parts)))
141 |
(defn string-to-sign
  "Format a request into the canonicalized representation signed by
   AWS SIG4: algorithm, timestamp, credential scope, and the hex
   SHA-256 of the canonical request."
  [request request-time authorization]
  (let [canonical-request (canonical-request request (get authorization :signed-headers))]
    (debug "canonical-request" canonical-request)
    (str/join "\n"
              ["AWS4-HMAC-SHA256"
               (format/unparse (format/formatters :basic-date-time-no-ms) request-time)
               (str/join "/" [(format/unparse (format/formatters :basic-date) request-time)
                              (get authorization :region)
                              (get authorization :service)
                              "aws4_request"])
               (hex (sha256 (bytes canonical-request)))])))
157 |
(defn signature
  "Sign a canonicalized representation of the request with a signing
   key, returning the lowercase hex signature.
   (Previously carried a Python-style triple-quoted pseudo-docstring.)"
  [signing-key string-to-sign]
  (hex (hmac-sha256 signing-key string-to-sign)))
161 |
(defn is-signed-by?
  "Check whether `request` carries a valid SIG4 signature produced with
   `secret-key` for the parsed `authorization` map."
  [request authorization secret-key]
  (let [request-time   (request-time request)
        signing-key    (signing-key secret-key request-time authorization)
        string-to-sign (string-to-sign request request-time authorization)
        computed       (signature signing-key string-to-sign)
        provided       (get authorization :signature)]
    ;; NOTE: the derived signing key is deliberately NOT logged — it is
    ;; sensitive material derived from the secret key; the previous
    ;; version logged its hex representation.
    (debug "request-time" request-time)
    (debug "string-to-sign" string-to-sign)
    (debug "signature" computed provided (= computed provided))
    (= computed provided)))
176 |
(defn sha256-input-stream
  "Wrap a body stream with a hashing adapter that throws on `close` if
   the streamed data does not hash to `goal-hash` (lowercase hex).

   All three `read` arities are implemented so the digest is updated no
   matter which one the HTTP server calls; the previous version only
   overrode `read(byte[])`, leaving the zero- and three-argument
   arities to the abstract superclass."
  [body goal-hash]
  (let [hash (MessageDigest/getInstance "SHA-256")]
    (proxy [java.io.InputStream] []
      (close []
        (try
          ;; Compare the final digest with the expected value and throw
          ;; on mismatch so callers never silently accept corrupt data.
          (when (not= goal-hash (hex (.digest hash)))
            ;; FIXME: Is there a more appropriate error here?
            (throw (ex-info "body signature is incorrect"
                            {:type :signature-does-not-match
                             :status-code 403
                             :expected goal-hash
                             :to-sign ""})))
          (finally (.close body))))
      (read
        ([]
         (let [b (.read body)]
           (when (not= b -1)
             (.update hash (unchecked-byte b)))
           b))
        ([^bytes ba]
         (let [n (.read body ba)]
           (when (not= n -1)
             (.update hash ba 0 n))
           n))
        ([^bytes ba off len]
         (let [n (.read body ba off len)]
           (when (not= n -1)
             (.update hash ba off n))
           n))))))
200 |
(defn validate4
  "Validate an AWS SIG4-signed request against the keystore.

   On a valid signature, returns the keystore entry of the access key
   with the `authenticated-users` and `anonymous` groups appended;
   otherwise returns the anonymous tenant."
  [keystore request]
  (let [authorization (parse-authorization request)
        ;; single keystore lookup; the previous version performed it twice
        auth          (get keystore (get authorization :access-key))
        secret-key    (get auth :secret)
        valid?        (is-signed-by? request authorization secret-key)
        retval        (if valid?
                        (update-in auth [:memberof]
                                   concat ["authenticated-users" "anonymous"])
                        {:tenant :anonymous :memberof ["anonymous"]})]
    ;; NOTE: the secret key is deliberately NOT logged; the previous
    ;; version wrote both the secret and the full keystore entry to the
    ;; debug log.
    (debug "request" request)
    (debug "authorization" (dissoc auth :secret))
    (debug "is-valid-sig" valid?)
    (debug "retval" retval)
    retval))
221 |
--------------------------------------------------------------------------------
/doc/source/concepts.rst:
--------------------------------------------------------------------------------
1 | Design and Concepts
2 | ===================
3 |
This section describes the overall design and concepts within *pithos* and
5 | its interaction with Apache Cassandra.
6 |
7 | .. _S3 Concepts:
8 |
9 | S3 Concepts
10 | -----------
11 |
12 | Since *pithos* exposes the AWS S3 API, some of its properties have direct impact on
13 | pithos's design.
14 |
15 | Terminology
16 | ~~~~~~~~~~~
17 |
18 | If you're not familiar with S3, the following terms need clarification:
19 |
20 | Bucket
21 | A bucket is a named container for objects. A bucket belongs to a region
22 | and may contain an arbitrary number of objects, potentially in different
23 | storage classes.
24 |
25 | Region
26 | A region hosts the metadata for objects. Regions may have several available
27 | storage classes.
28 |
29 | Object
30 | An object is the S3 representation of a file. There is no filesystem hierarchy
31 | in S3 even though some mechanisms may help in emulating one.
32 |
33 | Storage Class
34 | A storage class is a destination for objects with specific storage properties.
35 | A typical use case for storage properties is to provide cheap storage with
36 | low safety properties in a *reduced redundancy* class and standard safety
37 | properties in a *standard* class.
38 |
39 | A global bucket namespace
40 | ~~~~~~~~~~~~~~~~~~~~~~~~~
41 |
42 | The first thing of note is that an S3-compatible object store will expose a
43 | global namespace for buckets across all tenants. Bucket names are first come,
44 | first served and hold very little information. The most important properties stored
45 | in a bucket are:
46 |
47 | - The bucket name
48 | - The bucket's tenant
49 | - The bucket's ACL
50 | - The bucket's CORS configuration
51 | - The region the bucket's objects will be stored in
52 |
53 |
54 | Cassandra concepts
55 | ------------------
56 |
57 | *pithos* relies on `Apache Cassandra`_, which brings its own set of terminology and
58 | concepts:
59 |
60 | Cluster
61 | A Cassandra cluster is a collection of a number of nodes which share
62 | properties such as available schema and data.
63 |
64 | Node
65 | A Cassandra node is a participant in a cluster. It can be seen as the
66 | equivalent of an SQL instance.
67 |
68 | Keyspace
69 | A Cassandra keyspace holds a collection of column families which share
70 | similar properties, such as a replication factor. It can be seen as the
71 | equivalent of an SQL database
72 |
73 | Column Family
74 | A Cassandra column family stores keyed rows of data sharing a specific
75 | schema. It can be seen as the equivalent of an SQL table.
76 |
77 | .. _Apache Cassandra: http://cassandra.apache.org
78 |
79 | Pithos properties
80 | -----------------
81 |
82 | *pithos* strives to provide an eventual consistent system, enforcing
83 | validity through clients.
84 |
85 | There is a single operation throughout *pithos* which necessitates some
86 | transactional properties: the act of claiming a bucket's ownership.
87 | As it stands, given the current implementation of the bucketstore, this
88 | operation may return inconsistent results.
89 |
90 | All other operations in *pithos* are commutative and may be retried
91 | to achieve expected results.
92 |
93 | It is worth noting that typical S3 clients will ensure the validity of operations
94 | by comparing local MD5 checksums and checksums reported by *pithos*.
95 |
96 | While operations are commutative, stale data may be left in the
97 | ``inode_blocks`` and ``block`` column families when upload operations
98 | fail, or clients interrupt uploads. This will result in orphaned blocks,
99 | which need to be regularly purged from the blobstore. Pithos does not
100 | provide a method to check for these yet.
101 |
102 |
103 |
104 | .. _Pithos Architecture:
105 |
106 | Pithos Architecture
107 | -------------------
108 |
109 | To isolate concerns and provide flexibility when building an object store service,
110 | *pithos* is built around the notion of different stores which are all responsible
111 | for a subset of the overall object store data. Each of the stores can be independently
112 | configured and may point to a separate location.
113 |
114 | *pithos* provides default implementations of each store targeting cassandra (except for
115 | the keystore which is static by default) but the configuration file format allows for
116 | providing different implementations if necessary.
117 |
118 | The Keystore
119 | ~~~~~~~~~~~~
120 |
121 | *pithos* does not concern itself with handling tenants, it relies on a
122 | keystore to provide an association from API key to tenant information.
123 |
124 | Out of the box, *pithos* only ships with a simple config-file based keystore,
125 | but writing a separate one is trivial and covered in the developer documentation.
126 |
127 | A key lookup in the keystore should yield a map of the following attributes::
128 |
129 | {
130 | "master": false,
131 | "tenant": "tenant name",
132 | "secret": "secret key",
133 | "memberof": ["group1", "group2"]
134 | }
135 |
136 | These properties are then used by *pithos* to authenticate requests.
137 |
138 | The Bucketstore
139 | ~~~~~~~~~~~~~~~
140 |
141 | The bucketstore holds an association of bucket name to tenant and properties.
142 |
143 |
144 | ========== ======== ===============================
145 | Column Type Description
146 | ========== ======== ===============================
147 | bucket text bucket name
148 | acl text serialized ACL definition
149 | cors text serialized CORS definition
150 | created text ISO8601 timestamp
151 | policy text (unused)
152 | region text region name
153 | tenant text tenant name
154 | versioned boolean (unused)
155 | website text website configuration (unused)
156 | ========== ======== ===============================
157 |
158 |
159 | The Metastore
160 | ~~~~~~~~~~~~~
161 |
162 | The metastore holds object metadata for a specific region. It also associates
163 | objects with their storage class location and keeps track of ongoing
164 | multipart object uploads.
165 |
166 | An object has the following properties:
167 |
168 | ============ ======== ==================================
169 | Column Type Description
170 | ============ ======== ==================================
171 | bucket text bucket name
172 | object text full object path
173 | acl text serialized ACL definition
174 | atime text ISO8601 timestamp of access time
175 | checksum text MD5 checksum of object
176 | size bigint total file size
177 | inode uuid object inode ID
178 | version uuid object version ID
179 | storageclass text storage class where data is stored
180 | metadata map additional attributes
181 | ============ ======== ==================================
182 |
183 | Multipart upload descriptions span two entities, the first
184 | stores a list of ongoing uploads:
185 |
186 | ============ ======== ==================================
187 | Column Type Description
188 | ============ ======== ==================================
189 | bucket text bucket name
190 | object text full object path
191 | upload uuid object inode ID
192 | metadata map additional attributes
193 | ============ ======== ==================================
194 |
195 | The second stores information on each uploaded part
196 |
197 | ============ ======== ==================================
198 | Column Type Description
199 | ============ ======== ==================================
200 | bucket text bucket name
201 | object text full object path
202 | upload uuid object inode ID
203 | partno int part number within this upload
204 | checksum     text     MD5 checksum of this part
205 | inode uuid upload part inode ID
206 | version uuid upload part version ID
207 | modified text ISO 8601 timestamp of part upload
208 | size bigint upload part total size
209 | ============ ======== ==================================
210 |
211 |
212 | The Blobstore
213 | ~~~~~~~~~~~~~
214 |
215 | The blobstore holds data for your files. Data is stored
216 | based on inode ids. Data is stored across two entities
217 | by default.
218 |
219 | The first one is a list of blocks within an inode:
220 |
221 | ============ ======== ==================================
222 | Column Type Description
223 | ============ ======== ==================================
224 | inode uuid inode ID
225 | version uuid version ID
226 | block bigint offset of block start
227 | size bigint block size
228 | ============ ======== ==================================
229 |
230 | The second one holds data within a block:
231 |
232 | ============ ======== ==================================
233 | Column Type Description
234 | ============ ======== ==================================
235 | inode uuid inode ID
236 | version uuid version ID
237 | block bigint offset of block start
238 | offset bigint offset of payload within object
239 | chunksize int payload size
240 | payload blob bytes for this payload
241 | ============ ======== ==================================
242 |
243 |
--------------------------------------------------------------------------------
/src/io/pithos/stream.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.stream
2 | "Read and write to cassandra from OutputStream and InputStream"
3 | (:import java.io.OutputStream
4 | java.io.InputStream
5 | java.nio.ByteBuffer
6 | org.eclipse.jetty.server.HttpInputOverHTTP
7 | javax.servlet.ReadListener)
8 | (:require [io.pithos.blob :as b]
9 | [io.pithos.desc :as d]
10 | [io.pithos.util :as u]
11 | [clojure.tools.logging :refer [debug error]]))
12 |
13 | (defn chunk->ba
14 |   "Convert a chunk's payload (a ByteBuffer) into the
15 |    [byte-array offset length] triplet expected by
16 |    output streams.
17 | 
18 |    The underlying byte buffers may be re-used, so the
19 |    buffer's current position and limit must be honored
20 |    rather than assuming the whole backing array is valid."
21 |   [{:keys [payload]}]
22 |   (let [ba  (.array payload)
23 |         pos (.position payload)]
24 |     [ba pos (- (.limit payload) pos)]))
26 |
27 | (defn full-file?
28 |   "True when the byte range [start end] covers the entire object."
29 |   [od start end]
30 |   (and (zero? start) (= (d/size od) end)))
31 |
32 | (defn within-range?
33 |   "True when a chunk overlaps the byte range [start end]."
34 |   [{:keys [chunksize offset]} start end]
35 |   (let [chunk-end (+ offset chunksize)]
36 |     (and (<= start chunk-end) (<= offset end))))
36 |
37 | (defn crop-chunk
38 |   "Crop a chunk down to the requested byte range [start end].
39 | 
40 |    Three streaming cases exist for a chunk:
41 | 
42 |    - The chunk lies entirely within the range: stream all of it.
43 |    - The range start falls inside the chunk: begin at that mark.
44 |    - The range end falls inside the chunk: stop at that mark.
45 | 
46 |    The last two cases are handled together by computing a head and
47 |    a tail to trim, adjusting the array offset and length accordingly.
48 |    Yields an [array offset length] triplet."
49 |   [{:keys [offset chunksize] :as chunk} start end]
50 |   (let [[array off len] (chunk->ba chunk)
51 |         chunk-start     offset
52 |         chunk-end       (+ offset chunksize)]
53 |     (if (and (>= chunk-start start) (<= chunk-end end))
54 |       ;; Fully contained: no cropping needed.
55 |       [array off len]
56 |       ;; Trim whatever sticks out on either side.
57 |       (let [head (max 0 (- start chunk-start))
58 |             tail (max 0 (- chunk-end end))]
59 |         [array (+ off head) (- len head tail)]))))
65 |
66 | (defn stream-file
67 |   "Stream a whole file: write out every chunk of every block,
68 |    ignoring any supplied range."
69 |   [od ^OutputStream stream blob blocks]
70 |   (doseq [{:keys [block]} blocks
71 |           chunk (b/chunks blob od block block)]
72 |     (let [[array off len] (chunk->ba chunk)]
73 |       (.write stream array off len))))
74 |
75 | (defn stream-range
76 |   "Stream only the bytes within [start end]: walk blocks until the
77 |    end is passed, keep chunks overlapping the range and crop the
78 |    boundary chunks before writing them out."
79 |   [od ^OutputStream stream blob blocks start end]
80 |   (doseq [{:keys [block]} blocks
81 |           :while (<= block end)
82 |           chunk (b/chunks blob od block block)
83 |           :when (within-range? chunk start end)
84 |           :let [[array off len] (crop-chunk chunk start end)]]
85 |     (.write stream array off len)))
86 |
87 | (defn stream-to
88 |   "Stream a range or a whole file to an output stream.
89 | 
90 |    The stream is always flushed and closed, even on error. Read
91 |    errors are logged and swallowed rather than rethrown, so the
92 |    client sees a truncated body instead of an exception.
93 |    Returns the object descriptor."
94 |   [od ^OutputStream stream [start end]]
95 |   (debug "streaming range: " start end)
96 |   (let [blob (d/blobstore od)
97 |         blocks (b/blocks blob od)]
98 |     (try
99 |       ;; Full-file reads avoid the per-chunk range checks.
100 |       (if (full-file? od start end)
101 |         (stream-file od stream blob blocks)
102 |         (stream-range od stream blob blocks start end))
103 |       (catch Exception e
104 |         (error e "error during read"))
105 |       (finally
106 |         (debug "closing after read")
107 |         (.flush stream)
108 |         (.close stream))))
109 |   od)
104 |
105 | (defn stream-from
106 |   "Given an input stream and an object descriptor, stream data from the
107 |    input stream to the descriptor. Returns the descriptor on success,
108 |    nil when a write error was caught.
109 | 
110 |    Our current approach has the drawback of not enforcing blocksize
111 |    requirements since we have no way of being notified when reaching a
112 |    threshold."
113 |   [^InputStream stream od]
114 |   (let [blob (d/blobstore od)
115 |         hash (u/md5-init)]
116 |     (try
117 |       (loop [block 0
118 |              offset 0]
119 |         ;; block only ever catches up to offset (see the recurs below),
120 |         ;; so this fires exactly when a fresh block begins.
121 |         (when (>= block offset)
122 |           (debug "marking new block")
123 |           (b/start-block! blob od block))
124 |         (let [chunk-size (b/max-chunk blob)
125 |               ba (byte-array chunk-size)
126 |               br (.read stream ba)]
127 |           (if (neg? br)
128 |             ;; End of input: record final size and checksum.
129 |             (do
130 |               (debug "negative write, read whole stream")
131 |               (d/col! od :size offset)
132 |               (d/col! od :checksum (u/md5-sum hash))
133 |               od)
134 |             (let [chunk (ByteBuffer/wrap ba 0 br)
135 |                   sz (b/chunk! blob od block offset chunk)
136 |                   offset (+ sz offset)]
137 |               (u/md5-update hash ba 0 br)
138 |               ;; Crossing a block boundary starts a new block at the
139 |               ;; current offset; otherwise keep filling this one.
140 |               (if (b/boundary? blob block offset)
141 |                 (recur offset offset)
142 |                 (recur block offset))))))
143 |       (catch Exception e
144 |         ;; NOTE(review): errors are swallowed and nil is returned
145 |         ;; instead of the descriptor — confirm callers handle nil.
146 |         (error e "error during write"))
147 |       (finally
148 |         (debug "closing after write")
149 |         (.close stream)))))
142 |
143 | (defn validate-range
144 |   "Ensure 0 <= start <= end <= object size, throwing an
145 |    IllegalArgumentException otherwise."
146 |   [src start end]
147 |   (if (<= 0 start end (d/size src))
148 |     nil
149 |     (throw (IllegalArgumentException. "Invalid range supplied"))))
147 |
148 | (defn stream-copy-range-block
149 |   "Copy the in-range chunks of one source block to a destination
150 |    descriptor, feeding copied bytes into the running MD5 digest.
151 |    Returns the end of the last chunk relative to the range start,
152 |    or the incoming offset unchanged when the block yields no chunks."
153 |   [hash offset dblob dst sblob src start end block]
154 |   ;; Destination blocks are re-based so the copied range starts at 0.
155 |   (b/start-block! dblob dst (- block start))
156 |   (if-let [chunks (seq (b/chunks sblob src block offset))]
157 |     (do
158 |       (doseq [chunk chunks
159 |               :when (and chunk (within-range? chunk start end))
160 |               ;; NOTE(review): this `offset` shadows the incoming
161 |               ;; parameter with the cropped chunk's *array* offset.
162 |               :let [[ba offset len] (crop-chunk chunk start end)]]
163 |         ;; NOTE(review): the destination block here is `block`, but
164 |         ;; start-block! above used `(- block start)`; and `(- offset
165 |         ;; start)` mixes an array offset with a file-range offset.
166 |         ;; Confirm both asymmetries are intended.
167 |         (b/chunk! dblob dst block (- offset start) (ByteBuffer/wrap ba offset len))
168 |         (u/md5-update hash ba offset len))
169 |       (let [{:keys [offset chunksize]} (last chunks)]
170 |         (- (+ offset chunksize) start)))
171 |     offset))
162 |
163 | (defn stream-copy-range
164 |   "Copy the byte range [start end] from a source descriptor to a
165 |    destination descriptor, then record the range's size and the
166 |    running MD5 checksum on the destination.
167 | 
168 |    The block list is fetched once and reused (it was previously
169 |    fetched a second time by the loop)."
170 |   [src dst [start end]]
171 |   (debug "copying from range: " start end)
172 |   (let [sblob  (d/blobstore src)
173 |         dblob  (d/blobstore dst)
174 |         blocks (b/blocks sblob src)
175 |         hash   (u/md5-init)]
176 |     (validate-range src start end)
177 |     (loop [[block & more] (map :block blocks)
178 |            offset         0]
179 |       (cond
180 |         ;; Past the last block, or past the range: finalize.
181 |         (or (nil? block) (> block end))
182 |         (do
183 |           (d/col! dst :size (- end start))
184 |           (d/col! dst :checksum (u/md5-sum hash)))
185 | 
186 |         ;; Skip a whole block only when the *next* block still starts
187 |         ;; at or before the range start, i.e. the current block lies
188 |         ;; entirely before the range. (The previous condition was
189 |         ;; inverted and skipped blocks that contained range data.)
190 |         (and (seq more) (<= (first more) start))
191 |         (recur more offset)
192 | 
193 |         :else
194 |         (recur more
195 |                (stream-copy-range-block hash offset dblob dst sblob
196 |                                         src start end block))))))
188 |
189 | (defn stream-copy
190 |   "Copy from one object descriptor to another, block by block, then
191 |    record the source's size and checksum on the destination.
192 |    Returns the destination descriptor."
193 |   [src dst]
194 |   (let [sblob (d/blobstore src)
195 |         dblob (d/blobstore dst)
196 |         blocks (b/blocks sblob src)]
197 |     (doseq [{:keys [block]} blocks]
198 |       (b/start-block! dblob dst block)
199 |       (debug "found block " block)
200 |       ;; Page through the block's chunks: each iteration fetches the
201 |       ;; chunks from `offset` on, and resumes after the last one seen.
202 |       ;; The loop ends when a fetch comes back empty.
203 |       (loop [offset block]
204 |         (when-let [chunks (seq (b/chunks sblob src block offset))]
205 |           (doseq [chunk chunks
206 |                   ;; shadows the loop binding: this is the chunk's own
207 |                   ;; source offset, reused as the destination offset.
208 |                   :let [offset (:offset chunk)]]
209 |             (b/chunk! dblob dst block offset (:payload chunk)))
210 |           (let [{:keys [offset chunksize]} (last chunks)]
211 |             (recur (+ offset chunksize))))))
212 |     (d/col! dst :size (d/size src))
213 |     (d/col! dst :checksum (d/checksum src))
214 |     dst))
208 |
209 | (defn stream-copy-part-block
210 |   "Copy a single part's block to a destination.
211 | 
212 |    `g-offset` is the part's global byte offset within the assembled
213 |    object; block and chunk offsets are translated by it. Returns the
214 |    global offset reached after the last chunk.
215 | 
216 |    NOTE(review): a block with no chunks makes (last (for ...)) yield
217 |    nil, which would poison the reduce accumulator in
218 |    stream-copy-part — confirm blocks always carry chunks."
219 |   [notifier dst hash part g-offset {:keys [block]}]
220 |   (let [dblob (d/blobstore dst)
221 |         sblob (d/blobstore part)
222 |         real-block (+ g-offset block)]
223 |     (debug "streaming block: " block)
224 |     (b/start-block! dblob dst real-block)
225 |     (notifier :block)
226 |     (last
227 |      (for [chunk (b/chunks sblob part block block)
228 |            :let [offset (:offset chunk)
229 |                  payload (:payload chunk)
230 |                  real-offset (+ g-offset offset)]]
231 |        (do
232 |          (b/chunk! dblob dst real-block real-offset payload)
233 |          (notifier :chunk)
234 |          ;; Feed the payload into the running MD5 digest; .get advances
235 |          ;; the buffer's position, so save and restore it around the copy.
236 |          (let [pos (.position payload)
237 |                sz (.remaining payload)
238 |                ba (byte-array sz)]
239 |            (.get payload ba)
240 |            (.position payload pos)
241 |            (u/md5-update hash ba 0 sz)
242 |            (+ real-offset sz)))))))
233 |
234 |
235 | (defn stream-copy-part
236 |   "Copy a single part to a destination, threading the running
237 |    [offset hash] accumulator through its blocks."
238 |   [notifier dst [offset hash] part]
239 |   (let [blob       (d/blobstore part)
240 |         blocks     (b/blocks blob part)
241 |         copy-block (partial stream-copy-part-block notifier dst hash part)]
242 |     (debug "streaming part: " (d/part part))
243 |     [(reduce copy-block offset blocks) hash]))
245 |
246 | (defn stream-copy-parts
247 |   "Given a list of parts, stream each one in order to a destination
248 |    inode, then record the accumulated size and checksum on it."
249 |   [parts dst notifier]
250 |   (let [dblob       (d/blobstore dst)
251 |         copy-part   (partial stream-copy-part notifier dst)
252 |         [size hash] (reduce copy-part [0 (u/md5-init)] parts)]
253 |     (d/col! dst :size size)
254 |     (d/col! dst :checksum (u/md5-sum hash))
255 |     (debug "stored size:" size "and checksum: " (u/md5-sum hash))
256 |     dst))
256 |
--------------------------------------------------------------------------------
/doc/source/clients.rst:
--------------------------------------------------------------------------------
1 | Client compatibility list
2 | =========================
3 |
4 | This section needs your help
5 |
6 | s3cmd
7 | -----
8 |
9 | Fully tested with the current API coverage. Here is a minimal
10 | configuration you can put in ``~/.s3cfg``::
11 |
12 | [default]
13 | host_base = s3.example.com
14 | host_bucket = %(bucket)s.s3.example.com
15 | access_key = YOUR_ACCESS_KEY
16 | secret_key = YOUR_SECRET_KEY
17 | use_https = True
18 | signature_v2 = True
19 |
20 | Adapt with your credentials and replace ``s3.example.com`` with the
21 | value you specified for ``service-uri``. ``use_https`` is needed only
22 | if Pithos is served over TLS. Currently pithos doesn't support v4
23 | signatures so the ``signature_v2`` flag is necessary.
24 |
25 | When testing locally, the following configuration can be used::
26 |
27 | [default]
28 | host_base = s3.example.com
29 | host_bucket = %(bucket)s.s3.example.com
30 | access_key = YOUR_ACCESS_KEY
31 | secret_key = YOUR_SECRET_KEY
32 | use_https = False
33 | signature_v2 = True
34 | proxy_host = localhost
35 | proxy_port = 8080
36 |
37 |
38 | libcloud
39 | --------
40 |
41 | Working support with the S3 provider::
42 |
43 | from libcloud.storage.types import Provider
44 | from libcloud.storage.providers import get_driver
45 | cls = get_driver(Provider.S3)
46 | driver = cls('api key', 'api secret key', host='s3.example.com')
47 | driver.list_containers()
48 |
49 | rclone
50 | --------
51 |
52 | Working support with the S3 provider::
53 |
54 | RCLONE_CONFIG__TYPE=s3
55 | RCLONE_CONFIG__ACCESS_KEY_ID=YOUR_ACCESS_KEY
56 | RCLONE_CONFIG__SECRET_ACCESS_KEY=YOUR_SECRET_KEY
57 | RCLONE_CONFIG__REGION=other-v2-signature
58 | RCLONE_CONFIG__ENDPOINT=s3.example.com
59 | RCLONE_CONFIG__ACL=private
60 |
61 | Ansible
62 | -------
63 |
64 | Sample task configuration to list a bucket using the signature V2 by using the
65 | scheme ``fakes3`` *(= http)* or ``fakes3s`` *(= https)*::
66 |
67 | - name: List bucket content
68 | aws_s3:
69 | s3_url: "fakes3s://s3.example.com"
70 | bucket: "my_bucket"
71 | mode: list
72 | register: my_bucket_content
73 |
74 | cyberduck
75 | ---------
76 |
77 | On-going integration
78 |
79 | owncloud
80 | --------
81 |
82 | Working support
83 |
84 | s3fs - s3 fuse support
85 | ----------------------
86 |
87 | Working support. If you specified ``s3.example.com`` as
88 | ``service-uri``, you can mount the bucket ``bucket`` with the
89 | following command::
90 |
91 | s3fs bucket /mnt/bucket -o url=https://s3.example.com
92 |
93 | The credentials have to be specified in ``~/.passwd-s3fs``::
94 |
95 | YOUR_ACCESS_KEY:YOUR_SECRET_KEY
96 |
97 | WAL-E - continuous archiving for Postgres
98 | -----------------------------------------
99 |
100 | Support for S3-compatible object stores was added in version 0.8 of WAL-E.
101 | Configure WAL-E with the following environment variables:
102 |
103 | ===================== ============================
104 | AWS_ACCESS_KEY_ID YOUR_ACCESS_KEY
105 | AWS_SECRET_ACCESS_KEY YOUR_SECRET_KEY
106 | WALE_S3_ENDPOINT https+path://s3.example.com
107 | WALE_S3_PREFIX s3://your-bucket/your-prefix
108 | ===================== ============================
109 |
110 | Archiving WAL files
111 | ```````````````````
112 |
113 | PostgreSQL needs the following settings in ``postgresql.conf``::
114 |
115 | wal_level = archive
116 | archive_mode = on
117 | archive_command = 'envdir /etc/wal-e.d/env /path/to/wal-e wal-push %p'
118 | archive_timeout = 60
119 |
120 | Once postgres is setup to send WAL files, make a base backup with ``envdir
121 | /etc/wal-e.d/env /path/to/wal-e backup-push /path/to/postgres/data``
122 |
123 | Restoring from archived WAL files
124 | `````````````````````````````````
125 |
126 | Pull a base backup::
127 |
128 | envdir /etc/wal-e.d/env /path/to/wal-e backup-fetch /path/to/postgres/data LATEST
129 |
130 | Create a ``recovery.conf`` file in the postgres data dir with the following
131 | content::
132 |
133 | restore_command = 'envdir /etc/wal-e.d/env /path/to/wal-e wal-fetch "%f" "%p"'
134 |
135 | Start postgresql and check the logs to see its restore status.
136 |
137 | elasticsearch - index backup and restore
138 | ----------------------------------------
139 |
140 | Snapshotting and restoring indices to Pithos is supported thanks to the `AWS
141 | Cloud Plugin`_. To configure a snapshot repository that points to your pithos
142 | installation, simply add to your ``/etc/elasticsearch/elasticsearch.yml``:
143 |
144 | .. code-block:: yaml
145 |
146 | cloud:
147 | aws:
148 | access_key:
149 | secret_key:
150 | s3:
151 | protocol: https
152 | endpoint: s3.example.com
153 |
154 | Then create your repository::
155 |
156 | $ curl -XPUT 'http://localhost:9200/_snapshot/pithos' -d '{
157 | "type": "s3",
158 | "settings": {
159 | "bucket": "es-snapshots"
160 | }
161 | }'
162 |
163 | Starting with version 2.4.2 of the plugin, all settings can be provided
164 | per-repository::
165 |
166 | $ curl -XPUT 'http://localhost:9200/_snapshot/pithos' -d '{
167 | "type": "s3",
168 | "settings": {
169 | "bucket": "es-snapshots",
170 | "access_key": "your key",
171 | "secret_key": "your secret",
172 | "protocol": "http",
173 | "endpoint": "s3.example.com",
174 | }
175 | }'
176 |
177 | .. _AWS Cloud Plugin: https://github.com/elasticsearch/elasticsearch-cloud-aws
178 |
179 | AWS Languages SDKs
180 | ------------------
181 |
182 | In general, AWS Language SDKs can work with Pithos with the following
183 | configuration:
184 |
185 | * In ``~/.aws/config``::
186 |
187 | [default]
188 | s3 =
189 | signature_version = s3
190 |
191 | * In ``~/.aws/credentials``::
192 |
193 | [default]
194 | aws_access_key_id =
195 | aws_secret_access_key =
196 |
197 | You can have multiple profiles instead of altering the ``[default]``
198 | configuration. Simply repeat configuration sections and name them
199 | ``[profile your-profile-name]``
200 |
201 | Shell (awscli)
202 | ``````````````
203 |
204 | Install `awscli`_, then::
205 |
206 | aws s3 ls --endpoint-url=https://your-endpoint
207 |
208 | To use a non-default profile::
209 |
210 | aws s3 ls --endpoint-url=https://your-endpoint --profile=your-profile
211 |
212 | Python (boto3)
213 | ``````````````
214 |
215 | Install `boto3`_ and create a Pithos client like this:
216 |
217 | .. code-block:: python
218 |
219 | import boto3.session
220 |
221 | session = boto3.session.Session()
222 | client = session.client('s3', endpoint_url='https://pithos-endpoint')
223 | client.list_buckets()
224 |
225 | To use a non-default profile:
226 |
227 | .. code-block:: python
228 |
229 | import boto3.session
230 | session = boto3.session.Session(profile_name='profile-name')
231 | client = session.client('s3', endpoint_url='https://pithos-endpoint')
232 |
233 | Python (boto)
234 | `````````````
235 |
236 | `Boto`_ version 2 is boto3's ancestor but is still widely used. It doesn't
237 | take ``~/.aws/*`` configuration files into account.
238 |
239 | .. code-block:: python
240 |
241 | from boto.s3.connection import S3Connection, OrdinaryCallingFormat
242 |
243 | connection = S3Connection(key, secret, host='pithos-endpoint',
244 | port=443, is_secure=True,
245 | calling_format=OrdinaryCallingFormat())
246 | bucket = connection.get_bucket('your-bucket')
247 |
248 | .NET
249 | ````
250 |
251 | Install `AWSSDK.S3`_, then:
252 |
253 | .. code-block:: csharp
254 |
255 | Amazon.AWSConfigsS3.UseSignatureVersion4 = false;
256 | var config = new Amazon.S3.AmazonS3Config()
257 | {
258 | ServiceURL = host,
259 | SignatureVersion = "s3",
260 | };
261 | var client = new Amazon.S3.AmazonS3Client(apikey, secretKey, config);
262 |
263 | Java
264 | ````
265 |
266 | Install `AWS SDK for Java`_, then:
267 |
268 | .. code-block:: java
269 |
270 | // works with the latest (last confirmed version: 1.11.123) AWS Java SDK
271 |
272 | import com.amazonaws.ClientConfiguration;
273 | import com.amazonaws.services.s3.AmazonS3Client;
274 |
275 | ClientConfiguration config = new ClientConfiguration();
276 | config.setSignerOverride("S3SignerType");
277 |
278 | AmazonS3Client s3 = new AmazonS3Client(config);
279 | s3.setEndpoint("https://your-endpoint");
280 |
281 |
282 | // You can eliminate the credentials file by instead passing in (or reading from your own config file)
283 | // credentials as below:
284 | // AWSCredentials credentials = new BasicAWSCredentials("AKIAIOSFODNN7EXAMPLE",
285 | // "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
286 | // AmazonS3 s3 = new AmazonS3Client(credentials, config);
287 |
288 | // if your endpoint is hosted on a non standard port for example,
289 | // s3.setEndpoint("http://your-endpoint:8081");
290 | // then your pithos.yaml server-uri should also include the port for example:
291 | // server-uri : your-endpoint:8081
292 |
293 | PHP
294 | ```
295 |
296 | Install `PHP AWS SDK`_ - important: Only Version2 is suitable (Version 3 only supports SignatureVersion 4, which is not yet implemented). After install, use something like this:
297 |
298 | .. code-block:: php
299 |
300 | // connect
301 | $s3Client=Aws\S3\S3Client::factory([
302 | 'base_url'=>'https://your-endpoint.com',
303 | 'key'=>'your-key',
304 | 'secret'=>'your-secret',
305 | 'region'=>'region', // must be filled with something, even if you have no regions
306 | ]);
307 |
308 | // list all files in bucket
309 | $iterator = $s3Client->getIterator('ListObjects', array(
310 | 'Bucket' => $bucket,
311 | 'Prefix' => 'foo'
312 | ));
313 |
314 | foreach ($iterator as $object) {
315 | echo $object['Key'] . "\n";
316 | }
317 |
318 | .. _awscli: https://aws.amazon.com/cli/
319 | .. _boto3: https://boto3.readthedocs.io/en/latest/
320 | .. _Boto: http://boto.cloudhackers.com/en/latest/
321 | .. _AWSSDK.S3: https://www.nuget.org/packages/AWSSDK.S3/
322 | .. _AWS SDK for Java: https://aws.amazon.com/sdk-for-java/
323 | .. _PHP AWS SDK: http://docs.aws.amazon.com/aws-sdk-php/v2/guide/installation.html
324 |
325 |
326 |
--------------------------------------------------------------------------------
/src/io/pithos/request.clj:
--------------------------------------------------------------------------------
1 | (ns io.pithos.request
2 | "This namespace provides all necessary wrapper functions to validate and
3 | augment the incoming request map."
4 | (:require [clojure.string :refer [lower-case join starts-with?]]
5 | [clojure.tools.logging :refer [debug info warn error]]
6 | [clojure.pprint :refer [pprint]]
7 | [clojure.java.io :as io]
8 | [io.pithos.sig :refer [validate check-sig anonymous]]
9 | [io.pithos.sig4 :refer [validate4 sha256-input-stream]]
10 | [io.pithos.system :refer [service-uri keystore]]
11 | [io.pithos.util :refer [string->pattern uri-decode]]
12 | [clout.core :as c]
13 | [ring.middleware.multipart-params :as mp]
14 | [ring.util.request :as req]
15 | [ring.util.codec :as codec]
16 | [clojure.data.codec.base64 :as base64]
17 | [cheshire.core :as json]
18 | [qbits.alia.uuid :as uuid])
19 | (:import [java.io ByteArrayInputStream]
20 | [java.io ByteArrayOutputStream]))
21 |
22 | (def known
23 |   "The set of recognized (lower-cased) query string argument names."
24 |   #{"acl"
25 |     "awsaccesskeyid"
26 |     "cors"
27 |     "delete"
28 |     "delimiter"
29 |     "expires"
30 |     "file"
31 |     "key"
32 |     "lifecycle"
33 |     "location"
34 |     "logging"
35 |     "marker"
36 |     "max-keys"
37 |     "notification"
38 |     "partnumber"
39 |     "policy"
40 |     "prefix"
41 |     "requestpayment"
42 |     "response-cache-control"
43 |     "response-content-type"
44 |     "response-content-disposition"
45 |     "response-content-encoding"
46 |     "response-content-language"
47 |     "response-expires"
48 |     "restore"
49 |     "signature"
50 |     "success_action_redirect"
51 |     "success_action_status"
52 |     "tagging"
53 |     "uploadid"
54 |     "uploads"
55 |     "versionid"
56 |     "versioning"
57 |     "versions"
58 |     "website"})
59 |
60 | (def actions
61 |   "Query arguments which select a specific operation variant; when
62 |    present, one of these becomes part of the operation keyword."
63 |   #{:acl
64 |     :cors
65 |     :delete
66 |     :lifecycle
67 |     :location
68 |     :logging
69 |     :notification
70 |     :policy
71 |     :requestpayment
72 |     :restore
73 |     :tagging
74 |     :uploads
75 |     :uploadid
76 |     :versioning
77 |     :versions
78 |     :website})
78 |
79 | (def subresources
80 |   "Known subresources mapped to their canonical (case-sensitive)
81 |    spelling, used when building the string to sign."
82 |   {:acl "acl"
83 |    :cors "cors"
84 |    :delete "delete"
85 |    :lifecycle "lifecycle"
86 |    :location "location"
87 |    :logging "logging"
88 |    :notification "notification"
89 |    :partnumber "partNumber"
90 |    :policy "policy"
91 |    :response-content-disposition "response-content-disposition"
92 |    :response-content-type "response-content-type"
93 |    :response-content-encoding "response-content-encoding"
94 |    :response-content-language "response-content-language"
95 |    :response-cache-control "response-cache-control"
96 |    :response-expires "response-expires"
97 |    :requestpayment "requestPayment"
98 |    :tagging "tagging"
99 |    :torrent "torrent"
100 |    :uploadid "uploadId"
101 |    :uploads "uploads"
102 |    :versionid "versionId"
103 |    :versioning "versioning"
104 |    :versions "versions"
105 |    :website "website"})
105 |
106 | (defn action-routes
107 |   "Build the compiled route table used to classify a request's
108 |    target: the service root, a bucket, or an object in a bucket."
109 |   []
110 |   (let [matcher (fn [spec]
111 |                   (partial c/route-matches (c/route-compile spec)))]
112 |     [[:service (matcher "/")]
113 |      [:bucket  (matcher "/:bucket")]
114 |      [:bucket  (matcher "/:bucket/")]
115 |      [:object  (matcher "/:bucket/*")]]))
117 |
118 | (defn match-action-route
119 |   "Try a single [target matcher] pair against the request; on a
120 |    match, yield the target with the decoded bucket and object."
121 |   [request [target matcher]]
122 |   (when-let [matched (matcher request)]
123 |     {:target target
124 |      :bucket (uri-decode (:bucket matched))
125 |      :object (uri-decode (:* matched))}))
123 |
124 | (defn yield-assoc-target
125 |   "Closure which for each incoming request will assoc target, bucket
126 |    and object; unmatched requests get a target of :unknown."
127 |   []
128 |   (let [routes (action-routes)]
129 |     (fn [request]
130 |       (let [matched (some (partial match-action-route request) routes)]
131 |         (merge request (or matched {:target :unknown}))))))
133 |
134 | (defn yield-assoc-operation
135 |   "Closure which will build an operation keyword based on the incoming
136 |    request. This is the bulk of the routing in pithos. This becomes
137 |    necessary because S3's behavior varies based on the route, but also
138 |    based on query arguments.
139 | 
140 |    `suffixes` holds query args which are relevant and need to be taken
141 |    into account; when found, one becomes part of the operation name."
142 |   [suffixes]
143 |   (fn [{:keys [uri request-method action-params target params] :as request}]
144 |     (let [suffix (some suffixes action-params)
145 |           ;; Render a query pair as "k=v", or just "k" when valueless.
146 |           getpair (fn [[k v]] (if (and v (seq v)) (str k "=" v) k))
147 |           ;; Canonical query string for signing: keep only known
148 |           ;; subresources, sort them, and join with '&'.
149 |           append (some->> (filter (comp subresources key) params)
150 |                           (map (juxt (comp subresources first) second))
151 |                           (sort-by first)
152 |                           (map getpair)
153 |                           (seq)
154 |                           (join "&")
155 |                           ((partial str "?")))]
156 |       (assoc request
157 |              :sign-uri (str uri append)
158 |              :action (when suffix (name suffix))
159 |              ;; e.g. :get-bucket-acl, :put-object
160 |              :operation (->> (map name (if suffix
161 |                                          [request-method target suffix]
162 |                                          [request-method target]))
163 |                              (join "-")
164 |                              (keyword))))))
161 |
162 | (defn keywordized
163 |   "Yield a map where string keys are keywordized; keys absent from
164 |    the `known` set collapse to nil and are dropped."
165 |   [params]
166 |   (-> (reduce (fn [acc [k v]]
167 |                 (assoc acc (keyword (known (lower-case k))) v))
168 |               {}
169 |               params)
170 |       (dissoc nil)))
169 |
170 | (defn insert-id
171 |   "Assoc a random UUID under :reqid on the request."
172 |   [req]
173 |   (assoc req :reqid (uuid/random)))
174 |
175 | (defn assoc-orig-uri
176 |   "Keep the as-received URI under :orig-uri, since :uri may be
177 |    rewritten later (host stripping, vhost bucket rewriting)."
178 |   [req]
179 |   (assoc req :orig-uri (get req :uri)))
179 |
180 | (defn protect-body-stream
181 |   "When the request carries a signed payload checksum (an
182 |    x-amz-content-sha256 header other than UNSIGNED-PAYLOAD), wrap the
183 |    body in a stream validating that checksum; otherwise return the
184 |    request untouched.
185 | 
186 |    Bug fix: the `if` had no else branch and its result was discarded
187 |    by the trailing `request` expression, so the wrapped body was
188 |    never actually installed."
189 |   [request]
190 |   (let [headers  (get request :headers)
191 |         expected (get headers "x-amz-content-sha256")]
192 |     (if (and (contains? headers "x-amz-content-sha256")
193 |              (not= expected "UNSIGNED-PAYLOAD"))
194 |       (assoc request :body (sha256-input-stream (get request :body) expected))
195 |       request)))
185 |
186 | (defn assoc-params
187 |   "Parse, keywordize and store query arguments under :params, and
188 |    the subset of action-selecting args under :action-params."
189 |   [{:keys [query-string] :as req}]
190 |   (if-let [decoded (when (seq query-string)
191 |                      (codec/form-decode query-string))]
192 |     (let [as-map (cond (map? decoded)    decoded
193 |                        (string? decoded) {decoded nil}
194 |                        :else             {})
195 |           params (keywordized as-map)]
196 |       (assoc req
197 |              :params params
198 |              :action-params (set (filter actions (keys params)))))
199 |     (assoc req :params {} :action-params #{})))
200 |
201 | (defn rewrite-host
202 |   "Strip a leading scheme://host prefix from the request URI,
203 |    keeping only the path portion."
204 |   [{:keys [uri] :as request}]
205 |   (let [[matched trail] (re-find #"^https?://[^/]+/?(.*)" uri)]
206 |     (if matched
207 |       (assoc request :uri (str "/" trail))
208 |       request)))
207 |
208 | (defn yield-rewrite-bucket
209 |   "Yield a function translating vhost-style bucket access
210 |    (bucket.service-uri) into a full resource access path
211 |    (/bucket/...)."
212 |   [service-uri]
213 |   (let [pattern (re-pattern
214 |                  (str "^(.*)\\." (string->pattern service-uri) "$"))]
215 |     (fn [{:keys [uri] {:strs [host] :or {host ""}} :headers :as request}]
216 |       (let [[_ bucket] (re-find pattern host)]
217 |         (if bucket
218 |           (assoc request :uri (str "/" bucket (if (seq uri) uri "/")))
219 |           request)))))
218 |
(defn authenticate
  "Authenticate tenant, allow masquerading only for _master_ keys"
  [{:keys [multipart-params request-method sign-uri] :as req} system]

  (cond

    ;; CORS preflight requests carry no credentials: treat as anonymous.
    (= request-method :options)
    (assoc req :authorization anonymous)

    ;; Browser-based POST form upload: credentials travel in the
    ;; multipart fields (AWSAccessKeyId, policy, signature) rather than
    ;; headers. The decoded base64 policy document is kept on the
    ;; request for later policy-condition checks.
    ;; NOTE(review): the `bucket` binding extracted from sign-uri is
    ;; never used below — confirm whether it can be dropped.
    (and (= request-method :post) (seq multipart-params))
    (let [{:keys [signature awsaccesskeyid policy]} multipart-params
          [_ bucket] (re-find #"^/[^/]*(/.*)?$" sign-uri)
          auth (check-sig req (keystore system) awsaccesskeyid policy signature)]
      (assoc req
             :post-upload? true
             :authorization auth
             :policy (json/parse-string (String. (-> policy
                                                     .getBytes
                                                     base64/decode))
                                        true)))
    ;; AWS Signature Version 4: recognized by the AWS4- prefix on the
    ;; Authorization header.
    (and (contains? (get req :headers) "authorization") (starts-with? (get (get req :headers) "authorization") "AWS4-"))
    (assoc req :authorization (validate4 (keystore system) req))
    ;; Legacy (v2) signature validation. A master key may masquerade as
    ;; another tenant via the x-amz-masquerade-tenant header.
    :else
    (let [auth (validate (keystore system) req)
          master (:master auth)
          tenant (get-in req [:headers "x-amz-masquerade-tenant"])]
      (assoc req :authorization
             (if (and master tenant) (assoc auth :tenant tenant) auth)))))
247 |
(defn decode-uri
  "URL-decode the request's :uri in place."
  [req]
  (assoc req :uri (uri-decode (get req :uri))))
251 |
(defn multipart-params
  "For multipart/form-data requests, parse the multipart fields,
   keep only keyword-keyed :params, keywordize :multipart-params and
   reopen the uploaded tempfile as an input stream. Other requests
   pass through unchanged."
  [req]
  (if-not (= (req/content-type req) "multipart/form-data")
    req
    (letfn [(reopen [f] (when f (java.io.FileInputStream. f)))]
      (-> req
          (mp/multipart-params-request)
          (update-in [:params]
                     (fn [params]
                       (into {} (filter (comp keyword? key) params))))
          (update-in [:multipart-params] keywordized)
          (update-in [:multipart-params :file :tempfile] reopen)))))
261 |
(defn prepare
  "Generate closures and walks each requests through wrappers.
   The steps run strictly in order: tag with an id, snapshot the
   original URI, parse params, protect the body stream, normalize the
   URI (host stripping, vhost bucket rewrite), resolve target and
   operation, parse multipart uploads, authenticate, then decode the
   URI."
  [req system]
  (let [rewrite-bucket  (yield-rewrite-bucket (service-uri system))
        assoc-target    (yield-assoc-target)
        assoc-operation (yield-assoc-operation actions)
        steps           [insert-id
                         assoc-orig-uri
                         assoc-params
                         protect-body-stream
                         rewrite-host
                         rewrite-bucket
                         assoc-target
                         assoc-operation
                         multipart-params
                         #(authenticate % system)
                         decode-uri]]
    (reduce (fn [r step] (step r)) req steps)))
284 |
(defn safe-prepare
  "Run prepare inside a try/catch; on failure, log and yield a
   minimal request tagged with the :error operation and the exception."
  [req system]
  (try
    (prepare req system)
    (catch Exception e
      (debug e "unhandled exception during request preparation")
      (insert-id {:operation :error
                  :exception e}))))
293 |
--------------------------------------------------------------------------------