├── .lein-classpath ├── test ├── .gitignore ├── data │ ├── acl2.xml │ ├── cors1.xml │ ├── acl3.xml │ ├── acl1.xml │ ├── acl5.xml │ ├── form-upload1.txt │ ├── form-upload2.txt │ └── acl4.xml └── io │ └── pithos │ ├── reporter_test.clj │ ├── cors_test.clj │ ├── acl_test.clj │ ├── util_test.clj │ ├── sig_test.clj │ └── meta_test.clj ├── vagrant ├── .gitignore ├── manifests │ └── store.pp ├── common.yaml ├── modules │ ├── cassandra │ │ ├── templates │ │ │ ├── cassandra.env.erb │ │ │ └── cassandra.yaml.erb │ │ └── manifests │ │ │ └── init.pp │ ├── base │ │ └── manifests │ │ │ └── init.pp │ └── pithos │ │ └── manifests │ │ └── init.pp ├── hiera.yaml └── Vagrantfile ├── doc ├── .gitignore ├── s3cmd.cfg ├── pithos.yaml ├── source │ ├── index.rst │ ├── quickstart.rst │ ├── developer.rst │ ├── concepts.rst │ └── clients.rst └── Makefile ├── .dockerignore ├── pkg ├── deb │ ├── postrm.sh │ ├── prerm.sh │ ├── pithos.default │ ├── pithos │ ├── preinst.sh │ ├── postinst.sh │ └── init.sh ├── rpm │ ├── prerm.sh │ ├── pithos-default │ ├── pithos │ ├── postinst.sh │ └── init.sh └── tar │ └── pithos ├── .travis.yml ├── docker ├── pithos │ ├── pithos.yaml.toml │ ├── docker-entrypoint.sh │ ├── pithos.yaml.tmpl │ └── Dockerfile └── nginx │ ├── Dockerfile │ ├── entrypoint.sh │ └── nginx.conf ├── .gitignore ├── src └── io │ ├── pithos │ ├── reporter.clj │ ├── keystore.clj │ ├── system.clj │ ├── schema.clj │ ├── api.clj │ ├── response.clj │ ├── store.clj │ ├── acl.clj │ ├── sig.clj │ ├── util.clj │ ├── cors.clj │ ├── bucket.clj │ ├── config.clj │ ├── blob.clj │ ├── desc.clj │ ├── perms.clj │ ├── sig4.clj │ ├── stream.clj │ └── request.clj │ └── pithos.clj ├── resources └── logback.xml ├── LICENSE ├── docker-compose.yml ├── project.clj ├── README.md └── tasks └── leiningen ├── tar.clj ├── fatdeb.clj └── fatrpm.clj /.lein-classpath: -------------------------------------------------------------------------------- 1 | :tasks 2 | 
-------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | shell/ 2 | -------------------------------------------------------------------------------- /test/data/acl2.xml: -------------------------------------------------------------------------------- 1 | foo fa fi 2 | -------------------------------------------------------------------------------- /vagrant/.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant 2 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | *.png 2 | *.html 3 | -------------------------------------------------------------------------------- /vagrant/manifests/store.pp: -------------------------------------------------------------------------------- 1 | include cassandra 2 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .m2/ 3 | pithos-quickstart/ 4 | target/ 5 | -------------------------------------------------------------------------------- /pkg/deb/postrm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | if [ "$1" = "purge" ] ; then 4 | update-rc.d pithos remove >/dev/null 5 | fi 6 | -------------------------------------------------------------------------------- /pkg/rpm/prerm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | if [ -x "/etc/init.d/pithos" ]; then 4 | service pithos stop || exit $? 
5 | fi 6 | -------------------------------------------------------------------------------- /vagrant/common.yaml: -------------------------------------------------------------------------------- 1 | motd: 'puppet managed host with hiera support' 2 | cassandra_heap_size: "8G" 3 | cassandra_heap_new: "800m" 4 | -------------------------------------------------------------------------------- /pkg/deb/prerm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | if [ -x "/etc/init.d/pithos" ]; then 4 | invoke-rc.d pithos stop || exit $? 5 | fi 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: clojure 3 | lein: lein 4 | jdk: 5 | - oraclejdk8 6 | branches: 7 | except: 8 | - gh-pages 9 | -------------------------------------------------------------------------------- /vagrant/modules/cassandra/templates/cassandra.env.erb: -------------------------------------------------------------------------------- 1 | MAX_HEAP_SIZE="<%= @cassandra_heap_size %>" 2 | HEAP_NEWSIZE="<%= @cassandra_heap_new %>" 3 | -------------------------------------------------------------------------------- /vagrant/hiera.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | :backend: 3 | - yaml 4 | :hierarchy: 5 | - '%{hostname}' 6 | - common 7 | :yaml: 8 | :datadir: '/vagrant/configstore' -------------------------------------------------------------------------------- /pkg/deb/pithos.default: -------------------------------------------------------------------------------- 1 | # Optionally add classes to the classpath for additional functionality 2 | # EXTRA_CLASSPATH= 3 | 4 | # Optional JAVA_OPTS 5 | # EXTRA_JAVA_OPTS= 6 | -------------------------------------------------------------------------------- /pkg/rpm/pithos-default: 
-------------------------------------------------------------------------------- 1 | # Optionally add classes to the classpath for additional functionality 2 | # EXTRA_CLASSPATH= 3 | 4 | # Optional JAVA_OPTS 5 | # EXTRA_JAVA_OPTS= 6 | -------------------------------------------------------------------------------- /docker/pithos/pithos.yaml.toml: -------------------------------------------------------------------------------- 1 | [template] 2 | src = "pithos.yaml.tmpl" 3 | dest = "/etc/pithos/pithos.yaml" 4 | keys = [ 5 | "/pithos/service/uri", 6 | "/pithos/cassandra/host", 7 | ] 8 | -------------------------------------------------------------------------------- /vagrant/modules/base/manifests/init.pp: -------------------------------------------------------------------------------- 1 | class base { 2 | 3 | $motd_content = hiera('motd', "no motd set") 4 | 5 | file { '/etc/motd': 6 | content => "${motd_content}\n" 7 | } 8 | 9 | } 10 | -------------------------------------------------------------------------------- /pkg/deb/pithos: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -f /etc/default/pithos ]; then 4 | . /etc/default/pithos 5 | fi 6 | 7 | JAR="$EXTRA_CLASSPATH:/usr/lib/pithos/pithos.jar" 8 | CONFIG="/etc/pithos/pithos.yaml" 9 | 10 | exec java $EXTRA_JAVA_OPTS $OPTS -cp "$JAR" io.pithos -f "$CONFIG" $* 11 | -------------------------------------------------------------------------------- /pkg/rpm/pithos: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -f /etc/default/pithos ]; then 4 | . 
/etc/default/pithos 5 | fi 6 | 7 | JAR="$EXTRA_CLASSPATH:/usr/lib/pithos/pithos.jar" 8 | CONFIG="/etc/pithos/pithos.yaml" 9 | 10 | exec java $EXTRA_JAVA_OPTS $OPTS -cp "$JAR" io.pithos -f "$CONFIG" $* 11 | -------------------------------------------------------------------------------- /pkg/tar/pithos: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -f /etc/default/pithos ]; then 4 | . /etc/default/pithos 5 | fi 6 | 7 | JAR="$EXTRA_CLASSPATH:/usr/lib/pithos/pithos.jar" 8 | CONFIG="/etc/pithos/pithos.yaml" 9 | 10 | exec java $EXTRA_JAVA_OPTS $OPTS -cp "$JAR" io.pithos -f "$CONFIG" $* 11 | -------------------------------------------------------------------------------- /docker/nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:latest 2 | RUN apt-get update && apt-get install -y openssl 3 | RUN mkdir -p /etc/nginx/external 4 | ADD docker/nginx/nginx.conf /etc/nginx/nginx.conf 5 | ADD docker/nginx/entrypoint.sh /docker-entrypoint 6 | ENTRYPOINT ["/docker-entrypoint"] 7 | CMD ["nginx"] 8 | -------------------------------------------------------------------------------- /test/data/cors1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | http://*.example.com 4 | GET 5 | * 6 | 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .nrepl-port 2 | hs_err_pid* 3 | /doc/build 4 | /site 5 | /target 6 | /lib 7 | /docs 8 | /classes 9 | /checkouts 10 | pom.xml 11 | pom.xml.asc 12 | *.jar 13 | *.class 14 | .lein-deps-sum 15 | .lein-failures 16 | .lein-plugins 17 | .lein-repl-history 18 | .m2 19 | .sass-cache 20 | _site 21 | img 22 | node_modules 23 | -------------------------------------------------------------------------------- /doc/s3cmd.cfg: 
-------------------------------------------------------------------------------- 1 | [default] 2 | host_base = s3.example.com 3 | host_bucket = %(bucket)s.s3.example.com 4 | access_key = AKIAIOSFODNN7EXAMPLE 5 | secret_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY 6 | signature_v2 = True 7 | # Remove those lines when not running locally: 8 | use_https = False 9 | proxy_host = localhost 10 | proxy_port = 8080 11 | -------------------------------------------------------------------------------- /pkg/deb/preinst.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Create pithos user and group 3 | set -e 4 | 5 | USERNAME="pithos" 6 | GROUPNAME="pithos" 7 | getent group "$GROUPNAME" >/dev/null || groupadd -r "$GROUPNAME" 8 | getent passwd "$USERNAME" >/dev/null || \ 9 | useradd -r -g "$GROUPNAME" -d /usr/lib/pithos -s /bin/false \ 10 | -c "Pithos object store" "$USERNAME" 11 | exit 0 12 | -------------------------------------------------------------------------------- /pkg/rpm/postinst.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Fakeroot and lein don't get along, so we set ownership after the fact. 3 | set -e 4 | 5 | chown -R root:root /usr/lib/pithos 6 | chown root:root /usr/bin/pithos 7 | chown pithos:pithos /var/log/pithos 8 | chown pithos:pithos /etc/pithos/pithos.yaml 9 | chown root:root /etc/init.d/pithos 10 | 11 | if [ -x "/etc/init.d/pithos" ]; then 12 | service pithos start || exit $? 13 | fi 14 | -------------------------------------------------------------------------------- /pkg/deb/postinst.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Fakeroot and lein don't get along, so we set ownership after the fact. 
3 | set -e 4 | 5 | chown -R root:root /usr/lib/pithos 6 | chown root:root /usr/bin/pithos 7 | chown pithos:pithos /var/log/pithos 8 | chown pithos:pithos /etc/pithos/pithos.yaml 9 | chown root:root /etc/init.d/pithos 10 | 11 | if [ -x "/etc/init.d/pithos" ]; then 12 | invoke-rc.d pithos start || exit $? 13 | fi 14 | -------------------------------------------------------------------------------- /src/io/pithos/reporter.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.reporter 2 | (:require [clojure.tools.logging :refer [log]])) 3 | 4 | (defprotocol Reporter 5 | (report! [this event])) 6 | 7 | (defn logging-reporter 8 | [{:keys [level]}] 9 | (reify Reporter 10 | (report! [_ event] 11 | (log (keyword level) (pr-str event))))) 12 | 13 | (defn report-all! 14 | [reporters event] 15 | (doseq [reporter reporters] 16 | (report! reporter event))) 17 | -------------------------------------------------------------------------------- /docker/pithos/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -eux 4 | 5 | export PITHOS_CASSANDRA_HOST=${PITHOS_CASSANDRA_HOST:-cassandra} 6 | export PITHOS_SERVICE_URI=${PITHOS_SERVICE_URI:-s3.example.com} 7 | 8 | confd -onetime -backend env 9 | 10 | # wait for cassandra being ready 11 | until nc -z -w 2 $PITHOS_CASSANDRA_HOST 9042; do sleep 1; done 12 | 13 | java -jar /pithos-standalone.jar -a install-schema || true 14 | 15 | exec java -jar /pithos-standalone.jar -a api-run 16 | -------------------------------------------------------------------------------- /resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 6 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /test/data/acl3.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | foo 5 | bar 6 | 7 | 8 | 9 | FULL_CONTROL 10 | 11 | foo 12 | bar 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/io/pithos/keystore.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.keystore 2 | "A keystore is a simple protocol which yields a map 3 | of tenant details for a key id. 4 | 5 | The basic implementation wants keys from the configuration 6 | file, you'll likely want to use a custom implementation that 7 | interacts with your user-base here. 8 | ") 9 | 10 | (defn map-keystore [{:keys [keys]}] 11 | "Wrap a map, translating looked-up keys to keywords." 12 | (reify 13 | clojure.lang.ILookup 14 | (valAt [this id] 15 | (get keys (keyword id))))) 16 | -------------------------------------------------------------------------------- /vagrant/modules/pithos/manifests/init.pp: -------------------------------------------------------------------------------- 1 | class cassandra { 2 | 3 | $keyid = 'F758CE318D77295D' 4 | 5 | exec { 'cassandra-recv-keys': 6 | command => "gpg --keyserver pgp.mit.edu --recv-keys ${keyid} && gpg --export --armor ${keyid} | apt-key add - && apt-get update", 7 | user => 'root', 8 | group => 'root', 9 | path => "/bin:/usr/bin:/sbin:/usr/sbin", 10 | unless =>"apt-key list | grep ${keyid}", 11 | } 12 | 13 | package { 'cassandra': 14 | ensure => latest, 15 | require => Exec['cassandra-recv-keys'] 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /test/data/acl1.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | foo 4 | bar 5 | 6 | 7 | 8 | 9 | foo 10 | bar 11 | 12 | FULL_CONTROL 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright 2014 exoscale(tm) 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. -------------------------------------------------------------------------------- /test/data/acl5.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | foo 4 | bar 5 | 6 | 7 | 8 | 9 | http://acs.amazonaws.com/groups/global/AllUsers 10 | anonymous 11 | 12 | READ 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /test/io/pithos/reporter_test.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.reporter-test 2 | (:require [clojure.test :refer :all] 3 | [io.pithos.reporter :refer [report! report-all! Reporter]])) 4 | 5 | (defn atom-reporter 6 | [] 7 | (let [contents (atom nil)] 8 | [contents 9 | (reify Reporter 10 | (report! [_ e] 11 | (swap! contents conj e)))])) 12 | 13 | (deftest reporter-test 14 | 15 | (let [[contents r] (atom-reporter)] 16 | (report! r :foo) 17 | (report! r :bar) 18 | (report! 
r :baz) 19 | 20 | (testing "simple inserts" 21 | (is (= [:baz :bar :foo] @contents))))) 22 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.3' 2 | 3 | volumes: 4 | cassandra: {} 5 | nginx_tls: {} 6 | 7 | services: 8 | cassandra: 9 | image: cassandra:2.1 10 | volumes: 11 | - cassandra:/var/lib/cassandra 12 | 13 | pithos: 14 | build: 15 | context: . 16 | dockerfile: docker/pithos/Dockerfile 17 | depends_on: 18 | - cassandra 19 | 20 | nginx-proxy: 21 | build: 22 | context: . 23 | dockerfile: docker/nginx/Dockerfile 24 | volumes: 25 | - nginx_tls:/etc/nginx/external/ 26 | depends_on: 27 | - pithos 28 | ports: 29 | - "0.0.0.0:80:80" 30 | - "0.0.0.0:443:443" 31 | -------------------------------------------------------------------------------- /docker/nginx/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z ${DH_SIZE+x} ] 4 | then 5 | >&2 echo ">> no \$DH_SIZE specified using default" 6 | DH_SIZE="2048" 7 | fi 8 | 9 | DH="/etc/nginx/external/dh.pem" 10 | 11 | if [ ! -e "$DH" ] 12 | then 13 | echo ">> generating $DH with size: $DH_SIZE" 14 | openssl dhparam -out "$DH" $DH_SIZE 15 | fi 16 | 17 | if [ ! -e "/etc/nginx/external/cert.pem" ] || [ ! 
-e "/etc/nginx/external/key.pem" ] 18 | then 19 | echo ">> generating self signed cert" 20 | openssl req -x509 -newkey rsa:4086 \ 21 | -subj "/C=XX/ST=XXXX/L=XXXX/O=XXXX/CN=localhost" \ 22 | -keyout "/etc/nginx/external/key.pem" \ 23 | -out "/etc/nginx/external/cert.pem" \ 24 | -days 3650 -nodes -sha256 25 | fi 26 | 27 | echo "$@" 28 | exec "$@" 29 | -------------------------------------------------------------------------------- /src/io/pithos/system.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.system) 2 | 3 | (defprotocol SystemDescriptor 4 | (regions [this]) 5 | (bucketstore [this]) 6 | (keystore [this]) 7 | (reporters [this]) 8 | (service [this]) 9 | (service-uri [this])) 10 | 11 | (defn system-descriptor 12 | [config] 13 | (reify 14 | SystemDescriptor 15 | (regions [this] (:regions config)) 16 | (bucketstore [this] (:bucketstore config)) 17 | (keystore [this] (:keystore config)) 18 | (reporters [this] (:reporters config)) 19 | (service [this] (:service config)) 20 | (service-uri [this] (get-in config [:options :service-uri])) 21 | clojure.lang.ILookup 22 | (valAt [this k] (get config k)) 23 | (valAt [this k default] (or (get config k) default)))) 24 | -------------------------------------------------------------------------------- /test/io/pithos/cors_test.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.cors-test 2 | (:require [io.pithos.cors :refer :all] 3 | [clojure.test :refer :all] 4 | [clojure.java.io :as io])) 5 | 6 | (deftest xml-slurp-test 7 | (let [repr {:cors1 [{:origins ["http://*.example.com"] 8 | :methods [:get] 9 | :headers ["*"] 10 | :exposed [] 11 | :max-age nil}]}] 12 | 13 | (doseq [[src int-repr] repr 14 | :let [path (format "%s.xml" (name src)) 15 | ext-repr (slurp (io/resource path))]] 16 | (testing (str "valid xml input for " (name src)) 17 | (is (= (xml->cors ext-repr) int-repr)))) 18 | 19 | (doseq [[src int-repr] repr 20 | 
:let [path (format "%s.xml" (name src)) 21 | ext-repr (slurp (io/resource path))]] 22 | (testing (str "valid xml output for " (name src)) 23 | (is (= (as-xml int-repr true) ext-repr)))))) 24 | -------------------------------------------------------------------------------- /docker/pithos/pithos.yaml.tmpl: -------------------------------------------------------------------------------- 1 | service: 2 | host: "0.0.0.0" 3 | port: 8080 4 | 5 | logging: 6 | level: info 7 | console: true 8 | 9 | options: 10 | service-uri: {{ getv "/pithos/service/uri" "s3.example.com" }} 11 | default-region: myregion 12 | 13 | keystore: 14 | keys: 15 | AKIAIOSFODNN7EXAMPLE: 16 | master: true 17 | tenant: test@example.com 18 | secret: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' 19 | AKIDEXAMPLE: 20 | master: true 21 | tenant: test@example.com 22 | secret: 'wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY' 23 | 24 | bucketstore: 25 | default-region: myregion 26 | cluster: {{ getv "/pithos/cassandra/host" "cassandra" }} 27 | keyspace: storage 28 | 29 | regions: 30 | myregion: 31 | metastore: 32 | cluster: {{ getv "/pithos/cassandra/host" "cassandra" }} 33 | keyspace: storage 34 | 35 | storage-classes: 36 | standard: 37 | cluster: {{ getv "/pithos/cassandra/host" "cassandra" }} 38 | keyspace: storage 39 | max-chunk: "128k" 40 | max-block-chunk: 1024 41 | -------------------------------------------------------------------------------- /docker/pithos/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM clojure:lein-2.6.1 AS builder 2 | 3 | WORKDIR /pithos 4 | 5 | RUN curl -L https://github.com/kelseyhightower/confd/releases/download/v0.12.0/confd-0.12.0-linux-amd64 -o /confd && chmod +x /confd 6 | 7 | COPY project.clj /pithos/project.clj 8 | RUN cd /pithos && lein deps 9 | 10 | COPY resources /pithos/resources 11 | COPY src /pithos/src 12 | RUN cd /pithos && lein uberjar && mv target/pithos-*-standalone.jar /pithos-standalone.jar 13 | 14 | 15 | FROM 
openjdk:jre-alpine 16 | 17 | RUN apk --no-cache add netcat-openbsd 18 | 19 | RUN addgroup -S pithos && adduser -S -g pithos pithos 20 | RUN mkdir /etc/pithos && chown pithos: /etc/pithos && chmod 0700 /etc/pithos 21 | USER pithos 22 | 23 | COPY --from=builder /confd /usr/local/bin/confd 24 | COPY --from=builder /pithos-standalone.jar /pithos-standalone.jar 25 | 26 | COPY docker/pithos/docker-entrypoint.sh /docker-entrypoint.sh 27 | COPY docker/pithos/pithos.yaml.tmpl /etc/confd/templates/pithos.yaml.tmpl 28 | COPY docker/pithos/pithos.yaml.toml /etc/confd/conf.d/pithos.yaml.toml 29 | 30 | CMD ["/docker-entrypoint.sh"] 31 | -------------------------------------------------------------------------------- /test/data/form-upload1.txt: -------------------------------------------------------------------------------- 1 | --9431149156168 2 | Content-Disposition: form-data; name="key" 3 | 4 | qux 5 | --9431149156168 6 | Content-Disposition: form-data; name="acl" 7 | 8 | private 9 | --9431149156168 10 | Content-Disposition: form-data; name="success_action_status" 11 | 12 | 201 13 | --9431149156168 14 | Content-Disposition: form-data; name="Content-Type" 15 | 16 | text/plain 17 | --9431149156168 18 | Content-Disposition: form-data; name="AWSAccessKeyId" 19 | 20 | AKIAIOSFODNN7EXAMPLE 21 | --9431149156168 22 | Content-Disposition: form-data; name="Policy" 23 | 24 | eyJleHBpcmF0aW9uIjogIjIwMjUtMTItMDFUMTI6MDA6MDAuMDAwWiIsCiAiY29uZGl0aW9ucyI6IFt7ImJ1Y2tldCI6ICJiYXRtYW4ifSwKICAgICAgICAgICAgICAgIHsiYWNsIjogInByaXZhdGUifSwKICAgICAgICAgICAgICAgIHsic3VjY2Vzc19hY3Rpb25fc3RhdHVzIjogIjIwMSJ9XX0K 25 | --9431149156168 26 | Content-Disposition: form-data; name="Signature" 27 | 28 | c3iMCe5m4lNmRHt+cmPAyOK0lf4= 29 | --9431149156168 30 | Content-Disposition: form-data; name="file"; filename="MyFilename.jpg" 31 | Content-Type: text/plain 32 | 33 | not much to say. 
34 | --9431149156168-- 35 | -------------------------------------------------------------------------------- /test/data/form-upload2.txt: -------------------------------------------------------------------------------- 1 | --9431149156168 2 | Content-Disposition: form-data; name="key" 3 | 4 | qux 5 | --9431149156168 6 | Content-Disposition: form-data; name="acl" 7 | 8 | private 9 | --9431149156168 10 | Content-Disposition: form-data; name="success_action_status" 11 | 12 | 204 13 | --9431149156168 14 | Content-Disposition: form-data; name="Content-Type" 15 | 16 | text/plain 17 | --9431149156168 18 | Content-Disposition: form-data; name="AWSAccessKeyId" 19 | 20 | AKIAIOSFODNN7EXAMPLE 21 | --9431149156168 22 | Content-Disposition: form-data; name="Policy" 23 | 24 | eyJleHBpcmF0aW9uIjogIjIwMjUtMTItMDFUMTI6MDA6MDAuMDAwWiIsCiAiY29uZGl0aW9ucyI6IFt7ImJ1Y2tldCI6ICJiYXRtYW4ifSwKICAgICAgICAgICAgICAgIHsiYWNsIjogInByaXZhdGUifSwKICAgICAgICAgICAgICAgIHsic3VjY2Vzc19hY3Rpb25fc3RhdHVzIjogIjIwMSJ9XX0K 25 | --9431149156168 26 | Content-Disposition: form-data; name="Signature" 27 | 28 | c3iMCe5m4lNmRHt+cmPAyOK0lf4= 29 | --9431149156168 30 | Content-Disposition: form-data; name="file"; filename="MyFilename.jpg" 31 | Content-Type: text/plain 32 | 33 | not much to say. 
34 | --9431149156168-- 35 | -------------------------------------------------------------------------------- /doc/pithos.yaml: -------------------------------------------------------------------------------- 1 | service: 2 | host: "0.0.0.0" 3 | port: 8080 4 | logging: 5 | level: info 6 | console: true 7 | overrides: 8 | io.pithos: debug 9 | options: 10 | service-uri: s3.example.com 11 | default-region: myregion 12 | keystore: 13 | keys: 14 | AKIAIOSFODNN7EXAMPLE: 15 | master: true 16 | tenant: test@example.com 17 | secret: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' 18 | AKIDEXAMPLE: 19 | master: true 20 | tenant: test@example.com 21 | secret: 'wJalrXUtnFEMI/K7MDENG+bPxRfiCYEXAMPLEKEY' 22 | 23 | 24 | bucketstore: 25 | default-region: myregion 26 | cluster: "localhost" 27 | keyspace: storage 28 | regions: 29 | myregion: 30 | metastore: 31 | cluster: "localhost" 32 | keyspace: storage 33 | storage-classes: 34 | standard: 35 | cluster: "localhost" 36 | keyspace: storage 37 | max-chunk: "128k" 38 | max-block-chunk: 1024 39 | cassandra: 40 | saved_caches_directory: "target/db/saved_caches" 41 | data_file_directories: 42 | - "target/db/data" 43 | commitlog_directory: "target/db/commitlog" 44 | -------------------------------------------------------------------------------- /src/io/pithos/schema.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.schema 2 | "Namespace holding a single action which installs the schema" 3 | (:require [clojure.tools.logging :refer [info error]] 4 | [io.pithos.system :as system] 5 | [io.pithos.store :as store])) 6 | 7 | (defn converge-schema 8 | "Loops through all storage layers and calls converge! on them" 9 | ([system exit?] 10 | (info "converging all schemas...") 11 | (try 12 | (info "converging bucketstore schema") 13 | (store/converge! 
(system/bucketstore system)) 14 | 15 | (doseq [region (system/regions system) 16 | :let [[region {:keys [metastore storage-classes]}] region]] 17 | (info "converging metastore for region " region) 18 | (store/converge! metastore) 19 | 20 | (doseq [[storage-class blobstore] storage-classes] 21 | (info "converging blobstore for region and storage-class " 22 | region storage-class) 23 | (store/converge! blobstore)) 24 | (when exit? (System/exit 0))) 25 | (catch Exception e 26 | (error e "cannot create schema") 27 | (when exit? (System/exit 1))))) 28 | ([system] 29 | (converge-schema system true))) 30 | -------------------------------------------------------------------------------- /test/data/acl4.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | foo 4 | bar 5 | 6 | 7 | 8 | 9 | foo 10 | bar 11 | 12 | FULL_CONTROL 13 | 14 | 15 | 16 | bar 17 | bar 18 | 19 | FULL_CONTROL 20 | 21 | 22 | 23 | foo 24 | baz 25 | 26 | READ_ACP 27 | 28 | 29 | 30 | baz 31 | baz 32 | 33 | READ_ACP 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /vagrant/modules/cassandra/manifests/init.pp: -------------------------------------------------------------------------------- 1 | class cassandra { 2 | 3 | $keyid = '4BD736A82B5C1B00' 4 | $shortkey = '2048R/2B5C1B00' 5 | 6 | file { '/etc/apt/sources.list.d/cassandra.list': 7 | content => 'deb http://www.apache.org/dist/cassandra/debian 20x main' 8 | } 9 | 10 | exec { 'cassandra-recv-keys': 11 | command => "gpg --keyserver pgp.mit.edu --recv-keys ${keyid} && gpg --export --armor ${keyid} | apt-key add - && apt-get update", 12 | user => 'root', 13 | group => 'root', 14 | path => "/bin:/usr/bin:/sbin:/usr/sbin", 15 | unless =>"apt-key list | grep ${shortkey}", 16 | require => File['/etc/apt/sources.list.d/cassandra.list'] 17 | } 18 | 19 | $cassandra_heap_size = hiera('cassandra-heap-size', '8G') 20 | $cassandra_heap_new = hiera('cassandra-heap-new', '800m') 
21 | 22 | package { 'cassandra': 23 | ensure => present, 24 | require => Exec['cassandra-recv-keys'] 25 | } 26 | 27 | file { '/etc/cassandra/cassandra.yaml': 28 | content => template('cassandra/cassandra.yaml.erb'), 29 | require => Package['cassandra'], 30 | notify => Service['cassandra'] 31 | } 32 | 33 | file { '/etc/default/cassandra': 34 | content => template('cassandra/cassandra.env.erb'), 35 | require => Package['cassandra'], 36 | notify => Service['cassandra'] 37 | } 38 | 39 | service {'cassandra': 40 | ensure => running 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/io/pithos/api.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.api 2 | "Our main HTTP facade. Serving functionality is provided by aleph. 3 | Aleph is preferred over more traditional HTTP servers because 4 | it avoids creating one thread per (potentially) long streaming 5 | request or response. Moreover, certain specific operations 6 | just cannot be handled by the traditional synchronous handlers 7 | like ring, such as the 100: Continue response expected for uploads. 8 | " 9 | (:require [qbits.jet.server :refer [run-jetty]] 10 | [clojure.tools.logging :refer [info]] 11 | [io.pithos.system :refer [service]] 12 | [io.pithos.operations :refer [dispatch]] 13 | [io.pithos.request :refer [safe-prepare]])) 14 | 15 | (defn executor 16 | "Given a system map, yield a handler function for incoming 17 | request maps" 18 | [system] 19 | (fn [request] 20 | (-> (safe-prepare request system) 21 | (dispatch system)))) 22 | 23 | (defn run 24 | "Run an asynchronous API handler through Netty thanks to aleph http. 
25 | The request handler is an anonymous function which stores the channel 26 | inside the request to mimick the operations of http-kit then runs 27 | several wrappers defined in `io.pithos.api.request` before letting 28 | `io.pithos.operations` dispatch based on the type of request" 29 | [system] 30 | (let [handler (executor system)] 31 | (run-jetty (merge (service system) {:input-buffer-size 65536 32 | :parser-compliance :legacy 33 | :ring-handler handler}))) 34 | (info "server up and running")) 35 | -------------------------------------------------------------------------------- /test/io/pithos/acl_test.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.acl-test 2 | (:require [clojure.test :refer :all] 3 | [clojure.pprint :refer [pprint]] 4 | [io.pithos.acl :refer [as-xml xml->acl]] 5 | [clojure.java.io :as io])) 6 | 7 | (deftest xml-to-acl-test 8 | (let [repr {:acl1 {:FULL_CONTROL [{:ID "foo" :DisplayName "bar"}]} 9 | :acl4 {:FULL_CONTROL [{:ID "foo" :DisplayName "bar"} 10 | {:URI "bar" :DisplayName "bar"}] 11 | :READ_ACP [{:ID "foo" :DisplayName "baz"} 12 | {:URI "baz" :DisplayName "baz"}]} 13 | :acl5 {:READ [{:URI "anonymous" 14 | :DisplayName "anonymous"}]}}] 15 | 16 | (doseq [[src int-repr] repr 17 | :let [path (format "%s.xml" (name src)) 18 | ext-repr (slurp (io/resource path))]] 19 | (testing (str "valid xml input for " (name src)) 20 | (is (= (xml->acl ext-repr) int-repr)))) 21 | 22 | (doseq [[src int-repr] repr 23 | :let [path (format "%s.xml" (name src)) 24 | ext-repr (slurp (io/resource path))]] 25 | (testing (str "valid xml output for " (name src)) 26 | (is (= (as-xml int-repr true) ext-repr)))) 27 | 28 | (testing "invalid xml" 29 | (is (thrown-with-msg? 30 | clojure.lang.ExceptionInfo 31 | #"Invalid XML in ACL Body" 32 | (xml->acl (slurp (io/resource "acl2.xml"))))) 33 | 34 | (is (thrown-with-msg? 
35 | clojure.lang.ExceptionInfo 36 | #"XML Root Node should be AccessControlPolicy" 37 | (xml->acl (slurp (io/resource "acl3.xml")))))))) 38 | -------------------------------------------------------------------------------- /src/io/pithos/response.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.response 2 | "Provides ring like facilities for working with 3 | HTTP responses" 4 | (:require [clojure.tools.logging :refer [debug error]])) 5 | 6 | (defn response 7 | "Create a basic response, with optional body" 8 | ([] 9 | {:status 200 :headers {}}) 10 | ([body] 11 | {:status 200 :headers {} :body body})) 12 | 13 | (defn redirect 14 | [location] 15 | {:status 302 :headers {"location" location} :body ""}) 16 | 17 | (defn header 18 | "Add a header to a response, coerce value to string" 19 | [resp header val] 20 | (let [strval (if (keyword? val) (name val) (str val))] 21 | (assoc-in resp [:headers header] strval))) 22 | 23 | (defn content-type 24 | "Add Content-Type header" 25 | [resp type] 26 | (header resp "Content-Type" type)) 27 | 28 | (defn status 29 | "Set response status code" 30 | [resp status] 31 | (assoc resp :status status)) 32 | 33 | (defn xml-response 34 | "Yields a HTTP response, assuming body is XML data" 35 | [body] 36 | (-> body 37 | response 38 | (header "Content-Type" "application/xml"))) 39 | 40 | (defn html-response 41 | "Yields a HTTP response, assuming body is HTML data" 42 | [body] 43 | (-> (response body) 44 | (header "Content-Type" "text/html"))) 45 | 46 | (defn request-id 47 | "Provision S3 specific headers" 48 | [resp {:keys [reqid]}] 49 | (-> resp 50 | (header "Server" "Pithos") 51 | (header "x-amz-id-2" (str reqid)) 52 | (header "x-amz-request-id" (str reqid)))) 53 | 54 | (defn exception-status 55 | "When handler raised an exception, try to look up a status code 56 | in its data" 57 | [resp details] 58 | (let [{:keys [status-code] :or {status-code 500}} details] 59 | (-> resp 60 | (status 
status-code)))) 61 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | The Pithos Guide 2 | ================ 3 | 4 | .. image:: _static/pithos.svg 5 | :alt: pithos logo 6 | :align: right 7 | 8 | *pithos* is a daemon which provides an S3-compatible frontend for storing files 9 | in a `Cassandra`_ cluster. 10 | 11 | *pithos* provides the ability to build complex object storage topologies spanning 12 | multiple regions and focuses on the following: 13 | 14 | Scalability 15 | By relying on Apache Cassandra, pithos splits your files (objects) in small chunks 16 | which are replicated across a cluster of machines. This allows pithos to provide 17 | the following guarantees: 18 | 19 | - Fast writes 20 | - High Availability 21 | - Partition tolerance 22 | 23 | Compatibility 24 | While there is no wide-spread official standard for object storage, the S3 protocol 25 | has become a de-facto standard and has thus been chosen as pithos' protocol. 26 | This means you can start using your favorite S3 tools to work with pithos, such as: 27 | 28 | - `s3cmd`_ 29 | - `boto`_ 30 | 31 | Simplicity 32 | Pithos was built with ease of use and operability in mind. It should be easy to get started, require as few moving parts as possible and still be relatively easy to extend for larger installations. Pithos is distributed as a single executable JAR-file and relies on a YAML configuration file. Many of the JVM specifics are hidden from the administrator. 33 | 34 | *pithos* is sponsored by exoscale_ 35 | 36 | .. _Cassandra: http://cassandra.apache.org/ 37 | .. _s3cmd: http://s3tools.org/ 38 | .. _boto: https://github.com/boto/boto 39 | .. _exoscale: https://exoscale.ch 40 | 41 | ..
toctree:: 42 | :maxdepth: 2 43 | 44 | quickstart 45 | concepts 46 | administrator 47 | api 48 | developer 49 | clients 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /docker/nginx/nginx.conf: -------------------------------------------------------------------------------- 1 | user nginx; 2 | worker_processes 1; 3 | 4 | error_log /dev/stdout warn; 5 | 6 | daemon off; 7 | 8 | pid /var/run/nginx.pid; 9 | 10 | events { 11 | worker_connections 1024; 12 | } 13 | 14 | http { 15 | include /etc/nginx/mime.types; 16 | default_type application/octet-stream; 17 | 18 | log_format main '$remote_addr - $remote_user [$time_local] "$request" ' 19 | '$status $body_bytes_sent "$http_referer" ' 20 | '"$http_user_agent" "$http_x_forwarded_for"'; 21 | 22 | access_log /dev/stdout main; 23 | 24 | keepalive_timeout 65; 25 | 26 | # Don't leak metadata about this server 27 | server_tokens off; 28 | 29 | # Enforce some security hardening HTTP headers 30 | add_header X-Content-Type-Options nosniff; 31 | 32 | # Decent set of ciphers... 
33 | ssl_dhparam /etc/nginx/external/dh.pem; 34 | ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # disable poodle 35 | ssl_prefer_server_ciphers on; 36 | ssl_ciphers ECDH+AESGCM:DH+AESGCM:ECDH+AES256:DH+AES256:ECDH+AES128:DH+AES:ECDH+3DES:DH+3DES:RSA+AESGCM:RSA+AES:RSA+3DES:!aNULL:!MD5:!DSS; 37 | 38 | client_max_body_size 5G; 39 | 40 | upstream app { 41 | server pithos:8080; 42 | } 43 | 44 | server { 45 | listen 80 default_server; 46 | 47 | charset utf-8; 48 | 49 | location / { 50 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 51 | proxy_set_header Host $http_host; 52 | proxy_redirect off; 53 | proxy_pass http://app; 54 | } 55 | } 56 | 57 | server { 58 | listen 443 default_server; 59 | 60 | ssl on; 61 | ssl_certificate external/cert.pem; 62 | ssl_certificate_key external/key.pem; 63 | 64 | charset utf-8; 65 | 66 | location / { 67 | proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; 68 | proxy_set_header Host $http_host; 69 | proxy_redirect off; 70 | proxy_pass http://app; 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject io.pithos/pithos "0.7.6-SNAPSHOT" 2 | :description "cassandra-backed object storage" 3 | :maintainer {:email "Pierre-Yves Ritschard "} 4 | :url "http://pithos.io" 5 | :license {:name "Apache License, Version 2.0" 6 | :url "http://www.apache.org/licenses/LICENSE-2.0"} 7 | :aot :all 8 | :main io.pithos 9 | :jvm-opts ["-Xmx2g"] 10 | :profiles {:dev {:resource-paths ["test/data"]}} 11 | :dependencies [[org.clojure/clojure "1.9.0-alpha14"] 12 | [org.clojure/data.codec "0.1.0"] 13 | [org.clojure/data.xml "0.0.8"] 14 | [org.clojure/data.zip "0.1.1"] 15 | [org.clojure/tools.cli "0.3.5"] 16 | [org.clojure/tools.logging "0.3.1"] 17 | [org.clojure/core.async "0.2.374"] 18 | [spootnik/unilog "0.7.17"] 19 | [spootnik/constance "0.5.3"] 20 | [spootnik/raven "0.1.1"] 21 | 
[spootnik/uncaught "0.5.3"] 22 | [clj-yaml "0.4.0"] 23 | [clout "2.1.2"] 24 | [cheshire "5.6.3"] 25 | [clj-time "0.9.0"] 26 | [ring/ring-core "1.3.2" 27 | :exclusions [org.clojure/tools.reader]] 28 | [ring/ring-codec "1.0.0"] 29 | [com.eaio.uuid/uuid "3.2"] 30 | [cc.qbits/alia-all "3.3.0" 31 | :exclusions [com.eaio.uuid/uuid]] 32 | [cc.qbits/hayt "3.0.1"] 33 | [cc.qbits/jet "0.7.9" 34 | :exclusions [org.clojure/tools.reader]] 35 | [net.jpountz.lz4/lz4 "1.3.0"] 36 | [org.xerial.snappy/snappy-java "1.1.2.4"]]) 37 | -------------------------------------------------------------------------------- /pkg/rpm/init.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ### BEGIN INIT INFO 3 | # Provides: pithos 4 | # Required-Start: $remote_fs $syslog 5 | # Required-Stop: $remote_fs $syslog 6 | # Default-Start: 2 3 4 5 7 | # Default-Stop: 0 1 6 8 | # Short-Description: Pithos object store 9 | # Description: Pithos, a cassandra-backed object store 10 | ### END INIT INFO 11 | 12 | # Source function library. 13 | . /etc/rc.d/init.d/functions 14 | 15 | # Pull in sysconfig settings 16 | [ -f /etc/sysconfig/pithos ] && . /etc/sysconfig/pithos 17 | 18 | # PATH should only include /usr/* if it runs after the mountnfs.sh script 19 | PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin 20 | DESC="Pithos" 21 | NAME=pithos 22 | DAEMON=/usr/bin/pithos 23 | DAEMON_ARGS="-f /etc/pithos/pithos.yaml" 24 | DAEMON_USER=pithos 25 | PID_FILE=/var/run/$NAME.pid 26 | SCRIPT_NAME=/etc/init.d/$NAME 27 | LOCK_FILE=/var/lock/subsys/$NAME 28 | 29 | start() 30 | { 31 | echo -n $"Starting ${NAME}: " 32 | ulimit -n $NFILES 33 | daemonize -u $DAEMON_USER -p $PID_FILE -l $LOCK_FILE $DAEMON $DAEMON_ARGS 34 | RETVAL=$? 35 | echo 36 | [ $RETVAL -eq 0 ] && touch $LOCK_FILE 37 | return $RETVAL 38 | } 39 | 40 | stop() 41 | { 42 | echo -n $"Stopping ${NAME}: " 43 | killproc -p ${PID_FILE} -d 10 $DAEMON 44 | RETVAL=$? 
45 | echo 46 | [ $RETVAL = 0 ] && rm -f ${LOCK_FILE} ${PID_FILE} 47 | return $RETVAL 48 | } 49 | 50 | do_reload() { 51 | echo -n $"Reloading ${NAME}: " 52 | killproc -p ${PID_FILE} $DAEMON -1 53 | RETVAL=$? 54 | echo 55 | return $RETVAL 56 | } 57 | 58 | case "$1" in 59 | start) 60 | start 61 | ;; 62 | stop) 63 | stop 64 | ;; 65 | status) 66 | status -p ${PID_FILE} $DAEMON 67 | RETVAL=$? 68 | ;; 69 | reload|force-reload) 70 | reload 71 | ;; 72 | restart) 73 | stop 74 | start 75 | ;; 76 | *) 77 | N=/etc/init.d/${NAME} 78 | echo "Usage: $N {start|stop|status|restart|force-reload}" >&2 79 | RETVAL=2 80 | ;; 81 | esac 82 | 83 | exit $RETVAL 84 | -------------------------------------------------------------------------------- /test/io/pithos/util_test.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.util-test 2 | (:require [clojure.test :refer :all] 3 | [io.pithos.util :refer [inc-prefix to-bytes parse-uuid 4 | string->pattern]])) 5 | 6 | (deftest inc-prefix-test 7 | (testing "nil prefix" 8 | (is (= nil (inc-prefix nil)))) 9 | (testing "empty prefix" 10 | (is (= nil (inc-prefix "")))) 11 | 12 | (testing "simple prefix" 13 | (is (= "fop" (inc-prefix "foo"))))) 14 | 15 | (deftest byte-factor-test 16 | (testing "from bytes" 17 | (is (= 512 (to-bytes "512")))) 18 | 19 | (testing "from kilobytes" 20 | (is (= 2048 (to-bytes "2k")))) 21 | 22 | (testing "from megabytes" 23 | (is (= 2097152 (to-bytes "2m")))) 24 | 25 | (testing "from gigabytes" 26 | (is (= 2147483648 (to-bytes "2G")))) 27 | 28 | (testing "from terabytes" 29 | (is (= 2199023255552 (to-bytes "2tb")))) 30 | 31 | (testing "from petabytes" 32 | (is (= 2251799813685248 (to-bytes "2Pb"))))) 33 | 34 | (deftest uuid-test 35 | (testing "from uuid" 36 | (is (= #uuid "05ac767e-170f-a639-1ce7-39078945ee4480" 37 | (parse-uuid "05ac767e-170f-a639-1ce7-39078945ee4480"))))) 38 | 39 | (deftest string-to-pattern-test 40 | (testing "no special characters" 41 | (is (= "17hj018" 
(string->pattern "17hj018")))) 42 | (testing "single character" 43 | (is (= "/" (string->pattern "/")))) 44 | (testing "with dots and stars" 45 | (is (= "\\.\\*89\\+2\\?" (string->pattern ".*89+2?")))) 46 | (testing "with grouping" 47 | (is (= "\\(89\\)" (string->pattern "(89)")))) 48 | (testing "with anchors" 49 | (is (= "\\^test\\$" (string->pattern "^test$")))) 50 | (testing "with classes" 51 | (is (= "\\\\d\\\\s\\\\S" (string->pattern "\\d\\s\\S")))) 52 | (testing "with sets and repetitions" 53 | (is (= "\\[a\\-z\\]\\{1,2\\}" (string->pattern "[a-z]{1,2}")))) 54 | (testing "with alternatives" 55 | (is (= "abc\\|cde" (string->pattern "abc|cde")))) 56 | (testing "with escapes" 57 | (is (= "abc\\\\" (string->pattern "abc\\")))) 58 | (testing "with back references" 59 | (is (= "\\(42\\)\\\\1\\\\k\\" 60 | (string->pattern "(42)\\1\\k"))))) 61 | -------------------------------------------------------------------------------- /src/io/pithos/store.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.store 2 | "Generic cassandra cluster connection services." 3 | (:import com.datastax.driver.core.exceptions.InvalidQueryException) 4 | (:require [qbits.alia :as alia] 5 | [qbits.hayt :refer [use-keyspace create-keyspace with]] 6 | [clojure.tools.logging :refer [debug]])) 7 | 8 | (defprotocol Convergeable 9 | (converge! [this])) 10 | 11 | (defprotocol Crudable 12 | (fetch [this k] [this k1 k2] [this k1 k2 k3]) 13 | (update! [this k v] [this k1 k2 v] [this k1 k2 k3 v]) 14 | (delete! [this k] [this k1 k2] [this k1 k2 k3]) 15 | (create! [this k v] [this k1 k2 v] [this k1 k2 k3 v])) 16 | 17 | (defn cassandra-store 18 | "Connect to a cassandra cluster, and use a specific keyspace. 
19 | When the keyspace is not found, try creating it" 20 | [{:keys [cassandra-options cluster keyspace hints repfactor username password]}] 21 | (debug "building cassandra store for: " cluster keyspace hints) 22 | (let [hints (or hints 23 | {:replication {:class "SimpleStrategy" 24 | :replication_factor (or repfactor 1)}}) 25 | cluster (if (sequential? cluster) cluster [cluster]) 26 | session (-> (assoc cassandra-options :contact-points cluster) 27 | (cond-> (and username password) 28 | (assoc :credentials {:user username 29 | :password password})) 30 | (alia/cluster) 31 | (alia/connect))] 32 | (try (alia/execute session (use-keyspace keyspace)) 33 | session 34 | (catch clojure.lang.ExceptionInfo e 35 | (let [{:keys [exception]} (ex-data e)] 36 | (if (and (= (class exception) InvalidQueryException) 37 | (re-find #"^[kK]eyspace.*does not exist$" 38 | (.getMessage exception))) 39 | (do (alia/execute session 40 | (create-keyspace keyspace (with hints))) 41 | (alia/execute session (use-keyspace keyspace)) 42 | session) 43 | (throw e))))))) 44 | -------------------------------------------------------------------------------- /src/io/pithos.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos 2 | " 3 | pithos: object storage daemon 4 | ============================= 5 | 6 | Pithos is an object storage daemon with 7 | pluggable implementation of storage 8 | engines. See [pithos.io](http://pithos.io) for details. 9 | 10 | The `io.pithos` namespace is only responsible for parsing 11 | command line arguments, loading configuration and starting 12 | up the appropriate action." 
13 | (:gen-class) 14 | (:require [io.pithos.schema :as schema] 15 | [io.pithos.config :as config] 16 | [io.pithos.api :as api] 17 | [io.pithos.system :as system] 18 | [spootnik.uncaught :refer [uncaught]] 19 | [clojure.tools.logging :refer [error]] 20 | [clojure.tools.cli :refer [cli]])) 21 | 22 | (defn get-action 23 | "Figure out what the expected action is from the command-line." 24 | [action] 25 | (let [amap {"api-run" api/run 26 | "install-schema" schema/converge-schema}] 27 | (or (get amap action) 28 | (do (println "unknown action: " action) 29 | (System/exit 1))))) 30 | 31 | (defn get-cli 32 | "Parse command line arguments and ensure we return a proper structure." 33 | [args] 34 | (try 35 | (-> (cli args 36 | ["-h" "--help" "Show Help" 37 | :default false :flag true] 38 | ["-f" "--path" "Configuration file path" 39 | :default nil] 40 | ["-q" "--quiet" "Never output to stdout" 41 | :default false :flag true] 42 | ["-a" "--action" "Specify an action (api-run, install-schema)" 43 | :default "api-run"]) 44 | (update-in [0 :action] get-action)) 45 | (catch Exception e 46 | (println "Could not parse arguments: " (.getMessage e)) 47 | (System/exit 1)))) 48 | 49 | (defn setup-uncaught 50 | [{:keys [sentry] :as system}] 51 | (uncaught e 52 | (when (and sentry (fn? sentry)) 53 | (sentry e)) 54 | (error e "uncaught exception")) 55 | system) 56 | 57 | (defn -main 58 | "Main startup path, parse command line arguments, then dispatch to 59 | appropriate action. 
60 | 61 | Only two actions are available: 62 | 63 | - `api-run`: run the S3 api handler 64 | - `install-schema`: converge cassandra schema" 65 | [& args] 66 | (let [[{:keys [path help action quiet]} args banner] (get-cli args)] 67 | 68 | (when help 69 | (println banner) 70 | (System/exit 0)) 71 | 72 | (-> path 73 | (config/init quiet) 74 | (setup-uncaught) 75 | (system/system-descriptor) 76 | action)) 77 | nil) 78 | -------------------------------------------------------------------------------- /vagrant/Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 5 | VAGRANTFILE_API_VERSION = "2" 6 | PITHOS_CLUSTER_SIZE = (ENV['PITHOS_CLUSTER_SIZE'] && ENV['PITHOS_CLUSTER_SIZE'].to_i) || 9 7 | 8 | #prod 9 | EXOSCALE_API_KEY = ENV['EXOSCALE_API_KEY'] 10 | EXOSCALE_API_SECRET = ENV['EXOSCALE_API_SECRET'] 11 | EXOSCALE_INSTANCE_TYPE = ENV['EXOSCALE_INSTANCE_TYPE'] || '350dc5ea-fe6d-42ba-b6c0-efb8b75617ad' 12 | EXOSCALE_TEMPLATE = ENV['EXOSCALE_TEMPLATE'] || '5e705f28-e561-44c7-aba7-67963daf6c9f' 13 | EXOSCALE_ZONE = ENV['EXOSCALE_ZONE'] || '1128bd56-b4d9-4ac6-a7b9-c715b187ce11' 14 | EXOSCALE_HOST = ENV['EXOSCALE_HOST'] || 'api.exoscale.ch' 15 | 16 | EXOSCALE_KEYPAIR = ENV['EXOSCALE_KEYPAIR'] || 'default' 17 | 18 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 19 | 20 | config.vm.box = "exoscale" 21 | config.hostmanager.manage_host = true 22 | 23 | # The url from where the 'config.vm.box' box will be fetched if it 24 | # doesn't already exist on the user's system. 
25 | 26 | 27 | config.vm.provider :cloudstack do |cloudstack, override| 28 | cloudstack.host = EXOSCALE_HOST 29 | cloudstack.path = "/compute" 30 | cloudstack.port = "443" 31 | cloudstack.scheme = "https" 32 | cloudstack.network_id = "00304a04-c7ea-4e77-a786-18bc64347bf7" 33 | 34 | cloudstack.template_id = EXOSCALE_TEMPLATE 35 | cloudstack.zone_id = EXOSCALE_ZONE 36 | 37 | 38 | cloudstack.network_type = "Basic" 39 | cloudstack.api_key = EXOSCALE_API_KEY 40 | cloudstack.secret_key = EXOSCALE_API_SECRET 41 | cloudstack.service_offering_id = EXOSCALE_INSTANCE_TYPE 42 | 43 | cloudstack.keypair = EXOSCALE_KEYPAIR 44 | end 45 | 46 | config.vm.provision :shell, inline: "apt-get update" 47 | config.vm.provision :shell, inline: "apt-get -qy install puppet" 48 | config.vm.provision :hostmanager 49 | 50 | PITHOS_CLUSTER_SIZE.times do |i| 51 | config.vm.define "store#{i}" do |node| 52 | node.ssh.username = "root" 53 | node.ssh.private_key_path = "#{ENV['HOME']}/.ssh/id_rsa" 54 | node.vm.provision :shell, inline: "hostname store#{i}" 55 | node.vm.hostname = "store#{i}" 56 | node.vm.provision :puppet do |puppet| 57 | puppet.working_directory = "/vagrant" 58 | puppet.manifests_path = "manifests" 59 | puppet.module_path = "modules" 60 | puppet.hiera_config_path = "hiera.yaml" 61 | puppet.manifest_file = "store.pp" 62 | puppet.facter = { 63 | "cassandra_topology" => PITHOS_CLUSTER_SIZE.times.map { |i| "store#{i}"}.join(",") 64 | } 65 | end 66 | end 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /vagrant/modules/cassandra/templates/cassandra.yaml.erb: -------------------------------------------------------------------------------- 1 | cluster_name: 'storage-ring' 2 | num_tokens: 8 3 | hinted_handoff_enabled: true 4 | max_hint_window_in_ms: 10800000 # 3 hours 5 | hinted_handoff_throttle_in_kb: 4096 6 | max_hints_delivery_threads: 2 7 | batchlog_replay_throttle_in_kb: 4096 8 | authenticator: AllowAllAuthenticator 9 | authorizer: 
AllowAllAuthorizer 10 | permissions_validity_in_ms: 2000 11 | partitioner: org.apache.cassandra.dht.Murmur3Partitioner 12 | data_file_directories: 13 | - /var/lib/cassandra/data 14 | commitlog_directory: /var/lib/cassandra/commitlog 15 | disk_failure_policy: stop 16 | commit_failure_policy: stop 17 | key_cache_size_in_mb: 18 | key_cache_save_period: 14400 19 | row_cache_size_in_mb: 0 20 | row_cache_save_period: 0 21 | saved_caches_directory: /var/lib/cassandra/saved_caches 22 | commitlog_sync: periodic 23 | commitlog_sync_period_in_ms: 10000 24 | commitlog_segment_size_in_mb: 32 25 | seed_provider: 26 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider 27 | parameters: 28 | - seeds: "<%= @cassandra_topology %>" 29 | concurrent_reads: 32 30 | concurrent_writes: 32 31 | memtable_flush_queue_size: 4 32 | trickle_fsync: false 33 | trickle_fsync_interval_in_kb: 10240 34 | storage_port: 7000 35 | ssl_storage_port: 7001 36 | listen_address: <%= @hostname %> 37 | start_native_transport: true 38 | native_transport_port: 9042 39 | start_rpc: true 40 | rpc_address: <%= @hostname %> 41 | rpc_port: 9160 42 | rpc_keepalive: true 43 | rpc_server_type: sync 44 | thrift_framed_transport_size_in_mb: 15 45 | incremental_backups: false 46 | snapshot_before_compaction: false 47 | auto_snapshot: true 48 | tombstone_warn_threshold: 1000 49 | tombstone_failure_threshold: 100000 50 | column_index_size_in_kb: 64 51 | in_memory_compaction_limit_in_mb: 64 52 | multithreaded_compaction: false 53 | compaction_throughput_mb_per_sec: 16 54 | compaction_preheat_key_cache: true 55 | read_request_timeout_in_ms: 5000 56 | range_request_timeout_in_ms: 10000 57 | write_request_timeout_in_ms: 2000 58 | cas_contention_timeout_in_ms: 1000 59 | truncate_request_timeout_in_ms: 60000 60 | request_timeout_in_ms: 10000 61 | cross_node_timeout: false 62 | endpoint_snitch: SimpleSnitch 63 | dynamic_snitch_update_interval_in_ms: 100 64 | dynamic_snitch_reset_interval_in_ms: 600000 65 | 
dynamic_snitch_badness_threshold: 0.1 66 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler 67 | server_encryption_options: 68 | internode_encryption: none 69 | keystore: conf/.keystore 70 | keystore_password: cassandra 71 | truststore: conf/.truststore 72 | truststore_password: cassandra 73 | client_encryption_options: 74 | enabled: false 75 | keystore: conf/.keystore 76 | keystore_password: cassandra 77 | internode_compression: all 78 | inter_dc_tcp_nodelay: false 79 | preheat_kernel_page_cache: false 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | :warning: Project not under active development :warning: 2 | ------------------------------------------------------------------- 3 | 4 | We're working on open-sourcing another iteration of pithos which offers much better protocol support and better performances. 5 | 6 | No release date known yet 7 | 8 | 9 | 10 | pithos: cassandra object storage 11 | -------------------------------- 12 | 13 | Pithos is an S3-compatible object store leveraging cassandra 14 | to distribute contents horizontally. 15 | 16 | Documentation site lives at http://pithos.io 17 | 18 | [![Build Status](https://travis-ci.org/exoscale/pithos.svg)](https://travis-ci.org/exoscale/pithos) 19 | 20 | 21 | # Quickstart 22 | 23 | You can use [docker-compose](https://docs.docker.com/compose/) to easily 24 | run the current branch in a Docker container for testing purposes. The 25 | Clojure and Cassandra Docker images will use around 1GB of disk space. 26 | 27 | docker-compose up 28 | 29 | Create a new bucket: 30 | 31 | s3cmd --config doc/s3cmd.cfg mb s3://my-bucket 32 | Bucket 's3://my-bucket/' created 33 | 34 | s3cmd --config doc/s3cmd.cfg ls s3:// 35 | 2016-05-27 09:04 s3://my-bucket 36 | 37 | To build an run Pithos manually, continue reading. 
38 | 39 | # Prerequisites 40 | 41 | In order to build and run Pithos you will need the following components: 42 | 43 | * Working Java Runtime Environment version 7 or higher 44 | * A [Cassandra](http://cassandra.apache.org/) cluster in version 2 or higher 45 | * [Leiningen](https://github.com/technomancy/leiningen) Clojure package builder 46 | 47 | # Build 48 | 49 | To build pithos run: 50 | 51 | lein uberjar 52 | 53 | you will get a standalone Java jar file in the `target/` directory 54 | 55 | # Run 56 | 57 | To run Pithos manually start it with 58 | 59 | java -jar target/pithos-0.7.5-standalone.jar 60 | 61 | Pithos will expect finding a valid configuration file under `/etc/pithos/pithos.yaml`. You can specify a distinct config file using the `-f` switch. 62 | 63 | The following startup switches are available: 64 | 65 | Switches Default Desc 66 | -------- ------- ---- 67 | -h, --no-help, --help false Show Help 68 | -f, --path Configuration file path 69 | -q, --no-quiet, --quiet false Never output to stdout 70 | -a, --action api-run Specify an action (api-run, install-schema) 71 | 72 | ## Bootstrapping the environment 73 | 74 | Pithos includes a schema definition file in order to bootstrap your Cassandra cluster. 75 | To install the schema, run: 76 | 77 | java -jar target/pithos-0.7.5-standalone.jar -a install-schema 78 | 79 | 80 | ## Test using the s3cmd command line client 81 | 82 | Have a look at the minimal configuration file provided in 83 | `doc/s3cmd.cfg`. If not running locally, remove the last lines, as 84 | explained in the configuration file. 
85 | 86 | Create a bucket: 87 | 88 | s3cmd -c doc/s3cmd.cfg mb S3:// 89 | 90 | List your buckets: 91 | 92 | s3cmd -c doc/s3cmd.cfg la 93 | -------------------------------------------------------------------------------- /tasks/leiningen/tar.clj: -------------------------------------------------------------------------------- 1 | (ns leiningen.tar 2 | "Create a tarball suitable for rpm packaging, stolen from riemann." 3 | (:use [clojure.java.shell :only [sh with-sh-dir]] 4 | [clojure.java.io :only [file delete-file writer copy]] 5 | [clojure.string :only [join capitalize trim-newline split trim]] 6 | [leiningen.uberjar :only [uberjar]])) 7 | 8 | (defn delete-file-recursively 9 | "Delete file f. If it's a directory, recursively delete all its contents. 10 | Raise an exception if any deletion fails unless silently is true." 11 | [f & [silently]] 12 | (System/gc) ; This sometimes helps release files for deletion on windows. 13 | (let [f (file f)] 14 | (if (.isDirectory f) 15 | (doseq [child (.listFiles f)] 16 | (delete-file-recursively child silently))) 17 | (delete-file f silently))) 18 | 19 | (defn tar-dir 20 | "Tar package working directory." 21 | [project] 22 | (file (:root project) "target" "tar" (str (:name project) "-" 23 | (:version project)))) 24 | 25 | (defn cleanup 26 | [project] 27 | ; Delete working dir. 28 | (when (.exists (file (:root project) "target" "tar")) 29 | (delete-file-recursively (file (:root project) "target" "tar")))) 30 | 31 | (defn reset 32 | [project] 33 | (cleanup project) 34 | (sh "rm" (str (:root project) "/target/*.tar.bz2"))) 35 | 36 | (defn make-tar-dir 37 | "Creates the tarball package structure in a new directory." 
38 | [project] 39 | (let [dir (tar-dir project)] 40 | (.mkdirs dir) 41 | 42 | ; Jar 43 | (.mkdirs (file dir "lib")) 44 | (copy (file (:root project) "target" 45 | (str "pithos-" (:version project) "-standalone.jar")) 46 | (file dir "lib" "pithos.jar")) 47 | 48 | ; Binary 49 | (.mkdirs (file dir "bin")) 50 | (copy (file (:root project) "pkg" "tar" "pithos") 51 | (file dir "bin" "pithos")) 52 | ; Fix: the copied binary is named "pithos", not "pithosn"; the previous 53 | ; typo set the executable bit on a nonexistent file, leaving the shipped 54 | ; wrapper script non-executable in the tarball. 55 | (.setExecutable (file dir "bin" "pithos") true false) 56 | 57 | ; Config 58 | (.mkdirs (file dir "etc")) 59 | (copy (file (:root project) "pkg" "tar" "pithos.config") 60 | (file dir "etc" "pithos.config")) 61 | 62 | dir)) 63 | 64 | (defn write 65 | "Write string to file, plus newline" 66 | [file string] 67 | (with-open [w (writer file)] 68 | (.write w (str (trim-newline string) "\n")))) 69 | 70 | (defn md5 71 | "Computes the md5 checksum of a file. Returns a hex string." 72 | [file] 73 | (-> (->> file 74 | str 75 | (sh "md5sum") 76 | :out) 77 | (split #" ") 78 | first 79 | trim)) 80 | 81 | (defn compress 82 | "Convert given package directory to a .tar.bz2." 83 | [project tar-dir] 84 | (let [filename (str (:name project) 85 | "-" 86 | (:version project) 87 | ".tar.bz2") 88 | tarball (str (file (:root project) 89 | "target" 90 | filename))] 91 | (with-sh-dir (.getParent tar-dir) 92 | (print (:err (sh "tar" "cvjf" tarball (.getName tar-dir))))) 93 | 94 | (write (str tarball ".md5") 95 | (str (md5 tarball) " " filename)))) 96 | 97 | (defn tar 98 | ([project] (tar project true)) 99 | ([project uberjar?] 100 | (reset project) 101 | (when uberjar?
(uberjar project)) 99 | (compress project (make-tar-dir project)) 100 | (cleanup project))) 101 | -------------------------------------------------------------------------------- /test/io/pithos/sig_test.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.sig-test 2 | (:require [clojure.test :refer :all] 3 | [clojure.string :refer [join]] 4 | [io.pithos.sig :refer [string-to-sign]])) 5 | 6 | (deftest string-to-sign-test 7 | (testing "signature with only Date header" 8 | (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT"} 9 | :request-method "GET" 10 | :sign-uri "/bucket/batman" 11 | :params {}}) 12 | (join "\n" ["GET" 13 | "" 14 | "" 15 | "Thu, 17 Nov 2005 18:49:58 GMT" 16 | "/bucket/batman"])))) 17 | 18 | (testing "signature with custom x-amz-* headers" 19 | (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT" 20 | "x-amz-meta-magic" "magic string" 21 | "x-amz-magic" "batman"} 22 | :request-method "GET" 23 | :sign-uri "/bucket/batman" 24 | :params {}}) 25 | (join "\n" ["GET" 26 | "" 27 | "" 28 | "Thu, 17 Nov 2005 18:49:58 GMT" 29 | "x-amz-magic:batman" 30 | "x-amz-meta-magic:magic string" 31 | "/bucket/batman"])))) 32 | 33 | (testing "signature with non x-amz-headers" 34 | (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT" 35 | "x-noamz-meta-magic" "magic string" 36 | "x-noamz-magic" "batman"} 37 | :request-method "GET" 38 | :sign-uri "/bucket/batman" 39 | :params {}}) 40 | (join "\n" ["GET" 41 | "" 42 | "" 43 | "Thu, 17 Nov 2005 18:49:58 GMT" 44 | "/bucket/batman"])))) 45 | 46 | (testing "signature with both Content-{Md5,Type} headers" 47 | (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT" 48 | "content-md5" "c8fdb181845a4ca6b8fec737b3581d76" 49 | "content-type" "text/html"} 50 | :request-method "GET" 51 | :sign-uri "/bucket/batman" 52 | :params {}}) 53 | (join "\n" ["GET" 54 | "c8fdb181845a4ca6b8fec737b3581d76" 55 | 
"text/html" 56 | "Thu, 17 Nov 2005 18:49:58 GMT" 57 | "/bucket/batman"])))) 58 | 59 | (testing "signature for GET and x-amz-date header" 60 | (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT" 61 | "x-amz-date" "Thu, 17 Nov 2005 18:49:20 GMT"} 62 | :request-method "GET" 63 | :sign-uri "/bucket/batman" 64 | :params {}}) 65 | (join "\n" ["GET" 66 | "" 67 | "" 68 | "" 69 | "x-amz-date:Thu, 17 Nov 2005 18:49:20 GMT" 70 | "/bucket/batman"])))) 71 | 72 | (testing "signature with query string" 73 | (is (= (string-to-sign {:headers {"date" "Thu, 17 Nov 2005 18:49:58 GMT"} 74 | :request-method "GET" 75 | :sign-uri "/bucket/batman" 76 | :params {:expires "1141889120"}}) 77 | (join "\n" ["GET" 78 | "" 79 | "" 80 | "1141889120" 81 | "/bucket/batman"]))))) 82 | -------------------------------------------------------------------------------- /test/io/pithos/meta_test.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.meta-test 2 | (:require [clojure.test :refer :all] 3 | [io.pithos.util :refer [inc-prefix]] 4 | [io.pithos.meta :refer [filter-keys filter-prefixes 5 | get-prefixes]])) 6 | 7 | 8 | (deftest prefixes-and-contents-test 9 | 10 | (let [in-and-outs ["simple list" 11 | [{:object "foo/bar.txt"} 12 | {:object "foo/baz.txt"}] 13 | "" 14 | "/" 15 | #{"foo/"} 16 | [] 17 | 10 18 | nil 19 | false 20 | 21 | "object shows up" 22 | [{:object "foo.txt"}] 23 | "foo.txt" 24 | "/" 25 | #{} 26 | [{:object "foo.txt"}] 27 | 10 28 | nil 29 | false 30 | 31 | 32 | "no delimiter" 33 | [{:object "foo/bar.txt"} 34 | {:object "foo/baz.txt"}] 35 | "" 36 | nil 37 | #{} 38 | [{:object "foo/bar.txt"} 39 | {:object "foo/baz.txt"}] 40 | 10 41 | nil 42 | false 43 | 44 | "simple list with prefix" 45 | [{:object "foo/bar.txt"} 46 | {:object "foo/baz.txt"} 47 | {:object "batman/foo.txt"}] 48 | "foo/" 49 | "/" 50 | #{} 51 | [{:object "foo/bar.txt"} 52 | {:object "foo/baz.txt"}] 53 | 10 54 | nil 55 | false 56 | 57 | "with prefix but 
no delimiter" 58 | [{:object "foo-bar.txt"} 59 | {:object "foo-baz.txt"} 60 | {:object "batman-foo.txt"}] 61 | "foo-" 62 | nil 63 | #{} 64 | [{:object "foo-bar.txt"} 65 | {:object "foo-baz.txt"}] 66 | 10 67 | nil 68 | false 69 | ]] 70 | (doseq [[nickname objects prefix delimiter 71 | prefixes keys max-keys marker truncated?] 72 | (partition 9 in-and-outs)] 73 | (testing (str "valid output for " nickname) 74 | 75 | (let [found-prefixes (filter-prefixes objects prefix delimiter)] 76 | (is (= prefixes found-prefixes)) 77 | (is (= keys (remove found-prefixes 78 | (filter-keys objects prefix delimiter))))))))) 79 | 80 | 81 | (defn make-fetcher 82 | "Provide a simulation of cassandra's wide row storage for testing 83 | Alternate store implementations will need to provide the same properties" 84 | [input] 85 | (fn [prefix marker limit init?] 86 | (let [>pred #(or (= (:object %) (or marker prefix)) 87 | (not (.startsWith (or (:object %) "") 88 | (or marker prefix "")))) 89 | > input 92 | (sort-by :object) 93 | (drop-while >pred) 94 | (take-while 13 | 14 | # Do NOT "set -e" 15 | 16 | # PATH should only include /usr/* if it runs after the mountnfs.sh script 17 | PATH=/sbin:/usr/sbin:/bin:/usr/bin:/usr/local/bin 18 | DESC="Pithos" 19 | NAME=pithos 20 | DAEMON=/usr/bin/pithos 21 | DAEMON_ARGS="-f /etc/pithos/pithos.yaml" 22 | DAEMON_USER=pithos 23 | PIDFILE=/var/run/$NAME.pid 24 | SCRIPTNAME=/etc/init.d/$NAME 25 | 26 | # Exit if the package is not installed 27 | [ -x "$DAEMON" ] || exit 0 28 | 29 | # Read configuration variable file if it is present 30 | [ -r /etc/default/$NAME ] && . /etc/default/$NAME 31 | 32 | # Load the VERBOSE setting and other rcS variables 33 | . /lib/init/vars.sh 34 | 35 | # Define LSB log_* functions. 36 | # Depend on lsb-base (>= 3.2-14) to ensure that this file is present 37 | # and status_of_proc is working. 38 | . 
/lib/lsb/init-functions 39 | 40 | # Function that starts the daemon/service 41 | do_start() 42 | { 43 | # Return 44 | # 0 if daemon has been started 45 | # 1 if daemon was already running 46 | # 2 if daemon could not be started 47 | pid=$( pidofproc -p $PIDFILE "$NAME") 48 | if [ -n "$pid" ] ; then 49 | log_daemon_msg "Pithos is already running (PID `cat ${PIDFILE}`)" 50 | return 1 51 | fi 52 | start-stop-daemon --start --quiet --chuid $DAEMON_USER --chdir / --make-pidfile --background --pidfile $PIDFILE --exec $DAEMON -- \ 53 | $DAEMON_ARGS \ 54 | || return 2 55 | # Add code here, if necessary, that waits for the process to be ready 56 | # to handle requests from services started subsequently which depend 57 | # on this one. As a last resort, sleep for some time. 58 | } 59 | 60 | # Function that stops the daemon/service 61 | do_stop() 62 | { 63 | # Return 64 | # 0 if daemon has been stopped 65 | # 1 if daemon was already stopped 66 | # 2 if daemon could not be stopped 67 | # other if a failure occurred 68 | start-stop-daemon --stop --quiet --retry=TERM/30/KILL/5 --pidfile $PIDFILE 69 | RETVAL="$?" 70 | [ "$RETVAL" = 2 ] && return 2 71 | # Wait for children to finish too if this is a daemon that forks 72 | # and if the daemon is only ever run from this initscript. 73 | # If the above conditions are not satisfied then add some other code 74 | # that waits for the process to drop all resources that could be 75 | # needed by services started subsequently. A last resort is to 76 | # sleep for some time. 77 | start-stop-daemon --stop --quiet --oknodo --retry=0/30/KILL/5 --exec $DAEMON 78 | [ "$?" = 2 ] && return 2 79 | # Many daemons don't delete their pidfiles when they exit. 80 | rm -f $PIDFILE 81 | return "$RETVAL" 82 | } 83 | 84 | # Function that sends a SIGHUP to the daemon/service 85 | do_reload() { 86 | # 87 | # If the daemon can reload its configuration without 88 | # restarting (for example, when it is sent a SIGHUP), 89 | # then implement that here. 
90 | # 91 | start-stop-daemon --stop --quiet --signal HUP --pidfile $PIDFILE 92 | return $? 93 | } 94 | 95 | case "$1" in 96 | start) 97 | [ "$VERBOSE" != no ] && log_daemon_msg "Starting $DESC" "$NAME" 98 | do_start 99 | case "$?" in 100 | 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; 101 | 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; 102 | esac 103 | ;; 104 | stop) 105 | [ "$VERBOSE" != no ] && log_daemon_msg "Stopping $DESC" "$NAME" 106 | do_stop 107 | case "$?" in 108 | 0|1) [ "$VERBOSE" != no ] && log_end_msg 0 ;; 109 | 2) [ "$VERBOSE" != no ] && log_end_msg 1 ;; 110 | esac 111 | ;; 112 | status) 113 | status_of_proc "$DAEMON" "$NAME" && exit 0 || exit $? 114 | ;; 115 | reload|force-reload) 116 | log_daemon_msg "Reloading $DESC" "$NAME" 117 | do_reload 118 | log_end_msg $? 119 | ;; 120 | restart) 121 | log_daemon_msg "Restarting $DESC" "$NAME" 122 | do_stop 123 | case "$?" in 124 | 0|1) 125 | do_start 126 | case "$?" in 127 | 0) log_end_msg 0 ;; 128 | 1) log_end_msg 1 ;; # Old process is still running 129 | *) log_end_msg 1 ;; # Failed to start 130 | esac 131 | ;; 132 | *) 133 | # Failed to stop 134 | log_end_msg 1 135 | ;; 136 | esac 137 | ;; 138 | *) 139 | echo "Usage: $SCRIPTNAME {start|stop|status|restart|reload|force-reload}" >&2 140 | exit 3 141 | ;; 142 | esac 143 | 144 | : 145 | -------------------------------------------------------------------------------- /src/io/pithos/acl.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.acl 2 | "The purpose of this namespace is to coerce to and from 3 | the internal representation of ACLs. 
4 | 5 | The current representation is: 6 | 7 | {:FULL_CONTROL 8 | [{:ID \"AUTH_KEY\" :DisplayName \"Some Name\"} 9 | {:URI \"http://groups/group-uri\"}] 10 | ...} 11 | " 12 | (:require [clojure.data.xml :refer [parse-str emit-str indent-str]] 13 | [clojure.zip :refer [xml-zip node root]] 14 | [clojure.data.zip :refer [children]] 15 | [clojure.data.zip.xml :refer [xml-> xml1-> text]] 16 | [io.pithos.xml :refer [seq->xml]])) 17 | 18 | ;; ### XML ACL parsing functions 19 | ;; 20 | ;; We're doing a very sloppy type of schema validation 21 | ;; this should likely move to a stricter XSD validation 22 | ;; phase. 23 | 24 | (def ^{:doc "List of known permissions. Valid in ACLs"} 25 | valid-permission? 26 | #{:FULL_CONTROL :READ :WRITE :READ_ACP :WRITE_ACP}) 27 | 28 | (def ^{:doc "List of known tags in grantees"} 29 | valid-grantee-tag? 30 | #{:ID :DisplayName :URI :EmailAddress}) 31 | 32 | (def ^{:doc "List of known URIs"} 33 | known-uris 34 | {"http://acs.amazonaws.com/groups/global/AllUsers" "anonymous"}) 35 | 36 | (def ^{:doc "List of known Groups"} 37 | known-groups 38 | (reduce merge {} (map (juxt val key) known-uris))) 39 | 40 | (defn node->grantee-spec 41 | "Produce a grantee specifier (ID, DisplayName or URI)" 42 | [n] 43 | (let [{:keys [tag content]} (node n) 44 | text (first content)] 45 | (when (and (valid-grantee-tag? tag) (string? text)) 46 | (cond 47 | (= :URI tag) (hash-map tag (or (known-uris text) text)) 48 | (= :EmailAddress tag) (hash-map :ID text) 49 | :else (hash-map tag text))))) 50 | 51 | (defn node->grantee 52 | "Produce a valid grantee." 
53 | [n] 54 | (reduce merge {} (xml-> n children node->grantee-spec))) 55 | 56 | (defn node->grant 57 | "Each grant in an input body shoudl contain at least an ID and DisplayName or 58 | a URI" 59 | [node] 60 | (hash-map 61 | (xml1-> node :Permission text (fnil keyword "invalid")) 62 | (vec (xml-> node :Grantee node->grantee)))) 63 | 64 | 65 | (defn safe-xml-zip 66 | "Ingest an XML representation, safely, throwing explicit 67 | and details errors." 68 | [src] 69 | (try 70 | (let [tree (xml-zip (parse-str src)) 71 | {:keys [tag]} (root tree)] 72 | (when-not (= :AccessControlPolicy tag) 73 | (throw (ex-info "XML Root Node should be AccessControlPolicy" 74 | {:type :invalid-xml-root-node 75 | :expected :AccessControlPolicy 76 | :got tag}))) 77 | tree) 78 | (catch clojure.lang.ExceptionInfo e 79 | (throw e)) 80 | (catch Exception e 81 | (throw (ex-info "Invalid XML in ACL Body" 82 | {:type :invalid-acl-xml 83 | :status-code 400}))))) 84 | 85 | (defn xml->acl 86 | "Given an XML source, try to parse it and return valid" 87 | [src] 88 | (let [xml-tree (safe-xml-zip src) 89 | policies (xml-> xml-tree 90 | :AccessControlList 91 | :Grant 92 | node->grant) 93 | policy (apply merge-with (comp vec concat) policies)] 94 | (when-not (every? valid-permission? 
(keys policy)) 95 | (throw (ex-info "Invalid XML Acl Body" {:type :invalid-acl-xml 96 | :status-code 400}))) 97 | policy)) 98 | 99 | (defn grant->permission 100 | "Generate grant XML tags from a hash map of permissions to grantees" 101 | [[permission grantees]] 102 | (let [xmlns-xsi "http://www.w3.org/2001/XMLSchema-instance"] 103 | (for [{:keys [ID DisplayName URI]} grantees] 104 | [:Grant 105 | (if URI 106 | [:Grantee {:xmlns:xsi xmlns-xsi :xsi:type "Group"} 107 | [:URI (or (known-groups URI) URI)] 108 | [:DisplayName (or DisplayName URI)]] 109 | [:Grantee {:xmlns:xsi xmlns-xsi :xsi:type "CanonicalUser"} 110 | [:ID ID] 111 | [:DisplayName (or DisplayName ID)]]) 112 | [:Permission (name permission)]]))) 113 | 114 | (defn as-xml 115 | "Given an internal representation of an ACL, output a valid 116 | XML representation. 117 | Optionaly supply a boolean to indicate whether to indent the output" 118 | ([grants indent?] 119 | (let [xmlns "http://s3.amazonaws.com/doc/2006-03-01/" 120 | format (if indent? indent-str emit-str)] 121 | (format 122 | (seq->xml 123 | [:AccessControlPolicy {:xmlns xmlns} 124 | [:Owner 125 | [:ID "foo"] 126 | [:DisplayName "bar"]] 127 | (apply vector :AccessControlList 128 | (mapcat grant->permission grants))])))) 129 | ([grants] 130 | (as-xml grants false))) 131 | -------------------------------------------------------------------------------- /src/io/pithos/sig.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.sig 2 | "Compute request signatures as described in 3 | http://docs.aws.amazon.com/AmazonS3/latest/dev/RESTAuthentication.html" 4 | (:require [clojure.string :as s] 5 | [clojure.tools.logging :refer [info debug]] 6 | [clojure.data.codec.base64 :as base64] 7 | [clj-time.core :refer [after? 
now]] 8 | [clj-time.coerce :refer [to-date-time]] 9 | [constance.comp :refer [===]] 10 | [io.pithos.util :refer [date->rfc822]]) 11 | (:import javax.crypto.Mac javax.crypto.spec.SecretKeySpec)) 12 | 13 | (defn canonicalized 14 | "Group headers starting with x-amz, each on a separate line and add uri" 15 | [headers uri] 16 | (s/join "\n" 17 | (concat (->> headers 18 | (map (juxt (comp name key) (comp s/trim val))) 19 | (filter (comp #(.startsWith % "x-amz") first)) 20 | (sort-by first) 21 | (map (partial s/join ":"))) 22 | [uri]))) 23 | 24 | (defn string-to-sign 25 | "Yield the string to sign for an incoming request" 26 | [{:keys [headers request-method sign-uri params] :as request}] 27 | (let [content-md5 (get headers "content-md5") 28 | content-type (get headers "content-type") 29 | date (or (get params :expires) 30 | (if-not (get headers "x-amz-date") 31 | (get headers "date")))] 32 | (s/join 33 | "\n" 34 | [(-> request-method name s/upper-case) 35 | (or content-md5 "") 36 | (or content-type "") 37 | (or date "") 38 | (canonicalized headers sign-uri)]))) 39 | 40 | (defn sign-string 41 | [src secret-key] 42 | (let [key (SecretKeySpec. (.getBytes secret-key) "HmacSHA1")] 43 | (String. 
(-> (doto (Mac/getInstance "HmacSHA1") (.init key)) 44 | (.doFinal (.getBytes src)) 45 | (base64/encode))))) 46 | 47 | (defn sign-request 48 | "Sign the request, signatures are basic HmacSHA1s, encoded in base64" 49 | [request secret-key] 50 | (sign-string (string-to-sign request) secret-key)) 51 | 52 | (defn auth 53 | "Extract access key and signature from the request, using query string 54 | parameters or Authorization header" 55 | [request] 56 | (if-let [auth-str (get-in request [:headers "authorization"])] 57 | (let [[_ access-key sig] (re-matches #"^[Aa][Ww][Ss] (.*):(.*)$" auth-str)] 58 | {:sig sig :access-key access-key}) 59 | (let [access-key (get-in request [:params :awsaccesskeyid]) 60 | sig (get-in request [:params :signature])] 61 | (if (and access-key sig) 62 | {:sig sig :access-key access-key} 63 | nil)))) 64 | 65 | (defn check-sig 66 | [request keystore key str sig] 67 | (let [{:keys [secret] :as authorization} (get keystore key) 68 | signed (try (sign-string str secret) 69 | (catch Exception e 70 | {:failed true :exception e}))] 71 | (when-not (and (not (nil? sig)) 72 | (string? 
signed) 73 | (=== sig signed)) 74 | (info "will throw because of failed signature!") 75 | (when (:exception signed) 76 | (debug (:exception signed) "got exception during signing")) 77 | (throw (ex-info "invalid policy signature" 78 | {:type :signature-does-not-match 79 | :status-code 403 80 | :request request 81 | :expected signed 82 | :to-sign str}))) 83 | (update-in authorization [:memberof] concat ["authenticated-users"]))) 84 | 85 | (def anonymous {:tenant :anonymous :memberof ["anonymous"]}) 86 | 87 | (defn validate 88 | "Validate an incoming request (e.g: make sure the signature is correct), 89 | when applicable (requests may be unauthenticated)" 90 | [keystore request] 91 | (if-let [data (auth request)] 92 | (let [{:keys [sig access-key]} data 93 | {:keys [secret] :as authorization} (get keystore access-key) 94 | signed (try (sign-request request secret) 95 | (catch Exception e 96 | {:failed true :exception e}))] 97 | (when-not (and (not (nil? sig)) 98 | (string? signed) 99 | (=== sig signed)) 100 | (info "will throw because of failed signature!") 101 | (when (:exception signed) 102 | (debug (:exception signed) "got exception during signing")) 103 | (debug "string-to-sign: " (string-to-sign request)) 104 | (throw (ex-info "invalid request signature" 105 | {:type :signature-does-not-match 106 | :status-code 403 107 | :request request 108 | :expected signed 109 | :to-sign (string-to-sign request)}))) 110 | (when-let [expires (get-in request [:params :expires])] 111 | (let [expires (to-date-time (* 1000 (Integer/parseInt expires)))] 112 | (when (after? 
(now) expires) 113 | (throw (ex-info "expired request" 114 | {:type :expired-request 115 | :status-code 403 116 | :request request 117 | :expires (date->rfc822 expires)}))))) 118 | (update-in authorization [:memberof] concat ["authenticated-users" 119 | "anonymous"])) 120 | anonymous)) 121 | -------------------------------------------------------------------------------- /src/io/pithos/util.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.util 2 | "A few utility functions, used in several places" 3 | (:import [java.io PipedInputStream PipedOutputStream] 4 | [java.lang Math] 5 | [org.joda.time DateTimeZone]) 6 | (:require [clojure.string :as s] 7 | [clojure.string :refer [lower-case]] 8 | [clj-time.core :refer [now]] 9 | [clj-time.format :refer [formatters parse unparse formatter]])) 10 | 11 | (defn uri-decode 12 | [s] 13 | (when s 14 | (java.net.URLDecoder/decode s "UTF-8"))) 15 | 16 | (defn md5-init 17 | "Yield an MD5 MessageDigest instance" 18 | [] 19 | (doto (java.security.MessageDigest/getInstance "MD5") (.reset))) 20 | 21 | (defn md5-update 22 | "Add data from byte-array in a MessageDigest instance" 23 | [hash ba from to] 24 | (locking hash 25 | (doto hash 26 | (.update ba from to)))) 27 | 28 | (defn md5-sum 29 | "Yield a padded hex string of an MD5 digest" 30 | [hash] 31 | (let [digest (.toString (java.math.BigInteger. 1 (.digest hash)) 16) 32 | pad (apply str (repeat (- 32 (count digest)) "0"))] 33 | (str pad digest))) 34 | 35 | (defn inc-prefix 36 | "Given an object path, yield the next semantic one." 
37 | [p] 38 | (when (seq p) 39 | (let [[c & s] (reverse p) 40 | reversed (conj s (-> c int inc char))] 41 | (apply str (reverse reversed))))) 42 | 43 | (def byte-factors 44 | "1024 factor of corresponding storage unit" 45 | {"k" 1 "m" 2 "g" 3 "t" 4 "p" 5}) 46 | 47 | (def byte-pattern 48 | "Regular expression pattern for data size" 49 | #"([0-9]+)(([kKmMgGtTpP])[bB]?)?") 50 | 51 | (defn to-bytes 52 | "Parse an input string into a byte amount, the input 53 | string can be suffixed by a unit specifier" 54 | [input & [param]] 55 | (when input 56 | (if-let [[_ amount _ factor] (re-find byte-pattern (str input))] 57 | (long 58 | (* (Long/parseLong amount) 59 | (if factor 60 | (Math/pow 1024 (get byte-factors (lower-case factor))) 61 | 1))) 62 | (throw (ex-info (format "invalid byte amount [%s]: %s" 63 | (or param "") input) {}))))) 64 | 65 | 66 | (defn piped-input-stream 67 | "yield two interconnected PipedInputStream and PipedOutputStream" 68 | [] 69 | (let [os (PipedOutputStream.) 70 | is (PipedInputStream. 
os)] 71 | [is os])) 72 | 73 | (defn parse-uuid 74 | "Parse the string representation of a uuid" 75 | [s] 76 | (java.util.UUID/fromString s)) 77 | 78 | (def gmt 79 | "The GMT timezone, only fetched once" 80 | (DateTimeZone/forID "GMT")) 81 | 82 | (def rfc822-format 83 | (formatter "EEE, dd MMM yyyy HH:mm:ss" gmt)) 84 | 85 | (defn date->rfc822 86 | [d] 87 | (str (unparse rfc822-format d) " GMT")) 88 | 89 | (defn iso8601->date 90 | [isodate] 91 | (parse (:date-time-parser formatters) isodate)) 92 | 93 | (defn iso8601->rfc822 94 | "RFC822 representation based on an iso8601 timestamp" 95 | [isodate] 96 | (->> (parse (:date-time-parser formatters) isodate) 97 | (date->rfc822))) 98 | 99 | (defn iso8601 100 | "iso8601 timestamp representation" 101 | [date] 102 | (unparse (:date-time formatters) date)) 103 | 104 | (defn iso8601-timestamp 105 | "String representation of the current timestamp in UTC" 106 | [] 107 | (iso8601 (now))) 108 | 109 | (def ^:private regex-char-esc-smap 110 | "Characters to be escaped in a regular pattern (including inside a set)" 111 | ;; The documentation is available here:a 112 | ;; https://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html 113 | (let [esc-chars "[]{}()<>*+^$?|\\.&-!#"] 114 | (zipmap esc-chars 115 | (map (partial str "\\") esc-chars)))) 116 | (defn string->pattern 117 | "Escape a string to be used as a regular pattern" 118 | [string] 119 | (->> string 120 | (replace regex-char-esc-smap) 121 | (reduce str ""))) 122 | 123 | (defn interpol 124 | [s args] 125 | (let [trimk (fn [s] (keyword (.substring s 2 (dec (.length s)))))] 126 | (s/replace s #"\$\{[^}]*\}" (fn [k] (get args (trimk k) ""))))) 127 | 128 | 129 | (defmacro cond-with 130 | "Takes a symbol and a set of test/expr pairs. It evaluates 131 | each test one at a time. If a test returns logical true, 132 | cond-with evaluates the corresponding expr, binding the 133 | symbol to the test's return. 
The return value of the expr 134 | is returned and no more tests are evaluated. If no test/expr 135 | pairs are present, nil is returned. An odd number of clauses 136 | will throw an exception." 137 | [sym & clauses] 138 | (when clauses 139 | (list 'if-let [`~sym (first clauses)] 140 | (if (next clauses) 141 | (second clauses) 142 | (throw (IllegalArgumentException. 143 | "cond-with requires an even number of forms."))) 144 | (cons `cond-with (conj (next (next clauses)) `~sym))))) 145 | 146 | (defmacro cond-let 147 | "Takes a symbol and a set of test/expr pairs. Tests may be 148 | expressions or binding vectors. If a test returns logical true, 149 | cond-let evaluates the corresponding, if a binding vector was 150 | provided, the expr will be evaluated within that context. The 151 | return value of the expr is returned and no more tests are 152 | evaluated. If no test/expr paris are present, nil is returned. 153 | An odd number of clauses will throw an exception." 154 | [& clauses] 155 | (when clauses 156 | (list (if (vector? (first clauses)) 'if-let 'if) 157 | (first clauses) 158 | (if (next clauses) 159 | (second clauses) 160 | (throw (IllegalArgumentException. 161 | "cond-let requires an even number of forms."))) 162 | (cons `cond-let (next (next clauses)))))) 163 | -------------------------------------------------------------------------------- /tasks/leiningen/fatdeb.clj: -------------------------------------------------------------------------------- 1 | (ns leiningen.fatdeb 2 | "Build a .deb package from leiningen, stolen from riemann." 
3 | (:refer-clojure :exclude [replace]) 4 | (:require [clojure.java.shell :refer [sh]] 5 | [clojure.java.io :refer [file delete-file writer copy]] 6 | [clojure.string :refer [join capitalize trim-newline replace]] 7 | [leiningen.uberjar :refer [uberjar]]) 8 | (:import java.text.SimpleDateFormat 9 | java.util.Date)) 10 | 11 | (defn md5 12 | [input] 13 | (let [digest (-> (doto (java.security.MessageDigest/getInstance "MD5") 14 | (.reset) 15 | (.update (.getBytes input))) 16 | (.digest))] 17 | (.toString (java.math.BigInteger. 1 digest) 16))) 18 | 19 | (defn delete-file-recursively 20 | "Delete file f. If it's a directory, recursively delete all its contents. 21 | Raise an exception if any deletion fails unless silently is true." 22 | [f & [silently]] 23 | (System/gc) ; This sometimes helps release files for deletion on windows. 24 | (let [f (file f)] 25 | (if (.isDirectory f) 26 | (doseq [child (.listFiles f)] 27 | (delete-file-recursively child silently))) 28 | (delete-file f silently))) 29 | 30 | (defn deb-dir 31 | "Debian package working directory." 32 | [project] 33 | (file (:root project) "target/deb/pithos")) 34 | 35 | (defn cleanup 36 | [project] 37 | ; Delete working dir. 38 | (when (.exists (deb-dir project)) 39 | (delete-file-recursively (deb-dir project)))) 40 | 41 | (defn reset 42 | [project] 43 | (cleanup project) 44 | (sh "rm" (str (:root project) "/target/*.deb"))) 45 | 46 | (def build-date (Date.)) 47 | 48 | (defn get-version 49 | [project] 50 | (let [df (SimpleDateFormat. 
"yyyyMMdd-HHmmss")] 51 | (replace (:version project) #"SNAPSHOT" (.format df build-date)))) 52 | 53 | (defn control 54 | "Control file" 55 | [project] 56 | (join "\n" 57 | (map (fn [[k v]] (str (capitalize (name k)) ": " v)) 58 | {:package (str (:name project) "-s3") 59 | :version (get-version project) 60 | :section "base" 61 | :priority "optional" 62 | :architecture "all" 63 | :depends (join ", " ["bash" "java7-runtime-headless | openjdk-7-jre-headless"]) 64 | :maintainer (:email (:maintainer project)) 65 | :description (:description project)}))) 66 | 67 | (defn write 68 | "Write string to file, plus newline" 69 | [file string] 70 | (with-open [w (writer file)] 71 | (.write w (str (trim-newline string) "\n")))) 72 | 73 | (defn make-deb-dir 74 | "Creates the debian package structure in a new directory." 75 | [project] 76 | (let [dir (deb-dir project)] 77 | (.mkdirs dir) 78 | 79 | ;; Meta 80 | (.mkdirs (file dir "DEBIAN")) 81 | 82 | (write (file dir "DEBIAN" "control") (control project)) 83 | (write (file dir "DEBIAN" "conffiles") 84 | (join "\n" ["/etc/pithos/pithos.yaml" 85 | "/etc/init.d/pithos" 86 | "/etc/default/pithos"])) 87 | 88 | ;; Preinst 89 | (copy (file (:root project) "pkg" "deb" "preinst.sh") 90 | (file dir "DEBIAN" "preinst")) 91 | (.setExecutable (file dir "DEBIAN" "preinst") true false) 92 | 93 | ;; Postinst 94 | (copy (file (:root project) "pkg" "deb" "postinst.sh") 95 | (file dir "DEBIAN" "postinst")) 96 | (.setExecutable (file dir "DEBIAN" "postinst") true false) 97 | 98 | ;; Prerm 99 | (copy (file (:root project) "pkg" "deb" "prerm.sh") 100 | (file dir "DEBIAN" "prerm")) 101 | (.setExecutable (file dir "DEBIAN" "prerm") true false) 102 | 103 | ;; Postrm 104 | (copy (file (:root project) "pkg" "deb" "postrm.sh") 105 | (file dir "DEBIAN" "postrm")) 106 | (.setExecutable (file dir "DEBIAN" "postrm") true false) 107 | 108 | ;; Jar 109 | (.mkdirs (file dir "usr" "lib" "pithos")) 110 | (copy (file (:root project) "target" 111 | (str "pithos-" 
(:version project) "-standalone.jar")) 112 | (file dir "usr" "lib" "pithos" "pithos.jar")) 113 | 114 | 115 | ;; Binary 116 | (.mkdirs (file dir "usr" "bin")) 117 | (copy (file (:root project) "pkg" "deb" "pithos") 118 | (file dir "usr" "bin" "pithos")) 119 | (.setExecutable (file dir "usr" "bin" "pithos") true false) 120 | 121 | ; Log dir 122 | (.mkdirs (file dir "var" "log" "pithos")) 123 | 124 | ; Config 125 | (.mkdirs (file dir "etc" "pithos")) 126 | (copy (file (:root project) "doc" "pithos.yaml") 127 | (file dir "etc" "pithos" "pithos.yaml")) 128 | 129 | ; defaults file 130 | (.mkdirs (file dir "etc" "default")) 131 | (copy (file (:root project) "pkg" "deb" "pithos.default") 132 | (file dir "etc" "default" "pithos")) 133 | 134 | ; Init script 135 | (.mkdirs (file dir "etc" "init.d")) 136 | (copy (file (:root project) "pkg" "deb" "init.sh") 137 | (file dir "etc" "init.d" "pithos")) 138 | (.setExecutable (file dir "etc" "init.d" "pithos") true false) 139 | 140 | dir)) 141 | 142 | (defn dpkg 143 | "Convert given package directory to a .deb." 
144 | [project deb-dir] 145 | (print (:err (sh "dpkg" "--build" 146 | (str deb-dir) 147 | (str (file (:root project) "target"))))) 148 | (let [deb-file-name (str (:name project) "-s3_" 149 | (get-version project) "_" 150 | "all" ".deb") 151 | deb-file (file (:root project) "target" deb-file-name)] 152 | (write (str deb-file ".md5") 153 | (str (md5 (slurp deb-file)) " " deb-file-name)))) 154 | 155 | (defn fatdeb 156 | ([project] 157 | (reset project) 158 | (uberjar project) 159 | (dpkg project (make-deb-dir project)) 160 | (cleanup project) 161 | (flush))) 162 | -------------------------------------------------------------------------------- /src/io/pithos/cors.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.cors 2 | (:refer-clojure :exclude [replace]) 3 | (:require [clojure.data.xml :refer [parse-str emit-str indent-str]] 4 | [clojure.string :refer [upper-case lower-case replace join split]] 5 | [clojure.tools.logging :refer [debug]] 6 | [clojure.zip :refer [xml-zip node root]] 7 | [clojure.data.zip :refer [children]] 8 | [clojure.data.zip.xml :refer [xml-> xml1-> text]] 9 | [io.pithos.util :refer [string->pattern]] 10 | [io.pithos.xml :refer [seq->xml]])) 11 | 12 | (defn node->rule 13 | "Provide a " 14 | [node] 15 | {:origins (vec (xml-> node :AllowedOrigin text)) 16 | :methods (vec (map (comp keyword lower-case) 17 | (xml-> node :AllowedMethod text))) 18 | :headers (vec (xml-> node :AllowedHeader text)) 19 | :exposed (vec (xml-> node :ExposeHeader text)) 20 | :max-age (xml1-> node :MaxAgeSeconds text)}) 21 | 22 | (defn safe-xml-zip 23 | "Ingest an XML representation, safely, throwing explicit 24 | and details errors." 
25 | [src] 26 | (try 27 | (let [tree (xml-zip (parse-str src)) 28 | {:keys [tag]} (root tree)] 29 | (when-not (= :CORSConfiguration tag) 30 | (throw (ex-info "XML Root Node should be CORSConfiguration" 31 | {:type :invalid-xml-root-node 32 | :expected :CORSConfiguration 33 | :got tag}))) 34 | tree) 35 | (catch clojure.lang.ExceptionInfo e 36 | (throw e)) 37 | (catch Exception e 38 | (throw (ex-info "Invalid XML in CORS Body" 39 | {:type :invalid-cors-xml 40 | :status-code 400}))))) 41 | 42 | (defn xml->cors 43 | [src] 44 | (let [xml-tree (safe-xml-zip src) 45 | rules (xml-> xml-tree 46 | :CORSRule 47 | node->rule)] 48 | (vec rules))) 49 | 50 | (defn as-xml 51 | ([rules indent?] 52 | (let [format (if indent? indent-str emit-str) 53 | xml-ns "http://s3.amazonaws.com/doc/2006-03-01/"] 54 | (format 55 | (seq->xml 56 | (apply vector 57 | :CORSConfiguration {:xmlns xml-ns} 58 | (for [{:keys [origins methods headers exposed max-age]} rules] 59 | (apply vector :CORSRule 60 | (concat 61 | (mapv (partial vector :AllowedOrigin) origins) 62 | (mapv (partial vector :AllowedMethod) 63 | (map (comp upper-case name) methods)) 64 | (mapv (partial vector :AllowedHeader) headers) 65 | (mapv (partial vector :ExposeHeader) exposed) 66 | (if max-age 67 | [[:MaxAgeSeconds max-age]] 68 | []))))))))) 69 | ([rules] 70 | (as-xml rules false))) 71 | 72 | (defn origin-matches? 73 | [src dst] 74 | (let [dst (string->pattern dst) 75 | pat (str "^" (replace dst "\\*" "(.*)") "$")] 76 | (re-find (re-pattern pat) src))) 77 | 78 | (defn origins-match? 79 | [origin method req-headers {:keys [origins methods headers]}] 80 | (and (some #(origin-matches? origin %) origins) 81 | ((set methods) method))) 82 | 83 | (defn merge-rules 84 | [left right] 85 | (if (sequential? left) 86 | (set (concat left right)) 87 | (if (neg? (compare left right)) left right))) 88 | 89 | (defn make-pattern 90 | "Our limited pattern builder. 
If a star is found, do a prefix-match" 91 | [s] 92 | (let [s (lower-case s)] 93 | (if (.contains s "*") 94 | {:pattern (replace s #"\*.*$" "")} 95 | {:exact-match s}))) 96 | 97 | (defn pattern-matches? 98 | [header {:keys [pattern exact-match]}] 99 | (let [header (lower-case header)] 100 | (if pattern 101 | (.startsWith header pattern) 102 | (= header exact-match)))) 103 | 104 | (defn match-headers 105 | [req-headers headers] 106 | (when req-headers 107 | (let [patterns (map make-pattern headers) 108 | req-headers (split req-headers #"[ \t]*,[ \t]*")] 109 | (join ", " 110 | (for [header req-headers 111 | :when (some #(pattern-matches? header %) patterns)] 112 | header))))) 113 | 114 | (defn rule->headers 115 | [origin method req-headers {:keys [methods exposed headers max-age]}] 116 | (let [allowed-headers (match-headers req-headers headers)] 117 | (-> {"Access-Control-Allow-Origin" origin 118 | "Access-Control-Allow-Methods" (-> method name upper-case) 119 | "Access-Control-Expose-Headers" (join ", " exposed)} 120 | (cond-> max-age (assoc "Access-Control-Max-Age" 121 | (str max-age)) 122 | allowed-headers (assoc "Access-Control-Allow-Headers" 123 | allowed-headers))))) 124 | 125 | (defn matches? 126 | [cors headers method] 127 | (let [origin (get headers "origin" "_____________________________") 128 | method (if (= method :options) 129 | (some-> (get headers "access-control-request-method") 130 | lower-case 131 | keyword) 132 | method) 133 | req-headers (get headers "access-control-request-headers")] 134 | (when-not method 135 | (throw (ex-info "Invalid Argument" {:type :invalid-argument 136 | :status-code 400 137 | :arg "Access-Control-Request-Method" 138 | :val ""}))) 139 | (if-let [matching-rules (seq (filter (partial origins-match? 
140 | origin method req-headers) 141 | cors))] 142 | (rule->headers 143 | origin method req-headers 144 | (reduce (partial merge-with merge-rules) {} matching-rules)) 145 | {}))) 146 | -------------------------------------------------------------------------------- /src/io/pithos/bucket.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.bucket 2 | "The bucketstore stores ring-global information on bucket 3 | ownership. It contains a single column-family and an 4 | accompanying index." 5 | (:refer-clojure :exclude [update]) 6 | (:require [qbits.alia :as a] 7 | [qbits.hayt :refer [select where set-columns 8 | create-table create-index 9 | column-definitions index-name 10 | delete update limit]] 11 | [io.pithos.util :refer [iso8601-timestamp]] 12 | [io.pithos.system :as system] 13 | [io.pithos.store :as store])) 14 | 15 | (defprotocol Bucketstore 16 | "The bucketstore contains the schema migration function, 17 | two bucket lookup functions and CRUD signatures" 18 | (by-tenant [this tenant]) 19 | (by-name [this bucket])) 20 | 21 | (defprotocol BucketDescriptor 22 | (region [this]) 23 | (versioned? 
[this])) 24 | 25 | (defprotocol RegionDescriptor 26 | (metastore [this])) 27 | 28 | ;; ring-global metastore 29 | 30 | (def bucket-table 31 | "Bucket properties" 32 | (create-table 33 | :bucket 34 | (column-definitions {:bucket :text 35 | :created :text 36 | :tenant :text 37 | :region :text 38 | :acl :text 39 | :cors :text 40 | :website :text 41 | :policy :text 42 | :versioned :boolean 43 | :primary-key :bucket}))) 44 | 45 | (def bucket_tenant-index 46 | "Index bucket on tenant" 47 | (create-index 48 | :bucket 49 | :tenant 50 | (index-name :bucket_tenant))) 51 | 52 | (defn bucket-by-tenant-q 53 | "Cassandra query for bucket by tenant" 54 | [tenant] 55 | (select :bucket (where [[= :tenant tenant]]))) 56 | 57 | (defn fetch-bucket-q 58 | "Cassandra query for bucket by name" 59 | [bucket] 60 | (select :bucket (where [[= :bucket bucket]]) (limit 1))) 61 | 62 | (defn update-bucket-q 63 | "Bucket creation or update" 64 | [bucket columns] 65 | (update :bucket 66 | (set-columns columns) 67 | (where [[= :bucket bucket]]))) 68 | 69 | (defn delete-bucket-q 70 | "Bucket destruction" 71 | [bucket] 72 | (delete :bucket (where [[= :bucket bucket]]))) 73 | 74 | (defn cassandra-bucket-store 75 | "Given a cluster configuration, reify an instance of Bucketstore. 76 | The cassandra bucket store suffers from a design flaw since last 77 | write-wins might yield a success response for a bucket which will 78 | later be claimed. 
79 | 80 | This can be fixed with the following strategies: 81 | 82 | - writing a bucket store that targets an SQL DB instead of cassandra 83 | - using lightweight transactions 84 | - wrap ownership around a zookeeper lock 85 | 86 | If you care deeply about bucket ownership, I'd suggest looking into 87 | the above options" 88 | [{:keys [default-region read-consistency write-consistency] :as config}] 89 | (let [copts (dissoc config :read-consistency :write-consistency) 90 | session (store/cassandra-store copts) 91 | rdcty (or (some-> read-consistency keyword) :quorum) 92 | wrcty (or (some-> write-consistency keyword) :quorum) 93 | read! (fn [query] (a/execute session query {:consistency rdcty})) 94 | write! (fn [query] (a/execute session query {:consistency wrcty}))] 95 | (reify 96 | store/Convergeable 97 | (converge! [this] 98 | (write! bucket-table) 99 | (write! bucket_tenant-index)) 100 | store/Crudable 101 | (create! [this tenant bucket columns] 102 | (if-let [[details] (seq (read! (fetch-bucket-q bucket)))] 103 | (when (not= tenant (:tenant details)) 104 | (throw (ex-info 105 | "bucket already exists" 106 | {:type :bucket-already-exists 107 | :bucket bucket 108 | :status-code 409}))) 109 | (write! 110 | (update-bucket-q bucket 111 | (merge {:region default-region 112 | :created (iso8601-timestamp)} 113 | columns 114 | {:tenant tenant}))))) 115 | (update! [this bucket columns] 116 | (write! (update-bucket-q bucket columns))) 117 | (delete! [this bucket] 118 | (if-let [info (seq (read! (fetch-bucket-q bucket)))] 119 | (write! (delete-bucket-q bucket)) 120 | (throw (ex-info "bucket not found" 121 | {:type :no-such-bucket 122 | :status-code 404 123 | :bucket bucket})))) 124 | Bucketstore 125 | (by-tenant [this tenant] 126 | (read! (bucket-by-tenant-q tenant))) 127 | (by-name [this bucket] 128 | (first 129 | (read! 
(fetch-bucket-q bucket))))))) 130 | 131 | (defn get-region 132 | "Fetch the regionstore from regions" 133 | [system region] 134 | (or (get (system/regions system) region) 135 | (throw (ex-info (str "could not find region: " region) 136 | {:status-code 500})))) 137 | 138 | (defn bucket-descriptor 139 | [system bucket] 140 | (let [bucketstore (system/bucketstore system) 141 | details (by-name bucketstore bucket)] 142 | (if details 143 | (let [{:keys [versioned region bucket]} details 144 | {:keys [metastore]} (get-region system region)] 145 | (reify 146 | BucketDescriptor 147 | (versioned? [this] versioned) 148 | (region [this] region) 149 | RegionDescriptor 150 | (metastore [this] metastore) 151 | clojure.lang.ILookup 152 | (valAt [this k] 153 | (get details k)) 154 | (valAt [this k def] 155 | (get details k def)))) 156 | (throw (ex-info "bucket not found" 157 | {:type :no-such-bucket 158 | :status-code 404 159 | :bucket bucket}))))) 160 | -------------------------------------------------------------------------------- /doc/source/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quickstart Guide 2 | ================ 3 | 4 | Getting up and running with pithos involves two things which 5 | we'll cover in this quick walk-through: 6 | 7 | - Installing and running Apache Cassandra 8 | - Installing and running pithos 9 | 10 | Alternately, there is a version of pithos which embeds Apache Cassandra. 11 | 12 | Obtaining pithos 13 | ---------------- 14 | 15 | Pithos is released in both source and binary. Binary distributions come in 16 | two flavors: standard and standalone with embedded cassandra. 17 | 18 | Binary releases 19 | ~~~~~~~~~~~~~~~ 20 | 21 | Binary release are the simplest way to get started and are hosted on github: 22 | https://github.com/exoscale/pithos/releases. 
23 | 24 | Each release contains: 25 | 26 | - A source code archive 27 | - A standard build (*pithos-VERSION-standalone.jar*) 28 | - A quickstart build which embeds cassandra (*pithos-quickstart-VERSION-standalone.jar*) 29 | 30 | 31 | Requirements 32 | ------------ 33 | 34 | Runtime requirements 35 | ~~~~~~~~~~~~~~~~~~~~ 36 | 37 | Runtime requirements for pithos are kept to a minimum 38 | 39 | - Java 7 Runtime (Sun JDK recommended) 40 | - Apache Cassandra 2.1 (for standard distribution) 41 | 42 | Build requirements 43 | ~~~~~~~~~~~~~~~~~~ 44 | 45 | If you wish to build pithos you will additionally need the 46 | `leiningen`_ build tool to produce working artifacts. 47 | 48 | .. _leiningen: https://leiningen.org 49 | 50 | Minimal configuration 51 | --------------------- 52 | 53 | Pithos is configured with a single configuration file, formatted in YAML_. 54 | 55 | 56 | .. _YAML: http://yaml.org 57 | 58 | .. sourcecode:: yaml 59 | 60 | # 61 | ## pithos main configuration 62 | ## ========================= 63 | # 64 | # This file contains the following sections 65 | # - service 66 | # - logging 67 | # - options 68 | # - keystore 69 | # - bucketstore 70 | # - regions 71 | 72 | 73 | ## service configuration 74 | ## --------------------- 75 | # 76 | # indicates 77 | service: 78 | host: '127.0.0.1' 79 | port: 8080 80 | 81 | 82 | ## logging configuration 83 | ## --------------------- 84 | logging: 85 | level: info 86 | console: true 87 | files: 88 | - "/tmp/pithos.log" 89 | # overrides: 90 | # io.exo.pithos: debug 91 | 92 | 93 | ## global options 94 | ## -------------- 95 | options: 96 | service-uri: 's3.example.com' 97 | reporting: true 98 | server-side-encryption: true 99 | multipart-upload: true 100 | masterkey-provisioning: true 101 | masterkey-access: true 102 | default-region: 'CH-GV1' 103 | 104 | 105 | ## keystore configuration 106 | ## ---------------------- 107 | # 108 | # Keystores associate an access key with 109 | # an organization and secret key. 
110 | # 111 | # They may offer provisioning capabilities with the 112 | # masterkey. The default provider relies on keys 113 | # being defined inline. 114 | keystore: 115 | keys: 116 | AKIAIOSFODNN7EXAMPLE: 117 | # The master key allows provisioning operations 118 | # when the masterkey-provisioning feature is 119 | # set to true and will allow access to all 120 | # buckets when masterkey-access is set to true 121 | master: true 122 | tenant: 'pyr@spootnik.org' 123 | secret: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' 124 | BKIAIOSFODNN7EXAMPLE: 125 | tenant: 'exoscale' 126 | secret: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' 127 | 128 | 129 | ## bucketstore configuration 130 | ## ------------------------- 131 | # 132 | # The bucketstore is ring global and contains information 133 | # on bucket location and global parameters. 134 | # 135 | # Its primary aim is to hold bucket location and ownership 136 | # information. 137 | # 138 | # The default provider relies on cassandra. 139 | bucketstore: 140 | default-region: 'CH-GV1' 141 | cluster: 'localhost' 142 | keyspace: 'storage' 143 | 144 | 145 | ## regions 146 | ## ------- 147 | # 148 | # Regions are composed of a metastore and an arbitrary number 149 | # of named storage classes which depend on a blobstore. 150 | # 151 | # The metastore holds metadata for the full region, as well as 152 | # object storage-class placement information. 153 | # 154 | # The default implementation of both metastore and blobstore 155 | # relies on cassandra.
156 | # 157 | regions: 158 | CH-GV1: 159 | metastore: 160 | cluster: 'localhost' 161 | keyspace: 'storage' 162 | storage-classes: 163 | standard: 164 | cluster: 'localhost' 165 | keyspace: 'storage' 166 | max-chunk: '128k' 167 | max-block-chunks: 1024 168 | 169 | 170 | Running pithos 171 | -------------- 172 | 173 | Command line arguments 174 | ~~~~~~~~~~~~~~~~~~~~~~ 175 | 176 | Pithos accepts the following arguments:: 177 | 178 | Switches Default Desc 179 | -------- ------- ---- 180 | -h, --no-help, --help false Show Help 181 | -f, --path Configuration file path 182 | -q, --no-quiet, --quiet false Never output to stdout 183 | -a, --action api-run Specify an action (api-run, install-schema) 184 | 185 | The only non-standard option is the `-a` option which allows either starting 186 | the service normally or converging a cassandra schema. 187 | 188 | Running the standalone version 189 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 190 | The standalone version can just be run against a configuration file:: 191 | 192 | java -jar pithos-quickstart-VERSION-standalone.jar -f pithos.yaml 193 | 194 | Running against an existing cluster 195 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 196 | 197 | The first time you run a standard pithos distribution, you will need 198 | to converge the necessary cassandra schema:: 199 | 200 | java -jar pithos-VERSION-standalone.jar -f pithos.yaml -a install-schema 201 | 202 | This will create the necessary keyspaces and column families in cassandra. 
203 | Once finished, pithos can be started normally:: 204 | 205 | java -jar pithos-VERSION-standalone.jar -f pithos.yaml 206 | 207 | 208 | -------------------------------------------------------------------------------- /src/io/pithos/config.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.config 2 | "This namespace parses YAML data into clojure forms which 3 | are then augmented with a mechanism initialy described here: 4 | 5 | http://bit.ly/1xRgOLb 6 | 7 | Default implementation for protocols are provided but can be overriden 8 | with the `use` keyword. 9 | " 10 | (:require [clj-yaml.core :refer [parse-string]] 11 | [clojure.tools.logging :refer [error info debug]] 12 | [io.pithos.util :refer [to-bytes]] 13 | [unilog.config :refer [start-logging!]] 14 | [raven.client :refer [capture!]] 15 | [net.http.client :refer [build-client]])) 16 | 17 | 18 | (start-logging! 19 | {:pattern "%p [%d] %t - %c - %m%n" 20 | :external false 21 | :console true 22 | :files [] 23 | :level "info" 24 | :overrides {}}) 25 | 26 | (def default-logging 27 | "Logging can be bypassed if a logback configuration is provided 28 | to the underlying JVM" 29 | {:use "org.spootnik.logconfig/start-logging!" 
30 | :pattern "%p [%d] %t - %c - %m%n" 31 | :external false 32 | :console true 33 | :files [] 34 | :level "info" 35 | :overrides {:io.pithos "debug"}}) 36 | 37 | (def default-keystore 38 | "keystore defaults to MapKeyStore" 39 | {:use "io.pithos.keystore/map-keystore"}) 40 | 41 | (def default-bucketstore 42 | "bucketstore defaults to cassandra" 43 | {:use "io.pithos.bucket/cassandra-bucket-store"}) 44 | 45 | (def default-metastore 46 | "metastore defaults to cassandra" 47 | {:use "io.pithos.meta/cassandra-meta-store"}) 48 | 49 | (def default-blobstore 50 | "blobstore defaults to cassandra, a max chunk of 512k 51 | and no more than 2048 chunks per block" 52 | {:use "io.pithos.blob/cassandra-blob-store" 53 | :max-chunk "512k" 54 | :max-block-chunks 2048}) 55 | 56 | (def default-reporter 57 | "reporters default to logging" 58 | {:use "io.pithos.reporter/logging-reporter"}) 59 | 60 | (def default-service 61 | "The http service is exposed on localhost port 8080 by default" 62 | {:host "127.0.0.1" 63 | :port 8080}) 64 | 65 | (def default-options 66 | "Some default global options." 67 | {:service-uri "s3.amazonaws.com" 68 | :reporting true 69 | :server-side-encryption true 70 | :multipart-upload true 71 | :masterkey-provisioning true 72 | :masterkey-access true}) 73 | 74 | (defn find-ns-var 75 | "Go fetch a var in a namespace. 
Extracts the namespace and requires it, 76 | then finds the var" 77 | [s] 78 | (try 79 | (let [n (namespace (symbol s))] 80 | (require (symbol n)) 81 | (find-var (symbol s))) 82 | (catch Exception _ 83 | nil))) 84 | 85 | (defn instantiate 86 | "Find a symbol pointing to a function of a single argument and 87 | call it" 88 | [class config] 89 | (if-let [f (find-ns-var class)] 90 | (f config) 91 | (throw (ex-info (str "no such namespace: " class) {})))) 92 | 93 | (defn get-instance 94 | "Create instance by supplying config to the implementation specified 95 | in `use`" 96 | [{:keys [use] :as config} target] 97 | (debug "building " target " with " use) 98 | (instantiate (-> use name symbol) config)) 99 | 100 | (defn load-path 101 | "Try to find a pathname, on the command line, in 102 | system properties or the environment and load it." 103 | [path] 104 | (-> (or path 105 | (System/getProperty "pithos.configuration") 106 | (System/getenv "PITHOS_CONFIGURATION") 107 | "/etc/pithos/pithos.yaml") 108 | slurp 109 | parse-string)) 110 | 111 | (defn get-storage-classes 112 | "Create instances of blobstores for all storage classes (in a region)" 113 | [storage-classes] 114 | (->> (for [[storage-class blobstore] storage-classes 115 | :let [blobstore (-> (merge default-blobstore blobstore) 116 | (update-in [:max-chunk] to-bytes :max-chunk))]] 117 | [storage-class (get-instance blobstore :blobstore)]) 118 | (reduce merge {}))) 119 | 120 | (defn get-region-stores 121 | "Create instances for each region's metastore then create storage classes" 122 | [regions] 123 | (->> (for [[region {:keys [metastore storage-classes]}] regions 124 | :let [metastore (merge default-metastore metastore)]] 125 | [(name region) 126 | {:metastore (get-instance metastore :metastore) 127 | :storage-classes (get-storage-classes storage-classes)}]) 128 | (reduce merge {}))) 129 | 130 | (defn get-reporters 131 | [reporters] 132 | (for [reporter reporters 133 | :let [reporter (merge default-reporter 
reporter)]] 134 | (get-instance reporter :reporter))) 135 | 136 | (defn get-sentry 137 | [sentry] 138 | (if sentry 139 | (let [client (build-client (:http sentry))] 140 | (fn [ev] 141 | (capture! client (:dsn sentry) ev))) 142 | (fn [& _] 143 | (debug "no sentry configuration, no capture done.")))) 144 | 145 | (defn parse-cors 146 | [rules] 147 | (let [->sym (fn [s] (-> s name .toLowerCase keyword)) 148 | sanitize (fn [{:keys [methods] :as rule}] 149 | (assoc rule :methods (map ->sym methods)))] 150 | (mapv sanitize rules))) 151 | 152 | (defn init 153 | "Parse YAML file, merge in defaults and then create instances 154 | where applicable" 155 | [path quiet?] 156 | (try 157 | (when-not quiet? 158 | (println "starting with configuration: " path)) 159 | (let [opts (load-path path)] 160 | (info "setting up logging according to config") 161 | (start-logging! (merge default-logging (:logging opts))) 162 | (-> opts 163 | (update-in [:service] (partial merge default-service)) 164 | (update-in [:options] (partial merge default-options)) 165 | (update-in [:options :default-cors] parse-cors) 166 | (update-in [:keystore] (partial merge default-keystore)) 167 | (update-in [:keystore] get-instance :keystore) 168 | (update-in [:bucketstore] (partial merge default-bucketstore)) 169 | (update-in [:bucketstore] get-instance :bucketstore) 170 | (update-in [:reporters] get-reporters) 171 | (update-in [:sentry] get-sentry) 172 | (update-in [:regions] get-region-stores))) 173 | (catch Exception e 174 | (when-not quiet? 175 | (println "invalid or incomplete configuration: " (str e))) 176 | (error e "invalid or incomplete configuration") 177 | (System/exit 1)))) 178 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 
5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make 
Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pithos.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pithos.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pithos" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pithos" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 
129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
178 | -------------------------------------------------------------------------------- /tasks/leiningen/fatrpm.clj: -------------------------------------------------------------------------------- 1 | (ns leiningen.fatrpm 2 | "Build a .rpm package from leiningen, stolen from riemann." 3 | (:refer-clojure :exclude [replace]) 4 | (:use [clojure.java.shell :only [sh]] 5 | [clojure.java.io :only [file delete-file writer copy]] 6 | [clojure.string :only [join capitalize trim-newline replace]] 7 | [leiningen.uberjar :only [uberjar]] 8 | [leiningen.tar :only [md5]]) 9 | (:import java.util.Date 10 | java.text.SimpleDateFormat 11 | (org.codehaus.mojo.rpm RPMMojo 12 | AbstractRPMMojo 13 | Mapping Source 14 | SoftlinkSource 15 | Scriptlet) 16 | (org.apache.maven.project MavenProject) 17 | (org.apache.maven.shared.filtering DefaultMavenFileFilter) 18 | (org.codehaus.plexus.logging.console ConsoleLogger))) 19 | 20 | (defn write 21 | "Write string to file, plus newline" 22 | [file string] 23 | (with-open [w (writer file)] 24 | (.write w (str (trim-newline string) "\n")))) 25 | 26 | (defn workarea 27 | [project] 28 | (file (:root project) "target" "rpm")) 29 | 30 | (defn cleanup 31 | [project] 32 | (sh "rm" "-rf" (str (workarea project)))) 33 | 34 | (defn reset 35 | [project] 36 | (cleanup project) 37 | (sh "rm" (str (:root project) "/target/*.rpm"))) 38 | 39 | (defn get-version 40 | [project] 41 | (let [df (SimpleDateFormat. ".yyyyMMdd.HHmmss")] 42 | (replace (:version project) #"-SNAPSHOT" (.format df (Date.))))) 43 | 44 | (defn set-mojo! 45 | "Set a field on an AbstractRPMMojo object." 
46 | [object name value] 47 | (let [field (.getDeclaredField AbstractRPMMojo name)] 48 | (.setAccessible field true) 49 | (.set field object value)) 50 | object) 51 | 52 | (defn array-list "Copy the items of a collection into a fresh java.util.ArrayList." 53 | [list] 54 | (let [alist (java.util.ArrayList.)] 55 | (doseq [item list] (.add alist item)) 56 | alist)) 57 | 58 | (defn scriptlet 59 | "Creates a scriptlet backed by a file" 60 | [filename] 61 | (doto (Scriptlet.) 62 | (.setScriptFile (file filename)))) 63 | 64 | (defn source 65 | "Create a source with a local location and a destination." 66 | ([] (Source.)) 67 | ([location] 68 | (doto (Source.) 69 | (.setLocation (str location)))) 70 | ([location destination] 71 | (doto (Source.) 72 | (.setLocation (str location)) 73 | (.setDestination (str destination))))) 74 | 75 | (defn mapping 76 | [m] 77 | (doto (Mapping.) 78 | (.setArtifact (:artifact m)) 79 | (.setConfiguration (case (:configuration m) 80 | true "true" 81 | false "false" 82 | nil "false" 83 | (:configuration m))) 84 | (.setDependency (:dependency m)) 85 | (.setDirectory (:directory m)) 86 | (.setDirectoryIncluded (boolean (:directory-included? m))) 87 | (.setDocumentation (boolean (:documentation? m))) 88 | (.setFilemode (:filemode m)) 89 | (.setGroupname (:groupname m)) 90 | (.setRecurseDirectories (boolean (:recurse-directories?
m))) 91 | (.setSources (:sources m)) 92 | (.setUsername (:username m)))) 93 | 94 | (defn mappings 95 | [project] 96 | (map (comp mapping 97 | (partial merge {:username "pithos" 98 | :groupname "pithos"})) 99 | 100 | [; Jar 101 | {:directory "/usr/lib/pithos/" 102 | :filemode "644" 103 | :sources [(source (str (file (:root project) 104 | "target" 105 | (str "pithos-" 106 | (:version project) 107 | "-standalone.jar"))) 108 | "pithos.jar")]} 109 | 110 | ; Binary 111 | {:directory "/usr/bin" 112 | :filemode "755" 113 | :sources [(source (file (:root project) "pkg" "rpm" "pithos") 114 | "pithos")]} 115 | 116 | ; Log dir 117 | {:directory "/var/log/pithos" 118 | :filemode "755" 119 | :directory-included? true} 120 | 121 | ; Config dir 122 | {:directory "/etc/pithos" 123 | :filemode "755" 124 | :directory-included? true 125 | :sources [(source (file (:root project) "doc" "pithos.yaml") 126 | "pithos.yaml")]} 127 | 128 | ; Default file 129 | {:directory "/etc/sysconfig" 130 | :filemode "644" 131 | :configuration true 132 | :sources [(source (file (:root project) "pkg" "rpm" "pithos-default") 133 | "pithos-default")]} 134 | 135 | ; Init script 136 | {:directory "/etc/init.d" 137 | :filemode "755" 138 | :username "root" 139 | :groupname "root" 140 | :sources [(source (file (:root project) "pkg" "rpm" "init.sh") 141 | "pithos")]}])) 142 | 143 | (defn blank-rpm 144 | "Create a new RPM file" 145 | [] 146 | (let [mojo (RPMMojo.) 147 | fileFilter (DefaultMavenFileFilter.)] 148 | (set-mojo! mojo "project" (MavenProject.)) 149 | (.enableLogging fileFilter (ConsoleLogger. 0 "Logger")) 150 | (set-mojo! mojo "mavenFileFilter" fileFilter))) 151 | 152 | (defn create-dependency 153 | [rs] 154 | (let [hs (java.util.LinkedHashSet.)] 155 | (doseq [r rs] (.add hs r)) 156 | hs)) 157 | 158 | (defn make-rpm 159 | "Create and execute a Mojo RPM." 160 | [project] 161 | (doto (blank-rpm) 162 | (set-mojo! "projversion" (get-version project)) 163 | (set-mojo! "name" (:name project)) 164 | (set-mojo! 
"summary" (:description project)) 165 | (set-mojo! "copyright" "exoscale") 166 | (set-mojo! "workarea" (workarea project)) 167 | (set-mojo! "mappings" (mappings project)) 168 | (set-mojo! "preinstallScriptlet" (scriptlet 169 | (file (:root project) 170 | "pkg" "deb" "preinst.sh"))) 171 | (set-mojo! "postinstallScriptlet" (scriptlet 172 | (file (:root project) 173 | "pkg" "rpm" "postinst.sh"))) 174 | (set-mojo! "preremoveScriptlet" (scriptlet 175 | (file (:root project) 176 | "pkg" "rpm" "prerm.sh"))) 177 | (set-mojo! "postremoveScriptlet" (scriptlet 178 | (file (:root project) 179 | "pkg" "deb" "postrm.sh"))) 180 | (set-mojo! "requires" (create-dependency ["daemonize >= 1.7.3"])) 181 | (.execute))) 182 | 183 | (defn extract-rpm 184 | "Snags the RPM file out of its little mouse-hole and brings it up to target/, 185 | then generates an md5" 186 | [project] 187 | (let [dir (file (workarea project) 188 | (:name project) 189 | "RPMS" 190 | "noarch") 191 | rpms (remove #(.isDirectory %) (.listFiles dir))] 192 | (doseq [rpm rpms] 193 | (let [dest (file (:root project) "target" (.getName rpm))] 194 | ; Move 195 | (.renameTo rpm dest) 196 | 197 | ; MD5 198 | (write (str dest ".md5") 199 | (str (md5 dest) " " (.getName rpm))))))) 200 | 201 | (defn fatrpm 202 | ([project] (fatrpm project true)) 203 | ([project uberjar?] 204 | (reset project) 205 | (when uberjar? (uberjar project)) 206 | (make-rpm project) 207 | (extract-rpm project) 208 | (cleanup project))) 209 | -------------------------------------------------------------------------------- /src/io/pithos/blob.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.blob 2 | "Blobstore interaction. This is one of the four storage protocols. 
3 | Storage protocols are split even though they mostly target cassandra 4 | because it allows: 5 | 6 | - Easy implementation of the protocol targetting different DBs 7 | - Splitting data in different keyspace with different replication props 8 | 9 | Implementations may be swapped in the configuration file, as described 10 | in the documentation for the `io.pithos.config' namespace. 11 | 12 | The Blobstore is the storage layer concerned with actually storing data. 13 | Its operations are purely commutative and never deal with filenames, since 14 | that responsibility lies within the _Metastore_ (see `io.pithos.meta`). 15 | 16 | The storage layout is rather simple: 17 | 18 | - Data is stored in inodes 19 | - An inode has a list of blocks 20 | - Blocks contain a list of chunks 21 | 22 | The maximum size of chunks in blocks and the payload size in chunks 23 | are configurable. This approach allows storage of large files spread 24 | accross many rows. 25 | 26 | To ensure metadata operations are decoupled from storage, the protocol 27 | relies on callbacks in a few places. 28 | 29 | " 30 | (:import java.util.UUID 31 | java.nio.ByteBuffer) 32 | (:require [clojure.java.io :as io] 33 | [io.pithos.store :as store] 34 | [io.pithos.desc :as d] 35 | [qbits.alia.uuid :as uuid] 36 | [qbits.alia :as a] 37 | [qbits.hayt :refer [select where columns order-by 38 | insert values limit delete count* 39 | create-table column-definitions]] 40 | [io.pithos.util :refer [md5-update md5-sum md5-init]] 41 | [clojure.tools.logging :refer [debug info error]])) 42 | 43 | ;; 44 | ;; A word on storage protocols 45 | ;; --------------------------- 46 | ;; 47 | ;; All storage protocols expose functions to produce side-effects 48 | ;; and a `converge!` function whose role is to apply the schema 49 | 50 | (def absolute-chunk-limit 51 | "max block per chunk can be exceeded when small chunks are uploaded. 52 | set a large limit of chunks to retrieve from a block." 
53 | 524288) 54 | 55 | 56 | (defprotocol Blobstore 57 | "The blobstore protocol, provides methods to read and write data 58 | to inodes, as well as a schema migration function. 59 | " 60 | (converge! [this]) 61 | (delete! [this inode version]) 62 | (blocks [this od]) 63 | (max-chunk [this]) 64 | (chunks [this od block offset]) 65 | (start-block! [this od block]) 66 | (chunk! [this od block offset chunk]) 67 | (boundary? [this block offset])) 68 | 69 | ;; CQL Schema 70 | (def inode_blocks-table 71 | "List of blocks found in an inode, keyed by inode and version" 72 | (create-table 73 | :inode_blocks 74 | (column-definitions {:inode :uuid 75 | :version :timeuuid 76 | :block :bigint 77 | :primary-key [[:inode :version] :block]}))) 78 | 79 | (def block-table 80 | "A block is keyed by inode version and first offset in the block. 81 | 82 | Blocks contain a list of offset, chunksize and payload (a byte-buffer) 83 | which contain the actual data being stored. chunksize is set in the 84 | configuration." 85 | (create-table 86 | :block 87 | (column-definitions {:inode :uuid 88 | :version :timeuuid 89 | :block :bigint 90 | :offset :bigint 91 | :chunksize :int 92 | :payload :blob 93 | :primary-key [[:inode :version :block] :offset]}))) 94 | 95 | 96 | ;; 97 | ;; start declaring CQL queries 98 | 99 | (defn get-block-q 100 | "Fetch list of blocks in an inode." 101 | [inode version order] 102 | (select :inode_blocks 103 | (columns :block) 104 | (where [[= :inode inode] 105 | [= :version version]]) 106 | (order-by [:block order]))) 107 | 108 | (defn set-block-q 109 | "Add a block to an inode." 110 | [inode version block] 111 | (insert :inode_blocks 112 | (values {:inode inode :version version :block block}))) 113 | 114 | (defn get-chunk-q 115 | "Fetch a specific chunk in a block." 
116 | [inode version block offset max] 117 | (select :block 118 | (where [[= :inode inode] 119 | [= :version version] 120 | [= :block block] 121 | [>= :offset offset]]) 122 | (limit max))) 123 | 124 | (defn set-chunk-q 125 | "Set a chunk in a block." 126 | [inode version block offset size chunk] 127 | (insert :block 128 | (values {:inode inode 129 | :version version 130 | :block block 131 | :offset offset 132 | :chunksize size 133 | :payload chunk}))) 134 | 135 | (defn delete-blockref-q 136 | "Remove all blocks in an inode." 137 | [inode version] 138 | (delete :inode_blocks (where [[= :inode inode] 139 | [= :version version]]))) 140 | 141 | (defn delete-block-q 142 | "Delete a specific inode block." 143 | [inode version block] 144 | (delete :block (where [[= :inode inode] 145 | [= :version version] 146 | [= :block block]]))) 147 | 148 | (defn cassandra-blob-store 149 | "cassandra-blob-store, given a maximum chunk size and maximum 150 | number of chunks per block and cluster configuration details, 151 | will create a cassandra session and reify a Blobstore instance 152 | " 153 | [{:keys [max-chunk max-block-chunks read-consistency write-consistency] 154 | :as config}] 155 | (let [copts (dissoc config :read-consistency :write-consistency) 156 | session (store/cassandra-store copts) 157 | rdcty (or (some-> read-consistency keyword) :quorum) 158 | wrcty (or (some-> write-consistency keyword) :quorum) 159 | read! (fn [query] (a/execute session query {:consistency rdcty})) 160 | write! (fn [query] (a/execute session query {:consistency wrcty})) 161 | bs (* max-chunk max-block-chunks) 162 | limit 100] 163 | (debug "got max-chunk " max-chunk "and max-block-chunks " max-block-chunks) 164 | (reify 165 | store/Convergeable 166 | (converge! [this] 167 | (write! inode_blocks-table) 168 | (write! block-table)) 169 | store/Crudable 170 | (delete! [this od version] 171 | (let [ino (if (= (class od) java.util.UUID) od (d/inode od))] 172 | (doseq [{block :block} (read! 
(get-block-q ino version :asc))] 173 | (write! (delete-block-q ino version block)) 174 | (write! (delete-blockref-q ino version))))) 175 | Blobstore 176 | (blocks [this od] 177 | (let [ino (d/inode od) 178 | ver (d/version od)] 179 | (read! (get-block-q ino ver :asc)))) 180 | 181 | (max-chunk [this] 182 | max-chunk) 183 | 184 | (chunks [this od block offset] 185 | (let [ino (d/inode od) 186 | ver (d/version od)] 187 | (seq (read! (get-chunk-q ino ver block offset 188 | absolute-chunk-limit))))) 189 | 190 | (boundary? [this block offset] 191 | (>= offset (+ block bs))) 192 | 193 | (start-block! [this od block] 194 | (write! (set-block-q (d/inode od) (d/version od) block))) 195 | 196 | (chunk! [this od block offset chunk] 197 | (let [size (- (.limit chunk) (.position chunk))] 198 | (write! (set-chunk-q (d/inode od) (d/version od) 199 | block offset size chunk)) 200 | size))))) 201 | -------------------------------------------------------------------------------- /doc/source/developer.rst: -------------------------------------------------------------------------------- 1 | Developer Guide 2 | =============== 3 | 4 | *pithos* is an open source project, available on github_: 5 | https://github.com/exoscale/pithos. 6 | 7 | *pithos* is developed in clojure_, a functional lisp which 8 | runs on the **JVM**. 9 | 10 | Building Pithos from source 11 | --------------------------- 12 | 13 | *pithos* is built with leiningen_, to build the 14 | standard version of *pithos* just run:: 15 | 16 | lein test 17 | lein compile :all 18 | lein uberjar 19 | 20 | If you wish to quickly test versions as you develop, 21 | you can run pithos directly from leiningen. You 22 | should place your test configuration file in 23 | the ``site/`` subdirectory:: 24 | 25 | lein run -- -f site/pithos.yaml 26 | 27 | Contributing to pithos 28 | ---------------------- 29 | 30 | Contributions to *pithos* are heavily encouraged. 
31 | The best way to contribute is to work on a separate 32 | git branch, branching off of the master branch:: 33 | 34 | git pull origin/master 35 | git checkout -B feature/new-feature 36 | 37 | Once work is ready, use the github pull-request 38 | mechanism for a code review to happen. 39 | 40 | .. _Custom Stores: 41 | 42 | Creating alternative store implementations 43 | ------------------------------------------ 44 | 45 | While pithos primarily targets Apache Cassandra, 46 | nothing prevents alternative implementations from being 47 | created for all parts of the service. 48 | 49 | *pithos*, through a simple dependency injection 50 | mechanism, allows custom implementations of stores 51 | to be plugged in. 52 | 53 | In clojure parlance, the only requirement an implementation 54 | must fulfill is to realize the correct protocol. 55 | 56 | Here is a summary of all current protocols: 57 | 58 | Convergeable 59 | ~~~~~~~~~~~~ 60 | 61 | The convergeable protocol is used to create the 62 | initial schema for databases that need it. It 63 | consists of a single method: 64 | 65 | .. sourcecode:: clojure 66 | 67 | (defprotocol Convergeable 68 | (converge! [this])) 69 | 70 | This method is called on blobstores, metastores and bucketstores 71 | during the ``install-schema`` phase. 72 | 73 | Crudable 74 | ~~~~~~~~ 75 | 76 | The metastore, blobstore and bucketstores share a few functions 77 | which are gathered in this protocol: 78 | 79 | ``fetch`` 80 | Retrieve metadata from buckets or objects (unused in blobstores) 81 | 82 | ``update!`` 83 | Updates an object's or bucket's metadata (unused in blobstores) 84 | 85 | ``create!`` 86 | Insert a bucket (unused in metastores and blobstores) 87 | 88 | ``delete!`` 89 | Delete an object, bucket or blob 90 | 91 | .. sourcecode:: clojure 92 | 93 | (defprotocol Crudable 94 | (fetch [this k] [this k1 k2] [this k1 k2 k3]) 95 | (update! [this k v] [this k1 k2 v] [this k1 k2 k3 v]) 96 | (delete! [this k] [this k1 k2] [this k1 k2 k3]) 97 | (create!
[this k v] [this k1 k2 v] [this k1 k2 k3 v])) 98 | 99 | 100 | clojure.lang.ILookup 101 | ~~~~~~~~~~~~~~~~~~~~ 102 | While not a *pithos* protocol per se, this protocol 103 | is used by keystores to behave like standard clojure 104 | maps. The method used within ``ILookup`` is ``valAt``; 105 | the expected output is a map containing the keys: 106 | 107 | - ``tenant``: the tenant this key belongs to 108 | - ``secret``: the associated secret key 109 | - ``memberof``: (*optional*) groups this tenant belongs to 110 | 111 | Bucketstore 112 | ~~~~~~~~~~~ 113 | 114 | The bucketstore exposes methods to handle buckets: 115 | 116 | ``by-tenant`` 117 | Retrieves a list of buckets by tenant 118 | 119 | ``by-name`` 120 | Retrieves a bucket by name 121 | 122 | .. sourcecode:: clojure 123 | 124 | (defprotocol Bucketstore 125 | "The bucketstore contains the schema migration function, 126 | two bucket lookup functions and CRUD signatures" 127 | (by-tenant [this tenant]) 128 | (by-name [this bucket])) 129 | 130 | 131 | Metastore 132 | ~~~~~~~~~ 133 | 134 | The metastore exposes methods to handle bucket metadata: 135 | 136 | ``prefixes`` 137 | Lists objects 138 | 139 | ``abort-multipart-upload!`` 140 | Aborts a multipart upload 141 | 142 | ``update-part!`` 143 | Updates a multipart upload's part metadata 144 | 145 | ``initiate-upload!`` 146 | Stores metadata for a multipart upload 147 | 148 | ``get-upload-details`` 149 | Retrieves metadata on an ongoing upload 150 | 151 | ``list-uploads`` 152 | Lists all uploads for a bucket 153 | 154 | ``list-object-uploads`` 155 | Lists all uploads for an object 156 | 157 | ``list-upload-parts`` 158 | Lists registered upload parts for an upload. 159 | 160 | .. sourcecode:: clojure 161 | 162 | (defprotocol Metastore 163 | "All necessary functions to manipulate bucket metadata" 164 | (prefixes [this bucket params]) 165 | (abort-multipart-upload! [this bucket object upload]) 166 | (update-part!
[this bucket object upload partno columns]) 167 | (initiate-upload! [this bucket object upload metadata]) 168 | (get-upload-details [this bucket object upload]) 169 | (list-uploads [this bucket prefix]) 170 | (list-object-uploads [this bucket object]) 171 | (list-upload-parts [this bucket object upload])) 172 | 173 | 174 | Blobstore 175 | ~~~~~~~~~ 176 | 177 | The blobstore exposes methods to store and retrieve data: 178 | 179 | ``blocks`` 180 | Retrieves blocks from an object descriptor 181 | 182 | ``max-chunk`` 183 | Maximum chunk-size for this blobstore 184 | 185 | ``chunks`` 186 | Retrieve chunks from a starting offset 187 | 188 | ``start-block!`` 189 | Mark the start of a block 190 | 191 | ``chunk!`` 192 | Store a chunk 193 | 194 | ``boundary?`` 195 | Check if a block boundary has been reached 196 | 197 | .. sourcecode:: clojure 198 | 199 | (defprotocol Blobstore 200 | "The blobstore protocol, provides methods to read and write data 201 | to inodes, as well as a schema migration function. 202 | " 203 | (blocks [this od]) 204 | (max-chunk [this]) 205 | (chunks [this od block offset]) 206 | (start-block! [this od block]) 207 | (chunk! [this od block offset chunk]) 208 | (boundary? [this block offset])) 209 | 210 | Reporter 211 | ~~~~~~~~ 212 | 213 | The reporter protocol exposes a single method used to register 214 | an event. 215 | 216 | ``report!`` 217 | This method hands off an event to the current reporter. 218 | 219 | 220 | .. sourcecode:: clojure 221 | 222 | (defprotocol Reporter 223 | (report! [this event])) 224 | 225 | 226 | An alternative keystore 227 | ~~~~~~~~~~~~~~~~~~~~~~~ 228 | 229 | The simplest example would be to create an alternative keystore. 230 | Let's pretend a simple, non-authenticated API is used to provide 231 | credential results. 232 | 233 | ..
sourcecode:: clojure 234 | 235 | (ns com.example.http-keystore 236 | (:require [qbits.jet.client.http :as http] 237 | [clojure.core.async :refer [ meta 110 | (merge {:inode inode 111 | :version version 112 | :atime ts}) 113 | (merge @cols) 114 | (dissoc :bucket :object))] 115 | (when-not (and (:inode meta) 116 | (:version meta) 117 | (:size meta) 118 | (:checksum meta)) 119 | (error "trying to write incomplete metadata" 120 | (pr-str meta)) 121 | (throw (ex-info "bad metadata" {:type :incomplete-metadata 122 | :status-code 500 123 | :meta (pr-str meta)}))) 124 | (store/update! metastore bucket object meta) 125 | (swap! cols assoc :atime ts))) 126 | clojure.lang.ILookup 127 | (valAt [this k] 128 | (get (merge meta {:tenant tenant :inode inode :version version} @cols) 129 | k)) 130 | (valAt [this k def] 131 | (get (merge meta {:tenant tenant :inode inode :version version} @cols) 132 | k 133 | def))))) 134 | 135 | 136 | (defn part-descriptor 137 | [system bucket object upload-id partnumber] 138 | (let [bucketstore (system/bucketstore system) 139 | regions (system/regions system) 140 | {:keys [region]} (bucket/by-name bucketstore bucket) 141 | {:keys [metastore 142 | storage-classes]} (bucket/get-region system region) 143 | meta (store/fetch metastore bucket object false) 144 | inode (uuid/random) 145 | version (uuid/time-based) 146 | ;; XXX: should support several storage classes 147 | blobstore (get storage-classes :standard) 148 | cols (atom {}) 149 | part (Long/parseLong partnumber)] 150 | (or 151 | (meta/get-upload-details metastore bucket object upload-id) 152 | (throw (ex-info "no such upload" {:type :no-such-upload 153 | :status-code 404 154 | :key object 155 | :upload upload-id}))) 156 | (reify 157 | bucket/BucketDescriptor 158 | (region [this] (get regions region)) 159 | bucket/RegionDescriptor 160 | (metastore [this] metastore) 161 | BlobDescriptor 162 | (size [this] (or (:size @cols) (:size meta))) 163 | (checksum [this] (or (:checksum @cols) (:checksum 
meta))) 164 | (inode [this] (or (:inode @cols) inode)) 165 | (version [this] (or (:version @cols) version)) 166 | (blobstore [this] blobstore) 167 | ObjectDescriptor 168 | (col! [this field val] 169 | (if (#{:size :checksum :inode :version} field) 170 | (swap! cols assoc (keyword field) val) 171 | (swap! cols assoc-in [:metadata (name field)] val))) 172 | (save! [this] 173 | (let [meta (-> {:inode inode :version version} 174 | (merge @cols) 175 | (merge {:modified (util/iso8601-timestamp)}))] 176 | (meta/update-part! metastore bucket object upload-id part meta)))))) 177 | -------------------------------------------------------------------------------- /src/io/pithos/perms.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.perms 2 | (:require [io.pithos.bucket :as bucket] 3 | [io.pithos.system :as system] 4 | [io.pithos.desc :as desc] 5 | [io.pithos.acl :as acl] 6 | [clojure.string :refer [split]] 7 | [clojure.tools.logging :refer [debug]])) 8 | 9 | (defn granted-for? 10 | "Do current permission allow for operation on this particular perm ?" 11 | [acl for needs] 12 | (loop [[{:keys [URI DisplayName ID] :as id} & ids] (get acl needs)] 13 | (when id 14 | (or (= URI for) (= ID for) (recur ids))))) 15 | 16 | (defn granted? 17 | "Do current permissions allow for operation ?" 18 | [acl needs for] 19 | (some identity (map (partial granted-for? acl for) needs))) 20 | 21 | (defn bucket-satisfies? 22 | "Ensure sufficient rights for bucket access" 23 | [{:keys [tenant acl]} {:keys [for groups needs]}] 24 | (let [needs [:FULL_CONTROL needs] 25 | acl (if acl (read-string acl))] 26 | (or (= tenant for) 27 | (granted? acl needs for) 28 | (some identity (map (partial granted? acl needs) groups))))) 29 | 30 | (defn object-satisfies? 
31 | "Ensure sufficient rights for object accessp" 32 | [{tenant :tenant} {acl :acl} {:keys [for groups needs]}] 33 | (let [needs [:FULL_CONTROL needs] 34 | acl (if acl (read-string acl))] 35 | (or (= tenant for) 36 | (granted? acl needs for) 37 | (some identity (map (partial granted? acl needs) groups))))) 38 | 39 | (defn authorize 40 | "Check permission to service operation, each operation has a list 41 | of needed permissions, any failure results in an exception being raised 42 | which prevents any further action from being taken." 43 | [{:keys [authorization bucket object]} perms system] 44 | (let [{:keys [tenant memberof]} authorization 45 | memberof? (set memberof) 46 | bucketstore (system/bucketstore system)] 47 | (doseq [[perm arg] (map (comp flatten vector) perms)] 48 | (case perm 49 | :authenticated (when-not (not= tenant :anonymous) 50 | (debug "unauthenticated request to private resource") 51 | (throw (ex-info "access denied" {:status-code 403 52 | :type :access-denied}))) 53 | :memberof (when-not (memberof? arg) 54 | (debug "not a member of: " arg "groups:" (pr-str memberof?)) 55 | (throw (ex-info "access denied" {:status-code 403 56 | :type :access-denied}))) 57 | :bucket (let [bd (bucket/by-name bucketstore bucket)] 58 | (when-not bd 59 | (throw (ex-info "bucket not found" 60 | {:type :no-such-bucket 61 | :status-code 404 62 | :bucket bucket}))) 63 | (when-not (bucket-satisfies? bd {:for tenant 64 | :groups memberof? 65 | :needs arg}) 66 | (debug "unsatisfied ACL for bucket. candidate:" (pr-str tenant) 67 | "groups:" (pr-str memberof?) 68 | "needs:" arg 69 | "acl:" (:acl bd) 70 | "bucket-owner:" (:tenant bd)) 71 | (throw (ex-info "access denied" {:status-code 403 72 | :type :access-denied})))) 73 | :object (when-not (object-satisfies? 74 | (bucket/by-name bucketstore bucket) 75 | (desc/object-descriptor system bucket object) 76 | {:for tenant 77 | :groups memberof? 78 | :needs arg}) 79 | (debug "unsatisfied ACL for object. 
candidate:" (pr-str tenant) 80 | "groups:" (pr-str memberof?) 81 | "needs:" arg) 82 | (throw (ex-info "access denied" {:status-code 403 83 | :type :access-denied}))))) 84 | true)) 85 | 86 | (defn ->grantee 87 | [str] 88 | (debug "translating: " str) 89 | (let [[_ type dest] (or (re-find #"(emailAddress|id|uri)=\"(.*)\"" str) 90 | (re-find #"(emailAddress|id|uri)=(.*)" str) 91 | (throw (ex-info "Invalid Argument" 92 | {:type :invalid-argument 93 | :status-code 400 94 | :arg "x-amz-acl-*" 95 | :val str})))] 96 | (cond (#{"id" "emailAddress"} type) {:ID dest :DisplayName dest} 97 | :else {:URI (or (acl/known-uris dest) 98 | dest)}))) 99 | 100 | (defn has-header-acl? 101 | [headers] 102 | (or (get headers "x-amz-acl") 103 | (get headers "x-amz-grant-read") 104 | (get headers "x-amz-grant-read-acp") 105 | (get headers "x-amz-grant-write") 106 | (get headers "x-amz-grant-write-acp") 107 | (get headers "x-amz-grant-full-control"))) 108 | 109 | (defn header-acl 110 | [owner tenant headers] 111 | (let [init (if (= owner tenant) 112 | {:FULL_CONTROL [{:ID owner :DisplayName owner}]} 113 | {:FULL_CONTROL [{:ID owner :DisplayName owner} 114 | {:ID tenant :DisplayName tenant}]}) 115 | canned-acl (get headers "x-amz-acl") 116 | acl-read (some-> (get headers "x-amz-grant-read") 117 | (split #",")) 118 | acl-write (some-> (get headers "x-amz-grant-write") 119 | (split #",")) 120 | acl-read-acp (some-> (get headers "x-amz-grant-read-acp") 121 | (split #",")) 122 | acl-write-acp (some-> (get headers "x-amz-grant-write-acp") 123 | (split #",")) 124 | acl-full-ctl (some-> (get headers "x-amz-grant-full-control") 125 | (split #",")) 126 | explicit-acl {:READ (mapv ->grantee acl-read) 127 | :READ_ACP (mapv ->grantee acl-read-acp) 128 | :WRITE (mapv ->grantee acl-write) 129 | :WRITE_ACP (mapv ->grantee acl-write-acp) 130 | :FULL_CONTROL (mapv ->grantee acl-full-ctl)}] 131 | (pr-str 132 | (cond 133 | 134 | canned-acl 135 | (case canned-acl 136 | "public-read-write" 137 | (merge init 
{:READ [{:URI "anonymous"}] 138 | :WRITE [{:URI "anonymous"}]}) 139 | 140 | "public-read" 141 | (merge init {:READ [{:URI "anonymous"}]}) 142 | 143 | "authenticated-read" 144 | (merge init {:READ [{:URI "authenticated"}]}) 145 | 146 | "log-delivery-write" init 147 | 148 | "bucket-owner-read" 149 | (merge init {:READ [{:DisplayName owner 150 | :ID owner}]}) 151 | 152 | "bucket-owner-full-control" 153 | init 154 | 155 | "private" 156 | (-> init 157 | (update-in [:FULL_CONTROL] conj {:ID tenant :DisplayName tenant}) 158 | (update-in [:FULL_CONTROL] vec)) 159 | 160 | nil 161 | init 162 | 163 | (throw (ex-info "Invalid Argument" 164 | {:arg "x-amz-acl" 165 | :val canned-acl 166 | :status-code 400 167 | :type :invalid-argument}))) 168 | 169 | (some seq [acl-read acl-write 170 | acl-read-acp acl-write-acp 171 | acl-full-ctl]) 172 | (-> explicit-acl 173 | (update-in [:FULL_CONTROL] conj {:ID tenant 174 | :DisplayName tenant}) 175 | (update-in [:FULL_CONTROL] vec)) 176 | 177 | 178 | :else 179 | init)))) 180 | -------------------------------------------------------------------------------- /src/io/pithos/sig4.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.sig4 2 | (:require [clojure.string :as str] 3 | [clojure.tools.logging :refer [info debug]] 4 | [clj-time.core :as time] 5 | [clj-time.format :as format] 6 | [ring.util.codec :as codec]) 7 | (:import [javax.crypto Mac] 8 | [javax.crypto.spec SecretKeySpec] 9 | [java.security MessageDigest])) 10 | 11 | 12 | (defn parse-authorization [request] 13 | """ 14 | Parse an AWS SIG4 authorization header.. e.g. 
15 | 16 | AWS4-HMAC-SHA256 Credential=AKIAIOSFODNN7EXAMPLE/20170805/us-east-1/s3/aws4_request,SignedHeaders=host;x-amz-content-sha256;x-amz-date,Signature=fadf01d63c3c6e4c8238625fc971eddf7a1b2d0470750a21ae8f33c03b4bbdb7 17 | 18 | TYPEKEY=VALUE/VALUE/VALUE,KEY=VALUE;VALUE;VALUE,KEY=VALUE 19 | """ 20 | (let [ 21 | authorization-header (get (get request :headers) "authorization") 22 | authorization (zipmap [:access-key :date :region :service :signed-headers :signature] 23 | (rest (re-find #"AWS4-HMAC-SHA256 Credential=(\w+)\/(\d{8})\/([\w\d-]+)\/([\w\d]+)\/aws4_request,[ ]*SignedHeaders=([\w-;]+),[ ]*Signature=(\w+)" authorization-header)) 24 | )] 25 | (assoc authorization :signed-headers (str/split (get authorization :signed-headers) #";")) 26 | )) 27 | 28 | (defn sha256 [input] 29 | (let [hash (MessageDigest/getInstance "SHA-256")] 30 | (. hash digest input))) 31 | 32 | (defn secretKeyInst [key mac] 33 | (SecretKeySpec. key (.getAlgorithm mac))) 34 | 35 | (defn hmac-sha256 [key string] 36 | "Returns the signature of a string with a given key, using a SHA-256 HMAC." 37 | (let [mac (Mac/getInstance "HMACSHA256") 38 | secretKey (secretKeyInst key mac)] 39 | (-> (doto mac 40 | (.init secretKey) 41 | (.update (.getBytes string))) 42 | .doFinal))) 43 | 44 | (defn hex [input] 45 | """ Format bytes as a hex string """ 46 | (apply str (map #(format "%02x" %) input))) 47 | 48 | (defn bytes [input] 49 | """ Format a string as bytes """ 50 | (. input getBytes)) 51 | 52 | (defn request-time [request] 53 | """ Parse the date or x-amzdate headers and return a time object """ 54 | (let [headers (get request :headers)] 55 | (cond 56 | (contains? headers "x-amz-date") 57 | (format/parse (format/formatters :basic-date-time-no-ms) (get headers "x-amz-date")) 58 | (contains? 
headers "date") 59 | (format/parse (format/formatters :basic-date-time-no-ms) (get headers "date")) 60 | ))) 61 | 62 | (defn signing-key [secret-key request-time authorization] 63 | """ Generate a signing key for a v4 signature """ 64 | (debug secret-key) 65 | (-> (str "AWS4" secret-key) 66 | (bytes) 67 | (hmac-sha256 (format/unparse (format/formatters :basic-date) request-time)) 68 | (hmac-sha256 (get authorization :region)) 69 | (hmac-sha256 (get authorization :service)) 70 | (hmac-sha256 "aws4_request") 71 | )) 72 | 73 | (defn canonical-verb [request] 74 | ( -> (get request :request-method) name str/upper-case)) 75 | 76 | (defn- double-escape [^String x] 77 | (.replace (.replace x "\\" "\\\\") "$" "\\$")) 78 | 79 | (defn- percent-encode [^String unencoded] 80 | (->> (.getBytes unencoded "UTF-8") 81 | (map (partial format "%%%02X")) 82 | (str/join))) 83 | 84 | (defn uri-escape [unencoded] 85 | (str/replace 86 | unencoded 87 | #"[^A-Za-z0-9_~.\-/]+" 88 | #(double-escape (percent-encode %)))) 89 | 90 | (defn query-escape [unencoded] 91 | (str/replace 92 | unencoded 93 | #"[^A-Za-z0-9_~.\-]+" 94 | #(double-escape (percent-encode %)))) 95 | 96 | (defn canonical-uri [request] 97 | (uri-escape (get request :orig-uri))) 98 | 99 | (defn canonical-query-string [request] 100 | (let [ 101 | query-string (get request :query-string) 102 | decoded (and (seq query-string) (codec/form-decode query-string)) 103 | params (cond (map? decoded) decoded 104 | (string? decoded) {decoded nil} 105 | :else {})] 106 | 107 | (str/join "&" 108 | (->> params 109 | (map (juxt (comp query-escape name key) (comp query-escape str/trim (fn [input] (if (nil? 
input) "" input)) val))) 110 | (sort-by first) 111 | (map (partial str/join "=")) 112 | )))) 113 | 114 | (defn canonical-headers [request, include-headers] 115 | (str/join "\n" 116 | (concat (->> (select-keys (get request :headers) include-headers) 117 | (map (juxt (comp name key) (comp str/trim val))) 118 | (sort-by first) 119 | (map (partial str/join ":"))) 120 | ))) 121 | 122 | (defn signed-headers [include-headers] 123 | (str/join ";" (sort-by first include-headers))) 124 | 125 | (defn hash-payload [request] 126 | """ Hash the entire body. Thankfully this is done for us - its in the 127 | x-amz-content-sha256 and we have wrapped :body in something that will error 128 | if the stream is closed before reading out content matching the sha """ 129 | (get (get request :headers) "x-amz-content-sha256")) 130 | 131 | (defn canonical-request [request include-headers] 132 | (str/join "\n" [ 133 | (canonical-verb request) 134 | (canonical-uri request) 135 | (canonical-query-string request) 136 | (canonical-headers request include-headers) 137 | "" 138 | (signed-headers include-headers) 139 | (hash-payload request) 140 | ])) 141 | 142 | (defn string-to-sign [request request-time authorization] 143 | """ Format a request into a canonicalized representation for signing """ 144 | (let [canonical-request (canonical-request request (get authorization :signed-headers))] 145 | (debug "canonical-request" canonical-request) 146 | (str/join "\n" [ 147 | "AWS4-HMAC-SHA256" 148 | (format/unparse (format/formatters :basic-date-time-no-ms) request-time) 149 | (str/join "/" [ 150 | (format/unparse (format/formatters :basic-date) request-time) 151 | (get authorization :region) 152 | (get authorization :service) 153 | "aws4_request" 154 | ]) 155 | (hex (sha256 (bytes canonical-request))) 156 | ]))) 157 | 158 | (defn signature [signing-key, string-to-sign] 159 | """ Sign a canonicalized representation of the request with a signing key """ 160 | (hex (hmac-sha256 signing-key 
string-to-sign))) 161 | 162 | (defn is-signed-by? [request authorization secret-key] 163 | (let[ 164 | request-time (request-time request) 165 | signing-key (signing-key secret-key request-time authorization) 166 | string-to-sign (string-to-sign request request-time authorization) 167 | signature (signature signing-key, string-to-sign) 168 | ] 169 | (debug request-time) 170 | (debug (hex signing-key)) 171 | (debug string-to-sign) 172 | (debug signature (get authorization :signature) (= signature (get authorization :signature))) 173 | (= signature (get authorization :signature)) 174 | ) 175 | ) 176 | 177 | (defn sha256-input-stream [body, goal-hash] 178 | """ Wrap a body stream with a hashing adapter that will throw if the data is invalid """ 179 | (let [hash (MessageDigest/getInstance "SHA-256")] 180 | (proxy [java.io.InputStream] [] 181 | (close [] 182 | (try 183 | ;; Calculate final digest and if doesn't match expected value - throw 184 | (if (not= goal-hash (hex (.digest hash))) 185 | ;; FIXME: Is there a more appropriate error here? 186 | (throw (ex-info "body signature is incorrect" 187 | {:type :signature-does-not-match 188 | :status-code 403 189 | :expected goal-hash 190 | :to-sign "" 191 | }))) 192 | (finally (.close body))) 193 | ) 194 | 195 | (read [^bytes ba] 196 | (let [bytes_read (.read body ba)] 197 | (if (not= bytes_read -1) (.update hash ba 0 bytes_read)) 198 | bytes_read)) 199 | ))) 200 | 201 | (defn validate4 202 | [keystore request] 203 | (let [ 204 | authorization (parse-authorization request) 205 | secret-key (get (get keystore (get authorization :access-key)) :secret) 206 | is-valid-signature (is-signed-by? 
request authorization secret-key) 207 | auth (get keystore (get authorization :access-key)) 208 | retval (cond 209 | is-valid-signature 210 | (update-in auth [:memberof] concat ["authenticated-users" "anonymous"]) 211 | :else 212 | {:tenant :anonymous :memberof ["anonymous"]} 213 | )] 214 | (debug "request" request) 215 | (debug "authorization" (get keystore (get authorization :access-key))) 216 | (debug "secret" secret-key) 217 | (debug "is-valid-sig" is-valid-signature) 218 | (debug "retval" retval) 219 | retval 220 | )) 221 | -------------------------------------------------------------------------------- /doc/source/concepts.rst: -------------------------------------------------------------------------------- 1 | Design and Concepts 2 | =================== 3 | 4 | This section describe the overall design and concepts within *pithos* and 5 | its interaction with Apache Cassandra. 6 | 7 | .. _S3 Concepts: 8 | 9 | S3 Concepts 10 | ----------- 11 | 12 | Since *pithos* exposes the AWS S3 API, some of its properties have direct impact on 13 | pithos's design. 14 | 15 | Terminology 16 | ~~~~~~~~~~~ 17 | 18 | If you're not familiar with S3, the following terms need clarification: 19 | 20 | Bucket 21 | A bucket is a named container for objects. A bucket belongs to a region 22 | and may contain an arbitrary number of objects, potentially in different 23 | storage classes. 24 | 25 | Region 26 | A region hosts the metadata for objects. Regions may have several available 27 | storage classes. 28 | 29 | Object 30 | An object is the S3 representation of a file. There is no filesystem hierarchy 31 | in S3 even though some mechanisms may help in emulating one. 32 | 33 | Storage Class 34 | A storage class is a destination for objects with specific storage properties. 35 | A typical use case for storage properties is to provide cheap storage with 36 | low safety properties in a *reduced redundancy* class and standard safety 37 | properties in a *standard* class. 
38 | 39 | A global bucket namespace 40 | ~~~~~~~~~~~~~~~~~~~~~~~~~ 41 | 42 | The first thing of note is that an S3-compatible object store will expose a 43 | global namespace for buckets across all tenants. Bucket names are first come, 44 | first served and hold very little information. The most important properties stored 45 | in a bucket are: 46 | 47 | - The bucket name 48 | - The bucket's tenant 49 | - The bucket's ACL 50 | - The bucket's CORS configuration 51 | - The region the bucket's objects will be stored in 52 | 53 | 54 | Cassandra concepts 55 | ------------------ 56 | 57 | *pithos* relies on `Apache Cassandra`_, which brings its own set of terminology and 58 | concepts: 59 | 60 | Cluster 61 | A Cassandra cluster is a collection of a number of nodes which share 62 | properties such as available schema and data. 63 | 64 | Node 65 | A Cassandra node is a participant in a cluster. It can be seen as the 66 | equivalent of an SQL instance. 67 | 68 | Keyspace 69 | A Cassandra keyspace holds a collection of column families which share 70 | similar properties, such as a replication factor. It can be seen as the 71 | equivalent of an SQL database 72 | 73 | Column Family 74 | A Cassandra column family stores keyed rows of data sharing a specific 75 | schema. It can be seen as the equivalent of an SQL table. 76 | 77 | .. _Apache Cassandra: http://cassandra.apache.org 78 | 79 | Pithos properties 80 | ----------------- 81 | 82 | *pithos* strives to provide an eventual consistent system, enforcing 83 | validity through clients. 84 | 85 | There is a single operation throughout *pithos* which necessitates some 86 | transactional properties: the act of claiming a bucket's ownership. 87 | As it stands, given the current implementation of the bucketstore, this 88 | operation may return inconsistent results. 89 | 90 | All other operations in *pithos* are commutative and may be retried 91 | to achieve expected results. 
92 | 93 | It is worth noting that typical S3 clients will ensure the validity of operations 94 | by comparing local MD5 checksums and checksums reported by *pithos*. 95 | 96 | While operations are commutative, stale data may be left in the 97 | ``inode_blocks`` and ``block`` column families when upload operations 98 | fail, or clients interrupt uploads. This will result in orphaned blocks, 99 | which need to be regularly purged from the blobstore. Pithos does not 100 | provide a method to check for these yet. 101 | 102 | 103 | 104 | .. _Pithos Architecture: 105 | 106 | Pithos Architecture 107 | ------------------- 108 | 109 | To isolate concerns and provide flexibility when building an object store service, 110 | *pithos* is built around the notion of different stores which are all responsible 111 | for a subset of the overall object store data. Each of the stores can be independently 112 | configured and may point to a separate location. 113 | 114 | *pithos* provides default implementations of each store targeting Cassandra (except for 115 | the keystore, which is static by default), but the configuration file format allows for 116 | providing different implementations if necessary. 117 | 118 | The Keystore 119 | ~~~~~~~~~~~~ 120 | 121 | *pithos* does not concern itself with handling tenants; it relies on a 122 | keystore to provide an association from API key to tenant information. 123 | 124 | Out of the box, *pithos* only ships with a simple config-file based keystore, 125 | but writing a separate one is trivial and covered in the developer documentation. 126 | 127 | A key lookup in the keystore should yield a map of the following attributes:: 128 | 129 | { 130 | "master": false, 131 | "tenant": "tenant name", 132 | "secret": "secret key", 133 | "memberof": ["group1", "group2"] 134 | } 135 | 136 | These properties are then used by pithos to authenticate requests.
137 | 138 | The Bucketstore 139 | ~~~~~~~~~~~~~~~ 140 | 141 | The bucketstore holds an association of bucket name to tenant and properties. 142 | 143 | 144 | ========== ======== =============================== 145 | Column Type Description 146 | ========== ======== =============================== 147 | bucket text bucket name 148 | acl text serialized ACL definition 149 | cors text serialized CORS definition 150 | created text ISO8601 timestamp 151 | policy text (unused) 152 | region text region name 153 | tenant text tenant name 154 | versioned boolean (unused) 155 | website text website configuration (unused) 156 | ========== ======== =============================== 157 | 158 | 159 | The Metastore 160 | ~~~~~~~~~~~~~ 161 | 162 | The metastore hold object metadata for a specific region. It also associates 163 | objects with their storage class location and keeps track of ongoing 164 | multipart object uploads. 165 | 166 | An object has the following properties: 167 | 168 | ============ ======== ================================== 169 | Column Type Description 170 | ============ ======== ================================== 171 | bucket text bucket name 172 | object text full object path 173 | acl text serialized ACL definition 174 | atime text ISO8601 timestamp of access time 175 | checksum text MD5 checksum of object 176 | size bigint total file size 177 | inode uuid object inode ID 178 | version uuid object version ID 179 | storageclass text storage class where data is stored 180 | metadata map additional attributes 181 | ============ ======== ================================== 182 | 183 | Multipart upload descriptions span two entities, the first 184 | stores a list of ongoing uploads: 185 | 186 | ============ ======== ================================== 187 | Column Type Description 188 | ============ ======== ================================== 189 | bucket text bucket name 190 | object text full object path 191 | upload uuid object inode ID 192 | metadata map 
additional attributes 193 | ============ ======== ================================== 194 | 195 | The second stores information on each uploaded part 196 | 197 | ============ ======== ================================== 198 | Column Type Description 199 | ============ ======== ================================== 200 | bucket text bucket name 201 | object text full object path 202 | upload uuid object inode ID 203 | partno int part number within this upload 204 | cheksum text MD5 checksum of this part 205 | inode uuid upload part inode ID 206 | version uuid upload part version ID 207 | modified text ISO 8601 timestamp of part upload 208 | size bigint upload part total size 209 | ============ ======== ================================== 210 | 211 | 212 | The Blobstore 213 | ~~~~~~~~~~~~~ 214 | 215 | The blobstore holds data for your files. Data is stored 216 | based on inode ids. Data is stored across two entities 217 | by default. 218 | 219 | The first one is a list of blocks within an inode: 220 | 221 | ============ ======== ================================== 222 | Column Type Description 223 | ============ ======== ================================== 224 | inode uuid inode ID 225 | version uuid version ID 226 | block bigint offset of block start 227 | size bigint block size 228 | ============ ======== ================================== 229 | 230 | The second one holds data within a block: 231 | 232 | ============ ======== ================================== 233 | Column Type Description 234 | ============ ======== ================================== 235 | inode uuid inode ID 236 | version uuid version ID 237 | block bigint offset of block start 238 | offset bigint offset of payload within object 239 | chunksize int payload size 240 | payload blob bytes for this payload 241 | ============ ======== ================================== 242 | 243 | -------------------------------------------------------------------------------- /src/io/pithos/stream.clj: 
-------------------------------------------------------------------------------- 1 | (ns io.pithos.stream 2 | "Read and write to cassandra from OutputStream and InputStream" 3 | (:import java.io.OutputStream 4 | java.io.InputStream 5 | java.nio.ByteBuffer 6 | org.eclipse.jetty.server.HttpInputOverHTTP 7 | javax.servlet.ReadListener) 8 | (:require [io.pithos.blob :as b] 9 | [io.pithos.desc :as d] 10 | [io.pithos.util :as u] 11 | [clojure.tools.logging :refer [debug error]])) 12 | 13 | (defn chunk->ba 14 | "Chunks in pithos come back as bytebuffers and we 15 | need byte-arrays for outputstreams, this converts 16 | from the former to the latter. 17 | 18 | The underlying bytebuffers can be reused, which is why 19 | we need to watch respect the position and limit parameters 20 | given." 21 | [{:keys [payload]}] 22 | (let [array (.array payload) 23 | off (.position payload) 24 | len (- (.limit payload) off)] 25 | [array off len])) 26 | 27 | (defn full-file? 28 | "Does a range specify the full file?" 29 | [od start end] 30 | (and (= start 0) (= end (d/size od)))) 31 | 32 | (defn within-range? 33 | "Is a chunk within the range expected" 34 | [{:keys [chunksize offset]} start end] 35 | (and (<= start (+ offset chunksize)) (<= offset end))) 36 | 37 | (defn crop-chunk 38 | "This is the land of off-by-one errors, but bear with me: 39 | For a specific chunk, we have three streaming cases: 40 | 41 | - We need to stream all of it when it starts beyond the start 42 | offset of the range and it ends before the end offset of the range. 43 | - If the start offset is contained in this chunk but beyond the 44 | first byte, we need to start at the correct mark. 45 | - If the end offset is contained in this chunk but before the 46 | last byte, we need to stop at the correct mark. 47 | 48 | Here, we treat the last two cases as a single one, by 49 | computing a head and tail, and adapting the start offset 50 | as well as the length to stream in one go." 
51 | [{:keys [offset chunksize] :as chunk} start end] 52 | (let [[array off len] (chunk->ba chunk) 53 | buf-start offset 54 | buf-end (+ offset chunksize)] 55 | (if (and (<= (+ offset chunksize) end) (>= offset start)) 56 | ;; No cropping necessary. 57 | ;; Just pass the byte-buffer as-is. 58 | [array off len] 59 | ;; We need to crop, compute head and tail and infer 60 | ;; actual length from them. 61 | (let [head (if (< buf-start start) (- start buf-start) 0) 62 | tail (if (> buf-end end) (- buf-end end) 0) 63 | croplen (- len (+ head tail))] 64 | [array (+ off head) croplen])))) 65 | 66 | (defn stream-file 67 | "Stream a whole file. Do not handle supplied ranges and 68 | just write out all chunks." 69 | [od ^OutputStream stream blob blocks] 70 | (doseq [{:keys [block]} blocks] 71 | (doseq [chunk (b/chunks blob od block block) 72 | :let [[array off len] (chunk->ba chunk)]] 73 | (.write stream array off len)))) 74 | 75 | (defn stream-range 76 | "Stream a range of bytes. Keep iterating on blocks until 77 | we reach the end, then only consider chunks in the 78 | supplied range, and optionally crop them before streaming out." 79 | [od ^OutputStream stream blob blocks start end] 80 | (doseq [{:keys [block]} blocks 81 | :while (<= block end)] 82 | (doseq [chunk (b/chunks blob od block block) 83 | :when (within-range? chunk start end)] 84 | (let [[array off len] (crop-chunk chunk start end)] 85 | (.write stream array off len))))) 86 | 87 | (defn stream-to 88 | "Stream a range or a whole file." 89 | [od ^OutputStream stream [start end]] 90 | (debug "streaming range: " start end) 91 | (let [blob (d/blobstore od) 92 | blocks (b/blocks blob od)] 93 | (try 94 | (if (full-file? 
od start end) 95 | (stream-file od stream blob blocks) 96 | (stream-range od stream blob blocks start end)) 97 | (catch Exception e 98 | (error e "error during read")) 99 | (finally 100 | (debug "closing after read") 101 | (.flush stream) 102 | (.close stream)))) 103 | od) 104 | 105 | (defn stream-from 106 | "Given an input stream and an object descriptor, stream data from the 107 | input stream to the descriptor. 108 | 109 | Our current approach has the drawback of not enforcing blocksize 110 | requirements since we have no way of being notified when reaching a 111 | threshold." 112 | [^InputStream stream od] 113 | (let [blob (d/blobstore od) 114 | hash (u/md5-init)] 115 | (try 116 | (loop [block 0 117 | offset 0] 118 | (when (>= block offset) 119 | (debug "marking new block") 120 | (b/start-block! blob od block)) 121 | (let [chunk-size (b/max-chunk blob) 122 | ba (byte-array chunk-size) 123 | br (.read stream ba)] 124 | (if (neg? br) 125 | (do 126 | (debug "negative write, read whole stream") 127 | (d/col! od :size offset) 128 | (d/col! od :checksum (u/md5-sum hash)) 129 | od) 130 | (let [chunk (ByteBuffer/wrap ba 0 br) 131 | sz (b/chunk! blob od block offset chunk) 132 | offset (+ sz offset)] 133 | (u/md5-update hash ba 0 br) 134 | (if (b/boundary? blob block offset) 135 | (recur offset offset) 136 | (recur block offset)))))) 137 | (catch Exception e 138 | (error e "error during write")) 139 | (finally 140 | (debug "closing after write") 141 | (.close stream))))) 142 | 143 | (defn validate-range 144 | [src start end] 145 | (when-not (<= 0 start end (d/size src)) 146 | (throw (IllegalArgumentException. "Invalid range supplied")))) 147 | 148 | (defn stream-copy-range-block 149 | "I have seen prouder days." 150 | [hash offset dblob dst sblob src start end block] 151 | (b/start-block! dblob dst (- block start)) 152 | (if-let [chunks (seq (b/chunks sblob src block offset))] 153 | (do 154 | (doseq [chunk chunks 155 | :when (and chunk (within-range? 
chunk start end)) 156 | :let [[ba offset len] (crop-chunk chunk start end)]] 157 | (b/chunk! dblob dst block (- offset start) (ByteBuffer/wrap ba offset len)) 158 | (u/md5-update hash ba offset len)) 159 | (let [{:keys [offset chunksize]} (last chunks)] 160 | (- (+ offset chunksize) start))) 161 | offset)) 162 | 163 | (defn stream-copy-range 164 | [src dst [start end]] 165 | (debug "copying from range: " start end) 166 | (let [sblob (d/blobstore src) 167 | dblob (d/blobstore dst) 168 | blocks (b/blocks sblob src) 169 | hash (u/md5-init)] 170 | (validate-range src start end) 171 | (loop [[block & blocks] (map :block (b/blocks sblob src)) 172 | offset 0] 173 | (cond 174 | (or (nil? block) (> block end)) 175 | (do 176 | (d/col! dst :size (- end start)) 177 | (d/col! dst :checksum (u/md5-sum hash))) 178 | 179 | ;; look-ahead to see if it's worth skipping a whole block 180 | (and (seq blocks) (< start (first blocks))) 181 | (recur blocks offset) 182 | 183 | ::else 184 | (do 185 | (recur blocks 186 | (stream-copy-range-block hash offset dblob dst sblob 187 | src start end block))))))) 188 | 189 | (defn stream-copy 190 | "Copy from one object descriptor to another." 191 | [src dst] 192 | (let [sblob (d/blobstore src) 193 | dblob (d/blobstore dst) 194 | blocks (b/blocks sblob src)] 195 | (doseq [{:keys [block]} blocks] 196 | (b/start-block! dblob dst block) 197 | (debug "found block " block) 198 | (loop [offset block] 199 | (when-let [chunks (seq (b/chunks sblob src block offset))] 200 | (doseq [chunk chunks 201 | :let [offset (:offset chunk)]] 202 | (b/chunk! dblob dst block offset (:payload chunk))) 203 | (let [{:keys [offset chunksize]} (last chunks)] 204 | (recur (+ offset chunksize)))))) 205 | (d/col! dst :size (d/size src)) 206 | (d/col! 
dst :checksum (d/checksum src)) 207 | dst)) 208 | 209 | (defn stream-copy-part-block 210 | "Copy a single part's block to a destination" 211 | [notifier dst hash part g-offset {:keys [block]}] 212 | (let [dblob (d/blobstore dst) 213 | sblob (d/blobstore part) 214 | real-block (+ g-offset block)] 215 | (debug "streaming block: " block) 216 | (b/start-block! dblob dst real-block) 217 | (notifier :block) 218 | (last 219 | (for [chunk (b/chunks sblob part block block) 220 | :let [offset (:offset chunk) 221 | payload (:payload chunk) 222 | real-offset (+ g-offset offset)]] 223 | (do 224 | (b/chunk! dblob dst real-block real-offset payload) 225 | (notifier :chunk) 226 | (let [pos (.position payload) 227 | sz (.remaining payload) 228 | ba (byte-array sz)] 229 | (.get payload ba) 230 | (.position payload pos) 231 | (u/md5-update hash ba 0 sz) 232 | (+ real-offset sz))))))) 233 | 234 | 235 | (defn stream-copy-part 236 | "Copy a single part to a destination" 237 | [notifier dst [offset hash] part] 238 | (let [sblob (d/blobstore part) 239 | blocks (b/blocks sblob part)] 240 | 241 | (debug "streaming part: " (d/part part)) 242 | [(reduce (partial stream-copy-part-block notifier dst hash part) 243 | offset blocks) 244 | hash])) 245 | 246 | (defn stream-copy-parts 247 | "Given a list of parts, stream their content to a destination inode" 248 | [parts dst notifier] 249 | (let [dblob (d/blobstore dst) 250 | [size hash] (reduce (partial stream-copy-part notifier dst) 251 | [0 (u/md5-init)] parts)] 252 | (d/col! dst :size size) 253 | (d/col! 
dst :checksum (u/md5-sum hash)) 254 | (debug "stored size:" size "and checksum: " (u/md5-sum hash)) 255 | dst)) 256 | -------------------------------------------------------------------------------- /doc/source/clients.rst: -------------------------------------------------------------------------------- 1 | Client compatibility list 2 | ========================= 3 | 4 | This section needs your help 5 | 6 | s3cmd 7 | ----- 8 | 9 | Fully tested with the current API coverage. Here is a minimal 10 | configuration you can put in ``~/.s3cfg``:: 11 | 12 | [default] 13 | host_base = s3.example.com 14 | host_bucket = %(bucket)s.s3.example.com 15 | access_key = YOUR_ACCESS_KEY 16 | secret_key = YOUR_SECRET_KEY 17 | use_https = True 18 | signature_v2 = True 19 | 20 | Adapt with your credentials and replace ``s3.example.com`` with the 21 | value you specified for ``service-uri``. ``use_https`` is needed only 22 | if Pithos is served over TLS. Currently pithos doesn't support v4 23 | signatures so the ``signature_v2`` flag is necessary. 
24 | 25 | When testing locally, the following configuration can be used:: 26 | 27 | [default] 28 | host_base = s3.example.com 29 | host_bucket = %(bucket)s.s3.example.com 30 | access_key = YOUR_ACCESS_KEY 31 | secret_key = YOUR_SECRET_KEY 32 | use_https = False 33 | signature_v2 = True 34 | proxy_host = localhost 35 | proxy_port = 8080 36 | 37 | 38 | libcloud 39 | -------- 40 | 41 | Working support with the S3 provider:: 42 | 43 | from libcloud.storage.types import Provider 44 | from libcloud.storage.providers import get_driver 45 | cls = get_driver(Provider.S3) 46 | driver = cls('api key', 'api secret key', host='s3.example.com') 47 | driver.list_containers() 48 | 49 | rclone 50 | -------- 51 | 52 | Working support with the S3 provider:: 53 | 54 | RCLONE_CONFIG__TYPE=s3 55 | RCLONE_CONFIG__ACCESS_KEY_ID=YOUR_ACCESS_KEY 56 | RCLONE_CONFIG__SECRET_ACCESS_KEY=YOUR_SECRET_KEY 57 | RCLONE_CONFIG__REGION=other-v2-signature 58 | RCLONE_CONFIG__ENDPOINT=s3.example.com 59 | RCLONE_CONFIG__ACL=private 60 | 61 | Ansible 62 | ------- 63 | 64 | Sample task configuration to list a bucket using the signature V2 by using the 65 | scheme ``fakes3`` *(= http)* or ``fakes3s`` *(= https)*:: 66 | 67 | - name: List bucket content 68 | aws_s3: 69 | s3_url: "fakes3s://s3.example.com" 70 | bucket: "my_bucket" 71 | mode: list 72 | register: my_bucket_content 73 | 74 | cyberduck 75 | --------- 76 | 77 | On-going integration 78 | 79 | owncloud 80 | -------- 81 | 82 | Working support 83 | 84 | s3fs - s3 fuse support 85 | ---------------------- 86 | 87 | Working support. 
If you specified ``s3.example.com`` as 88 | ``service-uri``, you can mount the bucket ``bucket`` with the 89 | following command:: 90 | 91 | s3fs bucket /mnt/bucket -o url=https://s3.example.com 92 | 93 | The credentials have to be specified in ``~/.passwd-s3fs``:: 94 | 95 | YOUR_ACCESS_KEY:YOUR_SECRET_KEY 96 | 97 | WAL-E - continuous archiving for Postgres 98 | ----------------------------------------- 99 | 100 | Support for S3-compatible object stores was added in version 0.8 of WAL-E. 101 | Configure WAL-E with the following environment variables: 102 | 103 | ===================== ============================ 104 | AWS_ACCESS_KEY_ID YOUR_ACCESS_KEY 105 | AWS_SECRET_ACCESS_KEY YOUR_SECRET_KEY 106 | WALE_S3_ENDPOINT https+path://s3.example.com 107 | WALE_S3_PREFIX s3://your-bucket/your-prefix 108 | ===================== ============================ 109 | 110 | Archiving WAL files 111 | ``````````````````` 112 | 113 | Postgresql needs the following settings in ``postresql.conf``:: 114 | 115 | wal_level = archive 116 | archive_mode = on 117 | archive_command = 'envdir /etc/wal-e.d/env /path/to/wal-e wal-push %p' 118 | archive_timeout = 60 119 | 120 | Once postgres is setup to send WAL files, make a base backup with ``envdir 121 | /etc/wal-e.d/env /path/to/wal-e backup-push /path/to/postgres/data`` 122 | 123 | Restoring from archived WAL files 124 | ````````````````````````````````` 125 | 126 | Pull a base backup:: 127 | 128 | envdir /etc/wal-e.d/env /path/to/wal-e backup-fetch /path/to/postgres/data LATEST 129 | 130 | Create a ``recovery.conf`` file in the postgres data dir with the following 131 | content:: 132 | 133 | restore_command = 'envdir /etc/wal-e.d/env /path/to/wal-e wal-fetch "%f" "%p"' 134 | 135 | Start postgresql and check the logs to see its restore status. 
136 | 137 | elasticsearch - index backup and restore 138 | ---------------------------------------- 139 | 140 | Snapshotting and restoring indices to Pithos is supported thanks to the `AWS 141 | Cloud Plugin`_. To configure a snapshot repository that points to your pithos 142 | installation, simply add to your ``/etc/elasticsearch/elasticsearch.yml``: 143 | 144 | .. code-block:: yaml 145 | 146 | cloud: 147 | aws: 148 | access_key: 149 | secret_key: 150 | s3: 151 | protocol: https 152 | endpoint: s3.example.com 153 | 154 | Then create your repository:: 155 | 156 | $ curl -XPUT 'http://localhost:9200/_snapshot/pithos' -d '{ 157 | "type": "s3", 158 | "settings": { 159 | "bucket": "es-snapshots" 160 | } 161 | }' 162 | 163 | Starting with version 2.4.2 of the plugin, all settings can be provided 164 | per-repository:: 165 | 166 | $ curl -XPUT 'http://localhost:9200/_snapshot/pithos' -d '{ 167 | "type": "s3", 168 | "settings": { 169 | "bucket": "es-snapshots", 170 | "access_key": "your key", 171 | "secret_key": "your secret", 172 | "protocol": "http", 173 | "endpoint": "s3.example.com", 174 | } 175 | }' 176 | 177 | .. _AWS Cloud Plugin: https://github.com/elasticsearch/elasticsearch-cloud-aws 178 | 179 | AWS Languages SDKs 180 | ------------------ 181 | 182 | In general, AWS Language SDKs can work with Pithos with the following 183 | configuration: 184 | 185 | * In ``~/.aws/config``:: 186 | 187 | [default] 188 | s3 = 189 | signature_version = s3 190 | 191 | * In ``~/.aws/credentials``:: 192 | 193 | [default] 194 | aws_access_key_id = 195 | aws_secret_access_key = 196 | 197 | You can have multiple profiles instead of altering the ``[default]`` 198 | configuration. 
Simply repeat configuration sections and name them ``[profile 199 | ]`` 200 | 201 | Shell (awscli) 202 | `````````````` 203 | 204 | Install `awscli`_, then:: 205 | 206 | aws s3 ls --endpoint-url=https://your-endpoint 207 | 208 | To use a non-default profile:: 209 | 210 | aws s3 ls --endpoint-url=https://your-endpoint --profile= 211 | 212 | Python (boto3) 213 | `````````````` 214 | 215 | Install `boto3`_ and create a Pithos client like this: 216 | 217 | .. code-block:: python 218 | 219 | import boto3.session 220 | 221 | session = boto3.session.Session() 222 | client = session.client('s3', endpoint_url='https://pithos-endpoint') 223 | client.list_buckets() 224 | 225 | To use a non-default profile: 226 | 227 | .. code-block:: python 228 | 229 | import boto3.session 230 | session = boto3.session.Session(profile_name='profile-name') 231 | client = session.client('s3', endpoint_url='https://pithos-endpoint') 232 | 233 | Python (boto) 234 | ````````````` 235 | 236 | `Boto`_ version 2 is boto3's ancestor but is still widely used. It doesn't 237 | take ``~/.aws/*`` configuration files into account. 238 | 239 | .. code-block:: python 240 | 241 | from boto.s3.connection import S3Connection, OrdinaryCallingFormat 242 | 243 | connection = S3Connection(key, secret, host='pithos-endpoint', 244 | port=443, is_secure=True, 245 | calling_format=OrdinaryCallingFormat()) 246 | bucket = connection.get_bucket('your-bucket') 247 | 248 | .NET 249 | ```` 250 | 251 | Install `AWSSDK.S3`_, then: 252 | 253 | .. code-block:: csharp 254 | 255 | Amazon.AWSConfigsS3.UseSignatureVersion4 = false; 256 | var config = new Amazon.S3.AmazonS3Config() 257 | { 258 | ServiceURL = host, 259 | SignatureVersion = "s3", 260 | }; 261 | var client = new Amazon.S3.AmazonS3Client(apikey, secretKey, config); 262 | 263 | Java 264 | ```` 265 | 266 | Install `AWS SDK for Java`_, then: 267 | 268 | .. 
code-block:: java 269 | 270 | // works with the latest (last confirmed version: 1.11.123) AWS Java SDK 271 | 272 | import com.amazonaws.ClientConfiguration; 273 | import com.amazonaws.services.s3.AmazonS3Client; 274 | 275 | ClientConfiguration config = new ClientConfiguration(); 276 | config.setSignerOverride("S3SignerType"); 277 | 278 | AmazonS3Client s3 = new AmazonS3Client(config); 279 | s3.setEndpoint("https://your-endpoint"); 280 | 281 | 282 | // You can eliminate the credentials file by instead passing in (or reading from your own config file) 283 | // credentials as below: 284 | // AWSCredentials credentials = new BasicAWSCredentials("AKIAIOSFODNN7EXAMPLE", 285 | // "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"); 286 | // AmazonS3 s3 = new AmazonS3Client(credentials, config); 287 | 288 | // if your endpoint is hosted on a non standard port for example, 289 | // s3.setEndpoint("http://your-endpoint:8081"); 290 | // then your pithos.yaml server-uri should also include the port for example: 291 | // server-uri : your-endpoint:8081 292 | 293 | PHP 294 | ``` 295 | 296 | Install `PHP AWS SDK`_ - important: Only Version2 is suitable (Version 3 only supports SignatureVersion 4, which is not yet implemented). After install, use something like this: 297 | 298 | .. code-block:: php 299 | 300 | // connect 301 | $s3Client=Aws\S3\S3Client::factory([ 302 | 'base_url'=>'https://your-endpoint.com', 303 | 'key'=>'your-key', 304 | 'secret'=>'your-secret', 305 | 'region'=>'region', // must be filled with something, even if you have no regions 306 | ]); 307 | 308 | // list all files in bucket 309 | $iterator = $s3Client->getIterator('ListObjects', array( 310 | 'Bucket' => $bucket, 311 | 'Prefix' => 'foo' 312 | )); 313 | 314 | foreach ($iterator as $object) { 315 | echo $object['Key'] . "\n"; 316 | } 317 | 318 | .. _awscli: https://aws.amazon.com/cli/ 319 | .. _boto3: https://boto3.readthedocs.io/en/latest/ 320 | .. _Boto: http://boto.cloudhackers.com/en/latest/ 321 | .. 
_AWSSDK.S3: https://www.nuget.org/packages/AWSSDK.S3/ 322 | .. _AWS SDK for Java: https://aws.amazon.com/sdk-for-java/ 323 | .. _PHP AWS SDK: http://docs.aws.amazon.com/aws-sdk-php/v2/guide/installation.html 324 | 325 | 326 | -------------------------------------------------------------------------------- /src/io/pithos/request.clj: -------------------------------------------------------------------------------- 1 | (ns io.pithos.request 2 | "This namespace provides all necessary wrapper functions to validate and 3 | augment the incoming request map." 4 | (:require [clojure.string :refer [lower-case join starts-with?]] 5 | [clojure.tools.logging :refer [debug info warn error]] 6 | [clojure.pprint :refer [pprint]] 7 | [clojure.java.io :as io] 8 | [io.pithos.sig :refer [validate check-sig anonymous]] 9 | [io.pithos.sig4 :refer [validate4 sha256-input-stream]] 10 | [io.pithos.system :refer [service-uri keystore]] 11 | [io.pithos.util :refer [string->pattern uri-decode]] 12 | [clout.core :as c] 13 | [ring.middleware.multipart-params :as mp] 14 | [ring.util.request :as req] 15 | [ring.util.codec :as codec] 16 | [clojure.data.codec.base64 :as base64] 17 | [cheshire.core :as json] 18 | [qbits.alia.uuid :as uuid]) 19 | (:import [java.io ByteArrayInputStream] 20 | [java.io ByteArrayOutputStream])) 21 | 22 | (def known 23 | "known query args" 24 | #{"acl" 25 | "awsaccesskeyid" 26 | "cors" 27 | "delete" 28 | "delimiter" 29 | "expires" 30 | "file" 31 | "key" 32 | "lifecycle" 33 | "location" 34 | "logging" 35 | "marker" 36 | "max-keys" 37 | "notification" 38 | "partnumber" 39 | "policy" 40 | "prefix" 41 | "requestpayment" 42 | "response-cache-control" 43 | "response-content-type" 44 | "response-content-disposition" 45 | "response-content-encoding" 46 | "response-content-language" 47 | "response-expires" 48 | "restore" 49 | "signature" 50 | "success_action_redirect" 51 | "success_action_status" 52 | "tagging" 53 | "uploadid" 54 | "uploads" 55 | "versionid" 56 | "versioning" 57 | 
"versions" 58 | "website"}) 59 | 60 | (def actions 61 | "known actions" 62 | #{:acl 63 | :cors 64 | :delete 65 | :lifecycle 66 | :location 67 | :logging 68 | :notification 69 | :policy 70 | :requestpayment 71 | :restore 72 | :tagging 73 | :uploads 74 | :uploadid 75 | :versioning 76 | :versions 77 | :website}) 78 | 79 | (def subresources 80 | "known subresources, used when signing" 81 | {:acl "acl" 82 | :cors "cors" 83 | :delete "delete" 84 | :lifecycle "lifecycle" 85 | :location "location" 86 | :logging "logging" 87 | :notification "notification" 88 | :partnumber "partNumber" 89 | :policy "policy" 90 | :response-content-disposition "response-content-disposition" 91 | :response-content-type "response-content-type" 92 | :response-content-encoding "response-content-encoding" 93 | :response-content-language "response-content-language" 94 | :response-cache-control "response-cache-control" 95 | :response-expires "response-expires" 96 | :requestpayment "requestPayment" 97 | :tagging "tagging" 98 | :torrent "torrent" 99 | :uploadid "uploadId" 100 | :uploads "uploads" 101 | :versionid "versionId" 102 | :versioning "versioning" 103 | :versions "versions" 104 | :website "website"}) 105 | 106 | (defn action-routes 107 | "Really simple router, extracts target (service, bucket or object)" 108 | [] 109 | (let [sroute (c/route-compile "/") 110 | broute1 (c/route-compile "/:bucket") 111 | broute2 (c/route-compile "/:bucket/") 112 | oroute (c/route-compile "/:bucket/*")] 113 | [[:service (partial c/route-matches sroute)] 114 | [:bucket (partial c/route-matches broute1)] 115 | [:bucket (partial c/route-matches broute2)] 116 | [:object (partial c/route-matches oroute)]])) 117 | 118 | (defn match-action-route 119 | "Matches incoming route and yields target bucket and object" 120 | [request [target matcher]] 121 | (when-let [{bucket :bucket object :*} (matcher request)] 122 | {:target target :bucket (uri-decode bucket) :object (uri-decode object)})) 123 | 124 | (defn yield-assoc-target 
125 | "closure which for each incoming request will assoc target, bucket 126 | abnd object" 127 | [] 128 | (let [routes (action-routes)] 129 | (fn [request] 130 | (merge request 131 | (or (some (partial match-action-route request) routes) 132 | {:target :unknown}))))) 133 | 134 | (defn yield-assoc-operation 135 | "Closure which will build an operation keyword based on the incoming 136 | request. This is the bulk of the routing in pithos. This becomes necessary 137 | because S3's behavior varies based on the route, but also based on query 138 | arguments. 139 | 140 | `action-params` holds query args which are relevant and need to be taken 141 | into account, when found, it will be part of the operation name." 142 | [suffixes] 143 | (fn [{:keys [uri request-method action-params target params] :as request}] 144 | (let [suffix (some suffixes action-params) 145 | getpair (fn [[k v]] (if (and v (seq v)) (str k "=" v) k)) 146 | append (some->> (filter (comp subresources key) params) 147 | (map (juxt (comp subresources first) second)) 148 | (sort-by first) 149 | (map getpair) 150 | (seq) 151 | (join "&") 152 | ((partial str "?")))] 153 | (assoc request 154 | :sign-uri (str uri append) 155 | :action (when suffix (name suffix)) 156 | :operation (->> (map name (if suffix 157 | [request-method target suffix] 158 | [request-method target])) 159 | (join "-") 160 | (keyword)))))) 161 | 162 | (defn keywordized 163 | "Yield a map where string keys are keywordized" 164 | [params] 165 | (dissoc 166 | (->> (map (juxt (comp keyword known lower-case key) val) params) 167 | (reduce merge {})) 168 | nil)) 169 | 170 | (defn insert-id 171 | "Assoc a random UUID to a request" 172 | [req] 173 | (assoc req :reqid (uuid/random))) 174 | 175 | (defn assoc-orig-uri 176 | "Assoc a random UUID to a request" 177 | [req] 178 | (assoc req :orig-uri (get req :uri))) 179 | 180 | (defn protect-body-stream [request] 181 | (let [headers (get request :headers)] 182 | (if (and (contains? 
headers "x-amz-content-sha256") (not= (get headers "x-amz-content-sha256") "UNSIGNED-PAYLOAD")) 183 | (assoc request :body (sha256-input-stream (get request :body) (get headers "x-amz-content-sha256")))) 184 | request)) 185 | 186 | (defn assoc-params 187 | "Parse, keywordize and store query arguments" 188 | [{:keys [query-string] :as req}] 189 | (or 190 | (when-let [params (and (seq query-string) 191 | (codec/form-decode query-string))] 192 | (as-> req req 193 | (assoc req :params (keywordized 194 | (cond (map? params) params 195 | (string? params) {params nil} 196 | :else {}))) 197 | (assoc req :action-params 198 | (set (filter actions (-> req :params keys)))))) 199 | (assoc req :params {} :action-params #{}))) 200 | 201 | (defn rewrite-host 202 | "Discard host from URI" 203 | [{:keys [uri] :as request}] 204 | (if-let [[_ trail] (re-find #"^https?://[^/]+/?(.*)" uri)] 205 | (assoc request :uri (str "/" trail)) 206 | request)) 207 | 208 | (defn yield-rewrite-bucket 209 | "Move from a vhost based access method to a full resource access path" 210 | [service-uri] 211 | (let [pattern-str (str "^(.*)\\." 
(string->pattern service-uri) "$") 212 | pattern (re-pattern pattern-str) 213 | transformer (fn [bucket uri] (str "/" bucket (if (seq uri) uri "/")))] 214 | (fn [{:keys [uri] {:strs [host] :or {host ""}} :headers :as request}] 215 | (if-let [[_ bucket] (re-find pattern host)] 216 | (assoc request :uri (transformer bucket uri)) 217 | request)))) 218 | 219 | (defn authenticate 220 | "Authenticate tenant, allow masquerading only for _master_ keys" 221 | [{:keys [multipart-params request-method sign-uri] :as req} system] 222 | 223 | (cond 224 | 225 | (= request-method :options) 226 | (assoc req :authorization anonymous) 227 | 228 | (and (= request-method :post) (seq multipart-params)) 229 | (let [{:keys [signature awsaccesskeyid policy]} multipart-params 230 | [_ bucket] (re-find #"^/[^/]*(/.*)?$" sign-uri) 231 | auth (check-sig req (keystore system) awsaccesskeyid policy signature)] 232 | (assoc req 233 | :post-upload? true 234 | :authorization auth 235 | :policy (json/parse-string (String. (-> policy 236 | .getBytes 237 | base64/decode)) 238 | true))) 239 | (and (contains? (get req :headers) "authorization") (starts-with? (get (get req :headers) "authorization") "AWS4-")) 240 | (assoc req :authorization (validate4 (keystore system) req)) 241 | :else 242 | (let [auth (validate (keystore system) req) 243 | master (:master auth) 244 | tenant (get-in req [:headers "x-amz-masquerade-tenant"])] 245 | (assoc req :authorization 246 | (if (and master tenant) (assoc auth :tenant tenant) auth))))) 247 | 248 | (defn decode-uri 249 | [req] 250 | (update-in req [:uri] uri-decode)) 251 | 252 | (defn multipart-params 253 | [req] 254 | (if (= (req/content-type req) "multipart/form-data") 255 | (let [make-input-stream #(when % (java.io.FileInputStream. %))] 256 | (-> (mp/multipart-params-request req) 257 | (update-in [:params] #(reduce merge {} (filter (comp keyword? 
key) %))) 258 | (update-in [:multipart-params] keywordized) 259 | (update-in [:multipart-params :file :tempfile] make-input-stream))) 260 | req)) 261 | 262 | (defn prepare 263 | "Generate closures and walks each requests through wrappers." 264 | [req system] 265 | (let [service-uri (service-uri system) 266 | rewrite-bucket (yield-rewrite-bucket service-uri) 267 | assoc-target (yield-assoc-target) 268 | assoc-operation (yield-assoc-operation actions)] 269 | 270 | (-> req 271 | (insert-id) 272 | (assoc-orig-uri) 273 | (assoc-params) 274 | (protect-body-stream) 275 | (rewrite-host) 276 | (rewrite-bucket) 277 | 278 | (assoc-target) 279 | (assoc-operation) 280 | 281 | (multipart-params) 282 | (authenticate system) 283 | (decode-uri)))) 284 | 285 | (defn safe-prepare 286 | "Wrap prepare in a try-catch block" 287 | [req system] 288 | (try (prepare req system) 289 | (catch Exception e 290 | (debug e "unhandled exception during request preparation") 291 | (insert-id 292 | {:operation :error :exception e})))) 293 | --------------------------------------------------------------------------------