├── hack ├── icons ├── shell ├── src ├── kj ├── blackrock │ ├── blackrock.link-static │ ├── storage-tool.link-static │ ├── blackrock.ekam-manifest │ ├── fs-storage-test.capnp │ ├── blank-ext4.capnp │ ├── blank-ext4.ekam-rule │ ├── mke2fs.conf │ ├── sparse-data.capnp │ ├── common.h │ ├── common.c++ │ ├── bundle.h │ ├── master.capnp │ ├── gce.h │ ├── logs.h │ ├── frontend.capnp │ ├── storage-schema.capnp │ ├── local-persistent-registry.h │ ├── sparse-data.c++ │ ├── master.h │ ├── fs-storage.capnp │ ├── frontend.h │ ├── logs-tester.c++ │ ├── gateway.h │ ├── fs-storage.h │ ├── cluster-rpc.h │ ├── local-persistent-registry.c++ │ ├── worker.capnp │ ├── backend-set.h │ ├── machine.capnp │ ├── nbd-test-loopback.c++ │ ├── bundle.c++ │ ├── backend-set.c++ │ ├── nbd-bridge.h │ ├── worker.h │ ├── distributed-blocks.c++ │ ├── storage-tool.c++ │ ├── gateway.c++ │ ├── gce.c++ │ └── cluster-rpc.capnp ├── capnp ├── sodium ├── openssl ├── ekam-rules ├── joyent-http ├── libseccomp ├── node-capnp └── sandstorm ├── CHANGELOG.md ├── localedata-C ├── make-bundle.sh ├── meteor-bundle-main.js ├── find-meteor-dev-bundle.sh ├── .gitignore ├── test-config.capnp ├── check-e2fsprogs.sh ├── README.md ├── tests └── run-testrock.sh ├── Vagrantfile ├── Makefile ├── release.sh └── LICENSE /hack: -------------------------------------------------------------------------------- 1 | deps/sandstorm/hack -------------------------------------------------------------------------------- /icons: -------------------------------------------------------------------------------- 1 | deps/sandstorm/icons -------------------------------------------------------------------------------- /shell: -------------------------------------------------------------------------------- 1 | deps/sandstorm/shell -------------------------------------------------------------------------------- /src/kj: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/kj 
-------------------------------------------------------------------------------- /src/blackrock/blackrock.link-static: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/blackrock/storage-tool.link-static: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/capnp: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/capnp -------------------------------------------------------------------------------- /src/sodium: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/sodium -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | deps/sandstorm/CHANGELOG.md -------------------------------------------------------------------------------- /localedata-C: -------------------------------------------------------------------------------- 1 | deps/sandstorm/localedata-C -------------------------------------------------------------------------------- /make-bundle.sh: -------------------------------------------------------------------------------- 1 | deps/sandstorm/make-bundle.sh -------------------------------------------------------------------------------- /src/openssl: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/openssl -------------------------------------------------------------------------------- /src/ekam-rules: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/ekam-rules -------------------------------------------------------------------------------- /src/joyent-http: 
-------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/joyent-http -------------------------------------------------------------------------------- /src/libseccomp: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/libseccomp -------------------------------------------------------------------------------- /src/node-capnp: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/node-capnp -------------------------------------------------------------------------------- /src/sandstorm: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/sandstorm -------------------------------------------------------------------------------- /meteor-bundle-main.js: -------------------------------------------------------------------------------- 1 | deps/sandstorm/meteor-bundle-main.js -------------------------------------------------------------------------------- /src/blackrock/blackrock.ekam-manifest: -------------------------------------------------------------------------------- 1 | blackrock bin 2 | -------------------------------------------------------------------------------- /find-meteor-dev-bundle.sh: -------------------------------------------------------------------------------- 1 | deps/sandstorm/find-meteor-dev-bundle.sh -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Copied from Sandstorm 2 | bin 3 | tmp 4 | deps 5 | node_modules 6 | shell-build 7 | sandstorm-*.tar.xz 8 | bundle/ 9 | .docker 10 | .shell-env 11 | .vagrant/ 12 | phantomjsdriver.log 13 | selenium-debug.log 14 | 15 | # Added for Blackrock 16 | dbg 17 | config.capnp 18 | oasis-config 19 | testrock-config 20 | blackrock.tar.xz 21 | local-config 22 | .local 23 | 
-------------------------------------------------------------------------------- /test-config.capnp: -------------------------------------------------------------------------------- 1 | @0xa9101b1fec595220; 2 | 3 | using import "/blackrock/master.capnp".MasterConfig; 4 | 5 | const vagrant :MasterConfig = ( 6 | workerCount = 2, 7 | frontendCount = 2, 8 | frontendConfig = ( 9 | baseUrl = "https://localrock.sandstorm.io", 10 | wildcardHost = "*.localrock.sandstorm.io", 11 | allowDemoAccounts = true, 12 | isTesting = true, 13 | # stripeKey = "sk_test_???", 14 | # stripePublicKey = "pk_test_???", 15 | outOfBeta = true, 16 | allowUninvited = true, 17 | replicasPerMachine = 2, 18 | # mailchimpKey = "???", 19 | # mailchimpListId = "???", 20 | privateKeyPassword = "abcd1234", 21 | ), 22 | vagrant = () 23 | ); 24 | 25 | -------------------------------------------------------------------------------- /src/blackrock/fs-storage-test.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | @0xed33f8595b36bba5; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | using Storage = import "storage.capnp"; 21 | 22 | struct TestStoredObject { 23 | text @0 :Text; 24 | sub1 @1 :Storage.OwnedAssignable(TestStoredObject); 25 | sub2 @2 :Storage.OwnedAssignable(TestStoredObject); 26 | volume @3 :Storage.OwnedVolume; 27 | } 28 | -------------------------------------------------------------------------------- /src/blackrock/blank-ext4.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0x8eb19add5ef5349e; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | using import "sparse-data.capnp".SparseData; 22 | 23 | const blankExt4 :SparseData = embed "blank-ext4.sparse"; 24 | # blank-ext4.sparse is created by blank-ext4.ekam-rule which runs mkfs.ext4 to create a new 25 | # ext4 FS and then uses sparse-data.c++ to turn its contents into SparseData. 26 | -------------------------------------------------------------------------------- /src/blackrock/blank-ext4.ekam-rule: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | 3 | set -eu 4 | 5 | echo findProvider canonical:blackrock/sparse-data 6 | read MKSPARSE 7 | 8 | echo findProvider canonical:blackrock/mke2fs.conf 9 | read MKE2FS_CONFIG 10 | export MKE2FS_CONFIG 11 | 12 | echo newOutput blackrock/blank-ext4.sparse 13 | read OUTPUT 14 | 15 | # Put temp file in /var/tmp because tmpfs doesn't handle sparse files well. 16 | TMPFILE=/var/tmp/blackrock-blank-ext4 17 | 18 | # Start with an 8GB sparse file. 19 | # Experimentally, filesystem overhead (count of non-zero blocks after initialization) for 20 | # ext4 seems to be 12 blocks plus 1 for every 2GiB of space. At 8GiB we use 16 blocks, aka 21 | # 64KiB, which seems reasonable. 22 | rm -f $TMPFILE 23 | truncate -s 8589934592 $TMPFILE 24 | 25 | # Format as ext4. 26 | # TODO(someday): Figure out if we can make this deterministic. It seems there's more randomness 27 | # than just the UUID. 28 | /sbin/mkfs.ext4 -q -b 4096 -U 00000000-0000-0000-0000-000000000000 -m 0 -O sparse_super2 \ 29 | -E num_backup_sb=0,resize=4294967295 -J size=16 $TMPFILE 30 | $MKSPARSE $TMPFILE > $OUTPUT 31 | rm $TMPFILE 32 | -------------------------------------------------------------------------------- /src/blackrock/mke2fs.conf: -------------------------------------------------------------------------------- 1 | [defaults] 2 | base_features = sparse_super,filetype,resize_inode,dir_index,ext_attr 3 | default_mntopts = acl,user_xattr 4 | enable_periodic_fsck = 0 5 | blocksize = 4096 6 | inode_size = 256 7 | inode_ratio = 16384 8 | 9 | [fs_types] 10 | ext3 = { 11 | features = has_journal 12 | } 13 | ext4 = { 14 | features = has_journal,extent,huge_file,flex_bg,uninit_bg,dir_nlink,extra_isize 15 | auto_64-bit_support = 1 16 | inode_size = 256 17 | } 18 | ext4dev = { 19 | features = has_journal,extent,huge_file,flex_bg,uninit_bg,dir_nlink,extra_isize 20 | inode_size = 256 21 | options = test_fs=1 22 | } 23 | small = { 24 | blocksize = 1024 25 | inode_size = 128 26 | inode_ratio = 4096 27 | } 28 | floppy 
= { 29 | blocksize = 1024 30 | inode_size = 128 31 | inode_ratio = 8192 32 | } 33 | big = { 34 | inode_ratio = 32768 35 | } 36 | huge = { 37 | inode_ratio = 65536 38 | } 39 | news = { 40 | inode_ratio = 4096 41 | } 42 | largefile = { 43 | inode_ratio = 1048576 44 | blocksize = -1 45 | } 46 | largefile4 = { 47 | inode_ratio = 4194304 48 | blocksize = -1 49 | } 50 | hurd = { 51 | blocksize = 4096 52 | inode_size = 128 53 | } 54 | -------------------------------------------------------------------------------- /check-e2fsprogs.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Verify that we've correctly built e2fsprogs and then copy them into the bundle. 4 | 5 | set -euo pipefail 6 | 7 | fail() { 8 | echo "check-e2fsprogs: $@" >&2 9 | exit 1 10 | } 11 | 12 | PROGS="tmp/e2fsprogs/e2fsck/e2fsck tmp/e2fsprogs/misc/tune2fs tmp/e2fsprogs/resize/resize2fs" 13 | 14 | for PROG in $PROGS; do 15 | if [ ! -e $PROG ]; then 16 | fail "$PROG does not exist" 17 | elif [ "$(ldd $PROG 2>&1 | tr -d '\t')" != "not a dynamic executable" ]; then 18 | fail "$PROG is not statically-linked" 19 | fi 20 | done 21 | 22 | rm -f /var/tmp/test-ext4fs-uuid-bug 23 | truncate -s 8G /var/tmp/test-ext4fs-uuid-bug >/dev/null 2>&1 24 | tmp/e2fsprogs/misc/mke2fs -t ext4 -U 00000000-0000-0000-0000-000000000000 /var/tmp/test-ext4fs-uuid-bug >/dev/null 2>&1 || fail "mke2fs failed" 25 | tmp/e2fsprogs/e2fsck/e2fsck -p /var/tmp/test-ext4fs-uuid-bug >tmp/check-e2fsck.out 2>&1 || fail "e2fsck failed" 26 | if grep -q UUID tmp/check-e2fsck.out; then 27 | fail "e2fsck not compiled to ignore null UUID" 28 | fi 29 | tmp/e2fsprogs/e2fsck/e2fsck -p /var/tmp/test-ext4fs-uuid-bug >/dev/null 2>&1 || fail "e2fsck repeat failed" 30 | rm /var/tmp/test-ext4fs-uuid-bug 31 | 32 | cp $PROGS bin 33 | -------------------------------------------------------------------------------- /src/blackrock/sparse-data.capnp: 
-------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xb4ec463ef590911d; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | struct SparseData { 22 | # Represents a chunk of "sparse" data, i.e. bytes with a lot of long runs of zeros. We only 23 | # include the non-zero bytes. 24 | # 25 | # This is used in particular to store a blank ext4 filesystem template directly into the 26 | # Blackrock binary so that we can quickly format new volumes. 27 | 28 | chunks @0 :List(Chunk); 29 | struct Chunk { 30 | offset @0 :UInt64; 31 | data @1 :Data; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/blackrock/common.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_COMMON_H_ 18 | #define BLACKROCK_COMMON_H_ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace blackrock { 25 | 26 | #define KJ_MVCAP(var) var = ::kj::mv(var) 27 | // Capture the given variable by move. Place this in a lambda capture list. Requires C++14. 28 | // 29 | // TODO(cleanup): Move to libkj. 30 | 31 | using kj::uint; 32 | using kj::byte; 33 | 34 | kj::AutoCloseFd newEventFd(uint value, int flags); 35 | uint64_t readEvent(int fd); 36 | void writeEvent(int fd, uint64_t value); 37 | // TODO(cleanup): Find a better home for these. 38 | 39 | } // namespace blackrock 40 | 41 | #endif // BLACKROCK_COMMON_H_ 42 | -------------------------------------------------------------------------------- /src/blackrock/common.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include "common.h" 18 | #include 19 | #include 20 | #include 21 | 22 | namespace blackrock { 23 | 24 | kj::AutoCloseFd newEventFd(uint value, int flags) { 25 | int fd; 26 | KJ_SYSCALL(fd = eventfd(0, flags)); 27 | return kj::AutoCloseFd(fd); 28 | } 29 | 30 | uint64_t readEvent(int fd) { 31 | ssize_t n; 32 | uint64_t result; 33 | KJ_SYSCALL(n = read(fd, &result, sizeof(result))); 34 | KJ_ASSERT(n == 8, "wrong-sized read from eventfd", n); 35 | return result; 36 | } 37 | 38 | void writeEvent(int fd, uint64_t value) { 39 | ssize_t n; 40 | KJ_SYSCALL(n = write(fd, &value, sizeof(value))); 41 | KJ_ASSERT(n == 8, "wrong-sized write on eventfd", n); 42 | } 43 | 44 | } // namespace blackrock 45 | -------------------------------------------------------------------------------- /src/blackrock/bundle.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_BUNDLE_H_ 18 | #define BLACKROCK_BUNDLE_H_ 19 | 20 | #include "common.h" 21 | #include 22 | 23 | namespace blackrock { 24 | 25 | void createSandstormDirectories(); 26 | // Call before enterSandstormBundle() (before forking) to ensure directory tree is initialized. 27 | 28 | void enterSandstormBundle(); 29 | // Call to cause the current process (typically newly-forked) to enter the Sandstorm bundle. 30 | // Its directory tree will then appear to be Sandstorm's. 31 | 32 | kj::Maybe checkPgpSignatureInBundle( 33 | kj::StringPtr appIdString, sandstorm::spk::Metadata::Reader metadata); 34 | // Runs sandstorm::ctheckPgpSignature() inside the Sandstorm bundle, since it invokes gpg. 35 | 36 | } // namespace blackrock 37 | 38 | #endif // BLACKROCK_BUNDLE_H_ 39 | -------------------------------------------------------------------------------- /src/blackrock/master.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xf58bc2dacec400ce; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | struct MasterConfig { 22 | workerCount @0 :UInt32; 23 | frontendCount @4 :UInt32 = 1; 24 | 25 | # For now, we expect exactly one of each of the other machine types. 
26 | 27 | frontendConfig @1 :import "frontend.capnp".FrontendConfig; 28 | 29 | union { 30 | vagrant @2 :VagrantConfig; 31 | gce @3 :GceConfig; 32 | } 33 | } 34 | 35 | struct VagrantConfig {} 36 | 37 | struct GceConfig { 38 | project @0 :Text; 39 | zone @1 :Text; 40 | gatewayAddresses @8 :List(Text); 41 | instanceTypes :group { 42 | storage @2 :Text = "n1-standard-1"; 43 | worker @3 :Text = "n1-highmem-2"; 44 | coordinator @4 :Text = "n1-standard-1"; 45 | frontend @5 :Text = "n1-highcpu-2"; 46 | mongo @6 :Text = "n1-standard-1"; 47 | gateway @7 :Text = "n1-standard-1"; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blackrock 2 | 3 | Blackrock is an extension to [Sandstorm](https://github.com/sandstorm-io/sandstorm) that allows a single instance to scale across a cluster of machines. It is used to power [Sandstorm Oasis](https://oasis.sandstorm.io), the managed hosting service offered by [Sandstorm.io](https://sandstorm.io). 4 | 5 | See the [Blackrock roadmap](https://github.com/sandstorm-io/sandstorm/tree/master/roadmap/blackrock) for a design overview. 6 | 7 | ## Running locally 8 | 9 | WARNING: This runs six VMs, and the number may increase in the future. It also allocates disk images totalling 20GB, although they are sparse images so won't actually use that much space on your drive unless you use them a lot. 10 | 11 | First, some prep: 12 | 13 | * This has only been tested on Debian Sid. On other distros, YMMV. (Patches welcome.) 14 | * You will need Vagrant installed. 15 | * If you want to build with modified version of Sandstorm, make sure that `deps/sandstorm` symlinks to your Sandstorm source tree. Hint: You can symlink `deps` to `..` if Blockrock is checked out next to Sandstorm. 16 | * You may want to edit `test-config.capnp` to add your Stripe test key and Mailchip key, if you want to test those features. 
Otherwise, leave them commented out. 17 | 18 | To run locally: 19 | 20 | make run-local 21 | 22 | This will take a very long time the first time it runs, but once all the VMs are up you'll be able to ctrl+C and re-run quickly. 23 | 24 | Your instance will be accessible at: http://localrock.sandstorm.io:6080/ 25 | 26 | To create an admin token: 27 | 28 | make local-admintoken 29 | 30 | Then go to: http://localrock.sandstorm.io:6080/setup/token/testtoken 31 | 32 | To get a Mongo shell: 33 | 34 | make local-mongo 35 | 36 | To shut down: 37 | 38 | make kill-local 39 | 40 | ## Deploying 41 | 42 | Please talk to us on [sandstorm-dev](https://groups.google.com/group/sandstorm-dev). 43 | 44 | If you are deploying on Google Compute Engine, this may be relatively easy. On any other infrastructure, a new `ComputeDriver` will be needed. See `src/blackrock/gce.{h,c++}` to see how this is implemented for GCE. Perhaps you'd like to contribute an implementation for another service? 45 | -------------------------------------------------------------------------------- /tests/run-testrock.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Sandstorm - Personal Cloud Sandbox 4 | # Copyright (c) 2014 Sandstorm Development Group, Inc. and contributors 5 | # All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | set -euo pipefail 20 | 21 | XVFB_PID="" 22 | RUN_SELENIUM="true" 23 | 24 | cleanExit () { 25 | rc=$1 26 | 27 | if [ -n "$XVFB_PID" ] ; then 28 | # Send SIGINT to the selenium-server child of the backgrounded xvfb-run, so 29 | # it will exit cleanly and the Xvfb process will also be cleaned up. 30 | # We don't actually know that PID, so we find it with pgrep. 31 | kill -SIGINT $(pgrep --parent $XVFB_PID node) 32 | wait $XVFB_PID 33 | fi 34 | exit $rc 35 | } 36 | 37 | checkInstalled() { 38 | if ! $(which $1 >/dev/null 2>/dev/null) ; then 39 | echo "Couldn't find executable '$1' - try installing the $2 package?" 40 | exit 1 41 | fi 42 | } 43 | 44 | 45 | THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") 46 | 47 | # Parse arguments. 48 | while [ $# -gt 0 ] ; do 49 | case $1 in 50 | --no-selenium) 51 | RUN_SELENIUM="false" 52 | ;; 53 | *) 54 | ;; 55 | esac 56 | shift 57 | done 58 | 59 | cd "$THIS_DIR"/../deps/sandstorm/tests 60 | 61 | checkInstalled npm npm 62 | checkInstalled firefox firefox 63 | 64 | npm install 65 | 66 | if [ "$RUN_SELENIUM" != "false" ] ; then 67 | checkInstalled java default-jre-headless 68 | checkInstalled xvfb-run Xvfb 69 | checkInstalled pgrep procps 70 | xvfb-run ./node_modules/selenium-standalone/bin/selenium-standalone start & 71 | XVFB_PID=$! 72 | fi 73 | 74 | export LAUNCH_URL="https://testrock.sandstorm.io" 75 | export DISABLE_DEMO=true 76 | export SKIP_UNITTESTS=true 77 | set +e 78 | 79 | npm test 80 | 81 | cleanExit $? 82 | -------------------------------------------------------------------------------- /src/blackrock/gce.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_GCE_H_ 18 | #define BLACKROCK_GCE_H_ 19 | 20 | #include "master.h" 21 | #include 22 | #include 23 | 24 | namespace blackrock { 25 | 26 | class GceDriver: public ComputeDriver { 27 | public: 28 | GceDriver(sandstorm::SubprocessSet& subprocessSet, kj::LowLevelAsyncIoProvider& ioProvider, 29 | GceConfig::Reader config); 30 | ~GceDriver() noexcept(false); 31 | 32 | SimpleAddress getMasterBindAddress() override; 33 | kj::Promise> listMachines() override; 34 | kj::Promise boot(MachineId id) override; 35 | kj::Promise run(MachineId id, VatId::Reader masterVatId, 36 | bool requireRestartProcess) override; 37 | kj::Promise stop(MachineId id) override; 38 | 39 | private: 40 | sandstorm::SubprocessSet& subprocessSet; 41 | kj::LowLevelAsyncIoProvider& ioProvider; 42 | GceConfig::Reader config; 43 | kj::String image; 44 | std::map> vatPaths; 45 | SimpleAddress masterBindAddress; 46 | 47 | LogSink logSink; 48 | kj::Promise logTask; 49 | SimpleAddress logSinkAddress; 50 | 51 | kj::Promise gceCommand(kj::ArrayPtr args, 52 | int stdin = STDIN_FILENO, int stdout = STDOUT_FILENO); 53 | kj::Promise gceCommand(std::initializer_list args, 54 | int stdin = STDIN_FILENO, int stdout = STDOUT_FILENO) { 55 | return gceCommand(kj::arrayPtr(args.begin(), args.size()), stdin, stdout); 56 | } 57 | }; 58 | 59 | } // namespace blackrock 60 | 61 | #endif // BLACKROCK_GCE_H_ 62 | -------------------------------------------------------------------------------- /src/blackrock/logs.h: 
-------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_LOGS_H_ 18 | #define BLACKROCK_LOGS_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | 24 | namespace sandstorm { 25 | class Subprocess; 26 | } 27 | 28 | namespace blackrock { 29 | 30 | class SimpleAddress; 31 | 32 | class LogSink: private kj::TaskSet::ErrorHandler { 33 | public: 34 | LogSink(); 35 | 36 | kj::Promise acceptLoop(kj::Own receiver); 37 | 38 | private: 39 | class ClientHandler; 40 | 41 | std::set namesSeen; 42 | 43 | kj::TaskSet tasks; 44 | 45 | void write(kj::ArrayPtr part1, kj::ArrayPtr part2 = nullptr); 46 | // Write a line to the log file, prefixed by a timestamp. 47 | 48 | void taskFailed(kj::Exception&& exception) override; 49 | }; 50 | 51 | void rotateLogs(int input, int logDirFd); 52 | // Read logs on `input` and write them to files in `logDirFd`, rotated to avoid any file becoming 53 | // overly large. 54 | 55 | void runLogClient(kj::StringPtr name, kj::StringPtr logAddressFile, kj::StringPtr backlogDir); 56 | // Reads logs from standard input and upload them to the log sink server, reconnecting to the 57 | // server as needed, buffering logs to a local file when the log server is unreachable. 
Note that 58 | // some logs may be lost around the moment of a disconnect; this is not intended to be 100% 59 | // reliable, only as reliable as is reasonable. 60 | // 61 | // `logAddressFile` is the name of a file on the hard drive which contains the address (in 62 | // SimpleAddress format). The file is re-read every time a reconnect is attempted. This allows an 63 | // external entity to update the log server address without restarting the process. 64 | 65 | } // namespace blackrock 66 | 67 | #endif // BLACKROCK_LOGS_H_ 68 | -------------------------------------------------------------------------------- /src/blackrock/frontend.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xfb7fa19ecd585d19; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | using ClusterRpc = import "cluster-rpc.capnp"; 22 | using Util = import "/sandstorm/util.capnp"; 23 | using Package = import "/sandstorm/package.capnp"; 24 | using Supervisor = import "/sandstorm/supervisor.capnp".Supervisor; 25 | using GatewayRouter = import "/sandstorm/backend.capnp".GatewayRouter; 26 | 27 | interface Frontend { 28 | # Front-ends run the Sandstorm shell UI (a Meteor app). They accept HTTP connections proxied 29 | # from the Gateways. 
30 | 31 | struct Instance { 32 | replicaNumber @0 :UInt32; 33 | httpAddress @1 :ClusterRpc.Address; 34 | smtpAddress @2 :ClusterRpc.Address; 35 | router @3 :GatewayRouter; 36 | } 37 | 38 | getInstances @0 () -> (instances :List(Instance)); 39 | # A front-end machine may run multiple instances of the Sandstorm Shell server. This method gets 40 | # a list of instances, so that the gateway can consitently route requests from a particular user 41 | # to a particular instance. 42 | } 43 | 44 | interface Mongo { 45 | getConnectionInfo @0 () -> (address :ClusterRpc.Address, username :Text, password :Text); 46 | 47 | # TODO(someday): Support replicas. 48 | } 49 | 50 | struct FrontendConfig { 51 | # Config for shells -- and for gateways, for historical reasons. 52 | 53 | baseUrl @0 :Text; 54 | # Equivalent to BASE_URL from sandstorm.conf. 55 | 56 | wildcardHost @1 :Text; 57 | # Equivalent to WILDCARD_HOST from sandstorm.conf. 58 | 59 | ddpUrl @2 :Text; 60 | # Equivalent to DDP_DEFAULT_CONNECTION_URL from sandstorm.conf. 61 | 62 | mailUrl @3 :Text; 63 | # Equivalent to MAIL_URL from sandstorm.conf. 64 | 65 | allowDemoAccounts @4 :Bool; 66 | # Equivalent to ALLOW_DEMO_ACCOUNTS from sandstorm.conf. 67 | 68 | isTesting @5 :Bool; 69 | # Equivalent to IS_TESTING from sandstorm.conf. 70 | 71 | isQuotaEnabled @13 :Bool = true; 72 | 73 | stripeKey @6 :Text; 74 | stripePublicKey @7 :Text; 75 | outOfBeta @12 :Bool; 76 | 77 | mailchimpKey @10 :Text; 78 | mailchimpListId @11 :Text; 79 | 80 | allowUninvited @8 :Bool; 81 | 82 | replicasPerMachine @9 :UInt32; 83 | 84 | privateKeyPassword @14 :Text; 85 | termsPublicId @15 :Text; 86 | } 87 | -------------------------------------------------------------------------------- /src/blackrock/storage-schema.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 
3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xf1a0240d9d1e831b; 18 | # Main storage schemas for Blackrock. 19 | 20 | $import "/capnp/c++.capnp".namespace("blackrock"); 21 | 22 | using Storage = import "storage.capnp"; 23 | using OwnedAssignable = Storage.OwnedAssignable; 24 | using OwnedVolume = Storage.OwnedVolume; 25 | using Supervisor = import "/sandstorm/supervisor.capnp".Supervisor; 26 | using Package = import "/sandstorm/package.capnp"; 27 | 28 | struct AccountStorage { 29 | # TODO(someday): 30 | # - Basic metadata. 31 | # - Quota etc. 32 | # - Opaque collection of received capabilities. 33 | 34 | grains @0 :List(GrainInfo); 35 | # All grains owned by the user. 36 | # 37 | # TODO(perf): Use a Collection here, when they are implemented. 38 | 39 | struct GrainInfo { 40 | id @0 :Text; 41 | state @1 :OwnedAssignable(GrainState); 42 | } 43 | } 44 | 45 | struct GatewayStorage { 46 | # TODO(someday): 47 | # - Incoming and outgoing SturdyRefs. 48 | } 49 | 50 | struct PackageStorage { 51 | volume @0 :OwnedVolume; 52 | appId @1 :Text; 53 | manifest @2 :Package.Manifest; 54 | authorPgpKeyFingerprint @3 :Text; 55 | } 56 | 57 | struct GrainState { 58 | union { 59 | inactive @0 :Void; 60 | # No worker is currently assigned to this grain. 61 | 62 | active @1 :Supervisor; 63 | # This grain is currently running on a worker machine. 
64 | # 65 | # Upon loading the `GrainState` from storage and finding `active` is set, the first thing you 66 | # should do is call `keepAlive()` on this capability. If that fails or times out, then it 67 | # would appear that the grain is no longer running. Now we get into a complicated situation 68 | # where it's necessary to either convince the worker holding the grain to give it up or revoke 69 | # that worker's access to the grain state and volume entirely, but hopefully this is 70 | # infrequent. 71 | } 72 | 73 | volume @2 :OwnedVolume; 74 | 75 | savedCaps @3 :List(SavedCap); 76 | # TODO(perf): Use a Collection here, when they are implemented. 77 | 78 | struct SavedCap { 79 | token @0 :Data; 80 | # Token given to the app. (We can't give the app the raw SturdyRef because it contains the 81 | # encryption key which means the bits are powerful outside the context of the app.) 82 | 83 | cap @1 :Capability; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Because Blackrock messes with kernel knobs that can break the system (like 18 | # nbd), it's preferable to run non-unit tests inside a VM with Vagrant. 
19 | # 20 | # Cheat sheet: 21 | # vagrant up Initializes and starts a VM, with the source directory 22 | # mapped read-only at /blackrock. 23 | # vagrant ssh SSHes into the VM. 24 | # vagrant destroy Shuts down and deletes the VM. I recommend this over 25 | # `vagrant halt` to keep your dev environment clean. 26 | 27 | VAGRANTFILE_API_VERSION = "2" 28 | 29 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 30 | config.vm.box = "debian/contrib-stretch64" 31 | 32 | # We build Blackrock outside of Vagrant, so there's no reason for the VM 33 | # to be modifying the source directory. Mount it read-only. 34 | config.vm.synced_folder ".", "/blackrock", type: "nfs", :mount_options => ["ro"] 35 | 36 | # The directory ".local" should contain two ext4 disk images: "storage" 37 | # and "mongo". Server state will be stored in these so that 38 | # "vagrant destroy"ing all VMs and bringing them back up doesn't mean 39 | # wiping storage. To create this directory, do something like: 40 | # 41 | # mkdir .local 42 | # truncate -s 10737418240 .local/storage 43 | # truncate -s 10737418240 .local/mongo 44 | # /sbin/mkfs.ext4 .local/storage 45 | # /sbin/mkfs.ext4 .local/mongo 46 | config.vm.synced_folder ".local", "/blackrock-local", type: "nfs" 47 | 48 | # Don't check for image updates on every run; could be slow. 
49 | config.vm.box_check_update = false 50 | 51 | config.vm.provider "virtualbox" do |v| 52 | v.memory = 1024 53 | v.cpus = 1 54 | end 55 | 56 | config.vm.define "storage0" do |storage0| 57 | storage0.vm.network "private_network", ip: "172.28.128.10" 58 | 59 | storage0.vm.provision "shell", 60 | inline: "mkdir -p /var/blackrock/storage && mount /blackrock-local/storage /var/blackrock/storage", 61 | run: "always" 62 | end 63 | config.vm.define "worker0" do |worker0| 64 | worker0.vm.network "private_network", ip: "172.28.128.20" 65 | end 66 | config.vm.define "worker1" do |worker1| 67 | worker1.vm.network "private_network", ip: "172.28.128.21" 68 | end 69 | config.vm.define "worker2" do |worker2| 70 | worker2.vm.network "private_network", ip: "172.28.128.22" 71 | end 72 | config.vm.define "worker3" do |worker3| 73 | worker3.vm.network "private_network", ip: "172.28.128.23" 74 | end 75 | config.vm.define "coordinator0" do |coordinator0| 76 | coordinator0.vm.network "private_network", ip: "172.28.128.30" 77 | end 78 | config.vm.define "frontend0" do |frontend0| 79 | frontend0.vm.network "private_network", ip: "172.28.128.60" 80 | end 81 | config.vm.define "frontend1" do |frontend1| 82 | frontend1.vm.network "private_network", ip: "172.28.128.61" 83 | end 84 | config.vm.define "mongo0" do |mongo0| 85 | mongo0.vm.network "private_network", ip: "172.28.128.50" 86 | 87 | mongo0.vm.provision "shell", 88 | inline: "mkdir -p /var/blackrock/bundle && mount /blackrock-local/mongo /var/blackrock/bundle", 89 | run: "always" 90 | end 91 | config.vm.define "gateway0" do |gateway0| 92 | gateway0.vm.network "private_network", ip: "172.28.128.40" 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /src/blackrock/local-persistent-registry.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 
3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_LOCAL_PERSISTENT_REGISTRY_H_ 18 | #define BLACKROCK_LOCAL_PERSISTENT_REGISTRY_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace blackrock { 27 | 28 | class LocalPersistentRegistry { 29 | // Class which manages the set of persistent capabilities hosted by this vat which, when saved, 30 | // will use the "Transient" SturdyRef type; i.e. these capabilities are specific to this vat and 31 | // won't continue to exist once the process exits. 32 | // 33 | // Typically a LocalPersistentRegistry& should be passed around to any component that needs to be 34 | // able to make its capabilities persistent. The LocalPersistentRegistry's scope should match the 35 | // RpcSystem. 36 | 37 | struct SavedRef; 38 | class PersistentImpl; 39 | class RestorerImpl; 40 | 41 | struct DataHash { 42 | inline size_t operator()(capnp::Data::Reader r) const { 43 | // The keys in the map are randomly-generated so the hash might as well be the prefix bytes. 
44 | size_t result = 0; 45 | memcpy(&result, r.begin(), kj::min(r.size(), sizeof(result))); 46 | return result; 47 | } 48 | }; 49 | 50 | public: 51 | LocalPersistentRegistry(VatPath::Reader thisVatPath): thisVatPath(thisVatPath) {} 52 | 53 | class Registration { 54 | public: 55 | Registration(LocalPersistentRegistry& registry, capnp::Capability::Client cap); 56 | 57 | KJ_DISALLOW_COPY(Registration); 58 | ~Registration() noexcept(false); 59 | // Dropping the registration invalidates all saved SturdyRefs. Calls to save() will still 60 | // succeed but return tokens that don't work (as if save() had been called just before the 61 | // deregistration). 62 | 63 | Persistent::Client getWrapped(); 64 | // Get a capability which forwards all calls to the original except for save() which is handled 65 | // by the LocalPersistentRegistry. 66 | 67 | private: 68 | LocalPersistentRegistry& registry; 69 | kj::Own wrapped; 70 | std::set savedRefs; 71 | friend class LocalPersistentRegistry; 72 | }; 73 | 74 | kj::Own makePersistent(capnp::Capability::Client cap); 75 | // Wraps the capability in a wrapper that implements save() by returning a transient SturdyRef. 76 | 77 | Restorer::Client createRestorerFor(VatPath::Reader clientId); 78 | // Create a Restorer to be used by the given authenticated client. 79 | 80 | private: 81 | VatPath::Reader thisVatPath; 82 | 83 | struct SavedRef { 84 | explicit SavedRef(Registration& registration); 85 | ~SavedRef() noexcept(false); 86 | 87 | Registration& registration; 88 | byte token[16]; 89 | 90 | // TODO(security): Track the ref's owner. This is easy enough when the owner is another vat, 91 | // but if it's e.g. the storage system then we don't really have a good way to authenticate 92 | // that here. 
93 | }; 94 | 95 | std::unordered_map, DataHash> savedRefs; 96 | }; 97 | 98 | } // namespace blackrock 99 | 100 | #endif // BLACKROCK_LOCAL_PERSISTENT_REGISTRY_H_ 101 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Blackrock's Makefile augments Sandstorm's. 18 | 19 | # You may override the following vars on the command line to suit 20 | # your config. 
21 | CC=clang 22 | CXX=clang++ 23 | CFLAGS=-O2 -g -Wall 24 | CXXFLAGS=$(CFLAGS) 25 | BUILD=0 26 | PARALLEL=$(shell nproc) 27 | 28 | .PHONY: all fast clean continuous deps update-deps 29 | 30 | define color 31 | printf '\033[0;34m==== $1 ====\033[0m\n' 32 | endef 33 | 34 | all: blackrock.tar.xz 35 | 36 | fast: blackrock-fast.tar.xz 37 | 38 | clean: 39 | rm -rf blackrock*.tar.xz local-config 40 | make -f deps/sandstorm/Makefile clean 41 | 42 | continuous: tmp/.deps 43 | make -f deps/sandstorm/Makefile continuous 44 | 45 | bundle: tmp/.deps 46 | make -f deps/sandstorm/Makefile bundle 47 | 48 | bin/e2fsck: tmp/e2fsprogs/e2fsck/e2fsck check-e2fsprogs.sh 49 | ./check-e2fsprogs.sh 50 | 51 | shell-env: tmp/.deps 52 | make -f deps/sandstorm/Makefile shell-env 53 | 54 | deps: tmp/.deps 55 | 56 | tmp/.deps: deps/sandstorm 57 | cd deps/sandstorm && make deps 58 | @mkdir -p tmp 59 | @touch tmp/.deps 60 | 61 | deps/sandstorm: 62 | @$(call color,downloading sandstorm) 63 | @mkdir -p deps 64 | git clone https://github.com/sandstorm-io/sandstorm.git deps/sandstorm 65 | 66 | deps/e2fsprogs: 67 | @$(call color,downloading e2fsprogs) 68 | @mkdir -p deps 69 | git clone https://github.com/tytso/e2fsprogs.git deps/e2fsprogs 70 | 71 | tmp/e2fsprogs/e2fsck/e2fsck: deps/e2fsprogs 72 | @$(call color,build e2fsprogs) 73 | @mkdir -p tmp/e2fsprogs 74 | cd tmp/e2fsprogs && ../../deps/e2fsprogs/configure CFLAGS='-Os -DEXT2_SKIP_UUID' LDFLAGS='-static' && make -j$(PARALLEL) 75 | 76 | update-deps: 77 | @$(call color,updating sandstorm) 78 | @cd deps/sandstorm && echo "pulling sandstorm..." && git pull && make update-deps 79 | 80 | bin/blackrock.unstripped: bundle 81 | @ # TODO(cleanup): This is ugly. 
82 | @$(call color,strip binaries) 83 | @cp bin/blackrock bin/blackrock.unstripped 84 | @strip bin/blackrock 85 | 86 | blackrock.tar.xz: bundle bin/e2fsck bin/blackrock.unstripped 87 | @$(call color,compress release bundle) 88 | @tar c --transform="s,^,blackrock/,S" bin/blackrock bin/e2fsck bin/tune2fs bin/resize2fs bundle | xz -c -9e > blackrock.tar.xz 89 | 90 | blackrock-fast.tar.xz: bundle bin/e2fsck bin/blackrock.unstripped 91 | @$(call color,compress fast bundle) 92 | @tar c --transform="s,^,blackrock/,S" bin/blackrock bin/e2fsck bin/tune2fs bin/resize2fs bundle | xz -c -0 > blackrock-fast.tar.xz 93 | 94 | # ======================================================================================== 95 | # Local testing 96 | 97 | .local/mongo: 98 | @mkdir -p .local 99 | truncate -s 10737418240 .local/mongo 100 | /sbin/mkfs.ext4 .local/mongo 101 | 102 | .local/storage: 103 | @mkdir -p .local 104 | truncate -s 10737418240 .local/storage 105 | /sbin/mkfs.ext4 .local/storage 106 | 107 | local-config: test-config.capnp 108 | capnp eval --binary -Isrc test-config.capnp vagrant > local-config 109 | 110 | run-local: bundle bin/e2fsck local-config .local/mongo .local/storage 111 | # We need to bring up one VM in advance to make the vboxnet0 network interface appear. 
112 | (vagrant status --machine-readable | grep -q 'storage0,state,running') || vagrant up storage0 113 | bin/blackrock master local-config -r 114 | 115 | kill-local: 116 | vagrant destroy -f 117 | 118 | local-mongo: 119 | mongo -u sandstorm --password="$$(vagrant ssh mongo0 -c 'cat /var/blackrock/bundle/mongo/passwd')" --authenticationDatabase admin 172.28.128.50/meteor 120 | 121 | local-admintoken: 122 | vagrant ssh frontend0 -c 'echo -n testtoken > /var/blackrock/bundle/sandstorm/adminToken' 123 | @echo "Now go to: http://localrock.sandstorm.io:6080/setup/token/testtoken" 124 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | set -euo pipefail 4 | 5 | if [ $# -lt 1 ]; then 6 | echo "usage: $0 test|prod [-n|-m]" >&2 7 | exit 1 8 | fi 9 | 10 | case $1 in 11 | test ) 12 | GCE_PROJECT=sandstorm-blackrock-testing 13 | export CLOUDSDK_COMPUTE_ZONE=us-central1-f 14 | BUILD=0 15 | BUILDSTAMP=$(date -u +%Y%m%d-%H%M%S) 16 | ;; 17 | prod ) 18 | GCE_PROJECT=sandstorm-oasis 19 | export CLOUDSDK_COMPUTE_ZONE=us-central1-c 20 | 21 | # We always do a Blackrock prod release shortly after a Sandstorm release. 22 | BUILD=$(curl -s https://install.sandstorm.io/dev) 23 | BUILDSTAMP=$BUILD-$(date -u +%Y%m%d-%H%M%S) 24 | 25 | if (grep -r KJ_DBG src/* | egrep -v '/(debug(-test)?|exception)[.]'); then 26 | echo '*** Error: There are instances of KJ_DBG in the code.' >&2 27 | exit 1 28 | fi 29 | 30 | if egrep -r 'TODO\(now\)' src/*; then 31 | echo '*** Error: There are release-blocking TODOs in the code.' >&2 32 | exit 1 33 | fi 34 | 35 | if [ "x$(git status --porcelain)" != "x" ]; then 36 | echo "Please commit changes to git before releasing." 
>&2 37 | exit 1 38 | fi 39 | ;; 40 | * ) 41 | echo "no such target: $1" >&2 42 | exit 1 43 | ;; 44 | esac 45 | 46 | shift 47 | 48 | DRY_RUN=no 49 | CONFIRM_EACH=no 50 | HOTFIX=no 51 | 52 | while [ $# -gt 0 ]; do 53 | case $1 in 54 | -n ) 55 | DRY_RUN=yes 56 | ;; 57 | -m ) 58 | CONFIRM_EACH=yes 59 | ;; 60 | -h ) 61 | HOTFIX=yes 62 | ;; 63 | * ) 64 | echo "unknown arg: $1" >&2 65 | exit 1 66 | ;; 67 | esac 68 | shift 69 | done 70 | 71 | gce() { 72 | gcloud --project=$GCE_PROJECT compute "$@" 73 | } 74 | 75 | doit() { 76 | local ANSWER 77 | if [ "$CONFIRM_EACH" != "no" ]; then 78 | printf "\033[0;33m=== RUN? %s ===\033[0m" "$*" 79 | read -sn 1 ANSWER 80 | if [ -z "$ANSWER" ]; then 81 | printf "\r\033[K" 82 | else 83 | printf "\033[0;31m\r=== SKIPPED: %s ===\033[0m\n" "$*" 84 | return 85 | fi 86 | fi 87 | 88 | printf "\033[0;35m=== %s ===\033[0m\n" "$*" 89 | 90 | if [ "$DRY_RUN" = "no" ]; then 91 | "$@" 92 | fi 93 | } 94 | 95 | doit make clean BUILD=$BUILD 96 | doit make BUILD=$BUILD 97 | 98 | if [ "$HOTFIX" = "yes" ]; then 99 | FRONTENDS=$(gce instances list --format=text | grep '^name:' | sed -e 's/^name: *//g' | grep '^frontend' | grep -v 'frontend0$') 100 | FRONTENDS="$FRONTENDS frontend0" 101 | GATEWAYS=$(gce instances list --format=text | grep '^name:' | sed -e 's/^name: *//g' | grep '^gateway') 102 | MACHINES="$FRONTENDS $GATEWAYS" 103 | 104 | for MACHINE in $MACHINES; do 105 | doit gce copy-files blackrock.tar.xz "root@$MACHINE:/root" 106 | done 107 | 108 | for FRONTEND in $FRONTENDS; do 109 | doit gce ssh "root@$FRONTEND" --command 'cd /root && rm -rf blackrock /blackrock/bundle.new && tar Jxof blackrock.tar.xz && mv blackrock/bundle /blackrock/bundle.new && cd /blackrock && mv bundle bundle.$(date -u +%Y%m%d-%H%M%S) && mv bundle.new bundle' 110 | done 111 | for GATEWAY in $GATEWAYS; do 112 | doit gce ssh "root@$GATEWAY" --command 'cd /root && rm -rf blackrock /blackrock/bundle.new && tar Jxof blackrock.tar.xz && mv blackrock/bin/blackrock 
/blackrock/bin/blackrock.new && cd /blackrock/bin && mv blackrock blackrock.$(date -u +%Y%m%d-%H%M%S) && mv blackrock.new blackrock' 113 | done 114 | 115 | for FRONTEND in $FRONTENDS; do 116 | doit gce ssh "root@$FRONTEND" --command 'kill $(pidof node)' 117 | done 118 | for GATEWAY in $GATEWAYS; do 119 | doit gce ssh "root@$GATEWAY" --command 'kill -9 $(ps ax | grep blackrock | grep slave | awk "{print \$1}")' 120 | done 121 | 122 | exit 0 123 | fi 124 | 125 | # Keep unstripped binary for debugging. 126 | mkdir -p dbg 127 | cp bin/blackrock.unstripped dbg/blackrock-$BUILDSTAMP 128 | 129 | # Create a new image. 130 | doit gce instances create build --image blackrock-240-20181020-200157 131 | doit sleep 10 # make sure instance is up 132 | doit gce ssh build --command 'sudo sed -i -e "s/PermitRootLogin no/PermitRootLogin without-password/g" /etc/ssh/sshd_config; sudo service ssh restart' 133 | doit gce copy-files blackrock.tar.xz root@build:/ 134 | doit gce ssh root@build --command "cd / && rm -rf /blackrock && tar Jxof blackrock.tar.xz && rm /blackrock.tar.xz" 135 | doit gce instances delete build -q --keep-disks boot 136 | doit gce images create blackrock-$BUILDSTAMP --source-disk build 137 | doit gce disks delete -q build 138 | 139 | # Also upload to master. 140 | doit gce copy-files bin/blackrock root@master:/blackrock/bin/blackrock-$BUILDSTAMP 141 | -------------------------------------------------------------------------------- /src/blackrock/sparse-data.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace blackrock { 27 | 28 | class SparseDataMain { 29 | // Main class for a simple program that produces a SparseData from an input sparse file. 30 | // The output is written as a single-segment message (no leading segment table). 31 | 32 | public: 33 | SparseDataMain(kj::ProcessContext& context): context(context) {} 34 | 35 | kj::MainFunc getMain() { 36 | return kj::MainBuilder(context, "unknown version", 37 | "Given a sparse file, output (on stdout) a blackrock::SparseData " 38 | "Cap'n Proto representation of the file content.") 39 | .expectArg("", KJ_BIND_METHOD(*this, run)) 40 | .build(); 41 | } 42 | 43 | kj::MainBuilder::Validity run(kj::StringPtr arg) { 44 | auto fd = sandstorm::raiiOpen(arg, O_RDONLY | O_CLOEXEC); 45 | 46 | capnp::MallocMessageBuilder message(1 << 17); // start with 1MB 47 | auto root = message.getRoot(); 48 | auto orphanage = message.getOrphanage(); 49 | 50 | kj::Vector chunks; 51 | Chunk chunk; 52 | chunk.offset = 0; 53 | 54 | kj::byte block[4096]; 55 | 56 | off_t offset = 0; 57 | for (;;) { 58 | retry: 59 | offset = lseek(fd, offset, SEEK_DATA); 60 | if (offset < 0) { 61 | int error = errno; 62 | if (error == EINTR) { 63 | goto retry; 64 | } else if (error == ENXIO) { 65 | // reached EOF 66 | break; 67 | } else { 68 | KJ_FAIL_SYSCALL("lseek", error); 69 | } 70 | } 71 | 72 | KJ_ASSERT(offset % sizeof(block) == 0); 73 | 74 | size_t n = 
kj::FdInputStream(fd.get()).tryRead(block, sizeof(block), sizeof(block)); 75 | KJ_ASSERT(n > 0); 76 | 77 | KJ_LOG(INFO, kj::hex((uint64_t)offset / sizeof(block)), kj::encodeHex(block)); 78 | 79 | for (kj::byte b: block) { 80 | if (b != 0) { 81 | // This block has non-zero bytes. We need to add it to the results. Note that we write 82 | // a whole block even if it contains runs of zeros because block-aligned writes probably 83 | // will make our main use case (initializing ext4 block devices) more efficient. 84 | if (chunk.data == nullptr) { 85 | newChunk: 86 | chunk.offset = offset; 87 | chunk.data = orphanage.newOrphanCopy(capnp::Data::Reader(block, n)); 88 | } else { 89 | size_t chunkSize = chunk.data.getReader().size(); 90 | if (chunk.offset + chunkSize == offset) { 91 | // Extend the chunk. 92 | chunk.data.truncate(chunkSize + n); 93 | memcpy(chunk.data.get().begin() + chunkSize, block, n); 94 | } else { 95 | // Start new chunk. 96 | chunks.add(kj::mv(chunk)); 97 | goto newChunk; 98 | } 99 | } 100 | break; 101 | } 102 | } 103 | 104 | offset += n; 105 | } 106 | 107 | if (chunk.data != nullptr) { 108 | chunks.add(kj::mv(chunk)); 109 | } 110 | 111 | auto list = root.initChunks(chunks.size()); 112 | for (auto i: kj::indices(chunks)) { 113 | auto chunkBuilder = list[i]; 114 | chunkBuilder.setOffset(chunks[i].offset); 115 | chunkBuilder.adoptData(kj::mv(chunks[i].data)); 116 | } 117 | 118 | capnp::writeMessageToFd(STDOUT_FILENO, message); 119 | 120 | return true; 121 | } 122 | 123 | private: 124 | kj::ProcessContext& context; 125 | 126 | struct Chunk { 127 | uint64_t offset; 128 | capnp::Orphan data; 129 | }; 130 | }; 131 | 132 | } // namespace blackrock 133 | 134 | KJ_MAIN(blackrock::SparseDataMain) 135 | -------------------------------------------------------------------------------- /src/blackrock/master.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development 
Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_MASTER_H_ 18 | #define BLACKROCK_MASTER_H_ 19 | 20 | #include "common.h" 21 | #include "cluster-rpc.h" 22 | #include 23 | #include 24 | #include 25 | #include "logs.h" 26 | 27 | namespace sandstorm { 28 | class SubprocessSet; 29 | } 30 | 31 | namespace blackrock { 32 | 33 | class ComputeDriver { 34 | public: 35 | enum class MachineType { 36 | STORAGE, 37 | WORKER, 38 | COORDINATOR, 39 | FRONTEND, 40 | MONGO, 41 | GATEWAY 42 | }; 43 | 44 | struct MachineId { 45 | MachineType type; 46 | uint index; 47 | 48 | inline bool operator==(const MachineId& other) const { 49 | return type == other.type && index == other.index; 50 | } 51 | inline bool operator<(const MachineId& other) const { 52 | return type < other.type ? true : 53 | type > other.type ? false : 54 | index < other.index; 55 | } 56 | 57 | kj::String toString() const; 58 | // Makes reasonable hostnames. E.g. { STORAGE, 123 } becomes "storage123". 59 | 60 | MachineId() = default; 61 | inline MachineId(MachineType type, uint index): type(type), index(index) {} 62 | MachineId(kj::StringPtr name); 63 | // Parses results of toString(). 64 | }; 65 | 66 | struct MachineStatus { 67 | MachineId id; 68 | kj::Maybe path; 69 | // Current path, or null if not powered up. Path remains valid until halt() or destroy() is 70 | // called on the machine. 
71 | }; 72 | 73 | virtual SimpleAddress getMasterBindAddress() = 0; 74 | // Get the address at which other machines in the cluster will see the master (i.e. this) 75 | // machine. 76 | 77 | virtual kj::Promise> listMachines() KJ_WARN_UNUSED_RESULT = 0; 78 | // List all machines currently running in the cluster. 79 | 80 | virtual kj::Promise boot(MachineId id) = 0; 81 | // Boot the given machine. 82 | 83 | virtual kj::Promise run(MachineId id, 84 | VatId::Reader masterVatId, bool requireRestartProcess) KJ_WARN_UNUSED_RESULT = 0; 85 | // Run the Blackrock process on the given machine. If `requireRestartProcess` is true, 86 | // then all blackrock processes on the machine should be immediately terminated and restarted. 87 | // Depending on the driver, this may or may not have the effect of updating the binary to the 88 | // latest version. Note that `requireRestartProcess` is often much faster than stop() followed 89 | // by boot() and run(), but not as reliable. 90 | 91 | virtual kj::Promise stop(MachineId id) KJ_WARN_UNUSED_RESULT = 0; 92 | // Shut down the given machine. 
93 | }; 94 | 95 | void runMaster(kj::AsyncIoContext& ioContext, ComputeDriver& driver, MasterConfig::Reader config, 96 | bool shouldRestart, kj::ArrayPtr machinesToRestart); 97 | 98 | class VagrantDriver: public ComputeDriver { 99 | public: 100 | VagrantDriver(sandstorm::SubprocessSet& subprocessSet, kj::LowLevelAsyncIoProvider& ioProvider); 101 | ~VagrantDriver() noexcept(false); 102 | 103 | SimpleAddress getMasterBindAddress() override; 104 | kj::Promise> listMachines() override; 105 | kj::Promise boot(MachineId id) override; 106 | kj::Promise run(MachineId id, VatId::Reader masterVatId, 107 | bool requireRestartProcess) override; 108 | kj::Promise stop(MachineId id) override; 109 | 110 | private: 111 | sandstorm::SubprocessSet& subprocessSet; 112 | kj::LowLevelAsyncIoProvider& ioProvider; 113 | std::map> vatPaths; 114 | SimpleAddress masterBindAddress; 115 | 116 | LogSink logSink; 117 | kj::Promise logTask; 118 | SimpleAddress logSinkAddress; 119 | 120 | kj::Promise bootQueue = kj::READY_NOW; 121 | }; 122 | 123 | } // namespace blackrock 124 | 125 | #endif // BLACKROCK_MASTER_H_ 126 | -------------------------------------------------------------------------------- /src/blackrock/fs-storage.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xfc40bcbedafbe11c; 18 | # An implementation of the Storage interfaces based on a standard filesystem. 19 | # 20 | # TODO(doc): The following is outdated! 21 | # 22 | # All objects are stored in a massive directory with filenames like: 23 | # o: object content 24 | # c: list of object IDs that should be deleted if this object is deleted. This list 25 | # is generally append-only and so can contain IDs that no longer exist; those 26 | # should be ignored when deleting. 27 | # 28 | # In all cases above, an is a 16-byte value base64-encoded to 22 bytes (with '-' and '_' as 29 | # digits 62 and 63). Note that ext4 directory entries are 8 + name_len bytes, rounded up to a 30 | # multiple of 4, with no NUL terminator stored. Since our filenames are 23 bytes (including prefix 31 | # character), each directory entry comes out to 32 bytes (31 rounded up). That seems sort of nice? 32 | # 33 | # A second directory, called "staging", stores files whose names consist of exactly 16 hex digits, 34 | # and which are intended to be rename()ed into place later on. Files in staging exist only for 35 | # their content. If that content includes outgoing owned references, the target objects are either 36 | # in staging themselves or are owned by some non-staging objects and are scheduled to have owneship 37 | # transferred in an upcoming transaction. In other words, when deleting an object out of staging, 38 | # it does NOT make sense to recursively delete its children. 39 | # 40 | # A third directory, called "deathrow", contains objects scheduled for recursive deletion. Objects 41 | # here used to be under the main directory, but have been deleted. Before actually deleting the 42 | # file, it is necessary to move all of its children into "deathrow". This process of recursive 43 | # deletion can occur in a separate thread (or process!) 
so that deep deletions do not block other 44 | # tasks. 45 | 46 | $import "/capnp/c++.capnp".namespace("blackrock"); 47 | using Storage = import "storage.capnp"; 48 | using SturdyRef = import "cluster-rpc.capnp".SturdyRef; 49 | 50 | struct StoredObjectId { 51 | # 16-byte ID of the object. This is calculated as the 16-byte blake2b hash of the object key. 52 | 53 | id0 @0 :UInt64; 54 | id1 @1 :UInt64; 55 | } 56 | 57 | struct StoredObjectKey { 58 | # Key to decrypt an object. 59 | 60 | key0 @0 :UInt64; 61 | key1 @1 :UInt64; 62 | key2 @2 :UInt64; 63 | key3 @3 :UInt64; 64 | } 65 | 66 | struct StoredIncomingRef { 67 | # Stored in `ref/`, where is the base64('+','_') of the 16-byte blake2b hash of 68 | # the ref key (the 32-byte key stored in the SturdyRef). Encrypted by the ref key. 69 | 70 | owner @0 :SturdyRef.Owner; 71 | # Who is allowed to restore this ref? 72 | 73 | key @1 :StoredObjectKey; 74 | # Key to the object. 75 | } 76 | 77 | struct StoredChildIds { 78 | # A stored `Assignable` or `Immutable` object file contains two Cap'n Proto messages: 79 | # StoredChildIds followed by StoredObject. The latter could be encrypted. 80 | 81 | children @0 :List(StoredObjectId); 82 | # List of owned children of this object. If this object is deleted, all children should be 83 | # deleted as well. 84 | } 85 | 86 | struct StoredObject { 87 | # A stored `Assignable` or `Immutable` object file contains two Cap'n Proto messages: 88 | # StoredChildIds followed by StoredObject. The latter could be encrypted. 89 | 90 | capTable @0 :List(CapDescriptor); 91 | payload @1 :AnyPointer; 92 | 93 | struct CapDescriptor { 94 | union { 95 | none @0 :Void; 96 | # Null. (But `null` is not a good variable name due to macro conflicts.) 97 | 98 | child @1 :StoredObjectKey; 99 | # This points to an owned child object. 100 | 101 | external @2 :SturdyRef; 102 | # A remote capability. (Could point back to storage, but the object isn't owned by us.) 
103 | } 104 | } 105 | } 106 | 107 | struct StoredRoot { 108 | # A root object. 109 | 110 | key @0 :StoredObjectKey; 111 | } 112 | -------------------------------------------------------------------------------- /src/blackrock/frontend.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_FRONTEND_H_ 18 | #define BLACKROCK_FRONTEND_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "backend-set.h" 32 | #include "cluster-rpc.h" 33 | 34 | namespace blackrock { 35 | 36 | class FrontendImpl: public Frontend::Server { 37 | public: 38 | FrontendImpl(kj::LowLevelAsyncIoProvider& llaiop, 39 | sandstorm::SubprocessSet& subprocessSet, 40 | FrontendConfig::Reader config, uint replicaNumber, 41 | SimpleAddress bindAddress); 42 | 43 | void setConfig(FrontendConfig::Reader config); 44 | 45 | BackendSet::Client getStorageRootBackendSet(); 46 | BackendSet::Client getStorageFactoryBackendSet(); 47 | BackendSet::Client getWorkerBackendSet(); 48 | BackendSet::Client getMongoBackendSet(); 49 | 50 | protected: 51 | kj::Promise getInstances(GetInstancesContext context) override; 52 | 53 | private: 54 | class BackendImpl; 55 | struct MongoInfo; 56 | class Instance; 57 | 58 | kj::Own configMessage; 59 | FrontendConfig::Reader config; 60 | 61 | kj::Own> storageRoots; 62 | kj::Own> storageFactories; 63 | kj::Own> workers; 64 | kj::Own> mongos; 65 | 66 | kj::Vector> instances; 67 | 68 | class Instance: private kj::TaskSet::ErrorHandler { 69 | public: 70 | Instance(FrontendImpl& frontend, kj::LowLevelAsyncIoProvider& llaiop, 71 | sandstorm::SubprocessSet& subprocessSet, uint frontendNumber, uint instanceNumber, 72 | SimpleAddress bindAddress, 73 | kj::PromiseFulfillerPair paf = 74 | kj::newPromiseAndFulfiller()); 75 | 76 | void restart(FrontendConfig::Reader config); 77 | 78 | void getInfo(Frontend::Instance::Builder info); 79 | 80 | private: 81 | kj::Timer& timer; 82 | sandstorm::SubprocessSet& subprocessSet; 83 | FrontendConfig::Reader config; 84 | uint replicaNumber; 85 | uint httpPort; 86 | uint smtpPort; 87 | SimpleAddress bindAddress; 88 | 89 | sandstorm::TwoPartyServerWithClientBootstrap capnpServer; 90 | 
pid_t pid = 0; 91 | kj::TaskSet tasks; 92 | 93 | kj::Promise startExecLoop(MongoInfo&& mongoInfo, kj::AutoCloseFd&& backendClientFd); 94 | 95 | kj::Promise execLoop(MongoInfo&& mongoInfo, kj::AutoCloseFd&& http, 96 | kj::AutoCloseFd&& backendClientFd, kj::AutoCloseFd&& smtp); 97 | 98 | void taskFailed(kj::Exception&& exception) override; 99 | }; 100 | }; 101 | 102 | class MongoImpl: public Mongo::Server { 103 | public: 104 | explicit MongoImpl( 105 | kj::Timer& timer, sandstorm::SubprocessSet& subprocessSet, SimpleAddress bindAddress, 106 | kj::PromiseFulfillerPair passwordPaf = kj::newPromiseAndFulfiller()); 107 | 108 | protected: 109 | kj::Promise getConnectionInfo(GetConnectionInfoContext context) override; 110 | 111 | private: 112 | kj::Timer& timer; 113 | sandstorm::SubprocessSet& subprocessSet; 114 | SimpleAddress bindAddress; 115 | kj::Maybe password; 116 | kj::ForkedPromise passwordPromise; 117 | kj::Promise execTask; 118 | 119 | kj::Promise startExecLoop(kj::Own> passwordFulfiller); 120 | kj::Promise execLoop(kj::PromiseFulfiller& passwordFulfiller); 121 | kj::Promise initializeMongo(); 122 | kj::Promise mongoCommand(kj::String command, kj::StringPtr dbName = "meteor"); 123 | }; 124 | 125 | } // namespace blackrock 126 | 127 | #endif // BLACKROCK_FRONTEND_H_ 128 | -------------------------------------------------------------------------------- /src/blackrock/logs-tester.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include "logs.h" 18 | #include 19 | #include 20 | #include 21 | #include "cluster-rpc.h" 22 | 23 | namespace blackrock { 24 | 25 | class LogsTester { 26 | // A test program for the logging system. 27 | 28 | public: 29 | LogsTester(kj::ProcessContext& context): context(context) {} 30 | 31 | kj::MainFunc getMain() { 32 | return kj::MainBuilder(context, "Blackrock logs tester", "Tests logs.") 33 | .addSubCommand("server", KJ_BIND_METHOD(*this, getServerMain), "run a logs server") 34 | .addSubCommand("client", KJ_BIND_METHOD(*this, getClientMain), "run a logs client") 35 | .addSubCommand("fake", KJ_BIND_METHOD(*this, getFakeMain), "run a fake log server") 36 | .build(); 37 | } 38 | 39 | kj::MainFunc getServerMain() { 40 | return kj::MainBuilder(context, "Blackrock logs tester", 41 | "Runs a log server locally and arranges for clients to be able " 42 | "to connect to it. Prints all logs to stdout unless a log directory " 43 | "is provided.") 44 | .addOptionWithArg({'d', "dir"}, KJ_BIND_METHOD(*this, setLogDir), "", 45 | "save logs to a directory") 46 | .callAfterParsing(KJ_BIND_METHOD(*this, runServer)) 47 | .build(); 48 | } 49 | 50 | kj::MainFunc getClientMain() { 51 | return kj::MainBuilder(context, "Blackrock logs tester", 52 | "Runs a client with the given name connecting to the local server. 
" 53 | "Whatever you enter on stdin will be logged.") 54 | .expectArg("", KJ_BIND_METHOD(*this, setName)) 55 | .callAfterParsing(KJ_BIND_METHOD(*this, runClient)) 56 | .build(); 57 | } 58 | 59 | kj::MainFunc getFakeMain() { 60 | return kj::MainBuilder(context, "Blackrock logs tester", 61 | "Runs a fake server that closes connections immediately upon receipt.") 62 | .callAfterParsing(KJ_BIND_METHOD(*this, runFake)) 63 | .build(); 64 | } 65 | 66 | private: 67 | kj::ProcessContext& context; 68 | kj::Maybe logDir; 69 | kj::StringPtr name; 70 | kj::StringPtr addrFile = "/tmp/blackrock-logs-tester-addr"; 71 | 72 | bool setLogDir(kj::StringPtr arg) { 73 | logDir = sandstorm::raiiOpen(arg, O_RDONLY | O_DIRECTORY | O_CLOEXEC); 74 | return true; 75 | } 76 | 77 | bool setName(kj::StringPtr arg) { 78 | name = arg; 79 | return true; 80 | } 81 | 82 | bool runServer() { 83 | auto io = kj::setupAsyncIo(); 84 | sandstorm::SubprocessSet subprocessSet(io.unixEventPort); 85 | 86 | kj::Own rotater; 87 | KJ_IF_MAYBE(l, logDir) { 88 | auto logPipe = sandstorm::Pipe::make(); 89 | auto readEnd = kj::mv(logPipe.readEnd); 90 | int logDirFd = *l; 91 | rotater = kj::heap([KJ_MVCAP(readEnd),logDirFd]() { 92 | rotateLogs(readEnd, logDirFd); 93 | }); 94 | 95 | KJ_SYSCALL(dup2(logPipe.writeEnd, STDOUT_FILENO)); 96 | } 97 | 98 | // Close log pipe on scope exit, so that thread stops. 99 | KJ_DEFER(KJ_SYSCALL(dup2(STDERR_FILENO, STDOUT_FILENO))); 100 | 101 | LogSink sink; 102 | sink.acceptLoop(listen(io.provider->getNetwork())).wait(io.waitScope); 103 | return true; 104 | } 105 | 106 | bool runClient() { 107 | runLogClient(name, addrFile, "/tmp"); 108 | return true; 109 | } 110 | 111 | bool runFake() { 112 | auto io = kj::setupAsyncIo(); 113 | auto listener = listen(io.provider->getNetwork()); 114 | for (;;) { 115 | // Accept connections and just close them right away. 
116 | listener->accept().wait(io.waitScope); 117 | } 118 | } 119 | 120 | kj::Own listen(kj::Network& network) { 121 | auto addr = SimpleAddress::getLocalhost(AF_INET); 122 | auto listener = addr.onNetwork(network)->listen(); 123 | addr.setPort(listener->getPort()); 124 | kj::FdOutputStream(sandstorm::raiiOpen(addrFile, O_WRONLY | O_CREAT | O_TRUNC)) 125 | .write(&addr, sizeof(addr)); 126 | return listener; 127 | } 128 | }; 129 | 130 | } // namespace blackrock 131 | 132 | KJ_MAIN(blackrock::LogsTester); 133 | -------------------------------------------------------------------------------- /src/blackrock/gateway.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2017 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_GATEWAY_H_ 18 | #define BLACKROCK_GATEWAY_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include "backend-set.h" 23 | #include "cluster-rpc.h" 24 | #include 25 | #include 26 | #include 27 | 28 | namespace blackrock { 29 | 30 | class GatewayImpl: public GatewayImplBase::Server, private kj::HttpService, 31 | private kj::TaskSet::ErrorHandler { 32 | public: 33 | GatewayImpl(kj::Timer& timer, kj::Network& network, FrontendConfig::Reader config); 34 | 35 | void setConfig(FrontendConfig::Reader config); 36 | 37 | protected: 38 | kj::Promise reset(ResetContext context) override; 39 | kj::Promise add(AddContext context) override; 40 | kj::Promise remove(RemoveContext context) override; 41 | // We implement BackendSet directly rather than use BackendSetImpl because we want to 42 | // implement session affinity. 43 | 44 | kj::Promise request( 45 | kj::HttpMethod method, kj::StringPtr url, const kj::HttpHeaders& headers, 46 | kj::AsyncInputStream& requestBody, Response& response) override; 47 | 48 | private: 49 | struct ShellReplica: kj::Refcounted { 50 | uint64_t backendId; 51 | kj::Own httpAddress; 52 | kj::Own smtpAddress; 53 | kj::Own shellHttp; 54 | sandstorm::GatewayRouter::Client router; 55 | sandstorm::GatewayService service; 56 | kj::Promise cleanupLoop; 57 | 58 | ShellReplica(GatewayImpl& gateway, uint64_t backendId, Frontend::Instance::Reader instance); 59 | }; 60 | 61 | class EntropySourceImpl: public kj::EntropySource { 62 | public: 63 | void generate(kj::ArrayPtr buffer) override; 64 | }; 65 | 66 | class SmtpNetworkAddressImpl: public kj::NetworkAddress { 67 | public: 68 | SmtpNetworkAddressImpl(GatewayImpl& gateway): gateway(gateway) {} 69 | 70 | kj::Promise> connect() override; 71 | kj::Own listen() override { KJ_UNIMPLEMENTED("fake address"); } 72 | kj::Own clone() override { KJ_UNIMPLEMENTED("fake address"); } 73 | kj::String toString() override { KJ_UNIMPLEMENTED("fake address"); } 74 | 75 | private: 76 | GatewayImpl& 
gateway; 77 | }; 78 | 79 | kj::Timer& timer; 80 | kj::Network& network; 81 | 82 | sandstorm::GatewayService::Tables gatewayServiceTables; 83 | kj::HttpHeaderId hXRealIp; 84 | 85 | kj::Own configMessage; 86 | FrontendConfig::Reader config; 87 | sandstorm::WildcardMatcher wildcardHost; 88 | 89 | kj::Vector>> shellReplicas; 90 | // Maps replica number -> ShellReplica. Used as hash buckets when load balancing with affinity. 91 | // If a shell is down, its bucket will be null, and we have to search for an alternative. 92 | 93 | kj::Own httpReceiver; 94 | 95 | EntropySourceImpl entropySource; 96 | kj::HttpClientSettings clientSettings; 97 | 98 | kj::Own headerTable; 99 | kj::HttpServer httpServer; 100 | sandstorm::AltPortService altPortService; 101 | kj::HttpServer altPortHttpServer; 102 | SmtpNetworkAddressImpl smtpServer; 103 | sandstorm::GatewayTlsManager tlsManager; 104 | 105 | uint roundRobinCounter = 0; 106 | 107 | struct ReadyPair { 108 | kj::ForkedPromise promise; 109 | kj::Own> fulfiller; 110 | }; 111 | kj::Maybe readyPaf; 112 | 113 | kj::TaskSet tasks; 114 | 115 | GatewayImpl(kj::Timer& timer, kj::Network& network, FrontendConfig::Reader config, 116 | kj::HttpHeaderTable::Builder headerTableBuilder); 117 | 118 | kj::Promise addFrontend(uint64_t backendId, Frontend::Client frontend); 119 | 120 | void addReplica(kj::Own newReplica); 121 | 122 | void setReplica(uint replicaNumber, kj::Maybe> newReplica, 123 | kj::Maybe requireBackendId = nullptr); 124 | kj::Promise> chooseReplica(uint64_t hash); 125 | 126 | uint64_t urlSessionHash(kj::StringPtr url, const kj::HttpHeaders& headers); 127 | 128 | void taskFailed(kj::Exception&& exception) override; 129 | }; 130 | 131 | } // namespace blackrock 132 | 133 | #endif // BLACKROCK_GATEWAY_H_ 134 | -------------------------------------------------------------------------------- /src/blackrock/fs-storage.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // 
Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_VOLUME_H_ 18 | #define BLACKROCK_VOLUME_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace kj { 27 | class UnixEventPort; 28 | class Timer; 29 | } 30 | 31 | namespace blackrock { 32 | 33 | class FilesystemStorage: public StorageRootSet::Server { 34 | public: 35 | FilesystemStorage(int directoryFd, kj::UnixEventPort& eventPort, kj::Timer& timer, 36 | Restorer::Client&& restorer); 37 | ~FilesystemStorage() noexcept(false); 38 | 39 | protected: 40 | kj::Promise set(SetContext context) override; 41 | kj::Promise get(GetContext context) override; 42 | kj::Promise tryGet(TryGetContext context) override; 43 | kj::Promise getOrCreateAssignable(GetOrCreateAssignableContext context) override; 44 | kj::Promise remove(RemoveContext context) override; 45 | kj::Promise getFactory(GetFactoryContext context) override; 46 | 47 | public: 48 | struct ObjectKey { 49 | uint64_t key[4]; 50 | 51 | ObjectKey() = default; 52 | ObjectKey(StoredObjectKey::Reader reader) 53 | : key { reader.getKey0(), reader.getKey1(), reader.getKey2(), reader.getKey3() } {} 54 | ~ObjectKey() { 55 | sodium_memzero(key, sizeof(key)); 56 | } 57 | 58 | static ObjectKey generate(); 59 | 60 | inline void copyTo(StoredObjectKey::Builder builder) const { 61 
| builder.setKey0(key[0]); 62 | builder.setKey1(key[1]); 63 | builder.setKey2(key[2]); 64 | builder.setKey3(key[3]); 65 | } 66 | }; 67 | 68 | struct ObjectId { 69 | uint64_t id[2]; 70 | // The object ID. Equals the 16-byte blake2b hash of the key. 71 | 72 | ObjectId() = default; 73 | ObjectId(decltype(nullptr)): id {0, 0} {} 74 | ObjectId(StoredObjectId::Reader reader) 75 | : id { reader.getId0(), reader.getId1() } {} 76 | ObjectId(const ObjectKey& key); 77 | 78 | inline bool operator==(const ObjectId& other) const { 79 | return ((id[0] ^ other.id[0]) | (id[1] ^ other.id[1])) == 0; // constant-time 80 | } 81 | inline bool operator!=(const ObjectId& other) const { 82 | return !operator==(other); 83 | } 84 | inline bool operator==(decltype(nullptr)) const { 85 | return (id[0] | id[1]) == 0; 86 | } 87 | inline bool operator!=(decltype(nullptr)) const { 88 | return !operator==(nullptr); 89 | } 90 | 91 | inline void copyTo(StoredObjectId::Builder builder) const { 92 | builder.setId0(id[0]); 93 | builder.setId1(id[1]); 94 | } 95 | 96 | struct Hash { 97 | inline size_t operator()(const ObjectId& id) const { return id.id[0]; } 98 | }; 99 | 100 | kj::FixedArray filename(char prefix) const; 101 | }; 102 | 103 | private: 104 | class ObjectBase; 105 | class BlobImpl; 106 | class VolumeImpl; 107 | class ImmutableImpl; 108 | class AssignableImpl; 109 | class CollectionImpl; 110 | class OpaqueImpl; 111 | class StorageFactoryImpl; 112 | enum class Type: uint8_t; 113 | struct Xattr; 114 | class Journal; 115 | class DeathRow; 116 | class ObjectFactory; 117 | 118 | kj::AutoCloseFd mainDirFd; 119 | kj::AutoCloseFd stagingDirFd; 120 | kj::AutoCloseFd deathRowFd; 121 | kj::AutoCloseFd rootsFd; 122 | 123 | kj::Own deathRow; 124 | kj::Own journal; 125 | kj::Own factory; 126 | 127 | kj::Promise setImpl(kj::String name, OwnedStorage<>::Client object); 128 | 129 | kj::Maybe openObject(ObjectId id); 130 | kj::Maybe openStaging(uint64_t number); 131 | kj::AutoCloseFd createObject(ObjectId id); 
132 | kj::AutoCloseFd createTempFile(); 133 | void linkTempIntoStaging(uint64_t number, int fd, const Xattr& xattr); 134 | void deleteStaging(uint64_t number); 135 | void deleteAllStaging(); 136 | void createFromStagingIfExists(uint64_t stagingId, ObjectId finalId, const Xattr& attributes); 137 | void replaceFromStagingIfExists(uint64_t stagingId, ObjectId finalId, const Xattr& attributes); 138 | void setAttributesIfExists(ObjectId objectId, const Xattr& attributes); 139 | void moveToDeathRowIfExists(ObjectId id, bool notify = true); 140 | void sync(); 141 | 142 | static bool isStoredObjectType(Type type); 143 | }; 144 | 145 | } // namespace blackrock 146 | 147 | #endif // BLACKROCK_VOLUME_H_ 148 | -------------------------------------------------------------------------------- /src/blackrock/cluster-rpc.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_CLUSTERRPC_H_ 18 | #define BLACKROCK_CLUSTERRPC_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace blackrock { 30 | 31 | class SimpleAddress { 32 | public: 33 | SimpleAddress(decltype(nullptr)) {} 34 | SimpleAddress(struct sockaddr_in ip4); 35 | SimpleAddress(struct sockaddr_in6 ip6); 36 | SimpleAddress(struct sockaddr& addr, socklen_t addrLen); 37 | SimpleAddress(Address::Reader reader); 38 | 39 | static SimpleAddress getPeer(kj::AsyncIoStream& socket); 40 | static SimpleAddress getLocal(kj::AsyncIoStream& socket); 41 | static SimpleAddress getLocal(int fd); 42 | static SimpleAddress getWildcard(sa_family_t family); 43 | static SimpleAddress getLocalhost(sa_family_t family); 44 | static SimpleAddress getInterfaceAddress(sa_family_t family, kj::StringPtr ifname); 45 | static SimpleAddress lookup(kj::StringPtr address); 46 | 47 | inline sa_family_t family() const { return addr.sa_family; } 48 | 49 | uint16_t getPort() const; 50 | void setPort(uint16_t port); 51 | 52 | void copyTo(Address::Builder builder) const; 53 | 54 | static constexpr size_t FLAT_SIZE = 18; 55 | void getFlat(byte* target) const; 56 | 57 | kj::Own onNetwork(kj::Network& network); 58 | 59 | inline const struct sockaddr* asSockaddr() const { return &addr; } 60 | inline size_t getSockaddrSize() const { 61 | return addr.sa_family == AF_INET ? 
sizeof(ip4) : sizeof(ip6); 62 | } 63 | 64 | bool operator==(const SimpleAddress& other) const; 65 | inline bool operator!=(const SimpleAddress& other) const { return !operator==(other); } 66 | 67 | kj::String toStringWithoutPort() const; 68 | 69 | private: 70 | union { 71 | struct sockaddr addr; 72 | struct sockaddr_in ip4; 73 | struct sockaddr_in6 ip6; 74 | }; 75 | 76 | friend kj::String KJ_STRINGIFY(const SimpleAddress& addr); 77 | }; 78 | 79 | kj::String KJ_STRINGIFY(const SimpleAddress& addr); 80 | 81 | class VatNetwork final: public capnp::VatNetwork { 83 | public: 84 | VatNetwork(kj::Network& network, kj::Timer& timer, SimpleAddress address); 85 | // Create a new VatNetwork exported on the given local address. If the port is zero, an arbitrary 86 | // unused port will be chosen. 87 | 88 | ~VatNetwork(); 89 | 90 | VatPath::Reader getSelf() { return self.getRoot(); } 91 | 92 | kj::Maybe> connect(VatPath::Reader hostId) override; 93 | kj::Promise> accept() override; 94 | 95 | private: 96 | class LittleEndian64; 97 | class Mac; 98 | class SymmetricKey; 99 | class PrivateKey; 100 | class PublicKey; 101 | class Header; 102 | 103 | class PublicKey { 104 | public: 105 | inline PublicKey(decltype(nullptr)) {} 106 | PublicKey(VatId::Reader id); 107 | 108 | void copyTo(VatId::Builder id); 109 | 110 | inline bool operator<(const PublicKey& other) const { 111 | return memcmp(key, other.key, sizeof(key)) < 0; 112 | } 113 | inline bool operator==(const PublicKey& other) const { 114 | return memcmp(key, other.key, sizeof(key)) == 0; 115 | } 116 | inline bool operator!=(const PublicKey& other) const { 117 | return memcmp(key, other.key, sizeof(key)) != 0; 118 | } 119 | 120 | struct Hash; 121 | 122 | private: 123 | friend class PrivateKey; 124 | 125 | explicit PublicKey(const byte* privateBytes); 126 | byte key[32]; 127 | }; 128 | 129 | class PrivateKey { 130 | public: 131 | PrivateKey(); 132 | ~PrivateKey(); 133 | KJ_DISALLOW_COPY(PrivateKey); 134 | 135 | PublicKey 
getPublic() const; 136 | SymmetricKey getSharedSecret(PublicKey otherPublic) const; 137 | 138 | private: 139 | byte* key; // Allocated with sodium_malloc. 140 | }; 141 | 142 | class ConnectionImpl; 143 | struct ConnectionMap; 144 | 145 | kj::Network& network; 146 | kj::Timer& timer; 147 | PrivateKey privateKey; 148 | PublicKey publicKey; 149 | SimpleAddress address; 150 | capnp::MallocMessageBuilder self; 151 | kj::Own connectionReceiver; 152 | kj::Own connectionMap; 153 | }; 154 | 155 | } // namespace blackrock 156 | 157 | #endif // BLACKROCK_CLUSTERRPC_H_ 158 | -------------------------------------------------------------------------------- /src/blackrock/local-persistent-registry.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "local-persistent-registry.h" 18 | #include 19 | #include 20 | #include 21 | 22 | namespace blackrock { 23 | 24 | class LocalPersistentRegistry::PersistentImpl: public Persistent::Server, public kj::Refcounted { 25 | public: 26 | PersistentImpl(Registration& registration, capnp::Capability::Client inner) 27 | : registry(registration.registry), registration(registration), 28 | inner(kj::mv(inner)) {} 29 | 30 | void unregister() { 31 | registration = nullptr; 32 | } 33 | 34 | capnp::Capability::Server::DispatchCallResult dispatchCall( 35 | uint64_t interfaceId, uint16_t methodId, 36 | capnp::CallContext context) override { 37 | // TODO(perf): We need a better way to check if a method is implemented locally. Here we 38 | // attempt a local call and catch UNIMPLEMENTED exceptions, but constructing exceptions is 39 | // slow due to string manipulation (even though no actual throw/catch will take place here). 40 | auto result = Persistent::Server::dispatchCall(interfaceId, methodId, context); 41 | result.promise = result.promise.catch_([=](kj::Exception&& e) mutable -> kj::Promise { 42 | if (e.getType() == kj::Exception::Type::UNIMPLEMENTED) { 43 | auto params = context.getParams(); 44 | auto req = inner.typelessRequest(interfaceId, methodId, params.targetSize()); 45 | req.set(params); 46 | return context.tailCall(kj::mv(req)); 47 | } else { 48 | return kj::mv(e); 49 | } 50 | }); 51 | return result; 52 | } 53 | 54 | kj::Promise save(SaveContext context) override { 55 | // TODO(security): Pay attention to `sealFor`. 
56 | context.releaseParams(); 57 | 58 | auto ref = context.getResults(capnp::MessageSize {16, 0}).initSturdyRef().initTransient(); 59 | ref.setVat(registry.thisVatPath); 60 | 61 | KJ_IF_MAYBE(reg, registration) { 62 | auto savedRef = kj::heap(*reg); 63 | ref.getLocalRef().setAs(kj::ArrayPtr(savedRef->token)); 64 | auto key = kj::ArrayPtr(savedRef->token); 65 | auto insertResult = reg->registry.savedRefs.insert(std::make_pair(key, kj::mv(savedRef))); 66 | KJ_ASSERT(insertResult.second, kj::encodeHex(savedRef->token)); 67 | } else { 68 | ref.getLocalRef().initAs(sizeof(SavedRef::token)); 69 | } 70 | return kj::READY_NOW; 71 | } 72 | 73 | private: 74 | LocalPersistentRegistry& registry; 75 | kj::Maybe registration; 76 | capnp::Capability::Client inner; 77 | }; 78 | 79 | LocalPersistentRegistry::Registration::Registration( 80 | LocalPersistentRegistry& registry, capnp::Capability::Client cap) 81 | : registry(registry), wrapped(kj::refcounted(*this, kj::mv(cap))) {} 82 | 83 | LocalPersistentRegistry::Registration::~Registration() noexcept(false) { 84 | wrapped->unregister(); 85 | 86 | for (auto ref: savedRefs) { 87 | // Note: This actually deletes the ref. 
88 | registry.savedRefs.erase(kj::ArrayPtr(ref->token)); 89 | } 90 | } 91 | 92 | Persistent::Client LocalPersistentRegistry::Registration::getWrapped() { 93 | return kj::addRef(*wrapped); 94 | } 95 | 96 | LocalPersistentRegistry::SavedRef::SavedRef(Registration& registration) 97 | : registration(registration) { 98 | randombytes_buf(token, sizeof(token)); 99 | registration.savedRefs.insert(this); 100 | } 101 | 102 | LocalPersistentRegistry::SavedRef::~SavedRef() noexcept(false) { 103 | registration.savedRefs.erase(this); 104 | } 105 | 106 | kj::Own 107 | LocalPersistentRegistry::makePersistent(capnp::Capability::Client cap) { 108 | return kj::heap(*this, kj::mv(cap)); 109 | } 110 | 111 | // ======================================================================================= 112 | 113 | class LocalPersistentRegistry::RestorerImpl: public Restorer::Server { 114 | public: 115 | RestorerImpl(LocalPersistentRegistry& registry, VatPath::Reader clientId) 116 | : registry(registry), clientId(clientId.totalSize().wordCount + 4) { 117 | this->clientId.setRoot(clientId); 118 | } 119 | 120 | protected: 121 | kj::Promise restore(RestoreContext context) override { 122 | auto iter = registry.savedRefs.find(context.getParams().getSturdyRef()); 123 | KJ_REQUIRE(iter != registry.savedRefs.end(), 124 | "requested local SturdyRef doesn't exist; maybe the object was deleted"); 125 | context.releaseParams(); 126 | 127 | SavedRef& savedRef = *iter->second; 128 | context.getResults(capnp::MessageSize { 4, 1 }).setCap(savedRef.registration.getWrapped()); 129 | return kj::READY_NOW; 130 | } 131 | 132 | kj::Promise drop(DropContext context) override { 133 | registry.savedRefs.erase(context.getParams().getSturdyRef()); 134 | return kj::READY_NOW; 135 | } 136 | 137 | private: 138 | LocalPersistentRegistry& registry; 139 | capnp::MallocMessageBuilder clientId; 140 | }; 141 | 142 | Restorer::Client 143 | LocalPersistentRegistry::createRestorerFor(VatPath::Reader clientId) { 144 | return 
kj::heap(*this, clientId); 145 | } 146 | 147 | } // namespace blackrock 148 | -------------------------------------------------------------------------------- /src/blackrock/worker.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0x95ec494d81e25bb1; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | using Supervisor = import "/sandstorm/supervisor.capnp".Supervisor; 22 | using SandstormCore = import "/sandstorm/supervisor.capnp".SandstormCore; 23 | using Grain = import "/sandstorm/grain.capnp"; 24 | using Storage = import "storage.capnp"; 25 | using StorageSchema = import "storage-schema.capnp"; 26 | using Package = import "/sandstorm/package.capnp"; 27 | using Util = import "/sandstorm/util.capnp"; 28 | 29 | using GrainState = StorageSchema.GrainState; 30 | 31 | using Timepoint = UInt64; 32 | # Nanoseconds since epoch. 33 | 34 | interface Worker { 35 | # Top-level interface to a Sandstorm worker node, which runs apps. 
36 | 37 | newGrain @0 (package :PackageInfo, 38 | command :Package.Manifest.Command, 39 | storage :Storage.StorageFactory, 40 | grainId :Text, 41 | core :SandstormCore) 42 | -> (grain :Supervisor, grainState :Storage.OwnedAssignable(GrainState)); 43 | # Start a new grain using the given package. 44 | # 45 | # The caller needs to save `grainState` into a user's grain collection to make the grain 46 | # permanent. 47 | 48 | restoreGrain @1 (package :PackageInfo, 49 | command :Package.Manifest.Command, 50 | storage :Storage.StorageFactory, 51 | grainState :GrainState, 52 | exclusiveGrainStateSetter :Util.Assignable(GrainState).Setter, 53 | grainId :Text, 54 | core :SandstormCore) 55 | -> (grain :Supervisor); 56 | # Continue an existing grain. 57 | # 58 | # `grainState` is the current value of the grain's GrainState assignable, and 59 | # `exclusiveGrainStateSetter` is the setter returned by the get() call that returned 60 | # `grainState`. Thus, a `set()` call on `grainStateSetter` will fail if the grain state has 61 | # changed. 62 | # 63 | # The first thing the worker will do is attempt to set the grain state in order to assert its 64 | # exclusive ownership. If the initial `set()` fails, `restoreGrain()` throws a "disconnected" 65 | # exception, and the caller should start over. 66 | # 67 | # Assuming the `set()` succeeds, the worker will call `volume.getExclusive()` to make absolutely 68 | # sure that no other worker might still be writing to the voluse. 69 | 70 | unpackPackage @2 (storage :Storage.StorageFactory) -> (stream :PackageUploadStream); 71 | # Initiate upload of a package, unpacking it into a fresh Volume. 72 | 73 | interface PackageUploadStream extends(Util.ByteStream) { 74 | getResult @0 () -> (appId :Text, manifest :Package.Manifest, volume :Storage.OwnedVolume, 75 | authorPgpKeyFingerprint :Text); 76 | # Waits until `ByteStream.done()` is called, then returns: 77 | # 78 | # `appId`: The verified application ID string, as produced by the `spk` tool. 
79 | # `manifest`: The parsed package manifest. 80 | # `volume`: The new Volume containing the unpacked app. 81 | # `authorPgpKeyFingerprint`: If the app was PGP-signed, the author's key fingerprint. 82 | } 83 | 84 | unpackBackup @3 (data :Storage.Blob, storage :Storage.StorageFactory) 85 | -> (volume :Storage.OwnedVolume, metadata :Grain.GrainInfo); 86 | packBackup @4 (volume :Storage.Volume, metadata :Grain.GrainInfo, storage :Storage.StorageFactory) 87 | -> (data :Storage.OwnedBlob); 88 | 89 | # TODO(someday): Enumerate grains. 90 | # TODO(someday): Resource usage stats. 91 | } 92 | 93 | interface Coordinator { 94 | # Decides which workers should be running which apps. 95 | # 96 | # The Coordinator's main interface is actually Restorer(SturdyRef.Hosted) -- the Coordinator will 97 | # start up the desired grain and restore the capability. The `Coordinator` interface is only 98 | # used for creating new grains. 99 | 100 | newGrain @0 (app :Util.Assignable(AppRestoreInfo).Getter, 101 | initCommand :Package.Manifest.Command, 102 | storage :Storage.StorageFactory) 103 | -> (grain :Supervisor, grainState :Storage.OwnedAssignable(GrainState)); 104 | # Create a new grain, just like Worker.newGrain(). 105 | 106 | restoreGrain @1 (storage :Storage.StorageFactory, 107 | grainState :Storage.Assignable(GrainState)) 108 | -> (grain :Supervisor); 109 | # Restore a grain. Permanently sets the grain's package to `package` and continue command to 110 | # `command` if these weren't already the case. 111 | } 112 | 113 | struct AppRestoreInfo { 114 | package @0 :PackageInfo; 115 | restoreCommand @1 :Package.Manifest.Command; 116 | } 117 | 118 | struct PackageInfo { 119 | id @0 :Data; 120 | # Some unique identifier for this package (not assigned by the worker). 121 | # 122 | # TODO(someday): Identify packages by capability. If it's the same `Volume`, it's the same 123 | # package. 
This is arguably a security issue if an attacker can get access to the `Worker` 124 | # or `Coordinator` interfaces and then poison workers by forging package IDs, though no 125 | # attacker should ever have direct access to those interfaces, of course. 126 | 127 | volume @1 :Storage.Volume; 128 | # Read-only volume containing the unpacked package. 129 | # 130 | # TODO(security): Enforce read-only. 131 | } 132 | -------------------------------------------------------------------------------- /src/blackrock/backend-set.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_BACKEND_SET_H_ 18 | #define BLACKROCK_BACKEND_SET_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | 24 | namespace blackrock { 25 | 26 | class BackendSetBase { 27 | public: 28 | BackendSetBase(): BackendSetBase(kj::newPromiseAndFulfiller()) {} 29 | ~BackendSetBase() noexcept(false); 30 | 31 | capnp::Capability::Client chooseOne(); 32 | 33 | void clear(); 34 | void add(uint64_t id, capnp::Capability::Client client); 35 | void remove(uint64_t id); 36 | 37 | private: 38 | struct Backend { 39 | capnp::Capability::Client client; 40 | 41 | Backend(Backend&&) = default; 42 | Backend(const Backend&) = delete; 43 | // Convince STL to use the move constructor. 44 | }; 45 | 46 | std::map backends; 47 | std::map::iterator next; 48 | kj::ForkedPromise readyPromise; 49 | kj::Own> readyFulfiller; 50 | 51 | explicit BackendSetBase(kj::PromiseFulfillerPair paf); 52 | }; 53 | 54 | template 55 | class BackendSetImpl: public BackendSet::Server, public kj::Refcounted { 56 | public: 57 | typename T::Client chooseOne() { return base.chooseOne().template castAs(); } 58 | // Choose a capability from the set and return it, cycling through the set every time this 59 | // method is called. If the backend set is empty, return a promise that resolves once a backend 60 | // is available. 61 | // 62 | // TODO(someady): Would be nice to build in disconnect handling here, e.g. pass in a callback 63 | // function that initiates the work, catches exceptions and retries with a different back-end. 
64 | 65 | protected: 66 | typedef typename BackendSet::Server Interface; 67 | kj::Promise reset(typename Interface::ResetContext context) { 68 | base.clear(); 69 | for (auto backend: context.getParams().getBackends()) { 70 | base.add(backend.getId(), backend.getBackend()); 71 | } 72 | return kj::READY_NOW; 73 | } 74 | kj::Promise add(typename Interface::AddContext context) { 75 | auto params = context.getParams(); 76 | base.add(params.getId(), params.getBackend()); 77 | return kj::READY_NOW; 78 | } 79 | kj::Promise remove(typename Interface::RemoveContext context) { 80 | base.remove(context.getParams().getId()); 81 | return kj::READY_NOW; 82 | } 83 | 84 | private: 85 | BackendSetBase base; 86 | }; 87 | 88 | // ======================================================================================= 89 | 90 | class BackendSetFeederBase: private kj::TaskSet::ErrorHandler { 91 | public: 92 | explicit BackendSetFeederBase(uint minCount): minCount(minCount), tasks(*this) {} 93 | KJ_DISALLOW_COPY(BackendSetFeederBase); 94 | 95 | class Registration { 96 | public: 97 | virtual ~Registration() noexcept(false); 98 | }; 99 | 100 | kj::Own addBackend(capnp::Capability::Client cap); 101 | kj::Own addConsumer(BackendSet<>::Client set); 102 | 103 | private: 104 | class BackendRegistration; 105 | class ConsumerRegistration; 106 | 107 | uint minCount; 108 | bool ready = minCount == 0; // Becomes true when minCount backends are first available. 109 | uint64_t backendCount = 0; 110 | uint64_t nextId = 0; 111 | BackendRegistration* backendsHead = nullptr; 112 | BackendRegistration** backendsTail = &backendsHead; 113 | ConsumerRegistration* consumersHead = nullptr; 114 | ConsumerRegistration** consumersTail = &consumersHead; 115 | kj::TaskSet tasks; 116 | 117 | void taskFailed(kj::Exception&& exception) override; 118 | }; 119 | 120 | template 121 | class BackendSetFeeder final: public BackendSetFeederBase { 122 | // Manages the process of maintaining BackendSets. 
123 | // 124 | // A BackendSetFeeder is created by the master machine for each kind of load-balanced set. For 125 | // example, there is a BackendSetFeeder for StorageRoots. Each StorageRoot capability is added 126 | // using addBackend(), then each BackendSet which needs to be populated by StorageRoots (e.g. 127 | // the front-end) is added using addConsumer(). 128 | 129 | public: 130 | explicit BackendSetFeeder(uint minCount) 131 | : BackendSetFeederBase(minCount) {} 132 | // The feeder will wait until at least minBackendCount backends have been added before it 133 | // initializes any consumers. This prevents flooding the first machine in a set with traffic 134 | // while the others are still coming online. 135 | 136 | using BackendSetFeederBase::Registration; 137 | 138 | kj::Own addBackend(typename T::Client cap) KJ_WARN_UNUSED_RESULT { 139 | // Inserts this capability into all consumer sets. When the returned Backend is dropped 140 | // (indicating that the back-end has disconnected), removes the capability from all consumer 141 | // sets. 142 | return BackendSetFeederBase::addBackend(kj::mv(cap)); 143 | } 144 | 145 | kj::Own addConsumer(typename BackendSet::Client set) KJ_WARN_UNUSED_RESULT { 146 | // Inserts all backends into this consumer. When the returned Consumer is dropped (indicating 147 | // that it has disconnected), stops updating it. 148 | return BackendSetFeederBase::addConsumer(set.template asGeneric<>()); 149 | } 150 | }; 151 | 152 | } // namespace blackrock 153 | 154 | #endif // BLACKROCK_BACKEND_SET_H_ 155 | -------------------------------------------------------------------------------- /src/blackrock/machine.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 
3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0x96022888188b4f2f; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | using ClusterRpc = import "cluster-rpc.capnp"; 22 | using Storage = import "storage.capnp"; 23 | using StorageSchema = import "storage-schema.capnp"; 24 | using Worker = import "worker.capnp"; 25 | using Frontend = import "frontend.capnp"; 26 | using Util = import "/sandstorm/util.capnp"; 27 | 28 | using VatId = ClusterRpc.VatId; 29 | using Address = ClusterRpc.Address; 30 | using SturdyRef = ClusterRpc.SturdyRef; 31 | using Restorer = ClusterRpc.Restorer; 32 | using BackendSet = ClusterRpc.BackendSet; 33 | 34 | interface MasterRestorer(Ref) { 35 | # Represents a Restorer that can restore capabilities for any owner. This capability should only 36 | # be given to the cluster master, which must then attenuate it for specific owners before passing 37 | # it on to said owners. 38 | 39 | getForOwner @0 (domain :SturdyRef.Owner) -> (attenuated :Restorer(Ref)); 40 | } 41 | 42 | interface Gateway { 43 | # Gateway machines bridge between the cluster and the external network (usually the internet). 44 | # They bridge between different parameterizations of Cap'n Proto, serve as a firewall, and 45 | # provide a way for internal apps to make external requests which are explicitly prevented from 46 | # accessing internal machines (e.g. 
if an app requests to connect to some IP, we need to make 47 | # sure that IP is on the public internet, not internal; the best way to do that is to make 48 | # sure the connection is formed using a public network interface that can't even route to 49 | # internal IPs in the first place). 50 | # 51 | # On a more practical note, Gateway machines also accept HTTP traffic from the public internet, 52 | # which they may forward to frontend machines or directly to grains. 53 | 54 | # TODO(soon): Methods for: 55 | # - Sending / receiving general internet traffic. (In-cluster traffic is NOT permitted.) 56 | # - Making and accepting external Cap'n Proto connections and bridging those capabilities into 57 | # the fold. 58 | 59 | # TODO(cleanup): Move to its own file. 60 | } 61 | 62 | interface GatewayImplBase extends(Gateway, BackendSet(Frontend.Frontend)) {} 63 | # Implementation detail. TODO(cleanup): Put this somewhere private. 64 | 65 | interface Machine { 66 | # A machine, ready to serve. 67 | # 68 | # When a new machine is added to the cluster, its Machine capability is given to the cluster 69 | # master via an appropriately secure mechanism. Only the master should ever hold this capability. 70 | # 71 | # The master will call the methods below in order to tell the machine what it should do. Multiple 72 | # become*() method can be called to make the machine serve multiple purposes. Calling the same 73 | # become*() method twice, however, only updates the existing instance of that role and returns 74 | # the same capabilities as before. 75 | # 76 | # This interface is intentionally designed such that the master machine can perform its duties 77 | # without ever actually parsing any of the response messages. Everything the master does -- 78 | # introducing machines to each other -- can be expressed via pipelining. This implies that it is 79 | # not possible to confuse or compromise the master machine by sending it weird messages. 
In the 80 | # future we could even literally extend the VatNetwork to discard incoming messages. 81 | 82 | becomeStorage @0 () 83 | -> (sibling :Storage.StorageSibling, 84 | rootSet :Storage.StorageRootSet, 85 | storageRestorer :MasterRestorer(SturdyRef.Stored), 86 | storageFactory :Storage.StorageFactory, 87 | siblingSet: BackendSet(Storage.StorageSibling), 88 | hostedRestorerSet: BackendSet(Restorer(SturdyRef.Hosted)), 89 | gatewayRestorerSet: BackendSet(Restorer(SturdyRef.External))); 90 | becomeWorker @1 () -> (worker :Worker.Worker); 91 | becomeCoordinator @2 () 92 | -> (coordinator :Worker.Coordinator, 93 | hostedRestorer :MasterRestorer(SturdyRef.Hosted), 94 | workerSet :BackendSet(Worker.Worker), 95 | storageRestorerSet :BackendSet(Restorer(SturdyRef.Stored))); 96 | becomeGateway @3 (config :Frontend.FrontendConfig) 97 | -> (gateway :Gateway, 98 | frontends :BackendSet(Frontend.Frontend)); 99 | becomeFrontend @4 (config :Frontend.FrontendConfig, replicaNumber :UInt32) 100 | -> (frontend :Frontend.Frontend, 101 | storageRestorerSet :BackendSet(Restorer(SturdyRef.Stored)), 102 | storageRootSet :BackendSet(Storage.StorageRootSet), 103 | storageFactorySet :BackendSet(Storage.StorageFactory), 104 | hostedRestorerSet :BackendSet(Restorer(SturdyRef.Hosted)), 105 | workerSet :BackendSet(Worker.Worker), # `workerSet` is temporary 106 | mongoSet :BackendSet(Frontend.Mongo)); 107 | becomeMongo @6 () -> (mongo :Frontend.Mongo); 108 | 109 | shutdown @5 (); 110 | # Do whatever is necessary to prepare this machine for safe shutdown. Do not return until it's 111 | # safe. 112 | 113 | ping @7 (hang :Bool = false); 114 | # Returns immediately if `hang` is false, or never returns if `hang` is true. The master uses 115 | # both modes to detect machine death: a hanging ping() should throw an exception the moment the 116 | # connection dies, but periodic non-hanging ping()s are also used to verify that the connection 117 | # hasn't silently failed. 
118 | } 119 | -------------------------------------------------------------------------------- /src/blackrock/nbd-test-loopback.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include "nbd-bridge.h" 18 | #include "fs-storage.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | namespace blackrock { 35 | namespace { 36 | 37 | class NbdLoopbackMain { 38 | // A test program that mounts a FUSE filesystem that just mirrors some other directory. 
39 | 40 | public: 41 | NbdLoopbackMain(kj::ProcessContext& context): context(context) {} 42 | 43 | kj::MainFunc getMain() { 44 | return kj::MainBuilder(context, "Fuse test, unknown version", 45 | "Creates a Sandstore at containing a single Volume, then mounts that " 46 | "volume at .") 47 | .addOptionWithArg({'o', "options"}, KJ_BIND_METHOD(*this, setOptions), "", 48 | "Set mount options.") 49 | .addOption({'r', "reset"}, KJ_BIND_METHOD(*this, reset), 50 | "Reset all nbd devices, hopefully killing any processes blocked on them.") 51 | .expectArg("", KJ_BIND_METHOD(*this, setMountPoint)) 52 | .expectArg("", KJ_BIND_METHOD(*this, setStorageDir)) 53 | .expectOneOrMoreArgs("", KJ_BIND_METHOD(*this, addCommandArg)) 54 | .callAfterParsing(KJ_BIND_METHOD(*this, run)) 55 | .build(); 56 | } 57 | 58 | private: 59 | kj::ProcessContext& context; 60 | kj::StringPtr options; 61 | kj::StringPtr mountPoint; 62 | kj::AutoCloseFd storageDir; 63 | kj::Vector command; 64 | 65 | kj::MainBuilder::Validity setOptions(kj::StringPtr arg) { 66 | options = arg; 67 | return true; 68 | } 69 | 70 | kj::MainBuilder::Validity setMountPoint(kj::StringPtr arg) { 71 | mountPoint = arg; 72 | return true; 73 | } 74 | 75 | kj::MainBuilder::Validity setStorageDir(kj::StringPtr arg) { 76 | storageDir = sandstorm::raiiOpen(arg.cStr(), O_RDONLY | O_DIRECTORY | O_CLOEXEC); 77 | return true; 78 | } 79 | 80 | kj::MainBuilder::Validity addCommandArg(kj::StringPtr arg) { 81 | command.add(arg); 82 | return true; 83 | } 84 | 85 | kj::MainBuilder::Validity reset() { 86 | NbdDevice::resetAll(); 87 | context.exit(); 88 | } 89 | 90 | kj::MainBuilder::Validity run() { 91 | KJ_SYSCALL(unshare(CLONE_NEWNS), "are you root?"); 92 | KJ_SYSCALL(mount("none", "/", nullptr, MS_REC | MS_PRIVATE, nullptr)); 93 | 94 | NbdDevice::loadKernelModule(); 95 | bool isNew = faccessat(storageDir, "roots/root", F_OK, 0) < 0; 96 | 97 | int pair[2]; 98 | socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0, pair); 99 | 
kj::AutoCloseFd kernelEnd(pair[0]); 100 | kj::AutoCloseFd userEnd(pair[1]); 101 | 102 | kj::AutoCloseFd abortEvent = newEventFd(0, EFD_CLOEXEC | EFD_NONBLOCK); 103 | 104 | kj::Thread serverThread([&]() { 105 | KJ_IF_MAYBE(exception, kj::runCatchingExceptions([&]() { 106 | auto io = kj::setupAsyncIo(); 107 | StorageRootSet::Client storage = kj::heap( 108 | storageDir, io.unixEventPort, 109 | io.provider->getTimer(), nullptr); 110 | 111 | auto factory = storage.getFactoryRequest().send().getFactory(); 112 | OwnedVolume::Client volume = nullptr; 113 | 114 | if (isNew) { 115 | volume = factory.newVolumeRequest().send().getVolume(); 116 | auto req2 = factory.newAssignableRequest(); 117 | req2.setInitialValue(volume); 118 | 119 | auto req3 = storage.setRequest>(); 120 | req3.setName("root"); 121 | req3.setObject(req2.send().getAssignable()); 122 | 123 | req3.send().wait(io.waitScope); 124 | } else { 125 | auto req = storage.getRequest>(); 126 | req.setName("root"); 127 | volume = req.send().getObject().castAs>() 128 | .getRequest().send().getValue(); 129 | } 130 | 131 | kj::UnixEventPort::FdObserver cancelObserver(io.unixEventPort, abortEvent, 132 | kj::UnixEventPort::FdObserver::OBSERVE_READ); 133 | 134 | NbdVolumeAdapter volumeAdapter( 135 | io.lowLevelProvider->wrapSocketFd(userEnd, 136 | kj::LowLevelAsyncIoProvider::ALREADY_CLOEXEC | 137 | kj::LowLevelAsyncIoProvider::ALREADY_NONBLOCK), 138 | kj::mv(volume), NbdAccessType::READ_WRITE); 139 | volumeAdapter.run().exclusiveJoin(cancelObserver.whenBecomesReadable()) 140 | .wait(io.waitScope); 141 | })) { 142 | KJ_LOG(FATAL, "nbd server threw exception", *exception); 143 | } 144 | }); 145 | 146 | // Ensure thread gets canceled before its destructor is called. 
147 | KJ_ON_SCOPE_FAILURE(writeEvent(abortEvent, 1)); 148 | 149 | NbdDevice device; 150 | context.warning(kj::str("using: ", device.getPath())); 151 | 152 | NbdBinding binding(device, kj::mv(kernelEnd), NbdAccessType::READ_WRITE); 153 | KJ_DEFER(context.warning("unbinding...")); 154 | 155 | if (isNew) { 156 | context.warning("formatting..."); 157 | device.format(); 158 | } 159 | 160 | context.warning("mounting..."); 161 | KJ_DEFER(device.trimJournalIfClean()); 162 | Mount mount(device.getPath(), mountPoint, 0, options); 163 | KJ_DEFER(context.warning("unmounting...")); 164 | 165 | KJ_SYSCALL(unshare(CLONE_NEWPID)); 166 | 167 | sandstorm::Subprocess(sandstorm::Subprocess::Options(command)).waitForSuccess(); 168 | 169 | return true; 170 | } 171 | }; 172 | 173 | } // namespace 174 | } // namespace blackrock 175 | 176 | KJ_MAIN(blackrock::NbdLoopbackMain); 177 | -------------------------------------------------------------------------------- /src/blackrock/bundle.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "bundle.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace blackrock { 33 | 34 | #define BUNDLE_PATH "/blackrock/bundle" 35 | 36 | void createSandstormDirectories() { 37 | kj::StringPtr paths[] = { 38 | "/var/blackrock", 39 | "/var/blackrock/bundle", 40 | "/var/blackrock/bundle/sandstorm", 41 | "/var/blackrock/bundle/sandstorm/socket", 42 | "/var/blackrock/bundle/mongo", 43 | "/var/blackrock/bundle/log", 44 | "/var/blackrock/bundle/pid", 45 | "/tmp/blackrock-bundle" 46 | }; 47 | 48 | if (access("/tmp/blackrock-bundle", F_OK) >= 0) { 49 | sandstorm::recursivelyDelete("/tmp/blackrock-bundle"); 50 | } 51 | for (auto path: paths) { 52 | mkdir(path.cStr(), (path.startsWith("/tmp/") ? S_ISVTX | 0770 : 0750)); 53 | KJ_SYSCALL(chown(path.cStr(), 1000, 1000)); 54 | } 55 | } 56 | 57 | void enterSandstormBundle() { 58 | // Set up a small sandbox located inside the Sandstorm (i.e. non-Blackrock) bundle, for running 59 | // things like the front-end and Mongo. 60 | // 61 | // TODO(cleanup): Extend Subprocess to support a lot of these things? 62 | 63 | // Enter mount namespace so that we can bind stuff in. 64 | KJ_SYSCALL(unshare(CLONE_NEWNS)); 65 | 66 | KJ_SYSCALL(chdir(BUNDLE_PATH)); 67 | 68 | // To really unshare the mount namespace, we also have to make sure all mounts are private. 69 | // The parameters here were derived by strace'ing `mount --make-rprivate /`. AFAICT the flags 70 | // are undocumented. :( 71 | KJ_SYSCALL(mount("none", "/", nullptr, MS_REC | MS_PRIVATE, nullptr)); 72 | 73 | // Make sure that the current directory is a mount point so that we can use pivot_root. 74 | KJ_SYSCALL(mount(".", ".", nullptr, MS_BIND | MS_REC, nullptr)); 75 | 76 | // Now change directory into the new mount point. 
77 | char cwdBuf[PATH_MAX + 1]; 78 | if (getcwd(cwdBuf, sizeof(cwdBuf)) == nullptr) { 79 | KJ_FAIL_SYSCALL("getcwd", errno); 80 | } 81 | KJ_SYSCALL(chdir(cwdBuf)); 82 | 83 | // Bind /proc for the global pid namespace in the chroot. 84 | KJ_SYSCALL(mount("/proc", "proc", nullptr, MS_BIND | MS_REC, nullptr)); 85 | 86 | // Bind /var and /tmp. 87 | KJ_SYSCALL(mount("/tmp/blackrock-bundle", "tmp", nullptr, MS_BIND, nullptr)); 88 | KJ_SYSCALL(mount("/var/blackrock/bundle", "var", nullptr, MS_BIND, nullptr)); 89 | 90 | // Bind desired devices from /dev into our chroot environment. 91 | KJ_SYSCALL(mount("/dev/null", "dev/null", nullptr, MS_BIND, nullptr)); 92 | KJ_SYSCALL(mount("/dev/zero", "dev/zero", nullptr, MS_BIND, nullptr)); 93 | KJ_SYSCALL(mount("/dev/random", "dev/random", nullptr, MS_BIND, nullptr)); 94 | KJ_SYSCALL(mount("/dev/urandom", "dev/urandom", nullptr, MS_BIND, nullptr)); 95 | 96 | // Mount a tmpfs at /etc and copy over necessary config files from the host. 97 | // Note that unlike regular Sandstorm, we don't bother bind-mounting in the host etc, because 98 | // we don't expect to have to deal with dynamic network configs. 99 | KJ_SYSCALL(mount("tmpfs", "etc", "tmpfs", MS_NOSUID | MS_NOEXEC, 100 | kj::str("size=2m,nr_inodes=128,mode=755,uid=0,gid=0").cStr())); 101 | { 102 | auto files = sandstorm::splitLines(sandstorm::readAll("host.list")); 103 | 104 | // Now copy over each file. 105 | for (auto& file: files) { 106 | if (access(file.cStr(), R_OK) == 0 && !sandstorm::isDirectory(file)) { 107 | auto in = sandstorm::raiiOpen(file, O_RDONLY); 108 | auto out = sandstorm::raiiOpen(kj::str(".", file), O_WRONLY | O_CREAT | O_EXCL); 109 | ssize_t n; 110 | do { 111 | KJ_SYSCALL(n = sendfile(out, in, nullptr, 1 << 20)); 112 | } while (n > 0); 113 | } 114 | } 115 | } 116 | 117 | // pivot_root into the frontend dir. (This is just a fancy more-secure chroot.) 
118 | KJ_SYSCALL(syscall(SYS_pivot_root, ".", "tmp")); 119 | KJ_SYSCALL(chdir("/")); 120 | KJ_SYSCALL(umount2("tmp", MNT_DETACH)); 121 | 122 | // Drop privileges. Since we own the machine we can choose any UID, just don't want it to be 0. 123 | KJ_SYSCALL(setresgid(1000, 1000, 1000)); 124 | KJ_SYSCALL(setgroups(0, nullptr)); 125 | KJ_SYSCALL(setresuid(1000, 1000, 1000)); 126 | 127 | // Clear signal mask. Not strictly a sandboxing measure, just cleanup. 128 | // TODO(cleanup): We should probably discard any signals in this mask which are currently pending 129 | // before we unblock them. We should probably fix this in Sandstorm as well. 130 | sigset_t sigset; 131 | KJ_SYSCALL(sigemptyset(&sigset)); 132 | KJ_SYSCALL(sigprocmask(SIG_SETMASK, &sigset, nullptr)); 133 | 134 | // The environment inherited from the host is probably no good for us. E.g. an oddball 135 | // locale setting can crash Mongo because we don't have the appropriate locale files available. 136 | KJ_SYSCALL(clearenv()); 137 | 138 | // Set up an environment appropriate for us. 
139 | KJ_SYSCALL(setenv("LANG", "C.UTF-8", true)); 140 | KJ_SYSCALL(setenv("PATH", "/usr/bin:/bin", true)); 141 | KJ_SYSCALL(setenv("LD_LIBRARY_PATH", "/usr/local/lib:/usr/lib:/lib", true)); 142 | } 143 | 144 | kj::Maybe checkPgpSignatureInBundle( 145 | kj::StringPtr appIdString, sandstorm::spk::Metadata::Reader metadata) { 146 | createSandstormDirectories(); 147 | 148 | auto pipe = sandstorm::Pipe::make(); 149 | 150 | sandstorm::Subprocess child([&]() -> int { 151 | enterSandstormBundle(); 152 | 153 | pipe.readEnd = nullptr; 154 | 155 | KJ_IF_MAYBE(s, sandstorm::checkPgpSignature(appIdString, metadata)) { 156 | kj::FdOutputStream(pipe.writeEnd.get()).write(s->begin(), s->size()); 157 | } 158 | 159 | return 0; 160 | }); 161 | 162 | pipe.writeEnd = nullptr; 163 | kj::String result = sandstorm::readAll(pipe.readEnd); 164 | 165 | child.waitForSuccess(); 166 | if (result == nullptr) { 167 | return nullptr; 168 | } else { 169 | return kj::mv(result); 170 | } 171 | } 172 | 173 | } // namespace blackrock 174 | 175 | -------------------------------------------------------------------------------- /src/blackrock/backend-set.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "backend-set.h" 18 | #include 19 | 20 | namespace blackrock { 21 | 22 | BackendSetBase::BackendSetBase(kj::PromiseFulfillerPair paf) 23 | : next(backends.end()), 24 | readyPromise(paf.promise.fork()), 25 | readyFulfiller(kj::mv(paf.fulfiller)) {} 26 | BackendSetBase::~BackendSetBase() noexcept(false) {} 27 | 28 | capnp::Capability::Client BackendSetBase::chooseOne() { 29 | if (backends.empty()) { 30 | return readyPromise.addBranch().then([this]() { 31 | return chooseOne(); 32 | }); 33 | } else { 34 | if (next == backends.end()) { 35 | next = backends.begin(); 36 | } 37 | 38 | return (next++)->second.client; 39 | } 40 | } 41 | 42 | void BackendSetBase::clear() { 43 | backends.clear(); 44 | } 45 | 46 | void BackendSetBase::add(uint64_t id, capnp::Capability::Client client) { 47 | if (backends.empty()) { 48 | readyFulfiller->fulfill(); 49 | } 50 | 51 | backends.insert(std::make_pair(id, Backend { kj::mv(client) })); 52 | } 53 | 54 | void BackendSetBase::remove(uint64_t id) { 55 | if (next != backends.end() && next->first == id) { 56 | ++next; 57 | } 58 | backends.erase(id); 59 | 60 | if (backends.empty()) { 61 | auto paf = kj::newPromiseAndFulfiller(); 62 | readyPromise = paf.promise.fork(); 63 | readyFulfiller = kj::mv(paf.fulfiller); 64 | } 65 | } 66 | 67 | // ======================================================================================= 68 | 69 | class BackendSetFeederBase::ConsumerRegistration final: public Registration { 70 | public: 71 | ConsumerRegistration(BackendSetFeederBase& feeder, BackendSet<>::Client set); 72 | 73 | ~ConsumerRegistration() noexcept(false); 74 | 75 | private: 76 | friend class BackendSetFeederBase; 77 | 78 | BackendSetFeederBase& feeder; 79 | BackendSet<>::Client set; 80 | ConsumerRegistration* next; 81 | ConsumerRegistration** prev; 82 | 83 | void init(); 84 | }; 85 | 86 | class BackendSetFeederBase::BackendRegistration final: public Registration { 87 | public: 88 | BackendRegistration(BackendSetFeederBase& 
feeder, capnp::Capability::Client cap); 89 | 90 | ~BackendRegistration() noexcept(false); 91 | 92 | private: 93 | friend class BackendSetFeederBase; 94 | 95 | BackendSetFeederBase& feeder; 96 | uint64_t id; 97 | capnp::Capability::Client cap; 98 | BackendRegistration* next; 99 | BackendRegistration** prev; 100 | }; 101 | 102 | auto BackendSetFeederBase::addBackend(capnp::Capability::Client cap) -> kj::Own { 103 | auto result = kj::heap(*this, kj::mv(cap)); 104 | 105 | if (ready) { 106 | // Consumers are already initialized. Add the new backend to each one. 107 | for (ConsumerRegistration* consumer = consumersHead; consumer != nullptr; 108 | consumer = consumer->next) { 109 | tasks.add(kj::evalNow([&]() { 110 | auto req = consumer->set.addRequest(capnp::MessageSize {4, 0}); 111 | req.setId(result->id); 112 | req.getBackend().setAs(result->cap); 113 | return req.send().then([](auto&&) {}); 114 | })); 115 | } 116 | } else if (backendCount >= minCount) { 117 | // We have enough backends to initialize all consumers. 118 | ready = true; 119 | for (ConsumerRegistration* consumer = consumersHead; consumer != nullptr; 120 | consumer = consumer->next) { 121 | consumer->init(); 122 | } 123 | } 124 | 125 | return kj::mv(result); 126 | } 127 | 128 | auto BackendSetFeederBase::addConsumer(BackendSet<>::Client set) -> kj::Own { 129 | auto result = kj::heap(*this, kj::mv(set)); 130 | 131 | if (ready) { 132 | // We already have all the backends we need, so go ahead and initialize the consumer. 
133 | result->init(); 134 | } 135 | 136 | return kj::mv(result); 137 | } 138 | 139 | void BackendSetFeederBase::taskFailed(kj::Exception&& exception) { 140 | KJ_LOG(ERROR, exception); 141 | } 142 | 143 | BackendSetFeederBase::Registration::~Registration() noexcept(false) {} 144 | 145 | BackendSetFeederBase::ConsumerRegistration::ConsumerRegistration( 146 | BackendSetFeederBase& feeder, BackendSet<>::Client set) 147 | : feeder(feeder), set(kj::mv(set)), 148 | next(nullptr), prev(feeder.consumersTail) { 149 | *feeder.consumersTail = this; 150 | feeder.consumersTail = &next; 151 | } 152 | 153 | BackendSetFeederBase::ConsumerRegistration::~ConsumerRegistration() noexcept(false) { 154 | if (next == nullptr) { 155 | feeder.consumersTail = prev; 156 | } else { 157 | next->prev = prev; 158 | } 159 | *prev = next; 160 | } 161 | 162 | void BackendSetFeederBase::ConsumerRegistration::init() { 163 | auto req = set.resetRequest(); 164 | auto list = req.initBackends(feeder.backendCount); 165 | uint i = 0; 166 | for (BackendRegistration* backend = feeder.backendsHead; backend != nullptr; 167 | backend = backend->next) { 168 | auto element = list[i++]; 169 | element.setId(backend->id); 170 | element.getBackend().setAs(backend->cap); 171 | } 172 | feeder.tasks.add(req.send().then([](auto&&) {})); 173 | } 174 | 175 | BackendSetFeederBase::BackendRegistration::BackendRegistration( 176 | BackendSetFeederBase& feeder, capnp::Capability::Client cap) 177 | : feeder(feeder), id(feeder.nextId++), cap(kj::mv(cap)), 178 | next(nullptr), prev(feeder.backendsTail) { 179 | *feeder.backendsTail = this; 180 | feeder.backendsTail = &next; 181 | ++feeder.backendCount; 182 | } 183 | 184 | BackendSetFeederBase::BackendRegistration::~BackendRegistration() noexcept(false) { 185 | --feeder.backendCount; 186 | if (next == nullptr) { 187 | feeder.backendsTail = prev; 188 | } else { 189 | next->prev = prev; 190 | } 191 | *prev = next; 192 | 193 | // Remove from all consumers. 
194 | for (ConsumerRegistration* consumer = feeder.consumersHead; consumer != nullptr; 195 | consumer = consumer->next) { 196 | feeder.tasks.add(kj::evalNow([&]() { 197 | auto req = consumer->set.removeRequest(capnp::MessageSize {4, 0}); 198 | req.setId(id); 199 | return req.send().then([](auto&&) {}); 200 | })); 201 | } 202 | } 203 | 204 | } // namespace blackrock 205 | 206 | -------------------------------------------------------------------------------- /src/blackrock/nbd-bridge.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_NBD_BRIDGE_H_ 18 | #define BLACKROCK_NBD_BRIDGE_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace blackrock { 27 | 28 | enum class NbdAccessType { 29 | READ_ONLY, 30 | READ_WRITE 31 | }; 32 | 33 | class NbdVolumeAdapter: private kj::TaskSet::ErrorHandler { 34 | // Implements the NBD protocol in terms of `Volume`. 35 | public: 36 | NbdVolumeAdapter(kj::Own socket, Volume::Client volume, 37 | NbdAccessType access); 38 | // NBD requests are read from `socket` and implemented via `volume`. 
39 | 40 | void updateVolume(Volume::Client newVolume); 41 | // Replaces the Volume capability with a new one, which must point to the exact same volume. 42 | // Useful for recovering after disconnects, if the driver hasn't noticed the disconnect yet. 43 | 44 | kj::Promise run(); 45 | // Actually runs the loop. The promise resolves successfully when the device has been shut down. 46 | // It is extremely important to wait for this before destroying the NbdVolumeAdapter; failure 47 | // to do so can leave the kernel in an unhappy state. 48 | 49 | kj::Promise onDisconnected() { return kj::mv(disconnectedPaf.promise); } 50 | // Resolves if the underlying volume becomes disconnected, in which case it's time to force-kill 51 | // everything using it. Can only be called once. 52 | 53 | private: 54 | kj::Own socket; 55 | Volume::Client volume; 56 | kj::PromiseFulfillerPair disconnectedPaf; 57 | NbdAccessType access; 58 | bool disconnected = false; 59 | kj::TaskSet tasks; 60 | 61 | kj::Promise replyQueue = kj::READY_NOW; 62 | // Promise for completion of previous write() operation to handle.socket. 63 | // 64 | // Becomes null when the run loop completes. 65 | // 66 | // TODO(someday): When overlapping write()s are supported by AsyncIoStream, simplify this. 67 | 68 | struct nbd_request request; 69 | // We only read one of these at a time, so might as well allocate it here. 70 | 71 | struct RequestHandle; 72 | struct ReplyAndIovec; 73 | void reply(RequestHandle reqHandle, int error = 0); 74 | void replyError(RequestHandle reqHandle, kj::Exception&& exception, const char* op); 75 | void taskFailed(kj::Exception&& exception) override; 76 | }; 77 | 78 | class NbdDevice { 79 | // Represents a claim to a specific `/dev/nbdX` device node. 80 | 81 | public: 82 | NbdDevice(); 83 | // Claims an unused NBD device and binds it to the given socket. (The other end of the socket 84 | // pair should be passed to `NbdVolumeAdapter`.) 
85 | 86 | explicit NbdDevice(uint number); 87 | // Explicitly claim a specific device number. For debugging purposes only! 88 | 89 | kj::StringPtr getPath() { return path; } 90 | // E.g. "/dev/nbd12". 91 | 92 | int getFd() { return fd; } 93 | 94 | void format(); 95 | // Format the device as an ext4 filesystem with an initial size of 8GB. This is accomplished by 96 | // simply writing a template image directly to the disk, so format() will result in exactly the 97 | // same disk image every time. 98 | 99 | void trimJournalIfClean(); 100 | // Verify that the journal is currently clean, and then TRIM it. Call immediately after a clean 101 | // unmount to reduce disk usage. (The journal normally doesn't get TRIMed even when the contents 102 | // have already been committed. This seems to be a deficiency in the ext4 driver.) 103 | 104 | void fixSurpriseFeatures(); 105 | // Check if this volume has the "surprise features" of 64bit and metadata_checksum enabled. If 106 | // so, fix the situation. The surprise features were accidentally enabled on many grains in 107 | // production due to an unexpected change in /etc/mke2fs.conf landing in Debian Testing. Since 108 | // we use mke2fs to create the zygote image at compile time, these features ended up enabled 109 | // in production. The metadata_checksum feature is buggy on our older production kernels, and 110 | // the 64bit option breaks trimJournalIfClean() (we could fix that, but the 64bit option is 111 | // not helpful to us, so better to avoid having multiple code paths!). 112 | // 113 | // Note: This method runs subprocesses and may block. It CANNOT be run from the main worker 114 | // process! 115 | 116 | static void resetAll(); 117 | // Iterate through all the nbd devices and reset them, in order to un-block processes wedged 118 | // trying to read disconnected devices. 119 | // 120 | // THIS WILL BREAK EVERYTHING CURRENTLY USING ANY NBD DEVICE. 
121 | 122 | static void disconnectAll(); 123 | // Iterate through all the nbd devices and disconnect them, in an attempt to forcefully tear 124 | // down a worker. 125 | // 126 | // THIS WILL BREAK EVERYTHING CURRENTLY USING ANY NBD DEVICE. 127 | 128 | static void loadKernelModule(); 129 | // Make sure the NBD kernel module is loaded. 130 | 131 | private: 132 | kj::String path; 133 | kj::AutoCloseFd fd; 134 | }; 135 | 136 | class NbdBinding { 137 | // Given an NBD device and a socket implementing the NBD protocol, makes the NBD device live and 138 | // mountable. 139 | // 140 | // NbdBinding MUST NOT be used in the same thread that is running the NbdVolumeAdapter. This is 141 | // because NbdBinding performs blocking system calls that will cause the kernel to issue reads 142 | // and writes to the device, and will not return until those operations complete. 143 | 144 | public: 145 | NbdBinding(NbdDevice& device, kj::AutoCloseFd socket, NbdAccessType access); 146 | // Binds the given NBD device to the given socket. (The other end of the socket pair should be 147 | // passed to `NbdVolumeAdapter`.) 148 | 149 | ~NbdBinding() noexcept(false); 150 | // Disconnects the binding. 151 | 152 | private: 153 | NbdDevice& device; 154 | kj::Thread doItThread; 155 | // Executes the NBD_DO_IT ioctl(), which runs the NBD device loop in the kernel, not returning 156 | // until the device is disconnected. 157 | 158 | static NbdDevice& setup(NbdDevice& device, kj::AutoCloseFd socket, NbdAccessType access); 159 | }; 160 | 161 | class Mount { 162 | // Mounts a device at a path. As with `NbdDevice`, `Mount` MUST NOT be used in the same thread 163 | // that is executing the NbdVolumeAdapter implementing the device. 
164 | 165 | public: 166 | Mount(kj::StringPtr devPath, kj::StringPtr mountPoint, uint64_t flags, kj::StringPtr options); 167 | ~Mount() noexcept(false); 168 | 169 | private: 170 | kj::String mountPoint; 171 | uint64_t flags; 172 | }; 173 | 174 | } // namespace blackrock 175 | 176 | #endif // BLACKROCK_NBD_BRIDGE_H_ 177 | -------------------------------------------------------------------------------- /src/blackrock/worker.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_WORKER_H_ 18 | #define BLACKROCK_WORKER_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "local-persistent-registry.h" 28 | 29 | namespace kj { 30 | class Thread; 31 | } 32 | 33 | namespace blackrock { 34 | 35 | class NbdVolumeAdapter; 36 | 37 | struct ByteStringHash { 38 | inline size_t operator()(const kj::ArrayPtr& token) const { 39 | size_t result = 0; 40 | memcpy(&result, token.begin(), kj::min(sizeof(result), token.size())); 41 | return result; 42 | } 43 | inline size_t operator()(const kj::ArrayPtr& a, 44 | const kj::ArrayPtr& b) const { 45 | return a.size() == b.size() && memcmp(a.begin(), b.begin(), a.size()) == 0; 46 | } 47 | }; 48 | 49 | class PackageMountSet: private kj::TaskSet::ErrorHandler { 50 | public: 51 | explicit PackageMountSet(kj::AsyncIoContext& ioContext); 52 | ~PackageMountSet() noexcept(false); 53 | KJ_DISALLOW_COPY(PackageMountSet); 54 | 55 | class PackageMount: public kj::Refcounted { 56 | public: 57 | PackageMount(PackageMountSet& mountSet, kj::ArrayPtr id, 58 | kj::String path, Volume::Client volume, 59 | kj::Own nbdUserEnd, 60 | kj::AutoCloseFd nbdKernelEnd); 61 | ~PackageMount() noexcept(false); 62 | 63 | kj::ArrayPtr getId() { return id; } 64 | 65 | kj::StringPtr getPath() { return path; } 66 | 67 | kj::Promise whenReady() { return loaded.addBranch(); } 68 | 69 | kj::Promise onDisconnected() { return disconnected.addBranch(); } 70 | 71 | void updateVolume(Volume::Client newVolume); 72 | 73 | private: 74 | friend class PackageMountSet; 75 | 76 | PackageMountSet& mountSet; 77 | 78 | kj::Array id; 79 | // ID string assigned to this package. 80 | 81 | kj::String path; 82 | 83 | kj::Own volumeAdapter; 84 | kj::Promise volumeRunTask; 85 | 86 | kj::AsyncIoProvider::PipeThread nbdThread; 87 | // Thread which mounts the NBD device. Protocol as follows: 88 | // 1) thread -> main: 1 byte: The mount point is ready. 
89 | // 2) main -> thread: EOF: Please shut down. 90 | // 3) thread -> main: EOF: I've shut down now; it's safe to destroy the NbdVolumeAdapter and 91 | // join the thread. 92 | 93 | kj::ForkedPromise loaded; 94 | // Resolves when the thread reports that the mount point is active. 95 | 96 | bool unregistered = false; 97 | 98 | kj::ForkedPromise disconnected; 99 | // Resolves when this mount has been disconnecnted from storage and therefore will report I/O 100 | // errors. Grains using this package should attempt to shut down. 101 | 102 | void unregister(); 103 | }; 104 | 105 | kj::Promise> getPackage(PackageInfo::Reader package); 106 | 107 | void returnPackage(kj::Own package); 108 | // Grains "return" packages to the mount set where the package may remain mounted for some time 109 | // in case it is used again. 110 | 111 | private: 112 | kj::AsyncIoContext& ioContext; 113 | std::unordered_map, PackageMount*, 114 | ByteStringHash, ByteStringHash> mounts; 115 | uint64_t counter = 0; 116 | 117 | static byte dummyByte; 118 | // Target of pipe reads and writes where we don't care about the content. 
119 | 120 | kj::TaskSet tasks; 121 | 122 | void taskFailed(kj::Exception&& exception) override; 123 | }; 124 | 125 | class WorkerImpl: public Worker::Server, private kj::TaskSet::ErrorHandler { 126 | public: 127 | WorkerImpl(kj::AsyncIoContext& ioContext, sandstorm::SubprocessSet& subprocessSet, 128 | LocalPersistentRegistry& persistentRegistry); 129 | ~WorkerImpl() noexcept(false); 130 | 131 | protected: 132 | kj::Promise newGrain(NewGrainContext context) override; 133 | kj::Promise restoreGrain(RestoreGrainContext context) override; 134 | kj::Promise unpackPackage(UnpackPackageContext context) override; 135 | kj::Promise unpackBackup(UnpackBackupContext context) override; 136 | kj::Promise packBackup(PackBackupContext context) override; 137 | 138 | private: 139 | class RunningGrain; 140 | class PackageUploadStreamImpl; 141 | struct CommandInfo; 142 | 143 | kj::LowLevelAsyncIoProvider& ioProvider; 144 | sandstorm::SubprocessSet& subprocessSet; 145 | LocalPersistentRegistry& persistentRegistry; 146 | PackageMountSet packageMountSet; 147 | std::unordered_map> runningGrains; 148 | kj::TaskSet tasks; 149 | 150 | sandstorm::Supervisor::Client bootGrain( 151 | PackageInfo::Reader packageInfo, kj::Own grainState, 152 | sandstorm::Assignable::Setter::Client grainStateSetter, 153 | sandstorm::spk::Manifest::Command::Reader command, bool isNew, 154 | kj::String grainId, sandstorm::SandstormCore::Client core, 155 | kj::Own persistentRegistration); 156 | 157 | void taskFailed(kj::Exception&& exception) override; 158 | }; 159 | 160 | class SupervisorMain: public sandstorm::AbstractMain { 161 | // Like sandstorm::SupervisorMain, except that it sets itself up on the Blackrock VatNetwork. 
162 | 163 | public: 164 | SupervisorMain(kj::ProcessContext& context); 165 | 166 | kj::MainFunc getMain() override; 167 | 168 | kj::MainBuilder::Validity run(); 169 | 170 | private: 171 | kj::ProcessContext& context; 172 | sandstorm::SupervisorMain sandstormSupervisor; 173 | 174 | class SystemConnectorImpl; 175 | }; 176 | 177 | class MetaSupervisorMain: public sandstorm::AbstractMain { 178 | // A binary which is responsible for mounting nbd and then exec()ing the supervisor. 179 | 180 | public: 181 | MetaSupervisorMain(kj::ProcessContext& context); 182 | 183 | kj::MainFunc getMain() override; 184 | 185 | kj::MainBuilder::Validity run(); 186 | 187 | private: 188 | kj::ProcessContext& context; 189 | kj::StringPtr packageMount; 190 | kj::Vector args; 191 | bool isNew = false; 192 | }; 193 | 194 | class UnpackMain: public sandstorm::AbstractMain { 195 | // Thin wrapper around `spk unpack` for use by Blackrock worker. 196 | 197 | public: 198 | UnpackMain(kj::ProcessContext& context): context(context) {} 199 | 200 | kj::MainFunc getMain() override; 201 | 202 | kj::MainBuilder::Validity run(); 203 | 204 | private: 205 | kj::ProcessContext& context; 206 | }; 207 | 208 | class BackupMain: public sandstorm::AbstractMain { 209 | // Thin wrapper around backup/restore functionality for use by Blackrock worker. 210 | 211 | public: 212 | BackupMain(kj::ProcessContext& context): context(context) {} 213 | 214 | kj::MainFunc getMain() override; 215 | 216 | kj::MainBuilder::Validity run(kj::StringPtr filename); 217 | 218 | private: 219 | kj::ProcessContext& context; 220 | bool restore = false; 221 | }; 222 | 223 | } // namespace blackrock 224 | 225 | #endif // BLACKROCK_WORKER_H_ 226 | -------------------------------------------------------------------------------- /src/blackrock/distributed-blocks.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 
3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | // ************************************ 18 | // **** INCOMPLETE **** INCOMPLETE **** 19 | // ************************************ 20 | // 21 | // This file contains some initial ideas for data structures relating to a distributed block 22 | // storage system. At present we are not implementing this, but we might in the future. 23 | 24 | #include "common.h" 25 | #include 26 | 27 | namespace blackrock { 28 | namespace { 29 | 30 | struct UInt128 { 31 | uint64_t value[2]; 32 | }; 33 | 34 | struct UInt256 { 35 | uint64_t value[4]; 36 | }; 37 | 38 | struct Superblock { 39 | // First block of a physical disk which is part of the distributed block storage system. 40 | 41 | static constexpr UInt128 MAGIC = { { 0xcdf365304999bb98ull, 0xfd641ba781b04071ull } }; 42 | static constexpr uint16_t VERSION = 0; 43 | 44 | UInt128 magic; 45 | // Magic number indicating a Blackrock disk. Always set to MAGIC. 46 | 47 | UInt128 clusterId; 48 | // Number identifying the Blackrock cluster of which this disk is a part. 49 | 50 | uint16_t version; 51 | // Storage format version. Set to VERSION. 52 | 53 | uint8_t replicaId; 54 | // Each disk is part of one replica of the cluster's storage. If the underlying disk is already 55 | // considered robust, then there may be only one replica (replica zero). Otherwise, 2-3 replicas 56 | // are typical. 
All shards on one disk -- and preferrably all disks in one machine -- are 57 | // required to be in the same replica because otherwise it would defeat the purpose of replicas. 58 | 59 | uint8_t lgBucketCount; 60 | // Log base 2 of the number of hash table buckets in each local shard. 61 | 62 | uint8_t lgJournalSize; 63 | // Log base 2 of size of the journal, in multiples of sizeof(Transaction). (Technically a 64 | // Transaction is variable-width due to the trailing ref array.) 65 | 66 | uint8_t lgBlockCount; 67 | // Log base 2 of number of blocks (content) in each local shard. 68 | 69 | uint8_t shardCount; 70 | // Number of shards in this block device, minus 1 since there are never 0 shards. 71 | 72 | uint32_t shardIds[256]; 73 | // Each shard's location in the key space. shardIds of shards in a replica should be uniformly 74 | // distributed in the space of 32-bit integers. Each shard "owns" the IDs between its shardId 75 | // and the next higher shardId in the replica, using modular (wrap-around) arithmetic. 76 | // 77 | // The shardId for a particular block is (blockId >> (replicaId * 32)) % (1 << 32). Or, in 78 | // other words, if you defined it as uint32_t blockId[8], then shardId = blockId[replicaId]. 79 | // This is valid up to 8 replicas, which should be enough for anyone. 80 | // 81 | // Properties of this algorithm: 82 | // - Consistent hashing: adding a new shard only requires moving data from one other shard. 83 | // (But usually lots of shards are added at once.) 84 | // - Sharding of blocks is totally different between replicas, to avoid common hot spots. 85 | }; 86 | 87 | static_assert(sizeof(Superblock) < 4096, "Superblock is more than one block."); 88 | 89 | struct Bucket { 90 | // One bucket in the hashtable mapping block IDs to locally-stored blocks. 91 | 92 | UInt256 blockId; 93 | // Key. 0 = empty bucket. (The actual block 0 is never stored since it is known to map to the 94 | // block containing entirely zeros.) 
95 | 96 | unsigned isMutable :1; 97 | // If true, this is a mutable block. 98 | // 99 | // TODO(someday): Unclear if this flag is strictly necessary. 100 | 101 | unsigned reserved0 :3; 102 | // Must be zero. 103 | 104 | unsigned offset :28; 105 | // Location (index) of block content within the content table. With 4k blocks and 28 bits this 106 | // can address 1TB of data. 107 | 108 | uint32_t refcount; 109 | // Number of references to this block. Usually always one for mutable blocks. 110 | 111 | uint32_t revision; 112 | // Revision counter. Incremented whenever the Bucket changes, which for mutable blocks includes 113 | // when the block is overwritten since this is always accomplished by writing the new data to 114 | // a new location and then updating `offset`. 115 | 116 | uint32_t reserved1[5]; 117 | // Must be zero. 118 | // 119 | // TODO(someday): 120 | // - Record crypto nonce? (Could union with refcount.) 121 | // - Record location of the block in long-term storage. 122 | // - Implement policy for pushing blocks to long-term storage. 123 | // - Implement policy for purging blocks from local storage once they are in long-term storage. 124 | }; 125 | 126 | static_assert(sizeof(Bucket) == 64, "Bucket size changed!"); 127 | 128 | struct Transaction { 129 | uint64_t id; 130 | // Transaction ID. Assigned sequentially per-shard. 131 | 132 | uint64_t firstIncompleteTx; 133 | // The transaction ID of the first incomplete transaction at the time 134 | 135 | uint32_t bucketIndex; 136 | // Which bucket to overwrite. 137 | 138 | unsigned trim :1; 139 | // Whether to perform a trim of `trimIndex`. 140 | 141 | unsigned reserved0 :3; 142 | // Must be zero. 143 | 144 | unsigned trimIndex :28; 145 | // Block index (in block content table) which can be freed after this transaction. 
146 | 147 | uint64_t parentTxnId; 148 | uint32_t parentTxnShardId; 149 | // If `parentTxnId` is not ~0 then this transaction is occurring as a dependent of some other 150 | // transaction possibly occurring on a different shard. Until the parent transaction 151 | // completes, it's possible that we'll receive repeat requests to perform the child 152 | // transaction, which we'll need to de-dupe by noticing that it matches this journal entry. 153 | // Once we know the trigger transaction has completed, we can delete this journal entry. 154 | 155 | uint8_t refsAddedCount; 156 | uint8_t refsRemovedCount; 157 | // Number of block references added to or removed from this block as a result of this 158 | // transaction. The actual references are listed in the `refs` array. 159 | 160 | uint8_t reserved1[2]; 161 | 162 | uint32_t reserved2[6]; 163 | // Must be zero. 164 | // 165 | // TODO(someday): 166 | // - Verify valid transaction, e.g. with a checksum/hash, so that we can reliably find the end of 167 | // the journal after power failure. 168 | 169 | Bucket newBucket; 170 | // New bucket contents. 171 | 172 | UInt256 refs[]; 173 | // Array of references that were added to or removed from this block as a result of this 174 | // transaction. The referenced blocks will need to have their refcounts adjusted as part of this 175 | // transaction. The size of the array is `refsAddedCount + refsRemovedCount`, padded up to the 176 | // next `sizeof(Transaction)` boundary, so that all Transactions reside on such a boundary. 177 | }; 178 | 179 | static_assert(sizeof(Transaction) == 128, "Journal size changed!"); 180 | 181 | struct Block { 182 | // One block. Note that the content is normally encrypted by XORing with a ChaCha20 stream whose 183 | // key and nonce are determined differently depending on the block type. This struct defines 184 | // what the block looks like after decryption. 185 | 186 | union { 187 | byte data[4096]; 188 | // A regular data block containing bytes. 
189 | // 190 | // The block is encrypted using its own 256-bit BLAKE2b hash (salted with the cluster ID) as 191 | // the key, and a nonce of zero. 192 | 193 | UInt256 blockTableSegment[128]; 194 | // A block which contains a list of references to other blocks. 195 | // 196 | // Each element is the salted 256-bit BLAKE2b hash of the plaintext of a block, XORed with the 197 | // hash if the block contents were all-zero, so that the blockRef for an all-zero block is 198 | // all-zero. 199 | // 200 | // To get the block ID, hash this value again, then XOR that with the hash of an all-zero 201 | // blockRef, so that again the block ID of an all-zero block is all-zero. 202 | // 203 | // The block is encrypted using 204 | }; 205 | }; 206 | 207 | static_assert(sizeof(Block) == 4096, "Block size changed!"); 208 | 209 | } // namespace 210 | } // namespace blackrock 211 | -------------------------------------------------------------------------------- /src/blackrock/storage-tool.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "fs-storage.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | namespace blackrock { 32 | 33 | class StorageTool { 34 | public: 35 | StorageTool(kj::ProcessContext& context): context(context) {} 36 | 37 | kj::MainFunc getMain() { 38 | return kj::MainBuilder(context, "Blackrock", 39 | "Tool for probing the contents of Blackrock storage.") 40 | .addOption({'f', "fix"}, KJ_BIND_METHOD(*this, setFix), "fix it") 41 | .expectArg("", KJ_BIND_METHOD(*this, setUserId)) 42 | .expectArg("", KJ_BIND_METHOD(*this, setGrainId)) 43 | .callAfterParsing(KJ_BIND_METHOD(*this, run)) 44 | .build(); 45 | } 46 | 47 | private: 48 | kj::ProcessContext& context; 49 | kj::StringPtr userId; 50 | kj::StringPtr grainId; 51 | bool fix = false; 52 | 53 | typedef FilesystemStorage::ObjectId ObjectId; 54 | typedef FilesystemStorage::ObjectKey ObjectKey; 55 | 56 | enum class Type: uint8_t { 57 | // (zero skipped to help detect errors) 58 | BLOB = 1, 59 | VOLUME, 60 | IMMUTABLE, 61 | ASSIGNABLE, 62 | COLLECTION, 63 | OPAQUE, 64 | REFERENCE 65 | }; 66 | 67 | struct Xattr { 68 | // Format of the xattr block stored on each file. On ext4 we have about 76 bytes available in 69 | // the inode to store this attribute, but in theory this space could get smaller in the future, 70 | // so we should try to keep this minimal. 71 | 72 | static constexpr const char* NAME = "user.sandstor"; 73 | // Extended attribute name. Abbreviated to be 8 bytes to avoid losing space to alignment (ext4 74 | // doesn't store the "user." prefix). Actually short for "sandstore", not "sandstorm". :) 75 | 76 | Type type; 77 | 78 | bool readOnly; 79 | // For volumes, prevents the volume from being modified. 
For Blobs, indicates that initialization 80 | // has completed with a `done()` call, indicating the entire stream was received (otherwise, 81 | // either the stream is still uploading, or it failed to fully upload). Once set this 82 | // can never be unset. 83 | 84 | byte reserved[2]; 85 | // Must be zero. 86 | 87 | uint32_t accountedBlockCount; 88 | // The number of 4k blocks consumed by this object the last time we considered it for 89 | // accounting/quota purposes. The on-disk size could have changed in the meantime. 90 | 91 | uint64_t transitiveBlockCount; 92 | // The number of 4k blocks in this object and all child objects. 93 | 94 | ObjectId owner; 95 | // What object owns this one? 96 | }; 97 | 98 | class RawClientHook: public capnp::ClientHook, public kj::Refcounted { 99 | public: 100 | explicit RawClientHook(StoredObject::CapDescriptor::Reader descriptor) 101 | : descriptor(descriptor) {} 102 | 103 | StoredObject::CapDescriptor::Reader descriptor; 104 | 105 | capnp::Request newCall( 106 | uint64_t interfaceId, uint16_t methodId, kj::Maybe sizeHint) override { 107 | KJ_UNIMPLEMENTED("RawClientHook doesn't implement anything"); 108 | } 109 | 110 | VoidPromiseAndPipeline call(uint64_t interfaceId, uint16_t methodId, 111 | kj::Own&& context) override { 112 | KJ_UNIMPLEMENTED("RawClientHook doesn't implement anything"); 113 | } 114 | 115 | kj::Maybe getResolved() override { 116 | KJ_UNIMPLEMENTED("RawClientHook doesn't implement anything"); 117 | } 118 | 119 | kj::Maybe>> whenMoreResolved() override { 120 | KJ_UNIMPLEMENTED("RawClientHook doesn't implement anything"); 121 | } 122 | 123 | kj::Own addRef() override { 124 | return kj::addRef(*this); 125 | } 126 | 127 | const void* getBrand() override { 128 | return nullptr; 129 | } 130 | 131 | kj::Maybe getFd() override { return nullptr; } 132 | }; 133 | 134 | ObjectKey getUser(kj::StringPtr userId) { 135 | capnp::StreamFdMessageReader reader(sandstorm::raiiOpen( 136 | kj::str("roots/user-", userId), O_RDONLY)); 
137 | return reader.getRoot().getKey(); 138 | } 139 | 140 | ObjectKey getGrain(ObjectKey user, kj::StringPtr grainId) { 141 | auto fd = sandstorm::raiiOpen(kj::str("main/", ObjectId(user).filename('o').begin()), O_RDONLY); 142 | 143 | auto children = ({ 144 | capnp::StreamFdMessageReader reader(fd.get()); 145 | KJ_MAP(c, reader.getRoot().getChildren()) -> ObjectId { return c; }; 146 | }); 147 | 148 | capnp::StreamFdMessageReader reader(fd.get()); 149 | auto object = reader.getRoot(); 150 | 151 | capnp::ReaderCapabilityTable capTable(KJ_MAP(cap, object.getCapTable()) 152 | -> kj::Maybe> { 153 | return kj::Own(kj::refcounted(cap)); 154 | }); 155 | 156 | auto imbued = capTable.imbue(object); 157 | 158 | for (auto grain: imbued.getPayload().getAs().getGrains()) { 159 | if (grain.getId() == grainId) { 160 | auto descriptor = capnp::ClientHook::from(grain.getState()) 161 | .downcast()->descriptor; 162 | KJ_ASSERT(descriptor.isChild(), descriptor); 163 | return descriptor.getChild(); 164 | } 165 | } 166 | 167 | KJ_FAIL_REQUIRE("user had no such grain"); 168 | } 169 | 170 | ObjectKey getVolume(ObjectKey grain) { 171 | auto fd = sandstorm::raiiOpen(kj::str( 172 | "main/", ObjectId(grain).filename('o').begin()), O_RDONLY); 173 | 174 | auto children = ({ 175 | capnp::StreamFdMessageReader reader(fd.get()); 176 | KJ_MAP(c, reader.getRoot().getChildren()) -> ObjectId { return c; }; 177 | }); 178 | 179 | capnp::StreamFdMessageReader reader(fd.get()); 180 | auto object = reader.getRoot(); 181 | 182 | capnp::ReaderCapabilityTable capTable(KJ_MAP(cap, object.getCapTable()) 183 | -> kj::Maybe> { 184 | return kj::Own(kj::refcounted(cap)); 185 | }); 186 | 187 | auto imbued = capTable.imbue(object); 188 | auto volume = imbued.getPayload().getAs().getVolume(); 189 | auto descriptor = capnp::ClientHook::from(volume).downcast()->descriptor; 190 | KJ_ASSERT(descriptor.isChild(), descriptor); 191 | return descriptor.getChild(); 192 | } 193 | 194 | bool setUserId(kj::StringPtr arg) { 195 | 
userId = arg; 196 | return true; 197 | } 198 | 199 | bool setGrainId(kj::StringPtr arg) { 200 | grainId = arg; 201 | return true; 202 | } 203 | 204 | bool setFix() { 205 | fix = true; 206 | return true; 207 | } 208 | 209 | bool run() { 210 | auto grain = getGrain(getUser(userId), grainId); 211 | auto volume = getVolume(grain); 212 | 213 | auto filename = kj::str("main/", ObjectId(volume).filename('o').begin()); 214 | struct stat stats; 215 | KJ_SYSCALL(stat(filename.cStr(), &stats)); 216 | 217 | Xattr expected; 218 | memset(&expected, 0, sizeof(expected)); 219 | expected.type = Type::VOLUME; 220 | expected.accountedBlockCount = stats.st_blocks / 8; 221 | expected.transitiveBlockCount = expected.accountedBlockCount; 222 | expected.owner = grain; 223 | 224 | Xattr xattr; 225 | ssize_t n = getxattr(filename.cStr(), Xattr::NAME, &xattr, sizeof(xattr)); 226 | if (n < 0) { 227 | context.error(kj::str("missing xattr:", strerror(errno))); 228 | 229 | if (fix) { 230 | KJ_SYSCALL(setxattr(filename.cStr(), Xattr::NAME, &expected, sizeof(expected), XATTR_CREATE)); 231 | } 232 | } else if (n != sizeof(xattr)) { 233 | context.error(kj::str("unexpected xattr size: ", n)); 234 | } else if (memcmp(&xattr, &expected, sizeof(xattr)) != 0) { 235 | KJ_LOG(ERROR, (uint)expected.type, (uint)xattr.type); 236 | KJ_LOG(ERROR, expected.accountedBlockCount, xattr.accountedBlockCount); 237 | KJ_LOG(ERROR, expected.transitiveBlockCount, xattr.transitiveBlockCount); 238 | KJ_LOG(ERROR, expected.owner.filename('o').begin(), xattr.owner.filename('o').begin()); 239 | context.error("xattrs don't match"); 240 | 241 | if (fix) { 242 | KJ_SYSCALL(setxattr(filename.cStr(), Xattr::NAME, &expected, sizeof(expected), XATTR_REPLACE)); 243 | } 244 | } else { 245 | context.warning("xattrs match expected"); 246 | } 247 | 248 | context.exitInfo(kj::str(ObjectId(grain).filename('o').begin(), ' ', 249 | ObjectId(volume).filename('o').begin())); 250 | } 251 | }; 252 | 253 | } // namespace blackrock 254 | 255 | 
KJ_MAIN(blackrock::StorageTool) 256 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2015 Sandstorm Development Group, Inc. 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 
182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 191 | -------------------------------------------------------------------------------- /src/blackrock/gateway.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2017 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "gateway.h" 18 | #include 19 | 20 | namespace blackrock { 21 | 22 | void GatewayImpl::EntropySourceImpl::generate(kj::ArrayPtr buffer) { 23 | randombytes(buffer.begin(), buffer.size()); 24 | } 25 | 26 | GatewayImpl::GatewayImpl(kj::Timer& timer, kj::Network& network, FrontendConfig::Reader config) 27 | : GatewayImpl(timer, network, config, kj::HttpHeaderTable::Builder()) {} 28 | 29 | GatewayImpl::GatewayImpl(kj::Timer& timer, kj::Network& network, FrontendConfig::Reader config, 30 | kj::HttpHeaderTable::Builder headerTableBuilder) 31 | : timer(timer), network(network), 32 | gatewayServiceTables(headerTableBuilder), 33 | hXRealIp(headerTableBuilder.add("X-Real-IP")), 34 | headerTable(headerTableBuilder.build()), 35 | httpServer(timer, *headerTable, [this](kj::AsyncIoStream& conn) { 36 | return kj::heap(static_cast(*this), hXRealIp, conn); 37 | }), 38 | altPortService(*this, *headerTable, config.getBaseUrl(), config.getWildcardHost()), 39 | altPortHttpServer(timer, *headerTable, altPortService), 40 | smtpServer(*this), 41 | tlsManager(httpServer, smtpServer, config.hasPrivateKeyPassword() 42 | ? 
kj::Maybe(config.getPrivateKeyPassword()) 43 | : kj::Maybe(nullptr)), 44 | tasks(*this) { 45 | clientSettings.entropySource = entropySource; 46 | 47 | setConfig(config); 48 | 49 | if (config.getBaseUrl().startsWith("https://")) { 50 | tasks.add(network.parseAddress("*", 80) 51 | .then([this](kj::Own&& addr) { 52 | auto listener = addr->listen(); 53 | auto promise = altPortHttpServer.listenHttp(*listener); 54 | return promise.attach(kj::mv(listener)); 55 | })); 56 | 57 | tasks.add(network.parseAddress("*", 443) 58 | .then([this](kj::Own&& addr) { 59 | auto listener = addr->listen(); 60 | auto promise = tlsManager.listenHttps(*listener); 61 | return promise.attach(kj::mv(listener)); 62 | })); 63 | } else { 64 | tasks.add(network.parseAddress("*", 80) 65 | .then([this](kj::Own&& addr) { 66 | auto listener = addr->listen(); 67 | auto promise = httpServer.listenHttp(*listener); 68 | return promise.attach(kj::mv(listener)); 69 | })); 70 | } 71 | 72 | tasks.add(network.parseAddress("*", 25) 73 | .then([this](kj::Own&& addr) { 74 | auto listener = addr->listen(); 75 | auto promise = tlsManager.listenSmtp(*listener); 76 | return promise.attach(kj::mv(listener)); 77 | })); 78 | 79 | tasks.add(network.parseAddress("*", 465) 80 | .then([this](kj::Own&& addr) { 81 | auto listener = addr->listen(); 82 | auto promise = tlsManager.listenSmtps(*listener); 83 | return promise.attach(kj::mv(listener)); 84 | })); 85 | 86 | capnp::Capability::Client masterGateway = kj::refcounted([this]() { 87 | return chooseReplica(roundRobinCounter++) 88 | .then([](kj::Own replica) -> capnp::Capability::Client { 89 | return replica->router; 90 | }); 91 | }); 92 | 93 | tasks.add(tlsManager.subscribeKeys(masterGateway.castAs())); 94 | } 95 | 96 | void GatewayImpl::setConfig(FrontendConfig::Reader config) { 97 | configMessage = kj::heap(); 98 | configMessage->setRoot(config); 99 | this->config = configMessage->getRoot(); 100 | wildcardHost = sandstorm::WildcardMatcher(config.getWildcardHost()); 101 | 
102 | // TODO(soon): Update all GatewayService instances to new config. 103 | } 104 | 105 | kj::Promise GatewayImpl::request( 106 | kj::HttpMethod method, kj::StringPtr url, const kj::HttpHeaders& headers, 107 | kj::AsyncInputStream& requestBody, Response& response) { 108 | return chooseReplica(urlSessionHash(url, headers)) 109 | .then([this,method,url,&headers,&requestBody,&response](kj::Own replica) { 110 | auto promise = replica->service.request(method, url, headers, requestBody, response); 111 | return promise.attach(kj::mv(replica)); 112 | }); 113 | } 114 | 115 | kj::Promise GatewayImpl::reset(ResetContext context) { 116 | shellReplicas.clear(); 117 | 118 | auto params = context.getParams(); 119 | auto promises = KJ_MAP(backend, params.getBackends()) { 120 | return addFrontend(backend.getId(), backend.getBackend()); 121 | }; 122 | context.releaseParams(); 123 | return kj::joinPromises(kj::mv(promises)); 124 | } 125 | 126 | kj::Promise GatewayImpl::add(AddContext context) { 127 | auto params = context.getParams(); 128 | auto promise = addFrontend(params.getId(), params.getBackend()); 129 | context.releaseParams(); 130 | return promise; 131 | } 132 | 133 | kj::Promise GatewayImpl::remove(RemoveContext context) { 134 | uint64_t backendId = context.getParams().getId(); 135 | 136 | for (auto& replica: shellReplicas) { 137 | KJ_IF_MAYBE(r, replica) { 138 | if (r->get()->backendId == backendId) { 139 | replica = nullptr; 140 | } 141 | } 142 | } 143 | 144 | return kj::READY_NOW; 145 | } 146 | 147 | kj::Promise> GatewayImpl::SmtpNetworkAddressImpl::connect() { 148 | return gateway.chooseReplica(gateway.roundRobinCounter++) 149 | .then([this](kj::Own&& replica) { 150 | auto promise = replica->smtpAddress->connect(); 151 | return promise.attach(kj::mv(replica)); 152 | }); 153 | } 154 | 155 | GatewayImpl::ShellReplica::ShellReplica( 156 | GatewayImpl& gateway, uint64_t backendId, Frontend::Instance::Reader instance) 157 | : backendId(backendId), 158 | 
httpAddress(SimpleAddress(instance.getHttpAddress()).onNetwork(gateway.network)), 159 | smtpAddress(SimpleAddress(instance.getSmtpAddress()).onNetwork(gateway.network)), 160 | shellHttp(kj::newHttpClient(gateway.timer, *gateway.headerTable, *httpAddress, 161 | gateway.clientSettings)), 162 | router(instance.getRouter()), 163 | service(gateway.timer, *shellHttp, router, gateway.gatewayServiceTables, 164 | gateway.config.getBaseUrl(), gateway.config.getWildcardHost(), 165 | gateway.config.hasTermsPublicId() 166 | ? kj::Maybe(gateway.config.getTermsPublicId()) 167 | : kj::Maybe(nullptr)), 168 | cleanupLoop(service.cleanupLoop().eagerlyEvaluate([](kj::Exception&& e) { 169 | KJ_LOG(FATAL, "cleanupLoop() threw", e); 170 | abort(); 171 | })) {} 172 | 173 | kj::Promise GatewayImpl::addFrontend(uint64_t backendId, Frontend::Client frontend) { 174 | return frontend.getInstancesRequest().send() 175 | .then([this,backendId](capnp::Response&& response) { 176 | auto newInstances = response.getInstances(); 177 | for (auto instance: newInstances) { 178 | kj::Maybe> replica = 179 | kj::refcounted(*this, backendId, instance); 180 | for (auto& slot: shellReplicas) { 181 | if (slot == nullptr) { 182 | slot = kj::mv(replica); 183 | break; 184 | } 185 | } 186 | if (replica != nullptr) { 187 | shellReplicas.add(kj::mv(replica)); 188 | } 189 | } 190 | 191 | KJ_IF_MAYBE(r, readyPaf) { 192 | r->fulfiller->fulfill(); 193 | readyPaf = nullptr; 194 | } 195 | }); 196 | } 197 | 198 | kj::Promise> GatewayImpl::chooseReplica(uint64_t hash) { 199 | std::set eliminated; 200 | while (eliminated.size() < shellReplicas.size()) { 201 | size_t bucket = hash % (shellReplicas.size() - eliminated.size()); 202 | for (auto e: eliminated) { 203 | if (bucket >= e) { 204 | ++bucket; 205 | } else { 206 | break; 207 | } 208 | } 209 | 210 | KJ_ASSERT(bucket < shellReplicas.size()); 211 | KJ_IF_MAYBE(replica, shellReplicas[bucket]) { 212 | return kj::addRef(**replica); 213 | } 214 | 215 | 
KJ_ASSERT(eliminated.insert(bucket).second); 216 | } 217 | 218 | if (readyPaf == nullptr) { 219 | auto paf = kj::newPromiseAndFulfiller(); 220 | readyPaf = ReadyPair { paf.promise.fork(), kj::mv(paf.fulfiller) }; 221 | } 222 | 223 | return KJ_ASSERT_NONNULL(readyPaf).promise.addBranch().then([this,hash]() { 224 | return chooseReplica(hash); 225 | }); 226 | } 227 | 228 | static bool isAllHex(kj::StringPtr text) { 229 | for (char c: text) { 230 | if ((c < '0' || '9' < c) && 231 | (c < 'a' || 'f' < c) && 232 | (c < 'A' || 'F' < c)) { 233 | return false; 234 | } 235 | } 236 | 237 | return true; 238 | } 239 | 240 | uint64_t GatewayImpl::urlSessionHash(kj::StringPtr url, const kj::HttpHeaders& headers) { 241 | KJ_IF_MAYBE(hostId, wildcardHost.match(headers)) { 242 | if (hostId->startsWith("ui-") || hostId->startsWith("api-") || 243 | (hostId->size() == 20 && isAllHex(*hostId))) { 244 | // These cases are really served by a grain, and we only use a shell to connect to the right 245 | // grain. We bucket on hostname so that a particular grain is always looked up from the same 246 | // shell and through the same local grain capability cache. The hostname ends in hex, so we 247 | // can just parse it. 248 | KJ_ASSERT(hostId->size() >= 20); 249 | auto hex = hostId->slice(hostId->size() - 16); 250 | char* end; 251 | auto result = strtoull(hex.begin(), &end, 16); 252 | KJ_REQUIRE(end == hex.end(), "invalid hostname", *hostId); 253 | return result; 254 | } 255 | } 256 | 257 | // Recognize paths beginning with `sockjs` as probably being Meteor DDP connections. 258 | // 259 | // TODO(cleanup): Currently every installation can configure DDP to happen on an arbitrary host, 260 | // as long as it maps to the server and doesn't already have some other designated purpose. We 261 | // should probably standardize on the wildcard host ID "ddp" instead. 
262 | auto parsedUrl = kj::Url::parse(url, kj::Url::HTTP_REQUEST); 263 | if (parsedUrl.path.size() >= 2 && 264 | parsedUrl.path[0] == "sockjs") { 265 | // SockJS connections provide a 3-decimal-digit server ID in the path. BUT, it also has some 266 | // other endpoints like "info", so parse carefully. 267 | char* end; 268 | auto result = strtoul(parsedUrl.path[1].cStr(), &end, 10); 269 | if (end == parsedUrl.path[1].end()) { 270 | return result; 271 | } 272 | } 273 | 274 | // Anything else is probably a static asset. We hash the URL to make upstream caching more 275 | // efficient -- but probably these requests don't need to be load balanced anyway because CDN 276 | // caching ought to kick in here. 277 | 278 | // djb hash with xor 279 | // TODO(someday): Add hashing library to KJ. 280 | uint64_t result = 5381; 281 | for (char c: url) { 282 | result = (result * 33) ^ c; 283 | } 284 | return result; 285 | } 286 | 287 | void GatewayImpl::taskFailed(kj::Exception&& exception) { 288 | KJ_LOG(FATAL, exception); 289 | 290 | // Better restart since we may be in a degraded state. 291 | abort(); 292 | } 293 | 294 | } // namespace blackrock 295 | -------------------------------------------------------------------------------- /src/blackrock/gce.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include "gce.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | namespace blackrock { 25 | 26 | namespace { 27 | 28 | // TODO(cleanup): Share this code with version in master.c++. 29 | kj::Promise readAllAsync(kj::AsyncInputStream& input, 30 | kj::Vector buffer = kj::Vector()) { 31 | buffer.resize(buffer.size() + 4096); 32 | auto promise = input.tryRead(buffer.end() - 4096, 4096, 4096); 33 | return promise.then([KJ_MVCAP(buffer),&input](size_t n) mutable -> kj::Promise { 34 | if (n < 4096) { 35 | buffer.resize(buffer.size() - 4096 + n); 36 | buffer.add('\0'); 37 | return kj::String(buffer.releaseAsArray()); 38 | } else { 39 | return readAllAsync(input, kj::mv(buffer)); 40 | } 41 | }); 42 | } 43 | 44 | static kj::String getImageName() { 45 | char buffer[256]; 46 | ssize_t n; 47 | KJ_SYSCALL(n = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1)); 48 | buffer[n] = '\0'; 49 | kj::StringPtr exeName(buffer); 50 | return sandstorm::trim(exeName.slice(KJ_ASSERT_NONNULL(exeName.findLast('/')) + 1)); 51 | } 52 | 53 | } // namespace 54 | 55 | GceDriver::GceDriver(sandstorm::SubprocessSet& subprocessSet, 56 | kj::LowLevelAsyncIoProvider& ioProvider, 57 | GceConfig::Reader config) 58 | : subprocessSet(subprocessSet), ioProvider(ioProvider), config(config), image(getImageName()), 59 | masterBindAddress(SimpleAddress::getInterfaceAddress(AF_INET, "eth0")), 60 | logTask(nullptr), logSinkAddress(masterBindAddress) { 61 | // Create socket for the log sink acceptor. 
62 | int sock; 63 | KJ_SYSCALL(sock = socket(masterBindAddress.family(), 64 | SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)); 65 | { 66 | KJ_ON_SCOPE_FAILURE(close(sock)); 67 | logSinkAddress.setPort(0); 68 | KJ_SYSCALL(bind(sock, logSinkAddress.asSockaddr(), logSinkAddress.getSockaddrSize())); 69 | KJ_SYSCALL(listen(sock, SOMAXCONN)); 70 | 71 | // Read back the assigned port number. 72 | logSinkAddress = SimpleAddress::getLocal(sock); 73 | } 74 | 75 | // Accept log connections. 76 | auto listener = ioProvider.wrapListenSocketFd(sock, 77 | kj::LowLevelAsyncIoProvider::TAKE_OWNERSHIP | 78 | kj::LowLevelAsyncIoProvider::ALREADY_CLOEXEC | 79 | kj::LowLevelAsyncIoProvider::ALREADY_NONBLOCK); 80 | 81 | logTask = logSink.acceptLoop(kj::mv(listener)) 82 | .eagerlyEvaluate([](kj::Exception&& exception) { 83 | KJ_LOG(ERROR, "LogSink accept loop failed", exception); 84 | }); 85 | } 86 | 87 | GceDriver::~GceDriver() noexcept(false) {} 88 | 89 | SimpleAddress GceDriver::getMasterBindAddress() { 90 | return masterBindAddress; 91 | } 92 | 93 | auto GceDriver::listMachines() -> kj::Promise> { 94 | int fds[2]; 95 | KJ_SYSCALL(pipe2(fds, O_CLOEXEC)); 96 | kj::AutoCloseFd writeEnd(fds[1]); 97 | auto input = ioProvider.wrapInputFd(fds[0], 98 | kj::LowLevelAsyncIoProvider::Flags::TAKE_OWNERSHIP | 99 | kj::LowLevelAsyncIoProvider::Flags::ALREADY_CLOEXEC); 100 | 101 | // TODO(cleanup): Use `--format json` here (but then we need a json parser). 102 | auto exitPromise = gceCommand({"instances", "list", "--format", "text", "-q"}, 103 | STDIN_FILENO, writeEnd); 104 | 105 | auto outputPromise = readAllAsync(*input); 106 | return outputPromise.attach(kj::mv(input)) 107 | .then([this,KJ_MVCAP(exitPromise)](kj::String allText) mutable { 108 | kj::Vector result; 109 | 110 | kj::StringPtr text = allText; 111 | kj::Maybe lastSeenMachine; 112 | kj::Vector> promises; 113 | 114 | promises.add(kj::mv(exitPromise)); 115 | 116 | // Parse lines until there are no more. 
117 | while (text.size() > 0) { 118 | uint eol = KJ_ASSERT_NONNULL(text.findFirst('\n')); 119 | 120 | // Look for "name:" lines, which are instance names. Ignore everything else. 121 | if (text.startsWith("name:")) { 122 | auto name = sandstorm::trim(text.slice(strlen("name:"), eol)); 123 | if (!name.startsWith("master") && 124 | !name.startsWith("build") && 125 | !name.startsWith("nginx")) { 126 | lastSeenMachine = MachineId(name); 127 | } 128 | } else if (text.startsWith("tags.items[")) { 129 | auto name = sandstorm::trim(text.slice(KJ_ASSERT_NONNULL(text.findFirst(':')) + 1, eol)); 130 | if (name == image) { 131 | // Cool, this machine has the right tag, so it checks out. 132 | result.add(KJ_ASSERT_NONNULL(lastSeenMachine)); 133 | lastSeenMachine = nullptr; 134 | } 135 | } else if (text.startsWith("---")) { 136 | KJ_IF_MAYBE(machine, lastSeenMachine) { 137 | KJ_LOG(INFO, "shutting down machine running old image", *machine); 138 | promises.add(stop(*machine)); 139 | lastSeenMachine = nullptr; 140 | } 141 | } 142 | 143 | text = text.slice(eol + 1); 144 | } 145 | 146 | KJ_IF_MAYBE(machine, lastSeenMachine) { 147 | KJ_LOG(INFO, "shutting down machine running old image", *machine); 148 | promises.add(stop(*machine)); 149 | lastSeenMachine = nullptr; 150 | } 151 | 152 | return kj::joinPromises(promises.releaseAsArray()) 153 | .then([KJ_MVCAP(result)]() mutable { return result.releaseAsArray(); }); 154 | }); 155 | } 156 | 157 | kj::Promise GceDriver::boot(MachineId id) { 158 | kj::Vector args; 159 | kj::Vector scratch; 160 | auto idStr = kj::str(id); 161 | auto tagStr = kj::str("--tags=", image); 162 | args.addAll(std::initializer_list 163 | { "instances", "create", idStr, "--image", image, "--no-scopes", "-q" }); 164 | kj::StringPtr startupScript; 165 | kj::StringPtr instanceType; 166 | switch (id.type) { 167 | case ComputeDriver::MachineType::STORAGE: { 168 | instanceType = config.getInstanceTypes().getStorage(); 169 | 170 | // Attach necessary disk. 
171 | auto param = kj::str("--disk=name=", id, "-data,mode=rw,device-name=blackrock"); 172 | args.add(param); 173 | scratch.add(kj::mv(param)); 174 | startupScript = 175 | "#! /bin/sh\n" 176 | "mkdir -p /var/blackrock/storage\n" 177 | "mount /dev/disk/by-id/google-blackrock /var/blackrock/storage\n"; 178 | break; 179 | } 180 | 181 | case ComputeDriver::MachineType::WORKER: 182 | instanceType = config.getInstanceTypes().getWorker(); 183 | break; 184 | 185 | case ComputeDriver::MachineType::COORDINATOR: 186 | instanceType = config.getInstanceTypes().getCoordinator(); 187 | break; 188 | 189 | case ComputeDriver::MachineType::FRONTEND: 190 | instanceType = config.getInstanceTypes().getFrontend(); 191 | break; 192 | 193 | case ComputeDriver::MachineType::MONGO: { 194 | instanceType = config.getInstanceTypes().getMongo(); 195 | 196 | // Attach necessary disk. 197 | auto param = kj::str("--disk=name=", id, "-data,mode=rw,device-name=blackrock"); 198 | args.add(param); 199 | scratch.add(kj::mv(param)); 200 | startupScript = 201 | "#! /bin/sh\n" 202 | "mkdir -p /var/blackrock/bundle\n" 203 | "mount /dev/disk/by-id/google-blackrock /var/blackrock/bundle\n"; 204 | break; 205 | } 206 | 207 | case ComputeDriver::MachineType::GATEWAY: { 208 | instanceType = config.getInstanceTypes().getGateway(); 209 | 210 | // Tag to accept HTTP and SMTP traffic. 211 | tagStr = kj::str(tagStr, ",http,smtp"); 212 | 213 | // Assign static IP address if configured. 214 | auto addrs = config.getGatewayAddresses(); 215 | if (id.index < addrs.size()) { 216 | args.add("--address"); 217 | args.add(addrs[id.index]); 218 | } 219 | 220 | break; 221 | } 222 | } 223 | 224 | args.add(tagStr); 225 | 226 | args.add("--machine-type"); 227 | args.add(instanceType); 228 | 229 | if (startupScript == nullptr) { 230 | return gceCommand(args); 231 | } else { 232 | // We'll pass the startup script via stdin. 
233 | args.add("--metadata-from-file=startup-script=/dev/stdin"); 234 | 235 | // No need for async pipe since the startup script almost certainly won't fill the pipe buffer 236 | // anyhow, and even if it did, the tool immediately reads it before doing other stuff. 237 | auto pipe = sandstorm::Pipe::make(); 238 | auto promise = gceCommand(args, pipe.readEnd); 239 | pipe.readEnd = nullptr; 240 | kj::FdOutputStream(kj::mv(pipe.writeEnd)).write(startupScript.begin(), startupScript.size()); 241 | return kj::mv(promise); 242 | } 243 | } 244 | 245 | kj::Promise GceDriver::run( 246 | MachineId id, blackrock::VatId::Reader masterVatId, bool requireRestartProcess) { 247 | kj::String name = kj::str(id); 248 | 249 | int fds[2]; 250 | KJ_SYSCALL(pipe2(fds, O_CLOEXEC)); 251 | kj::AutoCloseFd stdinReadEnd(fds[0]); 252 | auto stdinWriteEnd = ioProvider.wrapOutputFd(fds[1], 253 | kj::LowLevelAsyncIoProvider::Flags::TAKE_OWNERSHIP | 254 | kj::LowLevelAsyncIoProvider::Flags::ALREADY_CLOEXEC); 255 | KJ_SYSCALL(pipe2(fds, O_CLOEXEC)); 256 | kj::AutoCloseFd stdoutWriteEnd(fds[1]); 257 | auto stdoutReadEnd = ioProvider.wrapInputFd(fds[0], 258 | kj::LowLevelAsyncIoProvider::Flags::TAKE_OWNERSHIP | 259 | kj::LowLevelAsyncIoProvider::Flags::ALREADY_CLOEXEC); 260 | 261 | auto addr = kj::str(logSinkAddress, '/', name); 262 | auto target = kj::str("root@", name); 263 | kj::Vector args; 264 | auto command = kj::str("/blackrock/bin/blackrock slave --log ", addr, " if4:eth0"); 265 | args.addAll(kj::ArrayPtr({ 266 | "ssh", target, "--command", command, "-q"})); 267 | if (requireRestartProcess) args.add("-r"); 268 | 269 | auto exitPromise = gceCommand(args, stdinReadEnd, stdoutWriteEnd); 270 | 271 | auto message = kj::heap(masterVatId.totalSize().wordCount + 4); 272 | message->setRoot(masterVatId); 273 | 274 | auto& stdoutReadEndRef = *stdoutReadEnd; 275 | return capnp::writeMessage(*stdinWriteEnd, *message) 276 | .attach(kj::mv(stdinWriteEnd), kj::mv(message)) 277 | .then([&stdoutReadEndRef]() { 
278 | return capnp::readMessage(stdoutReadEndRef); 279 | }).then([this,id,KJ_MVCAP(exitPromise),KJ_MVCAP(stdoutReadEnd)]( 280 | kj::Own reader) mutable { 281 | auto path = reader->getRoot(); 282 | vatPaths[id] = kj::mv(reader); 283 | return exitPromise.then([path]() { return path; }); 284 | }); 285 | } 286 | 287 | kj::Promise GceDriver::stop(MachineId id) { 288 | return gceCommand({"instances", "delete", kj::str(id), "-q"}); 289 | } 290 | 291 | kj::Promise GceDriver::gceCommand(kj::ArrayPtr args, 292 | int stdin, int stdout) { 293 | auto fullArgs = kj::heapArrayBuilder(args.size() + 4); 294 | fullArgs.add("gcloud"); 295 | fullArgs.add("--project"); 296 | fullArgs.add(config.getProject()); 297 | fullArgs.add("compute"); 298 | fullArgs.addAll(args); 299 | 300 | kj::Vector env; 301 | auto newEnv = kj::str("CLOUDSDK_COMPUTE_ZONE=", config.getZone()); 302 | env.add(newEnv); 303 | for (char** envp = environ; *envp != nullptr; ++envp) { 304 | kj::StringPtr e = *envp; 305 | if (!e.startsWith("CLOUDSDK_COMPUTE_ZONE=")) { 306 | env.add(e); 307 | } 308 | } 309 | 310 | sandstorm::Subprocess::Options options(fullArgs.finish()); 311 | auto command = kj::strArray(options.argv, " "); 312 | KJ_LOG(INFO, command); 313 | options.stdin = stdin; 314 | options.stdout = stdout; 315 | options.environment = kj::ArrayPtr(env); 316 | return subprocessSet.waitForSuccess(kj::mv(options)); 317 | } 318 | 319 | } // namespace blackrock 320 | 321 | -------------------------------------------------------------------------------- /src/blackrock/cluster-rpc.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@0xf49ffd606012a28b;

$import "/capnp/c++.capnp".namespace("blackrock");
using GenericPersistent = import "/capnp/persistent.capnp".Persistent;

struct VatId {
  # Identifies a machine in the cluster.
  #
  # Note that "vats" are expected to be somewhat ephemeral, as machines may be rotated in and out
  # of the cluster on a regular basis. In particular, it is important that upon compromise of a
  # vat's private key, the machine can simply be wiped and restarted with a new key without
  # significant long-term damage. Vats should probably be cycled in this way on a regular basis
  # (perhaps, every few days) even if no compromise is known to have occurred.

  publicKey0 @0 :UInt64;
  publicKey1 @1 :UInt64;
  publicKey2 @2 :UInt64;
  publicKey3 @3 :UInt64;
  # The Vat's Curve25519 public key, interpreted as little-endian.
}

struct Address {
  # Address at which you might connect to a vat. Used for three-party hand-offs.
  #
  # Note that any vat that listens for connections on a port should also listen for unix domain
  # socket connections on the "abstract" name "sandstorm-", so that other vats on the same machine
  # can connect via Unix sockets rather than IP.
  # (NOTE(review): a name suffix appears to have been lost from the quoted abstract-socket name
  # in this copy of the file -- confirm against upstream.)

  lower64 @0 :UInt64;
  upper64 @1 :UInt64;
  # Bits of the IPv6 address. Since IP is a big-endian spec, the "lower" bits are on the right,
  # and the "upper" bits on the left. E.g., if the address is "1:2:3:4:5:6:7:8", then the lower
  # 64 bits are "5:6:7:8" or 0x0005000600070008 while the upper 64 bits are "1:2:3:4" or
  # 0x0001000200030004.
  #
  # Note that for an IPv4 address, according to the standard IPv4-mapped IPv6 address rules, you
  # would use code like this:
  #     uint32 ipv4 = (octet[0] << 24) | (octet[1] << 16) | (octet[2] << 8) | octet[3];
  #     dest.setLower64(0x0000FFFF00000000 | ipv4);
  #     dest.setUpper64(0);

  port @2 :UInt16;
}

struct VatPath {
  # Enough information to connect to a vat securely.

  id @0 :VatId;
  address @1 :Address;
}

struct SturdyRef {
  # Parameterization of SturdyRef for Sandstorm internal traffic.

  struct Owner {
    # Owner of a SturdyRef, for sealing purposes. See discussion of sealing in
    # import "/capnp/persistent.capnp".Persistent.

    union {
      vat @0 :VatId;
      # The domain of a single vat. Use this domain when saving refs in the vat's local storage.

      storage @1 :Void;
      # The domain of the storage system. Use when saving refs in long-term storage.

      coordinator @2 :Void;
      # The domain of the coordinators. Use when generating a `hosted` SturdyRef.

      gateway @3 :Void;
      # The domain of the gateways. Use when generating an `external` SturdyRef.

      frontend @4 :Void;
      # The domain of the front-end shell.
    }
  }

  union {
    transient @0 :Transient;
    stored @1 :Stored;
    hosted @2 :Hosted;
    external @3 :External;
  }

  struct Transient {
    # Reference to an object hosted by some specific vat in the cluster, which will eventually
    # become invalid when that vat is taken out of rotation.

    vat @0 :VatPath;
    # The vat where the object is located.

    localRef @1 :AnyPointer;
    # A SturdyRef in the format defined by the vat.
  }

  struct Stored {
    # Reference to an object in long-term storage.

    key0 @0 :UInt64;
    key1 @1 :UInt64;
    key2 @2 :UInt64;
    key3 @3 :UInt64;
    # 256-bit object key. This both identifies the object and may serve as a symmetric key for
    # decrypting the object.
  }

  struct Hosted {
    # Reference to an object hosted within a grain.

    grainState @0 :Stored;
    # Storage ID for an Assignable(GrainState) representing the grain.
    #
    # This stored object is sealed for coordinators, so that holding a SturdyRef to a capability
    # hosted by some grain does not grant direct access to the grain's storage.
    #
    # TODO(soon): This doesn't work: there's no way for the coordinator to enforce the seal on
    #   this ref, because the owner isn't stored anywhere. Possible solutions:
    #   1) Use a reference to a wrapper object in storage owned by the coordinators, which itself
    #      stores the actual object and Owner for enforcement. Problem: won't be cleaned up when
    #      the grain is deleted.
    #   2) Extend Persistent.save() to accept a tag which is returned later on load. Or have
    #      it return the Owner on load, and we can make our Owner type include information about
    #      who is allowed to invoke the coordinator. But note that remote entities and apps won't
    #      be expected to maintain such storage.

    supervisorRef @1 :AnyPointer;
    # A SturdyRef in the format defined by the Sandstorm supervisor.
  }

  struct External {
    # Reference to an object living outside the Sandstorm cluster.

    gatewayRef @0 :Stored;
    # Reference to a stored Immutable(SturdyRef), where that SturdyRef is designed for use on
    # the public internet. The stored object is sealed for the cluster's Cap'n Proto gateway
    # machines.
  }
}

interface Persistent extends(GenericPersistent(SturdyRef, SturdyRef.Owner)) {}

interface Restorer(Ref) {
  # Interface for restoring a SturdyRef.
  #
  # Every vat exports a bootstrap interface of type Restorer that handles refs of type `Transient`.
  # (The interface is of type Restorer(LocalRefType), where LocalRefType is the type of
  # Transient.localRef for this vat. Every vat may have a different local ref type.)
  #
  # For non-transient ref types, the Master is responsible for collecting Restorers from the
  # machines currently assigned to handle that type and redistributing those to the machines that
  # need them.

  restore @0 (sturdyRef :Ref) -> (cap :Capability);
  drop @1 (sturdyRef :Ref);
}

struct ProvisionId {
  provider @0 :VatId;
  # ID of the vat providing the capability (aka the introducer).

  nonce0 @1 :UInt64;
  nonce1 @2 :UInt64;
  # 128-bit nonce randomly chosen by the introducer.
}

struct RecipientId {
  recipient @0 :VatPath;
  # ID of the vat receiving the capability.

  nonce0 @1 :UInt64;
  nonce1 @2 :UInt64;
  # 128-bit nonce randomly chosen by the introducer.
}

struct ThirdPartyCapId {
  provider @0 :VatPath;
  # ID and path to the host of this capability.

  nonce0 @1 :UInt64;
  nonce1 @2 :UInt64;
  # 128-bit nonce randomly chosen by the introducer.
}

struct JoinKeyPart {
  # TODO(someday)
}

struct JoinResult {
  # TODO(someday)
}

interface BackendSet(T) {
  # Callbacks for notifying a machine of changes to its set of back-ends. When the master tells
  # a machine to take on a role, the machine returns various BackendSets which the master then
  # populates.

  reset @0 (backends :List(IdBackendPair));
  # Drop the entire existing backend list and replace it with this new one. Called in particular at
  # startup, or whenever the master has restarted. After reset(), previously-used ID values may
  # be reused.

  struct IdBackendPair {
    id @0 :UInt64;
    backend @1 :T;
  }

  add @1 (id :UInt64, backend :T);
  # Add a new back-end.

  remove @2 (id :UInt64);
  # Remove an existing back-end. The ID will NOT be reused for this set (unless reset() is first
  # called).
  #
  # Note that we cannot identify the backend as a capability here because it may be down, in which
  # case the receiver could never possibly figure out which existing backend in the set that it
  # matched.
}

# ========================================================================================
# Transport Protocol
#
# We assume an underlying sequential datagram transport supporting:
# - Reliable and ordered delivery.
# - Arbitrary-size datagrams.
# - Congestion control.
# - Peer identified by VatId (not by sending IP/port).
# - At the admin's option, encryption for privacy and integrity. (This is optional because many
#   Blackrock clusters may be on physically secure networks where encryption is not needed.)
#
# The simplest implementation of this protocol -- called "the simple protocol" -- is based on
# unencrypted TCP, where we assume that the network infrastructure is secure enough to ensure
# integrity and privacy when delivering packets. The protocol still uses crypto to authenticate
# the connection upfront.
#
# TODO(security): The following protocol has not been reviewed by a crypto expert, and therefore
#   may be totally stupid.
#
# In the simple protocol, a connection is initiated by sending the following header:
# - 32 bytes: The sender's X25519 public key.
# - 8 bytes: Connection number (little-endian). Each time a new connection is initiated from the
#   sending vat to the same receiving vat, this number must increase. If the sender's public key
#   is less than the receiver's, this number must be even, otherwise it must be odd, so that
#   connection IDs in opposite directions between the same vats never collide. Any existing
#   connections with lower connection IDs must be invalidated when a new connection starts.
# - 8 bytes: minIgnoredConnection, the minimum connection number which the sender guarantees that
#   it had not received at the time that it sent this message. The sender promises that if it
#   later receives an incoming connection with this number or greater, but less than the
#   connection number that the sender is initiating with this header, then it will reject any
#   such connection without reading any messages from it. This gives the receiver of this header
#   some assurance that if it had tried to form a connection previously and optimistically sent
#   messages on it, it is safe to send those messages again.
# - 16 bytes: poly1305 MAC of the above *plus* the sender's IPv6 address (or IPv6-mapped IPv4
#   address) and port number (18 bytes). The key is constructed by taking the first 32 bytes of
#   the ChaCha20 stream generated using the two vats' shared secret as a key, and the connection
#   number as a nonce. The purpose of this MAC is to prevent an arbitrary node on the network
#   from impersonating an arbitrary vat by simply sending its public key, which would otherwise
#   be possible even assuming a secure physical network.
#
# Upon accepting a connection, the acceptor does the following:
# - Wait for the header.
# - Verify the header MAC (closing the connection immediately if invalid).
# - If the connection number is less than that of any existing connection to the same remote vat --
#   especially, one recently initiated in the opposite direction -- close it and do not continue.
# - Send a reply header on the newly-accepted connection, which is similar to the received header
#   except that it bears the accepting vat's public key and the connection number (and MAC nonce)
#   is incremented by one. (Notice that this connection number could not possibly already have been
#   used because of the previous step.)
# - If there is any other outstanding connection to the same remote vat (with a lower number),
#   close that other connection. If this vat had sent messages on said other connection but had not
#   yet received any data (including the header) from the peer, then re-send those messages on the
#   newly-accepted connection instead.
#
# Note that, for the initiator of the connection, between the time that the connection starts and
# the time that the reply header is received, it is not yet known if the IP address connected to
# really does correspond to the intended VatId. However, since the IP address was given to us by
# the introducer, and the introducer could have introduced us to anybody, we can safely send
# plaintext messages meant for the entity to whom we were introduced. The only problem is if
# we receive another introduction for the same target VatId but a different IP/port pair in the
# interim. In this case, we must wait until we've received the reply on our existing connection
# authenticating it. If we receive no reply in a reasonable time, or we receive a bogus reply,
# we must close the connection and create a new one with the new address. At this point we cannot
# send *any* messages until the new connection comes back with a valid header, at which point we
# can re-send the messages we had sent to the old connection.
--------------------------------------------------------------------------------