├── hack ├── icons ├── shell ├── src ├── kj ├── blackrock │ ├── blackrock.link-static │ ├── storage-tool.link-static │ ├── blackrock.ekam-manifest │ ├── fs-storage-test.capnp │ ├── blank-ext4.capnp │ ├── blank-ext4.ekam-rule │ ├── mke2fs.conf │ ├── sparse-data.capnp │ ├── common.h │ ├── common.c++ │ ├── bundle.h │ ├── master.capnp │ ├── gce.h │ ├── logs.h │ ├── frontend.capnp │ ├── storage-schema.capnp │ ├── local-persistent-registry.h │ ├── sparse-data.c++ │ ├── master.h │ ├── fs-storage.capnp │ ├── frontend.h │ ├── logs-tester.c++ │ ├── gateway.h │ ├── fs-storage.h │ ├── cluster-rpc.h │ ├── local-persistent-registry.c++ │ ├── worker.capnp │ ├── backend-set.h │ ├── machine.capnp │ ├── nbd-test-loopback.c++ │ ├── bundle.c++ │ ├── backend-set.c++ │ ├── nbd-bridge.h │ ├── worker.h │ ├── distributed-blocks.c++ │ ├── storage-tool.c++ │ ├── gateway.c++ │ ├── gce.c++ │ └── cluster-rpc.capnp ├── capnp ├── sodium ├── openssl ├── ekam-rules ├── joyent-http ├── libseccomp ├── node-capnp └── sandstorm ├── CHANGELOG.md ├── localedata-C ├── make-bundle.sh ├── meteor-bundle-main.js ├── find-meteor-dev-bundle.sh ├── .gitignore ├── test-config.capnp ├── check-e2fsprogs.sh ├── README.md ├── tests └── run-testrock.sh ├── Vagrantfile ├── Makefile ├── release.sh └── LICENSE /hack: -------------------------------------------------------------------------------- 1 | deps/sandstorm/hack -------------------------------------------------------------------------------- /icons: -------------------------------------------------------------------------------- 1 | deps/sandstorm/icons -------------------------------------------------------------------------------- /shell: -------------------------------------------------------------------------------- 1 | deps/sandstorm/shell -------------------------------------------------------------------------------- /src/kj: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/kj 
-------------------------------------------------------------------------------- /src/blackrock/blackrock.link-static: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/blackrock/storage-tool.link-static: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/capnp: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/capnp -------------------------------------------------------------------------------- /src/sodium: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/sodium -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | deps/sandstorm/CHANGELOG.md -------------------------------------------------------------------------------- /localedata-C: -------------------------------------------------------------------------------- 1 | deps/sandstorm/localedata-C -------------------------------------------------------------------------------- /make-bundle.sh: -------------------------------------------------------------------------------- 1 | deps/sandstorm/make-bundle.sh -------------------------------------------------------------------------------- /src/openssl: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/openssl -------------------------------------------------------------------------------- /src/ekam-rules: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/ekam-rules -------------------------------------------------------------------------------- /src/joyent-http: 
-------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/joyent-http -------------------------------------------------------------------------------- /src/libseccomp: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/libseccomp -------------------------------------------------------------------------------- /src/node-capnp: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/node-capnp -------------------------------------------------------------------------------- /src/sandstorm: -------------------------------------------------------------------------------- 1 | ../deps/sandstorm/src/sandstorm -------------------------------------------------------------------------------- /meteor-bundle-main.js: -------------------------------------------------------------------------------- 1 | deps/sandstorm/meteor-bundle-main.js -------------------------------------------------------------------------------- /src/blackrock/blackrock.ekam-manifest: -------------------------------------------------------------------------------- 1 | blackrock bin 2 | -------------------------------------------------------------------------------- /find-meteor-dev-bundle.sh: -------------------------------------------------------------------------------- 1 | deps/sandstorm/find-meteor-dev-bundle.sh -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Copied from Sandstorm 2 | bin 3 | tmp 4 | deps 5 | node_modules 6 | shell-build 7 | sandstorm-*.tar.xz 8 | bundle/ 9 | .docker 10 | .shell-env 11 | .vagrant/ 12 | phantomjsdriver.log 13 | selenium-debug.log 14 | 15 | # Added for Blackrock 16 | dbg 17 | config.capnp 18 | oasis-config 19 | testrock-config 20 | blackrock.tar.xz 21 | local-config 22 | .local 23 | 
-------------------------------------------------------------------------------- /test-config.capnp: -------------------------------------------------------------------------------- 1 | @0xa9101b1fec595220; 2 | 3 | using import "/blackrock/master.capnp".MasterConfig; 4 | 5 | const vagrant :MasterConfig = ( 6 | workerCount = 2, 7 | frontendCount = 2, 8 | frontendConfig = ( 9 | baseUrl = "https://localrock.sandstorm.io", 10 | wildcardHost = "*.localrock.sandstorm.io", 11 | allowDemoAccounts = true, 12 | isTesting = true, 13 | # stripeKey = "sk_test_???", 14 | # stripePublicKey = "pk_test_???", 15 | outOfBeta = true, 16 | allowUninvited = true, 17 | replicasPerMachine = 2, 18 | # mailchimpKey = "???", 19 | # mailchimpListId = "???", 20 | privateKeyPassword = "abcd1234", 21 | ), 22 | vagrant = () 23 | ); 24 | 25 | -------------------------------------------------------------------------------- /src/blackrock/fs-storage-test.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | @0xed33f8595b36bba5; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | using Storage = import "storage.capnp"; 21 | 22 | struct TestStoredObject { 23 | text @0 :Text; 24 | sub1 @1 :Storage.OwnedAssignable(TestStoredObject); 25 | sub2 @2 :Storage.OwnedAssignable(TestStoredObject); 26 | volume @3 :Storage.OwnedVolume; 27 | } 28 | -------------------------------------------------------------------------------- /src/blackrock/blank-ext4.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0x8eb19add5ef5349e; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | using import "sparse-data.capnp".SparseData; 22 | 23 | const blankExt4 :SparseData = embed "blank-ext4.sparse"; 24 | # blank-ext4.sparse is created by blank-ext4.ekam-rule which runs mkfs.ext4 to create a new 25 | # ext4 FS and then uses sparse-data.c++ to turn its contents into SparseData. 26 | -------------------------------------------------------------------------------- /src/blackrock/blank-ext4.ekam-rule: -------------------------------------------------------------------------------- 1 | #! 
/bin/sh 2 | 3 | set -eu 4 | 5 | echo findProvider canonical:blackrock/sparse-data 6 | read MKSPARSE 7 | 8 | echo findProvider canonical:blackrock/mke2fs.conf 9 | read MKE2FS_CONFIG 10 | export MKE2FS_CONFIG 11 | 12 | echo newOutput blackrock/blank-ext4.sparse 13 | read OUTPUT 14 | 15 | # Put temp file in /var/tmp because tmpfs doesn't handle sparse files well. 16 | TMPFILE=/var/tmp/blackrock-blank-ext4 17 | 18 | # Start with an 8GB sparse file. 19 | # Experimentally, filesystem overhead (count of non-zero blocks after initialization) for 20 | # ext4 seems to be 12 blocks plus 1 for every 2GiB of space. At 8GiB we use 16 blocks, aka 21 | # 64KiB, which seems reasonable. 22 | rm -f $TMPFILE 23 | truncate -s 8589934592 $TMPFILE 24 | 25 | # Format as ext4. 26 | # TODO(someday): Figure out if we can make this deterministic. It seems there's more randomness 27 | # than just the UUID. 28 | /sbin/mkfs.ext4 -q -b 4096 -U 00000000-0000-0000-0000-000000000000 -m 0 -O sparse_super2 \ 29 | -E num_backup_sb=0,resize=4294967295 -J size=16 $TMPFILE 30 | $MKSPARSE $TMPFILE > $OUTPUT 31 | rm $TMPFILE 32 | -------------------------------------------------------------------------------- /src/blackrock/mke2fs.conf: -------------------------------------------------------------------------------- 1 | [defaults] 2 | base_features = sparse_super,filetype,resize_inode,dir_index,ext_attr 3 | default_mntopts = acl,user_xattr 4 | enable_periodic_fsck = 0 5 | blocksize = 4096 6 | inode_size = 256 7 | inode_ratio = 16384 8 | 9 | [fs_types] 10 | ext3 = { 11 | features = has_journal 12 | } 13 | ext4 = { 14 | features = has_journal,extent,huge_file,flex_bg,uninit_bg,dir_nlink,extra_isize 15 | auto_64-bit_support = 1 16 | inode_size = 256 17 | } 18 | ext4dev = { 19 | features = has_journal,extent,huge_file,flex_bg,uninit_bg,dir_nlink,extra_isize 20 | inode_size = 256 21 | options = test_fs=1 22 | } 23 | small = { 24 | blocksize = 1024 25 | inode_size = 128 26 | inode_ratio = 4096 27 | } 28 | floppy 
= { 29 | blocksize = 1024 30 | inode_size = 128 31 | inode_ratio = 8192 32 | } 33 | big = { 34 | inode_ratio = 32768 35 | } 36 | huge = { 37 | inode_ratio = 65536 38 | } 39 | news = { 40 | inode_ratio = 4096 41 | } 42 | largefile = { 43 | inode_ratio = 1048576 44 | blocksize = -1 45 | } 46 | largefile4 = { 47 | inode_ratio = 4194304 48 | blocksize = -1 49 | } 50 | hurd = { 51 | blocksize = 4096 52 | inode_size = 128 53 | } 54 | -------------------------------------------------------------------------------- /check-e2fsprogs.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # Verify that we've correctly built e2fsprogs and then copy them into the bundle. 4 | 5 | set -euo pipefail 6 | 7 | fail() { 8 | echo "check-e2fsprogs: $@" >&2 9 | exit 1 10 | } 11 | 12 | PROGS="tmp/e2fsprogs/e2fsck/e2fsck tmp/e2fsprogs/misc/tune2fs tmp/e2fsprogs/resize/resize2fs" 13 | 14 | for PROG in $PROGS; do 15 | if [ ! -e $PROG ]; then 16 | fail "$PROG does not exist" 17 | elif [ "$(ldd $PROG 2>&1 | tr -d '\t')" != "not a dynamic executable" ]; then 18 | fail "$PROG is not statically-linked" 19 | fi 20 | done 21 | 22 | rm -f /var/tmp/test-ext4fs-uuid-bug 23 | truncate -s 8G /var/tmp/test-ext4fs-uuid-bug >/dev/null 2>&1 24 | tmp/e2fsprogs/misc/mke2fs -t ext4 -U 00000000-0000-0000-0000-000000000000 /var/tmp/test-ext4fs-uuid-bug >/dev/null 2>&1 || fail "mke2fs failed" 25 | tmp/e2fsprogs/e2fsck/e2fsck -p /var/tmp/test-ext4fs-uuid-bug >tmp/check-e2fsck.out 2>&1 || fail "e2fsck failed" 26 | if grep -q UUID tmp/check-e2fsck.out; then 27 | fail "e2fsck not compiled to ignore null UUID" 28 | fi 29 | tmp/e2fsprogs/e2fsck/e2fsck -p /var/tmp/test-ext4fs-uuid-bug >/dev/null 2>&1 || fail "e2fsck repeat failed" 30 | rm /var/tmp/test-ext4fs-uuid-bug 31 | 32 | cp $PROGS bin 33 | -------------------------------------------------------------------------------- /src/blackrock/sparse-data.capnp: 
-------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xb4ec463ef590911d; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | struct SparseData { 22 | # Represents a chunk of "sparse" data, i.e. bytes with a lot of long runs of zeros. We only 23 | # include the non-zero bytes. 24 | # 25 | # This is used in particular to store a blank ext4 filesystem template directly into the 26 | # Blackrock binary so that we can quickly format new volumes. 27 | 28 | chunks @0 :List(Chunk); 29 | struct Chunk { 30 | offset @0 :UInt64; 31 | data @1 :Data; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/blackrock/common.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_COMMON_H_ 18 | #define BLACKROCK_COMMON_H_ 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | namespace blackrock { 25 | 26 | #define KJ_MVCAP(var) var = ::kj::mv(var) 27 | // Capture the given variable by move. Place this in a lambda capture list. Requires C++14. 28 | // 29 | // TODO(cleanup): Move to libkj. 30 | 31 | using kj::uint; 32 | using kj::byte; 33 | 34 | kj::AutoCloseFd newEventFd(uint value, int flags); 35 | uint64_t readEvent(int fd); 36 | void writeEvent(int fd, uint64_t value); 37 | // TODO(cleanup): Find a better home for these. 38 | 39 | } // namespace blackrock 40 | 41 | #endif // BLACKROCK_COMMON_H_ 42 | -------------------------------------------------------------------------------- /src/blackrock/common.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include "common.h" 18 | #include 19 | #include 20 | #include 21 | 22 | namespace blackrock { 23 | 24 | kj::AutoCloseFd newEventFd(uint value, int flags) { 25 | int fd; 26 | KJ_SYSCALL(fd = eventfd(0, flags)); 27 | return kj::AutoCloseFd(fd); 28 | } 29 | 30 | uint64_t readEvent(int fd) { 31 | ssize_t n; 32 | uint64_t result; 33 | KJ_SYSCALL(n = read(fd, &result, sizeof(result))); 34 | KJ_ASSERT(n == 8, "wrong-sized read from eventfd", n); 35 | return result; 36 | } 37 | 38 | void writeEvent(int fd, uint64_t value) { 39 | ssize_t n; 40 | KJ_SYSCALL(n = write(fd, &value, sizeof(value))); 41 | KJ_ASSERT(n == 8, "wrong-sized write on eventfd", n); 42 | } 43 | 44 | } // namespace blackrock 45 | -------------------------------------------------------------------------------- /src/blackrock/bundle.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_BUNDLE_H_ 18 | #define BLACKROCK_BUNDLE_H_ 19 | 20 | #include "common.h" 21 | #include 22 | 23 | namespace blackrock { 24 | 25 | void createSandstormDirectories(); 26 | // Call before enterSandstormBundle() (before forking) to ensure directory tree is initialized. 27 | 28 | void enterSandstormBundle(); 29 | // Call to cause the current process (typically newly-forked) to enter the Sandstorm bundle. 30 | // Its directory tree will then appear to be Sandstorm's. 31 | 32 | kj::Maybe checkPgpSignatureInBundle( 33 | kj::StringPtr appIdString, sandstorm::spk::Metadata::Reader metadata); 34 | // Runs sandstorm::ctheckPgpSignature() inside the Sandstorm bundle, since it invokes gpg. 35 | 36 | } // namespace blackrock 37 | 38 | #endif // BLACKROCK_BUNDLE_H_ 39 | -------------------------------------------------------------------------------- /src/blackrock/master.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xf58bc2dacec400ce; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | struct MasterConfig { 22 | workerCount @0 :UInt32; 23 | frontendCount @4 :UInt32 = 1; 24 | 25 | # For now, we expect exactly one of each of the other machine types. 
26 | 27 | frontendConfig @1 :import "frontend.capnp".FrontendConfig; 28 | 29 | union { 30 | vagrant @2 :VagrantConfig; 31 | gce @3 :GceConfig; 32 | } 33 | } 34 | 35 | struct VagrantConfig {} 36 | 37 | struct GceConfig { 38 | project @0 :Text; 39 | zone @1 :Text; 40 | gatewayAddresses @8 :List(Text); 41 | instanceTypes :group { 42 | storage @2 :Text = "n1-standard-1"; 43 | worker @3 :Text = "n1-highmem-2"; 44 | coordinator @4 :Text = "n1-standard-1"; 45 | frontend @5 :Text = "n1-highcpu-2"; 46 | mongo @6 :Text = "n1-standard-1"; 47 | gateway @7 :Text = "n1-standard-1"; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blackrock 2 | 3 | Blackrock is an extension to [Sandstorm](https://github.com/sandstorm-io/sandstorm) that allows a single instance to scale across a cluster of machines. It is used to power [Sandstorm Oasis](https://oasis.sandstorm.io), the managed hosting service offered by [Sandstorm.io](https://sandstorm.io). 4 | 5 | See the [Blackrock roadmap](https://github.com/sandstorm-io/sandstorm/tree/master/roadmap/blackrock) for a design overview. 6 | 7 | ## Running locally 8 | 9 | WARNING: This runs six VMs, and the number may increase in the future. It also allocates disk images totalling 20GB, although they are sparse images so won't actually use that much space on your drive unless you use them a lot. 10 | 11 | First, some prep: 12 | 13 | * This has only been tested on Debian Sid. On other distros, YMMV. (Patches welcome.) 14 | * You will need Vagrant installed. 15 | * If you want to build with modified version of Sandstorm, make sure that `deps/sandstorm` symlinks to your Sandstorm source tree. Hint: You can symlink `deps` to `..` if Blockrock is checked out next to Sandstorm. 16 | * You may want to edit `test-config.capnp` to add your Stripe test key and Mailchip key, if you want to test those features. 
Otherwise, leave them commented out. 17 | 18 | To run locally: 19 | 20 | make run-local 21 | 22 | This will take a very long time the first time it runs, but once all the VMs are up you'll be able to ctrl+C and re-run quickly. 23 | 24 | Your instance will be accessible at: http://localrock.sandstorm.io:6080/ 25 | 26 | To create an admin token: 27 | 28 | make local-admintoken 29 | 30 | Then go to: http://localrock.sandstorm.io:6080/setup/token/testtoken 31 | 32 | To get a Mongo shell: 33 | 34 | make local-mongo 35 | 36 | To shut down: 37 | 38 | make kill-local 39 | 40 | ## Deploying 41 | 42 | Please talk to us on [sandstorm-dev](https://groups.google.com/group/sandstorm-dev). 43 | 44 | If you are deploying on Google Compute Engine, this may be relatively easy. On any other infrastructure, a new `ComputeDriver` will be needed. See `src/blackrock/gce.{h,c++}` to see how this is implemented for GCE. Perhaps you'd like to contribute an implementation for another service? 45 | -------------------------------------------------------------------------------- /tests/run-testrock.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Sandstorm - Personal Cloud Sandbox 4 | # Copyright (c) 2014 Sandstorm Development Group, Inc. and contributors 5 | # All rights reserved. 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | set -euo pipefail 20 | 21 | XVFB_PID="" 22 | RUN_SELENIUM="true" 23 | 24 | cleanExit () { 25 | rc=$1 26 | 27 | if [ -n "$XVFB_PID" ] ; then 28 | # Send SIGINT to the selenium-server child of the backgrounded xvfb-run, so 29 | # it will exit cleanly and the Xvfb process will also be cleaned up. 30 | # We don't actually know that PID, so we find it with pgrep. 31 | kill -SIGINT $(pgrep --parent $XVFB_PID node) 32 | wait $XVFB_PID 33 | fi 34 | exit $rc 35 | } 36 | 37 | checkInstalled() { 38 | if ! $(which $1 >/dev/null 2>/dev/null) ; then 39 | echo "Couldn't find executable '$1' - try installing the $2 package?" 40 | exit 1 41 | fi 42 | } 43 | 44 | 45 | THIS_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") 46 | 47 | # Parse arguments. 48 | while [ $# -gt 0 ] ; do 49 | case $1 in 50 | --no-selenium) 51 | RUN_SELENIUM="false" 52 | ;; 53 | *) 54 | ;; 55 | esac 56 | shift 57 | done 58 | 59 | cd "$THIS_DIR"/../deps/sandstorm/tests 60 | 61 | checkInstalled npm npm 62 | checkInstalled firefox firefox 63 | 64 | npm install 65 | 66 | if [ "$RUN_SELENIUM" != "false" ] ; then 67 | checkInstalled java default-jre-headless 68 | checkInstalled xvfb-run Xvfb 69 | checkInstalled pgrep procps 70 | xvfb-run ./node_modules/selenium-standalone/bin/selenium-standalone start & 71 | XVFB_PID=$! 72 | fi 73 | 74 | export LAUNCH_URL="https://testrock.sandstorm.io" 75 | export DISABLE_DEMO=true 76 | export SKIP_UNITTESTS=true 77 | set +e 78 | 79 | npm test 80 | 81 | cleanExit $? 82 | -------------------------------------------------------------------------------- /src/blackrock/gce.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_GCE_H_ 18 | #define BLACKROCK_GCE_H_ 19 | 20 | #include "master.h" 21 | #include 22 | #include 23 | 24 | namespace blackrock { 25 | 26 | class GceDriver: public ComputeDriver { 27 | public: 28 | GceDriver(sandstorm::SubprocessSet& subprocessSet, kj::LowLevelAsyncIoProvider& ioProvider, 29 | GceConfig::Reader config); 30 | ~GceDriver() noexcept(false); 31 | 32 | SimpleAddress getMasterBindAddress() override; 33 | kj::Promise> listMachines() override; 34 | kj::Promise boot(MachineId id) override; 35 | kj::Promise run(MachineId id, VatId::Reader masterVatId, 36 | bool requireRestartProcess) override; 37 | kj::Promise stop(MachineId id) override; 38 | 39 | private: 40 | sandstorm::SubprocessSet& subprocessSet; 41 | kj::LowLevelAsyncIoProvider& ioProvider; 42 | GceConfig::Reader config; 43 | kj::String image; 44 | std::map> vatPaths; 45 | SimpleAddress masterBindAddress; 46 | 47 | LogSink logSink; 48 | kj::Promise logTask; 49 | SimpleAddress logSinkAddress; 50 | 51 | kj::Promise gceCommand(kj::ArrayPtr args, 52 | int stdin = STDIN_FILENO, int stdout = STDOUT_FILENO); 53 | kj::Promise gceCommand(std::initializer_list args, 54 | int stdin = STDIN_FILENO, int stdout = STDOUT_FILENO) { 55 | return gceCommand(kj::arrayPtr(args.begin(), args.size()), stdin, stdout); 56 | } 57 | }; 58 | 59 | } // namespace blackrock 60 | 61 | #endif // BLACKROCK_GCE_H_ 62 | -------------------------------------------------------------------------------- /src/blackrock/logs.h: 
-------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_LOGS_H_ 18 | #define BLACKROCK_LOGS_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | 24 | namespace sandstorm { 25 | class Subprocess; 26 | } 27 | 28 | namespace blackrock { 29 | 30 | class SimpleAddress; 31 | 32 | class LogSink: private kj::TaskSet::ErrorHandler { 33 | public: 34 | LogSink(); 35 | 36 | kj::Promise acceptLoop(kj::Own receiver); 37 | 38 | private: 39 | class ClientHandler; 40 | 41 | std::set namesSeen; 42 | 43 | kj::TaskSet tasks; 44 | 45 | void write(kj::ArrayPtr part1, kj::ArrayPtr part2 = nullptr); 46 | // Write a line to the log file, prefixed by a timestamp. 47 | 48 | void taskFailed(kj::Exception&& exception) override; 49 | }; 50 | 51 | void rotateLogs(int input, int logDirFd); 52 | // Read logs on `input` and write them to files in `logDirFd`, rotated to avoid any file becoming 53 | // overly large. 54 | 55 | void runLogClient(kj::StringPtr name, kj::StringPtr logAddressFile, kj::StringPtr backlogDir); 56 | // Reads logs from standard input and upload them to the log sink server, reconnecting to the 57 | // server as needed, buffering logs to a local file when the log server is unreachable. 
Note that 58 | // some logs may be lost around the moment of a disconnect; this is not intended to be 100% 59 | // reliable, only as reliable as is reasonable. 60 | // 61 | // `logAddressFile` is the name of a file on the hard drive which contains the address (in 62 | // SimpleAddress format). The file is re-read every time a reconnect is attempted. This allows an 63 | // external entity to update the log server address without restarting the process. 64 | 65 | } // namespace blackrock 66 | 67 | #endif // BLACKROCK_LOGS_H_ 68 | -------------------------------------------------------------------------------- /src/blackrock/frontend.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xfb7fa19ecd585d19; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | using ClusterRpc = import "cluster-rpc.capnp"; 22 | using Util = import "/sandstorm/util.capnp"; 23 | using Package = import "/sandstorm/package.capnp"; 24 | using Supervisor = import "/sandstorm/supervisor.capnp".Supervisor; 25 | using GatewayRouter = import "/sandstorm/backend.capnp".GatewayRouter; 26 | 27 | interface Frontend { 28 | # Front-ends run the Sandstorm shell UI (a Meteor app). They accept HTTP connections proxied 29 | # from the Gateways. 
30 | 31 | struct Instance { 32 | replicaNumber @0 :UInt32; 33 | httpAddress @1 :ClusterRpc.Address; 34 | smtpAddress @2 :ClusterRpc.Address; 35 | router @3 :GatewayRouter; 36 | } 37 | 38 | getInstances @0 () -> (instances :List(Instance)); 39 | # A front-end machine may run multiple instances of the Sandstorm Shell server. This method gets 40 | # a list of instances, so that the gateway can consitently route requests from a particular user 41 | # to a particular instance. 42 | } 43 | 44 | interface Mongo { 45 | getConnectionInfo @0 () -> (address :ClusterRpc.Address, username :Text, password :Text); 46 | 47 | # TODO(someday): Support replicas. 48 | } 49 | 50 | struct FrontendConfig { 51 | # Config for shells -- and for gateways, for historical reasons. 52 | 53 | baseUrl @0 :Text; 54 | # Equivalent to BASE_URL from sandstorm.conf. 55 | 56 | wildcardHost @1 :Text; 57 | # Equivalent to WILDCARD_HOST from sandstorm.conf. 58 | 59 | ddpUrl @2 :Text; 60 | # Equivalent to DDP_DEFAULT_CONNECTION_URL from sandstorm.conf. 61 | 62 | mailUrl @3 :Text; 63 | # Equivalent to MAIL_URL from sandstorm.conf. 64 | 65 | allowDemoAccounts @4 :Bool; 66 | # Equivalent to ALLOW_DEMO_ACCOUNTS from sandstorm.conf. 67 | 68 | isTesting @5 :Bool; 69 | # Equivalent to IS_TESTING from sandstorm.conf. 70 | 71 | isQuotaEnabled @13 :Bool = true; 72 | 73 | stripeKey @6 :Text; 74 | stripePublicKey @7 :Text; 75 | outOfBeta @12 :Bool; 76 | 77 | mailchimpKey @10 :Text; 78 | mailchimpListId @11 :Text; 79 | 80 | allowUninvited @8 :Bool; 81 | 82 | replicasPerMachine @9 :UInt32; 83 | 84 | privateKeyPassword @14 :Text; 85 | termsPublicId @15 :Text; 86 | } 87 | -------------------------------------------------------------------------------- /src/blackrock/storage-schema.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 
3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xf1a0240d9d1e831b; 18 | # Main storage schemas for Blackrock. 19 | 20 | $import "/capnp/c++.capnp".namespace("blackrock"); 21 | 22 | using Storage = import "storage.capnp"; 23 | using OwnedAssignable = Storage.OwnedAssignable; 24 | using OwnedVolume = Storage.OwnedVolume; 25 | using Supervisor = import "/sandstorm/supervisor.capnp".Supervisor; 26 | using Package = import "/sandstorm/package.capnp"; 27 | 28 | struct AccountStorage { 29 | # TODO(someday): 30 | # - Basic metadata. 31 | # - Quota etc. 32 | # - Opaque collection of received capabilities. 33 | 34 | grains @0 :List(GrainInfo); 35 | # All grains owned by the user. 36 | # 37 | # TODO(perf): Use a Collection here, when they are implemented. 38 | 39 | struct GrainInfo { 40 | id @0 :Text; 41 | state @1 :OwnedAssignable(GrainState); 42 | } 43 | } 44 | 45 | struct GatewayStorage { 46 | # TODO(someday): 47 | # - Incoming and outgoing SturdyRefs. 48 | } 49 | 50 | struct PackageStorage { 51 | volume @0 :OwnedVolume; 52 | appId @1 :Text; 53 | manifest @2 :Package.Manifest; 54 | authorPgpKeyFingerprint @3 :Text; 55 | } 56 | 57 | struct GrainState { 58 | union { 59 | inactive @0 :Void; 60 | # No worker is currently assigned to this grain. 61 | 62 | active @1 :Supervisor; 63 | # This grain is currently running on a worker machine. 
64 | # 65 | # Upon loading the `GrainState` from storage and finding `active` is set, the first thing you 66 | # should do is call `keepAlive()` on this capability. If that fails or times out, then it 67 | # would appear that the grain is no longer running. Now we get into a complicated situation 68 | # where it's necessary to either convince the worker holding the grain to give it up or revoke 69 | # that worker's access to the grain state and volume entirely, but hopefully this is 70 | # infrequent. 71 | } 72 | 73 | volume @2 :OwnedVolume; 74 | 75 | savedCaps @3 :List(SavedCap); 76 | # TODO(perf): Use a Collection here, when they are implemented. 77 | 78 | struct SavedCap { 79 | token @0 :Data; 80 | # Token given to the app. (We can't give the app the raw SturdyRef because it contains the 81 | # encryption key which means the bits are powerful outside the context of the app.) 82 | 83 | cap @1 :Capability; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Because Blackrock messes with kernel knobs that can break the system (like 18 | # nbd), it's preferable to run non-unit tests inside a VM with Vagrant. 
19 | # 20 | # Cheat sheet: 21 | # vagrant up Initializes and starts a VM, with the source directory 22 | # mapped read-only at /blackrock. 23 | # vagrant ssh SSHes into the VM. 24 | # vagrant destroy Shuts down and deletes the VM. I recommend this over 25 | # `vagrant halt` to keep your dev environment clean. 26 | 27 | VAGRANTFILE_API_VERSION = "2" 28 | 29 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 30 | config.vm.box = "debian/contrib-stretch64" 31 | 32 | # We build Blackrock outside of Vagrant, so there's no reason for the VM 33 | # to be modifying the source directory. Mount it read-only. 34 | config.vm.synced_folder ".", "/blackrock", type: "nfs", :mount_options => ["ro"] 35 | 36 | # The directory ".local" should contain two ext4 disk images: "storage" 37 | # and "mongo". Server state will be stored in these so that 38 | # "vagrant destroy"ing all VMs and bringing them back up doesn't mean 39 | # wiping storage. To create this directory, do something like: 40 | # 41 | # mkdir .local 42 | # truncate -s 10737418240 .local/storage 43 | # truncate -s 10737418240 .local/mongo 44 | # /sbin/mkfs.ext4 .local/storage 45 | # /sbin/mkfs.ext4 .local/mongo 46 | config.vm.synced_folder ".local", "/blackrock-local", type: "nfs" 47 | 48 | # Don't check for image updates on every run; could be slow. 
49 | config.vm.box_check_update = false 50 | 51 | config.vm.provider "virtualbox" do |v| 52 | v.memory = 1024 53 | v.cpus = 1 54 | end 55 | 56 | config.vm.define "storage0" do |storage0| 57 | storage0.vm.network "private_network", ip: "172.28.128.10" 58 | 59 | storage0.vm.provision "shell", 60 | inline: "mkdir -p /var/blackrock/storage && mount /blackrock-local/storage /var/blackrock/storage", 61 | run: "always" 62 | end 63 | config.vm.define "worker0" do |worker0| 64 | worker0.vm.network "private_network", ip: "172.28.128.20" 65 | end 66 | config.vm.define "worker1" do |worker1| 67 | worker1.vm.network "private_network", ip: "172.28.128.21" 68 | end 69 | config.vm.define "worker2" do |worker2| 70 | worker2.vm.network "private_network", ip: "172.28.128.22" 71 | end 72 | config.vm.define "worker3" do |worker3| 73 | worker3.vm.network "private_network", ip: "172.28.128.23" 74 | end 75 | config.vm.define "coordinator0" do |coordinator0| 76 | coordinator0.vm.network "private_network", ip: "172.28.128.30" 77 | end 78 | config.vm.define "frontend0" do |frontend0| 79 | frontend0.vm.network "private_network", ip: "172.28.128.60" 80 | end 81 | config.vm.define "frontend1" do |frontend1| 82 | frontend1.vm.network "private_network", ip: "172.28.128.61" 83 | end 84 | config.vm.define "mongo0" do |mongo0| 85 | mongo0.vm.network "private_network", ip: "172.28.128.50" 86 | 87 | mongo0.vm.provision "shell", 88 | inline: "mkdir -p /var/blackrock/bundle && mount /blackrock-local/mongo /var/blackrock/bundle", 89 | run: "always" 90 | end 91 | config.vm.define "gateway0" do |gateway0| 92 | gateway0.vm.network "private_network", ip: "172.28.128.40" 93 | end 94 | end 95 | -------------------------------------------------------------------------------- /src/blackrock/local-persistent-registry.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 
3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_LOCAL_PERSISTENT_REGISTRY_H_ 18 | #define BLACKROCK_LOCAL_PERSISTENT_REGISTRY_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace blackrock { 27 | 28 | class LocalPersistentRegistry { 29 | // Class which manages the set of persistent capabilities hosted by this vat which, when saved, 30 | // will use the "Transient" SturdyRef type; i.e. these capabilities are specific to this vat and 31 | // won't continue to exist once the process exits. 32 | // 33 | // Typically a LocalPersistentRegistry& should be passed around to any component that needs to be 34 | // able to make its capabilities persistent. The LocalPersistentRegistry's scope should match the 35 | // RpcSystem. 36 | 37 | struct SavedRef; 38 | class PersistentImpl; 39 | class RestorerImpl; 40 | 41 | struct DataHash { 42 | inline size_t operator()(capnp::Data::Reader r) const { 43 | // The keys in the map are randomly-generated so the hash might as well be the prefix bytes. 
44 | size_t result = 0; 45 | memcpy(&result, r.begin(), kj::min(r.size(), sizeof(result))); 46 | return result; 47 | } 48 | }; 49 | 50 | public: 51 | LocalPersistentRegistry(VatPath::Reader thisVatPath): thisVatPath(thisVatPath) {} 52 | 53 | class Registration { 54 | public: 55 | Registration(LocalPersistentRegistry& registry, capnp::Capability::Client cap); 56 | 57 | KJ_DISALLOW_COPY(Registration); 58 | ~Registration() noexcept(false); 59 | // Dropping the registration invalidates all saved SturdyRefs. Calls to save() will still 60 | // succeed but return tokens that don't work (as if save() had been called just before the 61 | // deregistration). 62 | 63 | Persistent::Client getWrapped(); 64 | // Get a capability which forwards all calls to the original except for save() which is handled 65 | // by the LocalPersistentRegistry. 66 | 67 | private: 68 | LocalPersistentRegistry& registry; 69 | kj::Own wrapped; 70 | std::set savedRefs; 71 | friend class LocalPersistentRegistry; 72 | }; 73 | 74 | kj::Own makePersistent(capnp::Capability::Client cap); 75 | // Wraps the capability in a wrapper that implements save() by returning a transient SturdyRef. 76 | 77 | Restorer::Client createRestorerFor(VatPath::Reader clientId); 78 | // Create a Restorer to be used by the given authenticated client. 79 | 80 | private: 81 | VatPath::Reader thisVatPath; 82 | 83 | struct SavedRef { 84 | explicit SavedRef(Registration& registration); 85 | ~SavedRef() noexcept(false); 86 | 87 | Registration& registration; 88 | byte token[16]; 89 | 90 | // TODO(security): Track the ref's owner. This is easy enough when the owner is another vat, 91 | // but if it's e.g. the storage system then we don't really have a good way to authenticate 92 | // that here. 
93 | }; 94 | 95 | std::unordered_map, DataHash> savedRefs; 96 | }; 97 | 98 | } // namespace blackrock 99 | 100 | #endif // BLACKROCK_LOCAL_PERSISTENT_REGISTRY_H_ 101 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Blackrock's Makefile augments Sandstorm's. 18 | 19 | # You may override the following vars on the command line to suit 20 | # your config. 
21 | CC=clang 22 | CXX=clang++ 23 | CFLAGS=-O2 -g -Wall 24 | CXXFLAGS=$(CFLAGS) 25 | BUILD=0 26 | PARALLEL=$(shell nproc) 27 | 28 | .PHONY: all fast clean continuous deps update-deps 29 | 30 | define color 31 | printf '\033[0;34m==== $1 ====\033[0m\n' 32 | endef 33 | 34 | all: blackrock.tar.xz 35 | 36 | fast: blackrock-fast.tar.xz 37 | 38 | clean: 39 | rm -rf blackrock*.tar.xz local-config 40 | make -f deps/sandstorm/Makefile clean 41 | 42 | continuous: tmp/.deps 43 | make -f deps/sandstorm/Makefile continuous 44 | 45 | bundle: tmp/.deps 46 | make -f deps/sandstorm/Makefile bundle 47 | 48 | bin/e2fsck: tmp/e2fsprogs/e2fsck/e2fsck check-e2fsprogs.sh 49 | ./check-e2fsprogs.sh 50 | 51 | shell-env: tmp/.deps 52 | make -f deps/sandstorm/Makefile shell-env 53 | 54 | deps: tmp/.deps 55 | 56 | tmp/.deps: deps/sandstorm 57 | cd deps/sandstorm && make deps 58 | @mkdir -p tmp 59 | @touch tmp/.deps 60 | 61 | deps/sandstorm: 62 | @$(call color,downloading sandstorm) 63 | @mkdir -p deps 64 | git clone https://github.com/sandstorm-io/sandstorm.git deps/sandstorm 65 | 66 | deps/e2fsprogs: 67 | @$(call color,downloading e2fsprogs) 68 | @mkdir -p deps 69 | git clone https://github.com/tytso/e2fsprogs.git deps/e2fsprogs 70 | 71 | tmp/e2fsprogs/e2fsck/e2fsck: deps/e2fsprogs 72 | @$(call color,build e2fsprogs) 73 | @mkdir -p tmp/e2fsprogs 74 | cd tmp/e2fsprogs && ../../deps/e2fsprogs/configure CFLAGS='-Os -DEXT2_SKIP_UUID' LDFLAGS='-static' && make -j$(PARALLEL) 75 | 76 | update-deps: 77 | @$(call color,updating sandstorm) 78 | @cd deps/sandstorm && echo "pulling sandstorm..." && git pull && make update-deps 79 | 80 | bin/blackrock.unstripped: bundle 81 | @ # TODO(cleanup): This is ugly. 
82 | @$(call color,strip binaries) 83 | @cp bin/blackrock bin/blackrock.unstripped 84 | @strip bin/blackrock 85 | 86 | blackrock.tar.xz: bundle bin/e2fsck bin/blackrock.unstripped 87 | @$(call color,compress release bundle) 88 | @tar c --transform="s,^,blackrock/,S" bin/blackrock bin/e2fsck bin/tune2fs bin/resize2fs bundle | xz -c -9e > blackrock.tar.xz 89 | 90 | blackrock-fast.tar.xz: bundle bin/e2fsck bin/blackrock.unstripped 91 | @$(call color,compress fast bundle) 92 | @tar c --transform="s,^,blackrock/,S" bin/blackrock bin/e2fsck bin/tune2fs bin/resize2fs bundle | xz -c -0 > blackrock-fast.tar.xz 93 | 94 | # ======================================================================================== 95 | # Local testing 96 | 97 | .local/mongo: 98 | @mkdir -p .local 99 | truncate -s 10737418240 .local/mongo 100 | /sbin/mkfs.ext4 .local/mongo 101 | 102 | .local/storage: 103 | @mkdir -p .local 104 | truncate -s 10737418240 .local/storage 105 | /sbin/mkfs.ext4 .local/storage 106 | 107 | local-config: test-config.capnp 108 | capnp eval --binary -Isrc test-config.capnp vagrant > local-config 109 | 110 | run-local: bundle bin/e2fsck local-config .local/mongo .local/storage 111 | # We need to bring up one VM in advance to make the vboxnet0 network interface appear. 
112 | (vagrant status --machine-readable | grep -q 'storage0,state,running') || vagrant up storage0 113 | bin/blackrock master local-config -r 114 | 115 | kill-local: 116 | vagrant destroy -f 117 | 118 | local-mongo: 119 | mongo -u sandstorm --password="$$(vagrant ssh mongo0 -c 'cat /var/blackrock/bundle/mongo/passwd')" --authenticationDatabase admin 172.28.128.50/meteor 120 | 121 | local-admintoken: 122 | vagrant ssh frontend0 -c 'echo -n testtoken > /var/blackrock/bundle/sandstorm/adminToken' 123 | @echo "Now go to: http://localrock.sandstorm.io:6080/setup/token/testtoken" 124 | -------------------------------------------------------------------------------- /release.sh: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | set -euo pipefail 4 | 5 | if [ $# -lt 1 ]; then 6 | echo "usage: $0 test|prod [-n|-m]" >&2 7 | exit 1 8 | fi 9 | 10 | case $1 in 11 | test ) 12 | GCE_PROJECT=sandstorm-blackrock-testing 13 | export CLOUDSDK_COMPUTE_ZONE=us-central1-f 14 | BUILD=0 15 | BUILDSTAMP=$(date -u +%Y%m%d-%H%M%S) 16 | ;; 17 | prod ) 18 | GCE_PROJECT=sandstorm-oasis 19 | export CLOUDSDK_COMPUTE_ZONE=us-central1-c 20 | 21 | # We always do a Blackrock prod release shortly after a Sandstorm release. 22 | BUILD=$(curl -s https://install.sandstorm.io/dev) 23 | BUILDSTAMP=$BUILD-$(date -u +%Y%m%d-%H%M%S) 24 | 25 | if (grep -r KJ_DBG src/* | egrep -v '/(debug(-test)?|exception)[.]'); then 26 | echo '*** Error: There are instances of KJ_DBG in the code.' >&2 27 | exit 1 28 | fi 29 | 30 | if egrep -r 'TODO\(now\)' src/*; then 31 | echo '*** Error: There are release-blocking TODOs in the code.' >&2 32 | exit 1 33 | fi 34 | 35 | if [ "x$(git status --porcelain)" != "x" ]; then 36 | echo "Please commit changes to git before releasing." 
>&2 37 | exit 1 38 | fi 39 | ;; 40 | * ) 41 | echo "no such target: $1" >&2 42 | exit 1 43 | ;; 44 | esac 45 | 46 | shift 47 | 48 | DRY_RUN=no 49 | CONFIRM_EACH=no 50 | HOTFIX=no 51 | 52 | while [ $# -gt 0 ]; do 53 | case $1 in 54 | -n ) 55 | DRY_RUN=yes 56 | ;; 57 | -m ) 58 | CONFIRM_EACH=yes 59 | ;; 60 | -h ) 61 | HOTFIX=yes 62 | ;; 63 | * ) 64 | echo "unknown arg: $1" >&2 65 | exit 1 66 | ;; 67 | esac 68 | shift 69 | done 70 | 71 | gce() { 72 | gcloud --project=$GCE_PROJECT compute "$@" 73 | } 74 | 75 | doit() { 76 | local ANSWER 77 | if [ "$CONFIRM_EACH" != "no" ]; then 78 | printf "\033[0;33m=== RUN? %s ===\033[0m" "$*" 79 | read -sn 1 ANSWER 80 | if [ -z "$ANSWER" ]; then 81 | printf "\r\033[K" 82 | else 83 | printf "\033[0;31m\r=== SKIPPED: %s ===\033[0m\n" "$*" 84 | return 85 | fi 86 | fi 87 | 88 | printf "\033[0;35m=== %s ===\033[0m\n" "$*" 89 | 90 | if [ "$DRY_RUN" = "no" ]; then 91 | "$@" 92 | fi 93 | } 94 | 95 | doit make clean BUILD=$BUILD 96 | doit make BUILD=$BUILD 97 | 98 | if [ "$HOTFIX" = "yes" ]; then 99 | FRONTENDS=$(gce instances list --format=text | grep '^name:' | sed -e 's/^name: *//g' | grep '^frontend' | grep -v 'frontend0$') 100 | FRONTENDS="$FRONTENDS frontend0" 101 | GATEWAYS=$(gce instances list --format=text | grep '^name:' | sed -e 's/^name: *//g' | grep '^gateway') 102 | MACHINES="$FRONTENDS $GATEWAYS" 103 | 104 | for MACHINE in $MACHINES; do 105 | doit gce copy-files blackrock.tar.xz "root@$MACHINE:/root" 106 | done 107 | 108 | for FRONTEND in $FRONTENDS; do 109 | doit gce ssh "root@$FRONTEND" --command 'cd /root && rm -rf blackrock /blackrock/bundle.new && tar Jxof blackrock.tar.xz && mv blackrock/bundle /blackrock/bundle.new && cd /blackrock && mv bundle bundle.$(date -u +%Y%m%d-%H%M%S) && mv bundle.new bundle' 110 | done 111 | for GATEWAY in $GATEWAYS; do 112 | doit gce ssh "root@$GATEWAY" --command 'cd /root && rm -rf blackrock /blackrock/bundle.new && tar Jxof blackrock.tar.xz && mv blackrock/bin/blackrock 
/blackrock/bin/blackrock.new && cd /blackrock/bin && mv blackrock blackrock.$(date -u +%Y%m%d-%H%M%S) && mv blackrock.new blackrock' 113 | done 114 | 115 | for FRONTEND in $FRONTENDS; do 116 | doit gce ssh "root@$FRONTEND" --command 'kill $(pidof node)' 117 | done 118 | for GATEWAY in $GATEWAYS; do 119 | doit gce ssh "root@$GATEWAY" --command 'kill -9 $(ps ax | grep blackrock | grep slave | awk "{print \$1}")' 120 | done 121 | 122 | exit 0 123 | fi 124 | 125 | # Keep unstripped binary for debugging. 126 | mkdir -p dbg 127 | cp bin/blackrock.unstripped dbg/blackrock-$BUILDSTAMP 128 | 129 | # Create a new image. 130 | doit gce instances create build --image blackrock-240-20181020-200157 131 | doit sleep 10 # make sure instance is up 132 | doit gce ssh build --command 'sudo sed -i -e "s/PermitRootLogin no/PermitRootLogin without-password/g" /etc/ssh/sshd_config; sudo service ssh restart' 133 | doit gce copy-files blackrock.tar.xz root@build:/ 134 | doit gce ssh root@build --command "cd / && rm -rf /blackrock && tar Jxof blackrock.tar.xz && rm /blackrock.tar.xz" 135 | doit gce instances delete build -q --keep-disks boot 136 | doit gce images create blackrock-$BUILDSTAMP --source-disk build 137 | doit gce disks delete -q build 138 | 139 | # Also upload to master. 140 | doit gce copy-files bin/blackrock root@master:/blackrock/bin/blackrock-$BUILDSTAMP 141 | -------------------------------------------------------------------------------- /src/blackrock/sparse-data.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace blackrock { 27 | 28 | class SparseDataMain { 29 | // Main class for a simple program that produces a SparseData from an input sparse file. 30 | // The output is written as a single-segment message (no leading segment table). 31 | 32 | public: 33 | SparseDataMain(kj::ProcessContext& context): context(context) {} 34 | 35 | kj::MainFunc getMain() { 36 | return kj::MainBuilder(context, "unknown version", 37 | "Given a sparse file, output (on stdout) a blackrock::SparseData " 38 | "Cap'n Proto representation of the file content.") 39 | .expectArg("", KJ_BIND_METHOD(*this, run)) 40 | .build(); 41 | } 42 | 43 | kj::MainBuilder::Validity run(kj::StringPtr arg) { 44 | auto fd = sandstorm::raiiOpen(arg, O_RDONLY | O_CLOEXEC); 45 | 46 | capnp::MallocMessageBuilder message(1 << 17); // start with 1MB 47 | auto root = message.getRoot(); 48 | auto orphanage = message.getOrphanage(); 49 | 50 | kj::Vector chunks; 51 | Chunk chunk; 52 | chunk.offset = 0; 53 | 54 | kj::byte block[4096]; 55 | 56 | off_t offset = 0; 57 | for (;;) { 58 | retry: 59 | offset = lseek(fd, offset, SEEK_DATA); 60 | if (offset < 0) { 61 | int error = errno; 62 | if (error == EINTR) { 63 | goto retry; 64 | } else if (error == ENXIO) { 65 | // reached EOF 66 | break; 67 | } else { 68 | KJ_FAIL_SYSCALL("lseek", error); 69 | } 70 | } 71 | 72 | KJ_ASSERT(offset % sizeof(block) == 0); 73 | 74 | size_t n = 
kj::FdInputStream(fd.get()).tryRead(block, sizeof(block), sizeof(block)); 75 | KJ_ASSERT(n > 0); 76 | 77 | KJ_LOG(INFO, kj::hex((uint64_t)offset / sizeof(block)), kj::encodeHex(block)); 78 | 79 | for (kj::byte b: block) { 80 | if (b != 0) { 81 | // This block has non-zero bytes. We need to add it to the results. Note that we write 82 | // a whole block even if it contains runs of zeros because block-aligned writes probably 83 | // will make our main use case (initializing ext4 block devices) more efficient. 84 | if (chunk.data == nullptr) { 85 | newChunk: 86 | chunk.offset = offset; 87 | chunk.data = orphanage.newOrphanCopy(capnp::Data::Reader(block, n)); 88 | } else { 89 | size_t chunkSize = chunk.data.getReader().size(); 90 | if (chunk.offset + chunkSize == offset) { 91 | // Extend the chunk. 92 | chunk.data.truncate(chunkSize + n); 93 | memcpy(chunk.data.get().begin() + chunkSize, block, n); 94 | } else { 95 | // Start new chunk. 96 | chunks.add(kj::mv(chunk)); 97 | goto newChunk; 98 | } 99 | } 100 | break; 101 | } 102 | } 103 | 104 | offset += n; 105 | } 106 | 107 | if (chunk.data != nullptr) { 108 | chunks.add(kj::mv(chunk)); 109 | } 110 | 111 | auto list = root.initChunks(chunks.size()); 112 | for (auto i: kj::indices(chunks)) { 113 | auto chunkBuilder = list[i]; 114 | chunkBuilder.setOffset(chunks[i].offset); 115 | chunkBuilder.adoptData(kj::mv(chunks[i].data)); 116 | } 117 | 118 | capnp::writeMessageToFd(STDOUT_FILENO, message); 119 | 120 | return true; 121 | } 122 | 123 | private: 124 | kj::ProcessContext& context; 125 | 126 | struct Chunk { 127 | uint64_t offset; 128 | capnp::Orphan data; 129 | }; 130 | }; 131 | 132 | } // namespace blackrock 133 | 134 | KJ_MAIN(blackrock::SparseDataMain) 135 | -------------------------------------------------------------------------------- /src/blackrock/master.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development 
Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_MASTER_H_ 18 | #define BLACKROCK_MASTER_H_ 19 | 20 | #include "common.h" 21 | #include "cluster-rpc.h" 22 | #include 23 | #include 24 | #include 25 | #include "logs.h" 26 | 27 | namespace sandstorm { 28 | class SubprocessSet; 29 | } 30 | 31 | namespace blackrock { 32 | 33 | class ComputeDriver { 34 | public: 35 | enum class MachineType { 36 | STORAGE, 37 | WORKER, 38 | COORDINATOR, 39 | FRONTEND, 40 | MONGO, 41 | GATEWAY 42 | }; 43 | 44 | struct MachineId { 45 | MachineType type; 46 | uint index; 47 | 48 | inline bool operator==(const MachineId& other) const { 49 | return type == other.type && index == other.index; 50 | } 51 | inline bool operator<(const MachineId& other) const { 52 | return type < other.type ? true : 53 | type > other.type ? false : 54 | index < other.index; 55 | } 56 | 57 | kj::String toString() const; 58 | // Makes reasonable hostnames. E.g. { STORAGE, 123 } becomes "storage123". 59 | 60 | MachineId() = default; 61 | inline MachineId(MachineType type, uint index): type(type), index(index) {} 62 | MachineId(kj::StringPtr name); 63 | // Parses results of toString(). 64 | }; 65 | 66 | struct MachineStatus { 67 | MachineId id; 68 | kj::Maybe path; 69 | // Current path, or null if not powered up. Path remains valid until halt() or destroy() is 70 | // called on the machine. 
71 | }; 72 | 73 | virtual SimpleAddress getMasterBindAddress() = 0; 74 | // Get the address at which other machines in the cluster will see the master (i.e. this) 75 | // machine. 76 | 77 | virtual kj::Promise> listMachines() KJ_WARN_UNUSED_RESULT = 0; 78 | // List all machines currently running in the cluster. 79 | 80 | virtual kj::Promise boot(MachineId id) = 0; 81 | // Boot the given machine. 82 | 83 | virtual kj::Promise run(MachineId id, 84 | VatId::Reader masterVatId, bool requireRestartProcess) KJ_WARN_UNUSED_RESULT = 0; 85 | // Run the Blackrock process on the given machine. If `requireRestartProcess` is true, 86 | // then all blackrock processes on the machine should be immediately terminated and restarted. 87 | // Depending on the driver, this may or may not have the effect of updating the binary to the 88 | // latest version. Note that `requireRestartProcess` is often much faster than stop() followed 89 | // by boot() and run(), but not as reliable. 90 | 91 | virtual kj::Promise stop(MachineId id) KJ_WARN_UNUSED_RESULT = 0; 92 | // Shut down the given machine. 
93 | }; 94 | 95 | void runMaster(kj::AsyncIoContext& ioContext, ComputeDriver& driver, MasterConfig::Reader config, 96 | bool shouldRestart, kj::ArrayPtr machinesToRestart); 97 | 98 | class VagrantDriver: public ComputeDriver { 99 | public: 100 | VagrantDriver(sandstorm::SubprocessSet& subprocessSet, kj::LowLevelAsyncIoProvider& ioProvider); 101 | ~VagrantDriver() noexcept(false); 102 | 103 | SimpleAddress getMasterBindAddress() override; 104 | kj::Promise> listMachines() override; 105 | kj::Promise boot(MachineId id) override; 106 | kj::Promise run(MachineId id, VatId::Reader masterVatId, 107 | bool requireRestartProcess) override; 108 | kj::Promise stop(MachineId id) override; 109 | 110 | private: 111 | sandstorm::SubprocessSet& subprocessSet; 112 | kj::LowLevelAsyncIoProvider& ioProvider; 113 | std::map> vatPaths; 114 | SimpleAddress masterBindAddress; 115 | 116 | LogSink logSink; 117 | kj::Promise logTask; 118 | SimpleAddress logSinkAddress; 119 | 120 | kj::Promise bootQueue = kj::READY_NOW; 121 | }; 122 | 123 | } // namespace blackrock 124 | 125 | #endif // BLACKROCK_MASTER_H_ 126 | -------------------------------------------------------------------------------- /src/blackrock/fs-storage.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0xfc40bcbedafbe11c; 18 | # An implementation of the Storage interfaces based on a standard filesystem. 19 | # 20 | # TODO(doc): The following is outdated! 21 | # 22 | # All objects are stored in a massive directory with filenames like: 23 | # o: object content 24 | # c: list of object IDs that should be deleted if this object is deleted. This list 25 | # is generally append-only and so can contain IDs that no longer exist; those 26 | # should be ignored when deleting. 27 | # 28 | # In all cases above, an is a 16-byte value base64-encoded to 22 bytes (with '-' and '_' as 29 | # digits 62 and 63). Note that ext4 directory entries are 8 + name_len bytes, rounded up to a 30 | # multiple of 4, with no NUL terminator stored. Since our filenames are 23 bytes (including prefix 31 | # character), each directory entry comes out to 32 bytes (31 rounded up). That seems sort of nice? 32 | # 33 | # A second directory, called "staging", stores files whose names consist of exactly 16 hex digits, 34 | # and which are intended to be rename()ed into place later on. Files in staging exist only for 35 | # their content. If that content includes outgoing owned references, the target objects are either 36 | # in staging themselves or are owned by some non-staging objects and are scheduled to have owneship 37 | # transferred in an upcoming transaction. In other words, when deleting an object out of staging, 38 | # it does NOT make sense to recursively delete its children. 39 | # 40 | # A third directory, called "deathrow", contains objects scheduled for recursive deletion. Objects 41 | # here used to be under the main directory, but have been deleted. Before actually deleting the 42 | # file, it is necessary to move all of its children into "deathrow". This process of recursive 43 | # deletion can occur in a separate thread (or process!) 
so that deep deletions do not block other 44 | # tasks. 45 | 46 | $import "/capnp/c++.capnp".namespace("blackrock"); 47 | using Storage = import "storage.capnp"; 48 | using SturdyRef = import "cluster-rpc.capnp".SturdyRef; 49 | 50 | struct StoredObjectId { 51 | # 16-byte ID of the object. This is calculated as the 16-byte blake2b hash of the object key. 52 | 53 | id0 @0 :UInt64; 54 | id1 @1 :UInt64; 55 | } 56 | 57 | struct StoredObjectKey { 58 | # Key to decrypt an object. 59 | 60 | key0 @0 :UInt64; 61 | key1 @1 :UInt64; 62 | key2 @2 :UInt64; 63 | key3 @3 :UInt64; 64 | } 65 | 66 | struct StoredIncomingRef { 67 | # Stored in `ref/`, where is the base64('+','_') of the 16-byte blake2b hash of 68 | # the ref key (the 32-byte key stored in the SturdyRef). Encrypted by the ref key. 69 | 70 | owner @0 :SturdyRef.Owner; 71 | # Who is allowed to restore this ref? 72 | 73 | key @1 :StoredObjectKey; 74 | # Key to the object. 75 | } 76 | 77 | struct StoredChildIds { 78 | # A stored `Assignable` or `Immutable` object file contains two Cap'n Proto messages: 79 | # StoredChildIds followed by StoredObject. The latter could be encrypted. 80 | 81 | children @0 :List(StoredObjectId); 82 | # List of owned children of this object. If this object is deleted, all children should be 83 | # deleted as well. 84 | } 85 | 86 | struct StoredObject { 87 | # A stored `Assignable` or `Immutable` object file contains two Cap'n Proto messages: 88 | # StoredChildIds followed by StoredObject. The latter could be encrypted. 89 | 90 | capTable @0 :List(CapDescriptor); 91 | payload @1 :AnyPointer; 92 | 93 | struct CapDescriptor { 94 | union { 95 | none @0 :Void; 96 | # Null. (But `null` is not a good variable name due to macro conflicts.) 97 | 98 | child @1 :StoredObjectKey; 99 | # This points to an owned child object. 100 | 101 | external @2 :SturdyRef; 102 | # A remote capability. (Could point back to storage, but the object isn't owned by us.) 
103 | } 104 | } 105 | } 106 | 107 | struct StoredRoot { 108 | # A root object. 109 | 110 | key @0 :StoredObjectKey; 111 | } 112 | -------------------------------------------------------------------------------- /src/blackrock/frontend.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_FRONTEND_H_ 18 | #define BLACKROCK_FRONTEND_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include "backend-set.h" 32 | #include "cluster-rpc.h" 33 | 34 | namespace blackrock { 35 | 36 | class FrontendImpl: public Frontend::Server { 37 | public: 38 | FrontendImpl(kj::LowLevelAsyncIoProvider& llaiop, 39 | sandstorm::SubprocessSet& subprocessSet, 40 | FrontendConfig::Reader config, uint replicaNumber, 41 | SimpleAddress bindAddress); 42 | 43 | void setConfig(FrontendConfig::Reader config); 44 | 45 | BackendSet::Client getStorageRootBackendSet(); 46 | BackendSet::Client getStorageFactoryBackendSet(); 47 | BackendSet::Client getWorkerBackendSet(); 48 | BackendSet::Client getMongoBackendSet(); 49 | 50 | protected: 51 | kj::Promise getInstances(GetInstancesContext context) override; 52 | 53 | private: 54 | class BackendImpl; 55 | struct MongoInfo; 56 | class Instance; 57 | 58 | kj::Own configMessage; 59 | FrontendConfig::Reader config; 60 | 61 | kj::Own> storageRoots; 62 | kj::Own> storageFactories; 63 | kj::Own> workers; 64 | kj::Own> mongos; 65 | 66 | kj::Vector> instances; 67 | 68 | class Instance: private kj::TaskSet::ErrorHandler { 69 | public: 70 | Instance(FrontendImpl& frontend, kj::LowLevelAsyncIoProvider& llaiop, 71 | sandstorm::SubprocessSet& subprocessSet, uint frontendNumber, uint instanceNumber, 72 | SimpleAddress bindAddress, 73 | kj::PromiseFulfillerPair paf = 74 | kj::newPromiseAndFulfiller()); 75 | 76 | void restart(FrontendConfig::Reader config); 77 | 78 | void getInfo(Frontend::Instance::Builder info); 79 | 80 | private: 81 | kj::Timer& timer; 82 | sandstorm::SubprocessSet& subprocessSet; 83 | FrontendConfig::Reader config; 84 | uint replicaNumber; 85 | uint httpPort; 86 | uint smtpPort; 87 | SimpleAddress bindAddress; 88 | 89 | sandstorm::TwoPartyServerWithClientBootstrap capnpServer; 90 | 
pid_t pid = 0; 91 | kj::TaskSet tasks; 92 | 93 | kj::Promise startExecLoop(MongoInfo&& mongoInfo, kj::AutoCloseFd&& backendClientFd); 94 | 95 | kj::Promise execLoop(MongoInfo&& mongoInfo, kj::AutoCloseFd&& http, 96 | kj::AutoCloseFd&& backendClientFd, kj::AutoCloseFd&& smtp); 97 | 98 | void taskFailed(kj::Exception&& exception) override; 99 | }; 100 | }; 101 | 102 | class MongoImpl: public Mongo::Server { 103 | public: 104 | explicit MongoImpl( 105 | kj::Timer& timer, sandstorm::SubprocessSet& subprocessSet, SimpleAddress bindAddress, 106 | kj::PromiseFulfillerPair passwordPaf = kj::newPromiseAndFulfiller()); 107 | 108 | protected: 109 | kj::Promise getConnectionInfo(GetConnectionInfoContext context) override; 110 | 111 | private: 112 | kj::Timer& timer; 113 | sandstorm::SubprocessSet& subprocessSet; 114 | SimpleAddress bindAddress; 115 | kj::Maybe password; 116 | kj::ForkedPromise passwordPromise; 117 | kj::Promise execTask; 118 | 119 | kj::Promise startExecLoop(kj::Own> passwordFulfiller); 120 | kj::Promise execLoop(kj::PromiseFulfiller& passwordFulfiller); 121 | kj::Promise initializeMongo(); 122 | kj::Promise mongoCommand(kj::String command, kj::StringPtr dbName = "meteor"); 123 | }; 124 | 125 | } // namespace blackrock 126 | 127 | #endif // BLACKROCK_FRONTEND_H_ 128 | -------------------------------------------------------------------------------- /src/blackrock/logs-tester.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 
7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include "logs.h" 18 | #include 19 | #include 20 | #include 21 | #include "cluster-rpc.h" 22 | 23 | namespace blackrock { 24 | 25 | class LogsTester { 26 | // A test program for the logging system. 27 | 28 | public: 29 | LogsTester(kj::ProcessContext& context): context(context) {} 30 | 31 | kj::MainFunc getMain() { 32 | return kj::MainBuilder(context, "Blackrock logs tester", "Tests logs.") 33 | .addSubCommand("server", KJ_BIND_METHOD(*this, getServerMain), "run a logs server") 34 | .addSubCommand("client", KJ_BIND_METHOD(*this, getClientMain), "run a logs client") 35 | .addSubCommand("fake", KJ_BIND_METHOD(*this, getFakeMain), "run a fake log server") 36 | .build(); 37 | } 38 | 39 | kj::MainFunc getServerMain() { 40 | return kj::MainBuilder(context, "Blackrock logs tester", 41 | "Runs a log server locally and arranges for clients to be able " 42 | "to connect to it. Prints all logs to stdout unless a log directory " 43 | "is provided.") 44 | .addOptionWithArg({'d', "dir"}, KJ_BIND_METHOD(*this, setLogDir), "", 45 | "save logs to a directory") 46 | .callAfterParsing(KJ_BIND_METHOD(*this, runServer)) 47 | .build(); 48 | } 49 | 50 | kj::MainFunc getClientMain() { 51 | return kj::MainBuilder(context, "Blackrock logs tester", 52 | "Runs a client with the given name connecting to the local server. 
" 53 | "Whatever you enter on stdin will be logged.") 54 | .expectArg("", KJ_BIND_METHOD(*this, setName)) 55 | .callAfterParsing(KJ_BIND_METHOD(*this, runClient)) 56 | .build(); 57 | } 58 | 59 | kj::MainFunc getFakeMain() { 60 | return kj::MainBuilder(context, "Blackrock logs tester", 61 | "Runs a fake server that closes connections immediately upon receipt.") 62 | .callAfterParsing(KJ_BIND_METHOD(*this, runFake)) 63 | .build(); 64 | } 65 | 66 | private: 67 | kj::ProcessContext& context; 68 | kj::Maybe logDir; 69 | kj::StringPtr name; 70 | kj::StringPtr addrFile = "/tmp/blackrock-logs-tester-addr"; 71 | 72 | bool setLogDir(kj::StringPtr arg) { 73 | logDir = sandstorm::raiiOpen(arg, O_RDONLY | O_DIRECTORY | O_CLOEXEC); 74 | return true; 75 | } 76 | 77 | bool setName(kj::StringPtr arg) { 78 | name = arg; 79 | return true; 80 | } 81 | 82 | bool runServer() { 83 | auto io = kj::setupAsyncIo(); 84 | sandstorm::SubprocessSet subprocessSet(io.unixEventPort); 85 | 86 | kj::Own rotater; 87 | KJ_IF_MAYBE(l, logDir) { 88 | auto logPipe = sandstorm::Pipe::make(); 89 | auto readEnd = kj::mv(logPipe.readEnd); 90 | int logDirFd = *l; 91 | rotater = kj::heap([KJ_MVCAP(readEnd),logDirFd]() { 92 | rotateLogs(readEnd, logDirFd); 93 | }); 94 | 95 | KJ_SYSCALL(dup2(logPipe.writeEnd, STDOUT_FILENO)); 96 | } 97 | 98 | // Close log pipe on scope exit, so that thread stops. 99 | KJ_DEFER(KJ_SYSCALL(dup2(STDERR_FILENO, STDOUT_FILENO))); 100 | 101 | LogSink sink; 102 | sink.acceptLoop(listen(io.provider->getNetwork())).wait(io.waitScope); 103 | return true; 104 | } 105 | 106 | bool runClient() { 107 | runLogClient(name, addrFile, "/tmp"); 108 | return true; 109 | } 110 | 111 | bool runFake() { 112 | auto io = kj::setupAsyncIo(); 113 | auto listener = listen(io.provider->getNetwork()); 114 | for (;;) { 115 | // Accept connections and just close them right away. 
116 | listener->accept().wait(io.waitScope); 117 | } 118 | } 119 | 120 | kj::Own listen(kj::Network& network) { 121 | auto addr = SimpleAddress::getLocalhost(AF_INET); 122 | auto listener = addr.onNetwork(network)->listen(); 123 | addr.setPort(listener->getPort()); 124 | kj::FdOutputStream(sandstorm::raiiOpen(addrFile, O_WRONLY | O_CREAT | O_TRUNC)) 125 | .write(&addr, sizeof(addr)); 126 | return listener; 127 | } 128 | }; 129 | 130 | } // namespace blackrock 131 | 132 | KJ_MAIN(blackrock::LogsTester); 133 | -------------------------------------------------------------------------------- /src/blackrock/gateway.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2017 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_GATEWAY_H_ 18 | #define BLACKROCK_GATEWAY_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include "backend-set.h" 23 | #include "cluster-rpc.h" 24 | #include 25 | #include 26 | #include 27 | 28 | namespace blackrock { 29 | 30 | class GatewayImpl: public GatewayImplBase::Server, private kj::HttpService, 31 | private kj::TaskSet::ErrorHandler { 32 | public: 33 | GatewayImpl(kj::Timer& timer, kj::Network& network, FrontendConfig::Reader config); 34 | 35 | void setConfig(FrontendConfig::Reader config); 36 | 37 | protected: 38 | kj::Promise reset(ResetContext context) override; 39 | kj::Promise add(AddContext context) override; 40 | kj::Promise remove(RemoveContext context) override; 41 | // We implement BackendSet directly rather than use BackendSetImpl because we want to 42 | // implement session affinity. 43 | 44 | kj::Promise request( 45 | kj::HttpMethod method, kj::StringPtr url, const kj::HttpHeaders& headers, 46 | kj::AsyncInputStream& requestBody, Response& response) override; 47 | 48 | private: 49 | struct ShellReplica: kj::Refcounted { 50 | uint64_t backendId; 51 | kj::Own httpAddress; 52 | kj::Own smtpAddress; 53 | kj::Own shellHttp; 54 | sandstorm::GatewayRouter::Client router; 55 | sandstorm::GatewayService service; 56 | kj::Promise cleanupLoop; 57 | 58 | ShellReplica(GatewayImpl& gateway, uint64_t backendId, Frontend::Instance::Reader instance); 59 | }; 60 | 61 | class EntropySourceImpl: public kj::EntropySource { 62 | public: 63 | void generate(kj::ArrayPtr buffer) override; 64 | }; 65 | 66 | class SmtpNetworkAddressImpl: public kj::NetworkAddress { 67 | public: 68 | SmtpNetworkAddressImpl(GatewayImpl& gateway): gateway(gateway) {} 69 | 70 | kj::Promise> connect() override; 71 | kj::Own listen() override { KJ_UNIMPLEMENTED("fake address"); } 72 | kj::Own clone() override { KJ_UNIMPLEMENTED("fake address"); } 73 | kj::String toString() override { KJ_UNIMPLEMENTED("fake address"); } 74 | 75 | private: 76 | GatewayImpl& 
gateway; 77 | }; 78 | 79 | kj::Timer& timer; 80 | kj::Network& network; 81 | 82 | sandstorm::GatewayService::Tables gatewayServiceTables; 83 | kj::HttpHeaderId hXRealIp; 84 | 85 | kj::Own configMessage; 86 | FrontendConfig::Reader config; 87 | sandstorm::WildcardMatcher wildcardHost; 88 | 89 | kj::Vector>> shellReplicas; 90 | // Maps replica number -> ShellReplica. Used as hash buckets when load balancing with affinity. 91 | // If a shell is down, its bucket will be null, and we have to search for an alternative. 92 | 93 | kj::Own httpReceiver; 94 | 95 | EntropySourceImpl entropySource; 96 | kj::HttpClientSettings clientSettings; 97 | 98 | kj::Own headerTable; 99 | kj::HttpServer httpServer; 100 | sandstorm::AltPortService altPortService; 101 | kj::HttpServer altPortHttpServer; 102 | SmtpNetworkAddressImpl smtpServer; 103 | sandstorm::GatewayTlsManager tlsManager; 104 | 105 | uint roundRobinCounter = 0; 106 | 107 | struct ReadyPair { 108 | kj::ForkedPromise promise; 109 | kj::Own> fulfiller; 110 | }; 111 | kj::Maybe readyPaf; 112 | 113 | kj::TaskSet tasks; 114 | 115 | GatewayImpl(kj::Timer& timer, kj::Network& network, FrontendConfig::Reader config, 116 | kj::HttpHeaderTable::Builder headerTableBuilder); 117 | 118 | kj::Promise addFrontend(uint64_t backendId, Frontend::Client frontend); 119 | 120 | void addReplica(kj::Own newReplica); 121 | 122 | void setReplica(uint replicaNumber, kj::Maybe> newReplica, 123 | kj::Maybe requireBackendId = nullptr); 124 | kj::Promise> chooseReplica(uint64_t hash); 125 | 126 | uint64_t urlSessionHash(kj::StringPtr url, const kj::HttpHeaders& headers); 127 | 128 | void taskFailed(kj::Exception&& exception) override; 129 | }; 130 | 131 | } // namespace blackrock 132 | 133 | #endif // BLACKROCK_GATEWAY_H_ 134 | -------------------------------------------------------------------------------- /src/blackrock/fs-storage.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // 
Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_VOLUME_H_ 18 | #define BLACKROCK_VOLUME_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace kj { 27 | class UnixEventPort; 28 | class Timer; 29 | } 30 | 31 | namespace blackrock { 32 | 33 | class FilesystemStorage: public StorageRootSet::Server { 34 | public: 35 | FilesystemStorage(int directoryFd, kj::UnixEventPort& eventPort, kj::Timer& timer, 36 | Restorer::Client&& restorer); 37 | ~FilesystemStorage() noexcept(false); 38 | 39 | protected: 40 | kj::Promise set(SetContext context) override; 41 | kj::Promise get(GetContext context) override; 42 | kj::Promise tryGet(TryGetContext context) override; 43 | kj::Promise getOrCreateAssignable(GetOrCreateAssignableContext context) override; 44 | kj::Promise remove(RemoveContext context) override; 45 | kj::Promise getFactory(GetFactoryContext context) override; 46 | 47 | public: 48 | struct ObjectKey { 49 | uint64_t key[4]; 50 | 51 | ObjectKey() = default; 52 | ObjectKey(StoredObjectKey::Reader reader) 53 | : key { reader.getKey0(), reader.getKey1(), reader.getKey2(), reader.getKey3() } {} 54 | ~ObjectKey() { 55 | sodium_memzero(key, sizeof(key)); 56 | } 57 | 58 | static ObjectKey generate(); 59 | 60 | inline void copyTo(StoredObjectKey::Builder builder) const { 61 
| builder.setKey0(key[0]); 62 | builder.setKey1(key[1]); 63 | builder.setKey2(key[2]); 64 | builder.setKey3(key[3]); 65 | } 66 | }; 67 | 68 | struct ObjectId { 69 | uint64_t id[2]; 70 | // The object ID. Equals the 16-byte blake2b hash of the key. 71 | 72 | ObjectId() = default; 73 | ObjectId(decltype(nullptr)): id {0, 0} {} 74 | ObjectId(StoredObjectId::Reader reader) 75 | : id { reader.getId0(), reader.getId1() } {} 76 | ObjectId(const ObjectKey& key); 77 | 78 | inline bool operator==(const ObjectId& other) const { 79 | return ((id[0] ^ other.id[0]) | (id[1] ^ other.id[1])) == 0; // constant-time 80 | } 81 | inline bool operator!=(const ObjectId& other) const { 82 | return !operator==(other); 83 | } 84 | inline bool operator==(decltype(nullptr)) const { 85 | return (id[0] | id[1]) == 0; 86 | } 87 | inline bool operator!=(decltype(nullptr)) const { 88 | return !operator==(nullptr); 89 | } 90 | 91 | inline void copyTo(StoredObjectId::Builder builder) const { 92 | builder.setId0(id[0]); 93 | builder.setId1(id[1]); 94 | } 95 | 96 | struct Hash { 97 | inline size_t operator()(const ObjectId& id) const { return id.id[0]; } 98 | }; 99 | 100 | kj::FixedArray filename(char prefix) const; 101 | }; 102 | 103 | private: 104 | class ObjectBase; 105 | class BlobImpl; 106 | class VolumeImpl; 107 | class ImmutableImpl; 108 | class AssignableImpl; 109 | class CollectionImpl; 110 | class OpaqueImpl; 111 | class StorageFactoryImpl; 112 | enum class Type: uint8_t; 113 | struct Xattr; 114 | class Journal; 115 | class DeathRow; 116 | class ObjectFactory; 117 | 118 | kj::AutoCloseFd mainDirFd; 119 | kj::AutoCloseFd stagingDirFd; 120 | kj::AutoCloseFd deathRowFd; 121 | kj::AutoCloseFd rootsFd; 122 | 123 | kj::Own deathRow; 124 | kj::Own journal; 125 | kj::Own factory; 126 | 127 | kj::Promise setImpl(kj::String name, OwnedStorage<>::Client object); 128 | 129 | kj::Maybe openObject(ObjectId id); 130 | kj::Maybe openStaging(uint64_t number); 131 | kj::AutoCloseFd createObject(ObjectId id); 
132 | kj::AutoCloseFd createTempFile(); 133 | void linkTempIntoStaging(uint64_t number, int fd, const Xattr& xattr); 134 | void deleteStaging(uint64_t number); 135 | void deleteAllStaging(); 136 | void createFromStagingIfExists(uint64_t stagingId, ObjectId finalId, const Xattr& attributes); 137 | void replaceFromStagingIfExists(uint64_t stagingId, ObjectId finalId, const Xattr& attributes); 138 | void setAttributesIfExists(ObjectId objectId, const Xattr& attributes); 139 | void moveToDeathRowIfExists(ObjectId id, bool notify = true); 140 | void sync(); 141 | 142 | static bool isStoredObjectType(Type type); 143 | }; 144 | 145 | } // namespace blackrock 146 | 147 | #endif // BLACKROCK_VOLUME_H_ 148 | -------------------------------------------------------------------------------- /src/blackrock/cluster-rpc.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_CLUSTERRPC_H_ 18 | #define BLACKROCK_CLUSTERRPC_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace blackrock { 30 | 31 | class SimpleAddress { 32 | public: 33 | SimpleAddress(decltype(nullptr)) {} 34 | SimpleAddress(struct sockaddr_in ip4); 35 | SimpleAddress(struct sockaddr_in6 ip6); 36 | SimpleAddress(struct sockaddr& addr, socklen_t addrLen); 37 | SimpleAddress(Address::Reader reader); 38 | 39 | static SimpleAddress getPeer(kj::AsyncIoStream& socket); 40 | static SimpleAddress getLocal(kj::AsyncIoStream& socket); 41 | static SimpleAddress getLocal(int fd); 42 | static SimpleAddress getWildcard(sa_family_t family); 43 | static SimpleAddress getLocalhost(sa_family_t family); 44 | static SimpleAddress getInterfaceAddress(sa_family_t family, kj::StringPtr ifname); 45 | static SimpleAddress lookup(kj::StringPtr address); 46 | 47 | inline sa_family_t family() const { return addr.sa_family; } 48 | 49 | uint16_t getPort() const; 50 | void setPort(uint16_t port); 51 | 52 | void copyTo(Address::Builder builder) const; 53 | 54 | static constexpr size_t FLAT_SIZE = 18; 55 | void getFlat(byte* target) const; 56 | 57 | kj::Own onNetwork(kj::Network& network); 58 | 59 | inline const struct sockaddr* asSockaddr() const { return &addr; } 60 | inline size_t getSockaddrSize() const { 61 | return addr.sa_family == AF_INET ? 
sizeof(ip4) : sizeof(ip6); 62 | } 63 | 64 | bool operator==(const SimpleAddress& other) const; 65 | inline bool operator!=(const SimpleAddress& other) const { return !operator==(other); } 66 | 67 | kj::String toStringWithoutPort() const; 68 | 69 | private: 70 | union { 71 | struct sockaddr addr; 72 | struct sockaddr_in ip4; 73 | struct sockaddr_in6 ip6; 74 | }; 75 | 76 | friend kj::String KJ_STRINGIFY(const SimpleAddress& addr); 77 | }; 78 | 79 | kj::String KJ_STRINGIFY(const SimpleAddress& addr); 80 | 81 | class VatNetwork final: public capnp::VatNetwork { 83 | public: 84 | VatNetwork(kj::Network& network, kj::Timer& timer, SimpleAddress address); 85 | // Create a new VatNetwork exported on the given local address. If the port is zero, an arbitrary 86 | // unused port will be chosen. 87 | 88 | ~VatNetwork(); 89 | 90 | VatPath::Reader getSelf() { return self.getRoot(); } 91 | 92 | kj::Maybe> connect(VatPath::Reader hostId) override; 93 | kj::Promise> accept() override; 94 | 95 | private: 96 | class LittleEndian64; 97 | class Mac; 98 | class SymmetricKey; 99 | class PrivateKey; 100 | class PublicKey; 101 | class Header; 102 | 103 | class PublicKey { 104 | public: 105 | inline PublicKey(decltype(nullptr)) {} 106 | PublicKey(VatId::Reader id); 107 | 108 | void copyTo(VatId::Builder id); 109 | 110 | inline bool operator<(const PublicKey& other) const { 111 | return memcmp(key, other.key, sizeof(key)) < 0; 112 | } 113 | inline bool operator==(const PublicKey& other) const { 114 | return memcmp(key, other.key, sizeof(key)) == 0; 115 | } 116 | inline bool operator!=(const PublicKey& other) const { 117 | return memcmp(key, other.key, sizeof(key)) != 0; 118 | } 119 | 120 | struct Hash; 121 | 122 | private: 123 | friend class PrivateKey; 124 | 125 | explicit PublicKey(const byte* privateBytes); 126 | byte key[32]; 127 | }; 128 | 129 | class PrivateKey { 130 | public: 131 | PrivateKey(); 132 | ~PrivateKey(); 133 | KJ_DISALLOW_COPY(PrivateKey); 134 | 135 | PublicKey 
getPublic() const; 136 | SymmetricKey getSharedSecret(PublicKey otherPublic) const; 137 | 138 | private: 139 | byte* key; // Allocated with sodium_malloc. 140 | }; 141 | 142 | class ConnectionImpl; 143 | struct ConnectionMap; 144 | 145 | kj::Network& network; 146 | kj::Timer& timer; 147 | PrivateKey privateKey; 148 | PublicKey publicKey; 149 | SimpleAddress address; 150 | capnp::MallocMessageBuilder self; 151 | kj::Own connectionReceiver; 152 | kj::Own connectionMap; 153 | }; 154 | 155 | } // namespace blackrock 156 | 157 | #endif // BLACKROCK_CLUSTERRPC_H_ 158 | -------------------------------------------------------------------------------- /src/blackrock/local-persistent-registry.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "local-persistent-registry.h" 18 | #include 19 | #include 20 | #include 21 | 22 | namespace blackrock { 23 | 24 | class LocalPersistentRegistry::PersistentImpl: public Persistent::Server, public kj::Refcounted { 25 | public: 26 | PersistentImpl(Registration& registration, capnp::Capability::Client inner) 27 | : registry(registration.registry), registration(registration), 28 | inner(kj::mv(inner)) {} 29 | 30 | void unregister() { 31 | registration = nullptr; 32 | } 33 | 34 | capnp::Capability::Server::DispatchCallResult dispatchCall( 35 | uint64_t interfaceId, uint16_t methodId, 36 | capnp::CallContext context) override { 37 | // TODO(perf): We need a better way to check if a method is implemented locally. Here we 38 | // attempt a local call and catch UNIMPLEMENTED exceptions, but constructing exceptions is 39 | // slow due to string manipulation (even though no actual throw/catch will take place here). 40 | auto result = Persistent::Server::dispatchCall(interfaceId, methodId, context); 41 | result.promise = result.promise.catch_([=](kj::Exception&& e) mutable -> kj::Promise { 42 | if (e.getType() == kj::Exception::Type::UNIMPLEMENTED) { 43 | auto params = context.getParams(); 44 | auto req = inner.typelessRequest(interfaceId, methodId, params.targetSize()); 45 | req.set(params); 46 | return context.tailCall(kj::mv(req)); 47 | } else { 48 | return kj::mv(e); 49 | } 50 | }); 51 | return result; 52 | } 53 | 54 | kj::Promise save(SaveContext context) override { 55 | // TODO(security): Pay attention to `sealFor`. 
56 | context.releaseParams(); 57 | 58 | auto ref = context.getResults(capnp::MessageSize {16, 0}).initSturdyRef().initTransient(); 59 | ref.setVat(registry.thisVatPath); 60 | 61 | KJ_IF_MAYBE(reg, registration) { 62 | auto savedRef = kj::heap(*reg); 63 | ref.getLocalRef().setAs(kj::ArrayPtr(savedRef->token)); 64 | auto key = kj::ArrayPtr(savedRef->token); 65 | auto insertResult = reg->registry.savedRefs.insert(std::make_pair(key, kj::mv(savedRef))); 66 | KJ_ASSERT(insertResult.second, kj::encodeHex(savedRef->token)); 67 | } else { 68 | ref.getLocalRef().initAs(sizeof(SavedRef::token)); 69 | } 70 | return kj::READY_NOW; 71 | } 72 | 73 | private: 74 | LocalPersistentRegistry& registry; 75 | kj::Maybe registration; 76 | capnp::Capability::Client inner; 77 | }; 78 | 79 | LocalPersistentRegistry::Registration::Registration( 80 | LocalPersistentRegistry& registry, capnp::Capability::Client cap) 81 | : registry(registry), wrapped(kj::refcounted(*this, kj::mv(cap))) {} 82 | 83 | LocalPersistentRegistry::Registration::~Registration() noexcept(false) { 84 | wrapped->unregister(); 85 | 86 | for (auto ref: savedRefs) { 87 | // Note: This actually deletes the ref. 
88 | registry.savedRefs.erase(kj::ArrayPtr(ref->token)); 89 | } 90 | } 91 | 92 | Persistent::Client LocalPersistentRegistry::Registration::getWrapped() { 93 | return kj::addRef(*wrapped); 94 | } 95 | 96 | LocalPersistentRegistry::SavedRef::SavedRef(Registration& registration) 97 | : registration(registration) { 98 | randombytes_buf(token, sizeof(token)); 99 | registration.savedRefs.insert(this); 100 | } 101 | 102 | LocalPersistentRegistry::SavedRef::~SavedRef() noexcept(false) { 103 | registration.savedRefs.erase(this); 104 | } 105 | 106 | kj::Own 107 | LocalPersistentRegistry::makePersistent(capnp::Capability::Client cap) { 108 | return kj::heap(*this, kj::mv(cap)); 109 | } 110 | 111 | // ======================================================================================= 112 | 113 | class LocalPersistentRegistry::RestorerImpl: public Restorer::Server { 114 | public: 115 | RestorerImpl(LocalPersistentRegistry& registry, VatPath::Reader clientId) 116 | : registry(registry), clientId(clientId.totalSize().wordCount + 4) { 117 | this->clientId.setRoot(clientId); 118 | } 119 | 120 | protected: 121 | kj::Promise restore(RestoreContext context) override { 122 | auto iter = registry.savedRefs.find(context.getParams().getSturdyRef()); 123 | KJ_REQUIRE(iter != registry.savedRefs.end(), 124 | "requested local SturdyRef doesn't exist; maybe the object was deleted"); 125 | context.releaseParams(); 126 | 127 | SavedRef& savedRef = *iter->second; 128 | context.getResults(capnp::MessageSize { 4, 1 }).setCap(savedRef.registration.getWrapped()); 129 | return kj::READY_NOW; 130 | } 131 | 132 | kj::Promise drop(DropContext context) override { 133 | registry.savedRefs.erase(context.getParams().getSturdyRef()); 134 | return kj::READY_NOW; 135 | } 136 | 137 | private: 138 | LocalPersistentRegistry& registry; 139 | capnp::MallocMessageBuilder clientId; 140 | }; 141 | 142 | Restorer::Client 143 | LocalPersistentRegistry::createRestorerFor(VatPath::Reader clientId) { 144 | return 
kj::heap(*this, clientId); 145 | } 146 | 147 | } // namespace blackrock 148 | -------------------------------------------------------------------------------- /src/blackrock/worker.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0x95ec494d81e25bb1; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | using Supervisor = import "/sandstorm/supervisor.capnp".Supervisor; 22 | using SandstormCore = import "/sandstorm/supervisor.capnp".SandstormCore; 23 | using Grain = import "/sandstorm/grain.capnp"; 24 | using Storage = import "storage.capnp"; 25 | using StorageSchema = import "storage-schema.capnp"; 26 | using Package = import "/sandstorm/package.capnp"; 27 | using Util = import "/sandstorm/util.capnp"; 28 | 29 | using GrainState = StorageSchema.GrainState; 30 | 31 | using Timepoint = UInt64; 32 | # Nanoseconds since epoch. 33 | 34 | interface Worker { 35 | # Top-level interface to a Sandstorm worker node, which runs apps. 
36 | 37 | newGrain @0 (package :PackageInfo, 38 | command :Package.Manifest.Command, 39 | storage :Storage.StorageFactory, 40 | grainId :Text, 41 | core :SandstormCore) 42 | -> (grain :Supervisor, grainState :Storage.OwnedAssignable(GrainState)); 43 | # Start a new grain using the given package. 44 | # 45 | # The caller needs to save `grainState` into a user's grain collection to make the grain 46 | # permanent. 47 | 48 | restoreGrain @1 (package :PackageInfo, 49 | command :Package.Manifest.Command, 50 | storage :Storage.StorageFactory, 51 | grainState :GrainState, 52 | exclusiveGrainStateSetter :Util.Assignable(GrainState).Setter, 53 | grainId :Text, 54 | core :SandstormCore) 55 | -> (grain :Supervisor); 56 | # Continue an existing grain. 57 | # 58 | # `grainState` is the current value of the grain's GrainState assignable, and 59 | # `exclusiveGrainStateSetter` is the setter returned by the get() call that returned 60 | # `grainState`. Thus, a `set()` call on `grainStateSetter` will fail if the grain state has 61 | # changed. 62 | # 63 | # The first thing the worker will do is attempt to set the grain state in order to assert its 64 | # exclusive ownership. If the initial `set()` fails, `restoreGrain()` throws a "disconnected" 65 | # exception, and the caller should start over. 66 | # 67 | # Assuming the `set()` succeeds, the worker will call `volume.getExclusive()` to make absolutely 68 | # sure that no other worker might still be writing to the voluse. 69 | 70 | unpackPackage @2 (storage :Storage.StorageFactory) -> (stream :PackageUploadStream); 71 | # Initiate upload of a package, unpacking it into a fresh Volume. 72 | 73 | interface PackageUploadStream extends(Util.ByteStream) { 74 | getResult @0 () -> (appId :Text, manifest :Package.Manifest, volume :Storage.OwnedVolume, 75 | authorPgpKeyFingerprint :Text); 76 | # Waits until `ByteStream.done()` is called, then returns: 77 | # 78 | # `appId`: The verified application ID string, as produced by the `spk` tool. 
79 | # `manifest`: The parsed package manifest. 80 | # `volume`: The new Volume containing the unpacked app. 81 | # `authorPgpKeyFingerprint`: If the app was PGP-signed, the author's key fingerprint. 82 | } 83 | 84 | unpackBackup @3 (data :Storage.Blob, storage :Storage.StorageFactory) 85 | -> (volume :Storage.OwnedVolume, metadata :Grain.GrainInfo); 86 | packBackup @4 (volume :Storage.Volume, metadata :Grain.GrainInfo, storage :Storage.StorageFactory) 87 | -> (data :Storage.OwnedBlob); 88 | 89 | # TODO(someday): Enumerate grains. 90 | # TODO(someday): Resource usage stats. 91 | } 92 | 93 | interface Coordinator { 94 | # Decides which workers should be running which apps. 95 | # 96 | # The Coordinator's main interface is actually Restorer(SturdyRef.Hosted) -- the Coordinator will 97 | # start up the desired grain and restore the capability. The `Coordinator` interface is only 98 | # used for creating new grains. 99 | 100 | newGrain @0 (app :Util.Assignable(AppRestoreInfo).Getter, 101 | initCommand :Package.Manifest.Command, 102 | storage :Storage.StorageFactory) 103 | -> (grain :Supervisor, grainState :Storage.OwnedAssignable(GrainState)); 104 | # Create a new grain, just like Worker.newGrain(). 105 | 106 | restoreGrain @1 (storage :Storage.StorageFactory, 107 | grainState :Storage.Assignable(GrainState)) 108 | -> (grain :Supervisor); 109 | # Restore a grain. Permanently sets the grain's package to `package` and continue command to 110 | # `command` if these weren't already the case. 111 | } 112 | 113 | struct AppRestoreInfo { 114 | package @0 :PackageInfo; 115 | restoreCommand @1 :Package.Manifest.Command; 116 | } 117 | 118 | struct PackageInfo { 119 | id @0 :Data; 120 | # Some unique identifier for this package (not assigned by the worker). 121 | # 122 | # TODO(someday): Identify packages by capability. If it's the same `Volume`, it's the same 123 | # package. 
This is arguably a security issue if an attacker can get access to the `Worker` 124 | # or `Coordinator` interfaces and then poison workers by forging package IDs, though no 125 | # attacker should ever have direct access to those interfaces, of course. 126 | 127 | volume @1 :Storage.Volume; 128 | # Read-only volume containing the unpacked package. 129 | # 130 | # TODO(security): Enforce read-only. 131 | } 132 | -------------------------------------------------------------------------------- /src/blackrock/backend-set.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_BACKEND_SET_H_ 18 | #define BLACKROCK_BACKEND_SET_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | 24 | namespace blackrock { 25 | 26 | class BackendSetBase { 27 | public: 28 | BackendSetBase(): BackendSetBase(kj::newPromiseAndFulfiller()) {} 29 | ~BackendSetBase() noexcept(false); 30 | 31 | capnp::Capability::Client chooseOne(); 32 | 33 | void clear(); 34 | void add(uint64_t id, capnp::Capability::Client client); 35 | void remove(uint64_t id); 36 | 37 | private: 38 | struct Backend { 39 | capnp::Capability::Client client; 40 | 41 | Backend(Backend&&) = default; 42 | Backend(const Backend&) = delete; 43 | // Convince STL to use the move constructor. 44 | }; 45 | 46 | std::map backends; 47 | std::map::iterator next; 48 | kj::ForkedPromise readyPromise; 49 | kj::Own> readyFulfiller; 50 | 51 | explicit BackendSetBase(kj::PromiseFulfillerPair paf); 52 | }; 53 | 54 | template 55 | class BackendSetImpl: public BackendSet::Server, public kj::Refcounted { 56 | public: 57 | typename T::Client chooseOne() { return base.chooseOne().template castAs(); } 58 | // Choose a capability from the set and return it, cycling through the set every time this 59 | // method is called. If the backend set is empty, return a promise that resolves once a backend 60 | // is available. 61 | // 62 | // TODO(someady): Would be nice to build in disconnect handling here, e.g. pass in a callback 63 | // function that initiates the work, catches exceptions and retries with a different back-end. 
64 | 65 | protected: 66 | typedef typename BackendSet::Server Interface; 67 | kj::Promise reset(typename Interface::ResetContext context) { 68 | base.clear(); 69 | for (auto backend: context.getParams().getBackends()) { 70 | base.add(backend.getId(), backend.getBackend()); 71 | } 72 | return kj::READY_NOW; 73 | } 74 | kj::Promise add(typename Interface::AddContext context) { 75 | auto params = context.getParams(); 76 | base.add(params.getId(), params.getBackend()); 77 | return kj::READY_NOW; 78 | } 79 | kj::Promise remove(typename Interface::RemoveContext context) { 80 | base.remove(context.getParams().getId()); 81 | return kj::READY_NOW; 82 | } 83 | 84 | private: 85 | BackendSetBase base; 86 | }; 87 | 88 | // ======================================================================================= 89 | 90 | class BackendSetFeederBase: private kj::TaskSet::ErrorHandler { 91 | public: 92 | explicit BackendSetFeederBase(uint minCount): minCount(minCount), tasks(*this) {} 93 | KJ_DISALLOW_COPY(BackendSetFeederBase); 94 | 95 | class Registration { 96 | public: 97 | virtual ~Registration() noexcept(false); 98 | }; 99 | 100 | kj::Own addBackend(capnp::Capability::Client cap); 101 | kj::Own addConsumer(BackendSet<>::Client set); 102 | 103 | private: 104 | class BackendRegistration; 105 | class ConsumerRegistration; 106 | 107 | uint minCount; 108 | bool ready = minCount == 0; // Becomes true when minCount backends are first available. 109 | uint64_t backendCount = 0; 110 | uint64_t nextId = 0; 111 | BackendRegistration* backendsHead = nullptr; 112 | BackendRegistration** backendsTail = &backendsHead; 113 | ConsumerRegistration* consumersHead = nullptr; 114 | ConsumerRegistration** consumersTail = &consumersHead; 115 | kj::TaskSet tasks; 116 | 117 | void taskFailed(kj::Exception&& exception) override; 118 | }; 119 | 120 | template 121 | class BackendSetFeeder final: public BackendSetFeederBase { 122 | // Manages the process of maintaining BackendSets. 
123 | // 124 | // A BackendSetFeeder is created by the master machine for each kind of load-balanced set. For 125 | // example, there is a BackendSetFeeder for StorageRoots. Each StorageRoot capability is added 126 | // using addBackend(), then each BackendSet which needs to be populated by StorageRoots (e.g. 127 | // the front-end) is added using addConsumer(). 128 | 129 | public: 130 | explicit BackendSetFeeder(uint minCount) 131 | : BackendSetFeederBase(minCount) {} 132 | // The feeder will wait until at least minBackendCount backends have been added before it 133 | // initializes any consumers. This prevents flooding the first machine in a set with traffic 134 | // while the others are still coming online. 135 | 136 | using BackendSetFeederBase::Registration; 137 | 138 | kj::Own addBackend(typename T::Client cap) KJ_WARN_UNUSED_RESULT { 139 | // Inserts this capability into all consumer sets. When the returned Backend is dropped 140 | // (indicating that the back-end has disconnected), removes the capability from all consumer 141 | // sets. 142 | return BackendSetFeederBase::addBackend(kj::mv(cap)); 143 | } 144 | 145 | kj::Own addConsumer(typename BackendSet::Client set) KJ_WARN_UNUSED_RESULT { 146 | // Inserts all backends into this consumer. When the returned Consumer is dropped (indicating 147 | // that it has disconnected), stops updating it. 148 | return BackendSetFeederBase::addConsumer(set.template asGeneric<>()); 149 | } 150 | }; 151 | 152 | } // namespace blackrock 153 | 154 | #endif // BLACKROCK_BACKEND_SET_H_ 155 | -------------------------------------------------------------------------------- /src/blackrock/machine.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 
3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | @0x96022888188b4f2f; 18 | 19 | $import "/capnp/c++.capnp".namespace("blackrock"); 20 | 21 | using ClusterRpc = import "cluster-rpc.capnp"; 22 | using Storage = import "storage.capnp"; 23 | using StorageSchema = import "storage-schema.capnp"; 24 | using Worker = import "worker.capnp"; 25 | using Frontend = import "frontend.capnp"; 26 | using Util = import "/sandstorm/util.capnp"; 27 | 28 | using VatId = ClusterRpc.VatId; 29 | using Address = ClusterRpc.Address; 30 | using SturdyRef = ClusterRpc.SturdyRef; 31 | using Restorer = ClusterRpc.Restorer; 32 | using BackendSet = ClusterRpc.BackendSet; 33 | 34 | interface MasterRestorer(Ref) { 35 | # Represents a Restorer that can restore capabilities for any owner. This capability should only 36 | # be given to the cluster master, which must then attenuate it for specific owners before passing 37 | # it on to said owners. 38 | 39 | getForOwner @0 (domain :SturdyRef.Owner) -> (attenuated :Restorer(Ref)); 40 | } 41 | 42 | interface Gateway { 43 | # Gateway machines bridge between the cluster and the external network (usually the internet). 44 | # They bridge between different parameterizations of Cap'n Proto, serve as a firewall, and 45 | # provide a way for internal apps to make external requests which are explicitly prevented from 46 | # accessing internal machines (e.g. 
if an app requests to connect to some IP, we need to make 47 | # sure that IP is on the public internet, not internal; the best way to do that is to make 48 | # sure the connection is formed using a public network interface that can't even route to 49 | # internal IPs in the first place). 50 | # 51 | # On a more practical note, Gateway machines also accept HTTP traffic from the public internet, 52 | # which they may forward to frontend machines or directly to grains. 53 | 54 | # TODO(soon): Methods for: 55 | # - Sending / receiving general internet traffic. (In-cluster traffic is NOT permitted.) 56 | # - Making and accepting external Cap'n Proto connections and bridging those capabilities into 57 | # the fold. 58 | 59 | # TODO(cleanup): Move to its own file. 60 | } 61 | 62 | interface GatewayImplBase extends(Gateway, BackendSet(Frontend.Frontend)) {} 63 | # Implementation detail. TODO(cleanup): Put this somewhere private. 64 | 65 | interface Machine { 66 | # A machine, ready to serve. 67 | # 68 | # When a new machine is added to the cluster, its Machine capability is given to the cluster 69 | # master via an appropriately secure mechanism. Only the master should ever hold this capability. 70 | # 71 | # The master will call the methods below in order to tell the machine what it should do. Multiple 72 | # become*() method can be called to make the machine serve multiple purposes. Calling the same 73 | # become*() method twice, however, only updates the existing instance of that role and returns 74 | # the same capabilities as before. 75 | # 76 | # This interface is intentionally designed such that the master machine can perform its duties 77 | # without ever actually parsing any of the response messages. Everything the master does -- 78 | # introducing machines to each other -- can be expressed via pipelining. This implies that it is 79 | # not possible to confuse or compromise the master machine by sending it weird messages. 
In the 80 | # future we could even literally extend the VatNetwork to discard incoming messages. 81 | 82 | becomeStorage @0 () 83 | -> (sibling :Storage.StorageSibling, 84 | rootSet :Storage.StorageRootSet, 85 | storageRestorer :MasterRestorer(SturdyRef.Stored), 86 | storageFactory :Storage.StorageFactory, 87 | siblingSet: BackendSet(Storage.StorageSibling), 88 | hostedRestorerSet: BackendSet(Restorer(SturdyRef.Hosted)), 89 | gatewayRestorerSet: BackendSet(Restorer(SturdyRef.External))); 90 | becomeWorker @1 () -> (worker :Worker.Worker); 91 | becomeCoordinator @2 () 92 | -> (coordinator :Worker.Coordinator, 93 | hostedRestorer :MasterRestorer(SturdyRef.Hosted), 94 | workerSet :BackendSet(Worker.Worker), 95 | storageRestorerSet :BackendSet(Restorer(SturdyRef.Stored))); 96 | becomeGateway @3 (config :Frontend.FrontendConfig) 97 | -> (gateway :Gateway, 98 | frontends :BackendSet(Frontend.Frontend)); 99 | becomeFrontend @4 (config :Frontend.FrontendConfig, replicaNumber :UInt32) 100 | -> (frontend :Frontend.Frontend, 101 | storageRestorerSet :BackendSet(Restorer(SturdyRef.Stored)), 102 | storageRootSet :BackendSet(Storage.StorageRootSet), 103 | storageFactorySet :BackendSet(Storage.StorageFactory), 104 | hostedRestorerSet :BackendSet(Restorer(SturdyRef.Hosted)), 105 | workerSet :BackendSet(Worker.Worker), # `workerSet` is temporary 106 | mongoSet :BackendSet(Frontend.Mongo)); 107 | becomeMongo @6 () -> (mongo :Frontend.Mongo); 108 | 109 | shutdown @5 (); 110 | # Do whatever is necessary to prepare this machine for safe shutdown. Do not return until it's 111 | # safe. 112 | 113 | ping @7 (hang :Bool = false); 114 | # Returns immediately if `hang` is false, or never returns if `hang` is true. The master uses 115 | # both modes to detect machine death: a hanging ping() should throw an exception the moment the 116 | # connection dies, but periodic non-hanging ping()s are also used to verify that the connection 117 | # hasn't silently failed. 
118 | } 119 | -------------------------------------------------------------------------------- /src/blackrock/nbd-test-loopback.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include "nbd-bridge.h" 18 | #include "fs-storage.h" 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | namespace blackrock { 35 | namespace { 36 | 37 | class NbdLoopbackMain { 38 | // A test program that mounts a FUSE filesystem that just mirrors some other directory. 
39 | 40 | public: 41 | NbdLoopbackMain(kj::ProcessContext& context): context(context) {} 42 | 43 | kj::MainFunc getMain() { 44 | return kj::MainBuilder(context, "Fuse test, unknown version", 45 | "Creates a Sandstore at containing a single Volume, then mounts that " 46 | "volume at .") 47 | .addOptionWithArg({'o', "options"}, KJ_BIND_METHOD(*this, setOptions), "", 48 | "Set mount options.") 49 | .addOption({'r', "reset"}, KJ_BIND_METHOD(*this, reset), 50 | "Reset all nbd devices, hopefully killing any processes blocked on them.") 51 | .expectArg("", KJ_BIND_METHOD(*this, setMountPoint)) 52 | .expectArg("", KJ_BIND_METHOD(*this, setStorageDir)) 53 | .expectOneOrMoreArgs("", KJ_BIND_METHOD(*this, addCommandArg)) 54 | .callAfterParsing(KJ_BIND_METHOD(*this, run)) 55 | .build(); 56 | } 57 | 58 | private: 59 | kj::ProcessContext& context; 60 | kj::StringPtr options; 61 | kj::StringPtr mountPoint; 62 | kj::AutoCloseFd storageDir; 63 | kj::Vector command; 64 | 65 | kj::MainBuilder::Validity setOptions(kj::StringPtr arg) { 66 | options = arg; 67 | return true; 68 | } 69 | 70 | kj::MainBuilder::Validity setMountPoint(kj::StringPtr arg) { 71 | mountPoint = arg; 72 | return true; 73 | } 74 | 75 | kj::MainBuilder::Validity setStorageDir(kj::StringPtr arg) { 76 | storageDir = sandstorm::raiiOpen(arg.cStr(), O_RDONLY | O_DIRECTORY | O_CLOEXEC); 77 | return true; 78 | } 79 | 80 | kj::MainBuilder::Validity addCommandArg(kj::StringPtr arg) { 81 | command.add(arg); 82 | return true; 83 | } 84 | 85 | kj::MainBuilder::Validity reset() { 86 | NbdDevice::resetAll(); 87 | context.exit(); 88 | } 89 | 90 | kj::MainBuilder::Validity run() { 91 | KJ_SYSCALL(unshare(CLONE_NEWNS), "are you root?"); 92 | KJ_SYSCALL(mount("none", "/", nullptr, MS_REC | MS_PRIVATE, nullptr)); 93 | 94 | NbdDevice::loadKernelModule(); 95 | bool isNew = faccessat(storageDir, "roots/root", F_OK, 0) < 0; 96 | 97 | int pair[2]; 98 | socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0, pair); 99 | 
kj::AutoCloseFd kernelEnd(pair[0]); 100 | kj::AutoCloseFd userEnd(pair[1]); 101 | 102 | kj::AutoCloseFd abortEvent = newEventFd(0, EFD_CLOEXEC | EFD_NONBLOCK); 103 | 104 | kj::Thread serverThread([&]() { 105 | KJ_IF_MAYBE(exception, kj::runCatchingExceptions([&]() { 106 | auto io = kj::setupAsyncIo(); 107 | StorageRootSet::Client storage = kj::heap( 108 | storageDir, io.unixEventPort, 109 | io.provider->getTimer(), nullptr); 110 | 111 | auto factory = storage.getFactoryRequest().send().getFactory(); 112 | OwnedVolume::Client volume = nullptr; 113 | 114 | if (isNew) { 115 | volume = factory.newVolumeRequest().send().getVolume(); 116 | auto req2 = factory.newAssignableRequest(); 117 | req2.setInitialValue(volume); 118 | 119 | auto req3 = storage.setRequest>(); 120 | req3.setName("root"); 121 | req3.setObject(req2.send().getAssignable()); 122 | 123 | req3.send().wait(io.waitScope); 124 | } else { 125 | auto req = storage.getRequest>(); 126 | req.setName("root"); 127 | volume = req.send().getObject().castAs>() 128 | .getRequest().send().getValue(); 129 | } 130 | 131 | kj::UnixEventPort::FdObserver cancelObserver(io.unixEventPort, abortEvent, 132 | kj::UnixEventPort::FdObserver::OBSERVE_READ); 133 | 134 | NbdVolumeAdapter volumeAdapter( 135 | io.lowLevelProvider->wrapSocketFd(userEnd, 136 | kj::LowLevelAsyncIoProvider::ALREADY_CLOEXEC | 137 | kj::LowLevelAsyncIoProvider::ALREADY_NONBLOCK), 138 | kj::mv(volume), NbdAccessType::READ_WRITE); 139 | volumeAdapter.run().exclusiveJoin(cancelObserver.whenBecomesReadable()) 140 | .wait(io.waitScope); 141 | })) { 142 | KJ_LOG(FATAL, "nbd server threw exception", *exception); 143 | } 144 | }); 145 | 146 | // Ensure thread gets canceled before its destructor is called. 
147 | KJ_ON_SCOPE_FAILURE(writeEvent(abortEvent, 1)); 148 | 149 | NbdDevice device; 150 | context.warning(kj::str("using: ", device.getPath())); 151 | 152 | NbdBinding binding(device, kj::mv(kernelEnd), NbdAccessType::READ_WRITE); 153 | KJ_DEFER(context.warning("unbinding...")); 154 | 155 | if (isNew) { 156 | context.warning("formatting..."); 157 | device.format(); 158 | } 159 | 160 | context.warning("mounting..."); 161 | KJ_DEFER(device.trimJournalIfClean()); 162 | Mount mount(device.getPath(), mountPoint, 0, options); 163 | KJ_DEFER(context.warning("unmounting...")); 164 | 165 | KJ_SYSCALL(unshare(CLONE_NEWPID)); 166 | 167 | sandstorm::Subprocess(sandstorm::Subprocess::Options(command)).waitForSuccess(); 168 | 169 | return true; 170 | } 171 | }; 172 | 173 | } // namespace 174 | } // namespace blackrock 175 | 176 | KJ_MAIN(blackrock::NbdLoopbackMain); 177 | -------------------------------------------------------------------------------- /src/blackrock/bundle.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "bundle.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace blackrock { 33 | 34 | #define BUNDLE_PATH "/blackrock/bundle" 35 | 36 | void createSandstormDirectories() { 37 | kj::StringPtr paths[] = { 38 | "/var/blackrock", 39 | "/var/blackrock/bundle", 40 | "/var/blackrock/bundle/sandstorm", 41 | "/var/blackrock/bundle/sandstorm/socket", 42 | "/var/blackrock/bundle/mongo", 43 | "/var/blackrock/bundle/log", 44 | "/var/blackrock/bundle/pid", 45 | "/tmp/blackrock-bundle" 46 | }; 47 | 48 | if (access("/tmp/blackrock-bundle", F_OK) >= 0) { 49 | sandstorm::recursivelyDelete("/tmp/blackrock-bundle"); 50 | } 51 | for (auto path: paths) { 52 | mkdir(path.cStr(), (path.startsWith("/tmp/") ? S_ISVTX | 0770 : 0750)); 53 | KJ_SYSCALL(chown(path.cStr(), 1000, 1000)); 54 | } 55 | } 56 | 57 | void enterSandstormBundle() { 58 | // Set up a small sandbox located inside the Sandstorm (i.e. non-Blackrock) bundle, for running 59 | // things like the front-end and Mongo. 60 | // 61 | // TODO(cleanup): Extend Subprocess to support a lot of these things? 62 | 63 | // Enter mount namespace so that we can bind stuff in. 64 | KJ_SYSCALL(unshare(CLONE_NEWNS)); 65 | 66 | KJ_SYSCALL(chdir(BUNDLE_PATH)); 67 | 68 | // To really unshare the mount namespace, we also have to make sure all mounts are private. 69 | // The parameters here were derived by strace'ing `mount --make-rprivate /`. AFAICT the flags 70 | // are undocumented. :( 71 | KJ_SYSCALL(mount("none", "/", nullptr, MS_REC | MS_PRIVATE, nullptr)); 72 | 73 | // Make sure that the current directory is a mount point so that we can use pivot_root. 74 | KJ_SYSCALL(mount(".", ".", nullptr, MS_BIND | MS_REC, nullptr)); 75 | 76 | // Now change directory into the new mount point. 
77 | char cwdBuf[PATH_MAX + 1]; 78 | if (getcwd(cwdBuf, sizeof(cwdBuf)) == nullptr) { 79 | KJ_FAIL_SYSCALL("getcwd", errno); 80 | } 81 | KJ_SYSCALL(chdir(cwdBuf)); 82 | 83 | // Bind /proc for the global pid namespace in the chroot. 84 | KJ_SYSCALL(mount("/proc", "proc", nullptr, MS_BIND | MS_REC, nullptr)); 85 | 86 | // Bind /var and /tmp. 87 | KJ_SYSCALL(mount("/tmp/blackrock-bundle", "tmp", nullptr, MS_BIND, nullptr)); 88 | KJ_SYSCALL(mount("/var/blackrock/bundle", "var", nullptr, MS_BIND, nullptr)); 89 | 90 | // Bind desired devices from /dev into our chroot environment. 91 | KJ_SYSCALL(mount("/dev/null", "dev/null", nullptr, MS_BIND, nullptr)); 92 | KJ_SYSCALL(mount("/dev/zero", "dev/zero", nullptr, MS_BIND, nullptr)); 93 | KJ_SYSCALL(mount("/dev/random", "dev/random", nullptr, MS_BIND, nullptr)); 94 | KJ_SYSCALL(mount("/dev/urandom", "dev/urandom", nullptr, MS_BIND, nullptr)); 95 | 96 | // Mount a tmpfs at /etc and copy over necessary config files from the host. 97 | // Note that unlike regular Sandstorm, we don't bother bind-mounting in the host etc, because 98 | // we don't expect to have to deal with dynamic network configs. 99 | KJ_SYSCALL(mount("tmpfs", "etc", "tmpfs", MS_NOSUID | MS_NOEXEC, 100 | kj::str("size=2m,nr_inodes=128,mode=755,uid=0,gid=0").cStr())); 101 | { 102 | auto files = sandstorm::splitLines(sandstorm::readAll("host.list")); 103 | 104 | // Now copy over each file. 105 | for (auto& file: files) { 106 | if (access(file.cStr(), R_OK) == 0 && !sandstorm::isDirectory(file)) { 107 | auto in = sandstorm::raiiOpen(file, O_RDONLY); 108 | auto out = sandstorm::raiiOpen(kj::str(".", file), O_WRONLY | O_CREAT | O_EXCL); 109 | ssize_t n; 110 | do { 111 | KJ_SYSCALL(n = sendfile(out, in, nullptr, 1 << 20)); 112 | } while (n > 0); 113 | } 114 | } 115 | } 116 | 117 | // pivot_root into the frontend dir. (This is just a fancy more-secure chroot.) 
118 | KJ_SYSCALL(syscall(SYS_pivot_root, ".", "tmp")); 119 | KJ_SYSCALL(chdir("/")); 120 | KJ_SYSCALL(umount2("tmp", MNT_DETACH)); 121 | 122 | // Drop privileges. Since we own the machine we can choose any UID, just don't want it to be 0. 123 | KJ_SYSCALL(setresgid(1000, 1000, 1000)); 124 | KJ_SYSCALL(setgroups(0, nullptr)); 125 | KJ_SYSCALL(setresuid(1000, 1000, 1000)); 126 | 127 | // Clear signal mask. Not strictly a sandboxing measure, just cleanup. 128 | // TODO(cleanup): We should probably discard any signals in this mask which are currently pending 129 | // before we unblock them. We should probably fix this in Sandstorm as well. 130 | sigset_t sigset; 131 | KJ_SYSCALL(sigemptyset(&sigset)); 132 | KJ_SYSCALL(sigprocmask(SIG_SETMASK, &sigset, nullptr)); 133 | 134 | // The environment inherited from the host is probably no good for us. E.g. an oddball 135 | // locale setting can crash Mongo because we don't have the appropriate locale files available. 136 | KJ_SYSCALL(clearenv()); 137 | 138 | // Set up an environment appropriate for us. 
139 | KJ_SYSCALL(setenv("LANG", "C.UTF-8", true)); 140 | KJ_SYSCALL(setenv("PATH", "/usr/bin:/bin", true)); 141 | KJ_SYSCALL(setenv("LD_LIBRARY_PATH", "/usr/local/lib:/usr/lib:/lib", true)); 142 | } 143 | 144 | kj::Maybe checkPgpSignatureInBundle( 145 | kj::StringPtr appIdString, sandstorm::spk::Metadata::Reader metadata) { 146 | createSandstormDirectories(); 147 | 148 | auto pipe = sandstorm::Pipe::make(); 149 | 150 | sandstorm::Subprocess child([&]() -> int { 151 | enterSandstormBundle(); 152 | 153 | pipe.readEnd = nullptr; 154 | 155 | KJ_IF_MAYBE(s, sandstorm::checkPgpSignature(appIdString, metadata)) { 156 | kj::FdOutputStream(pipe.writeEnd.get()).write(s->begin(), s->size()); 157 | } 158 | 159 | return 0; 160 | }); 161 | 162 | pipe.writeEnd = nullptr; 163 | kj::String result = sandstorm::readAll(pipe.readEnd); 164 | 165 | child.waitForSuccess(); 166 | if (result == nullptr) { 167 | return nullptr; 168 | } else { 169 | return kj::mv(result); 170 | } 171 | } 172 | 173 | } // namespace blackrock 174 | 175 | -------------------------------------------------------------------------------- /src/blackrock/backend-set.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "backend-set.h" 18 | #include 19 | 20 | namespace blackrock { 21 | 22 | BackendSetBase::BackendSetBase(kj::PromiseFulfillerPair paf) 23 | : next(backends.end()), 24 | readyPromise(paf.promise.fork()), 25 | readyFulfiller(kj::mv(paf.fulfiller)) {} 26 | BackendSetBase::~BackendSetBase() noexcept(false) {} 27 | 28 | capnp::Capability::Client BackendSetBase::chooseOne() { 29 | if (backends.empty()) { 30 | return readyPromise.addBranch().then([this]() { 31 | return chooseOne(); 32 | }); 33 | } else { 34 | if (next == backends.end()) { 35 | next = backends.begin(); 36 | } 37 | 38 | return (next++)->second.client; 39 | } 40 | } 41 | 42 | void BackendSetBase::clear() { 43 | backends.clear(); 44 | } 45 | 46 | void BackendSetBase::add(uint64_t id, capnp::Capability::Client client) { 47 | if (backends.empty()) { 48 | readyFulfiller->fulfill(); 49 | } 50 | 51 | backends.insert(std::make_pair(id, Backend { kj::mv(client) })); 52 | } 53 | 54 | void BackendSetBase::remove(uint64_t id) { 55 | if (next != backends.end() && next->first == id) { 56 | ++next; 57 | } 58 | backends.erase(id); 59 | 60 | if (backends.empty()) { 61 | auto paf = kj::newPromiseAndFulfiller(); 62 | readyPromise = paf.promise.fork(); 63 | readyFulfiller = kj::mv(paf.fulfiller); 64 | } 65 | } 66 | 67 | // ======================================================================================= 68 | 69 | class BackendSetFeederBase::ConsumerRegistration final: public Registration { 70 | public: 71 | ConsumerRegistration(BackendSetFeederBase& feeder, BackendSet<>::Client set); 72 | 73 | ~ConsumerRegistration() noexcept(false); 74 | 75 | private: 76 | friend class BackendSetFeederBase; 77 | 78 | BackendSetFeederBase& feeder; 79 | BackendSet<>::Client set; 80 | ConsumerRegistration* next; 81 | ConsumerRegistration** prev; 82 | 83 | void init(); 84 | }; 85 | 86 | class BackendSetFeederBase::BackendRegistration final: public Registration { 87 | public: 88 | BackendRegistration(BackendSetFeederBase& 
feeder, capnp::Capability::Client cap); 89 | 90 | ~BackendRegistration() noexcept(false); 91 | 92 | private: 93 | friend class BackendSetFeederBase; 94 | 95 | BackendSetFeederBase& feeder; 96 | uint64_t id; 97 | capnp::Capability::Client cap; 98 | BackendRegistration* next; 99 | BackendRegistration** prev; 100 | }; 101 | 102 | auto BackendSetFeederBase::addBackend(capnp::Capability::Client cap) -> kj::Own { 103 | auto result = kj::heap(*this, kj::mv(cap)); 104 | 105 | if (ready) { 106 | // Consumers are already initialized. Add the new backend to each one. 107 | for (ConsumerRegistration* consumer = consumersHead; consumer != nullptr; 108 | consumer = consumer->next) { 109 | tasks.add(kj::evalNow([&]() { 110 | auto req = consumer->set.addRequest(capnp::MessageSize {4, 0}); 111 | req.setId(result->id); 112 | req.getBackend().setAs(result->cap); 113 | return req.send().then([](auto&&) {}); 114 | })); 115 | } 116 | } else if (backendCount >= minCount) { 117 | // We have enough backends to initialize all consumers. 118 | ready = true; 119 | for (ConsumerRegistration* consumer = consumersHead; consumer != nullptr; 120 | consumer = consumer->next) { 121 | consumer->init(); 122 | } 123 | } 124 | 125 | return kj::mv(result); 126 | } 127 | 128 | auto BackendSetFeederBase::addConsumer(BackendSet<>::Client set) -> kj::Own { 129 | auto result = kj::heap(*this, kj::mv(set)); 130 | 131 | if (ready) { 132 | // We already have all the backends we need, so go ahead and initialize the consumer. 
133 | result->init(); 134 | } 135 | 136 | return kj::mv(result); 137 | } 138 | 139 | void BackendSetFeederBase::taskFailed(kj::Exception&& exception) { 140 | KJ_LOG(ERROR, exception); 141 | } 142 | 143 | BackendSetFeederBase::Registration::~Registration() noexcept(false) {} 144 | 145 | BackendSetFeederBase::ConsumerRegistration::ConsumerRegistration( 146 | BackendSetFeederBase& feeder, BackendSet<>::Client set) 147 | : feeder(feeder), set(kj::mv(set)), 148 | next(nullptr), prev(feeder.consumersTail) { 149 | *feeder.consumersTail = this; 150 | feeder.consumersTail = &next; 151 | } 152 | 153 | BackendSetFeederBase::ConsumerRegistration::~ConsumerRegistration() noexcept(false) { 154 | if (next == nullptr) { 155 | feeder.consumersTail = prev; 156 | } else { 157 | next->prev = prev; 158 | } 159 | *prev = next; 160 | } 161 | 162 | void BackendSetFeederBase::ConsumerRegistration::init() { 163 | auto req = set.resetRequest(); 164 | auto list = req.initBackends(feeder.backendCount); 165 | uint i = 0; 166 | for (BackendRegistration* backend = feeder.backendsHead; backend != nullptr; 167 | backend = backend->next) { 168 | auto element = list[i++]; 169 | element.setId(backend->id); 170 | element.getBackend().setAs(backend->cap); 171 | } 172 | feeder.tasks.add(req.send().then([](auto&&) {})); 173 | } 174 | 175 | BackendSetFeederBase::BackendRegistration::BackendRegistration( 176 | BackendSetFeederBase& feeder, capnp::Capability::Client cap) 177 | : feeder(feeder), id(feeder.nextId++), cap(kj::mv(cap)), 178 | next(nullptr), prev(feeder.backendsTail) { 179 | *feeder.backendsTail = this; 180 | feeder.backendsTail = &next; 181 | ++feeder.backendCount; 182 | } 183 | 184 | BackendSetFeederBase::BackendRegistration::~BackendRegistration() noexcept(false) { 185 | --feeder.backendCount; 186 | if (next == nullptr) { 187 | feeder.backendsTail = prev; 188 | } else { 189 | next->prev = prev; 190 | } 191 | *prev = next; 192 | 193 | // Remove from all consumers. 
194 | for (ConsumerRegistration* consumer = feeder.consumersHead; consumer != nullptr; 195 | consumer = consumer->next) { 196 | feeder.tasks.add(kj::evalNow([&]() { 197 | auto req = consumer->set.removeRequest(capnp::MessageSize {4, 0}); 198 | req.setId(id); 199 | return req.send().then([](auto&&) {}); 200 | })); 201 | } 202 | } 203 | 204 | } // namespace blackrock 205 | 206 | -------------------------------------------------------------------------------- /src/blackrock/nbd-bridge.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #ifndef BLACKROCK_NBD_BRIDGE_H_ 18 | #define BLACKROCK_NBD_BRIDGE_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace blackrock { 27 | 28 | enum class NbdAccessType { 29 | READ_ONLY, 30 | READ_WRITE 31 | }; 32 | 33 | class NbdVolumeAdapter: private kj::TaskSet::ErrorHandler { 34 | // Implements the NBD protocol in terms of `Volume`. 35 | public: 36 | NbdVolumeAdapter(kj::Own socket, Volume::Client volume, 37 | NbdAccessType access); 38 | // NBD requests are read from `socket` and implemented via `volume`. 
39 | 40 | void updateVolume(Volume::Client newVolume); 41 | // Replaces the Volume capability with a new one, which must point to the exact same volume. 42 | // Useful for recovering after disconnects, if the driver hasn't noticed the disconnect yet. 43 | 44 | kj::Promise run(); 45 | // Actually runs the loop. The promise resolves successfully when the device has been shut down. 46 | // It is extremely important to wait for this before destroying the NbdVolumeAdapter; failure 47 | // to do so can leave the kernel in an unhappy state. 48 | 49 | kj::Promise onDisconnected() { return kj::mv(disconnectedPaf.promise); } 50 | // Resolves if the underlying volume becomes disconnected, in which case it's time to force-kill 51 | // everything using it. Can only be called once. 52 | 53 | private: 54 | kj::Own socket; 55 | Volume::Client volume; 56 | kj::PromiseFulfillerPair disconnectedPaf; 57 | NbdAccessType access; 58 | bool disconnected = false; 59 | kj::TaskSet tasks; 60 | 61 | kj::Promise replyQueue = kj::READY_NOW; 62 | // Promise for completion of previous write() operation to handle.socket. 63 | // 64 | // Becomes null when the run loop completes. 65 | // 66 | // TODO(someday): When overlapping write()s are supported by AsyncIoStream, simplify this. 67 | 68 | struct nbd_request request; 69 | // We only read one of these at a time, so might as well allocate it here. 70 | 71 | struct RequestHandle; 72 | struct ReplyAndIovec; 73 | void reply(RequestHandle reqHandle, int error = 0); 74 | void replyError(RequestHandle reqHandle, kj::Exception&& exception, const char* op); 75 | void taskFailed(kj::Exception&& exception) override; 76 | }; 77 | 78 | class NbdDevice { 79 | // Represents a claim to a specific `/dev/nbdX` device node. 80 | 81 | public: 82 | NbdDevice(); 83 | // Claims an unused NBD device and binds it to the given socket. (The other end of the socket 84 | // pair should be passed to `NbdVolumeAdapter`.) 
85 | 86 | explicit NbdDevice(uint number); 87 | // Explicitly claim a specific device number. For debugging purposes only! 88 | 89 | kj::StringPtr getPath() { return path; } 90 | // E.g. "/dev/nbd12". 91 | 92 | int getFd() { return fd; } 93 | 94 | void format(); 95 | // Format the device as an ext4 filesystem with an initial size of 8GB. This is accomplished by 96 | // simply writing a template image directly to the disk, so format() will result in exactly the 97 | // same disk image every time. 98 | 99 | void trimJournalIfClean(); 100 | // Verify that the journal is currently clean, and then TRIM it. Call immediately after a clean 101 | // unmount to reduce disk usage. (The journal normally doesn't get TRIMed even when the contents 102 | // have already been committed. This seems to be a deficiency in the ext4 driver.) 103 | 104 | void fixSurpriseFeatures(); 105 | // Check if this volume has the "surprise features" of 64bit and metadata_checksum enabled. If 106 | // so, fix the situation. The surprise features were accidentally enabled on many grains in 107 | // production due to an unexpected change in /etc/mke2fs.conf landing in Debian Testing. Since 108 | // we use mke2fs to create the zygote image at compile time, these features ended up enabled 109 | // in production. The metadata_checksum feature is buggy on our older production kernels, and 110 | // the 64bit option breaks trimJournalIfClean() (we could fix that, but the 64bit option is 111 | // not helpful to us, so better to avoid having multiple code paths!). 112 | // 113 | // Note: This method runs subprocesses and may block. It CANNOT be run from the main worker 114 | // process! 115 | 116 | static void resetAll(); 117 | // Iterate through all the nbd devices and reset them, in order to un-block processes wedged 118 | // trying to read disconnected devices. 119 | // 120 | // THIS WILL BREAK EVERYTHING CURRENTLY USING ANY NBD DEVICE. 
121 | 122 | static void disconnectAll(); 123 | // Iterate through all the nbd devices and disconnect them, in an attempt to forcefully tear 124 | // down a worker. 125 | // 126 | // THIS WILL BREAK EVERYTHING CURRENTLY USING ANY NBD DEVICE. 127 | 128 | static void loadKernelModule(); 129 | // Make sure the NBD kernel module is loaded. 130 | 131 | private: 132 | kj::String path; 133 | kj::AutoCloseFd fd; 134 | }; 135 | 136 | class NbdBinding { 137 | // Given an NBD device and a socket implementing the NBD protocol, makes the NBD device live and 138 | // mountable. 139 | // 140 | // NbdBinding MUST NOT be used in the same thread that is running the NbdVolumeAdapter. This is 141 | // because NbdBinding performs blocking system calls that will cause the kernel to issue reads 142 | // and writes to the device, and will not return until those operations complete. 143 | 144 | public: 145 | NbdBinding(NbdDevice& device, kj::AutoCloseFd socket, NbdAccessType access); 146 | // Binds the given NBD device to the given socket. (The other end of the socket pair should be 147 | // passed to `NbdVolumeAdapter`.) 148 | 149 | ~NbdBinding() noexcept(false); 150 | // Disconnects the binding. 151 | 152 | private: 153 | NbdDevice& device; 154 | kj::Thread doItThread; 155 | // Executes the NBD_DO_IT ioctl(), which runs the NBD device loop in the kernel, not returning 156 | // until the device is disconnected. 157 | 158 | static NbdDevice& setup(NbdDevice& device, kj::AutoCloseFd socket, NbdAccessType access); 159 | }; 160 | 161 | class Mount { 162 | // Mounts a device at a path. As with `NbdDevice`, `Mount` MUST NOT be used in the same thread 163 | // that is executing the NbdVolumeAdapter implementing the device. 
164 | 165 | public: 166 | Mount(kj::StringPtr devPath, kj::StringPtr mountPoint, uint64_t flags, kj::StringPtr options); 167 | ~Mount() noexcept(false); 168 | 169 | private: 170 | kj::String mountPoint; 171 | uint64_t flags; 172 | }; 173 | 174 | } // namespace blackrock 175 | 176 | #endif // BLACKROCK_NBD_BRIDGE_H_ 177 | -------------------------------------------------------------------------------- /src/blackrock/worker.h: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #ifndef BLACKROCK_WORKER_H_ 18 | #define BLACKROCK_WORKER_H_ 19 | 20 | #include "common.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "local-persistent-registry.h" 28 | 29 | namespace kj { 30 | class Thread; 31 | } 32 | 33 | namespace blackrock { 34 | 35 | class NbdVolumeAdapter; 36 | 37 | struct ByteStringHash { 38 | inline size_t operator()(const kj::ArrayPtr& token) const { 39 | size_t result = 0; 40 | memcpy(&result, token.begin(), kj::min(sizeof(result), token.size())); 41 | return result; 42 | } 43 | inline size_t operator()(const kj::ArrayPtr& a, 44 | const kj::ArrayPtr& b) const { 45 | return a.size() == b.size() && memcmp(a.begin(), b.begin(), a.size()) == 0; 46 | } 47 | }; 48 | 49 | class PackageMountSet: private kj::TaskSet::ErrorHandler { 50 | public: 51 | explicit PackageMountSet(kj::AsyncIoContext& ioContext); 52 | ~PackageMountSet() noexcept(false); 53 | KJ_DISALLOW_COPY(PackageMountSet); 54 | 55 | class PackageMount: public kj::Refcounted { 56 | public: 57 | PackageMount(PackageMountSet& mountSet, kj::ArrayPtr id, 58 | kj::String path, Volume::Client volume, 59 | kj::Own nbdUserEnd, 60 | kj::AutoCloseFd nbdKernelEnd); 61 | ~PackageMount() noexcept(false); 62 | 63 | kj::ArrayPtr getId() { return id; } 64 | 65 | kj::StringPtr getPath() { return path; } 66 | 67 | kj::Promise whenReady() { return loaded.addBranch(); } 68 | 69 | kj::Promise onDisconnected() { return disconnected.addBranch(); } 70 | 71 | void updateVolume(Volume::Client newVolume); 72 | 73 | private: 74 | friend class PackageMountSet; 75 | 76 | PackageMountSet& mountSet; 77 | 78 | kj::Array id; 79 | // ID string assigned to this package. 80 | 81 | kj::String path; 82 | 83 | kj::Own volumeAdapter; 84 | kj::Promise volumeRunTask; 85 | 86 | kj::AsyncIoProvider::PipeThread nbdThread; 87 | // Thread which mounts the NBD device. Protocol as follows: 88 | // 1) thread -> main: 1 byte: The mount point is ready. 
89 | // 2) main -> thread: EOF: Please shut down. 90 | // 3) thread -> main: EOF: I've shut down now; it's safe to destroy the NbdVolumeAdapter and 91 | // join the thread. 92 | 93 | kj::ForkedPromise loaded; 94 | // Resolves when the thread reports that the mount point is active. 95 | 96 | bool unregistered = false; 97 | 98 | kj::ForkedPromise disconnected; 99 | // Resolves when this mount has been disconnecnted from storage and therefore will report I/O 100 | // errors. Grains using this package should attempt to shut down. 101 | 102 | void unregister(); 103 | }; 104 | 105 | kj::Promise> getPackage(PackageInfo::Reader package); 106 | 107 | void returnPackage(kj::Own package); 108 | // Grains "return" packages to the mount set where the package may remain mounted for some time 109 | // in case it is used again. 110 | 111 | private: 112 | kj::AsyncIoContext& ioContext; 113 | std::unordered_map, PackageMount*, 114 | ByteStringHash, ByteStringHash> mounts; 115 | uint64_t counter = 0; 116 | 117 | static byte dummyByte; 118 | // Target of pipe reads and writes where we don't care about the content. 
119 | 120 | kj::TaskSet tasks; 121 | 122 | void taskFailed(kj::Exception&& exception) override; 123 | }; 124 | 125 | class WorkerImpl: public Worker::Server, private kj::TaskSet::ErrorHandler { 126 | public: 127 | WorkerImpl(kj::AsyncIoContext& ioContext, sandstorm::SubprocessSet& subprocessSet, 128 | LocalPersistentRegistry& persistentRegistry); 129 | ~WorkerImpl() noexcept(false); 130 | 131 | protected: 132 | kj::Promise newGrain(NewGrainContext context) override; 133 | kj::Promise restoreGrain(RestoreGrainContext context) override; 134 | kj::Promise unpackPackage(UnpackPackageContext context) override; 135 | kj::Promise unpackBackup(UnpackBackupContext context) override; 136 | kj::Promise packBackup(PackBackupContext context) override; 137 | 138 | private: 139 | class RunningGrain; 140 | class PackageUploadStreamImpl; 141 | struct CommandInfo; 142 | 143 | kj::LowLevelAsyncIoProvider& ioProvider; 144 | sandstorm::SubprocessSet& subprocessSet; 145 | LocalPersistentRegistry& persistentRegistry; 146 | PackageMountSet packageMountSet; 147 | std::unordered_map> runningGrains; 148 | kj::TaskSet tasks; 149 | 150 | sandstorm::Supervisor::Client bootGrain( 151 | PackageInfo::Reader packageInfo, kj::Own grainState, 152 | sandstorm::Assignable::Setter::Client grainStateSetter, 153 | sandstorm::spk::Manifest::Command::Reader command, bool isNew, 154 | kj::String grainId, sandstorm::SandstormCore::Client core, 155 | kj::Own persistentRegistration); 156 | 157 | void taskFailed(kj::Exception&& exception) override; 158 | }; 159 | 160 | class SupervisorMain: public sandstorm::AbstractMain { 161 | // Like sandstorm::SupervisorMain, except that it sets itself up on the Blackrock VatNetwork. 
162 | 163 | public: 164 | SupervisorMain(kj::ProcessContext& context); 165 | 166 | kj::MainFunc getMain() override; 167 | 168 | kj::MainBuilder::Validity run(); 169 | 170 | private: 171 | kj::ProcessContext& context; 172 | sandstorm::SupervisorMain sandstormSupervisor; 173 | 174 | class SystemConnectorImpl; 175 | }; 176 | 177 | class MetaSupervisorMain: public sandstorm::AbstractMain { 178 | // A binary which is responsible for mounting nbd and then exec()ing the supervisor. 179 | 180 | public: 181 | MetaSupervisorMain(kj::ProcessContext& context); 182 | 183 | kj::MainFunc getMain() override; 184 | 185 | kj::MainBuilder::Validity run(); 186 | 187 | private: 188 | kj::ProcessContext& context; 189 | kj::StringPtr packageMount; 190 | kj::Vector args; 191 | bool isNew = false; 192 | }; 193 | 194 | class UnpackMain: public sandstorm::AbstractMain { 195 | // Thin wrapper around `spk unpack` for use by Blackrock worker. 196 | 197 | public: 198 | UnpackMain(kj::ProcessContext& context): context(context) {} 199 | 200 | kj::MainFunc getMain() override; 201 | 202 | kj::MainBuilder::Validity run(); 203 | 204 | private: 205 | kj::ProcessContext& context; 206 | }; 207 | 208 | class BackupMain: public sandstorm::AbstractMain { 209 | // Thin wrapper around backup/restore functionality for use by Blackrock worker. 210 | 211 | public: 212 | BackupMain(kj::ProcessContext& context): context(context) {} 213 | 214 | kj::MainFunc getMain() override; 215 | 216 | kj::MainBuilder::Validity run(kj::StringPtr filename); 217 | 218 | private: 219 | kj::ProcessContext& context; 220 | bool restore = false; 221 | }; 222 | 223 | } // namespace blackrock 224 | 225 | #endif // BLACKROCK_WORKER_H_ 226 | -------------------------------------------------------------------------------- /src/blackrock/distributed-blocks.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 
3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | // ************************************ 18 | // **** INCOMPLETE **** INCOMPLETE **** 19 | // ************************************ 20 | // 21 | // This file contains some initial ideas for data structures relating to a distributed block 22 | // storage system. At present we are not implementing this, but we might in the future. 23 | 24 | #include "common.h" 25 | #include 26 | 27 | namespace blackrock { 28 | namespace { 29 | 30 | struct UInt128 { 31 | uint64_t value[2]; 32 | }; 33 | 34 | struct UInt256 { 35 | uint64_t value[4]; 36 | }; 37 | 38 | struct Superblock { 39 | // First block of a physical disk which is part of the distributed block storage system. 40 | 41 | static constexpr UInt128 MAGIC = { { 0xcdf365304999bb98ull, 0xfd641ba781b04071ull } }; 42 | static constexpr uint16_t VERSION = 0; 43 | 44 | UInt128 magic; 45 | // Magic number indicating a Blackrock disk. Always set to MAGIC. 46 | 47 | UInt128 clusterId; 48 | // Number identifying the Blackrock cluster of which this disk is a part. 49 | 50 | uint16_t version; 51 | // Storage format version. Set to VERSION. 52 | 53 | uint8_t replicaId; 54 | // Each disk is part of one replica of the cluster's storage. If the underlying disk is already 55 | // considered robust, then there may be only one replica (replica zero). Otherwise, 2-3 replicas 56 | // are typical. 
All shards on one disk -- and preferrably all disks in one machine -- are 57 | // required to be in the same replica because otherwise it would defeat the purpose of replicas. 58 | 59 | uint8_t lgBucketCount; 60 | // Log base 2 of the number of hash table buckets in each local shard. 61 | 62 | uint8_t lgJournalSize; 63 | // Log base 2 of size of the journal, in multiples of sizeof(Transaction). (Technically a 64 | // Transaction is variable-width due to the trailing ref array.) 65 | 66 | uint8_t lgBlockCount; 67 | // Log base 2 of number of blocks (content) in each local shard. 68 | 69 | uint8_t shardCount; 70 | // Number of shards in this block device, minus 1 since there are never 0 shards. 71 | 72 | uint32_t shardIds[256]; 73 | // Each shard's location in the key space. shardIds of shards in a replica should be uniformly 74 | // distributed in the space of 32-bit integers. Each shard "owns" the IDs between its shardId 75 | // and the next higher shardId in the replica, using modular (wrap-around) arithmetic. 76 | // 77 | // The shardId for a particular block is (blockId >> (replicaId * 32)) % (1 << 32). Or, in 78 | // other words, if you defined it as uint32_t blockId[8], then shardId = blockId[replicaId]. 79 | // This is valid up to 8 replicas, which should be enough for anyone. 80 | // 81 | // Properties of this algorithm: 82 | // - Consistent hashing: adding a new shard only requires moving data from one other shard. 83 | // (But usually lots of shards are added at once.) 84 | // - Sharding of blocks is totally different between replicas, to avoid common hot spots. 85 | }; 86 | 87 | static_assert(sizeof(Superblock) < 4096, "Superblock is more than one block."); 88 | 89 | struct Bucket { 90 | // One bucket in the hashtable mapping block IDs to locally-stored blocks. 91 | 92 | UInt256 blockId; 93 | // Key. 0 = empty bucket. (The actual block 0 is never stored since it is known to map to the 94 | // block containing entirely zeros.) 
95 | 96 | unsigned isMutable :1; 97 | // If true, this is a mutable block. 98 | // 99 | // TODO(someday): Unclear if this flag is strictly necessary. 100 | 101 | unsigned reserved0 :3; 102 | // Must be zero. 103 | 104 | unsigned offset :28; 105 | // Location (index) of block content within the content table. With 4k blocks and 28 bits this 106 | // can address 1TB of data. 107 | 108 | uint32_t refcount; 109 | // Number of references to this block. Usually always one for mutable blocks. 110 | 111 | uint32_t revision; 112 | // Revision counter. Incremented whenever the Bucket changes, which for mutable blocks includes 113 | // when the block is overwritten since this is always accomplished by writing the new data to 114 | // a new location and then updating `offset`. 115 | 116 | uint32_t reserved1[5]; 117 | // Must be zero. 118 | // 119 | // TODO(someday): 120 | // - Record crypto nonce? (Could union with refcount.) 121 | // - Record location of the block in long-term storage. 122 | // - Implement policy for pushing blocks to long-term storage. 123 | // - Implement policy for purging blocks from local storage once they are in long-term storage. 124 | }; 125 | 126 | static_assert(sizeof(Bucket) == 64, "Bucket size changed!"); 127 | 128 | struct Transaction { 129 | uint64_t id; 130 | // Transaction ID. Assigned sequentially per-shard. 131 | 132 | uint64_t firstIncompleteTx; 133 | // The transaction ID of the first incomplete transaction at the time 134 | 135 | uint32_t bucketIndex; 136 | // Which bucket to overwrite. 137 | 138 | unsigned trim :1; 139 | // Whether to perform a trim of `trimIndex`. 140 | 141 | unsigned reserved0 :3; 142 | // Must be zero. 143 | 144 | unsigned trimIndex :28; 145 | // Block index (in block content table) which can be freed after this transaction. 
146 | 147 | uint64_t parentTxnId; 148 | uint32_t parentTxnShardId; 149 | // If `parentTxnId` is not ~0 then this transaction is occurring as a dependent of some other 150 | // transaction possibly occurring on a different shard. Until the parent transaction 151 | // completes, it's possible that we'll receive repeat requests to perform the child 152 | // transaction, which we'll need to de-dupe by noticing that it matches this journal entry. 153 | // Once we know the trigger transaction has completed, we can delete this journal entry. 154 | 155 | uint8_t refsAddedCount; 156 | uint8_t refsRemovedCount; 157 | // Number of block references added to or removed from this block as a result of this 158 | // transaction. The actual references are listed in the `refs` array. 159 | 160 | uint8_t reserved1[2]; 161 | 162 | uint32_t reserved2[6]; 163 | // Must be zero. 164 | // 165 | // TODO(someday): 166 | // - Verify valid transaction, e.g. with a checksum/hash, so that we can reliably find the end of 167 | // the journal after power failure. 168 | 169 | Bucket newBucket; 170 | // New bucket contents. 171 | 172 | UInt256 refs[]; 173 | // Array of references that were added to or removed from this block as a result of this 174 | // transaction. The referenced blocks will need to have their refcounts adjusted as part of this 175 | // transaction. The size of the array is `refsAddedCount + refsRemovedCount`, padded up to the 176 | // next `sizeof(Transaction)` boundary, so that all Transactions reside on such a boundary. 177 | }; 178 | 179 | static_assert(sizeof(Transaction) == 128, "Journal size changed!"); 180 | 181 | struct Block { 182 | // One block. Note that the content is normally encrypted by XORing with a ChaCha20 stream whose 183 | // key and nonce are determined differently depending on the block type. This struct defines 184 | // what the block looks like after decryption. 185 | 186 | union { 187 | byte data[4096]; 188 | // A regular data block containing bytes. 
189 | // 190 | // The block is encrypted using its own 256-bit BLAKE2b hash (salted with the cluster ID) as 191 | // the key, and a nonce of zero. 192 | 193 | UInt256 blockTableSegment[128]; 194 | // A block which contains a list of references to other blocks. 195 | // 196 | // Each element is the salted 256-bit BLAKE2b hash of the plaintext of a block, XORed with the 197 | // hash if the block contents were all-zero, so that the blockRef for an all-zero block is 198 | // all-zero. 199 | // 200 | // To get the block ID, hash this value again, then XOR that with the hash of an all-zero 201 | // blockRef, so that again the block ID of an all-zero block is all-zero. 202 | // 203 | // The block is encrypted using 204 | }; 205 | }; 206 | 207 | static_assert(sizeof(Block) == 4096, "Block size changed!"); 208 | 209 | } // namespace 210 | } // namespace blackrock 211 | -------------------------------------------------------------------------------- /src/blackrock/storage-tool.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "fs-storage.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | namespace blackrock { 32 | 33 | class StorageTool { 34 | public: 35 | StorageTool(kj::ProcessContext& context): context(context) {} 36 | 37 | kj::MainFunc getMain() { 38 | return kj::MainBuilder(context, "Blackrock", 39 | "Tool for probing the contents of Blackrock storage.") 40 | .addOption({'f', "fix"}, KJ_BIND_METHOD(*this, setFix), "fix it") 41 | .expectArg("", KJ_BIND_METHOD(*this, setUserId)) 42 | .expectArg("", KJ_BIND_METHOD(*this, setGrainId)) 43 | .callAfterParsing(KJ_BIND_METHOD(*this, run)) 44 | .build(); 45 | } 46 | 47 | private: 48 | kj::ProcessContext& context; 49 | kj::StringPtr userId; 50 | kj::StringPtr grainId; 51 | bool fix = false; 52 | 53 | typedef FilesystemStorage::ObjectId ObjectId; 54 | typedef FilesystemStorage::ObjectKey ObjectKey; 55 | 56 | enum class Type: uint8_t { 57 | // (zero skipped to help detect errors) 58 | BLOB = 1, 59 | VOLUME, 60 | IMMUTABLE, 61 | ASSIGNABLE, 62 | COLLECTION, 63 | OPAQUE, 64 | REFERENCE 65 | }; 66 | 67 | struct Xattr { 68 | // Format of the xattr block stored on each file. On ext4 we have about 76 bytes available in 69 | // the inode to store this attribute, but in theory this space could get smaller in the future, 70 | // so we should try to keep this minimal. 71 | 72 | static constexpr const char* NAME = "user.sandstor"; 73 | // Extended attribute name. Abbreviated to be 8 bytes to avoid losing space to alignment (ext4 74 | // doesn't store the "user." prefix). Actually short for "sandstore", not "sandstorm". :) 75 | 76 | Type type; 77 | 78 | bool readOnly; 79 | // For volumes, prevents the volume from being modified. 
For Blobs, indicates that initialization 80 | // has completed with a `done()` call, indicating the entire stream was received (otherwise, 81 | // either the stream is still uploading, or it failed to fully upload). Once set this 82 | // can never be unset. 83 | 84 | byte reserved[2]; 85 | // Must be zero. 86 | 87 | uint32_t accountedBlockCount; 88 | // The number of 4k blocks consumed by this object the last time we considered it for 89 | // accounting/quota purposes. The on-disk size could have changed in the meantime. 90 | 91 | uint64_t transitiveBlockCount; 92 | // The number of 4k blocks in this object and all child objects. 93 | 94 | ObjectId owner; 95 | // What object owns this one? 96 | }; 97 | 98 | class RawClientHook: public capnp::ClientHook, public kj::Refcounted { 99 | public: 100 | explicit RawClientHook(StoredObject::CapDescriptor::Reader descriptor) 101 | : descriptor(descriptor) {} 102 | 103 | StoredObject::CapDescriptor::Reader descriptor; 104 | 105 | capnp::Request newCall( 106 | uint64_t interfaceId, uint16_t methodId, kj::Maybe sizeHint) override { 107 | KJ_UNIMPLEMENTED("RawClientHook doesn't implement anything"); 108 | } 109 | 110 | VoidPromiseAndPipeline call(uint64_t interfaceId, uint16_t methodId, 111 | kj::Own&& context) override { 112 | KJ_UNIMPLEMENTED("RawClientHook doesn't implement anything"); 113 | } 114 | 115 | kj::Maybe getResolved() override { 116 | KJ_UNIMPLEMENTED("RawClientHook doesn't implement anything"); 117 | } 118 | 119 | kj::Maybe>> whenMoreResolved() override { 120 | KJ_UNIMPLEMENTED("RawClientHook doesn't implement anything"); 121 | } 122 | 123 | kj::Own addRef() override { 124 | return kj::addRef(*this); 125 | } 126 | 127 | const void* getBrand() override { 128 | return nullptr; 129 | } 130 | 131 | kj::Maybe getFd() override { return nullptr; } 132 | }; 133 | 134 | ObjectKey getUser(kj::StringPtr userId) { 135 | capnp::StreamFdMessageReader reader(sandstorm::raiiOpen( 136 | kj::str("roots/user-", userId), O_RDONLY)); 
137 | return reader.getRoot().getKey(); 138 | } 139 | 140 | ObjectKey getGrain(ObjectKey user, kj::StringPtr grainId) { 141 | auto fd = sandstorm::raiiOpen(kj::str("main/", ObjectId(user).filename('o').begin()), O_RDONLY); 142 | 143 | auto children = ({ 144 | capnp::StreamFdMessageReader reader(fd.get()); 145 | KJ_MAP(c, reader.getRoot().getChildren()) -> ObjectId { return c; }; 146 | }); 147 | 148 | capnp::StreamFdMessageReader reader(fd.get()); 149 | auto object = reader.getRoot(); 150 | 151 | capnp::ReaderCapabilityTable capTable(KJ_MAP(cap, object.getCapTable()) 152 | -> kj::Maybe> { 153 | return kj::Own(kj::refcounted(cap)); 154 | }); 155 | 156 | auto imbued = capTable.imbue(object); 157 | 158 | for (auto grain: imbued.getPayload().getAs().getGrains()) { 159 | if (grain.getId() == grainId) { 160 | auto descriptor = capnp::ClientHook::from(grain.getState()) 161 | .downcast()->descriptor; 162 | KJ_ASSERT(descriptor.isChild(), descriptor); 163 | return descriptor.getChild(); 164 | } 165 | } 166 | 167 | KJ_FAIL_REQUIRE("user had no such grain"); 168 | } 169 | 170 | ObjectKey getVolume(ObjectKey grain) { 171 | auto fd = sandstorm::raiiOpen(kj::str( 172 | "main/", ObjectId(grain).filename('o').begin()), O_RDONLY); 173 | 174 | auto children = ({ 175 | capnp::StreamFdMessageReader reader(fd.get()); 176 | KJ_MAP(c, reader.getRoot().getChildren()) -> ObjectId { return c; }; 177 | }); 178 | 179 | capnp::StreamFdMessageReader reader(fd.get()); 180 | auto object = reader.getRoot(); 181 | 182 | capnp::ReaderCapabilityTable capTable(KJ_MAP(cap, object.getCapTable()) 183 | -> kj::Maybe> { 184 | return kj::Own(kj::refcounted(cap)); 185 | }); 186 | 187 | auto imbued = capTable.imbue(object); 188 | auto volume = imbued.getPayload().getAs().getVolume(); 189 | auto descriptor = capnp::ClientHook::from(volume).downcast()->descriptor; 190 | KJ_ASSERT(descriptor.isChild(), descriptor); 191 | return descriptor.getChild(); 192 | } 193 | 194 | bool setUserId(kj::StringPtr arg) { 195 | 
userId = arg; 196 | return true; 197 | } 198 | 199 | bool setGrainId(kj::StringPtr arg) { 200 | grainId = arg; 201 | return true; 202 | } 203 | 204 | bool setFix() { 205 | fix = true; 206 | return true; 207 | } 208 | 209 | bool run() { 210 | auto grain = getGrain(getUser(userId), grainId); 211 | auto volume = getVolume(grain); 212 | 213 | auto filename = kj::str("main/", ObjectId(volume).filename('o').begin()); 214 | struct stat stats; 215 | KJ_SYSCALL(stat(filename.cStr(), &stats)); 216 | 217 | Xattr expected; 218 | memset(&expected, 0, sizeof(expected)); 219 | expected.type = Type::VOLUME; 220 | expected.accountedBlockCount = stats.st_blocks / 8; 221 | expected.transitiveBlockCount = expected.accountedBlockCount; 222 | expected.owner = grain; 223 | 224 | Xattr xattr; 225 | ssize_t n = getxattr(filename.cStr(), Xattr::NAME, &xattr, sizeof(xattr)); 226 | if (n < 0) { 227 | context.error(kj::str("missing xattr:", strerror(errno))); 228 | 229 | if (fix) { 230 | KJ_SYSCALL(setxattr(filename.cStr(), Xattr::NAME, &expected, sizeof(expected), XATTR_CREATE)); 231 | } 232 | } else if (n != sizeof(xattr)) { 233 | context.error(kj::str("unexpected xattr size: ", n)); 234 | } else if (memcmp(&xattr, &expected, sizeof(xattr)) != 0) { 235 | KJ_LOG(ERROR, (uint)expected.type, (uint)xattr.type); 236 | KJ_LOG(ERROR, expected.accountedBlockCount, xattr.accountedBlockCount); 237 | KJ_LOG(ERROR, expected.transitiveBlockCount, xattr.transitiveBlockCount); 238 | KJ_LOG(ERROR, expected.owner.filename('o').begin(), xattr.owner.filename('o').begin()); 239 | context.error("xattrs don't match"); 240 | 241 | if (fix) { 242 | KJ_SYSCALL(setxattr(filename.cStr(), Xattr::NAME, &expected, sizeof(expected), XATTR_REPLACE)); 243 | } 244 | } else { 245 | context.warning("xattrs match expected"); 246 | } 247 | 248 | context.exitInfo(kj::str(ObjectId(grain).filename('o').begin(), ' ', 249 | ObjectId(volume).filename('o').begin())); 250 | } 251 | }; 252 | 253 | } // namespace blackrock 254 | 255 | 
KJ_MAIN(blackrock::StorageTool) 256 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | Copyright 2015 Sandstorm Development Group, Inc. 179 | 180 | Licensed under the Apache License, Version 2.0 (the "License"); 181 | you may not use this file except in compliance with the License. 
182 | You may obtain a copy of the License at 183 | 184 | http://www.apache.org/licenses/LICENSE-2.0 185 | 186 | Unless required by applicable law or agreed to in writing, software 187 | distributed under the License is distributed on an "AS IS" BASIS, 188 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 189 | See the License for the specific language governing permissions and 190 | limitations under the License. 191 | -------------------------------------------------------------------------------- /src/blackrock/gateway.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2017 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 
16 | 17 | #include "gateway.h" 18 | #include 19 | 20 | namespace blackrock { 21 | 22 | void GatewayImpl::EntropySourceImpl::generate(kj::ArrayPtr buffer) { 23 | randombytes(buffer.begin(), buffer.size()); 24 | } 25 | 26 | GatewayImpl::GatewayImpl(kj::Timer& timer, kj::Network& network, FrontendConfig::Reader config) 27 | : GatewayImpl(timer, network, config, kj::HttpHeaderTable::Builder()) {} 28 | 29 | GatewayImpl::GatewayImpl(kj::Timer& timer, kj::Network& network, FrontendConfig::Reader config, 30 | kj::HttpHeaderTable::Builder headerTableBuilder) 31 | : timer(timer), network(network), 32 | gatewayServiceTables(headerTableBuilder), 33 | hXRealIp(headerTableBuilder.add("X-Real-IP")), 34 | headerTable(headerTableBuilder.build()), 35 | httpServer(timer, *headerTable, [this](kj::AsyncIoStream& conn) { 36 | return kj::heap(static_cast(*this), hXRealIp, conn); 37 | }), 38 | altPortService(*this, *headerTable, config.getBaseUrl(), config.getWildcardHost()), 39 | altPortHttpServer(timer, *headerTable, altPortService), 40 | smtpServer(*this), 41 | tlsManager(httpServer, smtpServer, config.hasPrivateKeyPassword() 42 | ? 
kj::Maybe(config.getPrivateKeyPassword()) 43 | : kj::Maybe(nullptr)), 44 | tasks(*this) { 45 | clientSettings.entropySource = entropySource; 46 | 47 | setConfig(config); 48 | 49 | if (config.getBaseUrl().startsWith("https://")) { 50 | tasks.add(network.parseAddress("*", 80) 51 | .then([this](kj::Own&& addr) { 52 | auto listener = addr->listen(); 53 | auto promise = altPortHttpServer.listenHttp(*listener); 54 | return promise.attach(kj::mv(listener)); 55 | })); 56 | 57 | tasks.add(network.parseAddress("*", 443) 58 | .then([this](kj::Own&& addr) { 59 | auto listener = addr->listen(); 60 | auto promise = tlsManager.listenHttps(*listener); 61 | return promise.attach(kj::mv(listener)); 62 | })); 63 | } else { 64 | tasks.add(network.parseAddress("*", 80) 65 | .then([this](kj::Own&& addr) { 66 | auto listener = addr->listen(); 67 | auto promise = httpServer.listenHttp(*listener); 68 | return promise.attach(kj::mv(listener)); 69 | })); 70 | } 71 | 72 | tasks.add(network.parseAddress("*", 25) 73 | .then([this](kj::Own&& addr) { 74 | auto listener = addr->listen(); 75 | auto promise = tlsManager.listenSmtp(*listener); 76 | return promise.attach(kj::mv(listener)); 77 | })); 78 | 79 | tasks.add(network.parseAddress("*", 465) 80 | .then([this](kj::Own&& addr) { 81 | auto listener = addr->listen(); 82 | auto promise = tlsManager.listenSmtps(*listener); 83 | return promise.attach(kj::mv(listener)); 84 | })); 85 | 86 | capnp::Capability::Client masterGateway = kj::refcounted([this]() { 87 | return chooseReplica(roundRobinCounter++) 88 | .then([](kj::Own replica) -> capnp::Capability::Client { 89 | return replica->router; 90 | }); 91 | }); 92 | 93 | tasks.add(tlsManager.subscribeKeys(masterGateway.castAs())); 94 | } 95 | 96 | void GatewayImpl::setConfig(FrontendConfig::Reader config) { 97 | configMessage = kj::heap(); 98 | configMessage->setRoot(config); 99 | this->config = configMessage->getRoot(); 100 | wildcardHost = sandstorm::WildcardMatcher(config.getWildcardHost()); 101 | 
102 | // TODO(soon): Update all GatewayService instances to new config. 103 | } 104 | 105 | kj::Promise GatewayImpl::request( 106 | kj::HttpMethod method, kj::StringPtr url, const kj::HttpHeaders& headers, 107 | kj::AsyncInputStream& requestBody, Response& response) { 108 | return chooseReplica(urlSessionHash(url, headers)) 109 | .then([this,method,url,&headers,&requestBody,&response](kj::Own replica) { 110 | auto promise = replica->service.request(method, url, headers, requestBody, response); 111 | return promise.attach(kj::mv(replica)); 112 | }); 113 | } 114 | 115 | kj::Promise GatewayImpl::reset(ResetContext context) { 116 | shellReplicas.clear(); 117 | 118 | auto params = context.getParams(); 119 | auto promises = KJ_MAP(backend, params.getBackends()) { 120 | return addFrontend(backend.getId(), backend.getBackend()); 121 | }; 122 | context.releaseParams(); 123 | return kj::joinPromises(kj::mv(promises)); 124 | } 125 | 126 | kj::Promise GatewayImpl::add(AddContext context) { 127 | auto params = context.getParams(); 128 | auto promise = addFrontend(params.getId(), params.getBackend()); 129 | context.releaseParams(); 130 | return promise; 131 | } 132 | 133 | kj::Promise GatewayImpl::remove(RemoveContext context) { 134 | uint64_t backendId = context.getParams().getId(); 135 | 136 | for (auto& replica: shellReplicas) { 137 | KJ_IF_MAYBE(r, replica) { 138 | if (r->get()->backendId == backendId) { 139 | replica = nullptr; 140 | } 141 | } 142 | } 143 | 144 | return kj::READY_NOW; 145 | } 146 | 147 | kj::Promise> GatewayImpl::SmtpNetworkAddressImpl::connect() { 148 | return gateway.chooseReplica(gateway.roundRobinCounter++) 149 | .then([this](kj::Own&& replica) { 150 | auto promise = replica->smtpAddress->connect(); 151 | return promise.attach(kj::mv(replica)); 152 | }); 153 | } 154 | 155 | GatewayImpl::ShellReplica::ShellReplica( 156 | GatewayImpl& gateway, uint64_t backendId, Frontend::Instance::Reader instance) 157 | : backendId(backendId), 158 | 
httpAddress(SimpleAddress(instance.getHttpAddress()).onNetwork(gateway.network)), 159 | smtpAddress(SimpleAddress(instance.getSmtpAddress()).onNetwork(gateway.network)), 160 | shellHttp(kj::newHttpClient(gateway.timer, *gateway.headerTable, *httpAddress, 161 | gateway.clientSettings)), 162 | router(instance.getRouter()), 163 | service(gateway.timer, *shellHttp, router, gateway.gatewayServiceTables, 164 | gateway.config.getBaseUrl(), gateway.config.getWildcardHost(), 165 | gateway.config.hasTermsPublicId() 166 | ? kj::Maybe(gateway.config.getTermsPublicId()) 167 | : kj::Maybe(nullptr)), 168 | cleanupLoop(service.cleanupLoop().eagerlyEvaluate([](kj::Exception&& e) { 169 | KJ_LOG(FATAL, "cleanupLoop() threw", e); 170 | abort(); 171 | })) {} 172 | 173 | kj::Promise GatewayImpl::addFrontend(uint64_t backendId, Frontend::Client frontend) { 174 | return frontend.getInstancesRequest().send() 175 | .then([this,backendId](capnp::Response&& response) { 176 | auto newInstances = response.getInstances(); 177 | for (auto instance: newInstances) { 178 | kj::Maybe> replica = 179 | kj::refcounted(*this, backendId, instance); 180 | for (auto& slot: shellReplicas) { 181 | if (slot == nullptr) { 182 | slot = kj::mv(replica); 183 | break; 184 | } 185 | } 186 | if (replica != nullptr) { 187 | shellReplicas.add(kj::mv(replica)); 188 | } 189 | } 190 | 191 | KJ_IF_MAYBE(r, readyPaf) { 192 | r->fulfiller->fulfill(); 193 | readyPaf = nullptr; 194 | } 195 | }); 196 | } 197 | 198 | kj::Promise> GatewayImpl::chooseReplica(uint64_t hash) { 199 | std::set eliminated; 200 | while (eliminated.size() < shellReplicas.size()) { 201 | size_t bucket = hash % (shellReplicas.size() - eliminated.size()); 202 | for (auto e: eliminated) { 203 | if (bucket >= e) { 204 | ++bucket; 205 | } else { 206 | break; 207 | } 208 | } 209 | 210 | KJ_ASSERT(bucket < shellReplicas.size()); 211 | KJ_IF_MAYBE(replica, shellReplicas[bucket]) { 212 | return kj::addRef(**replica); 213 | } 214 | 215 | 
KJ_ASSERT(eliminated.insert(bucket).second); 216 | } 217 | 218 | if (readyPaf == nullptr) { 219 | auto paf = kj::newPromiseAndFulfiller(); 220 | readyPaf = ReadyPair { paf.promise.fork(), kj::mv(paf.fulfiller) }; 221 | } 222 | 223 | return KJ_ASSERT_NONNULL(readyPaf).promise.addBranch().then([this,hash]() { 224 | return chooseReplica(hash); 225 | }); 226 | } 227 | 228 | static bool isAllHex(kj::StringPtr text) { 229 | for (char c: text) { 230 | if ((c < '0' || '9' < c) && 231 | (c < 'a' || 'f' < c) && 232 | (c < 'A' || 'F' < c)) { 233 | return false; 234 | } 235 | } 236 | 237 | return true; 238 | } 239 | 240 | uint64_t GatewayImpl::urlSessionHash(kj::StringPtr url, const kj::HttpHeaders& headers) { 241 | KJ_IF_MAYBE(hostId, wildcardHost.match(headers)) { 242 | if (hostId->startsWith("ui-") || hostId->startsWith("api-") || 243 | (hostId->size() == 20 && isAllHex(*hostId))) { 244 | // These cases are really served by a grain, and we only use a shell to connect to the right 245 | // grain. We bucket on hostname so that a particular grain is always looked up from the same 246 | // shell and through the same local grain capability cache. The hostname ends in hex, so we 247 | // can just parse it. 248 | KJ_ASSERT(hostId->size() >= 20); 249 | auto hex = hostId->slice(hostId->size() - 16); 250 | char* end; 251 | auto result = strtoull(hex.begin(), &end, 16); 252 | KJ_REQUIRE(end == hex.end(), "invalid hostname", *hostId); 253 | return result; 254 | } 255 | } 256 | 257 | // Recognize paths beginning with `sockjs` as probably being Meteor DDP connections. 258 | // 259 | // TODO(cleanup): Currently every installation can configure DDP to happen on an arbitrary host, 260 | // as long as it maps to the server and doesn't already have some other designated purpose. We 261 | // should probably standardize on the wildcard host ID "ddp" instead. 
262 | auto parsedUrl = kj::Url::parse(url, kj::Url::HTTP_REQUEST); 263 | if (parsedUrl.path.size() >= 2 && 264 | parsedUrl.path[0] == "sockjs") { 265 | // SockJS connections provide a 3-decimal-digit server ID in the path. BUT, it also has some 266 | // other endpoints like "info", so parse carefully. 267 | char* end; 268 | auto result = strtoul(parsedUrl.path[1].cStr(), &end, 10); 269 | if (end == parsedUrl.path[1].end()) { 270 | return result; 271 | } 272 | } 273 | 274 | // Anything else is probably a static asset. We hash the URL to make upstream caching more 275 | // efficient -- but probably these requests don't need to be load balanced anyway because CDN 276 | // caching ought to kick in here. 277 | 278 | // djb hash with xor 279 | // TODO(someday): Add hashing library to KJ. 280 | uint64_t result = 5381; 281 | for (char c: url) { 282 | result = (result * 33) ^ c; 283 | } 284 | return result; 285 | } 286 | 287 | void GatewayImpl::taskFailed(kj::Exception&& exception) { 288 | KJ_LOG(FATAL, exception); 289 | 290 | // Better restart since we may be in a degraded state. 291 | abort(); 292 | } 293 | 294 | } // namespace blackrock 295 | -------------------------------------------------------------------------------- /src/blackrock/gce.c++: -------------------------------------------------------------------------------- 1 | // Sandstorm Blackrock 2 | // Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | // All Rights Reserved 4 | // 5 | // Licensed under the Apache License, Version 2.0 (the "License"); 6 | // you may not use this file except in compliance with the License. 7 | // You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, software 12 | // distributed under the License is distributed on an "AS IS" BASIS, 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | // See the License for the specific language governing permissions and 15 | // limitations under the License. 16 | 17 | #include "gce.h" 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | namespace blackrock { 25 | 26 | namespace { 27 | 28 | // TODO(cleanup): Share this code with version in master.c++. 29 | kj::Promise readAllAsync(kj::AsyncInputStream& input, 30 | kj::Vector buffer = kj::Vector()) { 31 | buffer.resize(buffer.size() + 4096); 32 | auto promise = input.tryRead(buffer.end() - 4096, 4096, 4096); 33 | return promise.then([KJ_MVCAP(buffer),&input](size_t n) mutable -> kj::Promise { 34 | if (n < 4096) { 35 | buffer.resize(buffer.size() - 4096 + n); 36 | buffer.add('\0'); 37 | return kj::String(buffer.releaseAsArray()); 38 | } else { 39 | return readAllAsync(input, kj::mv(buffer)); 40 | } 41 | }); 42 | } 43 | 44 | static kj::String getImageName() { 45 | char buffer[256]; 46 | ssize_t n; 47 | KJ_SYSCALL(n = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1)); 48 | buffer[n] = '\0'; 49 | kj::StringPtr exeName(buffer); 50 | return sandstorm::trim(exeName.slice(KJ_ASSERT_NONNULL(exeName.findLast('/')) + 1)); 51 | } 52 | 53 | } // namespace 54 | 55 | GceDriver::GceDriver(sandstorm::SubprocessSet& subprocessSet, 56 | kj::LowLevelAsyncIoProvider& ioProvider, 57 | GceConfig::Reader config) 58 | : subprocessSet(subprocessSet), ioProvider(ioProvider), config(config), image(getImageName()), 59 | masterBindAddress(SimpleAddress::getInterfaceAddress(AF_INET, "eth0")), 60 | logTask(nullptr), logSinkAddress(masterBindAddress) { 61 | // Create socket for the log sink acceptor. 
62 | int sock; 63 | KJ_SYSCALL(sock = socket(masterBindAddress.family(), 64 | SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0)); 65 | { 66 | KJ_ON_SCOPE_FAILURE(close(sock)); 67 | logSinkAddress.setPort(0); 68 | KJ_SYSCALL(bind(sock, logSinkAddress.asSockaddr(), logSinkAddress.getSockaddrSize())); 69 | KJ_SYSCALL(listen(sock, SOMAXCONN)); 70 | 71 | // Read back the assigned port number. 72 | logSinkAddress = SimpleAddress::getLocal(sock); 73 | } 74 | 75 | // Accept log connections. 76 | auto listener = ioProvider.wrapListenSocketFd(sock, 77 | kj::LowLevelAsyncIoProvider::TAKE_OWNERSHIP | 78 | kj::LowLevelAsyncIoProvider::ALREADY_CLOEXEC | 79 | kj::LowLevelAsyncIoProvider::ALREADY_NONBLOCK); 80 | 81 | logTask = logSink.acceptLoop(kj::mv(listener)) 82 | .eagerlyEvaluate([](kj::Exception&& exception) { 83 | KJ_LOG(ERROR, "LogSink accept loop failed", exception); 84 | }); 85 | } 86 | 87 | GceDriver::~GceDriver() noexcept(false) {} 88 | 89 | SimpleAddress GceDriver::getMasterBindAddress() { 90 | return masterBindAddress; 91 | } 92 | 93 | auto GceDriver::listMachines() -> kj::Promise> { 94 | int fds[2]; 95 | KJ_SYSCALL(pipe2(fds, O_CLOEXEC)); 96 | kj::AutoCloseFd writeEnd(fds[1]); 97 | auto input = ioProvider.wrapInputFd(fds[0], 98 | kj::LowLevelAsyncIoProvider::Flags::TAKE_OWNERSHIP | 99 | kj::LowLevelAsyncIoProvider::Flags::ALREADY_CLOEXEC); 100 | 101 | // TODO(cleanup): Use `--format json` here (but then we need a json parser). 102 | auto exitPromise = gceCommand({"instances", "list", "--format", "text", "-q"}, 103 | STDIN_FILENO, writeEnd); 104 | 105 | auto outputPromise = readAllAsync(*input); 106 | return outputPromise.attach(kj::mv(input)) 107 | .then([this,KJ_MVCAP(exitPromise)](kj::String allText) mutable { 108 | kj::Vector result; 109 | 110 | kj::StringPtr text = allText; 111 | kj::Maybe lastSeenMachine; 112 | kj::Vector> promises; 113 | 114 | promises.add(kj::mv(exitPromise)); 115 | 116 | // Parse lines until there are no more. 
117 | while (text.size() > 0) { 118 | uint eol = KJ_ASSERT_NONNULL(text.findFirst('\n')); 119 | 120 | // Look for "name:" lines, which are instance names. Ignore everything else. 121 | if (text.startsWith("name:")) { 122 | auto name = sandstorm::trim(text.slice(strlen("name:"), eol)); 123 | if (!name.startsWith("master") && 124 | !name.startsWith("build") && 125 | !name.startsWith("nginx")) { 126 | lastSeenMachine = MachineId(name); 127 | } 128 | } else if (text.startsWith("tags.items[")) { 129 | auto name = sandstorm::trim(text.slice(KJ_ASSERT_NONNULL(text.findFirst(':')) + 1, eol)); 130 | if (name == image) { 131 | // Cool, this machine has the right tag, so it checks out. 132 | result.add(KJ_ASSERT_NONNULL(lastSeenMachine)); 133 | lastSeenMachine = nullptr; 134 | } 135 | } else if (text.startsWith("---")) { 136 | KJ_IF_MAYBE(machine, lastSeenMachine) { 137 | KJ_LOG(INFO, "shutting down machine running old image", *machine); 138 | promises.add(stop(*machine)); 139 | lastSeenMachine = nullptr; 140 | } 141 | } 142 | 143 | text = text.slice(eol + 1); 144 | } 145 | 146 | KJ_IF_MAYBE(machine, lastSeenMachine) { 147 | KJ_LOG(INFO, "shutting down machine running old image", *machine); 148 | promises.add(stop(*machine)); 149 | lastSeenMachine = nullptr; 150 | } 151 | 152 | return kj::joinPromises(promises.releaseAsArray()) 153 | .then([KJ_MVCAP(result)]() mutable { return result.releaseAsArray(); }); 154 | }); 155 | } 156 | 157 | kj::Promise GceDriver::boot(MachineId id) { 158 | kj::Vector args; 159 | kj::Vector scratch; 160 | auto idStr = kj::str(id); 161 | auto tagStr = kj::str("--tags=", image); 162 | args.addAll(std::initializer_list 163 | { "instances", "create", idStr, "--image", image, "--no-scopes", "-q" }); 164 | kj::StringPtr startupScript; 165 | kj::StringPtr instanceType; 166 | switch (id.type) { 167 | case ComputeDriver::MachineType::STORAGE: { 168 | instanceType = config.getInstanceTypes().getStorage(); 169 | 170 | // Attach necessary disk. 
171 | auto param = kj::str("--disk=name=", id, "-data,mode=rw,device-name=blackrock"); 172 | args.add(param); 173 | scratch.add(kj::mv(param)); 174 | startupScript = 175 | "#! /bin/sh\n" 176 | "mkdir -p /var/blackrock/storage\n" 177 | "mount /dev/disk/by-id/google-blackrock /var/blackrock/storage\n"; 178 | break; 179 | } 180 | 181 | case ComputeDriver::MachineType::WORKER: 182 | instanceType = config.getInstanceTypes().getWorker(); 183 | break; 184 | 185 | case ComputeDriver::MachineType::COORDINATOR: 186 | instanceType = config.getInstanceTypes().getCoordinator(); 187 | break; 188 | 189 | case ComputeDriver::MachineType::FRONTEND: 190 | instanceType = config.getInstanceTypes().getFrontend(); 191 | break; 192 | 193 | case ComputeDriver::MachineType::MONGO: { 194 | instanceType = config.getInstanceTypes().getMongo(); 195 | 196 | // Attach necessary disk. 197 | auto param = kj::str("--disk=name=", id, "-data,mode=rw,device-name=blackrock"); 198 | args.add(param); 199 | scratch.add(kj::mv(param)); 200 | startupScript = 201 | "#! /bin/sh\n" 202 | "mkdir -p /var/blackrock/bundle\n" 203 | "mount /dev/disk/by-id/google-blackrock /var/blackrock/bundle\n"; 204 | break; 205 | } 206 | 207 | case ComputeDriver::MachineType::GATEWAY: { 208 | instanceType = config.getInstanceTypes().getGateway(); 209 | 210 | // Tag to accept HTTP and SMTP traffic. 211 | tagStr = kj::str(tagStr, ",http,smtp"); 212 | 213 | // Assign static IP address if configured. 214 | auto addrs = config.getGatewayAddresses(); 215 | if (id.index < addrs.size()) { 216 | args.add("--address"); 217 | args.add(addrs[id.index]); 218 | } 219 | 220 | break; 221 | } 222 | } 223 | 224 | args.add(tagStr); 225 | 226 | args.add("--machine-type"); 227 | args.add(instanceType); 228 | 229 | if (startupScript == nullptr) { 230 | return gceCommand(args); 231 | } else { 232 | // We'll pass the startup script via stdin. 
233 | args.add("--metadata-from-file=startup-script=/dev/stdin"); 234 | 235 | // No need for async pipe since the startup script almost certainly won't fill the pipe buffer 236 | // anyhow, and even if it did, the tool immediately reads it before doing other stuff. 237 | auto pipe = sandstorm::Pipe::make(); 238 | auto promise = gceCommand(args, pipe.readEnd); 239 | pipe.readEnd = nullptr; 240 | kj::FdOutputStream(kj::mv(pipe.writeEnd)).write(startupScript.begin(), startupScript.size()); 241 | return kj::mv(promise); 242 | } 243 | } 244 | 245 | kj::Promise GceDriver::run( 246 | MachineId id, blackrock::VatId::Reader masterVatId, bool requireRestartProcess) { 247 | kj::String name = kj::str(id); 248 | 249 | int fds[2]; 250 | KJ_SYSCALL(pipe2(fds, O_CLOEXEC)); 251 | kj::AutoCloseFd stdinReadEnd(fds[0]); 252 | auto stdinWriteEnd = ioProvider.wrapOutputFd(fds[1], 253 | kj::LowLevelAsyncIoProvider::Flags::TAKE_OWNERSHIP | 254 | kj::LowLevelAsyncIoProvider::Flags::ALREADY_CLOEXEC); 255 | KJ_SYSCALL(pipe2(fds, O_CLOEXEC)); 256 | kj::AutoCloseFd stdoutWriteEnd(fds[1]); 257 | auto stdoutReadEnd = ioProvider.wrapInputFd(fds[0], 258 | kj::LowLevelAsyncIoProvider::Flags::TAKE_OWNERSHIP | 259 | kj::LowLevelAsyncIoProvider::Flags::ALREADY_CLOEXEC); 260 | 261 | auto addr = kj::str(logSinkAddress, '/', name); 262 | auto target = kj::str("root@", name); 263 | kj::Vector args; 264 | auto command = kj::str("/blackrock/bin/blackrock slave --log ", addr, " if4:eth0"); 265 | args.addAll(kj::ArrayPtr({ 266 | "ssh", target, "--command", command, "-q"})); 267 | if (requireRestartProcess) args.add("-r"); 268 | 269 | auto exitPromise = gceCommand(args, stdinReadEnd, stdoutWriteEnd); 270 | 271 | auto message = kj::heap(masterVatId.totalSize().wordCount + 4); 272 | message->setRoot(masterVatId); 273 | 274 | auto& stdoutReadEndRef = *stdoutReadEnd; 275 | return capnp::writeMessage(*stdinWriteEnd, *message) 276 | .attach(kj::mv(stdinWriteEnd), kj::mv(message)) 277 | .then([&stdoutReadEndRef]() { 
278 | return capnp::readMessage(stdoutReadEndRef); 279 | }).then([this,id,KJ_MVCAP(exitPromise),KJ_MVCAP(stdoutReadEnd)]( 280 | kj::Own reader) mutable { 281 | auto path = reader->getRoot(); 282 | vatPaths[id] = kj::mv(reader); 283 | return exitPromise.then([path]() { return path; }); 284 | }); 285 | } 286 | 287 | kj::Promise GceDriver::stop(MachineId id) { 288 | return gceCommand({"instances", "delete", kj::str(id), "-q"}); 289 | } 290 | 291 | kj::Promise GceDriver::gceCommand(kj::ArrayPtr args, 292 | int stdin, int stdout) { 293 | auto fullArgs = kj::heapArrayBuilder(args.size() + 4); 294 | fullArgs.add("gcloud"); 295 | fullArgs.add("--project"); 296 | fullArgs.add(config.getProject()); 297 | fullArgs.add("compute"); 298 | fullArgs.addAll(args); 299 | 300 | kj::Vector env; 301 | auto newEnv = kj::str("CLOUDSDK_COMPUTE_ZONE=", config.getZone()); 302 | env.add(newEnv); 303 | for (char** envp = environ; *envp != nullptr; ++envp) { 304 | kj::StringPtr e = *envp; 305 | if (!e.startsWith("CLOUDSDK_COMPUTE_ZONE=")) { 306 | env.add(e); 307 | } 308 | } 309 | 310 | sandstorm::Subprocess::Options options(fullArgs.finish()); 311 | auto command = kj::strArray(options.argv, " "); 312 | KJ_LOG(INFO, command); 313 | options.stdin = stdin; 314 | options.stdout = stdout; 315 | options.environment = kj::ArrayPtr(env); 316 | return subprocessSet.waitForSuccess(kj::mv(options)); 317 | } 318 | 319 | } // namespace blackrock 320 | 321 | -------------------------------------------------------------------------------- /src/blackrock/cluster-rpc.capnp: -------------------------------------------------------------------------------- 1 | # Sandstorm Blackrock 2 | # Copyright (c) 2015 Sandstorm Development Group, Inc. 3 | # All Rights Reserved 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@0xf49ffd606012a28b;

$import "/capnp/c++.capnp".namespace("blackrock");
using GenericPersistent = import "/capnp/persistent.capnp".Persistent;

struct VatId {
  # Identifies a machine in the cluster.
  #
  # Note that "vats" are expected to be somewhat ephemeral, as machines may be rotated in and out
  # of the cluster on a regular basis. In particular, it is important that upon compromise of a
  # vat's private key, the machine can simply be wiped and restarted with a new key without
  # significant long-term damage. Vats should probably be cycled in this way on a regular basis
  # (perhaps, every few days) even if no compromise is known to have occurred.

  publicKey0 @0 :UInt64;
  publicKey1 @1 :UInt64;
  publicKey2 @2 :UInt64;
  publicKey3 @3 :UInt64;
  # The Vat's Curve25519 public key, interpreted as little-endian.
}

struct Address {
  # Address at which you might connect to a vat. Used for three-party hand-offs.
  #
  # Note that any vat that listens for connections on a port should also listen for unix domain
  # socket connections on the "abstract" name "sandstorm-", so that other vats on the same machine
  # can connect via Unix sockets rather than IP.
  # (NOTE(review): a name suffix appears to have been lost from the quoted abstract-socket name
  # in this copy of the file -- confirm against upstream.)

  lower64 @0 :UInt64;
  upper64 @1 :UInt64;
  # Bits of the IPv6 address. Since IP is a big-endian spec, the "lower" bits are on the right,
  # and the "upper" bits on the left. E.g., if the address is "1:2:3:4:5:6:7:8", then the lower
  # 64 bits are "5:6:7:8" or 0x0005000600070008 while the upper 64 bits are "1:2:3:4" or
  # 0x0001000200030004.
  #
  # Note that for an IPv4 address, according to the standard IPv4-mapped IPv6 address rules, you
  # would use code like this:
  #     uint32 ipv4 = (octet[0] << 24) | (octet[1] << 16) | (octet[2] << 8) | octet[3];
  #     dest.setLower64(0x0000FFFF00000000 | ipv4);
  #     dest.setUpper64(0);

  port @2 :UInt16;
}

struct VatPath {
  # Enough information to connect to a vat securely.

  id @0 :VatId;
  address @1 :Address;
}

struct SturdyRef {
  # Parameterization of SturdyRef for Sandstorm internal traffic.

  struct Owner {
    # Owner of a SturdyRef, for sealing purposes. See discussion of sealing in
    # import "/capnp/persistent.capnp".Persistent.

    union {
      vat @0 :VatId;
      # The domain of a single vat. Use this domain when saving refs in the vat's local storage.

      storage @1 :Void;
      # The domain of the storage system. Use when saving refs in long-term storage.

      coordinator @2 :Void;
      # The domain of the coordinators. Use when generating a `hosted` SturdyRef.

      gateway @3 :Void;
      # The domain of the gateways. Use when generating an `external` SturdyRef.

      frontend @4 :Void;
      # The domain of the front-end shell.
    }
  }

  union {
    transient @0 :Transient;
    stored @1 :Stored;
    hosted @2 :Hosted;
    external @3 :External;
  }

  struct Transient {
    # Reference to an object hosted by some specific vat in the cluster, which will eventually
    # become invalid when that vat is taken out of rotation.

    vat @0 :VatPath;
    # The vat where the object is located.

    localRef @1 :AnyPointer;
    # A SturdyRef in the format defined by the vat.
  }

  struct Stored {
    # Reference to an object in long-term storage.

    key0 @0 :UInt64;
    key1 @1 :UInt64;
    key2 @2 :UInt64;
    key3 @3 :UInt64;
    # 256-bit object key. This both identifies the object and may serve as a symmetric key for
    # decrypting the object.
  }

  struct Hosted {
    # Reference to an object hosted within a grain.

    grainState @0 :Stored;
    # Storage ID for an Assignable(GrainState) representing the grain.
    #
    # This stored object is sealed for coordinators, so that holding a SturdyRef to a capability
    # hosted by some grain does not grant direct access to the grain's storage.
    #
    # TODO(soon): This doesn't work: there's no way for the coordinator to enforce the seal on
    #   this ref, because the owner isn't stored anywhere. Possible solutions:
    #   1) Use a reference to a wrapper object in storage owned by the coordinators, which itself
    #      stores the actual object and Owner for enforcement. Problem: won't be cleaned up when
    #      the grain is deleted.
    #   2) Extend Persistent.save() to accept a tag which is returned later on load. Or have
    #      it return the Owner on load, and we can make our Owner type include information about
    #      who is allowed to invoke the coordinator. But note that remote entities and apps won't
    #      be expected to maintain such storage.

    supervisorRef @1 :AnyPointer;
    # A SturdyRef in the format defined by the Sandstorm supervisor.
  }

  struct External {
    # Reference to an object living outside the Sandstorm cluster.

    gatewayRef @0 :Stored;
    # Reference to a stored Immutable(SturdyRef), where that SturdyRef is designed for use on
    # the public internet. The stored object is sealed for the cluster's Cap'n Proto gateway
    # machines.
  }
}

interface Persistent extends(GenericPersistent(SturdyRef, SturdyRef.Owner)) {}

interface Restorer(Ref) {
  # Interface for restoring a SturdyRef.
  #
  # Every vat exports a bootstrap interface of type Restorer that handles refs of type `Transient`.
  # (The interface is of type Restorer(LocalRefType), where LocalRefType is the type of
  # Transient.localRef for this vat. Every vat may have a different local ref type.)
  #
  # For non-transient ref types, the Master is responsible for collecting Restorers from the
  # machines currently assigned to handle that type and redistributing those to the machines that
  # need them.

  restore @0 (sturdyRef :Ref) -> (cap :Capability);
  drop @1 (sturdyRef :Ref);
}

struct ProvisionId {
  provider @0 :VatId;
  # ID of the vat providing the capability (aka the introducer).

  nonce0 @1 :UInt64;
  nonce1 @2 :UInt64;
  # 128-bit nonce randomly chosen by the introducer.
}

struct RecipientId {
  recipient @0 :VatPath;
  # ID of the vat receiving the capability.

  nonce0 @1 :UInt64;
  nonce1 @2 :UInt64;
  # 128-bit nonce randomly chosen by the introducer.
}

struct ThirdPartyCapId {
  provider @0 :VatPath;
  # ID and path to the host of this capability.

  nonce0 @1 :UInt64;
  nonce1 @2 :UInt64;
  # 128-bit nonce randomly chosen by the introducer.
}

struct JoinKeyPart {
  # TODO(someday)
}

struct JoinResult {
  # TODO(someday)
}

interface BackendSet(T) {
  # Callbacks for notifying a machine of changes to its set of back-ends. When the master tells
  # a machine to take on a role, the machine returns various BackendSets which the master then
  # populates.

  reset @0 (backends :List(IdBackendPair));
  # Drop the entire existing backend list and replace it with this new one. Called in particular at
  # startup, or whenever the master has restarted. After reset(), previously-used ID values may
  # be reused.

  struct IdBackendPair {
    id @0 :UInt64;
    backend @1 :T;
  }

  add @1 (id :UInt64, backend :T);
  # Add a new back-end.

  remove @2 (id :UInt64);
  # Remove an existing back-end. The ID will NOT be reused for this set (unless reset() is first
  # called).
  #
  # Note that we cannot identify the backend as a capability here because it may be down, in which
  # case the receiver could never possibly figure out which existing backend in the set that it
  # matched.
}

# ========================================================================================
# Transport Protocol
#
# We assume an underlying sequential datagram transport supporting:
# - Reliable and ordered delivery.
# - Arbitrary-size datagrams.
# - Congestion control.
# - Peer identified by VatId (not by sending IP/port).
# - At the admin's option, encryption for privacy and integrity. (This is optional because many
#   Blackrock clusters may be on physically secure networks where encryption is not needed.)
#
# The simplest implementation of this protocol -- called "the simple protocol" -- is based on
# unencrypted TCP, where we assume that the network infrastructure is secure enough to ensure
# integrity and privacy when delivering packets. The protocol still uses crypto to authenticate
# the connection upfront.
#
# TODO(security): The following protocol has not been reviewed by a crypto expert, and therefore
#   may be totally stupid.
#
# In the simple protocol, a connection is initiated by sending the following header:
# - 32 bytes: The sender's X25519 public key.
# - 8 bytes: Connection number (little-endian). Each time a new connection is initiated from the
#   sending vat to the same receiving vat, this number must increase. If the sender's public key
#   is less than the receiver's, this number must be even, otherwise it must be odd, so that
#   connection IDs in opposite directions between the same vats never collide. Any existing
#   connections with lower connection IDs must be invalidated when a new connection starts.
# - 8 bytes: minIgnoredConnection, the minimum connection number which the sender guarantees that
#   it had not received at the time that it sent this message. The sender promises that if it
#   later receives an incoming connection with this number or greater, but less than the
#   connection number that the sender is initiating with this header, then it will reject any
#   such connection without reading any messages from it. This gives the receiver of this header
#   some assurance that if it had tried to form a connection previously and optimistically sent
#   messages on it, it is safe to send those messages again.
# - 16 bytes: poly1305 MAC of the above *plus* the sender's IPv6 address (or IPv6-mapped IPv4
#   address) and port number (18 bytes). The key is constructed by taking the first 32 bytes of
#   the ChaCha20 stream generated using the two vats' shared secret as a key, and the connection
#   number as a nonce. The purpose of this MAC is to prevent an arbitrary node on the network
#   from impersonating an arbitrary vat by simply sending its public key, which would otherwise
#   be possible even assuming a secure physical network.
#
# Upon accepting a connection, the acceptor does the following:
# - Wait for the header.
# - Verify the header MAC (closing the connection immediately if invalid).
# - If the connection number is less than that of any existing connection to the same remote vat --
#   especially, one recently initiated in the opposite direction -- close it and do not continue.
# - Send a reply header on the newly-accepted connection, which is similar to the received header
#   except that it bears the accepting vat's public key and the connection number (and MAC nonce)
#   is incremented by one. (Notice that this connection number could not possibly already have been
#   used because of the previous step.)
# - If there is any other outstanding connection to the same remote vat (with a lower number),
#   close that other connection. If this vat had sent messages on said other connection but had not
#   yet received any data (including the header) from the peer, then re-send those messages on the
#   newly-accepted connection instead.
#
# Note that, for the initiator of the connection, between the time that the connection starts and
# the time that the reply header is received, it is not yet known if the IP address connected to
# really does correspond to the intended VatId. However, since the IP address was given to us by
# the introducer, and the introducer could have introduced us to anybody, we can safely send
# plaintext messages meant for the entity to whom we were introduced. The only problem is if
# we receive another introduction for the same target VatId but a different IP/port pair in the
# interim. In this case, we must wait until we've received the reply on our existing connection
# authenticating it. If we receive no reply in a reasonable time, or we receive a bogus reply,
# we must close the connection and create a new one with the new address. At this point we cannot
# send *any* messages until the new connection comes back with a valid header, at which point we
# can re-send the messages we had sent to the old connection.
--------------------------------------------------------------------------------