├── .cargo └── config.toml ├── .config └── hakari.toml ├── .github ├── buildomat │ ├── config.toml │ └── jobs │ │ ├── build-release.sh │ │ ├── build.sh │ │ ├── test-ds.sh │ │ ├── test-live-repair.sh │ │ ├── test-memory.sh │ │ ├── test-region-create.sh │ │ ├── test-repair.sh │ │ ├── test-replay.sh │ │ ├── test-up-2region-encrypted.sh │ │ ├── test-up-encrypted.sh │ │ └── test-up-unencrypted.sh └── workflows │ ├── hakari.yml │ └── rust.yml ├── .gitignore ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── agent-antagonist ├── Cargo.toml └── src │ └── main.rs ├── agent-client ├── Cargo.toml └── src │ └── lib.rs ├── agent ├── Cargo.toml ├── downstairs_method_script.sh ├── smf │ ├── agent.xml │ └── downstairs.xml └── src │ ├── datafile.rs │ ├── main.rs │ ├── model.rs │ ├── server.rs │ ├── smf_interface.rs │ └── snapshot_interface.rs ├── aws_benchmark ├── README.md ├── ansible.cfg ├── args.sh ├── bench.sh ├── bring_up_resources.sh ├── cleanup.sh ├── cleanup.yml ├── crucible_simple_test_vpc │ ├── ec2.tf │ ├── helios_user_data.sh │ ├── outputs.tf │ ├── sg.tf │ ├── ubuntu_user_data.sh │ ├── variables.tf │ └── vpc.tf ├── downstairs.service.j2 ├── downstairs.xml ├── driver.sh ├── install_crucible.yml ├── inv.sh ├── main.tf ├── providers.tf └── run_benchmark.sh ├── cmon ├── Cargo.toml └── src │ └── main.rs ├── common ├── Cargo.toml ├── build.rs ├── proptest-regressions │ └── impacted_blocks.txt └── src │ ├── impacted_blocks.rs │ ├── lib.rs │ ├── region.rs │ └── x509.rs ├── control-client ├── Cargo.toml └── src │ └── lib.rs ├── crucible-client-types ├── Cargo.toml └── src │ └── lib.rs ├── crudd ├── Cargo.toml ├── README.md ├── src │ └── main.rs └── test.sh ├── crutest ├── Cargo.toml └── src │ ├── cli.rs │ ├── main.rs │ ├── protocol.rs │ └── stats.rs ├── downstairs ├── Cargo.toml ├── build.rs └── src │ ├── admin.rs │ ├── complete_jobs.rs │ ├── dump.rs │ ├── dynamometer.rs │ ├── extent.rs │ ├── extent_inner_raw.rs │ ├── extent_inner_raw_common.rs │ ├── 
extent_inner_sqlite.rs │ ├── lib.rs │ ├── main.rs │ ├── region.rs │ ├── repair.rs │ └── stats.rs ├── dsc-client ├── Cargo.toml └── src │ └── lib.rs ├── dsc ├── Cargo.toml └── src │ ├── client.rs │ ├── control.rs │ └── main.rs ├── fio ├── README.md ├── cumulative_points_over_time.py ├── fio.sh ├── generate_crucible_fio.py ├── hist.py └── plt.plt ├── hammer ├── Cargo.toml └── src │ ├── README.md │ ├── hammer.c │ └── main.rs ├── integration_tests ├── Cargo.toml └── src │ └── lib.rs ├── measure_iops ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── nbd_server ├── Cargo.toml └── src │ ├── README.md │ └── main.rs ├── openapi ├── README.md ├── crucible-agent.json ├── crucible-control.json ├── crucible-pantry.json ├── downstairs-repair.json └── dsc-control.json ├── package-manifest.toml ├── package ├── Cargo.toml ├── README.md └── src │ └── main.rs ├── pantry-client ├── Cargo.toml └── src │ └── lib.rs ├── pantry ├── Cargo.toml ├── smf │ └── pantry.xml └── src │ ├── lib.rs │ ├── main.rs │ ├── pantry.rs │ └── server.rs ├── protocol ├── Cargo.toml └── src │ └── lib.rs ├── renovate.json ├── repair-client ├── Cargo.toml └── src │ └── lib.rs ├── rust-toolchain.toml ├── rustfmt.toml ├── smf ├── Cargo.toml ├── examples │ └── info.rs └── src │ ├── instance.rs │ ├── lib.rs │ ├── property.rs │ ├── propertygroup.rs │ ├── scf_sys.rs │ ├── scope.rs │ ├── service.rs │ ├── snapshot.rs │ ├── transaction.rs │ └── value.rs ├── tools ├── README.md ├── crudd-speed-battery.sh ├── dtrace │ ├── README.md │ ├── all_downstairs.d │ ├── downstairs_count.d │ ├── get-ds-state.d │ ├── get-ds-state.sh │ ├── get-lr-state.d │ ├── get-lr-state.sh │ ├── get-up-state.d │ ├── get-up-state.sh │ ├── perf-downstairs-finegrain-extent-timings.d │ ├── perf-downstairs-os.d │ ├── perf-downstairs-three.d │ ├── perf-downstairs-tick.d │ ├── perf-downstairs.d │ ├── perf-ds-client.d │ ├── perf-ds-net.d │ ├── perf-online-repair.d │ ├── perf-reqwest.d │ ├── perf-upstairs-wf.d │ ├── perf-vol.d │ ├── perfgw.d │ ├── 
simple.d │ ├── single_up_info.d │ ├── sled_upstairs_info.d │ ├── trace-vol.d │ ├── tracegw.d │ ├── up-info.d │ ├── upstairs_action.d │ ├── upstairs_count.d │ ├── upstairs_info.d │ ├── upstairs_raw.d │ └── upstairs_repair.d ├── hammer_loop.sh ├── loop-double-repair.sh ├── loop-repair.sh ├── make-dtrace.sh ├── make-nightly.sh ├── show_ox_propolis.sh ├── show_ox_stats.sh ├── show_ox_upstairs.sh ├── test_ds.sh ├── test_dsc.sh ├── test_fail_live_repair.sh ├── test_live_repair.sh ├── test_mem.sh ├── test_nightly.sh ├── test_read_only.sh ├── test_reconnect.sh ├── test_repair.sh ├── test_repair_perf.sh ├── test_replace_special.sh ├── test_replay.sh ├── test_restart_repair.sh └── test_up.sh ├── upstairs ├── Cargo.toml ├── build.rs ├── proptest-regressions │ ├── active_jobs.txt │ └── impacted_blocks.txt └── src │ ├── active_jobs.rs │ ├── block_io.rs │ ├── block_req.rs │ ├── buffer.rs │ ├── client.rs │ ├── control.rs │ ├── deferred.rs │ ├── downstairs.rs │ ├── dummy_downstairs_tests.rs │ ├── guest.rs │ ├── in_memory.rs │ ├── io_limits.rs │ ├── lib.rs │ ├── live_repair.rs │ ├── mend.rs │ ├── notify.rs │ ├── pseudo_file.rs │ ├── stats.rs │ ├── test.rs │ ├── upstairs.rs │ └── volume.rs ├── workspace-hack ├── .gitattributes ├── Cargo.toml ├── build.rs └── src │ └── lib.rs └── x509 ├── Makefile ├── README.md └── gen_certs.sh /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | # Workaround to enable this lint for all packages in the workspace 3 | # 4 | # Once https://github.com/rust-lang/cargo/issues/12115 makes it to our 5 | # toolchain, we'll be able to put this in the `Cargo.toml` manifest instead. 6 | rustflags = ["-Wclippy::unused-async"] 7 | -------------------------------------------------------------------------------- /.config/hakari.toml: -------------------------------------------------------------------------------- 1 | # This file contains settings for `cargo hakari`. 
2 | # See https://docs.rs/cargo-hakari/latest/cargo_hakari/config for a full list of options. 3 | 4 | hakari-package = "crucible-workspace-hack" 5 | 6 | # Format version for hakari's output. Version 4 requires cargo-hakari 0.9.22 or above. 7 | dep-format-version = "4" 8 | 9 | workspace-hack-line-style = "workspace-dotted" 10 | 11 | # Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended. 12 | # Hakari works much better with the new feature resolver. 13 | # For more about the new feature resolver, see: 14 | # https://blog.rust-lang.org/2021/03/25/Rust-1.51.0.html#cargos-new-feature-resolver 15 | resolver = "2" 16 | 17 | # Add triples corresponding to platforms commonly used by developers here. 18 | # https://doc.rust-lang.org/rustc/platform-support.html 19 | platforms = [ 20 | "x86_64-unknown-linux-gnu", 21 | "aarch64-apple-darwin", 22 | "x86_64-unknown-illumos", 23 | # "x86_64-pc-windows-msvc", 24 | ] 25 | 26 | # Write out exact versions rather than a semver range. (Defaults to false.) 27 | # exact-versions = true 28 | -------------------------------------------------------------------------------- /.github/buildomat/config.toml: -------------------------------------------------------------------------------- 1 | # 2 | # This file, with this flag, must be present in the default branch in order for 3 | # the buildomat integration to create check suites. 4 | # 5 | enable = true 6 | 7 | # 8 | # Require approval for pull requests made by users outside our organisation. 9 | # 10 | org_only = true 11 | 12 | # 13 | # We accept pull requests from several automated services that are outside the 14 | # organisation. 
Allow jobs from those sources to proceed without manual 15 | # approval: 16 | # 17 | allow_users = [ 18 | "dependabot[bot]", 19 | "renovate[bot]", 20 | ] 21 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/build-release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "rbuild" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = "1.84.0" 7 | #: output_rules = [ 8 | #: "/out/*", 9 | #: "/work/rbins/*", 10 | #: "/work/scripts/*", 11 | #: "/tmp/core.*", 12 | #: "/tmp/*.log", 13 | #: ] 14 | #: 15 | #: [[publish]] 16 | #: series = "nightly-image" 17 | #: name = "crucible-nightly.tar.gz" 18 | #: from_output = "/out/crucible-nightly.tar.gz" 19 | #: 20 | #: [[publish]] 21 | #: series = "nightly-image" 22 | #: name = "crucible-nightly.sha256.txt" 23 | #: from_output = "/out/crucible-nightly.sha256.txt" 24 | #: 25 | #: [[publish]] 26 | #: series = "image" 27 | #: name = "crucible.tar.gz" 28 | #: from_output = "/out/crucible.tar.gz" 29 | #: 30 | #: [[publish]] 31 | #: series = "image" 32 | #: name = "crucible.sha256.txt" 33 | #: from_output = "/out/crucible.sha256.txt" 34 | #: 35 | #: [[publish]] 36 | #: series = "image" 37 | #: name = "crucible-pantry.tar.gz" 38 | #: from_output = "/out/crucible-pantry.tar.gz" 39 | #: 40 | #: [[publish]] 41 | #: series = "image" 42 | #: name = "crucible-pantry.sha256.txt" 43 | #: from_output = "/out/crucible-pantry.sha256.txt" 44 | #: 45 | #: [[publish]] 46 | #: series = "image" 47 | #: name = "crucible-dtrace.tar" 48 | #: from_output = "/out/crucible-dtrace.tar" 49 | #: 50 | #: [[publish]] 51 | #: series = "image" 52 | #: name = "crucible-dtrace.sha256.txt" 53 | #: from_output = "/out/crucible-dtrace.sha256.txt" 54 | #: 55 | 56 | set -o errexit 57 | set -o pipefail 58 | set -o xtrace 59 | 60 | cargo --version 61 | rustc --version 62 | 63 | banner cores 64 | pfexec coreadm -i 
/tmp/core.%f.%p \ 65 | -g /tmp/core.%f.%p \ 66 | -e global \ 67 | -e log \ 68 | -e proc-setid \ 69 | -e global-setid 70 | 71 | banner rbuild 72 | ptime -m cargo build --verbose --release --all-features 73 | 74 | banner rtest 75 | ptime -m cargo test --verbose --features=omicron-build -- --nocapture > /tmp/cargo-test-out.log 2>&1 76 | 77 | banner output 78 | mkdir -p /work/rbins 79 | for t in crucible-downstairs crucible-hammer crutest dsc crudd; do 80 | gzip < "target/release/$t" > "/work/rbins/$t.gz" 81 | done 82 | 83 | mkdir -p /work/scripts 84 | for s in tools/crudd-speed-battery.sh tools/dtrace/perf-downstairs-tick.d tools/dtrace/upstairs_info.d tools/test_mem.sh; do 85 | cp "$s" /work/scripts/ 86 | done 87 | 88 | # Make the top level /out directory 89 | pfexec mkdir -p /out 90 | pfexec chown "$UID" /out 91 | 92 | # Make the crucible package images 93 | banner image 94 | ptime -m cargo run --bin crucible-package 95 | 96 | banner contents 97 | tar tvfz out/crucible.tar.gz 98 | tar tvfz out/crucible-pantry.tar.gz 99 | mv out/crucible.tar.gz out/crucible-pantry.tar.gz /out/ 100 | 101 | # Build the nightly archive file which should include all the scripts 102 | # and binaries needed to run the nightly test. 103 | # This needs the ./out directory created above 104 | banner nightly 105 | ./tools/make-nightly.sh 106 | 107 | banner copy 108 | mv out/crucible-nightly.tar.gz /out/crucible-nightly.tar.gz 109 | 110 | # Build the dtrace archive file which should include all the dtrace scripts. 
111 | # This needs the ./out directory created above 112 | banner dtrace 113 | ./tools/make-dtrace.sh 114 | 115 | banner copy 116 | mv out/crucible-dtrace.tar /out/crucible-dtrace.tar 117 | 118 | banner checksum 119 | cd /out 120 | digest -a sha256 crucible.tar.gz > crucible.sha256.txt 121 | digest -a sha256 crucible-pantry.tar.gz > crucible-pantry.sha256.txt 122 | digest -a sha256 crucible-nightly.tar.gz > crucible-nightly.sha256.txt 123 | digest -a sha256 crucible-dtrace.tar > crucible-dtrace.sha256.txt 124 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "build" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: rust_toolchain = "1.84.0" 7 | #: output_rules = [ 8 | #: "/work/bins/*", 9 | #: "/work/scripts/*", 10 | #: "/tmp/core.*", 11 | #: "/tmp/*.log", 12 | #: ] 13 | #: 14 | 15 | set -o errexit 16 | set -o pipefail 17 | set -o xtrace 18 | 19 | cargo --version 20 | rustc --version 21 | 22 | banner cores 23 | pfexec coreadm -i /tmp/core.%f.%p \ 24 | -g /tmp/core.%f.%p \ 25 | -e global \ 26 | -e log \ 27 | -e proc-setid \ 28 | -e global-setid 29 | 30 | banner build 31 | ptime -m cargo build --verbose --all-features 32 | 33 | banner output 34 | 35 | mkdir -p /work/bins 36 | for t in crucible-downstairs crucible-hammer crutest dsc; do 37 | gzip < "target/debug/$t" > "/work/bins/$t.gz" 38 | done 39 | 40 | mkdir -p /work/scripts 41 | for s in tools/test_live_repair.sh tools/test_repair.sh tools/test_up.sh \ 42 | tools/test_ds.sh tools/test_replay.sh tools/dtrace/upstairs_info.d \ 43 | tools/dtrace/perf-downstairs-tick.d; do 44 | cp "$s" /work/scripts/ 45 | done 46 | 47 | echo in_work_scripts 48 | ls -l /work/scripts 49 | echo in_work_bins 50 | ls -l /work/bins 51 | 52 | banner test 53 | ptime -m cargo test --verbose --features=omicron-build -- --nocapture > 
/tmp/cargo-test-out.log 2>&1 54 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/test-ds.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-ds" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "/tmp/*.txt", 8 | #: "/tmp/core.*", 9 | #: ] 10 | #: skip_clone = true 11 | #: 12 | #: [dependencies.build] 13 | #: job = "build" 14 | 15 | input="/input/build/work" 16 | 17 | set -o errexit 18 | set -o pipefail 19 | set -o xtrace 20 | 21 | banner cores 22 | pfexec coreadm -i /tmp/core.%f.%p \ 23 | -g /tmp/core.%f.%p \ 24 | -e global \ 25 | -e log \ 26 | -e proc-setid \ 27 | -e global-setid 28 | 29 | echo "input bins dir contains:" 30 | ls -ltr "$input"/bins || true 31 | 32 | banner unpack 33 | mkdir -p /var/tmp/bins 34 | for t in "$input/bins/"*.gz; do 35 | b=$(basename "$t") 36 | b=${b%.gz} 37 | gunzip < "$t" > "/var/tmp/bins/$b" 38 | chmod +x "/var/tmp/bins/$b" 39 | done 40 | 41 | export BINDIR=/var/tmp/bins 42 | export RUST_BACKTRACE=1 43 | 44 | banner test_ds 45 | ptime -m bash "$input/scripts/test_ds.sh" 46 | 47 | # Save the output files? 48 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/test-live-repair.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-live-repair" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "/tmp/*.txt", 8 | #: "/tmp/*.log", 9 | #: "%/tmp/debug/*", 10 | #: "/tmp/core.*", 11 | #: "/tmp/dsc/*.txt", 12 | #: "/tmp/dsc.tar", 13 | #: ] 14 | #: skip_clone = true 15 | #: 16 | #: [dependencies.build] 17 | #: job = "build" 18 | 19 | # 20 | # If we fail, try to collect some debugging information 21 | # 22 | _exit_trap() { 23 | local status=$? 
24 | [[ $status -eq 0 ]] && exit 0 25 | 26 | set +o errexit 27 | set -o xtrace 28 | sleep 5 29 | banner evidence 30 | 31 | CORES=$(ls /tmp/core*) 32 | for c in $CORES; do 33 | echo "Stack for Core file $c" 34 | pfexec pstack "$c" 35 | done 36 | 37 | tar cf /tmp/dsc.tar /var/tmp/dsc/region 38 | 39 | echo "Final region compare" 40 | $BINDIR/crucible-downstairs dump \ 41 | -d /var/tmp/dsc/region/8810 \ 42 | -d /var/tmp/dsc/region/8820 \ 43 | -d /var/tmp/dsc/region/8830 44 | 45 | exit $status 46 | } 47 | 48 | trap _exit_trap EXIT 49 | 50 | input="/input/build/work" 51 | 52 | set -o errexit 53 | set -o pipefail 54 | set -o xtrace 55 | 56 | banner cores 57 | pfexec coreadm -i /tmp/core.%f.%p \ 58 | -g /tmp/core.%f.%p \ 59 | -e global \ 60 | -e log \ 61 | -e proc-setid \ 62 | -e global-setid 63 | 64 | pfexec chmod +x "$input"/scripts/* || true 65 | 66 | echo "input bins dir contains:" 67 | ls -ltr "$input"/bins || true 68 | echo "input script dir contains:" 69 | ls -ltr "$input"/scripts || true 70 | 71 | banner Unpack 72 | mkdir -p /var/tmp/bins 73 | for t in "$input/bins/"*.gz; do 74 | b=$(basename "$t") 75 | b=${b%.gz} 76 | gunzip < "$t" > "/var/tmp/bins/$b" 77 | chmod +x "/var/tmp/bins/$b" 78 | done 79 | 80 | export BINDIR=/var/tmp/bins 81 | export RUST_BACKTRACE=1 82 | 83 | echo "BINDIR is $BINDIR" 84 | echo "bindir contains:" 85 | ls -ltr "$BINDIR" || true 86 | 87 | banner CreateDS 88 | echo $BINDIR/dsc create \ 89 | --ds-bin "$BINDIR"/crucible-downstairs \ 90 | --extent-size 4000 \ 91 | --extent-count 200 \ 92 | --region-count 4 \ 93 | --cleanup 94 | $BINDIR/dsc create \ 95 | --ds-bin "$BINDIR"/crucible-downstairs \ 96 | --extent-size 4000 \ 97 | --extent-count 200 \ 98 | --region-count 4 \ 99 | --cleanup 100 | 101 | banner StartDS 102 | $BINDIR/dsc start \ 103 | --ds-bin "$BINDIR"/crucible-downstairs \ 104 | --region-count 4 >> /tmp/dsc.log 2>&1 & 105 | 106 | # This gives dsc time to fail, as it is known to happen. 
If we don't check, 107 | # then the later test will just hang forever waiting for downstairs that 108 | # will never show up. 109 | sleep 5 110 | dsc_pid=$(pgrep dsc); 111 | 112 | if [[ "$dsc_pid" -eq 0 ]]; then 113 | echo "dsc_pid is zero, which is bad, exit" 114 | cat /tmp/dsc.log || true 115 | exit 1 116 | fi 117 | 118 | if ps -p "$dsc_pid"; then 119 | echo "Found dsc running, continue tests" 120 | else 121 | echo "dsc failed" 122 | cat /tmp/dsc.log || true 123 | exit 1 124 | fi 125 | 126 | echo "Setup self timeout" 127 | # Give this test two hours to finish 128 | jobpid=$$; (sleep $(( 120 * 60 )); banner fail-timeout; ps -ef; zfs list;kill $jobpid) & 129 | 130 | echo "Setup debug logging" 131 | mkdir /tmp/debug 132 | psrinfo -v > /tmp/debug/psrinfo.txt 133 | df -h > /tmp/debug/df.txt || true 134 | prstat -d d -mLc 1 > /tmp/debug/prstat.txt 2>&1 & 135 | iostat -T d -xn 1 > /tmp/debug/iostat.txt 2>&1 & 136 | mpstat -T d 1 > /tmp/debug/mpstat.txt 2>&1 & 137 | vmstat -T d -p 1 < /dev/null > /tmp/debug/paging.txt 2>&1 & 138 | pfexec dtrace -Z -s $input/scripts/perf-downstairs-tick.d > /tmp/debug/dtrace.txt 2>&1 & 139 | pfexec dtrace -Z -s $input/scripts/upstairs_info.d > /tmp/debug/upstairs-info.txt 2>&1 & 140 | 141 | banner LR 142 | ptime -m "$BINDIR"/crutest replace \ 143 | -t 127.0.0.1:8810 -t 127.0.0.1:8820 -t 127.0.0.1:8830 \ 144 | --replacement 127.0.0.1:8840 \ 145 | -g 1 -c 10 --stable > /tmp/crutest-replace.log 2>&1 146 | 147 | banner StopDSC 148 | $BINDIR/dsc cmd shutdown 149 | 150 | banner WaitStop 151 | wait "$dsc_pid" 152 | 153 | # Save the output files? 
154 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/test-memory.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-memory" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "/tmp/*.txt", 8 | #: "/tmp/*.log", 9 | #: "%/tmp/debug/*.txt", 10 | #: "%/tmp/dsc/*.txt", 11 | #: "/tmp/core.*", 12 | #: ] 13 | #: skip_clone = true 14 | #: 15 | #: [dependencies.rbuild] 16 | #: job = "rbuild" 17 | 18 | input="/input/rbuild/work" 19 | 20 | set -o errexit 21 | set -o pipefail 22 | set -o xtrace 23 | 24 | banner cores 25 | pfexec coreadm -i /tmp/core.%f.%p \ 26 | -g /tmp/core.%f.%p \ 27 | -e global \ 28 | -e log \ 29 | -e proc-setid \ 30 | -e global-setid 31 | 32 | banner unpack 33 | mkdir -p /var/tmp/bins 34 | for t in "$input/rbins/"*.gz; do 35 | b=$(basename "$t") 36 | b=${b%.gz} 37 | gunzip < "$t" > "/var/tmp/bins/$b" 38 | chmod +x "/var/tmp/bins/$b" 39 | done 40 | 41 | export BINDIR=/var/tmp/bins 42 | export RUST_BACKTRACE=1 43 | 44 | banner setup 45 | pfexec plimit -n 9123456 $$ 46 | 47 | echo "Setup self timeout" 48 | # one hour should be enough 49 | jobpid=$$; (sleep 3600; banner fail-timeout; ps -ef; zfs list;kill $jobpid) & 50 | 51 | echo "Setup debug logging" 52 | mkdir /tmp/debug 53 | psrinfo -v > /tmp/debug/psrinfo.txt 54 | df -h > /tmp/debug/df.txt || true 55 | prstat -d d -mLc 1 > /tmp/debug/prstat.txt 2>&1 & 56 | iostat -T d -xn 1 > /tmp/debug/iostat.txt 2>&1 & 57 | mpstat -T d 1 > /tmp/debug/mpstat.txt 2>&1 & 58 | vmstat -T d -p 1 < /dev/null > /tmp/debug/paging.txt 2>&1 & 59 | pfexec dtrace -Z -s $input/scripts/perf-downstairs-tick.d > /tmp/debug/perf.txt 2>&1 & 60 | pfexec dtrace -Z -s $input/scripts/upstairs_info.d > /tmp/debug/upinfo.txt 2>&1 & 61 | 62 | banner 512-memtest 63 | ptime -m bash $input/scripts/test_mem.sh -b 512 -e 131072 -c 160 64 | banner 4k-memtest 65 | ptime -m bash 
$input/scripts/test_mem.sh -b 4096 -e 16384 -c 160 66 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/test-region-create.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-region-create" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "/tmp/region.csv", 8 | #: "/tmp/core.*", 9 | #: ] 10 | #: skip_clone = true 11 | #: 12 | #: [dependencies.rbuild] 13 | #: job = "rbuild" 14 | 15 | input="/input/rbuild/work" 16 | 17 | set -o errexit 18 | set -o pipefail 19 | set -o xtrace 20 | 21 | banner cores 22 | pfexec coreadm -i /tmp/core.%f.%p \ 23 | -g /tmp/core.%f.%p \ 24 | -e global \ 25 | -e log \ 26 | -e proc-setid \ 27 | -e global-setid 28 | 29 | echo "input rbins dir contains:" 30 | ls -ltr "$input"/rbins || true 31 | 32 | banner unpack 33 | mkdir -p /var/tmp/bins 34 | for t in "$input/rbins/"*.gz; do 35 | b=$(basename "$t") 36 | b=${b%.gz} 37 | gunzip < "$t" > "/var/tmp/bins/$b" 38 | chmod +x "/var/tmp/bins/$b" 39 | done 40 | 41 | export BINDIR=/var/tmp/bins 42 | export RUST_BACKTRACE=1 43 | 44 | banner region 45 | pfexec plimit -n 9123456 $$ 46 | 47 | ptime -m "$BINDIR/dsc" region-perf \ 48 | --ds-bin "${BINDIR}/crucible-downstairs" \ 49 | --csv-out /tmp/region.csv 50 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/test-repair.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-repair" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "%/tmp/*.txt", 8 | #: "%/tmp/debug/*", 9 | #: "/tmp/core.*", 10 | #: ] 11 | #: skip_clone = true 12 | #: 13 | #: [dependencies.build] 14 | #: job = "build" 15 | 16 | input="/input/build/work" 17 | 18 | set -o errexit 19 | set -o pipefail 20 | set -o xtrace 21 | 22 | banner cores 23 | pfexec coreadm -i 
/tmp/core.%f.%p \ 24 | -g /tmp/core.%f.%p \ 25 | -e global \ 26 | -e log \ 27 | -e proc-setid \ 28 | -e global-setid 29 | 30 | echo "input bins dir contains:" 31 | ls -ltr "$input"/bins || true 32 | echo "input script dir contains:" 33 | ls -ltr "$input"/scripts || true 34 | pfexec chmod +x "$input"/scripts/* || true 35 | echo " chmod input script dir contains:" 36 | ls -ltr "$input"/scripts || true 37 | 38 | banner unpack 39 | mkdir -p /var/tmp/bins 40 | for t in "$input/bins/"*.gz; do 41 | b=$(basename "$t") 42 | b=${b%.gz} 43 | gunzip < "$t" > "/var/tmp/bins/$b" 44 | chmod +x "/var/tmp/bins/$b" 45 | done 46 | 47 | export BINDIR=/var/tmp/bins 48 | export RUST_BACKTRACE=1 49 | 50 | echo "Setup self timeout" 51 | # Give this test two hours to finish 52 | jobpid=$$; (sleep $(( 120 * 60 )); banner fail-timeout; ps -ef; zfs list;kill $jobpid) & 53 | 54 | echo "Setup debug logging" 55 | mkdir /tmp/debug 56 | psrinfo -v > /tmp/debug/psrinfo.txt 57 | df -h > /tmp/debug/df.txt || true 58 | prstat -d d -mLc 1 > /tmp/debug/prstat.txt 2>&1 & 59 | iostat -T d -xn 1 > /tmp/debug/iostat.txt 2>&1 & 60 | mpstat -T d 1 > /tmp/debug/mpstat.txt 2>&1 & 61 | vmstat -T d -p 1 < /dev/null > /tmp/debug/paging.txt 2>&1 & 62 | pfexec dtrace -Z -s $input/scripts/perf-downstairs-tick.d > /tmp/debug/dtrace.txt 2>&1 & 63 | pfexec dtrace -Z -s $input/scripts/upstairs_info.d > /tmp/debug/upstairs-info.txt 2>&1 & 64 | 65 | banner repair 66 | ptime -m bash "$input/scripts/test_repair.sh" "-N" > /tmp/test-repair-out.txt 2>&1 67 | 68 | echo "Test repair finished with $?" 
69 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/test-replay.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-replay" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "/tmp/*.txt", 8 | #: "/tmp/*.log", 9 | #: "%/tmp/debug/*.txt", 10 | #: "%/tmp/dsc/*.txt", 11 | #: "/tmp/core.*", 12 | #: ] 13 | #: skip_clone = true 14 | #: 15 | #: [dependencies.build] 16 | #: job = "build" 17 | 18 | input="/input/build/work" 19 | 20 | set -o errexit 21 | set -o pipefail 22 | set -o xtrace 23 | 24 | banner cores 25 | pfexec coreadm -i /tmp/core.%f.%p \ 26 | -g /tmp/core.%f.%p \ 27 | -e global \ 28 | -e log \ 29 | -e proc-setid \ 30 | -e global-setid 31 | 32 | pfexec chmod +x "$input"/scripts/* || true 33 | echo " chmod input script dir contains:" 34 | ls -ltr "$input"/scripts || true 35 | 36 | banner unpack 37 | mkdir -p /var/tmp/bins 38 | for t in "$input/bins/"*.gz; do 39 | b=$(basename "$t") 40 | b=${b%.gz} 41 | gunzip < "$t" > "/var/tmp/bins/$b" 42 | chmod +x "/var/tmp/bins/$b" 43 | done 44 | 45 | export BINDIR=/var/tmp/bins 46 | export RUST_BACKTRACE=1 47 | banner setup 48 | 49 | echo "Setup self timeout" 50 | # Three hours should be enough 51 | jobpid=$$; (sleep 10800; banner fail-timeout; ps -ef; zfs list;kill $jobpid) & 52 | 53 | echo "Setup debug logging" 54 | mkdir /tmp/debug 55 | psrinfo -v > /tmp/debug/psrinfo.txt 56 | df -h > /tmp/debug/df.txt || true 57 | prstat -d d -mLc 1 > /tmp/debug/prstat.txt 2>&1 & 58 | iostat -T d -xn 1 > /tmp/debug/iostat.txt 2>&1 & 59 | mpstat -T d 1 > /tmp/debug/mpstat.txt 2>&1 & 60 | vmstat -T d -p 1 < /dev/null > /tmp/debug/paging.txt 2>&1 & 61 | pfexec dtrace -Z -s $input/scripts/perf-downstairs-tick.d > /tmp/debug/perf.txt 2>&1 & 62 | pfexec dtrace -Z -s $input/scripts/upstairs_info.d > /tmp/debug/upinfo.txt 2>&1 & 63 | 64 | banner replay 65 | ptime -m bash 
"$input/scripts/test_replay.sh" 66 | -------------------------------------------------------------------------------- /.github/buildomat/jobs/test-up-2region-encrypted.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-up-2region-encrypted" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "%/tmp/test_up*/*.txt", 8 | #: "%/tmp/test_up*/dsc/*.txt", 9 | #: "%/tmp/debug/*", 10 | #: "/tmp/core.*", 11 | #: ] 12 | #: skip_clone = true 13 | #: 14 | #: [dependencies.build] 15 | #: job = "build" 16 | 17 | input="/input/build/work" 18 | 19 | set -o errexit 20 | set -o pipefail 21 | set -o xtrace 22 | 23 | banner cores 24 | pfexec coreadm -i /tmp/core.%f.%p \ 25 | -g /tmp/core.%f.%p \ 26 | -e global \ 27 | -e log \ 28 | -e proc-setid \ 29 | -e global-setid 30 | 31 | banner unpack 32 | mkdir -p /var/tmp/bins 33 | for t in "$input/bins/"*.gz; do 34 | b=$(basename "$t") 35 | b=${b%.gz} 36 | gunzip < "$t" > "/var/tmp/bins/$b" 37 | chmod +x "/var/tmp/bins/$b" 38 | done 39 | 40 | export BINDIR=/var/tmp/bins 41 | 42 | # Give this test one hour to finish 43 | jobpid=$$; (sleep $(( 60 * 60 )); banner fail-timeout; ps -ef; zfs list;kill $jobpid) & 44 | 45 | echo "Setup debug logging" 46 | mkdir /tmp/debug 47 | psrinfo -v > /tmp/debug/psrinfo.txt 48 | df -h > /tmp/debug/df.txt || true 49 | prstat -d d -mLc 1 > /tmp/debug/prstat.txt 2>&1 & 50 | iostat -T d -xn 1 > /tmp/debug/iostat.txt 2>&1 & 51 | mpstat -T d 1 > /tmp/debug/mpstat.txt 2>&1 & 52 | vmstat -T d -p 1 < /dev/null > /tmp/debug/paging.txt 2>&1 & 53 | pfexec dtrace -Z -s $input/scripts/perf-downstairs-tick.d > /tmp/debug/dtrace.txt 2>&1 & 54 | pfexec dtrace -Z -s $input/scripts/upstairs_info.d > /tmp/debug/upstairs-info.txt 2>&1 & 55 | 56 | banner test_up_2r_encrypted 57 | ptime -m bash "$input/scripts/test_up.sh" -r 2 -N encrypted 58 | 59 | echo "test-up-2region-encrypted ends" 60 | 
-------------------------------------------------------------------------------- /.github/buildomat/jobs/test-up-encrypted.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-up-encrypted" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "%/tmp/test_up*/*.txt", 8 | #: "%/tmp/test_up*/dsc/*.txt", 9 | #: "%/tmp/debug/*", 10 | #: "/tmp/core.*", 11 | #: ] 12 | #: skip_clone = true 13 | #: 14 | #: [dependencies.build] 15 | #: job = "build" 16 | 17 | input="/input/build/work" 18 | 19 | set -o errexit 20 | set -o pipefail 21 | set -o xtrace 22 | 23 | banner cores 24 | pfexec coreadm -i /tmp/core.%f.%p \ 25 | -g /tmp/core.%f.%p \ 26 | -e global \ 27 | -e log \ 28 | -e proc-setid \ 29 | -e global-setid 30 | 31 | echo "input bins dir contains:" 32 | ls -ltr "$input"/bins || true 33 | 34 | banner unpack 35 | mkdir -p /var/tmp/bins 36 | for t in "$input/bins/"*.gz; do 37 | b=$(basename "$t") 38 | b=${b%.gz} 39 | gunzip < "$t" > "/var/tmp/bins/$b" 40 | chmod +x "/var/tmp/bins/$b" 41 | done 42 | 43 | export BINDIR=/var/tmp/bins 44 | export RUST_BACKTRACE=1 45 | 46 | # Give this test one hour to finish 47 | jobpid=$$; (sleep $(( 60 * 60 )); banner fail-timeout; ps -ef; zfs list;kill $jobpid) & 48 | 49 | echo "Setup debug logging" 50 | mkdir /tmp/debug 51 | psrinfo -v > /tmp/debug/psrinfo.txt 52 | df -h > /tmp/debug/df.txt || true 53 | prstat -d d -mLc 1 > /tmp/debug/prstat.txt 2>&1 & 54 | iostat -T d -xn 1 > /tmp/debug/iostat.txt 2>&1 & 55 | mpstat -T d 1 > /tmp/debug/mpstat.txt 2>&1 & 56 | vmstat -T d -p 1 < /dev/null > /tmp/debug/paging.txt 2>&1 & 57 | pfexec dtrace -Z -s $input/scripts/perf-downstairs-tick.d > /tmp/debug/dtrace.txt 2>&1 & 58 | pfexec dtrace -Z -s $input/scripts/upstairs_info.d > /tmp/debug/upstairs-info.txt 2>&1 & 59 | 60 | banner test_up_encrypted 61 | ptime -m bash "$input/scripts/test_up.sh" -N encrypted 62 | 63 | echo "test-up-encrypted ends" 64 
| -------------------------------------------------------------------------------- /.github/buildomat/jobs/test-up-unencrypted.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #: 3 | #: name = "test-up-unencrypted" 4 | #: variety = "basic" 5 | #: target = "helios-2.0" 6 | #: output_rules = [ 7 | #: "%/tmp/test_up*/*.txt", 8 | #: "%/tmp/test_up*/dsc/*.txt", 9 | #: "%/tmp/debug/*", 10 | #: "/tmp/core.*", 11 | #: ] 12 | #: skip_clone = true 13 | #: 14 | #: [dependencies.build] 15 | #: job = "build" 16 | 17 | input="/input/build/work" 18 | 19 | set -o errexit 20 | set -o pipefail 21 | set -o xtrace 22 | 23 | banner cores 24 | pfexec coreadm -i /tmp/core.%f.%p \ 25 | -g /tmp/core.%f.%p \ 26 | -e global \ 27 | -e log \ 28 | -e proc-setid \ 29 | -e global-setid 30 | 31 | echo "input bins dir contains:" 32 | ls -ltr "$input"/bins || true 33 | 34 | banner unpack 35 | mkdir -p /var/tmp/bins 36 | for t in "$input/bins/"*.gz; do 37 | b=$(basename "$t") 38 | b=${b%.gz} 39 | gunzip < "$t" > "/var/tmp/bins/$b" 40 | chmod +x "/var/tmp/bins/$b" 41 | done 42 | 43 | export BINDIR=/var/tmp/bins 44 | export RUST_BACKTRACE=1 45 | 46 | # Give this test two hours to finish 47 | jobpid=$$; (sleep $(( 120 * 60 )); banner fail-timeout; ps -ef; zfs list;kill $jobpid) & 48 | 49 | echo "Setup debug logging" 50 | mkdir /tmp/debug 51 | psrinfo -v > /tmp/debug/psrinfo.txt 52 | df -h > /tmp/debug/df.txt || true 53 | prstat -d d -mLc 1 > /tmp/debug/prstat.txt 2>&1 & 54 | iostat -T d -xn 1 > /tmp/debug/iostat.txt 2>&1 & 55 | mpstat -T d 1 > /tmp/debug/mpstat.txt 2>&1 & 56 | vmstat -T d -p 1 < /dev/null > /tmp/debug/paging.txt 2>&1 & 57 | pfexec dtrace -Z -s $input/scripts/perf-downstairs-tick.d > /tmp/debug/dtrace.txt 2>&1 & 58 | pfexec dtrace -Z -s $input/scripts/upstairs_info.d > /tmp/debug/upstairs-info.txt 2>&1 & 59 | 60 | banner test_up_unencrypted 61 | ptime -m bash "$input/scripts/test_up.sh" -N unencrypted 62 | 63 | echo 
"test-up-unencrypted ends" 64 | -------------------------------------------------------------------------------- /.github/workflows/hakari.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | name: cargo hakari 10 | 11 | jobs: 12 | workspace-hack-check: 13 | name: Check workspace-hack 14 | runs-on: ubuntu-latest 15 | env: 16 | RUSTFLAGS: -D warnings 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Install cargo-hakari 20 | uses: taiki-e/install-action@v2 21 | with: 22 | tool: cargo-hakari 23 | - name: Check workspace-hack Cargo.toml is up-to-date 24 | run: cargo hakari generate --diff 25 | - name: Check all crates depend on workspace-hack 26 | run: cargo hakari manage-deps --dry-run 27 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | check-style: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 17 | - name: Report rustfmt version 18 | run: cargo fmt -- --version 19 | - name: Check style 20 | run: cargo fmt -- --check 21 | build-docs: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 25 | - name: Test build documentation 26 | run: cargo doc 27 | build-and-test: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 31 | - name: Install dependencies 32 | run: sudo apt-get install libsqlite3-dev 33 | - name: Build 34 | run: cargo build --verbose 35 | - name: Install latest nextest release 36 | uses: taiki-e/install-action@nextest 37 | - 
name: Test nextest all 38 | run: cargo nextest run --verbose 39 | clippy: 40 | runs-on: ubuntu-latest 41 | steps: 42 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 43 | - name: Test Libraries 44 | run: cargo clippy --all-targets 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /var 3 | core 4 | *.swp 5 | /.*.sock 6 | .cli_history.txt 7 | /fio/*.png 8 | /fio/*.log 9 | /fio/*.log.hist 10 | /fio/*.report 11 | /aws_benchmark/.terraform* 12 | /aws_benchmark/inventory 13 | /aws_benchmark/results.txt 14 | /aws_benchmark/terraform.tfstate* 15 | /x509/*.pem 16 | /x509/*.json 17 | /x509/*.csr 18 | /out 19 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Crucible project status and open source 2 | 3 | The Crucible repo is public because it has always been our intention to 4 | make this open-source. We thought it was important to explain where we're 5 | currently at, and manage your expectations. 6 | 7 | - We are a small company. 8 | 9 | - Our current goal is support our first generation products. 10 | 11 | - We're writing Crucible in support of that goal, not as its own thing. We're 12 | all working on the products, and block storage is an important product 13 | feature. 14 | 15 | - Crucible itself has dependencies on many other Oxide repositories, which 16 | themselves are undergoing a similar development churn. 17 | 18 | - These points together mean that we may not have enough bandwidth to review 19 | and integrate outside PRs right now. We hope this will change in the future. 20 | 21 | You're welcome to send PRs, but we want to set expectations right: if we have 22 | time, or if the PRs are very small or fix bugs, we may integrate them in the 23 | near future. 
But we might also not get to any PR for a while, by which time it 24 | might no longer be relevant. 25 | 26 | We've all dealt with those open source projects that feel open in name only, and 27 | have big patches and history-free source drops appearing from behind the walls 28 | of some large organization. We don't like that, and we're not going to do that. 29 | But it will take some time for us to scale up -- please bear with us. 30 | 31 | If you want to ask about whether a PR is consistent with our short-term plan 32 | _before_ you put in the work -- and you should! -- hit us up on the repo 33 | Discussions tab on GitHub. 34 | 35 | Thanks! 36 | -------------------------------------------------------------------------------- /agent-antagonist/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "agent-antagonist" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow.workspace = true 10 | clap.workspace = true 11 | crucible-agent-client.workspace = true 12 | crucible-common.workspace = true 13 | crucible-workspace-hack.workspace = true 14 | futures-core.workspace = true 15 | futures.workspace = true 16 | rand.workspace = true 17 | reqwest.workspace = true 18 | serde.workspace = true 19 | signal-hook-tokio.workspace = true 20 | signal-hook.workspace = true 21 | slog.workspace = true 22 | slog-term.workspace = true 23 | slog-bunyan.workspace = true 24 | tokio.workspace = true 25 | uuid.workspace = true 26 | -------------------------------------------------------------------------------- /agent-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-agent-client" 3 | version = "0.0.1" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | chrono.workspace = 
true 10 | percent-encoding.workspace = true 11 | progenitor.workspace = true 12 | reqwest.workspace = true 13 | schemars.workspace = true 14 | serde.workspace = true 15 | serde_json.workspace = true 16 | crucible-workspace-hack.workspace = true 17 | -------------------------------------------------------------------------------- /agent-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Oxide Computer Company 2 | 3 | use progenitor::generate_api; 4 | 5 | generate_api!( 6 | spec = "../openapi/crucible-agent.json", 7 | derives = [schemars::JsonSchema], 8 | ); 9 | -------------------------------------------------------------------------------- /agent/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-agent" 3 | version = "0.0.1" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | chrono.workspace = true 10 | clap.workspace = true 11 | crucible-common.workspace = true 12 | crucible-smf.workspace = true 13 | dropshot.workspace = true 14 | futures.workspace = true 15 | http.workspace = true 16 | hyper.workspace = true 17 | omicron-common.workspace = true 18 | schemars.workspace = true 19 | semver.workspace = true 20 | serde.workspace = true 21 | serde_json.workspace = true 22 | slog.workspace = true 23 | tokio.workspace = true 24 | uuid.workspace = true 25 | crucible-workspace-hack.workspace = true 26 | 27 | [dev-dependencies] 28 | expectorate.workspace = true 29 | openapi-lint.workspace = true 30 | openapiv3.workspace = true 31 | subprocess.workspace = true 32 | tempfile.workspace = true 33 | slog-term.workspace = true 34 | -------------------------------------------------------------------------------- /agent/downstairs_method_script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit 4 | set -o pipefail 5 
| 6 | args=( 7 | '--data' "$(svcprop -c -p config/directory "${SMF_FMRI}")" 8 | '--address' "$(svcprop -c -p config/address "${SMF_FMRI}")" 9 | '--port' "$(svcprop -c -p config/port "${SMF_FMRI}")" 10 | '--mode' "$(svcprop -c -p config/mode "${SMF_FMRI}")" 11 | ) 12 | 13 | # man 1 svcprop says: 14 | # 15 | # Empty ASCII string values are represented by a pair of double quotes (""). 16 | # 17 | # This is trouble for bash, so it's explicitly checked for here: 18 | 19 | val=$(svcprop -c -p config/cert_pem_path "${SMF_FMRI}") 20 | if [ "$val" != '""' ]; then 21 | args+=( '--cert-pem' ) 22 | args+=( "$val" ) 23 | fi 24 | 25 | val="$(svcprop -c -p config/key_pem_path "${SMF_FMRI}")" 26 | if [ "$val" != '""' ]; then 27 | args+=( '--key-pem' ) 28 | args+=( "$val" ) 29 | fi 30 | 31 | val="$(svcprop -c -p config/root_pem_path "${SMF_FMRI}")" 32 | if [ "$val" != '""' ]; then 33 | args+=( '--root-cert-pem' ) 34 | args+=( "$val" ) 35 | fi 36 | 37 | exec /opt/oxide/crucible/bin/crucible-downstairs run "${args[@]}" 38 | -------------------------------------------------------------------------------- /agent/smf/agent.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 13 | 14 | 15 | 16 | 18 | 19 | 20 | 21 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /agent/smf/downstairs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 13 | 14 | 15 | 16 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /aws_benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Crucible 
AWS benchmark test. 2 | These scripts and tools will: 3 | * Create instances in AWS 4 | * Run performance tests 5 | * Remove the instances. 6 | 7 | In addition to other software, you should have configured access to AWS using the `aws` command line program and configured your credentials. You can find more info on how to install the `aws` command here: https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html 8 | 9 | ## How to run the tests 10 | The main program to drive the test is driver.sh. It will verify existance of some needed programs and serves as the outer loop for the steps listed above. 11 | 12 | Specifically, driver.sh will benchmark Crucible by doing the following: 13 | 14 | - use terraform to create three downstairs and one upstairs 15 | - use ansible to (among other things): 16 | - ship this repo to each instance, 17 | - compile crucible, 18 | - create a 2G region on the downstairs, 19 | - start the downstairs as a service 20 | - run bench.sh on the upstairs, saving the timing output to results.txt 21 | - clean up the resources 22 | 23 | Both helios and ubuntu are supported as host operating systems, and AWS region 24 | is a parameter: 25 | 26 | ``` 27 | ./driver.sh helios us-east-1 28 | ``` 29 | or 30 | ``` 31 | ./driver.sh ubuntu ca-central-1 32 | ``` 33 | 34 | ## Results 35 | Results are output into the file `results.txt`, which is `cat`ed at the end of driver.sh. 36 | 37 | Any error results in cleanup being performed. Add an `echo` to the terraform 38 | apply -destroy command in cleanup() if you want to debug further, but make sure 39 | to clean up your AWS resources! 40 | 41 | ## Customization 42 | Edit bench.sh to modify which program is run as part of the benchmark. 
43 | 44 | To individually run steps: 45 | 46 | * ./bring_up_resources.sh will bring up AWS resources and run ansible to 47 | install everything that's required 48 | * ./run_benchmark.sh will run a warm up plus benchmarking step 49 | * ./cleanup.sh will deprovision all AWS resources 50 | 51 | -------------------------------------------------------------------------------- /aws_benchmark/ansible.cfg: -------------------------------------------------------------------------------- 1 | [defaults] 2 | retry_files_enabled = False 3 | forks = 10 4 | nocows = 1 5 | host_key_checking = False 6 | 7 | [ssh_connection] 8 | pipelining = True 9 | -------------------------------------------------------------------------------- /aws_benchmark/args.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [[ ${#} -lt 2 ]]; 4 | then 5 | echo "usage: " 6 | exit 1 7 | fi 8 | 9 | case "${1}" in 10 | "ubuntu") 11 | # ok 12 | ;; 13 | "helios") 14 | # ok 15 | ;; 16 | *) 17 | echo "for OS, choose ubuntu or helios!" 
18 | exit 1 19 | ;; 20 | esac 21 | 22 | OS="${1}" 23 | REGION="${2}" 24 | 25 | # Prevent re-entry from hitting AWS api again 26 | if [[ -n "${user}" ]]; 27 | then 28 | return 29 | fi 30 | 31 | # set AMI, instance type, user data path 32 | case "${OS}" in 33 | "ubuntu") 34 | # get the latest ami that matches the filter 35 | ami_id=$(aws ec2 describe-images --region "${REGION}" --owner 099720109477 \ 36 | --filter "Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-*-21.04-amd64-server-*" \ 37 | --query 'Images[*].[ImageId,CreationDate]' --output text \ 38 | | sort -k2 \ 39 | | tail -n1 | awk '{ print $1 }') 40 | echo "ubuntu ami: ${ami_id} $(aws ec2 describe-images --region "${REGION}" --image-id "${ami_id}" --query 'Images[*].[Name]' --output text)" 41 | 42 | export ami_id="$ami_id" 43 | export instance_type="m5d.2xlarge" 44 | export user_data_path="ubuntu_user_data.sh" 45 | export user="ubuntu" 46 | ;; 47 | 48 | "helios") 49 | # get the latest ami that matches the filter 50 | ami_id=$(aws ec2 describe-images --region "${REGION}" --owner 128433874814 \ 51 | --filter "Name=name,Values=helios-full-*-*" \ 52 | --query 'Images[*].[ImageId,CreationDate]' --output text \ 53 | | sort -k2 \ 54 | | tail -n1 | awk '{ print $1 }') 55 | echo "helios ami: ${ami_id} $(aws ec2 describe-images --region "${REGION}" --image-id "${ami_id}" --query 'Images[*].[Name]' --output text)" 56 | 57 | # TODO: rpz's ena patch for m5d? need an updated helios-full-* image. 58 | # need: 6f443ebc1fb4fec01d6e8fa8ca4648182ed215bb, so helios version at least 20793 59 | export ami_id="$ami_id" 60 | export instance_type="m4.2xlarge" 61 | export user_data_path="helios_user_data.sh" 62 | export user="helios" 63 | ;; 64 | 65 | *) 66 | echo "for OS, choose ubuntu or helios!" 
67 | exit 1 68 | ;; 69 | esac 70 | 71 | -------------------------------------------------------------------------------- /aws_benchmark/bench.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | D0=$(dig +short downstairs0.private.lan | tail -1) 4 | D1=$(dig +short downstairs1.private.lan | tail -1) 5 | D2=$(dig +short downstairs2.private.lan | tail -1) 6 | 7 | set -e 8 | 9 | #./target/release/crucible-hammer \ 10 | # -t $D0:3801 -t $D1:3801 -t $D2:3801 \ 11 | # --key "ukJBfV956H22EH5Qv4L0iKPWdtTYhdsdw1+eV5/6xdU=" --num-upstairs 1 >/dev/null 12 | 13 | # downstairs uses 512b sectors 14 | ./target/release/measure-iops \ 15 | -t $D0:3801 -t $D1:3801 -t $D2:3801 --samples 30 \ 16 | --io-depth 8 --io-size-in-bytes $((512 * 5)) 17 | 18 | -------------------------------------------------------------------------------- /aws_benchmark/bring_up_resources.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o errexit 3 | set -o pipefail 4 | 5 | source ./args.sh; 6 | 7 | if [[ ! -e "${HOME}/.ssh/id_ed25519.pub" ]]; 8 | then 9 | echo "terraform code expects ${HOME}/.ssh/id_ed25519.pub to exist, please create: ssh-keygen -t ed25519 ..." 10 | exit 1 11 | fi 12 | 13 | # make sure the chosen instance type is available in at least 3 azs, 14 | # otherwise terraform will fail 15 | azs=$(aws ec2 describe-instance-type-offerings \ 16 | --location-type availability-zone \ 17 | --filters "Name=instance-type,Values=${instance_type}" \ 18 | --region "${REGION}" \ 19 | --output text | wc -l) 20 | 21 | if [[ ${azs} -lt 3 ]]; 22 | then 23 | echo "instance ${instance_type} only available in ${azs} azs!" 
24 | exit 1 25 | fi 26 | 27 | set -o xtrace 28 | 29 | # bring up aws resources 30 | terraform init 31 | terraform apply -auto-approve \ 32 | -var "ami_id=${ami_id}" -var "region=${REGION}" \ 33 | -var "instance_type=${instance_type}" \ 34 | -var "user_data_path=${user_data_path}" >/dev/null 35 | 36 | # create ansible inventory from terraform outputs 37 | ./inv.sh 38 | 39 | # install ansible into python virtualenv 40 | if [[ ! -e .venv/bin/activate ]]; 41 | then 42 | virtualenv -p python3 .venv 43 | fi 44 | 45 | source .venv/bin/activate 46 | 47 | if ! pip show ansible; 48 | then 49 | os_name=$(uname) 50 | if [[ "$os_name" == 'Darwin' ]]; then 51 | echo "Setting CPP flags for openssl on Mac" 52 | export CPPFLAGS=-I/usr/local/opt/openssl/include 53 | export LDFLAGS=-L/usr/local/opt/openssl/lib 54 | fi 55 | pip install "ansible==5.0.1" 56 | fi 57 | 58 | # wait for instance status ok (status checks ok, user data has run) 59 | INSTANCE_ID_0=$(terraform output -raw upstairs_id) 60 | INSTANCE_ID_1=$(terraform output -json downstairs_ids 2>&1 | jq -r .[0]) 61 | INSTANCE_ID_2=$(terraform output -json downstairs_ids 2>&1 | jq -r .[1]) 62 | INSTANCE_ID_3=$(terraform output -json downstairs_ids 2>&1 | jq -r .[2]) 63 | 64 | aws ec2 wait instance-status-ok --region "${REGION}" --instance-ids \ 65 | "$INSTANCE_ID_0" "$INSTANCE_ID_1" "$INSTANCE_ID_2" "$INSTANCE_ID_3" 66 | 67 | # prepare instances - install crucible, run downstairs, etc 68 | ansible-playbook -i inventory install_crucible.yml -e user="${user}" -e os="${OS}" 69 | 70 | -------------------------------------------------------------------------------- /aws_benchmark/cleanup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit 3 | set -o pipefail 4 | 5 | source ./args.sh 6 | 7 | ansible-playbook -i inventory cleanup.yml -e user="${user}" -e os="${OS}" || true 8 | 9 | terraform apply -destroy -auto-approve \ 10 | -var "ami_id=${ami_id}" -var 
"region=${REGION}" \ 11 | -var "instance_type=${instance_type}" \ 12 | -var "user_data_path=${user_data_path}" >/dev/null 13 | 14 | -------------------------------------------------------------------------------- /aws_benchmark/cleanup.yml: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | - hosts: downstairs 4 | user: "{{ user }}" 5 | tasks: 6 | # don't hold on to ebs volumes, this prevents shutdown 7 | - name: stop downstairs smf instances 8 | become: yes 9 | shell: | 10 | svcadm disable -s 'svc:/oxide/crucible/downstairs*' 11 | svccfg delete svc:/oxide/crucible/downstairs 12 | when: os == "helios" 13 | 14 | - name: export zpools (why doesn't shut down do this?) 15 | become: yes 16 | shell: | 17 | zpool export data 18 | when: os == "helios" 19 | 20 | -------------------------------------------------------------------------------- /aws_benchmark/crucible_simple_test_vpc/ec2.tf: -------------------------------------------------------------------------------- 1 | resource "aws_key_pair" "temp" { 2 | key_name_prefix = "crucible-benchmarking-" 3 | public_key = file(pathexpand("~/.ssh/id_ed25519.pub")) 4 | } 5 | 6 | module "upstairs" { 7 | source = "terraform-aws-modules/ec2-instance/aws" 8 | version = "~> 5.0" 9 | 10 | name = "upstairs" 11 | 12 | ami = var.ami_id 13 | instance_type = var.instance_type 14 | key_name = aws_key_pair.temp.id 15 | monitoring = true 16 | vpc_security_group_ids = [module.upstairs_sg.security_group_id] 17 | associate_public_ip_address = true 18 | subnet_id = module.vpc.public_subnets[0] 19 | user_data = var.user_data_path != null ? 
file("${path.module}/${var.user_data_path}") : null 20 | 21 | root_block_device = [ 22 | { 23 | name = "/dev/sda1" 24 | volume_type = "gp3" 25 | iops = 5000, 26 | volume_size = 50 27 | }, 28 | ] 29 | 30 | tags = { 31 | Terraform = "true" 32 | Environment = "dev" 33 | } 34 | } 35 | 36 | resource "aws_route53_record" "upstairs" { 37 | zone_id = aws_route53_zone.private.zone_id 38 | name = "upstairs.${aws_route53_zone.private.name}" 39 | type = "CNAME" 40 | ttl = "60" 41 | records = [module.upstairs.private_dns] 42 | } 43 | 44 | resource "aws_ebs_volume" "downstairs" { 45 | for_each = toset(["0", "1", "2"]) 46 | 47 | availability_zone = module.vpc.azs[each.key] 48 | 49 | # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html 50 | type = "io1" 51 | size = 100 52 | iops = 5000 53 | } 54 | 55 | resource "aws_volume_attachment" "downstairs" { 56 | device_name = "/dev/sdh" 57 | volume_id = aws_ebs_volume.downstairs["${each.key}"].id 58 | instance_id = module.downstairs["${each.key}"].id 59 | 60 | for_each = toset(["0", "1", "2"]) 61 | } 62 | 63 | module "downstairs" { 64 | source = "terraform-aws-modules/ec2-instance/aws" 65 | version = "~> 5.0" 66 | 67 | for_each = toset(["0", "1", "2"]) 68 | 69 | name = "downstairs-${each.key}" 70 | 71 | ami = var.ami_id 72 | instance_type = var.instance_type 73 | key_name = aws_key_pair.temp.id 74 | monitoring = true 75 | vpc_security_group_ids = [module.downstairs_sg.security_group_id] 76 | associate_public_ip_address = true 77 | subnet_id = module.vpc.public_subnets["${each.key}"] 78 | user_data = var.user_data_path != null ? 
file("${path.module}/${var.user_data_path}") : null 79 | 80 | root_block_device = [ 81 | { 82 | name = "/dev/sda1" 83 | volume_type = "gp3" 84 | iops = 5000, 85 | volume_size = 50 86 | }, 87 | ] 88 | 89 | tags = { 90 | Terraform = "true" 91 | Environment = "dev" 92 | } 93 | } 94 | 95 | resource "aws_route53_record" "private" { 96 | for_each = toset(["0", "1", "2"]) 97 | 98 | zone_id = aws_route53_zone.private.zone_id 99 | name = "downstairs${each.key}.${aws_route53_zone.private.name}" 100 | type = "CNAME" 101 | ttl = "60" 102 | records = [module.downstairs["${each.key}"].private_dns] 103 | } 104 | -------------------------------------------------------------------------------- /aws_benchmark/crucible_simple_test_vpc/helios_user_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | export PATH="/usr/bin/:/sbin/:/usr/sbin/:$PATH" 3 | 4 | zfs create -o mountpoint=/home/helios 'rpool/helios' 5 | # XGECOS=$(getent passwd helios | cut -d: -f5) == helios,,, 6 | useradd -u '1000' -g staff -c 'helios,,,' -d '/home/helios' -P 'Primary Administrator' -s /bin/bash 'helios' 7 | passwd -N 'helios' 8 | mkdir '/home/helios/.ssh' 9 | cp /root/.ssh/authorized_keys /home/helios/.ssh/authorized_keys 10 | chown -R 'helios:staff' '/home/helios' 11 | chmod 0700 '/home/helios' 12 | sed -i -e '/^PATH=/s#\$#:/opt/ooce/bin:/opt/ooce/sbin#' /etc/default/login 13 | ntpdig -S 0.pool.ntp.org || true 14 | 15 | # passwordless sudo for ansible 16 | echo 'helios ALL=(ALL) NOPASSWD: ALL' > /etc/sudoers.d/helios 17 | 18 | cd /home/helios 19 | 20 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > rustup.sh 21 | chmod u+x rustup.sh 22 | sed -i -e 's_/bin/sh_/bin/bash_g' rustup.sh 23 | sed -i -e '/shellcheck shell=dash/d' rustup.sh 24 | chown helios rustup.sh 25 | sudo -u helios ./rustup.sh -y 26 | 27 | pkg install htop 28 | pkg install rsync 29 | 30 | touch /var/booted_ok 31 | 32 | 
-------------------------------------------------------------------------------- /aws_benchmark/crucible_simple_test_vpc/outputs.tf: -------------------------------------------------------------------------------- 1 | output "upstairs_ip" { 2 | value = module.upstairs.public_ip 3 | } 4 | output "upstairs_id" { 5 | value = module.upstairs.id 6 | } 7 | 8 | output "downstairs_ips" { 9 | value = [ 10 | module.downstairs["0"].public_ip, 11 | module.downstairs["1"].public_ip, 12 | module.downstairs["2"].public_ip, 13 | ] 14 | } 15 | output "downstairs_ids" { 16 | value = [ 17 | module.downstairs["0"].id, 18 | module.downstairs["1"].id, 19 | module.downstairs["2"].id, 20 | ] 21 | } 22 | -------------------------------------------------------------------------------- /aws_benchmark/crucible_simple_test_vpc/sg.tf: -------------------------------------------------------------------------------- 1 | module "upstairs_sg" { 2 | source = "terraform-aws-modules/security-group/aws" 3 | 4 | name = "crucible-upstairs" 5 | vpc_id = module.vpc.vpc_id 6 | 7 | ingress_with_cidr_blocks = [ 8 | { 9 | rule = "ssh-tcp", 10 | cidr_blocks = "0.0.0.0/0" 11 | }, 12 | ] 13 | 14 | egress_with_cidr_blocks = [ 15 | { 16 | rule = "all-all", 17 | cidr_blocks = "0.0.0.0/0", 18 | } 19 | ] 20 | } 21 | 22 | module "downstairs_sg" { 23 | source = "terraform-aws-modules/security-group/aws" 24 | 25 | name = "crucible-downstairs" 26 | vpc_id = module.vpc.vpc_id 27 | 28 | ingress_with_cidr_blocks = [ 29 | { 30 | rule = "ssh-tcp", 31 | cidr_blocks = "0.0.0.0/0" 32 | }, 33 | { 34 | from_port = 3801, 35 | to_port = 3801, 36 | protocol = "tcp", 37 | description = "crucible traffic", 38 | source_security_group_id = module.upstairs_sg.security_group_id, 39 | cidr_blocks = module.vpc.vpc_cidr_block 40 | }, 41 | ] 42 | 43 | egress_with_cidr_blocks = [ 44 | { 45 | rule = "all-all", 46 | cidr_blocks = "0.0.0.0/0", 47 | } 48 | ] 49 | } 50 | -------------------------------------------------------------------------------- 
/aws_benchmark/crucible_simple_test_vpc/ubuntu_user_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | 4 | # format data volume 5 | apt update 6 | apt install -y gdisk 7 | 8 | if [[ -e /dev/nvme1n1 ]]; 9 | then 10 | sgdisk --zap /dev/nvme1n1 11 | sgdisk -n1 /dev/nvme1n1 12 | mkfs.ext4 /dev/nvme1n1p1 13 | mkdir /data 14 | mount /dev/nvme1n1p1 /data 15 | chown -R ubuntu /data 16 | fi 17 | 18 | # done 19 | touch /var/booted_ok 20 | 21 | -------------------------------------------------------------------------------- /aws_benchmark/crucible_simple_test_vpc/variables.tf: -------------------------------------------------------------------------------- 1 | variable "ami_id" { 2 | type = string 3 | } 4 | 5 | variable "instance_type" { 6 | type = string 7 | } 8 | 9 | variable "user_data_path" { 10 | type = string 11 | default = null 12 | } 13 | -------------------------------------------------------------------------------- /aws_benchmark/crucible_simple_test_vpc/vpc.tf: -------------------------------------------------------------------------------- 1 | data "aws_availability_zones" "available" { 2 | state = "available" 3 | } 4 | 5 | resource "random_string" "random" { 6 | length = 32 7 | special = false 8 | lower = true 9 | upper = true 10 | number = true 11 | } 12 | 13 | module "vpc" { 14 | source = "terraform-aws-modules/vpc/aws" 15 | 16 | name = "crucible-benchmark-vpc-${random_string.random.id}" 17 | cidr = "10.0.0.0/16" 18 | 19 | azs = slice(data.aws_availability_zones.available.names, 0, 3) 20 | private_subnets = ["10.0.103.0/24", "10.0.104.0/24", "10.0.105.0/24"] 21 | public_subnets = ["10.0.100.0/24", "10.0.101.0/24", "10.0.102.0/24"] 22 | 23 | enable_nat_gateway = true 24 | enable_vpn_gateway = false 25 | enable_dns_hostnames = true 26 | enable_dns_support = true 27 | 28 | # TODO: IPv6 testing 29 | #enable_ipv6 = true 30 | #assign_ipv6_address_on_creation = true 31 | 
#private_subnet_assign_ipv6_address_on_creation = false 32 | #public_subnet_ipv6_prefixes = [0, 1] 33 | #private_subnet_ipv6_prefixes = [2, 3] 34 | #database_subnet_ipv6_prefixes = [4, 5] 35 | 36 | tags = { 37 | Terraform = "true" 38 | Environment = "dev" 39 | } 40 | } 41 | 42 | resource "aws_route53_zone" "private" { 43 | name = "private.lan" 44 | 45 | vpc { 46 | vpc_id = module.vpc.vpc_id 47 | } 48 | } 49 | 50 | -------------------------------------------------------------------------------- /aws_benchmark/downstairs.service.j2: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description="Crucible Downstairs" 3 | Requires=network.target 4 | After=syslog.target network.target 5 | 6 | [Service] 7 | Type=simple 8 | ExecStart=/opt/crucible/target/release/crucible-downstairs run -p "3801" -d /data/disk 9 | User=ubuntu 10 | LimitNOFILE=65536 11 | 12 | [Install] 13 | WantedBy=multi-user.target 14 | -------------------------------------------------------------------------------- /aws_benchmark/downstairs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 13 | 14 | 15 | 16 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /aws_benchmark/driver.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o errexit 3 | set -o pipefail 4 | 5 | source ./args.sh 6 | 7 | cleanup() { 8 | ./cleanup.sh "${OS}" "${REGION}" 9 | } 10 | 11 | # with errexit, cleanup 12 | trap 'cleanup' ERR 13 | 14 | # check pre-reqs 15 | function program_required() { 16 | if ! 
type "${1}"; 17 | then 18 | echo "please install ${1}"; 19 | exit 1 20 | fi 21 | } 22 | 23 | program_required terraform 24 | program_required virtualenv 25 | program_required python3 26 | program_required pip 27 | program_required aws 28 | 29 | # bring up resources, run the benchmark, and clean up 30 | ./bring_up_resources.sh "${OS}" "${REGION}" 31 | ./run_benchmark.sh "${OS}" "${REGION}" 32 | cleanup 33 | 34 | # show results 35 | set -x 36 | 37 | # show IOPS measurements 38 | grep "IOPS mean " results.txt || true 39 | 40 | -------------------------------------------------------------------------------- /aws_benchmark/inv.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm inventory 4 | 5 | echo '[upstairs]' >> inventory 6 | terraform output -raw upstairs_ip >> inventory 7 | echo '' >> inventory 8 | echo '' >> inventory 9 | 10 | echo '[downstairs]' >> inventory 11 | terraform output -json downstairs_ips 2>&1 | jq -r .[0] >> inventory 12 | terraform output -json downstairs_ips 2>&1 | jq -r .[1] >> inventory 13 | terraform output -json downstairs_ips 2>&1 | jq -r .[2] >> inventory 14 | 15 | echo '' >> inventory 16 | 17 | -------------------------------------------------------------------------------- /aws_benchmark/main.tf: -------------------------------------------------------------------------------- 1 | variable "ami_id" { 2 | type = string 3 | } 4 | 5 | variable "instance_type" { 6 | type = string 7 | } 8 | 9 | variable "user_data_path" { 10 | type = string 11 | } 12 | 13 | module "crucible_simple_test" { 14 | source = "./crucible_simple_test_vpc/" 15 | ami_id = var.ami_id 16 | instance_type = var.instance_type 17 | user_data_path = var.user_data_path 18 | } 19 | 20 | output "upstairs_ip" { 21 | value = module.crucible_simple_test.upstairs_ip 22 | } 23 | output "upstairs_id" { 24 | value = module.crucible_simple_test.upstairs_id 25 | } 26 | 27 | output "downstairs_ips" { 28 | value = 
module.crucible_simple_test.downstairs_ips 29 | } 30 | output "downstairs_ids" { 31 | value = module.crucible_simple_test.downstairs_ids 32 | } 33 | 34 | -------------------------------------------------------------------------------- /aws_benchmark/providers.tf: -------------------------------------------------------------------------------- 1 | terraform { 2 | required_providers { 3 | aws = { 4 | source = "hashicorp/aws" 5 | version = "~> 5.0" 6 | } 7 | } 8 | } 9 | 10 | variable "region" { 11 | type = string 12 | } 13 | 14 | provider "aws" { 15 | region = var.region 16 | } 17 | 18 | -------------------------------------------------------------------------------- /aws_benchmark/run_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -o errexit 3 | set -o pipefail 4 | 5 | source ./args.sh 6 | 7 | # re-upload + build crucible code and bench.sh for rerun 8 | ansible-playbook -i inventory install_crucible.yml -e user="${user}" -e os="${OS}" -t rerun 9 | 10 | # run upstairs benchmark, collect results 11 | # warm up 3 times 12 | for i in $(seq 1 3); 13 | do 14 | ssh -o "StrictHostKeyChecking no" "${user}@$(terraform output -raw upstairs_ip)" \ 15 | "cd /opt/crucible/ && /usr/bin/time -p ./bench.sh" 16 | done 17 | 18 | ## 25 (or user configurable) real runs 19 | rm -f results.txt 20 | for i in $(seq 1 "${RUNS:-25}"); 21 | do 22 | ssh -o "StrictHostKeyChecking no" "${user}@$(terraform output -raw upstairs_ip)" \ 23 | "cd /opt/crucible/ && /usr/bin/time -p ./bench.sh 2>&1" | tee -a results.txt 24 | done 25 | -------------------------------------------------------------------------------- /cmon/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cmon" 3 | version = "0.1.0" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | clap.workspace = true 9 | clearscreen.workspace = true 10 | crucible.workspace = true 11 | 
crucible-control-client.workspace = true 12 | crucible-protocol.workspace = true 13 | serde_json.workspace = true 14 | strum.workspace = true 15 | strum_macros.workspace = true 16 | tokio.workspace = true 17 | crucible-workspace-hack.workspace = true 18 | -------------------------------------------------------------------------------- /common/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-common" 3 | version = "0.0.1" 4 | authors = ["Joshua M. Clulow ", "Alan Hanson "] 5 | license = "MPL-2.0" 6 | edition = "2021" 7 | 8 | [dependencies] 9 | anyhow.workspace = true 10 | atty.workspace = true 11 | nix.workspace = true 12 | rustls-pemfile.workspace = true 13 | schemars.workspace = true 14 | serde.workspace = true 15 | serde_json.workspace = true 16 | slog.workspace = true 17 | slog-async.workspace = true 18 | slog-bunyan.workspace = true 19 | slog-dtrace.workspace = true 20 | slog-term.workspace = true 21 | tempfile.workspace = true 22 | thiserror.workspace = true 23 | tokio.workspace = true 24 | tokio-rustls.workspace = true 25 | toml.workspace = true 26 | twox-hash.workspace = true 27 | uuid.workspace = true 28 | crucible-workspace-hack.workspace = true 29 | dropshot.workspace = true 30 | 31 | [dev-dependencies] 32 | proptest.workspace = true 33 | test-strategy.workspace = true 34 | 35 | [build-dependencies] 36 | vergen = { version = "8.3.2", features = ["cargo", "git", "git2", "rustc" ] } 37 | -------------------------------------------------------------------------------- /common/build.rs: -------------------------------------------------------------------------------- 1 | // This Source Code Form is subject to the terms of the Mozilla Public 2 | // License, v. 2.0. If a copy of the MPL was not distributed with this 3 | // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
4 | 5 | // Copyright 2023 Oxide Computer Company 6 | use vergen::EmitBuilder; 7 | 8 | fn main() { 9 | println!("cargo:rerun-if-changed=build.rs"); 10 | 11 | EmitBuilder::builder() 12 | .all_cargo() 13 | .all_git() 14 | .all_rustc() 15 | .emit() 16 | .unwrap(); 17 | } 18 | -------------------------------------------------------------------------------- /common/proptest-regressions/impacted_blocks.txt: -------------------------------------------------------------------------------- 1 | # Seeds for failure cases proptest has generated in the past. It is 2 | # automatically read and these particular cases re-run before any 3 | # novel cases are generated. 4 | # 5 | # It is recommended to check this file in to source control so that 6 | # everyone who runs the test benefits from these saved cases. 7 | cc a0eeb2a4e85b48b2bc28fd4353784f7f4a587cd07b624761558a41e2cc981e6c # shrinks to input = _IblocksNewPanicsForFlippedPolarityArgs { start_block: 198292286474641, end_block: 198292286474884 } 8 | cc 71508540f37d62051cb8623d470e89dd6af7571763083f694fcf728e43550bfe # shrinks to input = _IblocksBlocksIteratesOverAllBlocksArgs { extent_count: 2, extent_size: 76, start_eid: 0, start_block: 56, end_eid: 1, end_block: 63 } 9 | -------------------------------------------------------------------------------- /common/src/x509.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Oxide Computer Company 2 | use std::fs::File; 3 | use std::io::{self, BufReader}; 4 | use std::sync::Arc; 5 | 6 | // Reference tokio-rustls repo examples/server/src/main.rs 7 | use rustls_pemfile::{certs, rsa_private_keys}; 8 | use tokio_rustls::rustls::server::AllowAnyAuthenticatedClient; 9 | use tokio_rustls::rustls::{ 10 | Certificate, ClientConfig, PrivateKey, RootCertStore, ServerConfig, 11 | }; 12 | 13 | pub fn load_certs(path: &str) -> io::Result> { 14 | certs(&mut BufReader::new(File::open(path)?)) 15 | .map_err(|_| { 16 | 
io::Error::new(io::ErrorKind::InvalidInput, "invalid cert") 17 | }) 18 | .map(|mut certs| certs.drain(..).map(Certificate).collect()) 19 | } 20 | 21 | pub fn load_rsa_keys(path: &str) -> io::Result> { 22 | rsa_private_keys(&mut BufReader::new(File::open(path)?)) 23 | .map_err(|_| io::Error::new(io::ErrorKind::InvalidInput, "invalid key")) 24 | .map(|mut keys| keys.drain(..).map(PrivateKey).collect()) 25 | } 26 | 27 | #[derive(thiserror::Error, Debug)] 28 | pub enum TLSContextError { 29 | #[error("IO error")] 30 | IOError(#[from] std::io::Error), 31 | 32 | #[error("rustls error")] 33 | RusTLSError(#[from] tokio_rustls::rustls::Error), 34 | } 35 | 36 | #[derive(Debug)] 37 | pub struct TLSContext { 38 | certs: Vec, 39 | keys: Vec, 40 | root_cert_store: RootCertStore, 41 | } 42 | 43 | impl TLSContext { 44 | pub fn from_paths( 45 | cert_pem_path: &str, 46 | key_pem_path: &str, 47 | root_cert_pem_path: &str, 48 | ) -> Result { 49 | let mut root_cert_store = RootCertStore::empty(); 50 | for root_cert in load_certs(root_cert_pem_path)? { 51 | root_cert_store.add(&root_cert)?; 52 | } 53 | 54 | Ok(Self { 55 | certs: load_certs(cert_pem_path)?, 56 | keys: load_rsa_keys(key_pem_path)?, 57 | root_cert_store, 58 | }) 59 | } 60 | 61 | pub fn get_client_config(&self) -> Result { 62 | Ok(ClientConfig::builder() 63 | .with_safe_defaults() 64 | .with_root_certificates(self.root_cert_store.clone()) 65 | .with_client_auth_cert(self.certs.clone(), self.keys[0].clone())?) 66 | } 67 | 68 | pub fn get_server_config(&self) -> Result { 69 | let client_cert_verifier = 70 | AllowAnyAuthenticatedClient::new(self.root_cert_store.clone()); 71 | 72 | Ok(ServerConfig::builder() 73 | .with_safe_defaults() 74 | .with_client_cert_verifier(Arc::new(client_cert_verifier)) 75 | .with_single_cert(self.certs.clone(), self.keys[0].clone())?) 
76 | } 77 | } 78 | -------------------------------------------------------------------------------- /control-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-control-client" 3 | version = "0.0.1" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | percent-encoding.workspace = true 10 | progenitor.workspace = true 11 | reqwest.workspace = true 12 | schemars.workspace = true 13 | serde.workspace = true 14 | serde_json.workspace = true 15 | crucible-workspace-hack.workspace = true 16 | -------------------------------------------------------------------------------- /control-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Oxide Computer Company 2 | 3 | use progenitor::generate_api; 4 | 5 | generate_api!( 6 | spec = "../openapi/crucible-control.json", 7 | derives = [schemars::JsonSchema], 8 | ); 9 | -------------------------------------------------------------------------------- /crucible-client-types/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-client-types" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | base64.workspace = true 8 | schemars.workspace = true 9 | serde.workspace = true 10 | serde_json.workspace = true 11 | uuid.workspace = true 12 | crucible-workspace-hack.workspace = true 13 | -------------------------------------------------------------------------------- /crudd/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crudd" 3 | version = "0.1.0" 4 | authors = ["Artemis Everfree "] 5 | license = "MPL-2.0" 6 | edition = "2021" 7 | 8 | [dependencies] 9 | anyhow.workspace = true 10 | bytes.workspace = true 11 | clap.workspace = true 12 | crucible-common.workspace = true 13 | 
crucible-protocol.workspace = true 14 | crucible.workspace = true 15 | futures-core.workspace = true 16 | futures.workspace = true 17 | ringbuffer.workspace = true 18 | serde.workspace = true 19 | serde_json.workspace = true 20 | signal-hook-tokio.workspace = true 21 | signal-hook.workspace = true 22 | tokio-util.workspace = true 23 | tokio.workspace = true 24 | toml.workspace = true 25 | crucible-workspace-hack.workspace = true 26 | itertools.workspace = true 27 | -------------------------------------------------------------------------------- /crudd/README.md: -------------------------------------------------------------------------------- 1 | # crudd 2 | 3 | It's `dd`, for crucible! Now with 80% less draconic command arguments! 4 | 5 | You can use this to read data from some downstairses, or write data to them. `crudd` 6 | goes through Upstairs, so it supports things like encryption and replication. 7 | 8 | Some things you could do with crudd: 9 | 10 | - test read/write performance of your downstairs or upstairs implementations. 11 | - image a region with some data. 12 | - grab a copy of a region to check if it looks like what you expect it to look like. 13 | - perform small nano-writes in a region. 14 | - make sure your ISO header is where you expect it to be. 15 | 16 | Some things you can't do with crudd (yet): 17 | 18 | - Literally anything involving the Volumes feature 19 | 20 | See the help for the most up to date options. 21 | 22 | There is one drawback: in exchange for better command arguments, you get a 23 | hard-requirement on shell IO stream redirection for reads! It's the law of 24 | equivalent exchange. To obtain, something of equal value must be lost. 25 | 26 | Anyway, since Upstairs writes a lot of stuff to STDOUT and I don't know how 27 | to turn it off, I write output to FD3 for the `read` subcommand. 
That means 28 | you need to run it a bit like this: 29 | 30 | ``` 31 | # Redirect crudd's FD3 to your STDOUT, and crudd's STDOUT to your STDERR 32 | # Then you can pipe it around like normal 33 | crudd -t 127.0.0.1:3010 -t 127.0.0.1:3020 -t 127.0.0.1:3030 read 3>&1 1>&2 | wc -c 34 | 35 | # If you just want to write to a file, it's simpler 36 | crudd -t 127.0.0.1:3010 -t 127.0.0.1:3020 -t 127.0.0.1:3030 read 3>outputfile.img 37 | ``` 38 | 39 | 40 | `write` is unaffected, so writes are more normal: 41 | 42 | ``` 43 | cat inputfile.img | crudd -t 127.0.0.1:3010 -t 127.0.0.1:3020 -t 127.0.0.1:3030 write 44 | ``` -------------------------------------------------------------------------------- /crudd/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # this is a simple test script that takes 3 targets, writes some data, 4 | # and reads it back. I don't know how to integrate with our CI yet so 5 | # for now this is what you're getting so I can go work on something 6 | # else for a bit 7 | 8 | if [ $# -lt 3 ]; then 9 | echo "Usage: $0 " 10 | echo "For example, $0 127.0.0.1:3010 127.0.0.1:3020 127.0.0.1:3030" 11 | exit 1 12 | fi 13 | 14 | # generate some temp data 15 | tmpfile="$(mktemp)" 16 | tmpout="$(mktemp)" 17 | 18 | cleanup() { 19 | rm "$tmpfile" 20 | rm "$tmpout" 21 | } 22 | 23 | die() { 24 | echo "$@" 25 | echo "Not cleaning up $tmpfile or $tmpout" 26 | exit 1 27 | } 28 | 29 | #512 megs of data 30 | tmpbytes=$(( 512 * 1024 * 1024)) 31 | dd if=/dev/urandom of="$tmpfile" bs=4M count=$((tmpbytes / ( 4 * 1024 * 1024 ) )) 32 | 33 | # test aligned write/read 34 | cat "$tmpfile" | cargo run --release -- -g 1 -b 0 -n $tmpbytes -t "$1" -t "$2" -t "$3" write 35 | cargo run --release -- -g 2 -b 0 -n "$tmpbytes" -t "$1" -t "$2" -t "$3" 3>"$tmpout" read 36 | 37 | if diff -q "$tmpfile" "$tmpout"; then 38 | echo "Success: aligned read/write" 39 | else 40 | die "Failure: aligned read/write" 41 | fi 42 | 43 | 44 | # 
misaligned write/read 45 | dd if=/dev/urandom of="$tmpfile" bs=4M count=$((tmpbytes / ( 4 * 1024 * 1024 ) )) 46 | cat "$tmpfile" | cargo run --release -- -g 3 -b 29 -n $tmpbytes -t "$1" -t "$2" -t "$3" write 47 | cargo run --release -- -g 4 -b 29 -n "$tmpbytes" -t "$1" -t "$2" -t "$3" 3>"$tmpout" read 48 | 49 | if diff -q "$tmpfile" "$tmpout"; then 50 | echo "Success: misaligned read/write" 51 | else 52 | die "Failure: misaligned read/write" 53 | fi 54 | 55 | # nano-read/write 56 | nanosize=39 57 | dd if=/dev/urandom of="$tmpfile" bs=$nanosize count=1 58 | cat "$tmpfile" | cargo run --release -- -g 5 -b 647 -n $nanosize -t "$1" -t "$2" -t "$3" write 59 | cargo run --release -- -g 6 -b 647 -n $nanosize -t "$1" -t "$2" -t "$3" 3>"$tmpout" read 60 | 61 | if diff -q "$tmpfile" "$tmpout"; then 62 | echo "Success: nano read/write" 63 | else 64 | die "Failure: nano read/write" 65 | fi 66 | 67 | cleanup 68 | exit 0 69 | -------------------------------------------------------------------------------- /crutest/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crutest" 3 | version = "0.1.0" 4 | authors = ["Joshua M. 
Clulow ", "Alan Hanson Result { 18 | println!( 19 | "Attempt to register {:?} with Nexus/Oximeter at {:?}", 20 | my_address, registration_address 21 | ); 22 | 23 | let logging_config = ConfigLogging::StderrTerminal { 24 | level: ConfigLoggingLevel::Error, 25 | }; 26 | 27 | let server_info = ProducerEndpoint { 28 | id: Uuid::new_v4(), 29 | kind: ProducerKind::Service, 30 | address: my_address, 31 | interval: tokio::time::Duration::from_secs(10), 32 | }; 33 | 34 | let config = Config { 35 | server_info, 36 | registration_address: Some(registration_address), 37 | default_request_body_max_bytes: 2048, 38 | log: LogConfig::Config(logging_config), 39 | }; 40 | 41 | match Server::start(&config) { 42 | Ok(server) => { 43 | println!( 44 | "registered with nexus at {:?}, serving metrics to \ 45 | oximeter from {:?}", 46 | registration_address, my_address, 47 | ); 48 | Ok(server) 49 | } 50 | Err(e) => { 51 | bail!("Failed to register as metric producer with Nexus: {}", e) 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /downstairs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-downstairs" 3 | version = "0.0.1" 4 | authors = ["Joshua M. Clulow ", "Alan Hanson , 8 | } 9 | 10 | impl CompletedJobs { 11 | pub fn new(last_flush: Option) -> Self { 12 | Self { 13 | completed: last_flush 14 | .into_iter() 15 | .map(|id| id..JobId(id.0 + 1)) 16 | .collect(), 17 | } 18 | } 19 | 20 | #[cfg(test)] 21 | pub fn is_empty(&self) -> bool { 22 | self.completed.is_empty() 23 | } 24 | 25 | /// Records a new complete job in the list 26 | pub fn push(&mut self, id: JobId) { 27 | self.completed.insert(id..JobId(id.0 + 1)); 28 | } 29 | 30 | /// Resets the data structure, given a new barrier operation 31 | /// 32 | /// All older jobs are forgotten, and the provided operation becomes the 33 | /// oldest complete job. 
34 | pub fn reset(&mut self, id: JobId) { 35 | self.completed.clear(); 36 | self.completed.insert(id..JobId(id.0 + 1)); 37 | } 38 | 39 | /// Checks whether the given job is complete 40 | /// 41 | /// A job is complete if it is listed in the set of complete jobs. 42 | pub fn is_complete(&self, id: JobId) -> bool { 43 | self.completed.contains(&id) 44 | } 45 | 46 | /// Returns the list of completed jobs 47 | pub fn completed(&self) -> impl Iterator + use<'_> { 48 | self.completed 49 | .iter() 50 | .flat_map(|r| (r.start.0..r.end.0).map(JobId)) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /downstairs/src/extent_inner_raw_common.rs: -------------------------------------------------------------------------------- 1 | use crate::{extent::extent_path, CrucibleError}; 2 | use crucible_common::ExtentId; 3 | use serde::{Deserialize, Serialize}; 4 | use std::fs::OpenOptions; 5 | use std::io::{Read, Seek, SeekFrom}; 6 | use std::os::fd::AsFd; 7 | use std::path::Path; 8 | 9 | /// Equivalent to `ExtentMeta`, but ordered for efficient on-disk serialization 10 | /// 11 | /// In particular, the `dirty` byte is first, so it's easy to read at a known 12 | /// offset within the file. 13 | #[derive(Debug, Clone, Serialize, Deserialize)] 14 | pub(super) struct OnDiskMeta { 15 | pub dirty: bool, 16 | pub gen_number: u64, 17 | pub flush_number: u64, 18 | pub ext_version: u32, 19 | } 20 | 21 | impl OnDiskMeta { 22 | /// Looks up the version tag 23 | /// 24 | /// Across all of our raw file formats, `OnDiskMeta` is guaranteed to be 25 | /// placed at the end of the file in a `BLOCK_META_SIZE_BYTES`-length chunk, 26 | /// so we can get a tag without knowing anything else about the file. 
27 | pub fn get_version_tag( 28 | dir: &Path, 29 | extent_number: ExtentId, 30 | ) -> Result { 31 | let path = extent_path(dir, extent_number); 32 | let mut f = OpenOptions::new() 33 | .read(true) 34 | .write(false) 35 | .open(&path) 36 | .map_err(|e| { 37 | CrucibleError::IoError(format!( 38 | "extent {extent_number}: open of {path:?} failed: {e}", 39 | )) 40 | })?; 41 | 42 | let mut buf = [0u8; BLOCK_META_SIZE_BYTES as usize]; 43 | f.seek(SeekFrom::End(-(BLOCK_META_SIZE_BYTES as i64)))?; 44 | f.read_exact(&mut buf)?; 45 | let meta: OnDiskMeta = bincode::deserialize(&buf) 46 | .map_err(|e| CrucibleError::BadMetadata(e.to_string()))?; 47 | Ok(meta.ext_version) 48 | } 49 | } 50 | 51 | /// Size of metadata region 52 | /// 53 | /// This must be large enough to contain an `OnDiskMeta` serialized using 54 | /// `bincode`. 55 | pub(super) const BLOCK_META_SIZE_BYTES: u64 = 32; 56 | 57 | /// Call `pread` repeatedly to read an entire buffer 58 | /// 59 | /// Quoth the standard, 60 | /// 61 | /// > The value returned may be less than nbyte if the number of bytes left in 62 | /// > the file is less than nbyte, if the read() request was interrupted by a 63 | /// > signal, or if the file is a pipe or FIFO or special file and has fewer 64 | /// > than nbyte bytes immediately available for reading. For example, a read() 65 | /// > from a file associated with a terminal may return one typed line of data. 66 | /// 67 | /// We don't have to worry about most of these conditions, but it may be 68 | /// possible for Crucible to be interrupted by a signal, so let's play it safe. 
69 | pub(super) fn pread_all( 70 | fd: F, 71 | mut buf: &mut [u8], 72 | mut offset: i64, 73 | ) -> Result<(), nix::errno::Errno> { 74 | while !buf.is_empty() { 75 | let n = nix::sys::uio::pread(fd, buf, offset)?; 76 | offset += n as i64; 77 | buf = &mut buf[n..]; 78 | } 79 | Ok(()) 80 | } 81 | 82 | /// Call `pwrite` repeatedly to write an entire buffer 83 | /// 84 | /// See details for why this is necessary in [`pread_all`] 85 | pub(super) fn pwrite_all( 86 | fd: F, 87 | mut buf: &[u8], 88 | mut offset: i64, 89 | ) -> Result<(), nix::errno::Errno> { 90 | while !buf.is_empty() { 91 | let n = nix::sys::uio::pwrite(fd, buf, offset)?; 92 | offset += n as i64; 93 | buf = &buf[n..]; 94 | } 95 | Ok(()) 96 | } 97 | 98 | #[cfg(test)] 99 | mod test { 100 | use super::*; 101 | 102 | #[test] 103 | fn on_disk_meta_serialized_size() { 104 | let m = OnDiskMeta { 105 | dirty: true, 106 | gen_number: u64::MAX, 107 | flush_number: u64::MAX, 108 | ext_version: u32::MAX, 109 | }; 110 | let mut meta_buf = [0u8; BLOCK_META_SIZE_BYTES as usize]; 111 | bincode::serialize_into(meta_buf.as_mut_slice(), &Some(m)).unwrap(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /dsc-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dsc-client" 3 | version = "0.0.1" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | crucible-workspace-hack.workspace = true 10 | percent-encoding.workspace = true 11 | progenitor.workspace = true 12 | reqwest.workspace = true 13 | schemars.workspace = true 14 | serde_json.workspace = true 15 | serde.workspace = true 16 | uuid.workspace = true 17 | -------------------------------------------------------------------------------- /dsc-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Oxide Computer Company 2 | 3 | use 
progenitor::generate_api; 4 | 5 | generate_api!( 6 | spec = "../openapi/dsc-control.json", 7 | derives = [schemars::JsonSchema], 8 | ); 9 | -------------------------------------------------------------------------------- /dsc/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dsc" 3 | version = "0.1.0" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | byte-unit.workspace = true 10 | clap.workspace = true 11 | crucible-client-types.workspace = true 12 | crucible-common.workspace = true 13 | crucible-workspace-hack.workspace = true 14 | csv.workspace = true 15 | dsc-client.workspace = true 16 | dropshot.workspace = true 17 | rand.workspace = true 18 | rand_chacha.workspace = true 19 | schemars.workspace = true 20 | semver.workspace = true 21 | serde.workspace = true 22 | statistical.workspace = true 23 | tokio.workspace = true 24 | uuid.workspace = true 25 | 26 | [dev-dependencies] 27 | expectorate.workspace = true 28 | openapiv3.workspace = true 29 | openapi-lint.workspace = true 30 | tempfile.workspace = true 31 | serde_json.workspace = true 32 | -------------------------------------------------------------------------------- /fio/README.md: -------------------------------------------------------------------------------- 1 | This directory contains some scripts for running fio against crucible through 2 | NBD. 3 | 4 | To get crucible-nbd-server running, please follow [this README](../nbd_server/src/README.md). 5 | 6 | Run fio.sh with a path to a region.json and a name for the run: 7 | 8 | ./fio.sh ../disks/d1/region.json "before" 9 | 10 | gnuplot will run and probably complain unless you have "before" and "after" 11 | run results. 
12 | 13 | -------------------------------------------------------------------------------- /fio/cumulative_points_over_time.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import math 5 | 6 | # time (msec), value, data direction, block size (bytes), offset (bytes) 7 | 8 | for fi in sys.argv[1:]: 9 | with open(fi) as fp: 10 | text = fp.read() 11 | 12 | with open(fi + ".cumulative", "w") as fp: 13 | # at time 0, there are 0 points 14 | fp.write("0,0\n") 15 | 16 | points = 1 17 | for line in text.split("\n"): 18 | if not line: 19 | continue 20 | 21 | time, value, direction, bs, offset = \ 22 | [int(x.strip()) for x in line.strip().split(",")] 23 | 24 | fp.write("{},{}\n".format(time, points)) 25 | points += 1 26 | 27 | -------------------------------------------------------------------------------- /fio/fio.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | #rm -f *.png *.report *.log 5 | 6 | if [[ ${#} -lt 2 ]]; 7 | then 8 | echo "supply region file and run name!" 
9 | exit 1 10 | fi 11 | 12 | REGION_FILE="${1}" 13 | NAME="${2}" 14 | 15 | ./generate_crucible_fio.py "${REGION_FILE}" "${NAME}" 16 | sudo fio --output="fio.report" --output-format=normal crucible.fio 17 | 18 | ./hist.py crucible_"${NAME}"_bw.1.log crucible_"${NAME}"_lat.1.log 19 | 20 | ./cumulative_points_over_time.py crucible_"${NAME}"_bw.1.log 21 | 22 | gnuplot plt.plt 23 | 24 | -------------------------------------------------------------------------------- /fio/generate_crucible_fio.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import json 4 | import sys 5 | 6 | 7 | with open(sys.argv[1]) as fp: 8 | d = json.load(fp) 9 | 10 | block_size = int(d["block_size"]) 11 | extent_size = int(d["extent_size"]["value"]) 12 | extent_count = int(d["extent_count"]) 13 | 14 | total_bytes = block_size * extent_size * extent_count 15 | 16 | with open("crucible.fio", "w") as fp: 17 | fp.write("""[global] 18 | rw=randwrite 19 | bs={} 20 | size={} 21 | ioengine=libaio 22 | iodepth=32 23 | direct=1 24 | write_bw_log 25 | write_lat_log 26 | 27 | [crucible_{}] 28 | filename=/dev/nbd0""".format(block_size, total_bytes, sys.argv[2])) 29 | 30 | -------------------------------------------------------------------------------- /fio/hist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import sys 4 | import math 5 | 6 | 7 | # bandwidth: value is KiB/sec 8 | # time (msec), value, data direction, block size (bytes), offset (bytes) 9 | 10 | for fi in sys.argv[1:]: 11 | bins = 200 12 | min_v = 0 13 | max_v = 0 14 | 15 | with open(fi) as fp: 16 | text = fp.read() 17 | 18 | for line in text.split("\n"): 19 | if not line: 20 | continue 21 | 22 | time, value, direction, bs, offset = \ 23 | [int(x.strip()) for x in line.strip().split(",")] 24 | 25 | if value > max_v: 26 | max_v = value 27 | 28 | binwidth = (max_v - min_v) / bins 29 | 30 | with open(fi) as 
fp: 31 | text = fp.read() 32 | 33 | packed = {} 34 | 35 | for i in range(0, bins): 36 | packed[i] = 0 37 | 38 | for line in text.split("\n"): 39 | if not line: 40 | continue 41 | 42 | time, value, direction, bs, offset = \ 43 | [int(x.strip()) for x in line.strip().split(",")] 44 | 45 | if value < min_v or value >= max_v: 46 | continue 47 | 48 | histbin = math.floor((value-min_v) / binwidth) 49 | packed[histbin] += 1 50 | 51 | with open(fi + ".hist", "w") as fp: 52 | for i in range(0, bins): 53 | fp.write("{},{}\n".format(i * binwidth, packed[i])) 54 | 55 | -------------------------------------------------------------------------------- /fio/plt.plt: -------------------------------------------------------------------------------- 1 | set terminal png nocrop enhanced size 1920,1080 2 | 3 | set datafile separator ',' 4 | 5 | # time (msec), value, data direction, block size (bytes), offset (bytes) 6 | set xlabel 'msec' 7 | #set logscale y 8 | #set yrange [0:4000] 9 | 10 | # Bandwidth log: Value is in KiB/sec 11 | set ylabel 'KiB/sec' 12 | 13 | set output 'crucible_bw.png' 14 | plot \ 15 | 'crucible_before_bw.1.log' using 1:2 lc rgb"blue" title 'before', \ 16 | 'crucible_after_bw.1.log' using 1:2 lc rgb"red" title 'after' 17 | 18 | # Latency log: Value is latency in nsecs 19 | 20 | unset xrange 21 | #set yrange [0:300000000] 22 | set ylabel 'nsec' 23 | unset logscale y 24 | 25 | set output 'crucible_lat.png' 26 | 27 | plot \ 28 | 'crucible_before_lat.1.log' using 1:2 lc rgb"blue" title 'before', \ 29 | 'crucible_after_lat.1.log' using 1:2 lc rgb"red" title 'after' 30 | 31 | # cumulative points over time 32 | set ylabel 'cumulative points' 33 | set output 'crucible_cumulative.png' 34 | 35 | plot \ 36 | 'crucible_before_bw.1.log.cumulative' using 1:2 lc rgb"blue" title 'before', \ 37 | 'crucible_after_bw.1.log.cumulative' using 1:2 lc rgb"red" title 'after' 38 | 39 | # histograms 40 | 41 | set xlabel 'KiB/sec' 42 | set ylabel 'frequency' 43 | 44 | #unset yrange 45 | 46 | 
set output 'crucible_bw_histogram.png' 47 | set xlabel 'KiB/sec' 48 | set xrange [0:500] 49 | 50 | plot \ 51 | 'crucible_before_bw.1.log.hist' using 1:2 with lines lc rgb"blue" title 'before', \ 52 | 'crucible_after_bw.1.log.hist' using 1:2 with lines lc rgb"red" title 'after' 53 | 54 | set output 'crucible_lat_histogram.png' 55 | set xlabel 'nsec' 56 | unset xrange 57 | 58 | plot \ 59 | 'crucible_before_lat.1.log.hist' using 1:2 with lines lc rgb"blue" title 'before', \ 60 | 'crucible_after_lat.1.log.hist' using 1:2 with lines lc rgb"red" title 'after' 61 | 62 | -------------------------------------------------------------------------------- /hammer/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-hammer" 3 | version = "0.1.0" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | anyhow.workspace = true 11 | bytes.workspace = true 12 | clap.workspace = true 13 | crucible.workspace = true 14 | crucible-common.workspace = true 15 | rand.workspace = true 16 | tokio.workspace = true 17 | opentelemetry.workspace = true 18 | opentelemetry-jaeger.workspace = true 19 | tracing-subscriber.workspace = true 20 | tracing-opentelemetry.workspace = true 21 | uuid.workspace = true 22 | crucible-workspace-hack.workspace = true 23 | -------------------------------------------------------------------------------- /hammer/src/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains a stress test for Crucible called Hammer, which does 3 | the following: 4 | 5 | 1. randomly choose a legal offset and size (not writing past the end of the 6 | region) 7 | 1. fill a buffer of that size with random data 8 | 1. write the buffer to that offset 9 | 1. read from that offset 10 | 1. 
/*
 * hammer.c: read/write/verify stress test against /dev/nbd0.
 *
 * In an endless loop: pick a random (offset, size) pair that fits inside
 * the region, fill a buffer with bytes from /dev/random, write it to the
 * device, fsync, read it back, and memcmp the two buffers.  The process
 * exits non-zero on an I/O failure and aborts (leaving a core) on a data
 * mismatch, after dumping the differing bytes.
 *
 * Build: gcc -o hammer hammer.c
 * Run:   sudo ./hammer   (requires nbd-client connected to /dev/nbd0)
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include <stdint.h>

int main() {
    srandom(clock());

    const size_t total_size = 512*100*1000; // TODO: get from region.json

    while (1) {
        size_t offset = random() % total_size;
        size_t sz = random() % (4096);

        /* Re-roll the size until the I/O is non-empty and fits in the region. */
        while (((offset + sz) > total_size) || (sz == 0)) {
            sz = random() % (4096*10);
        }

        printf("read write test for %lu %lu\n", offset, sz);

        uint8_t * buf = malloc(sz);
        uint8_t * buf2 = malloc(sz);
        FILE * fp;
        int fn;
        size_t r;

        /* BUGFIX: malloc results were never checked. */
        if (buf == NULL || buf2 == NULL) {
            fprintf(stderr, "malloc(%lu) failed\n", sz);
            free(buf);
            free(buf2);
            return 1;
        }

        // get some random data
        fp = fopen("/dev/random", "rb");
        if (fp == NULL) {
            fprintf(stderr, "cannot open /dev/random\n");
            free(buf);
            free(buf2);
            return 1;
        }
        fn = fileno(fp);

        if (sz != read(fn, buf, sz)) {
            /* BUGFIX: fp was previously leaked on this path. */
            fclose(fp);
            free(buf);
            free(buf2);
            return 1;
        }
        fclose(fp);

        // write out random data
        fp = fopen("/dev/nbd0", "wb");
        if (fp == NULL) {
            fprintf(stderr, "cannot open /dev/nbd0 for writing\n");
            free(buf);
            free(buf2);
            return 2;
        }
        fn = fileno(fp);

        if (0 != fseek(fp, offset, SEEK_SET)) {
            fclose(fp);
            free(buf);
            free(buf2);
            return 2;
        }

        if (sz != (r = write(fn, buf, sz))) {
            /* Report the short count, consistent with the read path below. */
            printf("write fail at %lu %lu: %lu\n", offset, sz, r);
            fclose(fp);
            free(buf);
            free(buf2);
            return 3;
        }

        if (0 != fsync(fn)) {
            fclose(fp);
            free(buf);
            free(buf2);
            return 4;
        }

        fclose(fp);

        // Read data
        fp = fopen("/dev/nbd0", "rb");
        if (fp == NULL) {
            fprintf(stderr, "cannot open /dev/nbd0 for reading\n");
            free(buf);
            free(buf2);
            return 5;
        }
        fn = fileno(fp);

        if (0 != fseek(fp, offset, SEEK_SET)) {
            fclose(fp);
            free(buf);
            free(buf2);
            return 5;
        }

        if (sz != (r = read(fn, buf2, sz))) {
            printf("read fail at %lu %lu: %lu\n", offset, sz, r);
            fclose(fp);
            free(buf);
            free(buf2);
            return 6;
        }

        fclose(fp);

        if (memcmp(buf, buf2, sz) != 0) {
            printf("memcmp fail at %lu %lu\n", offset, sz);

            /*
             * BUGFIX: this diagnostic dump used to be unreachable because
             * abort() was called before the loop.  Print the differing
             * bytes first, then abort so a core file is still produced.
             */
            for (size_t i = 0; i < sz; i++) {
                if (buf[i] != buf2[i]) {
                    printf("%lu offset is bad: 0x%X != 0x%X\n", i, buf[i], buf2[i]);
                } else {
                    printf("%lu offset is ok: 0x%X == 0x%X\n", i, buf[i], buf2[i]);
                }
            }
            free(buf);
            free(buf2);
            abort();
        }

        free(buf);
        free(buf2);
    }

    return 0;
}
19 | crucible-downstairs = { workspace = true, features = ["integration-tests"] } 20 | crucible-pantry-client.workspace = true 21 | crucible-pantry.workspace = true 22 | crucible = { workspace = true, features = ["integration-tests"] } 23 | dropshot.workspace = true 24 | futures-core.workspace = true 25 | futures.workspace = true 26 | hex.workspace = true 27 | httptest.workspace = true 28 | rand.workspace = true 29 | repair-client.workspace = true 30 | reqwest.workspace = true 31 | serde.workspace = true 32 | serde_json.workspace = true 33 | sha2.workspace = true 34 | slog-async.workspace = true 35 | slog-dtrace.workspace = true 36 | slog-term.workspace = true 37 | slog.workspace = true 38 | tempfile.workspace = true 39 | tokio.workspace = true 40 | uuid.workspace = true 41 | 42 | [dependencies] 43 | crucible-workspace-hack.workspace = true 44 | -------------------------------------------------------------------------------- /measure_iops/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "measure-iops" 3 | version = "0.0.1" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | bytes.workspace = true 10 | clap.workspace = true 11 | crucible.workspace = true 12 | crucible-common.workspace = true 13 | rand.workspace = true 14 | tokio.workspace = true 15 | statistical.workspace = true 16 | uuid.workspace = true 17 | crucible-workspace-hack.workspace = true 18 | -------------------------------------------------------------------------------- /measure_iops/README.md: -------------------------------------------------------------------------------- 1 | This tool measures the IOPS and bandwidth from an upstairs to a set of three 2 | downstairs. 
It accepts the usual set of Crucible arguments: 3 | 4 | cargo run --release -p measure-iops -- \ 5 | -t 127.0.0.1:44101 \ 6 | -t 127.0.0.1:44102 \ 7 | -t 127.0.0.1:44103 \ 8 | --key "$(openssl rand -base64 32)" 9 | 10 | Output will show some statistics related to the run: 11 | 12 | IOPS: [1060.5188, 535.55096, 342.29413, 266.0666, 975.55774, 581.5098, 368.57507, 277.63745, 210.09323, 997.99286, 564.8316, 370.5759, 281.1167, 214.91843, 1022.8198, 576.4766, 370.70322, 266.79498, 204.88252, 1049.1289, 521.58124, 326.27725, 231.60463, 250.97418, 1206.1144, 495.56763, 311.36276, 256.4053, 301.95493, 1284.1713, 411.6274, 320.6126, 228.9134, 198.15662, 1222.0442, 489.95615, 303.24838, 255.41228, 318.83762, 1215.2572, 432.60532, 306.87247, 228.31776, 336.62692, 1181.6915, 436.94943, 305.37833, 243.4393, 361.98187, 1170.3591, 411.28217, 297.5957, 227.53906, 375.8531, 1110.321, 428.7218, 301.49493, 269.94434, 402.8902, 1144.8269, 383.7488, 284.26498, 252.16966, 403.8011, 1078.9503, 438.96133, 312.3859, 260.41327, 429.90765, 1150.9951, 430.98767, 299.80392, 229.90257, 299.97418, 1186.887, 427.21585, 297.108, 220.93596, 464.9715, 1053.9406, 434.46204, 296.8956, 248.68004, 441.9947, 971.58954, 465.7012, 310.55557, 259.9112, 463.90683, 963.07007, 482.0798, 296.60132, 259.49713, 452.9664, 1023.61053, 414.3616, 312.71567, 240.88193, 506.94513, 1089.1649] 13 | IOPS mean 505.05734 stddev 324.76517 14 | IOPS min 198.15662 max 1284.1713 15 | 16 | BW: [4343885.0, 2193616.8, 1402036.8, 1089808.8, 3995884.5, 2381864.3, 1509683.5, 1137203.0, 860541.9, 4087778.8, 2313550.3, 1517878.9, 1151454.0, 880305.9, 4189470.0, 2361248.3, 1518400.4, 1092792.3, 839198.8, 4297232.0, 2136396.8, 1336431.6, 948652.56, 1027990.25, 4940244.5, 2029845.0, 1275341.9, 1050236.1, 1236807.4, 5259965.5, 1686025.9, 1313229.3, 937629.3, 811649.5, 5005493.0, 2006860.4, 1242105.4, 1046168.7, 1305958.9, 4977693.5, 1771951.4, 1256949.6, 935189.56, 1378823.9, 4840208.5, 1789744.9, 1250829.6, 997127.4, 1482677.8, 
4793791.0, 1684611.8, 1218952.0, 932000.0, 1539494.3, 4547875.0, 1756044.5, 1234923.3, 1105692.0, 1650238.3, 4689211.0, 1571835.1, 1164349.4, 1032886.94, 1653969.3, 4419380.5, 1797985.6, 1279532.6, 1066652.8, 1760901.8, 4714476.0, 1765325.5, 1227996.9, 941680.94, 1228694.3, 4861489.0, 1749876.1, 1216954.4, 904953.7, 1904523.3, 4316940.5, 1779556.5, 1216084.4, 1018593.44, 1810410.3, 3979630.8, 1907512.1, 1272035.6, 1064596.3, 1900162.4, 3944735.0, 1974598.9, 1214879.0, 1062900.3, 1855350.4, 4192708.8, 1697225.1, 1280883.4, 986652.4, 2076447.3, 4461219.5] 17 | BW mean 2068714.9 stddev 1330238.1 18 | BW min 811649.5 max 5259965.5 19 | 20 | Each measurement is shown in an unsorted list so changes over time can be 21 | observed, along with mean, stddev, min, and max. 22 | 23 | For Crucible, one IOP is measured as 16384 bytes. Crucible will round up to 24 | this: 25 | 26 | - if you send one 1024 byte request, that counts as one IOP. 27 | - if you send one 16000 byte request, that counts as one IOP. 28 | - if you send one 16384 + 1, that counts as two IOPs. 29 | 30 | Tool arguments: 31 | 32 | --samples 33 | how many samples to take (default 100). also the number of seconds the tool runs. 34 | 35 | --io-size-in-bytes 36 | how large of a read or write operation to send. defaults to block size. 37 | 38 | --io-depth 39 | how many IOs to send at a time 40 | -------------------------------------------------------------------------------- /nbd_server/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-nbd-server" 3 | version = "0.1.0" 4 | authors = ["Joshua M. 
Clulow ", "Alan Hanson ", "James MacMahon "] 5 | license = "MPL-2.0" 6 | edition = "2021" 7 | 8 | [dependencies] 9 | anyhow.workspace = true 10 | bytes.workspace = true 11 | clap.workspace = true 12 | crucible-common.workspace = true 13 | crucible-protocol.workspace = true 14 | crucible.workspace = true 15 | futures-core.workspace = true 16 | futures.workspace = true 17 | nbd.workspace = true 18 | ringbuffer.workspace = true 19 | serde.workspace = true 20 | serde_json.workspace = true 21 | tokio-util.workspace = true 22 | tokio.workspace = true 23 | toml.workspace = true 24 | crucible-workspace-hack.workspace = true 25 | -------------------------------------------------------------------------------- /nbd_server/src/README.md: -------------------------------------------------------------------------------- 1 | To spin up a NBD server to issue work to Crucible, do the following: 2 | 3 | 1. Start up three separate crucible-downstairs: 4 | 5 | $ cargo run -p crucible-downstairs -- -p 3801 -d "$PWD/disks/d1" 6 | $ cargo run -p crucible-downstairs -- -p 3802 -d "$PWD/disks/d2" 7 | $ cargo run -p crucible-downstairs -- -p 3803 -d "$PWD/disks/d3" 8 | 9 | 1. Start up crucible-nbd-server: 10 | 11 | $ cargo run -p crucible-nbd-server -- -t 127.0.0.1:3801 -t 127.0.0.1:3802 -t 127.0.0.1:3803 12 | 13 | 1. Connect nbd-client to the crucible-nbd-server: 14 | 15 | $ sudo nbd-client 127.0.0.1 10809 /dev/nbd0 16 | Warning: the oldstyle protocol is no longer supported. 17 | This method now uses the newstyle protocol with a default export 18 | Negotiation: ..size = 0MB 19 | Connected /dev/nbd0 20 | 21 | 1. From here, use nbd0 as normal: 22 | 23 | $ sudo mkfs.vfat -F 32 /dev/nbd0 24 | mkfs.fat 4.2 (2021-01-31) 25 | WARNING: Number of clusters for 32 bit FAT is less then suggested minimum. 
26 | 27 | (if your OS does not auto mount, run "sudo mount /dev/nbd0 /mnt/") 28 | 29 | $ date | tee /media/jwm/9287-806A/date 30 | Fri 23 Jul 2021 03:33:06 PM EDT 31 | 32 | $ df -h /media/jwm/9287-806A/ 33 | Filesystem Size Used Avail Use% Mounted on 34 | /dev/nbd0p1 472K 1.0K 471K 1% /media/jwm/9287-806A 35 | 36 | You should see crucible-downstairs and crucible-nbd-server activity in the other terminal windows. 37 | 38 | 1. To clean up: 39 | 40 | $ sudo umount /media/jwm/9287-806A/ 41 | $ sudo nbd-client -d /dev/nbd0 42 | 43 | Important: when developing, make sure to disconnect and reconnect nbd-client every time crucible-nbd-server is restarted! 44 | 45 | -------------------------------------------------------------------------------- /nbd_server/src/main.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Oxide Computer Company 2 | use std::net::SocketAddr; 3 | 4 | use anyhow::{bail, Result}; 5 | use clap::Parser; 6 | 7 | use crucible::*; 8 | 9 | use nbd::server::{handshake, transmission, Export}; 10 | use std::net::{TcpListener, TcpStream as NetTcpStream}; 11 | 12 | /* 13 | * NBD server commands translate through the CruciblePseudoFile and turn 14 | * into Guest work ops. 
15 | */ 16 | 17 | fn handle_nbd_client( 18 | cpf: &mut crucible::CruciblePseudoFile, 19 | mut stream: NetTcpStream, 20 | ) -> Result<()> { 21 | let e: Export<()> = Export { 22 | size: cpf.sz(), 23 | readonly: false, 24 | ..Default::default() 25 | }; 26 | handshake(&mut stream, |_name| Ok(e))?; 27 | transmission(&mut stream, cpf)?; 28 | Ok(()) 29 | } 30 | 31 | #[derive(Debug, Parser)] 32 | #[clap(about = "volume-side storage component")] 33 | pub struct Opt { 34 | #[clap(short, long, default_value = "127.0.0.1:9000", action)] 35 | target: Vec, 36 | 37 | #[clap(short, long, action)] 38 | key: Option, 39 | 40 | #[clap(short, long, default_value = "0", action)] 41 | gen: u64, 42 | 43 | // TLS options 44 | #[clap(long, action)] 45 | cert_pem: Option, 46 | #[clap(long, action)] 47 | key_pem: Option, 48 | #[clap(long, action)] 49 | root_cert_pem: Option, 50 | 51 | // Start upstairs control http server 52 | #[clap(long, action)] 53 | control: Option, 54 | } 55 | 56 | pub fn opts() -> Result { 57 | let opt: Opt = Opt::parse(); 58 | println!("raw options: {:?}", opt); 59 | 60 | if opt.target.is_empty() { 61 | bail!("must specify at least one --target"); 62 | } 63 | 64 | Ok(opt) 65 | } 66 | 67 | /* 68 | * Crucible needs a runtime as it will create several async tasks to handle 69 | * adding new IOs, communication with the three downstairs instances, and 70 | * completing IOs. 71 | */ 72 | #[tokio::main] 73 | async fn main() -> Result<()> { 74 | let opt = opts()?; 75 | let crucible_opts = CrucibleOpts { 76 | target: opt.target, 77 | lossy: false, 78 | flush_timeout: None, 79 | key: opt.key, 80 | cert_pem: opt.cert_pem, 81 | key_pem: opt.key_pem, 82 | root_cert_pem: opt.root_cert_pem, 83 | control: opt.control, 84 | ..Default::default() 85 | }; 86 | 87 | /* 88 | * The structure we use to send work from outside crucible into the 89 | * Upstairs main task. 
90 | * We create this here instead of inside up_main() so we can use 91 | * the methods provided by guest to interact with Crucible. 92 | */ 93 | let (guest, io) = Guest::new(None); 94 | 95 | let _join_handle = up_main(crucible_opts, opt.gen, None, io, None)?; 96 | println!("Crucible runtime is spawned"); 97 | 98 | // NBD server 99 | 100 | let mut cpf = crucible::CruciblePseudoFile::from(guest)?; 101 | cpf.activate().await?; 102 | 103 | let listener = TcpListener::bind("127.0.0.1:10809").unwrap(); 104 | 105 | // sent to NBD client during handshake through Export struct 106 | println!("NBD advertised size as {} bytes", cpf.sz()); 107 | 108 | for stream in listener.incoming() { 109 | println!("waiting on nbd traffic"); 110 | match stream { 111 | Ok(stream) => match handle_nbd_client(&mut cpf, stream) { 112 | Ok(_) => {} 113 | Err(e) => { 114 | eprintln!("handle_nbd_client error: {}", e); 115 | } 116 | }, 117 | Err(_) => { 118 | println!("Error"); 119 | } 120 | } 121 | } 122 | 123 | Ok(()) 124 | } 125 | -------------------------------------------------------------------------------- /openapi/README.md: -------------------------------------------------------------------------------- 1 | # json files describing Crucible APIs 2 | 3 | These files are validated via tests; changes can be accepted by running tests 4 | with `EXPECTORATE=overwrite`. 5 | 6 | ## crucible-agent.json 7 | Described in this file is the control API for Crucible Agent. 8 | 9 | ## crucible-control.json 10 | Described in this file is the control API for Crucible Upstairs. 11 | 12 | ## downstairs-repair.json 13 | This file describes the API for repairing between Crucible Downstairs. 14 | 15 | ### dsc-control.json 16 | The API for controlling the dsc (DownStairs Control) test program. 
17 | -------------------------------------------------------------------------------- /package-manifest.toml: -------------------------------------------------------------------------------- 1 | [package.crucible] 2 | service_name = "crucible" 3 | source.type = "local" 4 | source.rust.binary_names = ["crucible-agent", "crucible-downstairs"] 5 | source.rust.release = true 6 | source.paths = [ 7 | { from = "agent/smf", to = "/var/svc/manifest/site/crucible" }, 8 | { from = "agent/downstairs_method_script.sh", to = "/opt/oxide/lib/svc/manifest/crucible/downstairs.sh" }, 9 | { from = "tools/dtrace/downstairs_count.d", to = "/opt/oxide/dtrace/crucible/downstairs_count.d" }, 10 | ] 11 | output.type = "zone" 12 | 13 | [package.crucible-pantry] 14 | service_name = "pantry" 15 | source.type = "local" 16 | source.rust.binary_names = ["crucible-pantry"] 17 | source.rust.release = true 18 | source.paths = [ 19 | { from = "pantry/smf/pantry.xml", to = "/var/svc/manifest/site/crucible/pantry.xml" }, 20 | { from = "tools/dtrace/upstairs_info.d", to = "/opt/oxide/dtrace/upstairs_info.d" }, 21 | { from = "tools/dtrace/upstairs_repair.d", to = "/opt/oxide/dtrace/upstairs_repair.d" }, 22 | { from = "tools/dtrace/upstairs_raw.d", to = "/opt/oxide/dtrace/crucible/upstairs_raw.d" }, 23 | { from = "tools/dtrace/get-lr-state.sh", to = "/opt/oxide/dtrace/crucible/get-lr-state.sh" }, 24 | { from = "tools/dtrace/get-ds-state.sh", to = "/opt/oxide/dtrace/crucible/get-ds-state.sh" }, 25 | { from = "tools/dtrace/single_up_info.d", to = "/opt/oxide/dtrace/crucible/single_up_info.d" }, 26 | { from = "tools/dtrace/sled_upstairs_info.d", to = "/opt/oxide/dtrace/crucible/sled_upstairs_info.d" }, 27 | { from = "tools/dtrace/all_downstairs.d", to = "/opt/oxide/dtrace/crucible/all_downstairs.d" }, 28 | { from = "tools/dtrace/up-info.d", to = "/opt/oxide/dtrace/crucible/up-info.d" }, 29 | ] 30 | output.type = "zone" 31 | -------------------------------------------------------------------------------- 
/package/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-package" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | anyhow.workspace = true 8 | camino.workspace = true 9 | crucible-workspace-hack.workspace = true 10 | omicron-zone-package.workspace = true 11 | tokio.workspace = true 12 | -------------------------------------------------------------------------------- /package/README.md: -------------------------------------------------------------------------------- 1 | # Crucible Zone 2 | 3 | This binary can be used to produce an Omicron-branded Zone image, 4 | which consists of the Crucible Agent and Downstairs binaries (along 5 | with some auxiliary files) in a specially-formatted tarball. 6 | 7 | A manifest describing this Zone image exists in `package-manifest.toml`, 8 | and the resulting image is created as `out/crucible.tar.gz`. 9 | 10 | To create the Zone image: 11 | 12 | ```rust 13 | $ cargo build --release 14 | $ cargo run --bin crucible-package 15 | ``` 16 | -------------------------------------------------------------------------------- /package/src/main.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Oxide Computer Company 2 | 3 | use anyhow::Result; 4 | use camino::Utf8Path; 5 | use omicron_zone_package::config; 6 | use omicron_zone_package::target::TargetMap; 7 | use std::fs::create_dir_all; 8 | 9 | #[tokio::main] 10 | async fn main() -> Result<()> { 11 | let cfg = config::parse("package-manifest.toml")?; 12 | 13 | let output_dir = Utf8Path::new("out"); 14 | create_dir_all(output_dir)?; 15 | 16 | let packages = cfg.packages_to_deploy(&TargetMap::default()); 17 | let package_iter = packages.build_order(); 18 | 19 | for batch in package_iter { 20 | for (name, package) in &batch { 21 | println!("Building '{name}'"); 22 | let build_config = 23 | 
omicron_zone_package::package::BuildConfig::default(); 24 | package.create(name, output_dir, &build_config).await?; 25 | } 26 | } 27 | 28 | Ok(()) 29 | } 30 | -------------------------------------------------------------------------------- /pantry-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-pantry-client" 3 | version = "0.0.1" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | chrono.workspace = true 10 | percent-encoding.workspace = true 11 | progenitor.workspace = true 12 | reqwest.workspace = true 13 | schemars.workspace = true 14 | serde.workspace = true 15 | serde_json.workspace = true 16 | uuid.workspace = true 17 | crucible-workspace-hack.workspace = true 18 | -------------------------------------------------------------------------------- /pantry-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Oxide Computer Company 2 | 3 | use progenitor::generate_api; 4 | 5 | generate_api!( 6 | spec = "../openapi/crucible-pantry.json", 7 | derives = [schemars::JsonSchema], 8 | ); 9 | -------------------------------------------------------------------------------- /pantry/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-pantry" 3 | version = "0.0.1" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | bytes.workspace = true 10 | base64.workspace = true 11 | chrono.workspace = true 12 | clap.workspace = true 13 | dropshot.workspace = true 14 | futures.workspace = true 15 | http.workspace = true 16 | hyper.workspace = true 17 | schemars.workspace = true 18 | semver.workspace = true 19 | serde.workspace = true 20 | serde_json.workspace = true 21 | slog.workspace = true 22 | static_assertions.workspace = true 23 | crucible.workspace = true 24 
| crucible-common.workspace = true 25 | crucible-smf.workspace = true 26 | omicron-common.workspace = true 27 | tokio.workspace = true 28 | uuid.workspace = true 29 | reqwest.workspace = true 30 | hex.workspace = true 31 | sha2.workspace = true 32 | crucible-workspace-hack.workspace = true 33 | 34 | [dev-dependencies] 35 | expectorate.workspace = true 36 | openapi-lint.workspace = true 37 | openapiv3.workspace = true 38 | subprocess.workspace = true 39 | -------------------------------------------------------------------------------- /pantry/smf/pantry.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 13 | 14 | 15 | 16 | 18 | 19 | 20 | 21 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /pantry/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Oxide Computer Company 2 | 3 | use std::sync::Arc; 4 | 5 | use anyhow::Result; 6 | use dropshot::{ConfigLogging, ConfigLoggingIfExists, ConfigLoggingLevel}; 7 | use slog::{info, o, Logger}; 8 | 9 | pub const PROG: &str = "crucible-pantry"; 10 | 11 | pub mod pantry; 12 | pub mod server; 13 | 14 | pub fn initialize_pantry() -> Result<(Logger, Arc)> { 15 | let log = ConfigLogging::File { 16 | level: ConfigLoggingLevel::Info, 17 | path: "/dev/stdout".into(), 18 | if_exists: ConfigLoggingIfExists::Append, 19 | } 20 | .to_logger(PROG)?; 21 | 22 | let info = crucible_common::BuildInfo::default(); 23 | info!(log, "Crucible Version: {}", info); 24 | let pantry = 25 | Arc::new(pantry::Pantry::new(log.new(o!("component" => "datafile")))?); 26 | 27 | Ok((log, pantry)) 28 | } 29 | -------------------------------------------------------------------------------- /pantry/src/main.rs: -------------------------------------------------------------------------------- 1 | // 
Copyright 2022 Oxide Computer Company 2 | 3 | use anyhow::{anyhow, Result}; 4 | use clap::Parser; 5 | use semver::Version; 6 | use std::io::Write; 7 | use std::net::SocketAddr; 8 | use std::path::PathBuf; 9 | 10 | use crucible_pantry::*; 11 | 12 | #[derive(Debug, Parser)] 13 | #[clap(name = PROG, about = "Crucible volume maintenance agent")] 14 | enum Args { 15 | OpenApi { 16 | #[clap(short = 'o', action)] 17 | output: PathBuf, 18 | }, 19 | Run { 20 | #[clap(short = 'l', action)] 21 | listen: SocketAddr, 22 | }, 23 | } 24 | 25 | #[tokio::main] 26 | async fn main() -> Result<()> { 27 | let args = Args::try_parse()?; 28 | 29 | match args { 30 | Args::OpenApi { output } => { 31 | let mut f = std::fs::OpenOptions::new() 32 | .create(true) 33 | .write(true) 34 | .truncate(true) 35 | .open(output)?; 36 | write_openapi(&mut f) 37 | } 38 | Args::Run { listen } => { 39 | let (log, pantry) = initialize_pantry()?; 40 | 41 | let (_, join_handle) = server::run_server(&log, listen, &pantry)?; 42 | 43 | join_handle.await?.map_err(|e| anyhow!(e)) 44 | } 45 | } 46 | } 47 | 48 | fn write_openapi(f: &mut W) -> Result<()> { 49 | let api = server::make_api().map_err(|e| anyhow!(e))?; 50 | api.openapi("Crucible Pantry", Version::new(0, 0, 1)) 51 | .write(f)?; 52 | Ok(()) 53 | } 54 | 55 | #[cfg(test)] 56 | mod tests { 57 | use openapiv3::OpenAPI; 58 | 59 | use crate::write_openapi; 60 | 61 | #[test] 62 | fn test_crucible_pantry_openapi() { 63 | let mut raw = Vec::new(); 64 | write_openapi(&mut raw).unwrap(); 65 | let actual = String::from_utf8(raw).unwrap(); 66 | 67 | // Make sure the result parses as a valid OpenAPI spec. 68 | let spec = serde_json::from_str::(&actual) 69 | .expect("output was not valid OpenAPI"); 70 | 71 | // Check for lint errors. 
72 | let errors = openapi_lint::validate(&spec); 73 | assert!(errors.is_empty(), "{}", errors.join("\n\n")); 74 | 75 | expectorate::assert_contents( 76 | "../openapi/crucible-pantry.json", 77 | &actual, 78 | ); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /protocol/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-protocol" 3 | version = "0.0.0" 4 | authors = ["Joshua M. Clulow "] 5 | license = "MPL-2.0" 6 | edition = "2021" 7 | 8 | [dependencies] 9 | anyhow.workspace = true 10 | bincode.workspace = true 11 | bytes.workspace = true 12 | crucible-common.workspace = true 13 | num_enum.workspace = true 14 | schemars.workspace = true 15 | serde.workspace = true 16 | strum.workspace = true 17 | strum_macros.workspace = true 18 | tokio-util.workspace = true 19 | tokio.workspace = true 20 | uuid.workspace = true 21 | crucible-workspace-hack.workspace = true 22 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "local>oxidecomputer/renovate-config" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /repair-client/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "repair-client" 3 | version = "0.0.1" 4 | license = "MPL-2.0" 5 | edition = "2021" 6 | 7 | [dependencies] 8 | anyhow.workspace = true 9 | chrono.workspace = true 10 | percent-encoding.workspace = true 11 | progenitor.workspace = true 12 | reqwest.workspace = true 13 | schemars.workspace = true 14 | serde.workspace = true 15 | serde_json.workspace = true 16 | crucible-common.workspace = true 17 | crucible-workspace-hack.workspace = true 18 | 
uuid.workspace = true 19 | -------------------------------------------------------------------------------- /repair-client/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Oxide Computer Company 2 | 3 | use progenitor::generate_api; 4 | 5 | generate_api!( 6 | spec = "../openapi/downstairs-repair.json", 7 | derives = [schemars::JsonSchema], 8 | replace = { 9 | RegionDefinition = crucible_common::RegionDefinition, 10 | } 11 | ); 12 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.86.0" 3 | profile = "default" 4 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 80 2 | edition = "2021" 3 | -------------------------------------------------------------------------------- /smf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible-smf" 3 | version = "0.0.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | libc.workspace = true 8 | thiserror.workspace = true 9 | num-traits.workspace = true 10 | num-derive.workspace = true 11 | crucible-workspace-hack.workspace = true 12 | -------------------------------------------------------------------------------- /smf/examples/info.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Oxide Computer Company 2 | 3 | use crucible_smf::{PropertyGroups, Result}; 4 | 5 | fn fil(n: usize, c: char) -> String { 6 | let mut s = String::new(); 7 | while s.len() < n { 8 | s.push(c); 9 | } 10 | s 11 | } 12 | 13 | fn ind(n: usize) -> String { 14 | fil(n * 4, ' ') 15 | } 16 | 17 | fn dump_pgs(indent: usize, name: &str, mut pgs: PropertyGroups) -> 
Result<()> { 18 | while let Some(pg) = pgs.next().transpose()? { 19 | let mut infostr = format!("type {:?}", pg.type_()?); 20 | if pg.is_persistent()? { 21 | infostr += ", persistent"; 22 | } else { 23 | infostr += ", non-persistent"; 24 | } 25 | 26 | println!("{}{}: {} ({})", ind(indent), name, pg.name()?, infostr); 27 | let mut properties = pg.properties()?; 28 | while let Some(prop) = properties.next().transpose()? { 29 | println!( 30 | "{}prop: {} ({:?})", 31 | ind(indent + 1), 32 | prop.name()?, 33 | prop.type_()? 34 | ); 35 | 36 | let mut values = prop.values()?; 37 | while let Some(v) = values.next().transpose()? { 38 | println!( 39 | "{}value: {:?} ({:?}, base {:?})", 40 | ind(indent + 2), 41 | v.as_string()?, 42 | v.type_()?, 43 | v.base_type()? 44 | ); 45 | } 46 | } 47 | } 48 | Ok(()) 49 | } 50 | 51 | fn main() -> Result<()> { 52 | let args = std::env::args().skip(1).collect::>(); 53 | 54 | let scf = crucible_smf::Scf::new()?; 55 | let scope = scf.scope_local()?; 56 | 57 | let mut services = scope.services()?; 58 | while let Some(service) = services.next().transpose()? { 59 | let n = service.name()?; 60 | 61 | if !args.is_empty() && !args.iter().any(|a| n.contains(a)) { 62 | continue; 63 | } 64 | 65 | println!("{}", fil(78, '=')); 66 | println!("{}service: {}", ind(0), n); 67 | 68 | dump_pgs(1, "pg(s)", service.pgs()?)?; 69 | println!(); 70 | 71 | let mut instances = service.instances()?; 72 | while let Some(instance) = instances.next().transpose()? { 73 | println!("{}", fil(78, '-')); 74 | println!("{}instance: {}", ind(1), instance.name()?); 75 | 76 | dump_pgs(2, "pg(i)", instance.pgs()?)?; 77 | println!(); 78 | 79 | let mut snapshots = instance.snapshots()?; 80 | while let Some(snapshot) = snapshots.next().transpose()? 
{ 81 | println!("{}snapshot: {}", ind(2), snapshot.name()?); 82 | 83 | dump_pgs(3, "pg(c)", snapshot.pgs()?)?; 84 | println!(); 85 | } 86 | 87 | println!(); 88 | } 89 | 90 | println!(); 91 | } 92 | 93 | Ok(()) 94 | } 95 | -------------------------------------------------------------------------------- /smf/src/property.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Oxide Computer Company 2 | 3 | use std::ptr::NonNull; 4 | 5 | use super::scf_sys::*; 6 | use super::{ 7 | buf_for, str_from, Iter, PropertyGroup, Result, Scf, ScfError, Value, 8 | Values, 9 | }; 10 | 11 | #[derive(Debug)] 12 | pub struct Property<'a> { 13 | pub(crate) scf: &'a Scf, 14 | pub(crate) property: NonNull, 15 | } 16 | 17 | impl<'a> Property<'a> { 18 | pub(crate) fn new(scf: &'a Scf) -> Result> { 19 | if let Some(property) = 20 | NonNull::new(unsafe { scf_property_create(scf.handle.as_ptr()) }) 21 | { 22 | Ok(Property { scf, property }) 23 | } else { 24 | Err(ScfError::last()) 25 | } 26 | } 27 | 28 | pub fn name(&self) -> Result { 29 | let mut buf = buf_for(SCF_LIMIT_MAX_NAME_LENGTH)?; 30 | 31 | let ret = unsafe { 32 | scf_property_get_name( 33 | self.property.as_ptr(), 34 | buf.as_mut_ptr() as *mut libc::c_char, 35 | buf.len(), 36 | ) 37 | }; 38 | 39 | str_from(&mut buf, ret) 40 | } 41 | 42 | pub fn type_(&self) -> Result { 43 | let mut typ: scf_type_t = scf_type_t::SCF_TYPE_INVALID; 44 | 45 | if unsafe { scf_property_type(self.property.as_ptr(), &mut typ) } == 0 { 46 | Ok(typ) 47 | } else { 48 | Err(ScfError::last()) 49 | } 50 | } 51 | 52 | /* 53 | * XXX fn value(&self) -> Result { 54 | * scf_property_get_value(3SCF) 55 | */ 56 | 57 | pub fn values(&self) -> Result { 58 | Values::new(self) 59 | } 60 | 61 | pub fn value(&self) -> Result> { 62 | let mut values = Values::new(self)?.collect::>>()?; 63 | match values.len() { 64 | 0 => Ok(None), 65 | 1 => Ok(values.pop()), 66 | _ => Err(ScfError::Internal), 67 | } 68 | } 69 | } 70 | 71 | 
impl Drop for Property<'_> { 72 | fn drop(&mut self) { 73 | unsafe { scf_property_destroy(self.property.as_ptr()) }; 74 | } 75 | } 76 | 77 | pub struct Properties<'a> { 78 | pub(crate) scf: &'a Scf, 79 | pub(crate) iter: Iter<'a>, 80 | } 81 | 82 | impl<'a> Properties<'a> { 83 | pub(crate) fn new(pg: &'a PropertyGroup) -> Result> { 84 | let scf = pg.scf; 85 | let iter = Iter::new(scf)?; 86 | 87 | if unsafe { 88 | scf_iter_pg_properties( 89 | iter.iter.as_ptr(), 90 | pg.propertygroup.as_ptr(), 91 | ) 92 | } != 0 93 | { 94 | Err(ScfError::last()) 95 | } else { 96 | Ok(Properties { scf, iter }) 97 | } 98 | } 99 | 100 | fn get(&self) -> Result>> { 101 | let property = Property::new(self.scf)?; 102 | 103 | let res = unsafe { 104 | scf_iter_next_property( 105 | self.iter.iter.as_ptr(), 106 | property.property.as_ptr(), 107 | ) 108 | }; 109 | 110 | match res { 111 | 0 => Ok(None), 112 | 1 => Ok(Some(property)), 113 | _ => Err(ScfError::last()), 114 | } 115 | } 116 | } 117 | 118 | impl<'a> Iterator for Properties<'a> { 119 | type Item = Result>; 120 | 121 | fn next(&mut self) -> Option>> { 122 | self.get().transpose() 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /smf/src/scope.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Oxide Computer Company 2 | 3 | use std::ffi::CString; 4 | use std::ptr::NonNull; 5 | 6 | use super::scf_sys::*; 7 | use super::{ 8 | buf_for, str_from, Iter, Result, Scf, ScfError, Service, Services, 9 | }; 10 | 11 | #[derive(Debug)] 12 | pub struct Scope<'a> { 13 | pub(crate) scf: &'a Scf, 14 | pub(crate) scope: NonNull, 15 | } 16 | 17 | impl<'a> Scope<'a> { 18 | pub(crate) fn new(scf: &'a Scf) -> Result> { 19 | if let Some(scope) = 20 | NonNull::new(unsafe { scf_scope_create(scf.handle.as_ptr()) }) 21 | { 22 | Ok(Scope { scf, scope }) 23 | } else { 24 | Err(ScfError::last()) 25 | } 26 | } 27 | 28 | pub fn name(&self) -> Result { 29 | let 
mut buf = buf_for(SCF_LIMIT_MAX_NAME_LENGTH)?; 30 | 31 | let ret = unsafe { 32 | scf_scope_get_name( 33 | self.scope.as_ptr(), 34 | buf.as_mut_ptr() as *mut libc::c_char, 35 | buf.len(), 36 | ) 37 | }; 38 | 39 | str_from(&mut buf, ret) 40 | } 41 | 42 | pub fn services(&self) -> Result { 43 | Services::new(self) 44 | } 45 | 46 | pub fn get_service(&self, name: &str) -> Result> { 47 | let name = CString::new(name).unwrap(); 48 | let service = Service::new(self.scf)?; 49 | 50 | let ret = unsafe { 51 | scf_scope_get_service( 52 | self.scope.as_ptr(), 53 | name.as_ptr(), 54 | service.service.as_ptr(), 55 | ) 56 | }; 57 | 58 | if ret == 0 { 59 | Ok(Some(service)) 60 | } else { 61 | match ScfError::last() { 62 | ScfError::NotFound => Ok(None), 63 | other => Err(other), 64 | } 65 | } 66 | } 67 | 68 | pub fn add_service(&self, name: &str) -> Result { 69 | let name = CString::new(name).unwrap(); 70 | let service = Service::new(self.scf)?; 71 | 72 | let ret = unsafe { 73 | scf_scope_add_service( 74 | self.scope.as_ptr(), 75 | name.as_ptr(), 76 | service.service.as_ptr(), 77 | ) 78 | }; 79 | 80 | if ret == 0 { 81 | Ok(service) 82 | } else { 83 | Err(ScfError::last()) 84 | } 85 | } 86 | 87 | /* 88 | * XXX fn add(&self, name: &str) -> Result { 89 | * scf_scope_add_service(3SCF) 90 | */ 91 | } 92 | 93 | impl Drop for Scope<'_> { 94 | fn drop(&mut self) { 95 | unsafe { scf_scope_destroy(self.scope.as_ptr()) }; 96 | } 97 | } 98 | 99 | pub struct Scopes<'a> { 100 | scf: &'a Scf, 101 | iter: Iter<'a>, 102 | } 103 | 104 | impl<'a> Scopes<'a> { 105 | pub(crate) fn new(scf: &'a Scf) -> Result> { 106 | let iter = Iter::new(scf)?; 107 | 108 | if unsafe { 109 | scf_iter_handle_scopes(iter.iter.as_ptr(), scf.handle.as_ptr()) 110 | } != 0 111 | { 112 | Err(ScfError::last()) 113 | } else { 114 | Ok(Scopes { scf, iter }) 115 | } 116 | } 117 | 118 | fn get(&self) -> Result>> { 119 | let scope = Scope::new(self.scf)?; 120 | 121 | let res = unsafe { 122 | 
scf_iter_next_scope(self.iter.iter.as_ptr(), scope.scope.as_ptr()) 123 | }; 124 | 125 | match res { 126 | 0 => Ok(None), 127 | 1 => Ok(Some(scope)), 128 | _ => Err(ScfError::last()), 129 | } 130 | } 131 | } 132 | 133 | impl<'a> Iterator for Scopes<'a> { 134 | type Item = Result>; 135 | 136 | fn next(&mut self) -> Option>> { 137 | self.get().transpose() 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /smf/src/snapshot.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Oxide Computer Company 2 | 3 | use std::ffi::CString; 4 | use std::ptr::NonNull; 5 | 6 | use super::scf_sys::*; 7 | use super::{ 8 | buf_for, str_from, Instance, Iter, PropertyGroup, PropertyGroups, Result, 9 | ScfError, 10 | }; 11 | 12 | #[derive(Debug)] 13 | pub struct Snapshot<'a> { 14 | pub(crate) instance: &'a Instance<'a>, 15 | pub(crate) snapshot: NonNull, 16 | } 17 | 18 | impl<'a> Snapshot<'a> { 19 | pub(crate) fn new(instance: &'a Instance) -> Result> { 20 | if let Some(snapshot) = NonNull::new(unsafe { 21 | scf_snapshot_create(instance.scf.handle.as_ptr()) 22 | }) { 23 | Ok(Snapshot { instance, snapshot }) 24 | } else { 25 | Err(ScfError::last()) 26 | } 27 | } 28 | 29 | pub fn name(&self) -> Result { 30 | let mut buf = buf_for(SCF_LIMIT_MAX_NAME_LENGTH)?; 31 | 32 | let ret = unsafe { 33 | scf_snapshot_get_name( 34 | self.snapshot.as_ptr(), 35 | buf.as_mut_ptr() as *mut libc::c_char, 36 | buf.len(), 37 | ) 38 | }; 39 | 40 | str_from(&mut buf, ret) 41 | } 42 | 43 | pub fn pgs(&self) -> Result { 44 | PropertyGroups::new_composed(self.instance, self) 45 | } 46 | 47 | pub fn get_pg(&self, name: &str) -> Result> { 48 | let name = CString::new(name).unwrap(); 49 | let pg = PropertyGroup::new(self.instance.scf)?; 50 | 51 | let ret = unsafe { 52 | scf_instance_get_pg_composed( 53 | self.instance.instance.as_ptr(), 54 | self.snapshot.as_ptr(), 55 | name.as_ptr(), 56 | 
pg.propertygroup.as_ptr(), 57 | ) 58 | }; 59 | 60 | if ret == 0 { 61 | Ok(Some(pg)) 62 | } else { 63 | match ScfError::last() { 64 | ScfError::NotFound => Ok(None), 65 | other => Err(other), 66 | } 67 | } 68 | } 69 | 70 | /* 71 | * XXX fn delete(&self) -> Result<()> { 72 | * scf_snapshot_delete(3SCF) 73 | */ 74 | } 75 | 76 | impl Drop for Snapshot<'_> { 77 | fn drop(&mut self) { 78 | unsafe { scf_snapshot_destroy(self.snapshot.as_ptr()) }; 79 | } 80 | } 81 | 82 | pub struct Snapshots<'a> { 83 | pub(crate) instance: &'a Instance<'a>, 84 | pub(crate) iter: Iter<'a>, 85 | } 86 | 87 | impl<'a> Snapshots<'a> { 88 | pub(crate) fn new(instance: &'a Instance) -> Result> { 89 | let iter = Iter::new(instance.scf)?; 90 | 91 | if unsafe { 92 | scf_iter_instance_snapshots( 93 | iter.iter.as_ptr(), 94 | instance.instance.as_ptr(), 95 | ) 96 | } != 0 97 | { 98 | Err(ScfError::last()) 99 | } else { 100 | Ok(Snapshots { instance, iter }) 101 | } 102 | } 103 | 104 | fn get(&self) -> Result>> { 105 | let snapshot = Snapshot::new(self.instance)?; 106 | 107 | let res = unsafe { 108 | scf_iter_next_snapshot( 109 | self.iter.iter.as_ptr(), 110 | snapshot.snapshot.as_ptr(), 111 | ) 112 | }; 113 | 114 | match res { 115 | 0 => Ok(None), 116 | 1 => Ok(Some(snapshot)), 117 | _ => Err(ScfError::last()), 118 | } 119 | } 120 | } 121 | 122 | impl<'a> Iterator for Snapshots<'a> { 123 | type Item = Result>; 124 | 125 | fn next(&mut self) -> Option>> { 126 | self.get().transpose() 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /smf/src/value.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Oxide Computer Company 2 | 3 | use std::ffi::CString; 4 | use std::ptr::NonNull; 5 | 6 | use num_traits::cast::FromPrimitive; 7 | 8 | use super::scf_sys::*; 9 | use super::{buf_for, str_from, Iter, Property, Result, Scf, ScfError}; 10 | 11 | #[derive(Debug)] 12 | pub struct Value<'a> { 13 | _scf: &'a 
Scf, // Prevent from being dropped 14 | pub(crate) value: NonNull, 15 | } 16 | 17 | impl<'a> Value<'a> { 18 | pub(crate) fn new(scf: &'a Scf) -> Result> { 19 | if let Some(value) = 20 | NonNull::new(unsafe { scf_value_create(scf.handle.as_ptr()) }) 21 | { 22 | Ok(Value { _scf: scf, value }) 23 | } else { 24 | Err(ScfError::last()) 25 | } 26 | } 27 | 28 | pub fn as_string(&self) -> Result { 29 | let mut buf = buf_for(SCF_LIMIT_MAX_VALUE_LENGTH)?; 30 | 31 | let ret = unsafe { 32 | scf_value_get_as_string( 33 | self.value.as_ptr(), 34 | buf.as_mut_ptr() as *mut libc::c_char, 35 | buf.len(), 36 | ) 37 | }; 38 | 39 | str_from(&mut buf, ret) 40 | } 41 | 42 | pub fn set_from_string(&self, typ: scf_type_t, val: &str) -> Result<()> { 43 | let val = CString::new(val).unwrap(); 44 | 45 | let ret = unsafe { 46 | scf_value_set_from_string(self.value.as_ptr(), typ, val.as_ptr()) 47 | }; 48 | 49 | if ret == 0 { 50 | Ok(()) 51 | } else { 52 | Err(ScfError::last()) 53 | } 54 | } 55 | 56 | pub fn type_(&self) -> Result { 57 | let ret = unsafe { scf_value_type(self.value.as_ptr()) }; 58 | match scf_type_t::from_i32(ret) { 59 | Some(scf_type_t::SCF_TYPE_INVALID) => Err(ScfError::last()), 60 | Some(typ) => Ok(typ), 61 | None => Err(ScfError::Internal), 62 | } 63 | } 64 | 65 | pub fn base_type(&self) -> Result { 66 | let ret = unsafe { scf_value_base_type(self.value.as_ptr()) }; 67 | match scf_type_t::from_i32(ret) { 68 | Some(scf_type_t::SCF_TYPE_INVALID) => Err(ScfError::last()), 69 | Some(typ) => Ok(typ), 70 | None => Err(ScfError::Internal), 71 | } 72 | } 73 | 74 | /* 75 | * XXX fn value(&self) -> Result { 76 | * scf_value_get_value(3SCF) 77 | */ 78 | 79 | /* 80 | * XXX fn values(&self) -> Result { 81 | * scf_iter_value_values(3SCF) 82 | */ 83 | } 84 | 85 | impl Drop for Value<'_> { 86 | fn drop(&mut self) { 87 | unsafe { scf_value_destroy(self.value.as_ptr()) }; 88 | } 89 | } 90 | 91 | pub struct Values<'a> { 92 | pub(crate) scf: &'a Scf, 93 | pub(crate) iter: Iter<'a>, 94 | } 95 | 
96 | impl<'a> Values<'a> { 97 | pub(crate) fn new(prop: &'a Property) -> Result> { 98 | let scf = prop.scf; 99 | let iter = Iter::new(scf)?; 100 | 101 | if unsafe { 102 | scf_iter_property_values(iter.iter.as_ptr(), prop.property.as_ptr()) 103 | } != 0 104 | { 105 | Err(ScfError::last()) 106 | } else { 107 | Ok(Values { scf, iter }) 108 | } 109 | } 110 | 111 | fn get(&self) -> Result>> { 112 | let value = Value::new(self.scf)?; 113 | 114 | let res = unsafe { 115 | scf_iter_next_value(self.iter.iter.as_ptr(), value.value.as_ptr()) 116 | }; 117 | 118 | match res { 119 | 0 => Ok(None), 120 | 1 => Ok(Some(value)), 121 | _ => Err(ScfError::last()), 122 | } 123 | } 124 | } 125 | 126 | impl<'a> Iterator for Values<'a> { 127 | type Item = Result>; 128 | 129 | fn next(&mut self) -> Option>> { 130 | self.get().transpose() 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # Oxide Crucible tools 2 | 3 | Various scripts used for Crucible 4 | 5 | ## dtrace 6 | A collection of dtrace scripts for use on Crucible. A README.md in that 7 | directory contains more information. 8 | 9 | ## hammer-loop.sh 10 | A loop test that runs the crucible-hammer test in a loop. It is expected 11 | that you already have downstairs running on port 88[1-3]0. 12 | The test will check for panic or assert in the output and stop if it 13 | detects them or a test exits with an error. 14 | 15 | ## make-dtrace.sh 16 | Build and package the DTrace scripts for use in the global zone of each sled. 17 | The output of this script is published as an artifact by buildomat. 18 | 19 | ## make-nightly.sh 20 | A simple script to build and package all that is required to run the 21 | test_nightly.sh script. Use this when you want to manually create and 22 | run the nightly tests on a system. 
23 | 24 | ## show_ox_propolis.sh 25 | A sample script that uses `oxdb` and `jq` to dump some oximeter stats 26 | produced from running propolis and requesting metrics. This requires 27 | oximeter running and collecting stats from propolis. 28 | 29 | ## show_ox_stats.sh 30 | A sample script that uses `oxdb` and `jq` to dump some oximeter stats 31 | produced from running downstairs with the `--oximeter` option. This script 32 | is hard coded with a downstairs UUID and is intended to provide a sample to 33 | build off of. 34 | 35 | ## show_ox_upstairs.sh 36 | A sample script that uses `oxdb` and `jq` to dump some oximeter stats 37 | produced from running the upstairs. This script is hard coded with a 38 | downstairs UUID and is intended to provide a sample to build off of. 39 | 40 | ## test_ds.sh 41 | Test import then export for crucible downstairs. 42 | Then, test the clone subcommand and verify that the cloned downstairs 43 | exports the same file as the original downstairs. 44 | 45 | ## test_nightly.sh 46 | This runs a selection of tests from this directory and reports their 47 | results. It is intended to be a test for Crucible that runs nightly 48 | and does deeper/longer tests than what we do as part of every push. 49 | 50 | ## test_repair.sh 51 | A test to break, then repair a downstairs region that is out of sync with 52 | the other regions, in a loop 53 | 54 | ## test_replace_special.sh 55 | A test to verify that we can replace a downstairs while reconciliation is 56 | underway. 57 | 58 | ## test_replay.sh 59 | A test that checks the replay code path, if a downstairs disconnects and 60 | then reconnects, we replay jobs to it. This is a thin wrapper around the 61 | crutest replay test. We use dsc to start and run the downstairs, then 62 | subject the crutest upstairs to disconnecting downstairs. 63 | 64 | ## test_restart_repair.sh 65 | Test the repair process while the downstairs are restarting, in a loop. 
66 | 67 | ## test_up.sh 68 | A simple script that will start three downstairs, then run through some tests in 69 | client/src/main. It's an easy way to quickly run some simple tests without 70 | having to spin up a bunch of things. These tests are limited in their scope and 71 | should not be considered substantial. 72 | 73 | Specify "unencrypted" or "encrypted" when running the script to test both code 74 | paths. 75 | 76 | That's all for now! 77 | -------------------------------------------------------------------------------- /tools/dtrace/all_downstairs.d: -------------------------------------------------------------------------------- 1 | #pragma D option quiet 2 | /* 3 | * Print IO counters for all running downstairs. 4 | */ 5 | crucible_downstairs*:::submit-flush-start 6 | { 7 | @sf_start[pid] = count(); 8 | } 9 | 10 | crucible_downstairs*:::submit-flush-done 11 | { 12 | @sf_done[pid] = count(); 13 | } 14 | 15 | crucible_downstairs*:::submit-write-start 16 | { 17 | @sw_start[pid] = count(); 18 | } 19 | 20 | crucible_downstairs*:::submit-write-done 21 | { 22 | @sw_done[pid] = count(); 23 | } 24 | 25 | crucible_downstairs*:::submit-read-start 26 | { 27 | @sr_start[pid] = count(); 28 | } 29 | 30 | crucible_downstairs*:::submit-read-done 31 | { 32 | @sr_done[pid] = count(); 33 | } 34 | 35 | crucible_downstairs*:::submit-writeunwritten-start 36 | { 37 | @swu_start[pid] = count(); 38 | } 39 | 40 | crucible_downstairs*:::submit-writeunwritten-done 41 | { 42 | @swu_done[pid] = count(); 43 | } 44 | crucible_downstairs*:::work-start 45 | { 46 | @work_start[pid] = count(); 47 | } 48 | crucible_downstairs*:::work-process 49 | { 50 | @work_process[pid] = count(); 51 | } 52 | crucible_downstairs*:::work-done 53 | { 54 | @work_done[pid] = count(); 55 | } 56 | 57 | 58 | tick-4s 59 | { 60 | printf("%5s %4s %4s %4s %4s %5s %5s %5s %5s %5s\n", 61 | "PID", "F>", "F<", "W>", "W<", "R>", "R<", "WS", "WIP", "WD"); 62 | printa("%05d %@4u %@4u %@4u %@4u %@5u %@5u %@5u %@5u %@5u\n", 
63 | @sf_start, @sf_done, @sw_start, @sw_done, @sr_start, @sr_done, 64 | @work_start, @work_process, @work_done 65 | ); 66 | clear(@sf_start); 67 | clear(@sf_done); 68 | clear(@sw_start); 69 | clear(@sw_done); 70 | clear(@sr_start); 71 | clear(@sr_done); 72 | clear(@swu_start); 73 | clear(@swu_done); 74 | clear(@work_start); 75 | clear(@work_process); 76 | clear(@work_done); 77 | } 78 | -------------------------------------------------------------------------------- /tools/dtrace/downstairs_count.d: -------------------------------------------------------------------------------- 1 | #pragma D option quiet 2 | /* 3 | * IO counters for downstairs. 4 | */ 5 | 6 | /* 7 | * Print the header right away 8 | */ 9 | dtrace:::BEGIN 10 | { 11 | show = 21; 12 | } 13 | 14 | crucible_downstairs*:::submit-flush-start 15 | /pid == $1/ 16 | { 17 | @sf_start = count(); 18 | } 19 | 20 | crucible_downstairs*:::submit-flush-done 21 | /pid == $1/ 22 | { 23 | @sf_done = count(); 24 | } 25 | 26 | crucible_downstairs*:::submit-write-start 27 | /pid == $1/ 28 | { 29 | @sw_start = count(); 30 | } 31 | 32 | crucible_downstairs*:::submit-write-done 33 | /pid == $1/ 34 | { 35 | @sw_done = count(); 36 | } 37 | 38 | crucible_downstairs*:::submit-read-start 39 | /pid == $1/ 40 | { 41 | @sr_start = count(); 42 | } 43 | 44 | crucible_downstairs*:::submit-read-done 45 | /pid == $1/ 46 | { 47 | @sr_done = count(); 48 | } 49 | 50 | crucible_downstairs*:::submit-writeunwritten-start 51 | /pid == $1/ 52 | { 53 | @swu_start = count(); 54 | } 55 | 56 | crucible_downstairs*:::submit-writeunwritten-done 57 | /pid == $1/ 58 | { 59 | @swu_done = count(); 60 | } 61 | crucible_downstairs*:::work-start 62 | /pid == $1/ 63 | { 64 | @work_start = count(); 65 | } 66 | crucible_downstairs*:::work-process 67 | /pid == $1/ 68 | { 69 | @work_process = count(); 70 | } 71 | crucible_downstairs*:::work-done 72 | /pid == $1/ 73 | { 74 | @work_done = count(); 75 | } 76 | 77 | /* 78 | * Every second, check and see if we have 
printed enough that it is 79 | * time to print the header again 80 | */ 81 | tick-1s 82 | /show > 20/ 83 | { 84 | printf("%4s %4s %4s %4s %5s %5s", "F>", "F<", "W>", "W<", "R>", "R<"); 85 | printf(" %5s %5s %5s", "WS", "WIP", "WD"); 86 | printf("\n"); 87 | show = 0; 88 | } 89 | 90 | tick-1s 91 | { 92 | printa("%@4u %@4u %@4u %@4u %@5u %@5u %@5u %@5u %@5u", 93 | @sf_start, @sf_done, @sw_start, @sw_done, @sr_start, @sr_done, 94 | @work_start, @work_process, @work_done 95 | ); 96 | printf("\n"); 97 | clear(@sf_start); 98 | clear(@sf_done); 99 | clear(@sw_start); 100 | clear(@sw_done); 101 | clear(@sr_start); 102 | clear(@sr_done); 103 | clear(@swu_start); 104 | clear(@swu_done); 105 | clear(@work_start); 106 | clear(@work_process); 107 | clear(@work_done); 108 | show = show + 1; 109 | } 110 | -------------------------------------------------------------------------------- /tools/dtrace/get-ds-state.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Print a status line for all matching probes. 3 | * Exit after 5 seconds. 4 | */ 5 | #pragma D option quiet 6 | #pragma D option strsize=1k 7 | 8 | /* 9 | * Translate the longer state string into a shorter version 10 | */ 11 | inline string short_state[string ss] = 12 | ss == "active" ? "ACT" : 13 | ss == "new" ? "NEW" : 14 | ss == "replaced" ? "RPL" : 15 | ss == "live_repair_ready" ? "LRR" : 16 | ss == "live_repair" ? "LR" : 17 | ss == "faulted" ? "FLT" : 18 | ss == "offline" ? "OFL" : 19 | ss == "reconcile" ? "REC" : 20 | ss == "wait_quorum" ? "WQ" : 21 | ss == "wait_active" ? "WA" : 22 | ss == "connecting" ? 
"CON" : 23 | ss; 24 | 25 | crucible_upstairs*:::up-status 26 | { 27 | my_id = json(copyinstr(arg1), "ok.upstairs_id"); 28 | my_sesh = json(copyinstr(arg1), "ok.session_id"); 29 | 30 | this->ds0state = json(copyinstr(arg1), "ok.ds_state[0].type"); 31 | this->d0 = short_state[this->ds0state]; 32 | 33 | this->ds1state = json(copyinstr(arg1), "ok.ds_state[1].type"); 34 | this->d1 = short_state[this->ds1state]; 35 | 36 | this->ds2state = json(copyinstr(arg1), "ok.ds_state[2].type"); 37 | this->d2 = short_state[this->ds2state]; 38 | 39 | printf("%6d %8s %8s %3s %3s %3s\n", 40 | pid, 41 | substr(my_id, 0, 8), 42 | substr(my_sesh, 0, 8), 43 | this->d0, 44 | this->d1, 45 | this->d2); 46 | } 47 | 48 | tick-5s 49 | { 50 | exit(0); 51 | } 52 | -------------------------------------------------------------------------------- /tools/dtrace/get-ds-state.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This script will display the downstairs states for each pid/session 4 | # it finds running on a system. 5 | filename='/tmp/get-ds-state.out' 6 | 7 | # Clear out any previous state 8 | echo "" > "$filename" 9 | # Gather state on all running propolis servers, record summary to a file 10 | dtrace -s /opt/oxide/crucible_dtrace/get-ds-state.d | sort -n | uniq | awk 'NF' > "$filename" 11 | # Walk the lines in the file, append the zone name to each line. 12 | while read -r p; do 13 | # For each line in the file, pull out the PID we are looking at and 14 | # use it to find the zone so we can print the zone name as well. 15 | pid=$(echo $p | awk '{print $1}') 16 | zone=$(ps -o zone -p $pid | tail -1 | cut -c 1-28) 17 | # Our zone string size is already set from above, force the 18 | # rest of the line to take up 26 columns, this prevents PIDs 19 | # with fewer than 5 digits from using less columns. 
20 | printf "%s %26s\n" "$zone" "$p" 21 | done < "$filename" 22 | -------------------------------------------------------------------------------- /tools/dtrace/get-lr-state.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Print a live repair status line for all matching probes. 3 | * Exit after 5 seconds. 4 | */ 5 | #pragma D option quiet 6 | #pragma D option strsize=1k 7 | 8 | crucible_upstairs*:::up-status 9 | { 10 | my_id = json(copyinstr(arg1), "ok.upstairs_id"); 11 | my_sesh = json(copyinstr(arg1), "ok.session_id"); 12 | 13 | printf("%6d %8s %8s %s %s %s %s %s %s\n", 14 | pid, 15 | substr(my_id, 0, 8), 16 | substr(my_sesh, 0, 8), 17 | json(copyinstr(arg1), "ok.ds_live_repair_completed[0]"), 18 | json(copyinstr(arg1), "ok.ds_live_repair_completed[1]"), 19 | json(copyinstr(arg1), "ok.ds_live_repair_completed[2]"), 20 | json(copyinstr(arg1), "ok.ds_live_repair_aborted[0]"), 21 | json(copyinstr(arg1), "ok.ds_live_repair_aborted[1]"), 22 | json(copyinstr(arg1), "ok.ds_live_repair_aborted[2]")); 23 | } 24 | 25 | tick-5s 26 | { 27 | exit(0); 28 | } 29 | -------------------------------------------------------------------------------- /tools/dtrace/get-lr-state.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script will display the downstairs live repair for each 4 | # pid/session it finds running on a system. 5 | filename='/tmp/get-lr-state.out' 6 | 7 | # Clear out any previous state 8 | echo "" > "$filename" 9 | # Gather state on all running propolis servers, record summary to a file 10 | dtrace -s /opt/oxide/crucible_dtrace/get-lr-state.d | sort -n | uniq | awk 'NF' > "$filename" 11 | # Walk the lines in the file, append the zone name to each line. 12 | while read -r p; do 13 | # For each line in the file, pull out the PID we are looking at and 14 | # print the zone that process is running in. 
15 | pid=$(echo $p | awk '{print $1}') 16 | zone=$(ps -o zone -p $pid | tail -1 | cut -c 1-28) 17 | # Our zone string size is already set from above, force the 18 | # rest of the line to take up 26 columns, this prevents PIDs 19 | # with fewer than 5 digits from using less columns. 20 | printf "%s %26s\n" "$zone" "$p" 21 | done < "$filename" 22 | -------------------------------------------------------------------------------- /tools/dtrace/get-up-state.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Display Upstairs status for all matching processes 3 | */ 4 | #pragma D option quiet 5 | #pragma D option strsize=1k 6 | 7 | /* 8 | * Print the header right away 9 | */ 10 | dtrace:::BEGIN 11 | { 12 | /* 13 | * We have to init something for last_id so we can use the 14 | * default values for all the session IDs that we don't yet have. 15 | */ 16 | last_id["string"] = (int64_t)1; 17 | printf("%5s %8s ", "PID", "SESSION"); 18 | printf("%3s %3s %3s", "DS0", "DS1", "DS2"); 19 | printf(" %10s %6s %4s", "NEXT_JOB", "DELTA", "CONN"); 20 | printf(" %5s %5s", "ELR", "ELC"); 21 | printf(" %5s %5s", "ERR", "ERN"); 22 | printf("\n"); 23 | } 24 | 25 | /* 26 | * After reporting for 10 seconds, exit 27 | */ 28 | tick-10s 29 | { 30 | exit(0); 31 | } 32 | 33 | /* 34 | * Translate the longer state string into a shorter version 35 | */ 36 | inline string short_state[string ss] = 37 | ss == "active" ? "ACT" : 38 | ss == "new" ? "NEW" : 39 | ss == "replaced" ? "RPL" : 40 | ss == "live_repair_ready" ? "LRR" : 41 | ss == "live_repair" ? "LR" : 42 | ss == "faulted" ? "FLT" : 43 | ss == "offline" ? "OFL" : 44 | ss == "reconcile" ? "REC" : 45 | ss == "wait_quorum" ? "WQ" : 46 | ss == "wait_active" ? "WA" : 47 | ss == "connecting" ? "CON" : 48 | ss; 49 | 50 | /* 51 | * All variables should be this-> 52 | * Otherwise, there is a chance another probe will fire and 53 | * clobber the contents. 
54 | */ 55 | crucible_upstairs*:::up-status 56 | { 57 | this->ds0state = json(copyinstr(arg1), "ok.ds_state[0].type"); 58 | this->d0 = short_state[this->ds0state]; 59 | 60 | this->ds1state = json(copyinstr(arg1), "ok.ds_state[1].type"); 61 | this->d1 = short_state[this->ds1state]; 62 | 63 | this->ds2state = json(copyinstr(arg1), "ok.ds_state[2].type"); 64 | this->d2 = short_state[this->ds2state]; 65 | 66 | this->full_session_id = json(copyinstr(arg1), "ok.session_id"); 67 | this->session_id = substr(this->full_session_id, 0, 8); 68 | 69 | this->next_id_str = json(copyinstr(arg1), "ok.next_job_id"); 70 | this->next_id_value = strtoll(this->next_id_str); 71 | 72 | if (last_id[this->session_id] == 0) { 73 | this->delta = 0; 74 | last_id[this->session_id] = this->next_id_value; 75 | } else { 76 | this->delta = this->next_id_value - last_id[this->session_id]; 77 | } 78 | 79 | /* Total of extents live repaired */ 80 | this->elr = strtoll(json(copyinstr(arg1), "ok.ds_extents_repaired[0]")) + 81 | strtoll(json(copyinstr(arg1), "ok.ds_extents_repaired[1]")) + 82 | strtoll(json(copyinstr(arg1), "ok.ds_extents_repaired[2]")); 83 | /* Total of extents not needing repair during live repair */ 84 | this->elc = strtoll(json(copyinstr(arg1), "ok.ds_extents_confirmed[0]")) + 85 | strtoll(json(copyinstr(arg1), "ok.ds_extents_confirmed[1]")) + 86 | strtoll(json(copyinstr(arg1), "ok.ds_extents_confirmed[2]")); 87 | 88 | this->connections = strtoll(json(copyinstr(arg1), "ok.ds_connected[0]")) + 89 | strtoll(json(copyinstr(arg1), "ok.ds_connected[1]")) + 90 | strtoll(json(copyinstr(arg1), "ok.ds_connected[2]")); 91 | 92 | printf("%5d %8s %3s %3s %3s %10s %6d %4d %5d %5d %5s %5s\n", 93 | pid, 94 | this->session_id, 95 | /* 96 | * State for the three downstairs 97 | */ 98 | this->d0, 99 | this->d1, 100 | this->d2, 101 | 102 | /* 103 | * Job ID, job delta and write bytes outstanding 104 | */ 105 | json(copyinstr(arg1), "ok.next_job_id"), 106 | this->delta, 107 | this->connections, 108 | 
this->elr, 109 | this->elc, 110 | json(copyinstr(arg1), "ok.ds_reconciled"), 111 | json(copyinstr(arg1), "ok.ds_reconcile_needed")); 112 | } 113 | -------------------------------------------------------------------------------- /tools/dtrace/get-up-state.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o pipefail 4 | 5 | filename='/tmp/get-up-state.out' 6 | final='/tmp/get-up-state.final' 7 | rm -f $final 8 | 9 | # Gather our output first. 10 | dtrace -s /opt/oxide/crucible_dtrace/get-up-state.d | awk 'NF' > "$filename" 11 | if [[ $? -ne 0 ]]; then 12 | exit 1 13 | fi 14 | 15 | # For each session we find, get the latest line and store that in 16 | # the result file. 17 | for id in $(cat $filename | grep -v SESSION | awk '{print $2}' | sort -n | uniq); do 18 | # Find our session, then print the final line 19 | grep "$id" "$filename" | tail -1 >> $final 20 | done 21 | # Print the header 22 | grep "SESSION" "$filename" 23 | # Sort our result by PID and print it out. 24 | sort -n < $final 25 | -------------------------------------------------------------------------------- /tools/dtrace/perf-downstairs-os.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Trace all IOs from when the downstairs sent them to the OS for 3 | * servicing (almost, sort of, see the code) to when we got an 4 | * answer back from the OS. 5 | * Group by IO type (R/W/F). 6 | * 7 | * arg0 is the job ID number. 
8 | */ 9 | crucible_downstairs*:::os-*-start 10 | { 11 | start[pid,arg0] = timestamp; 12 | } 13 | 14 | crucible_downstairs*:::os-*-done 15 | /start[pid,arg0]/ 16 | { 17 | strtok(probename, "-"); 18 | this->cmd = strtok(NULL, "-"); 19 | 20 | @time[pid,this->cmd] = quantize(timestamp - start[pid,arg0]); 21 | start[pid,arg0] = 0; 22 | } 23 | -------------------------------------------------------------------------------- /tools/dtrace/perf-downstairs-three.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Watch a downstairs IO. 3 | * 1st report is submit to sending it to the OS. 4 | * 2nd report is OS time (for flush, to flush all extents) 5 | * 3rd report is OS done to sending ACK back to upstairs 6 | * 7 | * arg0 is the job ID number. 8 | */ 9 | crucible_downstairs*:::submit-flush-start 10 | { 11 | start[pid,arg0] = timestamp; 12 | } 13 | 14 | crucible_downstairs*:::os-flush-start 15 | /start[pid,arg0]/ 16 | { 17 | @timeone[pid,"flush submit-OS"] = quantize(timestamp - start[pid,arg0]); 18 | start[pid,arg0] = 0; 19 | substart[pid,arg0] = timestamp; 20 | } 21 | 22 | crucible_downstairs*:::os-flush-done 23 | /substart[pid,arg0]/ 24 | { 25 | @timetwo[pid,"flush OS"] = quantize(timestamp - substart[pid,arg0]); 26 | substart[pid,arg0] = 0; 27 | final[pid,arg0] = timestamp; 28 | } 29 | 30 | crucible_downstairs*:::submit-flush-done 31 | /final[pid,arg0]/ 32 | { 33 | @timethree[pid,"flush OS-done"] = quantize(timestamp - final[pid,arg0]); 34 | final[pid,arg0] = 0; 35 | } 36 | 37 | /* 38 | * Now the same, but for writes 39 | */ 40 | crucible_downstairs*:::submit-write-start 41 | { 42 | wstart[pid,arg0] = timestamp; 43 | } 44 | 45 | crucible_downstairs*:::os-write-start 46 | /wstart[pid,arg0]/ 47 | { 48 | @wtimeone[pid,"write submit-OS"] = quantize(timestamp - wstart[pid,arg0]); 49 | wstart[pid,arg0] = 0; 50 | wsubstart[pid,arg0] = timestamp; 51 | } 52 | 53 | crucible_downstairs*:::os-write-done 54 | /wsubstart[pid,arg0]/ 55 | { 
56 | @wtimetwo[pid,"write OS"] = quantize(timestamp - wsubstart[pid,arg0]); 57 | wsubstart[pid,arg0] = 0; 58 | wfinal[pid,arg0] = timestamp; 59 | } 60 | 61 | crucible_downstairs*:::submit-write-done 62 | /wfinal[pid,arg0]/ 63 | { 64 | @wtimethree[pid,"write OS-done"] = quantize(timestamp - wfinal[pid,arg0]); 65 | wfinal[pid,arg0] = 0; 66 | } 67 | 68 | /* 69 | * Now the same, but for reads 70 | */ 71 | crucible_downstairs*:::submit-read-start 72 | { 73 | wstart[pid,arg0] = timestamp; 74 | } 75 | 76 | crucible_downstairs*:::os-read-start 77 | /wstart[pid,arg0]/ 78 | { 79 | @wtimeone[pid,"read submit-OS"] = quantize(timestamp - wstart[pid,arg0]); 80 | wstart[pid,arg0] = 0; 81 | wsubstart[pid,arg0] = timestamp; 82 | } 83 | 84 | crucible_downstairs*:::os-read-done 85 | /wsubstart[pid,arg0]/ 86 | { 87 | @wtimetwo[pid,"read OS"] = quantize(timestamp - wsubstart[pid,arg0]); 88 | wsubstart[pid,arg0] = 0; 89 | wfinal[pid,arg0] = timestamp; 90 | } 91 | 92 | crucible_downstairs*:::submit-read-done 93 | /wfinal[pid,arg0]/ 94 | { 95 | @wtimethree[pid,"read OS-done"] = quantize(timestamp - wfinal[pid,arg0]); 96 | wfinal[pid,arg0] = 0; 97 | } 98 | 99 | /* 100 | * Now the same, but for write unwritten 101 | */ 102 | crucible_downstairs*:::submit-writeunwritten-start 103 | { 104 | wstart[pid,arg0] = timestamp; 105 | } 106 | 107 | crucible_downstairs*:::os-writeunwritten-start 108 | /wstart[pid,arg0]/ 109 | { 110 | @wtimeone[pid,"read submit-OS"] = quantize(timestamp - wstart[pid,arg0]); 111 | wstart[pid,arg0] = 0; 112 | wsubstart[pid,arg0] = timestamp; 113 | } 114 | 115 | crucible_downstairs*:::os-writeunwritten-done 116 | /wsubstart[pid,arg0]/ 117 | { 118 | @wtimetwo[pid,"read OS"] = quantize(timestamp - wsubstart[pid,arg0]); 119 | wsubstart[pid,arg0] = 0; 120 | wfinal[pid,arg0] = timestamp; 121 | } 122 | 123 | crucible_downstairs*:::submit-writeunwritten-done 124 | /wfinal[pid,arg0]/ 125 | { 126 | @wtimethree[pid,"read OS-done"] = quantize(timestamp - wfinal[pid,arg0]); 127 | 
wfinal[pid,arg0] = 0; 128 | } 129 | -------------------------------------------------------------------------------- /tools/dtrace/perf-downstairs-tick.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Watch a downstairs IO. 3 | * 1st report is submit to sending it to the OS. 4 | * 2nd report is OS time (for flush, to flush all extents) 5 | * 3rd report is OS done to sending ACK back to upstairs 6 | * 7 | * arg0 is the job ID number. 8 | */ 9 | crucible_downstairs*:::submit-flush-start 10 | { 11 | start[pid,arg0] = timestamp; 12 | } 13 | 14 | crucible_downstairs*:::os-flush-start 15 | /start[pid,arg0]/ 16 | { 17 | @timeone[pid,"flush submit-OS"] = quantize(timestamp - start[pid,arg0]); 18 | start[pid,arg0] = 0; 19 | substart[pid,arg0] = timestamp; 20 | } 21 | 22 | crucible_downstairs*:::os-flush-done 23 | /substart[pid,arg0]/ 24 | { 25 | @timetwo[pid,"flush OS"] = quantize(timestamp - substart[pid,arg0]); 26 | substart[pid,arg0] = 0; 27 | final[pid,arg0] = timestamp; 28 | } 29 | 30 | crucible_downstairs*:::submit-flush-done 31 | /final[pid,arg0]/ 32 | { 33 | @timethree[pid,"flush OS-done"] = quantize(timestamp - final[pid,arg0]); 34 | final[pid,arg0] = 0; 35 | } 36 | 37 | /* 38 | * Now the same, but for writes 39 | */ 40 | crucible_downstairs*:::submit-write-start 41 | { 42 | start[pid,arg0] = timestamp; 43 | } 44 | 45 | crucible_downstairs*:::os-write-start 46 | /start[pid,arg0]/ 47 | { 48 | @timeone[pid,"write submit-OS"] = quantize(timestamp - start[pid,arg0]); 49 | start[pid,arg0] = 0; 50 | substart[pid,arg0] = timestamp; 51 | } 52 | 53 | crucible_downstairs*:::os-write-done 54 | /substart[pid,arg0]/ 55 | { 56 | @timetwo[pid,"write OS"] = quantize(timestamp - substart[pid,arg0]); 57 | substart[pid,arg0] = 0; 58 | final[pid,arg0] = timestamp; 59 | } 60 | 61 | crucible_downstairs*:::submit-write-done 62 | /final[pid,arg0]/ 63 | { 64 | @timethree[pid,"write OS-done"] = quantize(timestamp - final[pid,arg0]); 65 | 
final[pid,arg0] = 0; 66 | } 67 | 68 | /* 69 | * Now the same, but for reads 70 | */ 71 | crucible_downstairs*:::submit-read-start 72 | { 73 | start[pid,arg0] = timestamp; 74 | } 75 | 76 | crucible_downstairs*:::os-read-start 77 | /start[pid,arg0]/ 78 | { 79 | @timeone[pid,"read submit-OS"] = quantize(timestamp - start[pid,arg0]); 80 | start[pid,arg0] = 0; 81 | substart[pid,arg0] = timestamp; 82 | } 83 | 84 | crucible_downstairs*:::os-read-done 85 | /substart[pid,arg0]/ 86 | { 87 | @timetwo[pid,"read OS"] = quantize(timestamp - substart[pid,arg0]); 88 | substart[pid,arg0] = 0; 89 | final[pid,arg0] = timestamp; 90 | } 91 | 92 | crucible_downstairs*:::submit-read-done 93 | /final[pid,arg0]/ 94 | { 95 | @timethree[pid,"read OS-done"] = quantize(timestamp - final[pid,arg0]); 96 | final[pid,arg0] = 0; 97 | } 98 | 99 | /* 100 | * Now the same, but for write unwritten 101 | */ 102 | crucible_downstairs*:::submit-writeunwritten-start 103 | { 104 | start[pid,arg0] = timestamp; 105 | } 106 | 107 | crucible_downstairs*:::os-writeunwritten-start 108 | /start[pid,arg0]/ 109 | { 110 | @timeone[pid,"writeunwritten submit-OS"] = quantize(timestamp - start[pid,arg0]); 111 | start[pid,arg0] = 0; 112 | substart[pid,arg0] = timestamp; 113 | } 114 | 115 | crucible_downstairs*:::os-writeunwritten-done 116 | /substart[pid,arg0]/ 117 | { 118 | @timetwo[pid,"writeunwritten OS"] = quantize(timestamp - substart[pid,arg0]); 119 | substart[pid,arg0] = 0; 120 | final[pid,arg0] = timestamp; 121 | } 122 | 123 | crucible_downstairs*:::submit-writeunwritten-done 124 | /final[pid,arg0]/ 125 | { 126 | /* Labels fixed: this section previously reported under the "read" keys */ 127 | @timethree[pid,"writeunwritten OS-done"] = quantize(timestamp - final[pid,arg0]); 128 | final[pid,arg0] = 0; 129 | } 130 | 131 | tick-60s 132 | { 133 | printa(@timeone) 134 | } 135 | 136 | tick-60s 137 | { 138 | printa(@timetwo) 139 | } 140 | 141 | tick-60s 142 | { 143 | printa(@timethree) 144 | } 145 | -------------------------------------------------------------------------------- /tools/dtrace/perf-downstairs.d: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Trace all IOs from when the downstairs received them to when the 3 | * downstairs has completed them and is about to ack to the upstairs. 4 | * Group by IO type (R/W/F). 5 | * 6 | * arg0 is the job ID number. 7 | */ 8 | crucible_downstairs*:::submit-*-start 9 | { 10 | start[pid,arg0] = timestamp; 11 | } 12 | 13 | crucible_downstairs*:::submit-*-done 14 | /start[pid,arg0]/ 15 | { 16 | strtok(probename, "-"); 17 | this->cmd = strtok(NULL, "-"); 18 | 19 | @time[pid,this->cmd] = quantize(timestamp - start[pid,arg0]); 20 | start[pid,arg0] = 0; 21 | } 22 | 23 | -------------------------------------------------------------------------------- /tools/dtrace/perf-ds-client.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Trace all IOs from the Upstairs for each Downstairs from the time they 3 | * are sent to the client task who handles the network transmission to the 4 | * time the result message is returned to the main task and processing 5 | * is about to begin. 6 | * Group by IO type (R/W/F) and client ID (Which downstairs). 7 | * 8 | * arg0 is the job ID number. 
9 | * arg1 is the client ID 10 | */ 11 | crucible_upstairs*:::ds-*-client-start 12 | { 13 | start[arg0, arg1] = timestamp; 14 | } 15 | 16 | crucible_upstairs*:::ds-*-client-done 17 | /start[arg0, arg1]/ 18 | { 19 | strtok(probename, "-"); 20 | this->cmd = strtok(NULL, "-"); 21 | 22 | @time[strjoin(this->cmd, " for downstairs client"), arg1] = 23 | quantize(timestamp - start[arg0, arg1]); 24 | start[arg0, arg1] = 0; 25 | } 26 | -------------------------------------------------------------------------------- /tools/dtrace/perf-ds-net.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Trace all IOs from the Upstairs for each Downstairs from the time they 3 | * are sent over the network socket to when the ack for an IO is received 4 | * back in the Upstairs. 5 | * Group by IO type (R/W/F) and client ID (Which downstairs). 6 | * 7 | * arg0 is the job ID number. 8 | * arg1 is the client ID 9 | */ 10 | crucible_upstairs*:::ds-*-net-start 11 | { 12 | start[arg0, arg1] = timestamp; 13 | } 14 | 15 | crucible_upstairs*:::ds-*-net-done 16 | /start[arg0, arg1]/ 17 | { 18 | strtok(probename, "-"); 19 | this->cmd = strtok(NULL, "-"); 20 | 21 | @time[strjoin(this->cmd, " for downstairs client"), arg1] = 22 | quantize(timestamp - start[arg0, arg1]); 23 | start[arg0, arg1] = 0; 24 | } 25 | -------------------------------------------------------------------------------- /tools/dtrace/perf-online-repair.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Track the time it takes for the repair of an extent. 
3 | */ 4 | crucible_upstairs*:::extent-or-start 5 | { 6 | start[arg0] = timestamp; 7 | } 8 | 9 | crucible_upstairs*:::extent-or-done 10 | /start[arg0]/ 11 | { 12 | strtok(probename, "-"); 13 | this->cmd = strtok(NULL, "-"); 14 | @time[this->cmd] = quantize(timestamp - start[arg0]); 15 | start[arg0] = 0; 16 | } 17 | -------------------------------------------------------------------------------- /tools/dtrace/perf-reqwest.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Trace read ReqwestBlockIO. 3 | */ 4 | crucible_upstairs*:::reqwest-read-start 5 | { 6 | start[arg0, json(copyinstr(arg1), "ok")] = timestamp; 7 | } 8 | 9 | crucible_upstairs*:::reqwest-read-done 10 | /start[arg0, json(copyinstr(arg1), "ok")]/ 11 | { 12 | this->uuid = json(copyinstr(arg1), "ok"); 13 | @time[this->uuid, "reqwest-read"] = quantize(timestamp - start[arg0, this->uuid]); 14 | start[arg0, this->uuid] = 0; 15 | } 16 | 17 | tick-5s 18 | { 19 | printa(@time) 20 | } 21 | -------------------------------------------------------------------------------- /tools/dtrace/perf-upstairs-wf.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Watch upstairs flush and write IOs. 3 | * Report on: 4 | * 1: From IO received in the upstairs to IO being submitted to the 5 | * queue of work for the three downstairs. 6 | * 2: From IO on downstairs queue, to enough downstairs completing the 7 | * IO that it is ready to ack. 8 | * 3: From the IO being ready to ack, to that ack being sent. 9 | * 10 | * arg0 is the job ID number. 
11 | */ 12 | crucible_upstairs*:::gw-flush-start, 13 | crucible_upstairs*:::gw-write-start 14 | { 15 | start[arg0] = timestamp; 16 | } 17 | 18 | crucible_upstairs*:::up-to-ds-flush-start, 19 | crucible_upstairs*:::up-to-ds-write-start 20 | /start[arg0]/ 21 | { 22 | @[probename] = quantize(timestamp - start[arg0]); 23 | start[arg0] = 0; 24 | substart[arg0] = timestamp; 25 | } 26 | 27 | crucible_upstairs*:::gw-flush-done, 28 | crucible_upstairs*:::gw-write-done 29 | /substart[arg0]/ 30 | { 31 | @[probename] = quantize(timestamp - substart[arg0]); 32 | substart[arg0] = 0; 33 | } 34 | -------------------------------------------------------------------------------- /tools/dtrace/perf-vol.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Trace and time all the volume IOs. 3 | * 4 | * If you want a specific UUID only, do something like 5 | * this to the volume-*-start: 6 | * 7 | * /json(copyinstr(arg1), "ok")] == "SOME_UUID_YOU_WANT"/ 8 | */ 9 | crucible_upstairs*:::volume-*-start 10 | { 11 | start[arg0, json(copyinstr(arg1), "ok")] = timestamp; 12 | } 13 | 14 | crucible_upstairs*:::volume-*-done 15 | /start[arg0, json(copyinstr(arg1), "ok")]/ 16 | { 17 | strtok(probename, "-"); 18 | this->cmd = strtok(NULL, "-"); 19 | this->uuid = json(copyinstr(arg1), "ok"); 20 | @time[this->uuid, this->cmd] = quantize(timestamp - start[arg0, this->uuid]); 21 | start[arg0, this->uuid] = 0; 22 | } 23 | 24 | tick-5s 25 | { 26 | printa(@time) 27 | } 28 | -------------------------------------------------------------------------------- /tools/dtrace/perfgw.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Trace all the guest submitted and completed IOs. 
3 | */ 4 | crucible_upstairs*:::gw-*-start 5 | { 6 | start[arg0] = timestamp; 7 | } 8 | 9 | crucible_upstairs*:::gw-*-done 10 | /start[arg0]/ 11 | { 12 | strtok(probename, "-"); 13 | this->cmd = strtok(NULL, "-"); 14 | @time[this->cmd] = quantize(timestamp - start[arg0]); 15 | start[arg0] = 0; 16 | } 17 | -------------------------------------------------------------------------------- /tools/dtrace/simple.d: -------------------------------------------------------------------------------- 1 | #pragma D option quiet 2 | #pragma D option strsize=1k 3 | 4 | dtrace:::BEGIN 5 | { 6 | /* 7 | * We have to init something for the associative array last_id. 8 | * This means it will be created and later, when we have a 9 | * session ID, we can add that element. 10 | */ 11 | last_id["string"] = (int64_t)1; 12 | } 13 | 14 | /* 15 | * Print our header at some interval 16 | */ 17 | dtrace:::BEGIN, tick-20s 18 | { 19 | printf("%5s %8s %8s", "PID", "UUID", "SESSION"); 20 | printf(" %3s %3s %3s", "DS0", "DS1", "DS2"); 21 | printf(" %10s %6s %4s", "NEXT_JOB", "DELTA", "CONN"); 22 | printf(" %5s %5s", "ELR", "ELC"); 23 | printf(" %5s %5s", "ERR", "ERN"); 24 | printf("\n"); 25 | } 26 | 27 | /* 28 | * Translate the longer state string into a shorter version 29 | */ 30 | inline string short_state[string ss] = 31 | ss == "active" ? "ACT" : 32 | ss == "new" ? "NEW" : 33 | ss == "live_repair_ready" ? "LRR" : 34 | ss == "live_repair" ? "LR" : 35 | ss == "faulted" ? "FLT" : 36 | ss == "offline" ? "OFL" : 37 | ss == "reconcile" ? "REC" : 38 | ss == "wait_quorum" ? "WQ" : 39 | ss == "wait_active" ? "WA" : 40 | ss == "replaced" ? "RPL" : 41 | ss == "connecting" ? "CON" : 42 | ss; 43 | 44 | /* 45 | * All variables should be this-> 46 | * Otherwise, there is a chance another probe will fire and 47 | * clobber the contents. 
48 | */ 49 | crucible_upstairs*:::up-status 50 | { 51 | this->ds0state = json(copyinstr(arg1), "ok.ds_state[0].type"); 52 | this->d0 = short_state[this->ds0state]; 53 | 54 | this->ds1state = json(copyinstr(arg1), "ok.ds_state[1].type"); 55 | this->d1 = short_state[this->ds1state]; 56 | 57 | this->ds2state = json(copyinstr(arg1), "ok.ds_state[2].type"); 58 | this->d2 = short_state[this->ds2state]; 59 | 60 | this->full_upstairs_id = json(copyinstr(arg1), "ok.upstairs_id"); 61 | this->upstairs_id = substr(this->full_upstairs_id, 0, 8); 62 | 63 | this->full_session_id = json(copyinstr(arg1), "ok.session_id"); 64 | this->session_id = substr(this->full_session_id, 0, 8); 65 | 66 | this->next_id_str = json(copyinstr(arg1), "ok.next_job_id"); 67 | this->next_id_value = strtoll(this->next_id_str); 68 | 69 | /* 70 | * The first time through, we don't know delta, so start with 0. 71 | */ 72 | if (last_id[this->session_id] == 0) { 73 | this->delta = 0; 74 | last_id[this->session_id] = this->next_id_value; 75 | } else { 76 | this->delta = this->next_id_value - last_id[this->session_id]; 77 | last_id[this->session_id] = this->next_id_value; 78 | } 79 | 80 | this->connections = strtoll(json(copyinstr(arg1), "ok.ds_connected[0]")) + 81 | strtoll(json(copyinstr(arg1), "ok.ds_connected[1]")) + 82 | strtoll(json(copyinstr(arg1), "ok.ds_connected[2]")); 83 | 84 | /* Total of extents live repaired */ 85 | this->elr = strtoll(json(copyinstr(arg1), "ok.ds_extents_repaired[0]")) + 86 | strtoll(json(copyinstr(arg1), "ok.ds_extents_repaired[1]")) + 87 | strtoll(json(copyinstr(arg1), "ok.ds_extents_repaired[2]")); 88 | 89 | /* Total of extents not needing repair during live repair */ 90 | this->elc = strtoll(json(copyinstr(arg1), "ok.ds_extents_confirmed[0]")) + 91 | strtoll(json(copyinstr(arg1), "ok.ds_extents_confirmed[1]")) + 92 | strtoll(json(copyinstr(arg1), "ok.ds_extents_confirmed[2]")); 93 | 94 | printf("%5d %8s %8s %3s %3s %3s %10d %6d %4d %5d %5d %5s %5s\n", 95 | pid, 96 | 
this->upstairs_id, 97 | this->session_id, 98 | this->d0, 99 | this->d1, 100 | this->d2, 101 | this->next_id_value, 102 | this->delta, 103 | this->connections, 104 | this->elr, 105 | this->elc, 106 | json(copyinstr(arg1), "ok.ds_reconciled"), 107 | json(copyinstr(arg1), "ok.ds_reconcile_needed")); 108 | } 109 | -------------------------------------------------------------------------------- /tools/dtrace/trace-vol.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Trace all the volume submitted and completed IOs. 3 | */ 4 | #pragma D option quiet 5 | crucible_upstairs*:::volume-*-start 6 | { 7 | uuid = json(copyinstr(arg1), "ok"); 8 | @io_count[uuid, probename] = count(); 9 | } 10 | crucible_upstairs*:::volume-*-done 11 | { 12 | uuid = json(copyinstr(arg1), "ok"); 13 | @io_count[uuid, probename] = count(); 14 | } 15 | 16 | END 17 | { 18 | printa(@io_count); 19 | } 20 | -------------------------------------------------------------------------------- /tools/dtrace/tracegw.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Trace all the guest submitted and completed IOs. 
3 | */ 4 | #pragma D option quiet 5 | crucible_upstairs*:::gw-read-start 6 | { 7 | @read_start = count(); 8 | } 9 | crucible_upstairs*:::gw-read-done 10 | { 11 | @read_done = count(); 12 | } 13 | crucible_upstairs*:::gw-write-start 14 | { 15 | @write_start = count(); 16 | } 17 | crucible_upstairs*:::gw-write-done 18 | { 19 | @write_done = count(); 20 | } 21 | crucible_upstairs*:::gw-flush-start 22 | { 23 | @flush_start = count(); 24 | } 25 | crucible_upstairs*:::gw-flush-done 26 | { 27 | @flush_done = count(); 28 | } 29 | 30 | END 31 | { 32 | printa(" read-start:%@d read-done:%@d\n", @read_start, @read_done); 33 | printa("write-start:%@d write-done:%@d\n", @write_start, @write_done); 34 | printa("flush-start:%@d flush-done:%@d\n", @flush_start, @flush_done); 35 | } 36 | -------------------------------------------------------------------------------- /tools/dtrace/upstairs_action.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Display internal Upstairs status. 
3 | */ 4 | #pragma D option quiet 5 | #pragma D option strsize=1k 6 | /* 7 | * Print the header right away 8 | */ 9 | dtrace:::BEGIN 10 | { 11 | show = 21; 12 | } 13 | 14 | /* 15 | * Every second, check and see if we have printed enough that it is 16 | * time to print the header again 17 | */ 18 | tick-1s 19 | /show > 20/ 20 | { 21 | printf("%9s %9s %9s", "APPLY", "DOWN_S", "GUEST"); 22 | printf(" %9s %9s %9s", "DFR_BLK", "DFR_MSG", "LEAK_CHK"); 23 | printf(" %9s %9s", "FLUSH_CHK", "STAT_CHK"); 24 | printf(" %9s %9s", "CTRL_CHK", "NOOP"); 25 | printf("\n"); 26 | show = 0; 27 | } 28 | 29 | crucible_upstairs*:::up-status 30 | { 31 | show = show + 1; 32 | printf("%9s", json(copyinstr(arg1), "ok.up_counters.apply")); 33 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_downstairs")); 34 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_guest")); 35 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_deferred_block")); 36 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_deferred_message")); 37 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_leak_check")); 38 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_flush_check")); 39 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_stat_check")); 40 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_control_check")); 41 | printf(" %9s", json(copyinstr(arg1), "ok.up_counters.action_noop")); 42 | printf("\n"); 43 | } 44 | -------------------------------------------------------------------------------- /tools/dtrace/upstairs_count.d: -------------------------------------------------------------------------------- 1 | #pragma D option quiet 2 | /* 3 | * IO counters for upstairs. 
4 | */ 5 | 6 | /* 7 | * Print the header right away 8 | */ 9 | dtrace:::BEGIN 10 | { 11 | show = 21; 12 | } 13 | 14 | crucible_upstairs*:::gw-flush-start 15 | /pid == $1/ 16 | { 17 | @flush_start = count(); 18 | } 19 | 20 | crucible_upstairs*:::gw-flush-done 21 | /pid == $1/ 22 | { 23 | @flush_done = count(); 24 | } 25 | 26 | crucible_upstairs*:::gw-write-start 27 | /pid == $1/ 28 | { 29 | @write_start = count(); 30 | } 31 | 32 | crucible_upstairs*:::gw-write-done 33 | /pid == $1/ 34 | { 35 | @write_done = count(); 36 | } 37 | 38 | crucible_upstairs*:::gw-read-start 39 | /pid == $1/ 40 | { 41 | @read_start = count(); 42 | } 43 | 44 | crucible_upstairs*:::gw-read-done 45 | /pid == $1/ 46 | { 47 | @read_done = count(); 48 | } 49 | 50 | crucible_upstairs*:::gw-write-unwritten-start 51 | /pid == $1/ 52 | { 53 | @write_unwritten_start = count(); 54 | } 55 | 56 | crucible_upstairs*:::gw-write-unwritten-done 57 | /pid == $1/ 58 | { 59 | @write_unwritten_done = count(); 60 | } 61 | 62 | crucible_upstairs*:::gw-barrier-start 63 | /pid == $1/ 64 | { 65 | @barrier_start = count(); 66 | } 67 | 68 | crucible_upstairs*:::gw-barrier-done 69 | /pid == $1/ 70 | { 71 | @barrier_done = count(); 72 | } 73 | 74 | /* 75 | * Every second, check and see if we have printed enough that it is 76 | * time to print the header again 77 | */ 78 | tick-1s 79 | /show > 20/ 80 | { 81 | printf("%5s %5s %5s %5s %5s %5s %5s %5s %5s %5s", 82 | "F>", "F<", "W>", "W<", "R>", "R<", "WU>", "WU<", "B>", "B<"); 83 | printf("\n"); 84 | show = 0; 85 | } 86 | 87 | tick-1s 88 | { 89 | printa("%@5u %@5u %@5u %@5u %@5u %@5u %@5u %@5u %@5u %@5u", 90 | @flush_start, @flush_done, @write_start, @write_done, 91 | @read_start, @read_done, @write_unwritten_start, @write_unwritten_done, 92 | @barrier_start, @barrier_done 93 | ); 94 | printf("\n"); 95 | clear(@flush_start); 96 | clear(@flush_done); 97 | clear(@write_start); 98 | clear(@write_done); 99 | clear(@read_start); 100 | clear(@read_done); 101 | 
clear(@write_unwritten_start); 102 | clear(@write_unwritten_done); 103 | clear(@barrier_start); 104 | clear(@barrier_done); 105 | show = show + 1; 106 | } 107 | -------------------------------------------------------------------------------- /tools/dtrace/upstairs_info.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Display internal Upstairs status. 3 | */ 4 | #pragma D option quiet 5 | #pragma D option strsize=1k 6 | /* 7 | * Print the header right away 8 | */ 9 | dtrace:::BEGIN 10 | { 11 | show = 21; 12 | } 13 | 14 | /* 15 | * Every second, check and see if we have printed enough that it is 16 | * time to print the header again 17 | */ 18 | dtrace:::BEGIN, tick-1s 19 | /show > 20/ 20 | { 21 | printf("%6s ", "PID"); 22 | printf("%3s %3s %3s", "DS0", "DS1", "DS2"); 23 | printf(" %5s %5s %10s", "UPW", "DSW", "JOBID"); 24 | printf(" %10s", "WRITE_BO"); 25 | printf(" %5s %5s %5s", "IP0", "IP1", "IP2"); 26 | printf(" %5s %5s %5s", "D0", "D1", "D2"); 27 | printf(" %5s %5s %5s", "S0", "S1", "S2"); 28 | printf("\n"); 29 | show = 0; 30 | } 31 | 32 | /* 33 | * Translate the longer state string into a shorter version 34 | */ 35 | inline string short_state[string ss] = 36 | ss == "active" ? "ACT" : 37 | ss == "new" ? "NEW" : 38 | ss == "live_repair_ready" ? "LRR" : 39 | ss == "live_repair" ? "LR" : 40 | ss == "faulted" ? "FLT" : 41 | ss == "offline" ? "OFL" : 42 | ss == "reconcile" ? "REC" : 43 | ss == "wait_quorum" ? "WQ" : 44 | ss == "wait_active" ? "WA" : 45 | ss == "replaced" ? "RPL" : 46 | ss == "connecting" ? 
"CON" : 47 | ss; 48 | 49 | crucible_upstairs*:::up-status 50 | { 51 | show = show + 1; 52 | this->ds0state = json(copyinstr(arg1), "ok.ds_state[0].type"); 53 | this->d0 = short_state[this->ds0state]; 54 | 55 | this->ds1state = json(copyinstr(arg1), "ok.ds_state[1].type"); 56 | this->d1 = short_state[this->ds1state]; 57 | 58 | this->ds2state = json(copyinstr(arg1), "ok.ds_state[2].type"); 59 | this->d2 = short_state[this->ds2state]; 60 | 61 | printf("%6d", pid); 62 | /* 63 | * State for the three downstairs 64 | */ 65 | printf(" %3s", this->d0); 66 | printf(" %3s", this->d1); 67 | printf(" %3s", this->d2); 68 | 69 | /* 70 | * Work queue counts for Upstairs and Downstairs 71 | */ 72 | printf(" %5s", json(copyinstr(arg1), "ok.up_count")); 73 | printf(" %5s", json(copyinstr(arg1), "ok.ds_count")); 74 | 75 | /* 76 | * Job ID and outstanding bytes 77 | */ 78 | printf(" %10s", json(copyinstr(arg1), "ok.next_job_id")); 79 | printf(" %10s", json(copyinstr(arg1), "ok.write_bytes_out")); 80 | 81 | /* 82 | * In progress jobs on the work list for each downstairs 83 | */ 84 | printf(" "); 85 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.in_progress[0]")); 86 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.in_progress[1]")); 87 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.in_progress[2]")); 88 | 89 | /* 90 | * Completed (done) jobs on the work list for each downstairs 91 | */ 92 | printf(" "); 93 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.done[0]")); 94 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.done[1]")); 95 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.done[2]")); 96 | 97 | /* 98 | * Skipped jobs on the work list for each downstairs 99 | */ 100 | printf(" "); 101 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.skipped[0]")); 102 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.skipped[1]")); 103 | printf(" %5s", json(copyinstr(arg1), "ok.ds_io_count.skipped[2]")); 104 | 105 | printf("\n"); 106 | } 107 | 
-------------------------------------------------------------------------------- /tools/dtrace/upstairs_raw.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Dump the dtrace up-status in json format 3 | * This can be used to pipe to other command that 4 | * can display whatever fields of the structure you wish. 5 | */ 6 | #pragma D option quiet 7 | #pragma D option strsize=1k 8 | crucible_upstairs*:::up-status 9 | { 10 | trace(json(copyinstr(arg1), "ok")); 11 | printf("\n"); 12 | } 13 | -------------------------------------------------------------------------------- /tools/dtrace/upstairs_repair.d: -------------------------------------------------------------------------------- 1 | /* 2 | * Display internal Upstairs live repair status. 3 | */ 4 | #pragma D option quiet 5 | #pragma D option strsize=1k 6 | /* 7 | * Print the header right away 8 | */ 9 | dtrace:::BEGIN 10 | { 11 | show = 21; 12 | } 13 | 14 | /* 15 | * Every second, check and see if we have printed enough that it is 16 | * time to print the header again 17 | */ 18 | dtrace:::BEGIN, tick-1s 19 | /show > 20/ 20 | { 21 | printf("%3s %3s %3s", "DS0", "DS1", "DS2"); 22 | printf(" "); 23 | 24 | /* Header width, three downstairs, space between: 5+1+5+1+5 = 17 */ 25 | printf(" %17s %17s", 26 | "LR_COMPLETED", "LR_ABORTED"); 27 | printf(" %17s %17s", 28 | "CONNECTED", "REPLACED"); 29 | printf(" %17s %17s", 30 | "EXTENTS_REPAIRED", "EXTENTS_CONFIRMED"); 31 | printf("\n"); 32 | show = 0; 33 | } 34 | 35 | /* 36 | * Translate the longer state string into a shorter version 37 | */ 38 | inline string short_state[string ss] = 39 | ss == "active" ? "ACT" : 40 | ss == "new" ? "NEW" : 41 | ss == "live_repair_ready" ? "LRR" : 42 | ss == "live_repair" ? "LR" : 43 | ss == "faulted" ? "FLT" : 44 | ss == "offline" ? "OFL" : 45 | ss == "reconcile" ? "REC" : 46 | ss == "wait_quorum" ? "WQ" : 47 | ss == "wait_active" ? "WA" : 48 | ss == "replaced" ? "RPL" : 49 | ss == "connecting" ? 
"CON" : 50 | ss; 51 | 52 | crucible_upstairs*:::up-status 53 | { 54 | show = show + 1; 55 | /* 56 | * State for the three downstairs 57 | */ 58 | 59 | this->ds0state = json(copyinstr(arg1), "ok.ds_state[0].type"); 60 | this->d0 = short_state[this->ds0state]; 61 | 62 | this->ds1state = json(copyinstr(arg1), "ok.ds_state[1].type"); 63 | this->d1 = short_state[this->ds1state]; 64 | 65 | this->ds2state = json(copyinstr(arg1), "ok.ds_state[2].type"); 66 | this->d2 = short_state[this->ds2state]; 67 | 68 | printf("%3s", this->d0); 69 | printf(" %3s", this->d1); 70 | printf(" %3s", this->d2); 71 | 72 | printf(" "); 73 | printf(" %5s", json(copyinstr(arg1), "ok.ds_live_repair_completed[0]")); 74 | printf(" %5s", json(copyinstr(arg1), "ok.ds_live_repair_completed[1]")); 75 | printf(" %5s", json(copyinstr(arg1), "ok.ds_live_repair_completed[2]")); 76 | 77 | printf(" %5s", json(copyinstr(arg1), "ok.ds_live_repair_aborted[0]")); 78 | printf(" %5s", json(copyinstr(arg1), "ok.ds_live_repair_aborted[1]")); 79 | printf(" %5s", json(copyinstr(arg1), "ok.ds_live_repair_aborted[2]")); 80 | 81 | printf(" %5s", json(copyinstr(arg1), "ok.ds_connected[0]")); 82 | printf(" %5s", json(copyinstr(arg1), "ok.ds_connected[1]")); 83 | printf(" %5s", json(copyinstr(arg1), "ok.ds_connected[2]")); 84 | 85 | printf(" %5s", json(copyinstr(arg1), "ok.ds_replaced[0]")); 86 | printf(" %5s", json(copyinstr(arg1), "ok.ds_replaced[1]")); 87 | printf(" %5s", json(copyinstr(arg1), "ok.ds_replaced[2]")); 88 | 89 | printf(" %5s", json(copyinstr(arg1), "ok.ds_extents_repaired[0]")); 90 | printf(" %5s", json(copyinstr(arg1), "ok.ds_extents_repaired[1]")); 91 | printf(" %5s", json(copyinstr(arg1), "ok.ds_extents_repaired[2]")); 92 | 93 | printf(" %5s", json(copyinstr(arg1), "ok.ds_extents_confirmed[0]")); 94 | printf(" %5s", json(copyinstr(arg1), "ok.ds_extents_confirmed[1]")); 95 | printf(" %5s", json(copyinstr(arg1), "ok.ds_extents_confirmed[2]")); 96 | 97 | printf("\n"); 98 | } 99 | 
-------------------------------------------------------------------------------- /tools/make-dtrace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Build a tar archive with selected DTrace scripts. 4 | # This archive is used to install DTrace scripts on the global zone of each 5 | # sled. As such, the scripts here should match the probes that exist for 6 | # any consumer of the upstairs, like propolis, the pantry, or crucible agent. 7 | set -eux 8 | 9 | rm -f out/crucible-dtrace.tar 2> /dev/null 10 | 11 | mkdir -p out 12 | 13 | 14 | # Write the header and all metadata to the same file; it previously went to 15 | # /tmp/dtrace-info.txt while the appends below targeted ./dtrace-info.txt, 16 | # so the archived file was missing the date/hostname header. 17 | echo "$(date) Create DTrace archive on $(hostname)" > dtrace-info.txt 18 | echo "git log -1:" >> dtrace-info.txt 19 | git log -1 >> dtrace-info.txt 20 | echo "git status:" >> dtrace-info.txt 21 | git status >> dtrace-info.txt 22 | mv dtrace-info.txt tools/dtrace 23 | 24 | pushd tools/dtrace 25 | tar cvf ../../out/crucible-dtrace.tar \ 26 | dtrace-info.txt \ 27 | README.md \ 28 | all_downstairs.d \ 29 | downstairs_count.d \ 30 | get-ds-state.d \ 31 | get-ds-state.sh \ 32 | get-lr-state.d \ 33 | get-lr-state.sh \ 34 | get-up-state.d \ 35 | get-up-state.sh \ 36 | perf-downstairs-os.d \ 37 | perf-downstairs-three.d \ 38 | perf-downstairs-tick.d \ 39 | perf-downstairs.d \ 40 | perf-ds-client.d \ 41 | perf-ds-net.d \ 42 | perf-online-repair.d \ 43 | perf-reqwest.d \ 44 | perf-upstairs-wf.d \ 45 | perf-vol.d \ 46 | perfgw.d \ 47 | single_up_info.d \ 48 | sled_upstairs_info.d \ 49 | trace-vol.d \ 50 | tracegw.d \ 51 | up-info.d \ 52 | upstairs_action.d \ 53 | upstairs_count.d \ 54 | upstairs_info.d \ 55 | upstairs_raw.d \ 56 | upstairs_repair.d 57 | 58 | rm dtrace-info.txt 59 | popd 60 | ls -l out/crucible-dtrace.tar 61 | -------------------------------------------------------------------------------- /tools/make-nightly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eux 3 | 4 | cargo build --release 
--all-features 5 | 6 | rm -f out/crucible-nightly.tar.gz 2> /dev/null 7 | 8 | mkdir -p out 9 | 10 | # Header must go to the same file the appends below use; it previously went 11 | # to /tmp/nightly-info.txt and was silently dropped from the archive. 12 | echo "$(date) Create nightly archive on $(hostname)" > nightly-info.txt 13 | echo "git log -1:" >> nightly-info.txt 14 | git log -1 >> nightly-info.txt 15 | echo "git status:" >> nightly-info.txt 16 | git status >> nightly-info.txt 17 | 18 | tar cavf out/crucible-nightly.tar.gz \ 19 | target/release/cmon \ 20 | target/release/crudd \ 21 | target/release/crutest \ 22 | target/release/crucible-agent \ 23 | target/release/crucible-downstairs \ 24 | target/release/crucible-hammer \ 25 | target/release/dsc \ 26 | tools/crudd-speed-battery.sh \ 27 | tools/dtrace/* \ 28 | tools/hammer_loop.sh \ 29 | tools/test_live_repair.sh \ 30 | tools/test_fail_live_repair.sh \ 31 | tools/test_mem.sh \ 32 | tools/test_replay.sh \ 33 | tools/test_repair.sh \ 34 | tools/test_replace_special.sh \ 35 | tools/test_restart_repair.sh \ 36 | tools/test_nightly.sh \ 37 | tools/loop-repair.sh \ 38 | tools/loop-double-repair.sh \ 39 | nightly-info.txt 40 | 41 | ls -l out/crucible-nightly.tar.gz 42 | rm nightly-info.txt 43 | -------------------------------------------------------------------------------- /tools/show_ox_propolis.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -n $1 ]]; then 4 | UUID=$1 5 | else 6 | echo "Please provide the UUID of the instance" 7 | exit 1 8 | fi 9 | 10 | # Show me the stats for $UUID 11 | # All sorts of assumptions here. Make it better if you so desire. 
12 | 13 | if which oxdb > /dev/null; then 14 | OXDB=$(which oxdb) 15 | elif [[ -f ../omicron/target/debug/oxdb ]]; then 16 | OXDB="../omicron/target/debug/oxdb" 17 | elif [[ -f ../omicron/target/release/oxdb ]]; then 18 | OXDB="../omicron/target/release/oxdb" 19 | else 20 | echo "Can't find oxdb" 21 | exit 1 22 | fi 23 | 24 | target=instance_uuid 25 | 26 | echo "Showing $target stats for UUID: $UUID" 27 | for stat in reset ; do 28 | last_time=$($OXDB -a fd00:1122:3344:101::5 query ${target}:${stat} uuid=="$UUID" | jq '.[].measurements[].timestamp '| sort -n | tail -1) 29 | if [[ -z "$last_time" ]]; then 30 | echo "Error finding last timestamp for $stat" 31 | continue 32 | fi 33 | 34 | count=$($OXDB -a fd00:1122:3344:101::5 query ${target}:${stat} uuid=="$UUID" | jq ".[].measurements[] | select(.timestamp == $last_time) | .datum.datum.value") 35 | if [[ -z "$count" ]]; then 36 | echo "Error finding count value for $stat" 37 | continue 38 | fi 39 | 40 | last_time=$(echo $last_time | awk -F\. '{print $1}' | tr 'T' ' ' | tr -d '"') 41 | echo "$last_time count: $count for $stat" 42 | done 43 | -------------------------------------------------------------------------------- /tools/show_ox_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Show me the stats for a hard coded downstairs UUID 4 | # All sorts of assumptions here. Make it better if you so desire. 
5 | if [[ -f ../omicron/target/debug/oxdb ]]; then 6 | OXDB="../omicron/target/debug/oxdb" 7 | elif [[ -f ../omicron/target/release/oxdb ]]; then 8 | OXDB="../omicron/target/release/oxdb" 9 | else 10 | echo "Can't find oxdb" 11 | exit 1 12 | fi 13 | 14 | for stat in flush read write connect; do 15 | last_time=$($OXDB query crucible_downstairs:$stat downstairs_uuid==12345678-3801-3801-3801-000000003801 | jq '.[].measurements[].timestamp '| sort -n | tail -1) 16 | 17 | flush_count=$($OXDB query crucible_downstairs:$stat downstairs_uuid==12345678-3801-3801-3801-000000003801 | jq ".[].measurements[] | select(.timestamp == $last_time) | .datum.CumulativeI64.value") 18 | 19 | echo "$last_time count: $flush_count for $stat" 20 | done 21 | -------------------------------------------------------------------------------- /tools/show_ox_upstairs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -n $1 ]]; then 4 | UUID=$1 5 | else 6 | echo "Please provide the Crucible Upstairs UUID" 7 | exit 1 8 | fi 9 | 10 | # Show me the stats for $UUID 11 | # All sorts of assumptions here. Make it better if you so desire. 
12 | 13 | if which oxdb > /dev/null; then 14 | OXDB=$(which oxdb) 15 | elif [[ -f ../omicron/target/debug/oxdb ]]; then 16 | OXDB="../omicron/target/debug/oxdb" 17 | elif [[ -f ../omicron/target/release/oxdb ]]; then 18 | OXDB="../omicron/target/release/oxdb" 19 | else 20 | echo "Can't find oxdb" 21 | exit 1 22 | fi 23 | 24 | echo "Showing upstairs stats for UUID: $UUID" 25 | for stat in activated flush read read_bytes write write_bytes ; do 26 | last_time=$($OXDB -a fd00:1122:3344:101::5 query crucible_upstairs:$stat upstairs_uuid=="$UUID" | jq '.[].measurements[].timestamp '| sort -n | tail -1) 27 | if [[ -z "$last_time" ]]; then 28 | echo "Error finding last timestamp for $stat" 29 | continue 30 | fi 31 | 32 | count=$($OXDB -a fd00:1122:3344:101::5 query crucible_upstairs:$stat upstairs_uuid=="$UUID" | jq ".[].measurements[] | select(.timestamp == $last_time) | .datum.datum.value") 33 | if [[ -z "$count" ]]; then 34 | echo "Error finding count value for $stat" 35 | continue 36 | fi 37 | 38 | last_time=$(echo $last_time | awk -F\. '{print $1}' | tr 'T' ' ' | tr -d '"') 39 | echo "$last_time count: $count for $stat" 40 | done 41 | -------------------------------------------------------------------------------- /tools/test_ds.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Test import and export functions of crucible-downstairs 4 | 5 | set -o pipefail 6 | 7 | ulimit -n 16384 8 | 9 | ROOT=$(cd "$(dirname "$0")/.." && pwd) 10 | cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) 11 | BINDIR=${BINDIR:-$ROOT/target/debug} 12 | 13 | if pgrep -fl crucible-downstairs; then 14 | echo 'Downstairs already running?' >&2 15 | exit 1 16 | fi 17 | 18 | cds="$BINDIR/crucible-downstairs" 19 | if [[ ! 
-f ${cds} ]]; then 20 | echo "Can't find crucible binary at $cds" 21 | exit 1 22 | fi 23 | 24 | testdir="/tmp/ds_test" 25 | if [[ -d ${testdir} ]]; then 26 | rm -rf ${testdir} 27 | fi 28 | mkdir "${testdir}" 29 | 30 | set -o errexit 31 | uuid="12345678-1234-1234-1234-000000000001" 32 | region_dir="${testdir}/region" 33 | exp="${testdir}/exported_file" 34 | imp="${testdir}/import" 35 | clone_dir="${testdir}/clone" 36 | clone_exp="${testdir}/clone_export_file" 37 | create_clone_dir="${testdir}/create_clone" 38 | echo "Create file for import" 39 | dd if=/dev/urandom of="$imp" bs=512 count=300 40 | 41 | echo "Import region" 42 | ${cds} create -i "$imp" -u $uuid -d "$region_dir" 43 | echo "Export region" 44 | ${cds} export -d "$region_dir" -e "$exp" --count 300 45 | 46 | diff $imp $exp 47 | echo "Import Export test passed" 48 | 49 | # We can make use of the export function to test downstairs clone 50 | echo "Test clone" 51 | echo "Starting source downstairs" 52 | ${cds} run -d "$region_dir" -p 8810 --mode ro > ${testdir}/ds_out.txt & 53 | ds_pid=$! 54 | 55 | sleep 1 56 | if ! 
ps -p $ds_pid; then 57 | echo "Failed to start downstairs" 58 | exit 1 59 | else 60 | echo "Downstairs running" 61 | fi 62 | 63 | echo "Creating new downstairs" 64 | ${cds} create -u $(uuidgen) -d "$clone_dir" --extent-size 100 --extent-count 15 --block-size 512 65 | echo "Cloning existing downstairs" 66 | ${cds} clone -d "$clone_dir" -s 127.0.0.1:12810 67 | 68 | echo "Verify clone using export" 69 | ${cds} export -d "$clone_dir" -e "$clone_exp" --count 300 70 | 71 | diff $imp $clone_exp 72 | 73 | echo "Creating new downstairs from clone directly" 74 | ${cds} create -u $(uuidgen) -d "$create_clone_dir" --extent-size 100 --extent-count 15 --block-size 512 --clone-source 127.0.0.1:12810 75 | 76 | echo "Verify second clone using export" 77 | ${cds} export -d "$create_clone_dir" -e "$clone_exp" --count 300 78 | diff $imp $clone_exp 79 | 80 | echo "Stopping downstairs" 81 | kill "$ds_pid" 82 | echo "Clone test passed" 83 | rm -rf ${testdir} 84 | -------------------------------------------------------------------------------- /tools/test_nightly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # A nightly test, which is just a collection of other tests. 4 | # If you are adding a test, make sure the binaries/scripts it needs are 5 | # also part of what buildomat puts in the nightly archive, currently 6 | # generated in: .github/buildomat/jobs/build-release.sh 7 | 8 | SECONDS=0 9 | err=0 10 | output_file="/tmp/nightly_results" 11 | rm -f "$output_file" 12 | 13 | ROOT=$(cd "$(dirname "$0")/.." 
&& pwd) 14 | cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) 15 | export BINDIR=${BINDIR:-$ROOT/target/release} 16 | 17 | echo "Nightly starts at $(date)" | tee "$output_file" 18 | echo "Running on $(git log -1 --no-color | head -20)" | tee -a "$output_file" 19 | echo "" >> "$output_file" 20 | echo "Environment settings are (Some may be unset):" | tee -a "$output_file" 21 | echo "BINDIR is: $BINDIR" | tee -a "$output_file" 22 | echo "REGION_ROOT is: $REGION_ROOT" | tee -a "$output_file" 23 | echo "WORK_ROOT is: $WORK_ROOT" | tee -a "$output_file" 24 | echo "REGION_SETS is: $REGION_SETS" | tee -a "$output_file" 25 | echo "$(date) hammer start" >> "$output_file" 26 | banner hammer 27 | banner loop 28 | ./tools/hammer_loop.sh -l 200 29 | res=$? 30 | if [[ "$res" -eq 0 ]]; then 31 | echo "$(date) hammer pass" >> "$output_file" 32 | else 33 | echo "$(date) hammer fail with: $res" >> "$output_file" 34 | (( err += 1 )) 35 | fi 36 | echo "" 37 | 38 | sleep 1 39 | banner test 40 | banner replay 41 | echo "$(date) test_replay start" >> "$output_file" 42 | ./tools/test_replay.sh -l 200 43 | res=$? 44 | if [[ "$res" -eq 0 ]]; then 45 | echo "$(date) test_replay pass" >> "$output_file" 46 | else 47 | echo "$(date) test_replay fail with: $res" >> "$output_file" 48 | (( err += 1 )) 49 | fi 50 | echo "" 51 | 52 | sleep 1 53 | banner "test" 54 | banner repair 55 | echo "$(date) test_repair start" >> "$output_file" 56 | ./tools/test_repair.sh -l 500 57 | res=$? 58 | if [[ "$res" -eq 0 ]]; then 59 | echo "$(date) test_repair pass" >> "$output_file" 60 | else 61 | echo "$(date) test_repair fail with: $res" >> "$output_file" 62 | (( err += 1 )) 63 | exit 1 64 | fi 65 | echo "" 66 | 67 | sleep 1 68 | banner restart 69 | banner repair 70 | echo "$(date) test_restart_repair start" >> "$output_file" 71 | ./tools/test_restart_repair.sh -l 50 72 | res=$? 
73 | if [[ "$res" -eq 0 ]]; then 74 | echo "$(date) test_restart_repair pass" >> "$output_file" 75 | else 76 | echo "$(date) test_restart_repair fail with: $res" >> "$output_file" 77 | (( err += 1 )) 78 | exit 1 79 | fi 80 | echo "" 81 | 82 | sleep 1 83 | banner live 84 | banner repair 85 | echo "$(date) test_live_repair start" >> "$output_file" 86 | ./tools/test_live_repair.sh -l 20 87 | res=$? 88 | if [[ "$res" -eq 0 ]]; then 89 | echo "$(date) test_live_repair pass" >> "$output_file" 90 | else 91 | echo "$(date) test_live_repair fail with: $res" >> "$output_file" 92 | (( err += 1 )) 93 | exit 1 94 | fi 95 | echo "" 96 | 97 | sleep 1 98 | banner replace 99 | banner special 100 | echo "$(date) test_replace_special start" >> "$output_file" 101 | ./tools/test_replace_special.sh -l 50 102 | res=$? 103 | if [[ "$res" -eq 0 ]]; then 104 | echo "$(date) test_replace_special pass" >> "$output_file" 105 | else 106 | echo "$(date) test_replace_special fail with: $res" >> "$output_file" 107 | (( err += 1 )) 108 | exit 1 109 | fi 110 | duration=$SECONDS 111 | 112 | banner results 113 | cat "$output_file" 114 | printf "Tests took %d:%02d errors:%d\n" \ 115 | $((duration / 60)) $((duration % 60)) "$err" | tee -a "$output_file" 116 | 117 | -------------------------------------------------------------------------------- /tools/test_reconnect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Start up the downstairs-deamon script. 4 | # Run the quick client one test to verify that restarting 5 | # crucible downstairs in a loop will still work. 6 | err=0 7 | total=0 8 | pass_total=0 9 | SECONDS=0 10 | 11 | # Control-C to cleanup. 
12 | trap ctrl_c INT 13 | function ctrl_c() { 14 | echo "Stopping at your request" 15 | ${dsc} cmd shutdown 16 | } 17 | 18 | loop_log=/tmp/test_reconnect_summary.log 19 | test_log=/tmp/test_reconnect.log 20 | verify_log="/tmp/test_reconnect_verify.log" 21 | 22 | 23 | ROOT=$(cd "$(dirname "$0")/.." && pwd) 24 | cd "$ROOT" || (echo failed to cd "$ROOT"; exit 1) 25 | export BINDIR=${BINDIR:-$ROOT/target/debug} 26 | crucible_test="$BINDIR/crutest" 27 | dsc="$BINDIR/dsc" 28 | if [[ ! -f "$crucible_test" ]] || [[ ! -f "$dsc" ]]; then 29 | echo "Can't find crucible-test binary at $crucible_test" 30 | exit 1 31 | fi 32 | 33 | echo "" > ${loop_log} 34 | echo "" > ${test_log} 35 | echo "starting $(date)" | tee ${loop_log} 36 | echo "Tail $test_log for test output" 37 | 38 | if ! ${dsc} create --cleanup --extent-count 50 >> "$test_log"; then 39 | echo "Failed to create downstairs regions" 40 | exit 1 41 | fi 42 | ${dsc} start >> "$test_log" 2>&1 & 43 | dsc_pid=$! 44 | sleep 5 45 | if ! ps -p $dsc_pid > /dev/null; then 46 | echo "$dsc failed to start" 47 | exit 1 48 | fi 49 | 50 | args=() 51 | port_base=8810 52 | for (( i = 0; i < 30; i += 10 )); do 53 | (( port = port_base + i )) 54 | args+=( -t "127.0.0.1:$port" ) 55 | done 56 | 57 | gen=1 58 | # Initial seed for verify file 59 | if ! "$crucible_test" fill "${args[@]}" -q -g "$gen"\ 60 | --verify-out "$verify_log" --retry-activate >> "$test_log" 2>&1 ; then 61 | echo Failed on initial verify seed, check "$test_log" 62 | ${dsc} cmd shutdown 63 | fi 64 | (( gen += 1 )) 65 | 66 | # Tell dsc to restart downstairs. 67 | if ! "$dsc" cmd enable-restart-all; then 68 | echo "Failed to enable auto-restart on dsc" 69 | exit 1 70 | fi 71 | 72 | # Allow the downstairs to start restarting now. 73 | if ! 
${dsc} cmd enable-random-stop; then 74 | echo "Failed to enable random-stop on dsc" 75 | exit 1 76 | fi 77 | sleep 5 78 | 79 | # Now run the quick crucible client test in a loop 80 | for i in {1..5} 81 | do 82 | SECONDS=0 83 | echo "" > "$test_log" 84 | echo "New loop starts now $(date)" >> "$test_log" 85 | "$crucible_test" generic "${args[@]}" -c 15000 \ 86 | -q -g "$gen" --verify-out "$verify_log" \ 87 | --range \ 88 | --verify-in "$verify_log" \ 89 | --retry-activate >> "$test_log" 2>&1 90 | result=$? 91 | if [[ $result -ne 0 ]]; then 92 | (( err += 1 )) 93 | duration=$SECONDS 94 | printf "[%03d] Error $result after %d:%02d\n" "$i" \ 95 | $((duration / 60)) $((duration % 60)) | tee -a ${loop_log} 96 | mv "$test_log" "$test_log".lastfail 97 | break 98 | fi 99 | duration=$SECONDS 100 | (( gen += 1 )) 101 | (( pass_total += 1 )) 102 | (( total += duration )) 103 | ave=$(( total / pass_total )) 104 | printf "[%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d \ 105 | last_run_seconds:%d\n" "$i" $((duration / 60)) $((duration % 60)) \ 106 | $((ave / 60)) $((ave % 60)) $((total / 60)) $((total % 60)) \ 107 | "$err" $duration | tee -a ${loop_log} 108 | 109 | done 110 | ${dsc} cmd shutdown 111 | sleep 4 112 | echo "Final results:" | tee -a ${loop_log} 113 | printf "[%03d] %d:%02d ave:%d:%02d total:%d:%02d errors:%d last_run_seconds:%d\n" "$i" $((duration / 60)) $((duration % 60)) $((ave / 60)) $((ave % 60)) $((total / 60)) $((total % 60)) "$err" $duration | tee -a ${loop_log} 114 | echo "$(date) Test ends with $err" >> "$test_log" 2>&1 115 | exit "$err" 116 | 117 | -------------------------------------------------------------------------------- /upstairs/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "crucible" 3 | version = "0.0.1" 4 | authors = ["Joshua M. 
Clulow ", "Alan Hanson ( 8 | Option>>, 9 | ); 10 | 11 | impl BlockRes { 12 | /// Consume this BlockRes and send Ok to the receiver 13 | pub fn send_ok(self, t: T) { 14 | self.send_result(Ok(t)) 15 | } 16 | 17 | /// Consume this BlockRes and send an Err to the receiver 18 | pub fn send_err(self, e: E) { 19 | self.send_result(Err(e)) 20 | } 21 | 22 | /// Consume this BlockRes and send a Result to the receiver 23 | pub fn send_result(mut self, result: Result) { 24 | // XXX this eats the result! 25 | let _ = self.0.take().expect("sender was populated").send(result); 26 | } 27 | 28 | /// Builds an empty `BlockRes`, for use in unit testing 29 | #[cfg(test)] 30 | pub fn dummy() -> Self { 31 | let (tx, _) = oneshot::channel(); 32 | Self(Some(tx)) 33 | } 34 | } 35 | 36 | impl Drop for BlockRes { 37 | fn drop(&mut self) { 38 | if self.0.is_some() { 39 | // During normal operation, we expect to reply to every BlockOp, so 40 | // we'll fire a DTrace probe here. 41 | cdt::up__block__req__dropped!(); 42 | } 43 | } 44 | } 45 | 46 | /** 47 | * When BlockOps are sent to a guest, the calling function receives a waiter 48 | * that it can block on. This uses a single-use channel to receive the 49 | * result of a particular operation, and is meant to be paired with a 50 | * sender stored in the `BlockOp`. 
51 | */ 52 | #[must_use] 53 | pub(crate) struct BlockOpWaiter { 54 | recv: oneshot::Receiver>, 55 | } 56 | 57 | impl BlockOpWaiter { 58 | /// Create associated `BlockOpWaiter`/`BlockRes` pair 59 | pub fn pair() -> (Self, BlockRes) { 60 | let (send, recv) = oneshot::channel(); 61 | (Self { recv }, BlockRes(Some(send))) 62 | } 63 | 64 | /// Consume this BlockOpWaiter and wait on the message 65 | /// 66 | /// Returns `None` if the other side drops without a reply 67 | pub async fn wait_raw(self) -> Option> { 68 | self.recv.await.ok() 69 | } 70 | } 71 | 72 | impl BlockOpWaiter { 73 | /// Wait, translating disconnection into `RecvDisconnected` 74 | pub async fn wait(self) -> Result { 75 | self.wait_raw() 76 | .await 77 | .unwrap_or(Err(CrucibleError::RecvDisconnected)) 78 | } 79 | } 80 | 81 | #[cfg(test)] 82 | impl BlockOpWaiter { 83 | pub fn try_wait(&mut self) -> Option> { 84 | match self.recv.try_recv() { 85 | Ok(reply) => Some(reply), 86 | Err(e) => match e { 87 | oneshot::error::TryRecvError::Empty => None, 88 | oneshot::error::TryRecvError::Closed => { 89 | Some(Err(CrucibleError::RecvDisconnected)) 90 | } 91 | }, 92 | } 93 | } 94 | } 95 | 96 | #[cfg(test)] 97 | mod test { 98 | use super::*; 99 | 100 | #[tokio::test] 101 | async fn test_blockreq_and_blockreqwaiter() { 102 | let (brw, res) = BlockOpWaiter::pair(); 103 | 104 | res.send_ok(()); 105 | 106 | let reply = brw.wait().await; 107 | assert!(reply.is_ok()); 108 | } 109 | 110 | #[tokio::test] 111 | async fn test_blockreq_and_blockreqwaiter_err() { 112 | let (brw, res) = BlockOpWaiter::<()>::pair(); 113 | 114 | res.send_err(CrucibleError::UpstairsInactive); 115 | 116 | let reply = brw.wait().await; 117 | assert!(reply.is_err()); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /workspace-hack/.gitattributes: -------------------------------------------------------------------------------- 1 | # Avoid putting conflict markers in the generated Cargo.toml file, 
since their presence breaks 2 | # Cargo. 3 | # Also do not check out the file as CRLF on Windows, as that's what hakari needs. 4 | Cargo.toml merge=binary -crlf 5 | -------------------------------------------------------------------------------- /workspace-hack/build.rs: -------------------------------------------------------------------------------- 1 | // A build script is required for cargo to consider build dependencies. 2 | fn main() {} 3 | -------------------------------------------------------------------------------- /workspace-hack/src/lib.rs: -------------------------------------------------------------------------------- 1 | // This is a stub lib.rs. 2 | -------------------------------------------------------------------------------- /x509/Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | .PHONY: certs 4 | 5 | all: certs 6 | 7 | certs: 8 | ./gen_certs.sh 9 | 10 | clean: 11 | rm *.json *.pem *.csr || true 12 | -------------------------------------------------------------------------------- /x509/README.md: -------------------------------------------------------------------------------- 1 | # How to enable TLS between the Upstairs and Downstairs # 2 | 3 | Generate the appropriate X509 keys and certificates by running `make` in this 4 | directory. 
5 | 6 | Create a downstairs like normal, but run with new arguments: 7 | 8 | cargo run --release -q -p crucible-downstairs -- \ 9 | run -p "44100" -d "disks/d0/" \ 10 | --cert-pem x509/downstairs0.pem \ 11 | --key-pem x509/downstairs0-key.pem \ 12 | --root-cert-pem x509/ca.pem 13 | 14 | cargo run --release -q -p crucible-downstairs -- \ 15 | run -p "44101" -d "disks/d1/" \ 16 | --cert-pem x509/downstairs1.pem \ 17 | --key-pem x509/downstairs1-key.pem \ 18 | --root-cert-pem x509/ca.pem 19 | 20 | cargo run --release -q -p crucible-downstairs -- \ 21 | run -p "44102" -d "disks/d2/" \ 22 | --cert-pem x509/downstairs2.pem \ 23 | --key-pem x509/downstairs2-key.pem \ 24 | --root-cert-pem x509/ca.pem 25 | 26 | Run the upstairs with similar arguments: 27 | 28 | cargo run --bin=crutest -- \ 29 | -q \ 30 | -t 127.0.0.1:44100 \ 31 | -t 127.0.0.1:44101 \ 32 | -t 127.0.0.1:44102 \ 33 | --key "HLyo7ZhAf/E9IdX2DDHPHJO0dLgrRxZabWiTlnoKZXc=" \ 34 | --cert-pem x509/upstairs.pem \ 35 | --key-pem x509/upstairs-key.pem \ 36 | --root-cert-pem x509/ca.pem \ 37 | --workload one 38 | 39 | Note that the Downstairs at port 44100 is running with the key and certificate 40 | for downstairs0, the Downstairs at port 44101 is running for downstairs1, etc. 41 | This is important because the Upstairs will attempt connecting to the clients in 42 | its list with server name "downstairs${i}", where `i` is the number of the 43 | client in its list: 44 | 45 | -t 127.0.0.1:44100 => downstairs0 46 | -t 127.0.0.1:44101 => downstairs1 47 | -t 127.0.0.1:44102 => downstairs2 48 | 49 | To test that certificates not signed by x509/ca.pem are rejected, try using 50 | x509/selfsigned.pem and x509/selfsigned-key.pem in the client connection. 
The 51 | client should fail with: 52 | 53 | error: InvalidCertificateData("invalid peer certificate: UnknownIssuer") 54 | 55 | And the Downstairs should say: 56 | 57 | rejecting connection from 127.0.0.1:49462: Custom { kind: InvalidData, error: AlertReceived(BadCertificate) } 58 | 59 | If you try to connect without certificates in the client, it should fail, and 60 | the Downstairs should say: 61 | 62 | rejecting connection from 127.0.0.1:38116: Custom { kind: InvalidData, error: CorruptMessage } 63 | 64 | -------------------------------------------------------------------------------- /x509/gen_certs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | #cfssl print-defaults config > ca-config.json 5 | #cfssl print-defaults csr > ca-csr.json 6 | cat > ca-config.json < ca-csr.json < downstairs${i}-csr.json < upstairs-csr.json < selfsigned-csr.json < bad-upstairs-csr.json <